22. Oktober 2007
PDF 2-Up Script mit pyPdf
Mit dem folgenden Skript (sicherlich noch ausbaufähig!) kann man eine PDF-Datei ins 2-Up Format umwandeln (d.h. 2 Seiten auf eine):
#!/usr/bin/env python
# -*- coding: utf-8 -*-
# Basiert auf der pyPdf-Library von Mathieu Fenniak
# Siehe http://pybrary.net/pyPdf
#
# (C) Henning von Bargen 2007
#
import os
import sys
import unittest
from cStringIO import StringIO
from pyPdf import PdfFileWriter, PdfFileReader
from reportlab.lib.units import cm, mm, inch
from reportlab.lib import pagesizes
from reportlab.pdfgen import canvas
from pyPdf.pdf import *
from pyPdf.generic import *
# Size of every generated 2-up sheet: A4 in landscape orientation.
# PAGESIZE[0] (the sheet width) is used below to position the two halves.
PAGESIZE = pagesizes.landscape(pagesizes.A4)
def framePage(canvas, title):
    """Draw the sheet decoration: a centred title and two page-number footers.

    canvas -- reportlab canvas to draw on; its current page number n
              determines the footer labels (2n-1 and 2n).
    title  -- text centred horizontally at 200 mm height.

    The canvas graphics state is saved before and restored after drawing.
    """
    canvas.saveState()
    canvas.setFont('Times-BoldItalic', 12)
    canvas.drawCentredString(PAGESIZE[0] * 1/2, 200*mm, title)
    canvas.setFont('Times-Roman', 10)
    # One footer centred at 1/4 of the sheet width (label 2n-1) and one at
    # 3/4 of the sheet width (label 2n).
    for quarters, numberShift in ((1, -1), (3, 0)):
        canvas.drawCentredString(
            PAGESIZE[0] * quarters / 4,
            10*mm,
            'Page %d' % (canvas.getPageNumber() * 2 + numberShift),
        )
    canvas.restoreState()
    #canvas.doForm("frame") # a logo could be placed here
def makeEmptyPagesPdf(nPages):
    """Create an in-memory PDF holding nPages decorated but otherwise empty sheets.

    Every sheet has size PAGESIZE and is decorated by framePage() with the
    fixed title "This is a title".  The index of each sheet is printed while
    it is generated.

    Returns a StringIO buffer containing the PDF data, rewound to position 0.
    """
    sheets = canvas.Canvas(None)  # no file name: the data is fetched via getpdfdata()
    sheets.setPageSize(PAGESIZE)
    sheets.showOutline()
    for sheetIndex in range(nPages):
        print(sheetIndex)
        framePage(sheets, "This is a title")
        sheets.showPage()

    pdfData = StringIO()
    pdfData.write(sheets.getpdfdata())
    pdfData.seek(0)
    return pdfData
def _pushTransformPopGS(contents, offset, scale, pdf):
    """Wrap a content stream in "q" ... "Q" with a leading "cm" transformation.

    The resulting operation list is:  q, [sx 0 0 sy tx ty] cm, <original
    operations>, Q.  The push/pop pair isolates the stream from graphics
    state changes such as transformation matrices.

    contents -- content object passed on to ContentStream.
    offset   -- (tx, ty) translation; only indices 0 and 1 are read.
    scale    -- (sx, sy) scaling; only indices 0 and 1 are read.
    pdf      -- passed on to ContentStream together with contents.

    Returns the new ContentStream.  offset and scale are printed for tracing.
    """
    print("offset: %s" % (offset,))
    print("scale: %s" % (scale,))
    wrapped = ContentStream(contents, pdf)
    matrix = [
        FloatObject(str(component))
        for component in (scale[0], 0, 0, scale[1], offset[0], offset[1])
    ]
    # Prepend "q" and the matrix in one step, then close with "Q".
    wrapped.operations[0:0] = [[[], "q"], [matrix, "cm"]]
    wrapped.operations.append([[], "Q"])
    return wrapped
def mergePage(page1, page2, offset, scale):
    """Draw page2 on top of page1, scaled and shifted; page1 is modified in place.

    page1  -- target page; its /Contents and /Resources entries are replaced.
    page2  -- page whose content is added, transformed by offset and scale.
    offset -- (x, y) translation handed to _pushTransformPopGS.
    scale  -- (x, y) scaling handed to _pushTransformPopGS.

    Both pages must have /Resources and /Contents entries.  Returns None.
    """
    baseResources = page1["/Resources"].getObject()
    overlayResources = page2["/Resources"].getObject()

    merged = DictionaryObject()
    renames = {}
    resourceKinds = ("/ExtGState", "/Font", "/XObject",
                     "/ColorSpace", "/Pattern", "/Shading")
    for kind in resourceKinds:
        combined, kindRenames = PageObject._mergeResources(
            baseResources, overlayResources, kind)
        if not combined:
            continue
        merged[NameObject(kind)] = combined
        renames.update(kindRenames)

    # /ProcSet is combined as the set union of both pages' entries.
    baseProcs = ImmutableSet(
        baseResources.get("/ProcSet", ArrayObject()).getObject())
    overlayProcs = ImmutableSet(
        overlayResources.get("/ProcSet", ArrayObject()).getObject())
    merged[NameObject("/ProcSet")] = ArrayObject(baseProcs.union(overlayProcs))

    pdf = page1.pdf
    streams = ArrayObject()
    streams.append(PageObject._pushPopGS(page1["/Contents"].getObject(), pdf))

    # Apply the collected renames to page2's content before transforming it.
    overlay = page2['/Contents'].getObject()
    overlay = PageObject._contentStreamRename(overlay, renames, pdf)
    streams.append(_pushTransformPopGS(overlay, offset, scale, pdf))

    page1[NameObject('/Contents')] = ContentStream(streams, pdf)
    page1[NameObject('/Resources')] = merged
def write2Up(infname, outfname):
    """Convert the PDF file infname to 2-up format and write it to outfname.

    Two consecutive input pages are placed side by side on one sheet of size
    PAGESIZE: even-indexed pages at x offset 0, odd-indexed pages at half
    the sheet width, each scaled by 0.7 in both directions.  With an odd
    page count the right half of the last sheet stays empty.

    infname  -- path of the PDF file to read.
    outfname -- path of the PDF file to create (overwritten if it exists).

    Progress information is printed to stdout.  Both files are closed before
    the function returns, also when an error occurs.
    """
    output = PdfFileWriter()
    # The input stays open until the output has been written, because the
    # reader may still need to fetch page data from it at that point.
    with open(infname, "rb") as inputStream:
        reader = PdfFileReader(inputStream)
        nPages = reader.getNumPages()
        print("nPages= %s" % nPages)
        nNupPages = (nPages + 1) // 2  # round up: an odd last page gets its own sheet
        print("nNupPages= %s" % nNupPages)
        emptyPages = PdfFileReader(makeEmptyPagesPdf(nNupPages))

        page = None
        nupindx = 0
        for indx in range(nPages):
            print("processing page %s" % indx)
            if indx % 2 == 0:
                # A new sheet starts: emit the finished one first.
                if page is not None:
                    output.addPage(page)
                print("nupindx= %s" % nupindx)
                page = emptyPages.getPage(nupindx)
                nupindx += 1
            mergePage(page, reader.getPage(indx),
                      (PAGESIZE[0]/2 * (indx % 2), 0),  # offset x,y
                      (0.7, 0.7)                        # scaling x,y
                      )
        if page is not None:
            output.addPage(page)

        # NOTE(review): if outfname names the same file as infname, opening
        # it for writing truncates the input -- callers should avoid that.
        with open(outfname, "wb") as outputStream:
            output.write(outputStream)
if __name__ == "__main__":
    # Command line: <script> input.pdf [output.pdf]
    # Without an explicit output name, "-new.pdf" replaces the input's extension.
    if len(sys.argv) < 2:
        sys.stderr.write("Usage: %s input.pdf [output.pdf]\n" % sys.argv[0])
        sys.exit(2)
    infname = sys.argv[1]
    if len(sys.argv) > 2:
        outfname = sys.argv[2]
    else:
        outfname = os.path.splitext(infname)[0] + "-new.pdf"
    write2Up(infname, outfname)