#!/usr/bin/python
##############################################################################
#
# Kevin DELDYCKE
# Guillaume MICHON
#
# Copyright (C) 2003-2005 Nexedi SARL
#
# This program is Free Software; you can redistribute it and/or
# modify it under the terms of the GNU General Public License
# as published by the Free Software Foundation; either version 2
# of the License, or (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program; if not, write to the Free Software
# Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA  02111-1307, USA.
#
##############################################################################

# TODO :
#   - Clean this entire piece of ugly code ! (kev)
#   - Padding in tables (take a look at styles.xml in an embedded object ;
#     or perhaps there are implicit default values)
#   - Determining if a table (or a text or a shape) is in the content frame
#     or not, to place it in the correct file
#   - Place the table in space according to its coordinates in oo file
#     (for example, by using "parsing tables" and invisible columns)

"""Convert OpenOffice Draw documents into page templates.

For every input document the script writes the embedded pictures, a
``<name>_pdf_template.pt`` file describing the static page layout and, when
the document embeds spreadsheets, a ``<name>.pt`` file holding their tables.

NOTE: the module relies on PyXML (``xml.dom.ext``) and on Python 2 only
constructs (``list.sort(cmpfunc)``, ``cmp``, ``string.lower``).
"""

import getopt, sys, os
import string
import re
from string import lower, upper
from zipfile import ZipFile
from xml.dom import implementation
from xml.dom import EMPTY_NAMESPACE, XML_NAMESPACE
from xml.dom.ext import PrettyPrint
from xml.dom.ext.reader import PyExpat
from xml.dom import Node

pdf_content_frame = "pdf_content_frame"  # Name of the rectangle which represents the frame in the oo document
inchPoint = 0.013837                     # Value in inches of a typographic point
pt = inchPoint * 2.54                    # Value in centimeters of a typographic point
unit = "cm"                              # Define the used position unit
verbose = 0                              # Set extra printing
floatRegularExpr = re.compile(r"^\d*\.?\d*")  # A float number (3.14 for example)
version = "0.1.4"
lines_limit = 50                         # Rows are read while the row index is <= this value
cols_limit = 20                          # Cells are read while the column index is <= this value


class tableTemplate:
    """Collects the style of every cell of a table and renders a tablestyle.

    ``content`` is a list of lines; each line is a list of dictionaries
    mapping OpenOffice attribute names (``fo:border`` ...) to their value.
    """

    # ooName           : attribute name in the OpenOffice style
    # ptName           : command name emitted in the generated tablestyle
    # scan             : optional, restricts the grouping direction
    # ignored_if_empty : 'no' means the attribute makes a cell "non empty"
    attributes = [
        {'ooName': 'fo:background-color', 'ptName': 'BACKGROUND', 'ignored_if_empty': 'no'},
        {'ooName': 'fo:color',            'ptName': 'TEXTCOLOR',  'ignored_if_empty': 'no'},
        {'ooName': 'fo:text-align',       'ptName': 'ALIGNMENT',  'ignored_if_empty': 'yes'},
        {'ooName': 'fo:border',           'ptName': 'GRID',       'ignored_if_empty': 'no'},
        {'ooName': 'fo:vertical-align',   'ptName': 'VALIGN',     'ignored_if_empty': 'yes'},
        {'ooName': 'fo:font-size',        'ptName': 'FONTSIZE',   'ignored_if_empty': 'yes'},
        {'ooName': 'style:font-name',     'ptName': 'FONT',       'ignored_if_empty': 'yes'},
        {'ooName': 'fo:border-left',      'ptName': 'LINEBEFORE', 'scan': 'vertical',   'ignored_if_empty': 'no'},
        {'ooName': 'fo:border-right',     'ptName': 'LINEAFTER',  'scan': 'vertical',   'ignored_if_empty': 'no'},
        {'ooName': 'fo:border-top',       'ptName': 'LINEABOVE',  'scan': 'horizontal', 'ignored_if_empty': 'no'},
        {'ooName': 'fo:border-bottom',    'ptName': 'LINEBELOW',  'scan': 'horizontal', 'ignored_if_empty': 'no'},
    ]

    # A string description of a border in oo format
    borderRegularExpr = re.compile(r"""
        ^               # Beginning of string
        (\d*\.?\d*)     # A float value
        \D*             # Float ends with a non-numerical digit
        .*              # Middle of string : anything
        (\#{1}\S{6})    # A RGB hexa value
        $               # End of string
        """, re.VERBOSE)

    def __init__(self, name):
        self.content = []
        self.currentLine = []
        self.name = name

    def endLine(self):
        """Store the line being built and start a new one."""
        self.content.append(self.currentLine)
        self.currentLine = []

    def append(self, cell, styleDict, default_style):
        """Append the style of `cell` to the current line.

        Only the attributes listed in `attributes` that have a non-empty
        value in the cell style (or in `default_style`) are recorded.
        """
        myCell = {'oo_default_style': default_style}
        for attr in tableTemplate.attributes:
            ooName = attr["ooName"]
            (node_ns, node_name) = ooName.split(":", 1)
            cellAttribute = getStyleAttribute(styleDict, cell, "table",
                                              node_name, node_ns, default_style)
            if cellAttribute:
                myCell[ooName] = cellAttribute
        self.currentLine.append(myCell)

    def isEmptyCell(self, line, col):
        """Return 0 when the cell carries a visible attribute, else 1.

        A missing cell or a missing attribute counts as empty.
        """
        for attr in tableTemplate.attributes:
            if attr['ignored_if_empty'] != 'no':
                continue
            try:
                my_attribute = self.content[line][col][attr['ooName']]
            except (IndexError, KeyError):
                continue
            if my_attribute != "none":
                return 0
        return 1

    def getMinimalBounds(self):
        """
        Reduce self.content limits in order to have a table smaller than
        256 x 32,000.

        Trailing empty cells and trailing empty lines are dropped, then the
        remaining lines are padded with empty cells to form a rectangle.
        Returns a dictionary with the 'width' and 'height' of the result.
        """
        empty_lines = 0
        no_more_empty_lines = 0

        # Eliminate all empty cells at the ends of lines and columns
        for line in range(len(self.content) - 1, -1, -1):
            row = self.content[line]
            empty_cells = 0
            for cell in range(len(row) - 1, -1, -1):
                if self.isEmptyCell(line, cell):
                    empty_cells += 1
                else:
                    break
            if (not no_more_empty_lines) and (empty_cells == len(row)):
                empty_lines += 1
            else:
                self.content[line] = row[:len(row) - empty_cells]
                no_more_empty_lines = 1
        self.content = self.content[:len(self.content) - empty_lines]

        # Remake a rectangle with self.content
        max_cols = 0
        for row in self.content:
            if len(row) > max_cols:
                max_cols = len(row)
        for row in self.content:
            while len(row) < max_cols:
                row.append({})

        return {'width': max_cols, 'height': len(self.content)}

    # Build the Table Template
    def render(self, stylesheet, template, template_root):
        """Append a <tablestyle> describing this table to `stylesheet`.

        `template` is the DOM document used to create the elements;
        `template_root` is accepted but not used. Always returns 1.
        """
        tablestyle = template.createElement("tablestyle")
        tablestyle.setAttribute("name", self.name)
        stylesheet.appendChild(tablestyle)

        color_attributes = ("fo:background-color", "fo:color")
        align_attributes = ("fo:text-align", "fo:vertical-align")
        border_attributes = ("fo:border", "fo:border-left", "fo:border-right",
                             "fo:border-top", "fo:border-bottom")

        for current_attribute in tableTemplate.attributes:
            ooName = current_attribute["ooName"]
            for match in self._scanForAttribute(current_attribute):
                value = match["value"]
                if value == "none":
                    continue
                stylecmd = template.createElement("stylecmd")
                attrString = "('" + current_attribute["ptName"] + "', " + match["coord"] + ","
                if ooName in color_attributes:
                    attrString += color_hex2dec(value) + ")"
                elif ooName in align_attributes:
                    attrString += "'" + value.upper() + "')"
                elif ooName in border_attributes:
                    border_values = self.parseBorder(value)
                    attrString += "%s,%s)" % (str(border_values["width"]), border_values["color"])
                elif ooName == "fo:font-size":
                    # Drop the two-character unit suffix of the size
                    attrString += value[:-2] + ")"
                elif ooName == "style:font-name":
                    attrString += "'%s')" % (getFontName(value))
                stylecmd.setAttribute("expr", attrString)
                tablestyle.appendChild(stylecmd)
        return 1

    def _scanForAttribute(self, current_attribute):
        """Group neighbouring cells that share the same attribute value.

        Returns a list of {'coord': "(c0,l0),(c1,l1)", 'value': value}
        dictionaries. Depending on the optional 'scan' key of
        `current_attribute`, cells are grouped by line, by column, or by
        line first and then by column for what remains ("both").
        """
        attribute = current_attribute["ooName"]
        returnValue = []
        scanType = current_attribute.get("scan", "both")
        # In "both" mode single cells are left to the vertical scan
        if scanType == "horizontal":
            cellsCondition = 0
        else:
            cellsCondition = 1

        # Build the map of cells whose attribute is set
        found = []
        for line in self.content:
            foundLine = []
            for cell in line:
                if cell.get(attribute):
                    foundLine.append(1)
                else:
                    foundLine.append(0)
            found.append(foundLine)

        # Scan lines for groups of cells with the same attribute
        if scanType == "both" or scanType == "horizontal":
            for line in range(len(found)):
                cells = 0
                attrValue = ""
                for col in range(len(found[line])):
                    if found[line][col]:
                        foundValue = self.content[line][col][attribute]
                        if attrValue and attrValue != foundValue:
                            self._buildAttribute(line, col - 1, cells, attrValue,
                                                 returnValue, found, cellsCondition)
                            cells = 0
                            attrValue = ""
                        cells += 1
                        attrValue = foundValue
                    else:
                        self._buildAttribute(line, col - 1, cells, attrValue,
                                             returnValue, found, cellsCondition)
                        cells = 0
                        attrValue = ""
                # End of line : keep coords and attribute value, and reset.
                # Guarded so that an empty line does not use an undefined column.
                if found[line]:
                    self._buildAttribute(line, len(found[line]) - 1, cells, attrValue,
                                         returnValue, found, cellsCondition)

        # Scan columns for groups of cells with the same attribute.
        # The parentheses matter: without them an empty table in "both" mode
        # reached found[0] and raised IndexError.
        if (scanType == "both" or scanType == "vertical") and len(found) > 0:
            last_line = len(found) - 1
            for col in range(len(found[0])):
                cells = 0
                attrValue = ""
                for line in range(len(found)):
                    if found[line][col]:
                        foundValue = self.content[line][col][attribute]
                        if attrValue and attrValue != foundValue:
                            # Keep coords and attribute value for later return
                            self._buildAttribute(line - 1, col, cells, attrValue,
                                                 returnValue, found, 0, "vertical")
                            cells = 0
                            attrValue = ""
                        cells += 1
                        attrValue = foundValue
                    else:
                        self._buildAttribute(line - 1, col, cells, attrValue,
                                             returnValue, found, 0, "vertical")
                        cells = 0
                        attrValue = ""
                # End of column : keep coords and attribute value, and reset
                self._buildAttribute(last_line, col, cells, attrValue,
                                     returnValue, found, 0, "vertical")
        return returnValue

    # Build an item of attribute dictionary
    def _buildAttribute(self, line, col, cells, attrValue, attributeDict, found,
                        cellsCondition=1, geometry="horizontal"):
        """Record a group of `cells` cells ending at (line, col).

        Nothing happens unless `cells` is greater than `cellsCondition`.
        Recorded cells are cleared in `found` so they are not reported twice.
        """
        if cells > cellsCondition:
            attributeDict.append({
                'coord': self._buildCoord(line, col, cells, geometry=geometry),
                'value': attrValue,
            })
            for i in range(cells):
                if geometry == "horizontal":
                    found[line][col - i] = 0
                else:
                    found[line - i][col] = 0

    # Build coords like "(0,0),(3,0)"
    def _buildCoord(self, line, col, numberOfCells, lastCell=1, geometry="horizontal"):
        """Return the "(col,line),(col,line)" span of a group of cells.

        (line, col) is the last cell of the group when `lastCell` is true,
        its first cell otherwise.
        """
        if numberOfCells < 1:
            return ""
        if lastCell:
            mult = -1
        else:
            mult = 1
        offset = mult * (numberOfCells - 1)
        if geometry == "horizontal":
            other = "(" + str(col + offset) + "," + str(line) + ")"
        else:
            other = "(" + str(col) + "," + str(line + offset) + ")"
        here = "(" + str(col) + "," + str(line) + ")"
        if lastCell:
            return other + "," + here
        return here + "," + other

    # Returns line width in points and color from a oo string
    # like "0.0008inch solid #000000"
    def parseBorder(self, sourceString):
        """Return {'width': points, 'color': "(r,g,b)"} for a border string.

        Falls back to a 0.1 wide black line when the string cannot be parsed.
        NOTE(review): the width is always divided by `inchPoint`, i.e. it is
        treated as inches whatever unit the string carries.
        """
        width = 0.1
        color = "(0,0,0)"
        REresult = tableTemplate.borderRegularExpr.search(sourceString)
        if REresult:
            try:
                parsed_width = float(REresult.group(1))
            except ValueError:
                # No usable number in front of the color: keep the defaults
                parsed_width = None
            if parsed_width is not None:
                width = round(parsed_width / inchPoint, 2)
                color = color_hex2dec(REresult.group(2))
        return {"width": width, "color": color}

    # Define new bounds for self.content
    def newBounds(self, height=0, width=0):
        """Enlarge self.content with empty lines/cells up to the given size."""
        while height > len(self.content):
            self.content.append([])
        for line in range(height):
            while width > len(self.content[line]):
                self.content[line].append({})
# End of class


def usage():
    """Print the command line help."""
    print("""oo2pt v""" + version + """
Usage : oo2pt [options] file1.sxd [file2.sxd ...]
Options :
  -o directory, --output=DIRECTORY   Define an output directory.
  -h, --help                         Show this screen.
  -v, --verbose                      Print verbose messages.
""")


def _writeTemplate(root, out_file, encoding=None):
    """Pretty-print the DOM node `root` into `out_file` and report it."""
    out = open(out_file, 'w')
    try:
        if encoding is None:
            PrettyPrint(root, out)
        else:
            PrettyPrint(root, out, encoding)
    finally:
        out.close()
    print("Created: " + out_file)


def _buildTable(table, sprStyles, tableTpName, template_pdf, stylesheet, template_root):
    """Convert one <table:table> element into a <table> element.

    `sprStyles` maps a style family to its {style name: style node}
    dictionary. The cell styles are rendered as a tablestyle named
    `tableTpName` appended to `stylesheet`. Returns the new element, created
    with the `template_pdf` document.
    """
    XMLTable = template_pdf.createElement("table")
    XMLTable.setAttribute("splitbyrow", "1")
    XMLTable.setAttribute("repeatrows", "0")
    XMLTable.setAttribute("repeatcols", "0")
    tableTp = tableTemplate(tableTpName)
    texts = []

    # Store informations on column widths and default styles
    line_number = 0
    widths = []
    default_styles = []
    for col in table.getElementsByTagName("table:table-column"):
        repeated = col.getAttributeNS(ns["table"], "number-columns-repeated")
        colWidth = getStyleAttribute(sprStyles, col, "table", "column-width")
        defStyle = col.getAttributeNS(ns["table"], "default-cell-style-name")
        if colWidth == "":
            colWidth = -1
        if not repeated:
            repeated = 1
        for i in range(int(repeated)):
            widths.append(colWidth)
            default_styles.append(defStyle)

    # Scan table and store usable informations
    for line in table.getElementsByTagName("table:table-row"):
        repeated_lines = line.getAttributeNS(ns["table"], "number-rows-repeated")
        if not repeated_lines:
            repeated_lines = 1
        else:
            repeated_lines = int(repeated_lines)
        for i in range(repeated_lines):
            if line_number > lines_limit:
                break
            texts_line = {'line': []}
            col_number = 0
            for cell in line.getElementsByTagName("table:table-cell"):
                repeated_cells = cell.getAttributeNS(ns["table"], "number-columns-repeated")
                if not repeated_cells:
                    repeated_cells = 1
                else:
                    repeated_cells = int(repeated_cells)
                for j in range(repeated_cells):
                    if col_number > cols_limit:
                        break
                    texts_cell = {'texts': []}
                    # Apply the style
                    tableTp.append(cell, sprStyles, default_styles[col_number])
                    # Insert column width if it is the first row
                    if line_number == 0 and widths[col_number] != -1:
                        texts_cell['width'] = convertToUnit(widths[col_number], unit)
                    for text in cell.getElementsByTagName("text:p"):
                        for k in range(text.childNodes.length):
                            child = text.childNodes[k]
                            if child.nodeType == Node.TEXT_NODE:
                                texts_cell['texts'].append(child.nodeValue)
                    texts_line['line'].append(texts_cell)
                    col_number += 1
            # Insert line height
            rowHeight = getStyleAttribute(sprStyles, line, "table", "row-height")
            if rowHeight != "":
                texts_line['height'] = convertToUnit(rowHeight, unit)
            tableTp.endLine()
            texts.append(texts_line)
            line_number += 1

    # Reduce the table to the minimum
    style_min_bounds = tableTp.getMinimalBounds()
    text_min_bounds = getMinimalBounds(texts)
    width = max(style_min_bounds['width'], text_min_bounds['width'])
    height = max(style_min_bounds['height'], text_min_bounds['height'])
    tableTp.newBounds(width=width, height=height)
    setBounds(texts, width=width, height=height, widths=widths)

    # Finally define the table in the page template
    for line in range(height):
        XMLLine = template_pdf.createElement("tr")
        line_content = texts[line]['line']
        for cell in range(width):
            cell_info = line_content[cell]
            cell_content = cell_info['texts']
            XMLCell = template_pdf.createElement("td")
            # Column width
            if 'width' in cell_info:
                XMLCell.setAttribute("colwidth", cell_info['width'])
            # Text content
            if len(cell_content) == 0:
                XMLCell.appendChild(template_pdf.createTextNode(" "))
            for cell_text in cell_content:
                XMLCell.appendChild(template_pdf.createTextNode(cell_text))
            XMLLine.appendChild(XMLCell)
        # Line height
        if 'height' in texts[line]:
            XMLLine.setAttribute("rowheight", texts[line]['height'])
        XMLTable.appendChild(XMLLine)

    if tableTp.render(stylesheet, template, template_root):
        XMLTable.setAttribute("style", tableTpName)
    else:
        XMLTable.setAttribute("style", "default")
    return XMLTable


def oo2pt(oo_doc):
    """Convert the OpenOffice document `oo_doc`.

    Writes, in the output directory (the ``global_output_dir`` module
    variable when set, else the directory of the document):
      - every file stored under ``Pictures/`` in the archive;
      - ``<name>.pt`` with the tables of the embedded spreadsheets, if any;
      - ``<name>_pdf_template.pt`` with the page layout.
    Problems with the input file are reported on stdout and abort the
    conversion of that file only.
    """
    global oo_content_root, oo_styles_root, template, page_height, page_width, \
           page_leftmargin, page_rightmargin, page_topmargin, page_bottommargin, \
           static, frame, style_list, unit, ns, global_output_dir, frame_found, pagetp

    frame_found = 0

    # Verify the existence of the file
    if not os.path.isfile(oo_doc):
        print("ERROR: " + oo_doc + " doesn't exist")
        return

    # Define global file name and path
    oo_doc = os.path.abspath(oo_doc)
    file_name = os.path.splitext(os.path.basename(oo_doc))[0]
    # global_output_dir only exists when the module is run as a script
    requested_dir = globals().get("global_output_dir")
    if requested_dir is None:
        output_dir = os.path.dirname(oo_doc)
    else:
        # Use the directory itself: os.path.dirname() used to select its
        # parent whenever the option had no trailing separator.
        output_dir = os.path.abspath(requested_dir)

    if verbose:
        print("Input file: " + oo_doc)
        print("Output dir: " + output_dir)
        print("Base filename: " + file_name)

    # Try to unzip the Open Office doc
    try:
        oo_unzipped = ZipFile(oo_doc, mode="r")
    except Exception:
        print("ERROR: " + oo_doc + " is not a valid OpenOffice file")
        return

    # Test the integrity of the file
    if oo_unzipped.testzip() is not None:
        oo_unzipped.close()
        print("ERROR: " + oo_doc + " is corrupted")
        return

    # Regular expression to delete the doctype reference
    #dtd_killer = re.compile(r'(<\s*!\s*DOCTYPE)([^>]*)>', re.I)

    # List and load the content of the zip file
    oo_files = {}
    for name in oo_unzipped.namelist():
        oo_files[name] = oo_unzipped.read(name)
        # Delete all doctype reference
        #oo_files[name] = dtd_killer.sub('', oo_files[name])

    # Get all pictures and extract them
    # TODO: Don't extract unused images (this case sometimes happens: oo includes unused images)
    for member in oo_files:
        if os.path.dirname(member) == "Pictures":
            picture_path = os.path.join(output_dir, os.path.split(member)[1])
            # Pictures are binary data: write them untranslated and close the file
            picture_out = open(picture_path, 'wb')
            try:
                picture_out.write(oo_files[member])
            finally:
                picture_out.close()
            print("Created: " + picture_path)

    # Create the PyExpat reader
    reader = PyExpat.Reader()

    # Create DOM trees of oo xml files
    oo_content_root = reader.fromString(oo_files["content.xml"])
    oo_styles_root = reader.fromString(oo_files["styles.xml"])

    # Create a namespace table (prefix -> URI) from the styles root element
    doc_ns = oo_styles_root.getElementsByTagName("office:document-styles")
    ns = {}
    root_attributes = doc_ns[0].attributes
    for i in range(root_attributes.length):
        attribute = root_attributes.item(i)
        if attribute.nodeType == Node.ATTRIBUTE_NODE:
            name = attribute.name
            if name[:5] == "xmlns":
                ns[name[6:]] = attribute.value

    # Create a document type node using the doctype name "template"
    # A blank system ID and blank public ID (i.e. no DTD information)
    template_type = implementation.createDocumentType("template", None, None)

    # Create a document node, which also creates a document element node
    # For the element, use a blank namespace URI and local name "template"
    template = implementation.createDocument(EMPTY_NAMESPACE, "template", template_type)

    # Get the template root element
    template_root = template.documentElement

    # Get page master styles
    pm_styles = oo_styles_root.getElementsByTagName("office:master-styles")
    pm_ns = pm_styles[0].childNodes[0].namespaceURI
    pm_name = pm_styles[0].childNodes[0].getAttributeNS(pm_ns, "page-master-name")

    # Get properties of used page master
    pm_properties = None
    for pm in oo_styles_root.getElementsByTagName("style:page-master"):
        if pm.getAttributeNS(pm_ns, "name") == pm_name:
            pm_properties = (pm.getElementsByTagName("style:properties"))[0]

    # Transfer attributes from the oo page to the template page
    # NOTE(review): when no page master matches, the page_* globals are left
    # unset and the shape handling below cannot work.
    if pm_properties is not None:
        page_leftmargin = pm_properties.getAttributeNS(ns["fo"], "margin-left")
        page_rightmargin = pm_properties.getAttributeNS(ns["fo"], "margin-right")
        page_topmargin = pm_properties.getAttributeNS(ns["fo"], "margin-top")
        page_bottommargin = pm_properties.getAttributeNS(ns["fo"], "margin-bottom")

        template_root.setAttribute("leftmargin", page_leftmargin)
        template_root.setAttribute("rightmargin", page_rightmargin)
        template_root.setAttribute("topmargin", page_topmargin)
        template_root.setAttribute("bottommargin", page_bottommargin)
        template_root.setAttribute("allowsplitting", "1")
        template_root.setAttribute("filename", file_name + ".pdf")
        if pm_properties.getAttributeNS(ns["style"], "print-orientation") == "portrait":
            landscape = "0"
        else:
            landscape = "1"
        template_root.setAttribute("landscape", landscape)
        template_root.setAttribute("showboundary", "0")
        template_root.setAttribute("tal:define", "portal python:here.portal_url.getPortalObject()")
        template_root.setAttribute("pagesize", "A4")

        page_height = pm_properties.getAttributeNS(ns["fo"], "page-height")
        page_width = pm_properties.getAttributeNS(ns["fo"], "page-width")

    # Create a page template element
    pagetp = template.createElement("pagetemplate")
    pagetp.setAttribute("id", "FirstPage")
    pagetp.setAttribute("startframe", "content")
    #pagetp.setAttribute("nextid", "Page")
    template_root.appendChild(pagetp)

    # Create a default tablestyle to make tables visible
    stylesheet = template.createElement("stylesheet")
    template_root.appendChild(stylesheet)
    tablestyle = template.createElement("tablestyle")
    tablestyle.setAttribute("name", "default")
    stylesheet.appendChild(tablestyle)
    stylecmd = template.createElement("stylecmd")
    stylecmd.setAttribute("expr", "('GRID', (0,0), (-1,-1), 0.1, colors.black)")
    tablestyle.appendChild(stylecmd)

    # Create a static element
    static = template.createElement("static")
    pagetp.appendChild(static)

    # Get all style definitions
    style_list = []
    for style in oo_content_root.getElementsByTagName("style:style"):
        style_list.append(style)

    parseBody()

    # Add a default frame
    if frame_found == 0:
        create_default_frame()

    # Sort all graphics element by z-index
    for group in template_root.getElementsByTagName("static"):
        group.childNodes.sort(zIndexCmp)

    # List all spreadsheets to convert them into tables
    spreadsheets = []
    for embedded in oo_content_root.getElementsByTagName("draw:object"):
        document = embedded.getAttributeNS(ns["xlink"], "href")
        if not document:
            continue
        # The first three characters of the reference are not part of the
        # archive member name.
        embedded_xml = oo_files.get(document[3:] + '/content.xml')
        if embedded_xml is None:
            continue
        try:
            object_content = reader.fromString(embedded_xml)
            if object_content.getElementsByTagName("table:table"):
                spreadsheets.append(object_content)
        except Exception:
            # Best effort: an embedded object that cannot be parsed is skipped
            pass

    # Close the archive
    oo_unzipped.close()

    if spreadsheets:
        # Create a new page template file
        template_type = implementation.createDocumentType("template", None, None)
        template_pdf = implementation.createDocument(EMPTY_NAMESPACE, "document", template_type)
        template_pdf_root = template_pdf.documentElement
        template_pdf_root.setAttribute("filename", "report01.pdf")
        template_pdf_root.setAttribute("xmlns:tal", "http://xml.zope.org/namespaces/tal")

        for (tag, label) in (("title", "Title"), ("author", "Author"), ("subject", "Subject")):
            element = template_pdf.createElement(tag)
            element.appendChild(template_pdf.createTextNode(label))
            template_pdf_root.appendChild(element)

        content = template_pdf.createElement("content")

        # Insert spreadsheets as tables
        ttp_number = 0
        for spr in spreadsheets:
            # Get styles per family
            sprStyles = {}
            for tree in spr.getElementsByTagName("office:automatic-styles"):
                for style in tree.getElementsByTagName("style:style"):
                    family = style.getAttributeNS(ns["style"], "family")
                    styleName = style.getAttributeNS(ns["style"], "name")
                    sprStyles.setdefault(family, {})[styleName] = style

            # Create the tables
            for table in spr.getElementsByTagName("table:table"):
                tableTpName = "ttp" + str(ttp_number)
                XMLTable = _buildTable(table, sprStyles, tableTpName,
                                       template_pdf, stylesheet, template_root)
                content.appendChild(XMLTable)
                ttp_number += 1

        template_pdf_root.appendChild(content)

        # Write the tables file
        _writeTemplate(template_pdf_root,
                       os.path.join(output_dir, file_name + '.pt'), 'utf-8')

    # Write the PDF Template
    _writeTemplate(template_root,
                   os.path.join(output_dir, file_name + '_pdf_template.pt'))
    return


def getStyleAttribute(styleDict, element, elementGeneralType, attribute,
                      precolonName="style", default_style="Default"):
    """
    Get an attribute from the style dictionary.

    The style is looked up in styleDict[element.localName] under the name
    held by the element's "style-name" attribute, or `default_style` when
    the element has none. Returns the first non-empty value of
    `precolonName`:`attribute` found in the style properties, or "" when
    the style or the attribute is missing.
    """
    styleName = element.getAttributeNS(ns[elementGeneralType], "style-name")
    if not styleName:
        styleName = default_style
    try:
        elementStyle = styleDict[element.localName][styleName]
        for properties in elementStyle.getElementsByTagName("style:properties"):
            styleAttribute = properties.getAttributeNS(ns[precolonName], attribute)
            if styleAttribute != "":
                return styleAttribute
    except (KeyError, AttributeError):
        pass
    # Always answer with a string: callers compare the result with ""
    return ""


def getMinimalBounds(texts):
    """
    Scans the text table to reduce it to its minimum size.

    Trailing cells without text are removed from the lines of `texts`
    (in place). Returns {'width': ..., 'height': ...} where the height
    ignores the trailing lines that are entirely empty; those lines are
    not removed from the caller's list.
    """
    empty_lines = 0
    no_more_empty_lines = 0

    # Eliminate all empty cells at the ends of lines and columns
    for line in range(len(texts) - 1, -1, -1):
        empty_cells = 0
        line_content = texts[line]['line']
        for cell in range(len(line_content) - 1, -1, -1):
            if len(line_content[cell]['texts']) == 0:
                empty_cells += 1
            else:
                break
        if (not no_more_empty_lines) and (empty_cells == len(line_content)):
            empty_lines += 1
        else:
            texts[line]['line'] = line_content[:len(line_content) - empty_cells]
            no_more_empty_lines = 1
    kept = texts[:len(texts) - empty_lines]

    # Determine minimum bounds
    max_cols = 0
    for text_line in kept:
        if len(text_line['line']) > max_cols:
            max_cols = len(text_line['line'])

    return {'width': max_cols, 'height': len(kept)}


def setBounds(texts, width=0, height=0, widths=None):
    """
    Enlarge table to given bounds.

    Empty lines and cells are appended to `texts` in place. Cells added to
    the first line receive the matching column width from `widths` when one
    is known (a value different from -1).
    """
    if widths is None:
        widths = []
    while height > len(texts):
        texts.append({'line': []})
    for line in range(height):
        cells = texts[line]['line']
        while width > len(cells):
            new_width = -1
            if line == 0:
                try:
                    new_width = widths[len(cells)]
                except IndexError:
                    new_width = -1
            if new_width != -1:
                cells.append({'texts': [], 'width': convertToUnit(new_width, unit)})
            else:
                cells.append({'texts': []})


def zIndexCmp(a, b):
    """
    Compare the z-index ("z" attribute) of 2 graphic elements.

    An element without z-index is given the value 1. Used to sort the
    children of the static elements in oo2pt().
    """
    a_z = a.getAttribute("z")
    b_z = b.getAttribute("z")
    if len(a_z) == 0:
        a_z = 1
    if len(b_z) == 0:
        b_z = 1
    return cmp(int(a_z), int(b_z))


def parseBody():
    """Translate the shapes of every draw:page into template elements.

    Rectangles, ellipses, circles, lines, images and text boxes that are
    direct children of a page are appended to the global `static` element.
    The first rectangle named `pdf_content_frame` becomes the content frame
    instead of being drawn.
    """
    global found, frame_found
    found = 0

    def getNodeStyle(node):
        """Return the first child of the style used by `node`, if any."""
        (node_ns, node_tag) = node.nodeName.split(':')
        if node_tag == "text-box":
            style_tagname = "text-style-name"
        else:
            style_tagname = "style-name"
        wanted = node.getAttributeNS(ns[node_ns], style_tagname)
        for candidate in style_list:
            if candidate.getAttributeNS(ns["style"], "name") == wanted:
                # Style attributes found
                return candidate.childNodes[0]

    # The add* helpers read `style`, which the loop at the end of this
    # function sets before calling them.

    def addRect(shape):
        global found

        # Verify if the rectangle is the user frame
        if shape.getAttributeNS(ns["draw"], "name") == pdf_content_frame:
            found = 1

        x = shape.getAttributeNS(ns["svg"], "x")
        y = shape.getAttributeNS(ns["svg"], "y")
        height = shape.getAttributeNS(ns["svg"], "height")
        width = shape.getAttributeNS(ns["svg"], "width")
        # Change the coordinates following the new origin
        y = str(float(page_height[:-2]) - float(y[:-2]) - float(height[:-2])) + unit

        if found == 1:
            # The frame rectangle is not drawn: give its geometry back
            return {"x": x, "y": y, "height": height, "width": width}

        oo_fill_type = style.getAttributeNS(ns["draw"], "fill")
        oo_fill_color = style.getAttributeNS(ns["draw"], "fill-color")
        oo_stroke_type = style.getAttributeNS(ns["draw"], "stroke")
        oo_stroke_color = style.getAttributeNS(ns["svg"], "stroke-color")
        oo_stroke_width = style.getAttributeNS(ns["svg"], "stroke-width")

        # Default OO value
        stroke_width = "0"
        stroke_color = "(0.0,0.0,0.0)"
        fill_color = "(0.0,0.72156862745098038,1.0)"

        if oo_stroke_type not in (None, '', 'none'):
            if oo_stroke_color not in (None, ''):
                stroke_color = color_hex2dec(oo_stroke_color)
            if oo_stroke_width not in (None, ''):
                stroke_width = oo_stroke_width
        if oo_fill_type not in (None, '', 'none'):
            if oo_fill_color not in (None, ''):
                fill_color = color_hex2dec(oo_fill_color)

        new_shape = template.createElement("rectangle")
        new_shape.setAttribute("x", x)
        new_shape.setAttribute("y", y)
        new_shape.setAttribute("width", width)
        new_shape.setAttribute("height", height)
        new_shape.setAttribute("linewidth", stroke_width)
        new_shape.setAttribute("stroke", stroke_color)
        new_shape.setAttribute("z", shape.getAttributeNS(ns["draw"], "z-index"))
        new_shape.setAttribute("fill", fill_color)
        static.appendChild(new_shape)

    def addEllipse(shape):
        x = shape.getAttributeNS(ns["svg"], "x")
        y = shape.getAttributeNS(ns["svg"], "y")
        height = shape.getAttributeNS(ns["svg"], "height")
        width = shape.getAttributeNS(ns["svg"], "width")
        linewidth = style.getAttributeNS(ns["svg"], "stroke-width")
        stroke = style.getAttributeNS(ns["svg"], "stroke-color")
        fill = style.getAttributeNS(ns["draw"], "fill-color")
        fill_type = style.getAttributeNS(ns["draw"], "fill")

        y = str(float(page_height[:-2]) - float(y[:-2]) - float(height[:-2])) + unit
        x2 = str(float(x[:-2]) + float(width[:-2])) + unit
        y2 = str(float(y[:-2]) + float(height[:-2])) + unit

        if len(linewidth) == 0:
            linewidth = "1"
        if len(stroke) != 0:
            stroke = color_hex2dec(stroke)
        else:
            stroke = "(0.0,0.0,0.0)"

        new_shape = template.createElement("ellipse")
        new_shape.setAttribute("x1", x)
        new_shape.setAttribute("y1", y)
        new_shape.setAttribute("x2", x2)
        new_shape.setAttribute("y2", y2)
        new_shape.setAttribute("linewidth", linewidth)
        new_shape.setAttribute("stroke", stroke)
        new_shape.setAttribute("z", shape.getAttributeNS(ns["draw"], "z-index"))
        if fill_type != "none":
            if len(fill) != 0:
                fill = color_hex2dec(fill)
            else:
                fill = "(0.0,0.72156862745098038,1.0)"
            new_shape.setAttribute("fill", fill)
        static.appendChild(new_shape)

    def addLine(shape):
        x1 = shape.getAttributeNS(ns["svg"], "x1")
        x2 = shape.getAttributeNS(ns["svg"], "x2")
        y1 = shape.getAttributeNS(ns["svg"], "y1")
        y2 = shape.getAttributeNS(ns["svg"], "y2")
        linewidth = style.getAttributeNS(ns["svg"], "stroke-width")
        stroke = style.getAttributeNS(ns["svg"], "stroke-color")

        y1 = str(float(page_height[:-2]) - float(y1[:-2])) + unit
        y2 = str(float(page_height[:-2]) - float(y2[:-2])) + unit

        # Try to get the user shape properties or set the openoffice default
        if len(linewidth) == 0:
            linewidth = "1"
        if len(stroke) != 0:
            stroke = color_hex2dec(stroke)
        else:
            stroke = "(0.0,0.0,0.0)"

        # Create a new element in the template tree
        new_shape = template.createElement("line")
        new_shape.setAttribute("x1", x1)
        new_shape.setAttribute("y1", y1)
        new_shape.setAttribute("x2", x2)
        new_shape.setAttribute("y2", y2)
        new_shape.setAttribute("linewidth", linewidth)
        new_shape.setAttribute("stroke", stroke)
        new_shape.setAttribute("z", shape.getAttributeNS(ns["draw"], "z-index"))
        static.appendChild(new_shape)

    def addImage(shape):
        x = shape.getAttributeNS(ns["svg"], "x")
        y = shape.getAttributeNS(ns["svg"], "y")
        height = shape.getAttributeNS(ns["svg"], "height")
        width = shape.getAttributeNS(ns["svg"], "width")
        img_file = shape.getAttributeNS(ns["xlink"], "href")

        y = str(float(page_height[:-2]) - float(y[:-2]) - float(height[:-2])) + unit
        img_file = os.path.split(img_file)[1]

        new_shape = template.createElement("fixedimage")
        new_shape.setAttribute("x", x)
        new_shape.setAttribute("y", y)
        new_shape.setAttribute("height", height)
        new_shape.setAttribute("width", width)
        # TODO: png image are not supported. Convert them to jpg ?
        new_shape.setAttribute("filename", "zodb:" + img_file)
        new_shape.setAttribute("z", shape.getAttributeNS(ns["draw"], "z-index"))
        static.appendChild(new_shape)

    def addCircle(shape):
        addEllipse(shape)

    def addTextBox(textbox):
        x = textbox.getAttributeNS(ns["svg"], "x")
        y = textbox.getAttributeNS(ns["svg"], "y")
        height = textbox.getAttributeNS(ns["svg"], "height")
        width = textbox.getAttributeNS(ns["svg"], "width")
        align = style.getAttributeNS(ns["fo"], "text-align")
        size = style.getAttributeNS(ns["fo"], "font-size")
        font = style.getAttributeNS(ns["style"], "font-name")
        color = style.getAttributeNS(ns["fo"], "color")
        font_weight = style.getAttributeNS(ns["fo"], "font-weight")
        font_style = style.getAttributeNS(ns["fo"], "font-style")

        # Modify the text point reference according to the alignment
        if align == "center":
            x = str(float(x[:-2]) + (float(width[:-2]) / 2.0)) + unit
        elif align == "right":
            x = str(float(x[:-2]) + float(width[:-2])) + unit
        else:
            # Set the default OO font align value
            align = "left"

        # Change the coordinates following the new origin
        y = str(float(page_height[:-2]) - float(y[:-2])) + unit

        # Set the default OO color value if empty
        if len(color) != 0:
            color = color_hex2dec(color)
        else:
            color = "(0.0,0.0,0.0)"

        # Set the default OO font size value if empty
        if len(size) == 0:
            size = "12pt"

        # Get the font
        bold = (font_weight == "bold")
        italic = (font_style == "italic")
        font = getFontName(font, bold, italic)

        # Number of the paragraph
        para_count = 1

        # Get all paragraphs
        for i in range(textbox.childNodes.length):
            paragraph = textbox.childNodes[i]
            # Create a new infostring for each new paragraph
            if paragraph.nodeName != "text:p":
                continue
            # Get all text span and concatenate their content to build the paragraph string
            para_text = ""
            # Each paragraph is placed one font size below the previous one
            para_y = str(float(y[:-2]) - (float(size[:-2]) * pt * para_count)) + unit
            for j in range(paragraph.childNodes.length):
                text_span = paragraph.childNodes[j]
                if text_span.nodeName == "text:span":
                    # Get text
                    for k in range(text_span.childNodes.length):
                        text = text_span.childNodes[k]
                        if text.nodeType == Node.TEXT_NODE:
                            para_text += text.nodeValue
                # This statement is for the case when there is no text_span
                # to compose the paragraph but directly the text
                elif text_span.nodeType == Node.TEXT_NODE:
                    para_text += text_span.nodeValue

            # Create a new infostring in the template tree
            # Transfer attributes from the oo shape to the template shape
            new_infostring = template.createElement("infostring")
            new_infostring.appendChild(template.createTextNode(para_text))
            new_infostring.setAttribute("x", x)
            new_infostring.setAttribute("y", para_y)
            new_infostring.setAttribute("align", align)
            new_infostring.setAttribute("size", size[:-2])
            new_infostring.setAttribute("font", font)
            new_infostring.setAttribute("color", color)
            new_infostring.setAttribute("z", textbox.getAttributeNS(ns["draw"], "z-index"))
            static.appendChild(new_infostring)
            para_count += 1

    # Explicit dispatch table (replaces an eval() on a built function name)
    handlers = {
        "draw:rect": addRect,
        "draw:ellipse": addEllipse,
        "draw:line": addLine,
        "draw:circle": addCircle,
        "draw:image": addImage,
        "draw:text-box": addTextBox,
    }

    body = oo_content_root.getElementsByTagName("office:body")[0]
    for page in body.getElementsByTagName("draw:page"):
        for i in range(page.childNodes.length):
            node = page.childNodes[i]
            # TODO: recursive function to support shapes aggregated in groups
            handler = handlers.get(node.nodeName)
            if handler is None:
                continue
            style = getNodeStyle(node)
            new_shape = handler(node)
            if found != 0:
                if frame_found == 0:
                    # User frame found ; we add it
                    create_frame(new_shape['x'], new_shape['y'],
                                 new_shape['width'], new_shape['height'])
                    frame_found = 1
                # Always reset the flag: it used to stay set when a second
                # frame rectangle was met, which hid every later rectangle.
                found = 0


def convertToUnit(value, destUnit):
    """
    Takes a value with its unit in a string ("1.1inch") and converts it to
    another unit. Returns a string ("2.794cm").
    """
    sourceUnit = re.sub(floatRegularExpr, "", value)
    valueDigits = len(value) - len(sourceUnit)
    floatValue = float(value[:valueDigits])
    return str(convertUnit(sourceUnit, destUnit, floatValue)) + destUnit


def convertUnit(sourceUnit, destUnit, value):
    """
    Converts a value to another unit.

    Unit names are case insensitive. The value is returned unchanged when
    both units are the same or when no converter is known for the pair.
    """
    source = sourceUnit.lower()
    dest = destUnit.lower()
    if source == dest:
        return value
    # Table lookup instead of eval(): the unit text comes from the document
    # and must never be executed.
    converter = _UNIT_CONVERTERS.get((source, dest))
    if converter is None:
        return value
    return converter(value)


def inch_to_cm(inches):
    """
    Converts an inch value to a centimeter value.
    """
    return inches * 2.54


def cm_to_inch(cm):
    """
    Converts a centimeter value to an inch value.
    """
    if cm == 0:
        return 0
    return cm / 2.54


# Known conversions, keyed by (source unit, destination unit) in lower case
_UNIT_CONVERTERS = {
    ("inch", "cm"): inch_to_cm,
    ("cm", "inch"): cm_to_inch,
}


def color_hex2dec(hex_color):
    """
    Convert color from hexadecimal codification to a decimal one.

    "#RRGGBB" becomes "(r,g,b)" with components between 0 and 1;
    "transparent" is rendered as white. Returns None for None or "".
    """
    # Test for emptiness first: lower-casing None used to raise
    if hex_color is None or hex_color == "":
        return None
    if hex_color.lower() == "transparent":
        hex_color = "#FFFFFF"
    R = str(int(hex_color[1:3], 16) / 255.0)
    V = str(int(hex_color[3:5], 16) / 255.0)
    B = str(int(hex_color[5:7], 16) / 255.0)
    return '(%s,%s,%s)' % (R, V, B)


def getFontName(font_name, bold=False, italic=False):
    """
    Get a CMFReportTool compatible font from any font.
    We have to do this because user sometime use fancy fonts not installed on
    the system and CMFReportTool only support few fonts (because of Platypus).

    Returns "Helvetica" or "Courier", followed by "-Bold", "-Oblique" or
    "-BoldOblique" according to `bold` and `italic`.
    """
    # Set default font to Helvetica
    font = "Helvetica"
    font_family_dict = {
        "Helvetica": ['helvetica', 'arial', 'verdana'],
        "Courier": ['courier', 'time'],
    }
    # Try to guess the most appropriate font
    wanted = font_name.lower()
    for font_family in font_family_dict.keys():
        for dict_word in font_family_dict[font_family]:
            if dict_word.lower() in wanted:
                font = font_family
    suffix = ""
    if bold:
        suffix += "Bold"
    if italic:
        suffix += "Oblique"
    if suffix:
        font += "-" + suffix
    return font


def create_default_frame():
    """Create a frame covering the page area inside the margins."""
    frame_w = str(float(page_width[:-2]) - float(page_leftmargin[:-2])
                  - float(page_rightmargin[:-2])) + unit
    frame_h = str(float(page_height[:-2]) - float(page_topmargin[:-2])
                  - float(page_bottommargin[:-2])) + unit
    frame_x = page_leftmargin
    frame_y = page_bottommargin
    create_frame(frame_x, frame_y, frame_w, frame_h)


def create_frame(x, y, width, height):
    """
    Create a frame.

    The "content" frame is appended to the global page template element
    with the given geometry and fixed paddings.
    """
    frame = template.createElement("frame")
    frame.setAttribute("id", "content")
    frame.setAttribute("nextid", "content")
    frame.setAttribute("x", x)
    frame.setAttribute("y", y)
    frame.setAttribute("width", width)
    frame.setAttribute("height", height)
    frame.setAttribute("leftpadding", "0.1cm")
    frame.setAttribute("rightpadding", "0.1cm")
    frame.setAttribute("toppadding", "0.2cm")
    frame.setAttribute("bottompadding", "0.5cm")
    frame.setAttribute("showBoundary", "0")
    pagetp.appendChild(frame)
    if verbose:
        print("Frame created : (%s, %s), dimension (%s, %s)" % (x, y, width, height))


if __name__ == "__main__":
    global global_output_dir

    # Get all parameters
    try:
        opts, args = getopt.getopt(sys.argv[1:], "hvo:", ["help", "verbose", "output="])
    except getopt.GetoptError:
        # print help information and exit:
        usage()
        sys.exit(2)

    # Set the output file
    global_output_dir = None
    for o, a in opts:
        if o in ("-h", "--help"):
            usage()
            sys.exit()
        if o in ("-v", "--verbose"):
            verbose = 1
        if o in ("-o", "--output"):
            global_output_dir = a

    # Check args number: at least one input file is required
    if len(args) == 0:
        usage()
sys.exit(2) # Convert every oo doc for oo_doc in args: oo2pt(oo_doc)