def removeUnreferencedElements(doc):
    """
    Removes unreferenced elements from the document.

    Two passes are made:
      1. Anywhere in the document, elements named linearGradient,
         radialGradient or pattern that carry an ID nobody references
         are removed.
      2. Every <defs> element is vacuumed of the children that
         removeUnusedDefs selects for removal.

    Returns the number of unreferenced elements removed from the document.
    """
    global numElemsRemoved
    num = 0

    # Remove certain unreferenced elements outside of defs
    removeTags = ['linearGradient', 'radialGradient', 'pattern']
    identifiedElements = findElementsWithId(doc.documentElement)
    referencedIDs = findReferencedElements(doc.documentElement)

    for elemId in identifiedElements:
        if elemId in referencedIDs:
            continue
        goner = identifiedElements[elemId]
        if (goner is not None and goner.parentNode is not None
                and goner.nodeName in removeTags):
            goner.parentNode.removeChild(goner)
            num += 1
            numElemsRemoved += 1

    # Remove most unreferenced elements inside defs
    defs = doc.documentElement.getElementsByTagName('defs')
    for aDef in defs:
        elemsToRemove = removeUnusedDefs(doc, aDef)
        for elem in elemsToRemove:
            elem.parentNode.removeChild(elem)
            numElemsRemoved += 1
            num += 1
    return num


def shortenIDs(doc, unprotectedElements=None):
    """
    Shortens ID names used in the document. ID names referenced the most often
    are assigned the shortest ID names.
    If unprotectedElements is provided, only IDs contained in it will be
    shortened.

    Returns the number of bytes saved by shortening ID names in the document.
    """
    num = 0

    identifiedElements = findElementsWithId(doc.documentElement)
    if unprotectedElements is None:
        unprotectedElements = identifiedElements
    referencedIDs = findReferencedElements(doc.documentElement)

    # Make idList (list of idnames) sorted by reference count
    # descending, so the highest reference count is first.
    # Only IDs that actually have a defining element are kept: documents may
    # contain #id references with no element carrying that ID, and renameID
    # needs the defining element.
    idList = [(referencedIDs[rid][0], rid) for rid in referencedIDs
              if rid in unprotectedElements and rid in identifiedElements]
    idList.sort(reverse=True)
    idList = [pair[1] for pair in idList]

    curIdNum = 1

    for rid in idList:
        curId = intToID(curIdNum)
        # Skip candidate names that are already taken by another element.
        # Stop as soon as the candidate is the element's own ID: it already
        # has the best free name, and renaming it further could only make
        # the ID longer.
        while curId != rid and curId in identifiedElements:
            curIdNum += 1
            curId = intToID(curIdNum)
        if curId != rid:
            num += renameID(doc, rid, curId, identifiedElements, referencedIDs)
        curIdNum += 1

    return num


def intToID(idnum):
    """
    Returns the ID name for the given ID number, spreadsheet-style, i.e. from
    a to z, then from aa to az, ba to bz, etc. up to zz, then aaa and so on.
    Numbers below 1 yield the empty string.
    """
    rid = ''

    while idnum > 0:
        # Floor division keeps the arithmetic exact for arbitrarily large
        # integers; true division would round through a float.
        idnum, letter = divmod(idnum - 1, 26)
        rid = chr(letter + ord('a')) + rid

    return rid


def _replaceUrlRefs(text, idFrom, idTo):
    """
    Returns text with url(#idFrom), url('#idFrom') and url("#idFrom")
    all rewritten to the unquoted form url(#idTo).
    """
    replacement = 'url(#' + idTo + ')'
    for pattern in ('url(#' + idFrom + ')',
                    "url('#" + idFrom + "')",
                    'url("#' + idFrom + '")'):
        text = text.replace(pattern, replacement)
    return text


def renameID(doc, idFrom, idTo, identifiedElements, referencedIDs):
    """
    Changes the ID name from idFrom to idTo, on the declaring element
    as well as all references in the document doc.

    Updates identifiedElements and referencedIDs.
    Does not handle the case where idTo is already the ID name of
    another element in doc.

    Returns the number of bytes saved by this replacement.
    """
    num = 0

    definingNode = identifiedElements[idFrom]
    definingNode.setAttribute("id", idTo)
    del identifiedElements[idFrom]
    identifiedElements[idTo] = definingNode

    referringNodes = referencedIDs[idFrom]

    # Look for the idFrom ID name in each of the referencing elements,
    # in the same places findReferencedElements looks.
    for node in referringNodes[1]:
        # if this node is a style element, rewrite its stylesheet text
        if node.nodeName == 'style' and node.namespaceURI == NS['SVG']:
            # node.firstChild will be either a CDATA or a Text node now
            if node.firstChild is not None:
                # concatenate the value of all children, in case
                # there's a CDATASection node surrounded by whitespace
                # nodes
                # (node.normalize() will NOT work here, it only acts on Text nodes)
                oldValue = "".join([child.nodeValue for child in node.childNodes])
                # not going to reparse the whole thing
                newValue = _replaceUrlRefs(oldValue, idFrom, idTo)
                # Replace all the children with one text node holding the
                # new stylesheet. Going through the DOM API keeps the
                # parent/sibling links of the new node consistent.
                while node.firstChild is not None:
                    node.removeChild(node.firstChild)
                node.appendChild(node.ownerDocument.createTextNode(newValue))
                num += len(oldValue) - len(newValue)

        # if xlink:href is set to #idFrom, then change the id
        href = node.getAttributeNS(NS['XLINK'], 'href')
        if href == '#' + idFrom:
            node.setAttributeNS(NS['XLINK'], 'href', '#' + idTo)
            num += len(idFrom) - len(idTo)

        # if the style has url(#idFrom), then change the id
        styles = node.getAttribute('style')
        if styles != '':
            newValue = _replaceUrlRefs(styles, idFrom, idTo)
            node.setAttribute('style', newValue)
            num += len(styles) - len(newValue)

        # now try the fill, stroke, filter attributes
        for attr in referencingProps:
            oldValue = node.getAttribute(attr)
            if oldValue != '':
                newValue = _replaceUrlRefs(oldValue, idFrom, idTo)
                node.setAttribute(attr, newValue)
                num += len(oldValue) - len(newValue)

    del referencedIDs[idFrom]
    referencedIDs[idTo] = referringNodes

    return num


def unprotected_ids(doc, options):
    u"""Returns the unprotected IDs within the document doc.

    The result is a dictionary mapping each unprotected ID name to its
    element. An ID is protected when any enabled option matches it:
    protect_ids_noninkscape (ID does not end in a digit),
    protect_ids_list (comma-separated exact names) or
    protect_ids_prefix (comma-separated prefixes).
    """
    identifiedElements = findElementsWithId(doc.documentElement)
    if not (options.protect_ids_noninkscape or
            options.protect_ids_list or
            options.protect_ids_prefix):
        return identifiedElements

    protect_ids_list = []
    if options.protect_ids_list:
        protect_ids_list = options.protect_ids_list.split(",")
    protect_ids_prefixes = []
    if options.protect_ids_prefix:
        protect_ids_prefixes = options.protect_ids_prefix.split(",")

    # Iterate over a snapshot of the keys: entries are deleted in the loop.
    for elemId in list(identifiedElements.keys()):
        protected = False
        if options.protect_ids_noninkscape and not elemId[-1].isdigit():
            protected = True
        if elemId in protect_ids_list:
            protected = True
        for prefix in protect_ids_prefixes:
            if elemId.startswith(prefix):
                protected = True
        if protected:
            del identifiedElements[elemId]
    return identifiedElements
def removeUnreferencedIDs(referencedIDs, identifiedElements):
    """
    Removes the unreferenced ID attributes.

    Elements named 'font' always keep their ID.

    Returns the number of ID attributes removed
    """
    global numIDsRemoved
    keepTags = ['font']
    num = 0
    for elemId in identifiedElements:
        node = identifiedElements[elemId]
        if elemId not in referencedIDs and node.nodeName not in keepTags:
            node.removeAttribute('id')
            numIDsRemoved += 1
            num += 1
    return num


def removeNamespacedAttributes(node, namespaces):
    """
    Recursively removes, from node and its descendants, every attribute whose
    namespace URI is in namespaces.

    Returns the number of attributes removed.
    """
    global numAttrsRemoved
    num = 0
    if node.nodeType == 1:
        # Collect the names first: the attribute map must not be modified
        # while it is being indexed.
        attrList = node.attributes
        attrsToRemove = []
        for attrNum in range(attrList.length):
            attr = attrList.item(attrNum)
            if attr is not None and attr.namespaceURI in namespaces:
                attrsToRemove.append(attr.nodeName)
        for attrName in attrsToRemove:
            num += 1
            numAttrsRemoved += 1
            node.removeAttribute(attrName)

        # now recurse for children
        for child in node.childNodes:
            num += removeNamespacedAttributes(child, namespaces)
    return num


def removeNamespacedElements(node, namespaces):
    """
    Recursively removes, below node, every child node whose namespace URI is
    in namespaces.

    Returns the number of nodes removed.
    """
    global numElemsRemoved
    num = 0
    if node.nodeType == 1:
        # Snapshot the doomed children before removing any of them.
        childrenToRemove = [child for child in node.childNodes
                            if child is not None
                            and child.namespaceURI in namespaces]
        for child in childrenToRemove:
            num += 1
            numElemsRemoved += 1
            node.removeChild(child)

        # now recurse for children
        for child in node.childNodes:
            num += removeNamespacedElements(child, namespaces)
    return num


def removeMetadataElements(doc):
    """
    Removes every element named 'metadata' from the document.

    Returns the number of elements removed.
    """
    global numElemsRemoved
    num = 0
    # clone the list, as the tag list is live from the DOM
    elementsToRemove = list(doc.documentElement.getElementsByTagName('metadata'))

    for element in elementsToRemove:
        element.parentNode.removeChild(element)
        num += 1
        numElemsRemoved += 1

    return num


def removeNestedGroups(node):
    """
    This walks further and further down the tree, removing groups
    which do not have any attributes or a title/desc child and
    promoting their children up one level

    Returns the number of groups removed.
    """
    global numElemsRemoved
    num = 0

    groupsToRemove = []
    # Only consider <g> elements for promotion if this element isn't a <switch>.
    # (partial fix for bug 594930, required by the SVG spec however)
    if not (node.nodeType == 1 and node.nodeName == 'switch'):
        for child in node.childNodes:
            if (child.nodeName == 'g' and child.namespaceURI == NS['SVG']
                    and len(child.attributes) == 0):
                # only collapse group if it does not have a title or desc
                # as a direct descendant
                for grandchild in child.childNodes:
                    if (grandchild.nodeType == 1
                            and grandchild.namespaceURI == NS['SVG']
                            and grandchild.nodeName in ['title', 'desc']):
                        break
                else:
                    groupsToRemove.append(child)

    for g in groupsToRemove:
        # Promote every child in document order, then drop the empty group.
        while g.childNodes.length > 0:
            g.parentNode.insertBefore(g.firstChild, g)
        g.parentNode.removeChild(g)
        numElemsRemoved += 1
        num += 1

    # now recurse for children
    for child in node.childNodes:
        if child.nodeType == 1:
            num += removeNestedGroups(child)
    return num


def moveCommonAttributesToParentGroup(elem, referencedElements):
    """
    This recursively calls this function on all children of the passed in element
    and then iterates over all child elements and removes common inheritable attributes
    from the children and places them in the parent group.  But only if the parent contains
    nothing but element children and whitespace.  The attributes are only removed from the
    children if the children are not referenced by other elements in the document.

    Animation elements (set, animate, animateColor, animateTransform,
    animateMotion) are neither compared nor modified: their 'fill' attribute
    has a different meaning.

    Returns the net number of attributes removed, including those removed
    by the recursive calls.
    """
    num = 0

    childElements = []
    # recurse first into the children (depth-first)
    for child in elem.childNodes:
        if child.nodeType == 1:
            # only add and recurse if the child is not referenced elsewhere
            if not child.getAttribute('id') in referencedElements:
                childElements.append(child)
                num += moveCommonAttributesToParentGroup(child, referencedElements)
        # else if the parent has non-whitespace text children, do not
        # try to move common attributes
        elif child.nodeType == 3 and child.nodeValue.strip():
            return num

    animationTags = ['set', 'animate', 'animateColor', 'animateTransform',
                     'animateMotion']
    candidates = [child for child in childElements
                  if child.localName not in animationTags]

    # only process the children if there are more than one element
    if len(candidates) <= 1:
        return num

    # this is most of the inheritable properties from
    # http://www.w3.org/TR/SVG11/propidx.html
    # and http://www.w3.org/TR/SVGTiny12/attributeTable.html
    inheritable = ['clip-rule',
                   'display-align',
                   'fill', 'fill-opacity', 'fill-rule',
                   'font', 'font-family', 'font-size', 'font-size-adjust',
                   'font-stretch', 'font-style', 'font-variant', 'font-weight',
                   'letter-spacing',
                   'pointer-events', 'shape-rendering',
                   'stroke', 'stroke-dasharray', 'stroke-dashoffset',
                   'stroke-linecap', 'stroke-linejoin', 'stroke-miterlimit',
                   'stroke-opacity', 'stroke-width',
                   'text-anchor', 'text-decoration', 'text-rendering',
                   'visibility',
                   'word-spacing', 'writing-mode']

    # Seed with all inheritable attributes of the first candidate.
    # The loop index must not reuse 'num', which accumulates the result.
    commonAttrs = {}
    attrList = candidates[0].attributes
    for attrIndex in range(attrList.length):
        attr = attrList.item(attrIndex)
        if attr.nodeName in inheritable:
            commonAttrs[attr.nodeName] = attr.nodeValue

    # for each subsequent child element, drop what it does not share
    for child in candidates[1:]:
        distinctAttrs = [name for name in commonAttrs
                         if child.getAttribute(name) != commonAttrs[name]]
        for name in distinctAttrs:
            del commonAttrs[name]

    # commonAttrs now has all the inheritable attributes which are common
    # among all candidate child elements
    for name in commonAttrs:
        for child in candidates:
            child.removeAttribute(name)
        elem.setAttribute(name, commonAttrs[name])

    # update our statistic (we remove N*M attributes and add back in M attributes)
    num += (len(candidates) - 1) * len(commonAttrs)
    return num
""" num = 0 global numElemsRemoved # TODO perhaps all of the Presentation attributes in http://www.w3.org/TR/SVG/struct.html#GElement # could be added here # Cyn: These attributes are the same as in moveAttributesToParentGroup, and must always be for curAttr in ['clip-rule', 'display-align', 'fill', 'fill-opacity', 'fill-rule', 'font', 'font-family', 'font-size', 'font-size-adjust', 'font-stretch', 'font-style', 'font-variant', 'font-weight', 'letter-spacing', 'pointer-events', 'shape-rendering', 'stroke', 'stroke-dasharray', 'stroke-dashoffset', 'stroke-linecap', 'stroke-linejoin', 'stroke-miterlimit', 'stroke-opacity', 'stroke-width', 'text-anchor', 'text-decoration', 'text-rendering', 'visibility', 'word-spacing', 'writing-mode']: # Iterate through the children in reverse order, so item(i) for # items we have yet to visit still returns the correct nodes. curChild = elem.childNodes.length - 1 while curChild >= 0: childNode = elem.childNodes.item(curChild) if childNode.nodeType == 1 and childNode.getAttribute(curAttr) != '': # We're in a possible run! Track the value and run length. value = childNode.getAttribute(curAttr) runStart, runEnd = curChild, curChild # Run elements includes only element tags, no whitespace/comments/etc. # Later, we calculate a run length which includes these. runElements = 1 # Backtrack to get all the nodes having the same # attribute value, preserving any nodes in-between. while runStart > 0: nextNode = elem.childNodes.item(runStart - 1) if nextNode.nodeType == 1: if nextNode.getAttribute(curAttr) != value: break else: runElements += 1 runStart -= 1 else: runStart -= 1 if runElements >= 3: # Include whitespace/comment/etc. nodes in the run. 
while runEnd < elem.childNodes.length - 1: if elem.childNodes.item(runEnd + 1).nodeType == 1: break else: runEnd += 1 runLength = runEnd - runStart + 1 if runLength == elem.childNodes.length: # Every child has this # If the current parent is a already, if elem.nodeName == 'g' and elem.namespaceURI == NS['SVG']: # do not act altogether on this attribute; all the # children have it in common. # Let moveCommonAttributesToParentGroup do it. curChild = -1 continue # otherwise, it might be an

#!/usr/bin/env python # -*- coding: utf-8 -*- # Scour # # Copyright 2010 Jeff Schiller # Copyright 2010 Louis Simard # # This file is part of Scour, http://www.codedread.com/scour/ # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. # You may obtain a copy of the License at # # http://www.apache.org/licenses/LICENSE-2.0 # # Unless required by applicable law or agreed to in writing, software # distributed under the License is distributed on an "AS IS" BASIS, # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. # Notes: # rubys' path-crunching ideas here: http://intertwingly.net/code/svgtidy/spec.rb # (and implemented here: http://intertwingly.net/code/svgtidy/svgtidy.rb ) # Yet more ideas here: http://wiki.inkscape.org/wiki/index.php/Save_Cleaned_SVG # # * Process Transformations # * Collapse all group based transformations # Even more ideas here: http://esw.w3.org/topic/SvgTidy # * analysis of path elements to see if rect can be used instead? (must also need to look # at rounded corners) # Next Up: # - why are marker-start, -end not removed from the style attribute? # - why are only overflow style properties considered and not attributes? 
# - only remove unreferenced elements if they are not children of a referenced element # - add an option to remove ids if they match the Inkscape-style of IDs # - investigate point-reducing algorithms # - parse transform attribute # - if a has only one element in it, collapse the (ensure transform, etc are carried down) # necessary to get true division from __future__ import division import os import sys import xml.dom.minidom import re import math from svg_regex import svg_parser from svg_transform import svg_transform_parser import optparse from yocto_css import parseCssString # Python 2.3- did not have Decimal try: from decimal import * except ImportError: print >>sys.stderr, "Scour requires Python 2.4." # Import Psyco if available try: import psyco psyco.full() except ImportError: pass APP = 'scour' VER = '0.26' COPYRIGHT = 'Copyright Jeff Schiller, Louis Simard, 2010' NS = { 'SVG': 'http://www.w3.org/2000/svg', 'XLINK': 'http://www.w3.org/1999/xlink', 'SODIPODI': 'http://sodipodi.sourceforge.net/DTD/sodipodi-0.dtd', 'INKSCAPE': 'http://www.inkscape.org/namespaces/inkscape', 'ADOBE_ILLUSTRATOR': 'http://ns.adobe.com/AdobeIllustrator/10.0/', 'ADOBE_GRAPHS': 'http://ns.adobe.com/Graphs/1.0/', 'ADOBE_SVG_VIEWER': 'http://ns.adobe.com/AdobeSVGViewerExtensions/3.0/', 'ADOBE_VARIABLES': 'http://ns.adobe.com/Variables/1.0/', 'ADOBE_SFW': 'http://ns.adobe.com/SaveForWeb/1.0/', 'ADOBE_EXTENSIBILITY': 'http://ns.adobe.com/Extensibility/1.0/', 'ADOBE_FLOWS': 'http://ns.adobe.com/Flows/1.0/', 'ADOBE_IMAGE_REPLACEMENT': 'http://ns.adobe.com/ImageReplacement/1.0/', 'ADOBE_CUSTOM': 'http://ns.adobe.com/GenericCustomNamespace/1.0/', 'ADOBE_XPATH': 'http://ns.adobe.com/XPath/1.0/' } unwanted_ns = [ NS['SODIPODI'], NS['INKSCAPE'], NS['ADOBE_ILLUSTRATOR'], NS['ADOBE_GRAPHS'], NS['ADOBE_SVG_VIEWER'], NS['ADOBE_VARIABLES'], NS['ADOBE_SFW'], NS['ADOBE_EXTENSIBILITY'], NS['ADOBE_FLOWS'], NS['ADOBE_IMAGE_REPLACEMENT'], NS['ADOBE_CUSTOM'], NS['ADOBE_XPATH'] ] svgAttributes = [ 
'clip-rule', 'display', 'fill', 'fill-opacity', 'fill-rule', 'filter', 'font-family', 'font-size', 'font-stretch', 'font-style', 'font-variant', 'font-weight', 'line-height', 'marker', 'marker-end', 'marker-mid', 'marker-start', 'opacity', 'overflow', 'stop-color', 'stop-opacity', 'stroke', 'stroke-dasharray', 'stroke-dashoffset', 'stroke-linecap', 'stroke-linejoin', 'stroke-miterlimit', 'stroke-opacity', 'stroke-width', 'visibility' ] colors = { 'aliceblue': 'rgb(240, 248, 255)', 'antiquewhite': 'rgb(250, 235, 215)', 'aqua': 'rgb( 0, 255, 255)', 'aquamarine': 'rgb(127, 255, 212)', 'azure': 'rgb(240, 255, 255)', 'beige': 'rgb(245, 245, 220)', 'bisque': 'rgb(255, 228, 196)', 'black': 'rgb( 0, 0, 0)', 'blanchedalmond': 'rgb(255, 235, 205)', 'blue': 'rgb( 0, 0, 255)', 'blueviolet': 'rgb(138, 43, 226)', 'brown': 'rgb(165, 42, 42)', 'burlywood': 'rgb(222, 184, 135)', 'cadetblue': 'rgb( 95, 158, 160)', 'chartreuse': 'rgb(127, 255, 0)', 'chocolate': 'rgb(210, 105, 30)', 'coral': 'rgb(255, 127, 80)', 'cornflowerblue': 'rgb(100, 149, 237)', 'cornsilk': 'rgb(255, 248, 220)', 'crimson': 'rgb(220, 20, 60)', 'cyan': 'rgb( 0, 255, 255)', 'darkblue': 'rgb( 0, 0, 139)', 'darkcyan': 'rgb( 0, 139, 139)', 'darkgoldenrod': 'rgb(184, 134, 11)', 'darkgray': 'rgb(169, 169, 169)', 'darkgreen': 'rgb( 0, 100, 0)', 'darkgrey': 'rgb(169, 169, 169)', 'darkkhaki': 'rgb(189, 183, 107)', 'darkmagenta': 'rgb(139, 0, 139)', 'darkolivegreen': 'rgb( 85, 107, 47)', 'darkorange': 'rgb(255, 140, 0)', 'darkorchid': 'rgb(153, 50, 204)', 'darkred': 'rgb(139, 0, 0)', 'darksalmon': 'rgb(233, 150, 122)', 'darkseagreen': 'rgb(143, 188, 143)', 'darkslateblue': 'rgb( 72, 61, 139)', 'darkslategray': 'rgb( 47, 79, 79)', 'darkslategrey': 'rgb( 47, 79, 79)', 'darkturquoise': 'rgb( 0, 206, 209)', 'darkviolet': 'rgb(148, 0, 211)', 'deeppink': 'rgb(255, 20, 147)', 'deepskyblue': 'rgb( 0, 191, 255)', 'dimgray': 'rgb(105, 105, 105)', 'dimgrey': 'rgb(105, 105, 105)', 'dodgerblue': 'rgb( 30, 144, 255)', 'firebrick': 
'rgb(178, 34, 34)', 'floralwhite': 'rgb(255, 250, 240)', 'forestgreen': 'rgb( 34, 139, 34)', 'fuchsia': 'rgb(255, 0, 255)', 'gainsboro': 'rgb(220, 220, 220)', 'ghostwhite': 'rgb(248, 248, 255)', 'gold': 'rgb(255, 215, 0)', 'goldenrod': 'rgb(218, 165, 32)', 'gray': 'rgb(128, 128, 128)', 'grey': 'rgb(128, 128, 128)', 'green': 'rgb( 0, 128, 0)', 'greenyellow': 'rgb(173, 255, 47)', 'honeydew': 'rgb(240, 255, 240)', 'hotpink': 'rgb(255, 105, 180)', 'indianred': 'rgb(205, 92, 92)', 'indigo': 'rgb( 75, 0, 130)', 'ivory': 'rgb(255, 255, 240)', 'khaki': 'rgb(240, 230, 140)', 'lavender': 'rgb(230, 230, 250)', 'lavenderblush': 'rgb(255, 240, 245)', 'lawngreen': 'rgb(124, 252, 0)', 'lemonchiffon': 'rgb(255, 250, 205)', 'lightblue': 'rgb(173, 216, 230)', 'lightcoral': 'rgb(240, 128, 128)', 'lightcyan': 'rgb(224, 255, 255)', 'lightgoldenrodyellow': 'rgb(250, 250, 210)', 'lightgray': 'rgb(211, 211, 211)', 'lightgreen': 'rgb(144, 238, 144)', 'lightgrey': 'rgb(211, 211, 211)', 'lightpink': 'rgb(255, 182, 193)', 'lightsalmon': 'rgb(255, 160, 122)', 'lightseagreen': 'rgb( 32, 178, 170)', 'lightskyblue': 'rgb(135, 206, 250)', 'lightslategray': 'rgb(119, 136, 153)', 'lightslategrey': 'rgb(119, 136, 153)', 'lightsteelblue': 'rgb(176, 196, 222)', 'lightyellow': 'rgb(255, 255, 224)', 'lime': 'rgb( 0, 255, 0)', 'limegreen': 'rgb( 50, 205, 50)', 'linen': 'rgb(250, 240, 230)', 'magenta': 'rgb(255, 0, 255)', 'maroon': 'rgb(128, 0, 0)', 'mediumaquamarine': 'rgb(102, 205, 170)', 'mediumblue': 'rgb( 0, 0, 205)', 'mediumorchid': 'rgb(186, 85, 211)', 'mediumpurple': 'rgb(147, 112, 219)', 'mediumseagreen': 'rgb( 60, 179, 113)', 'mediumslateblue': 'rgb(123, 104, 238)', 'mediumspringgreen': 'rgb( 0, 250, 154)', 'mediumturquoise': 'rgb( 72, 209, 204)', 'mediumvioletred': 'rgb(199, 21, 133)', 'midnightblue': 'rgb( 25, 25, 112)', 'mintcream': 'rgb(245, 255, 250)', 'mistyrose': 'rgb(255, 228, 225)', 'moccasin': 'rgb(255, 228, 181)', 'navajowhite': 'rgb(255, 222, 173)', 'navy': 'rgb( 0, 0, 128)', 
'oldlace': 'rgb(253, 245, 230)', 'olive': 'rgb(128, 128, 0)', 'olivedrab': 'rgb(107, 142, 35)', 'orange': 'rgb(255, 165, 0)', 'orangered': 'rgb(255, 69, 0)', 'orchid': 'rgb(218, 112, 214)', 'palegoldenrod': 'rgb(238, 232, 170)', 'palegreen': 'rgb(152, 251, 152)', 'paleturquoise': 'rgb(175, 238, 238)', 'palevioletred': 'rgb(219, 112, 147)', 'papayawhip': 'rgb(255, 239, 213)', 'peachpuff': 'rgb(255, 218, 185)', 'peru': 'rgb(205, 133, 63)', 'pink': 'rgb(255, 192, 203)', 'plum': 'rgb(221, 160, 221)', 'powderblue': 'rgb(176, 224, 230)', 'purple': 'rgb(128, 0, 128)', 'red': 'rgb(255, 0, 0)', 'rosybrown': 'rgb(188, 143, 143)', 'royalblue': 'rgb( 65, 105, 225)', 'saddlebrown': 'rgb(139, 69, 19)', 'salmon': 'rgb(250, 128, 114)', 'sandybrown': 'rgb(244, 164, 96)', 'seagreen': 'rgb( 46, 139, 87)', 'seashell': 'rgb(255, 245, 238)', 'sienna': 'rgb(160, 82, 45)', 'silver': 'rgb(192, 192, 192)', 'skyblue': 'rgb(135, 206, 235)', 'slateblue': 'rgb(106, 90, 205)', 'slategray': 'rgb(112, 128, 144)', 'slategrey': 'rgb(112, 128, 144)', 'snow': 'rgb(255, 250, 250)', 'springgreen': 'rgb( 0, 255, 127)', 'steelblue': 'rgb( 70, 130, 180)', 'tan': 'rgb(210, 180, 140)', 'teal': 'rgb( 0, 128, 128)', 'thistle': 'rgb(216, 191, 216)', 'tomato': 'rgb(255, 99, 71)', 'turquoise': 'rgb( 64, 224, 208)', 'violet': 'rgb(238, 130, 238)', 'wheat': 'rgb(245, 222, 179)', 'white': 'rgb(255, 255, 255)', 'whitesmoke': 'rgb(245, 245, 245)', 'yellow': 'rgb(255, 255, 0)', 'yellowgreen': 'rgb(154, 205, 50)', } default_attributes = { # excluded all attributes with 'auto' as default # SVG 1.1 presentation attributes 'baseline-shift': 'baseline', 'clip-path': 'none', 'clip-rule': 'nonzero', 'color': '#000', 'color-interpolation-filters': 'linearRGB', 'color-interpolation': 'sRGB', 'direction': 'ltr', 'display': 'inline', 'enable-background': 'accumulate', 'fill': '#000', 'fill-opacity': '1', 'fill-rule': 'nonzero', 'filter': 'none', 'flood-color': '#000', 'flood-opacity': '1', 'font-size-adjust': 'none', 'font-size': 
default_attributes = {
    # excluded all attributes with 'auto' as default
    # SVG 1.1 presentation attributes
    'baseline-shift': 'baseline',
    'clip-path': 'none',
    'clip-rule': 'nonzero',
    'color': '#000',
    'color-interpolation-filters': 'linearRGB',
    'color-interpolation': 'sRGB',
    'direction': 'ltr',
    'display': 'inline',
    'enable-background': 'accumulate',
    'fill': '#000',
    'fill-opacity': '1',
    'fill-rule': 'nonzero',
    'filter': 'none',
    'flood-color': '#000',
    'flood-opacity': '1',
    'font-size-adjust': 'none',
    'font-size': 'medium',
    'font-stretch': 'normal',
    'font-style': 'normal',
    'font-variant': 'normal',
    'font-weight': 'normal',
    'glyph-orientation-horizontal': '0deg',
    'letter-spacing': 'normal',
    'lighting-color': '#fff',
    'marker': 'none',
    'marker-start': 'none',
    'marker-mid': 'none',
    'marker-end': 'none',
    'mask': 'none',
    'opacity': '1',
    'pointer-events': 'visiblePainted',
    'stop-color': '#000',
    'stop-opacity': '1',
    'stroke': 'none',
    'stroke-dasharray': 'none',
    'stroke-dashoffset': '0',
    'stroke-linecap': 'butt',
    'stroke-linejoin': 'miter',
    'stroke-miterlimit': '4',
    'stroke-opacity': '1',
    'stroke-width': '1',
    'text-anchor': 'start',
    'text-decoration': 'none',
    'unicode-bidi': 'normal',
    'visibility': 'visible',
    'word-spacing': 'normal',
    'writing-mode': 'lr-tb',
    # SVG 1.2 tiny properties
    'audio-level': '1',
    'solid-color': '#000',
    'solid-opacity': '1',
    'text-align': 'start',
    'vector-effect': 'none',
    'viewport-fill': 'none',
    'viewport-fill-opacity': '1',
}


def isSameSign(a, b):
    """Returns True if a and b are both <= 0 or both >= 0."""
    return (a <= 0 and b <= 0) or (a >= 0 and b >= 0)


# Number with a mandatory exponent, plain number, the exponent part alone,
# and a unit suffix anchored at the end of the string.
scinumber = re.compile(r"[-+]?(\d*\.?)?\d+[eE][-+]?\d+")
number = re.compile(r"[-+]?(\d*\.?)?\d+")
sciExponent = re.compile(r"[eE]([-+]?\d+)")
unit = re.compile("(em|ex|px|pt|pc|cm|mm|in|%){1,1}$")


class Unit(object):
    """Integer constants for length units and conversions to/from strings."""

    # Integer constants for units.
    INVALID = -1
    NONE = 0
    PCT = 1
    PX = 2
    PT = 3
    PC = 4
    EM = 5
    EX = 6
    CM = 7
    MM = 8
    IN = 9

    # String to Unit. Basically, converts unit strings to their integer constants.
    s2u = {
        '': NONE,
        '%': PCT,
        'px': PX,
        'pt': PT,
        'pc': PC,
        'em': EM,
        'ex': EX,
        'cm': CM,
        'mm': MM,
        'in': IN,
    }

    # Unit to String. Basically, converts unit integer constants to their corresponding strings.
    u2s = {
        NONE: '',
        PCT: '%',
        PX: 'px',
        PT: 'pt',
        PC: 'pc',
        EM: 'em',
        EX: 'ex',
        CM: 'cm',
        MM: 'mm',
        IN: 'in',
    }

    @staticmethod
    def get(unitstr):
        """Returns the unit constant for unitstr; None maps to Unit.NONE
        and unknown strings to Unit.INVALID."""
        if unitstr is None:
            return Unit.NONE
        return Unit.s2u.get(unitstr, Unit.INVALID)

    @staticmethod
    def str(unitint):
        """Returns the string for a unit constant, or 'INVALID' if unknown."""
        return Unit.u2s.get(unitint, 'INVALID')


class SVGLength(object):
    """
    A parsed length: 'value' holds the number (an int when it is integral)
    and 'units' one of the Unit constants.

    Strings that cannot be parsed, including a number followed by an
    unrecognised suffix, yield value 0 and units Unit.INVALID.
    """

    def __init__(self, str):
        try:
            # simple unitless and no scientific notation
            self.value = float(str)
            if int(self.value) == self.value:
                self.value = int(self.value)
            self.units = Unit.NONE
        except ValueError:
            # we know that the length string has an exponent, a unit, both or is invalid

            # parse out number, exponent and unit
            self.value = 0
            unitBegin = 0
            numMatch = number.match(str)
            if scinumber.match(str) is not None:
                # numMatch always matches here, no need to check it
                expMatch = sciExponent.search(str, numMatch.start(0))
                self.value = (float(numMatch.group(0)) *
                              10 ** float(expMatch.group(1)))
                unitBegin = expMatch.end(1)
            elif numMatch is not None:
                # unit or invalid
                self.value = float(numMatch.group(0))
                unitBegin = numMatch.end(0)

            if int(self.value) == self.value:
                self.value = int(self.value)

            unitMatch = None
            if unitBegin != 0:
                unitMatch = unit.search(str, unitBegin)

            if unitMatch is not None:
                self.units = Unit.get(unitMatch.group(0))
            else:
                # invalid: either no number at all, or a number followed by
                # something that is not a known unit. 'units' must always be
                # set so callers can test it.
                # TODO: this needs to set the default for the given attribute (how?)
                self.value = 0
                self.units = Unit.INVALID


def findElementsWithId(node, elems=None):
    """
    Returns all elements with id attributes, as a dictionary mapping each
    ID to its element. node is the element at which to start the search;
    elems, if given, is filled in and returned.
    """
    if elems is None:
        elems = {}
    elemId = node.getAttribute('id')
    if elemId != '':
        elems[elemId] = node
    for child in node.childNodes:
        # from http://www.w3.org/TR/DOM-Level-2-Core/idl-definitions.html
        # we are only really interested in nodes of type Element (1)
        if child.nodeType == 1:
            findElementsWithId(child, elems)
    return elems


referencingProps = ['fill', 'stroke', 'filter', 'clip-path', 'mask', 'marker-start',
                    'marker-end', 'marker-mid']
def _recordReference(ids, refId, node):
    """Counts one more reference from node to refId in the ids map."""
    if refId in ids:
        ids[refId][0] += 1
        ids[refId][1].append(node)
    else:
        ids[refId] = [1, [node]]


def findReferencedElements(node, ids=None):
    """
    Returns the number of times an ID is referenced as well as all elements
    that reference it.  node is the node at which to start the search.  The
    return value is a map which has the id as key and each value is an array
    where the first value is a count and the second value is a list of nodes
    that referenced it.

    Looks at xlink:href, at the properties listed in referencingProps (both
    as attributes and inside the style attribute), and at the rules of SVG
    style elements.
    """
    global referencingProps
    if ids is None:
        ids = {}
    # TODO: input argument ids is clunky here (see below how it is called)
    # GZ: alternative to passing dict, use **kwargs

    # if this node is a style element, parse its text into CSS
    if node.nodeName == 'style' and node.namespaceURI == NS['SVG']:
        # one stretch of text, please! (we could use node.normalize(), but
        # this actually modifies the node, and we don't want to keep
        # whitespace around if there's any)
        stylesheet = "".join([child.nodeValue for child in node.childNodes])
        if stylesheet != '':
            cssRules = parseCssString(stylesheet)
            for rule in cssRules:
                properties = rule['properties']
                for propname in properties:
                    findReferencingProperty(node, propname, properties[propname], ids)
        # a style element has nothing else worth inspecting
        return ids

    # else if xlink:href is set, then grab the id
    href = node.getAttributeNS(NS['XLINK'], 'href')
    if href != '' and len(href) > 1 and href[0] == '#':
        # we remove the hash mark from the beginning of the id
        _recordReference(ids, href[1:], node)

    # now get all style properties and the fill, stroke, filter attributes
    styles = node.getAttribute('style').split(';')
    for attr in referencingProps:
        styles.append(':'.join([attr, node.getAttribute(attr)]))

    for style in styles:
        propval = style.split(':')
        if len(propval) == 2:
            findReferencingProperty(node, propval[0].strip(), propval[1].strip(), ids)

    for child in node.childNodes:
        if child.nodeType == 1:
            findReferencedElements(child, ids)
    return ids


def findReferencingProperty(node, prop, val, ids):
    """
    If prop is one of referencingProps and val starts with url(#id),
    url("#id") or url('#id'), records node as a referrer of id in ids.

    A url( that is never closed is ignored rather than recorded with a
    truncated ID.
    """
    global referencingProps
    if prop not in referencingProps or val == '':
        return

    refId = None
    if len(val) >= 7 and val[0:5] == 'url(#':
        end = val.find(')')
        if end != -1:
            refId = val[5:end]
    # if the url has a quote in it, we need to compensate
    elif len(val) >= 8:
        end = -1
        # double-quote
        if val[0:6] == 'url("#':
            end = val.find('")')
        # single-quote
        elif val[0:6] == "url('#":
            end = val.find("')")
        if end != -1:
            refId = val[6:end]

    if refId is not None:
        _recordReference(ids, refId, node)


# Statistics counters, updated by the individual optimisation passes.
numIDsRemoved = 0
numElemsRemoved = 0
numAttrsRemoved = 0
numRastersEmbedded = 0
numPathSegmentsReduced = 0
numCurvesStraightened = 0
numBytesSavedInPathData = 0
numBytesSavedInColors = 0
numBytesSavedInIDs = 0
numBytesSavedInLengths = 0
numBytesSavedInTransforms = 0
numPointsRemovedFromPolygon = 0
numCommentBytes = 0


def removeUnusedDefs(doc, defElem, elemsToRemove=None, referencedIDs=None):
    """
    Collects the children of defElem that can be removed because nothing
    references them. Unreferenced SVG <g> children are searched recursively
    instead of being removed as a whole; elements named font, style,
    metadata, script, title or desc are always kept.

    elemsToRemove, if given, is appended to and returned. referencedIDs,
    if given, is the map returned by findReferencedElements for doc; it is
    computed when omitted and reused for the recursive calls.

    Returns the list of elements to remove; nothing is removed here.
    """
    if elemsToRemove is None:
        elemsToRemove = []
    if referencedIDs is None:
        referencedIDs = findReferencedElements(doc.documentElement)

    keepTags = ['font', 'style', 'metadata', 'script', 'title', 'desc']
    for elem in defElem.childNodes:
        if elem.nodeType != 1:
            continue
        # only look at it if not referenced anywhere else
        elemId = elem.getAttribute('id')
        if elemId != '' and elemId in referencedIDs:
            continue
        # we only inspect the children of a group in a defs if the group
        # is not referenced anywhere else
        if elem.nodeName == 'g' and elem.namespaceURI == NS['SVG']:
            elemsToRemove = removeUnusedDefs(doc, elem, elemsToRemove, referencedIDs)
        # we only remove if it is not one of our tags we always keep (see above)
        elif elem.nodeName not in keepTags:
            elemsToRemove.append(elem)
    return elemsToRemove