You can not select more than 25 topics
Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
408 lines
14 KiB
408 lines
14 KiB
5 years ago
|
#
|
||
|
# genxmlif, Release 0.9.0
|
||
|
# file: xmlifElementTree.py
|
||
|
#
|
||
|
# XML interface class to elementtree toolkit by Fredrik Lundh
|
||
|
#
|
||
|
# history:
|
||
|
# 2005-04-25 rl created
|
||
|
# 2007-05-25 rl performance optimization (caching) added, some bugfixes
|
||
|
# 2007-06-29 rl complete re-design, ElementExtension class introduced
|
||
|
# 2008-07-01 rl Limited support of XInclude added
|
||
|
#
|
||
|
# Copyright (c) 2005-2008 by Roland Leuthe. All rights reserved.
|
||
|
#
|
||
|
# --------------------------------------------------------------------
|
||
|
# The generic XML interface is
|
||
|
#
|
||
|
# Copyright (c) 2005-2008 by Roland Leuthe
|
||
|
#
|
||
|
# By obtaining, using, and/or copying this software and/or its
|
||
|
# associated documentation, you agree that you have read, understood,
|
||
|
# and will comply with the following terms and conditions:
|
||
|
#
|
||
|
# Permission to use, copy, modify, and distribute this software and
|
||
|
# its associated documentation for any purpose and without fee is
|
||
|
# hereby granted, provided that the above copyright notice appears in
|
||
|
# all copies, and that both that copyright notice and this permission
|
||
|
# notice appear in supporting documentation, and that the name of
|
||
|
# the author not be used in advertising or publicity
|
||
|
# pertaining to distribution of the software without specific, written
|
||
|
# prior permission.
|
||
|
#
|
||
|
# THE AUTHOR DISCLAIMS ALL WARRANTIES WITH REGARD
|
||
|
# TO THIS SOFTWARE, INCLUDING ALL IMPLIED WARRANTIES OF MERCHANT-
|
||
|
# ABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR
|
||
|
# BE LIABLE FOR ANY SPECIAL, INDIRECT OR CONSEQUENTIAL DAMAGES OR ANY
|
||
|
# DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS,
|
||
|
# WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS
|
||
|
# ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE
|
||
|
# OF THIS SOFTWARE.
|
||
|
# --------------------------------------------------------------------
|
||
|
|
||
|
import sys
|
||
|
import string
|
||
|
import urllib
|
||
|
from xml.dom import EMPTY_NAMESPACE, XMLNS_NAMESPACE
|
||
|
from xml.parsers.expat import ExpatError
|
||
|
# from version 2.5 on the elementtree module is part of the standard python distribution
|
||
|
if sys.version_info[:2] >= (2,5):
|
||
|
from xml.etree.ElementTree import ElementTree, _ElementInterface, XMLTreeBuilder, TreeBuilder
|
||
|
from xml.etree import ElementInclude
|
||
|
else:
|
||
|
from elementtree.ElementTree import ElementTree, _ElementInterface, XMLTreeBuilder, TreeBuilder
|
||
|
from elementtree import ElementInclude
|
||
|
from ..genxmlif import XMLIF_ELEMENTTREE, GenXmlIfError
|
||
|
from xmlifUtils import convertToAbsUrl, processWhitespaceAction, collapseString, toClarkQName, splitQName
|
||
|
from xmlifBase import XmlIfBuilderExtensionBase
|
||
|
from xmlifApi import XmlInterfaceBase
|
||
|
|
||
|
#########################################################
|
||
|
# Derived interface class for elementtree toolkit
|
||
|
|
||
|
class XmlInterfaceElementTree (XmlInterfaceBase):
    """XML interface implementation based on the elementtree toolkit.

    Documents are built from ElementExtension nodes held in an
    ElementTreeExtension and are handed to the caller wrapped in
    self.treeWrapperClass.
    """
    #####################################################
    # for description of the interface methods see xmlifbase.py
    #####################################################

    def __init__ (self, verbose, useCaching, processXInclude):
        """Initialise the base interface and tag it as the elementtree variant."""
        XmlInterfaceBase.__init__ (self, verbose, useCaching, processXInclude)
        self.xmlIfType = XMLIF_ELEMENTTREE
        if self.verbose:
            # A single parenthesised argument prints the same text as a
            # Python 2 print statement and as a Python 3 function call.
            print ("Using elementtree interface module...")


    def createXmlTree (self, namespace, xmlRootTagName, attributeDict={}, publicId=None, systemId=None):
        """Create a new tree consisting of a single root element.

        xmlRootTagName is converted with toClarkQName(); attributeDict holds
        the initial attributes of the root element.  namespace, publicId and
        systemId are accepted for interface compatibility but are not
        evaluated by this implementation.

        Returns the tree wrapper (instance of self.treeWrapperClass).
        """
        # Hand a private copy to the element: the element implementation may
        # keep the dictionary by reference, and without the copy attribute
        # changes on the new root could leak into the shared default argument
        # (or into the caller's dictionary).
        rootNode = ElementExtension(toClarkQName(xmlRootTagName), dict(attributeDict))
        rootNode.xmlIfExtSetParentNode(None)
        treeWrapper = self.treeWrapperClass(self, ElementTreeExtension(rootNode), self.useCaching)
        # The wrapper object itself is not needed here; the call is kept
        # because its constructor presumably registers the wrapper with the
        # node -- TODO confirm.
        self.elementWrapperClass (rootNode, treeWrapper, []) # TODO: namespace handling
        return treeWrapper


    def parse (self, file, baseUrl="", ownerDoc=None):
        """Parse the XML document addressed by file (resolved against baseUrl).

        If self.processXInclude is set, XInclude directives are processed
        afterwards using an ExtXIncludeLoader.

        Returns the tree wrapper of the parsed document.
        Raises GenXmlIfError for ExpatError, for FatalIncludeError and for an
        IOError raised during XInclude processing.
        """
        absUrl = convertToAbsUrl (file, baseUrl)
        fp = urllib.urlopen (absUrl)
        # sys.exc_info() is used instead of "except X, e" / "except X as e"
        # so that the handlers are valid syntax on every Python version.
        try:
            try:
                tree = ElementTreeExtension()
                treeWrapper = self.treeWrapperClass(self, tree, self.useCaching)
                parser = ExtXMLTreeBuilder(file, absUrl, self, treeWrapper)
                treeWrapper.getTree().parse(fp, parser)
            finally:
                # close the input on success and on *any* failure
                fp.close()

            # XInclude support
            if self.processXInclude:
                loaderInst = ExtXIncludeLoader (self.parse, absUrl, ownerDoc)
                try:
                    ElementInclude.include(treeWrapper.getTree().getroot(), loaderInst.loader)
                except IOError:
                    raise GenXmlIfError("%s: IOError: %s" %(file, str(sys.exc_info()[1])))

        except ExpatError:
            raise GenXmlIfError("%s: ExpatError: %s" %(file, str(sys.exc_info()[1])))
        except ElementInclude.FatalIncludeError:
            raise GenXmlIfError("%s: XIncludeError: %s" %(file, str(sys.exc_info()[1])))

        return treeWrapper


    def parseString (self, text, baseUrl="", ownerDoc=None):
        """Parse an XML document given as string.

        baseUrl is used to resolve XInclude references.  Unlike parse(), this
        method does not translate parser or XInclude errors into
        GenXmlIfError; they propagate unchanged.

        Returns the tree wrapper of the parsed document.
        """
        absUrl = convertToAbsUrl ("", baseUrl)
        tree = ElementTreeExtension()
        treeWrapper = self.treeWrapperClass(self, tree, self.useCaching)
        parser = ExtXMLTreeBuilder("", absUrl, self, treeWrapper)
        parser.feed(text)
        treeWrapper.getTree()._setroot(parser.close())

        # XInclude support
        if self.processXInclude:
            loaderInst = ExtXIncludeLoader (self.parse, absUrl, ownerDoc)
            ElementInclude.include(treeWrapper.getTree().getroot(), loaderInst.loader)

        return treeWrapper
|
||
|
|
||
|
|
||
|
#########################################################
|
||
|
# Extension (derived) class for ElementTree class
|
||
|
|
||
|
class ElementTreeExtension (ElementTree):
    """ElementTree subclass providing the tree-level xmlIfExt* methods."""

    def xmlIfExtGetRootNode (self):
        """Return the root element of this tree (the result of getroot())."""
        rootNode = self.getroot()
        return rootNode


    def xmlIfExtCreateElement (self, nsName, attributeDict, curNs):
        """Create a new ElementExtension which is not yet part of the tree.

        nsName is converted with toClarkQName(); curNs is not used by this
        implementation.
        """
        return ElementExtension (toClarkQName(nsName), attributeDict)


    def xmlIfExtCloneTree (self, rootElementCopy):
        """Return a new tree of the same class with rootElementCopy as root."""
        treeClass = self.__class__
        return treeClass(element=rootElementCopy)
|
||
|
|
||
|
|
||
|
#########################################################
|
||
|
# Extension (derived) class for Element class
|
||
|
|
||
|
class ElementExtension (_ElementInterface):
    """Element subclass implementing the node-level xmlIfExt* methods.

    Besides the plain element data each node stores a reference to its
    parent element, maintained by xmlIfExtSetParentNode() and the methods
    which attach children.

    Children are accessed through the sequence protocol of the element
    (len(), indexing, iteration) instead of getchildren(): getchildren() is
    deprecated and may return the internal child list by reference.
    """

    def __init__ (self, xmlRootTagName, attributeDict):
        """Create an element with the given (Clark notation) tag and attributes."""
        _ElementInterface.__init__(self, xmlRootTagName, attributeDict)
        # A node which was never attached has no parent; without this
        # xmlIfExtGetParentNode() would raise AttributeError on such a node.
        self.__xmlIfExtParentElement = None


    def xmlIfExtUnlink (self):
        """Drop the references to the element wrapper and to the parent element."""
        self.xmlIfExtElementWrapper = None
        self.__xmlIfExtParentElement = None


    def xmlIfExtCloneNode (self):
        """Return a copy of this node (tag, attributes, text, tail) without children."""
        nodeCopy = self.__class__(self.tag, self.attrib.copy())
        nodeCopy.text = self.text
        nodeCopy.tail = self.tail
        return nodeCopy


    def xmlIfExtGetTagName (self):
        """Return the tag of this element as stored in self.tag."""
        return self.tag


    def xmlIfExtGetNamespaceURI (self):
        """Return the first component of splitQName(self.tag)."""
        return splitQName(self.tag)[0]


    def xmlIfExtGetParentNode (self):
        """Return the parent element or None if no parent is set."""
        return self.__xmlIfExtParentElement


    def xmlIfExtSetParentNode (self, parentElement):
        """Store parentElement as parent of this node (the tree itself is not changed)."""
        self.__xmlIfExtParentElement = parentElement


    def xmlIfExtGetChildren (self, filterTag=None):
        """Return a list of the direct children.

        Without filterTag a new list of all children is returned, otherwise
        the result of findall() for the Clark notation of filterTag.
        """
        if filterTag is None:
            return list(self)
        return self.findall(toClarkQName(filterTag))


    def xmlIfExtGetFirstChild (self, filterTag=None):
        """Return the first (matching) child element or None if there is none."""
        # replace base method (performance optimized)
        if filterTag is not None:
            return self.find(toClarkQName(filterTag))
        # do not use the truth value of an element here, test the length
        if len(self) > 0:
            return self[0]
        return None


    def xmlIfExtGetElementsByTagName (self, filterTag=(None,None)):
        """Return a list of all matching descendants (this node itself excluded)."""
        clarkFilterTag = toClarkQName(filterTag)
        descendants = []
        for node in self.xmlIfExtGetChildren():
            descendants.extend(node.getiterator(clarkFilterTag))
        return descendants


    def xmlIfExtGetIterator (self, filterTag=(None,None)):
        """Return getiterator() of this node for the Clark notation of filterTag."""
        clarkFilterTag = toClarkQName(filterTag)
        return self.getiterator (clarkFilterTag)


    def xmlIfExtAppendChild (self, childElement):
        """Append childElement as last child and set its parent reference."""
        self.append (childElement)
        childElement.xmlIfExtSetParentNode(self)


    def xmlIfExtInsertBefore (self, childElement, refChildElement):
        """Insert childElement directly before refChildElement.

        Raises ValueError if refChildElement is not a child of this node.
        """
        insertIndex = list(self).index(refChildElement)
        self.insert (insertIndex, childElement)
        childElement.xmlIfExtSetParentNode(self)


    def xmlIfExtRemoveChild (self, childElement):
        """Remove childElement from the children of this node.

        NOTE(review): the parent reference of childElement is left untouched.
        """
        self.remove (childElement)


    def xmlIfExtInsertSubtree (self, refChildElement, subTree, insertSubTreeRootNode):
        """Insert the nodes of subTree before refChildElement.

        If insertSubTreeRootNode is true the root node of subTree is
        inserted, otherwise the children of that root node.  With
        refChildElement None the nodes are inserted at index 0.
        The child list of the subTree root node is not modified.
        """
        if refChildElement is not None:
            insertIndex = list(self).index (refChildElement)
        else:
            insertIndex = 0

        subTreeRoot = subTree.xmlIfExtGetRootNode()
        if insertSubTreeRootNode:
            elementList = [subTreeRoot,]
        else:
            # work on a private copy: reversing the list returned by the
            # source node in place could reverse the children of the source
            elementList = list(subTreeRoot.xmlIfExtGetChildren())

        # inserting in reverse order at a fixed index keeps the original order
        elementList.reverse()
        for element in elementList:
            self.insert (insertIndex, element)
            element.xmlIfExtSetParentNode(self)


    def xmlIfExtGetAttributeDict (self):
        """Return the attributes as dictionary {(namespace, localName): value}.

        Attribute names in Clark notation are split at the closing brace;
        names without namespace get EMPTY_NAMESPACE as first tuple item.
        """
        attrDict = {}
        for attrName, attrValue in self.attrib.items():
            namespaceEndIndex = attrName.find('}')
            if namespaceEndIndex != -1:
                attrKey = (attrName[1:namespaceEndIndex], attrName[namespaceEndIndex+1:])
            else:
                attrKey = (EMPTY_NAMESPACE, attrName)
            attrDict[attrKey] = attrValue
        return attrDict


    def xmlIfExtGetAttribute (self, tupleOrAttrName):
        """Return the value of the given attribute or None if it does not exist."""
        return self.attrib.get(toClarkQName(tupleOrAttrName))


    def xmlIfExtSetAttribute (self, tupleOrAttrName, attributeValue, curNs):
        """Set the given attribute; curNs is not used by this implementation."""
        self.attrib[toClarkQName(tupleOrAttrName)] = attributeValue


    def xmlIfExtRemoveAttribute (self, tupleOrAttrName):
        """Remove the given attribute; a missing attribute is ignored."""
        clarkQName = toClarkQName(tupleOrAttrName)
        if clarkQName in self.attrib:
            del self.attrib[clarkQName]


    def xmlIfExtGetElementValueFragments (self, ignoreEmtpyStringFragments):
        """Return the text fragments of this element as list.

        The fragments are the text of the element followed by the tail texts
        of its children.  If ignoreEmtpyStringFragments is true, fragments
        for which collapseString() returns "" are dropped.  The result is
        never empty: [""] is returned if no fragment is left.
        """
        elementValueList = []
        if self.text is not None:
            elementValueList.append(self.text)
        for child in self:
            if child.tail is not None:
                elementValueList.append(child.tail)

        if ignoreEmtpyStringFragments:
            elementValueList = [fragment for fragment in elementValueList
                                if collapseString(fragment) != ""]
        if not elementValueList:
            elementValueList = ["",]
        return elementValueList


    def xmlIfExtGetElementText (self):
        """Return the text of this element ("" if there is none)."""
        if self.text is None:
            return ""
        return self.text


    def xmlIfExtGetElementTailText (self):
        """Return the tail text of this element ("" if there is none)."""
        if self.tail is None:
            return ""
        return self.tail


    def xmlIfExtSetElementValue (self, elementValue):
        """Set the element text to elementValue and remove all child tail texts."""
        self.text = elementValue
        for child in self:
            child.tail = None


    def xmlIfExtProcessWsElementValue (self, wsAction):
        """Apply processWhitespaceAction() to all text fragments of this element.

        The fragments are the element text and the tail texts of the
        children.  The lstrip flag is passed as 1 only for the first existing
        fragment and the rstrip flag as 1 only for the last one.
        """
        noOfTextFragments = len([child for child in self if child.tail is not None])
        if self.text is not None:
            noOfTextFragments += 1

        rstrip = 0
        lstrip = 1
        if self.text is not None:
            if noOfTextFragments == 1:
                rstrip = 1
            self.text = processWhitespaceAction (self.text, wsAction, lstrip, rstrip)
            noOfTextFragments -= 1
            lstrip = 0
        for child in self:
            if child.tail is not None:
                if noOfTextFragments == 1:
                    rstrip = 1
                child.tail = processWhitespaceAction (child.tail, wsAction, lstrip, rstrip)
                noOfTextFragments -= 1
                lstrip = 0
|
||
|
|
||
|
|
||
|
###################################################
|
||
|
# Element tree builder class derived from XMLTreeBuilder
|
||
|
# extended to store related line numbers in the Element object
|
||
|
|
||
|
class ExtXMLTreeBuilder (XMLTreeBuilder, XmlIfBuilderExtensionBase):
    """XMLTreeBuilder creating ElementExtension nodes.

    The expat callbacks are forwarded to the handlers of
    XmlIfBuilderExtensionBase together with the current line number of the
    parser and the list of namespace declarations currently in scope.
    """

    def __init__(self, filePath, absUrl, xmlIf, treeWrapper):
        """Set up the parser, the namespace handlers and the builder extension."""
        XMLTreeBuilder.__init__(self, target=TreeBuilder(element_factory=ElementExtension))
        self._parser.StartNamespaceDeclHandler = self._start_ns
        self._parser.EndNamespaceDeclHandler = self._end_ns
        # stack of (prefix, uri) tuples, most recent declaration first
        self.namespaces = []
        XmlIfBuilderExtensionBase.__init__(self, filePath, absUrl, treeWrapper, xmlIf.elementWrapperClass)

    def _start(self, tag, attrib_in):
        """Start element callback receiving the attributes as dictionary."""
        elem = XMLTreeBuilder._start(self, tag, attrib_in)
        # start() expects the flat list [name1, value1, name2, value2, ...]
        # which _start_list() receives from the parser.
        attributes = []
        for attrName, attrValue in attrib_in.items():
            attributes.append(attrName)
            attributes.append(attrValue)
        self.start(elem, attributes)

    def _start_list(self, tag, attrib_in):
        """Start element callback receiving the attributes as flat list."""
        elem = XMLTreeBuilder._start_list(self, tag, attrib_in)
        self.start(elem, attrib_in)

    def _end(self, tag):
        """End element callback."""
        elem = XMLTreeBuilder._end(self, tag)
        self.end(elem)

    def _start_ns(self, prefix, value):
        """Push a namespace declaration on the namespace stack."""
        self.namespaces.insert(0, (prefix, value))

    def _end_ns(self, prefix):
        """Pop the most recent namespace declaration from the namespace stack."""
        # The pop must not be part of the assert expression: assert
        # statements are removed when Python runs with -O and the stack
        # would then never shrink.
        closedPrefix = self.namespaces.pop(0)[0]
        assert closedPrefix == prefix, "implementation confused"


    def start(self, element, attributes):
        """Forward a start tag to XmlIfBuilderExtensionBase.startElementHandler.

        attributes is the flat list [name1, value1, name2, value2, ...];
        the names in it are modified in place (see bugfix below).
        """
        # bugfix for missing start '{'
        for i in range (0, len(attributes), 2):
            attrName = attributes[i]
            if attrName.find('}') != -1 and attrName[0] != "{":
                attributes[i] = '{' + attrName
        # bugfix end

        XmlIfBuilderExtensionBase.startElementHandler (self, element, self._parser.ErrorLineNumber, self.namespaces[:], attributes)
        if len(self._target._elem) > 1:
            # the element below the top of the builder's element stack is the parent
            element.xmlIfExtSetParentNode (self._target._elem[-2])
        else:
            # outermost element: store the namespace declarations in scope as attributes
            for namespace in self.namespaces:
                if namespace[1] is not None:
                    element.xmlIfExtElementWrapper.setAttribute((XMLNS_NAMESPACE, namespace[0]), namespace[1])


    def end(self, element):
        """Forward an end tag to XmlIfBuilderExtensionBase.endElementHandler."""
        XmlIfBuilderExtensionBase.endElementHandler (self, element, self._parser.ErrorLineNumber)
|
||
|
|
||
|
|
||
|
###################################################
|
||
|
# XInclude loader
|
||
|
#
|
||
|
|
||
|
class ExtXIncludeLoader:
    """Loader object for ElementInclude.include().

    parser is a callable taking (href, baseUrl, ownerDoc) and returning a
    tree wrapper; baseUrl is used to resolve the href of included resources.
    """

    def __init__(self, parser, baseUrl, ownerDoc):
        """Store the parse callable, the base URL and the owner document."""
        self.parser = parser
        self.baseUrl = baseUrl
        self.ownerDoc = ownerDoc

    def loader(self, href, parse, encoding=None):
        """Load the resource addressed by href.

        For parse == "xml" the resource is parsed with self.parser and the
        root element of the resulting tree is returned.  Otherwise the
        content of the resource is read and returned, decoded with encoding
        if one is given.
        """
        if parse == "xml":
            return self.parser(href, self.baseUrl, self.ownerDoc).getTree().getroot()

        absUrl = convertToAbsUrl (href, self.baseUrl)
        fp = urllib.urlopen (absUrl)
        try:
            data = fp.read()
        finally:
            # release the handle even if reading fails
            fp.close()
        if encoding:
            data = data.decode(encoding)
        return data
|