# trex.py -- source listing taken from the python-xml package

# PyTREX: A clean-room implementation of TREX in Python
# by James Tauber
#
# http://pytrex.sourceforge.net/
#
# Copyright (c) 2001, James Tauber
# All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions
# are met:
#
# * Redistributions of source code must retain the above copyright
#   notice, this list of conditions and the following disclaimer.
# * Redistributions in binary form must reproduce the above copyright
#   notice, this list of conditions and the following disclaimer in
#   the documentation and/or other materials provided with the
#   distribution.
# * The name "James Tauber" may not be used to endorse or promote
#   products derived from this software without specific prior written
#   permission.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
# FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
# REGENTS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
# INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
# BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
# LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
# CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
# LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN
# ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
# POSSIBILITY OF SUCH DAMAGE.


########################################################################
#
# TO USE FROM THE COMMAND LINE:
# - python pytrex.py <trex-file> <instance-file>
#
# TO USE IN OTHER PYTHON SCRIPTS:
# - import the pytrex.py file (must be on PYTHONPATH):
#     from pytrex import *
# - parse the TREX file:
#     trex = parse_TREX("foo.trex")
# - parse the instance file:
#     instance = parse_Instance("bar.xml")
# - validate
#     match = validate(trex, instance)
#   match will be an Error object if invalid - test with isError()
#
# You can see an internal representation of the TREX grammar and the
# instance with trex.display() and instance.display() respectively
#
# You can also see a representation of the match object returned by
# validate with match.display()
#
# NOT IMPLEMENTED YET
# - ns attribute inheritance that takes into account inclusion
# - anonymous datatypes
#
# questions:
#
# zeroOrMore = empty | oneOrMore
# but empty doesn't allow whitespace and zeroOrMore does
#
# DATATYPE SUPPORT
#
# PyTREX has support for named datatypes in general but none in
# particular. To add support for a particular datatype, write a
# function (or lambda) that takes a string and returns 1 or 0
# depending on whether the datatype allows the given string as a
# lexical representation. Then register the datatype by calling: 
#
#   register_datatype(<namespaceURI of datatype>, <NCName of datatype>,
#                     <test function>)



########################################################################
### COMMON

class HandlerBase:
    """Base class for the per-element expat callback handlers.

    Creating a handler installs its callbacks on ``parser``; when the
    element it handles ends, ``end`` re-installs the parent's callbacks.

    All handlers created for one parse share a single ``ns_decls``
    dictionary (prefix -> namespace URI), which is kept up to date by the
    namespace declaration callbacks.
    """

    def __init__(self, parser, parent, atts):
        """Remember parser/parent and take over the parser callbacks.

        ``atts`` is accepted for the benefit of subclasses; it is not
        used here.
        """
        self.parser = parser
        self.parent = parent
        if parent is not None:
            # share the prefix table (and shadow stacks) with the parent
            self.ns_decls = parent.ns_decls
            shadowed = getattr(parent, "_ns_shadowed", None)
        else:
            self.ns_decls = {}
            shadowed = None
        if shadowed is None:
            shadowed = {}
        # prefix -> list of outer URIs hidden by an inner redeclaration
        self._ns_shadowed = shadowed
        self.set_handlers()

    def set_handlers(self):
        """Point the parser's callbacks at this handler's methods."""
        self.parser.StartElementHandler = self.child
        self.parser.CharacterDataHandler = self.char
        self.parser.EndElementHandler = self.end
        self.parser.StartNamespaceDeclHandler = self.start_ns_decl
        self.parser.EndNamespaceDeclHandler = self.end_ns_decl

    def start_ns_decl(self, prefix, uri):
        """Bind ``prefix`` to ``uri``, remembering any binding it hides."""
        if prefix in self.ns_decls:
            stack = self._ns_shadowed.setdefault(prefix, [])
            stack.append(self.ns_decls[prefix])
        self.ns_decls[prefix] = uri

    def end_ns_decl(self, prefix):
        """Undo the innermost binding of ``prefix``.

        If the declaration that is going out of scope had hidden an outer
        one, the outer binding is restored rather than lost.
        """
        stack = self._ns_shadowed.get(prefix)
        if stack:
            self.ns_decls[prefix] = stack.pop()
        else:
            del self.ns_decls[prefix]

    def child(self, name, atts):
        """Start-element callback; the default ignores the element."""
        pass

    def char(self, data):
        """Character-data callback; the default ignores the data."""
        pass

    def end(self, name):
        """End-element callback: hand the parser back to the parent."""
        if self.parent is not None:
            self.parent.set_handlers()
        else: # must be root
            pass



########################################################################
### TREX PARSING

# Namespace URI that identifies TREX elements.  parse_TREX creates the
# expat parser with "^" as namespace separator, so element names in this
# namespace are reported as trex_ns + "^" + local name (see in_trex_ns).
trex_ns = "http://www.thaiopensource.com/trex"

def parse_TREX(location, baseURI=None):
    if baseURI==None:
        baseURI = location

    import xml.parsers.expat
    parser = xml.parsers.expat.ParserCreate(namespace_separator="^")
    parser.SetBase(baseURI)
    parser.returns_unicode = 1

    r = T_RootHandler(parser)

    from urllib2 import urlopen
    # TODO: doesn't catch well-formedness errors in TREX
    try:
        f = urlopen(location)
        parser.ParseFile(f)
    except IOError, e:
        print "IOError reading TREX file", e
        import sys; sys.exit()
    except xml.parsers.expat.error:
        print "Error parsing file at line '%s' and column '%s'\n" % (parser.ErrorLineNumber, parser.ErrorColumnNumber)
        f.close()
        import sys; sys.exit()
    except TREXError, e:
        print "Error parsing TREX file:", e.value
        f.close()
        import sys; sys.exit()
    f.close()

    return r.product


class TREXError(Exception):
    """Raised while reading a TREX document that is not acceptable.

    The explanatory text is available as ``value`` (as before) and, now
    that this is a real Exception subclass, through str() as well.
    """

    def __init__(self, value):
        Exception.__init__(self, value)
        self.value = value


class T_HandlerBase(HandlerBase):
    """Common behaviour of the handlers that read a TREX document.

    Adds two attributes on top of HandlerBase:

    ns_attr       -- value of the element's "ns" attribute, inherited from
                     the parent when absent ("" when no attributes are
                     given at all, as for the root handler)
    using_trex_ns -- copied from the parent; the root handler sets it when
                     it sees the document element

    The child_* methods are meant to be bound as ``child`` by subclasses.
    """

    def __init__(self, parser, parent, atts):
        HandlerBase.__init__(self, parser, parent, atts)
        if atts is not None:
            if "ns" in atts:
                self.ns_attr = atts["ns"]
            else:
                self.ns_attr = parent.ns_attr
        else: # must be root
            self.ns_attr = ""
        if parent is not None:
            self.using_trex_ns = parent.using_trex_ns

    # handle children of elements that can take pattern children
    def child_pattern(self, name, atts):
        if not handlePattern(self.parser, self, name, atts):
            self.child_non_trex(name, atts)

    # handle children of elements that take name-class
    def child_nameclass(self, name, atts):
        if not handleNameClass(self.parser, self, name, atts):
            self.child_non_trex(name, atts)

    # handle children of elements that take name-class and patterns
    def child_nameclass_pattern(self, name, atts):
        # until a name class is known the first child must supply it
        if self.product.name_class==None:
            self.child_nameclass(name, atts)
        else:
            self.child_pattern(name, atts)

    # handle children of elements that take no children
    def child_none(self, name, atts):
        raise TREXError("%s not allowed here" % name)

    # handle children of elements that can only take non-trex children
    def child_non_trex(self, name, atts):
        # An element counts as TREX when it is in the TREX namespace, or
        # when the document does not use that namespace and the element
        # has no namespace.  Anything else is foreign and is skipped.
        if in_trex_ns(name):
            raise TREXError("%s not allowed here" % name)
        elif not self.using_trex_ns and in_default_ns(name):
            raise TREXError("%s not allowed here" % name)
        else:
            T_Ignore(self.parser, self, name, atts)


class T_Ignore(T_HandlerBase):
    """Handler that swallows a foreign (non-TREX) element.

    Nested elements are treated the same way through child_non_trex.
    """

    child = T_HandlerBase.child_non_trex

    def __init__(self, parser, parent, name, atts):
        # the element's own attributes are deliberately not passed on
        T_HandlerBase.__init__(self, parser, parent, None)


class T_RootHandler(T_HandlerBase):
    """Handler installed before parsing starts; receives the root pattern."""

    def __init__(self, parser, parent = None, atts = None):
        T_HandlerBase.__init__(self, parser, parent, atts)

    def add_pattern(self, pattern):
        # the pattern built for the document element is the parse result
        self.product = pattern

    def child(self, name, atts):
        # the document element decides whether the TREX namespace is in use
        self.using_trex_ns = int(name.startswith(trex_ns + "^"))
        handled = handlePattern(self.parser, self, name, atts)
        if not handled:
            raise TREXError("%s not supported as root" % name)


def in_trex_ns(name):
    """Tell whether the expat name ``name`` lies in the TREX namespace."""
    return name.startswith(trex_ns + "^")


def in_default_ns(name):
    """Tell whether ``name`` carries no "^" namespace separator."""
    return name.find("^") < 0


def trex_ncname(name, using_trex_ns):
    """Map an expat element name to the TREX local name it stands for.

    Returns the local part for a name in the TREX namespace, the name
    itself when the document does not use the TREX namespace, and "" for
    a name outside the TREX namespace in a document that does use it.
    Raises TREXError for a TREX-namespace name in a document whose root
    is not in that namespace.
    """
    if not in_trex_ns(name):
        if using_trex_ns:
            return ""
        return name
    if not using_trex_ns:
        raise TREXError("root pattern isn't in trex namespace but descendant is")
    return name[len(trex_ns)+1:]


def handleNameClass(parser, handler, name, atts):
    """Start the handler for a name-class element.

    Returns 1 when ``name`` is a known name-class element (its handler
    has then been created with ``handler`` as parent), 0 otherwise.
    """
    local = trex_ncname(name, handler.using_trex_ns)
    # built per call: the handler classes are defined further down
    factories = {
        "name": T_NameHandler,
        "anyName": T_AnyNameHandler,
        "nsName": T_NSNameHandler,
        "choice": T_NameClass_ChoiceHandler,
        "difference": T_DifferenceHandler,
        "not": T_NotHandler,
    }
    factory = factories.get(local)
    if factory is None:
        return 0
    factory(parser, handler, atts)
    return 1


def handlePattern(parser, handler, name, atts):
    """Start the handler for a pattern element.

    Returns 1 when ``name`` is a known pattern element (its handler has
    then been created with ``handler`` as parent), 0 otherwise.
    """
    local = trex_ncname(name, handler.using_trex_ns)
    # built per call: the handler classes are defined further down
    factories = {
        "element": T_ElementHandler,
        "empty": T_EmptyHandler,
        "notAllowed": T_NotAllowedHandler,
        "zeroOrMore": T_ZeroOrMoreHandler,
        "oneOrMore": T_OneOrMoreHandler,
        "anyString": T_AnyStringHandler,
        "string": T_StringHandler,
        "optional": T_OptionalHandler,
        "choice": T_ChoiceHandler,
        "concur": T_ConcurHandler,
        "interleave": T_InterleaveHandler,
        "mixed": T_MixedHandler,
        "group": T_GroupHandler,
        "attribute": T_AttributeHandler,
        "grammar": T_GrammarHandler,
        "ref": T_RefHandler,
        "include": T_IncludeHandler,
        "data": T_DataHandler,
    }
    factory = factories.get(local)
    if factory is None:
        return 0
    factory(parser, handler, atts)
    return 1


class T_ElementHandler(T_HandlerBase):
    """Builds a T_Element from an <element> pattern.

    The name class comes either from the "name" attribute or from the
    first child; further children become the content pattern, several of
    them being combined with T_Group.
    """

    child = T_HandlerBase.child_nameclass_pattern

    def __init__(self, parser, parent, atts):
        T_HandlerBase.__init__(self, parser, parent, atts)
        self.product = T_Element()
        if "name" not in atts:
            return
        qname = atts["name"]
        if ":" not in qname:
            # unprefixed: the namespace is the ns attribute in scope
            self.add_nameclass(ExpandedName(self.ns_attr, qname))
            return
        # QName
        prefix, local = qname.split(":")
        if prefix not in self.ns_decls:
            raise TREXError("QName %s has unknown prefix" % qname)
        self.add_nameclass(ExpandedName(self.ns_decls[prefix], local))

    def add_nameclass(self, name_class):
        self.product.name_class = name_class

    def add_pattern(self, pattern):
        current = self.product.pattern
        if current==None:
            self.product.pattern = pattern
        else:
            self.product.pattern = T_Group(current, pattern)

    def end(self, name):
        if self.product.name_class==None:
            raise TREXError("element must have a name")
        self.parent.add_pattern(self.product)
        T_HandlerBase.end(self, name)


class T_AttributeHandler(T_HandlerBase):
    """Builds a T_Attribute from an <attribute> pattern.

    An unprefixed name gets the ns attribute in scope when global="true"
    is given, otherwise the element's own "ns" attribute ("" if absent).
    A missing value pattern defaults to T_AnyString().
    """

    child = T_HandlerBase.child_nameclass_pattern

    def __init__(self, parser, parent, atts):
        T_HandlerBase.__init__(self, parser, parent, atts)
        self.product = T_Attribute()
        if atts.get("global") == "true":
            ns = self.ns_attr
        else:
            ns = atts.get("ns", "")
        if "name" not in atts:
            return
        qname = atts["name"]
        if ":" in qname:
            # QName: the prefix decides the namespace
            prefix, local = qname.split(":")
            if prefix not in self.ns_decls:
                raise TREXError("QName %s has unknown prefix" % qname)
            ns = self.ns_decls[prefix]
        else:
            local = qname
        self.add_nameclass(ExpandedName(ns, local))

    def add_nameclass(self, name_class):
        self.product.name_class = name_class

    def add_pattern(self, pattern):
        self.product.pattern = pattern

    def end(self, name):
        result = self.product
        if result.name_class==None:
            raise TREXError("attribute must have a name")
        if result.pattern==None:
            result.pattern = T_AnyString()
        self.parent.add_pattern(result)
        T_HandlerBase.end(self, name)


class T_NameHandler(T_HandlerBase):
    """Builds an ExpandedName from the text content of a <name> element."""

    child = T_HandlerBase.child_none

    def __init__(self, parser, parent, atts):
        T_HandlerBase.__init__(self, parser, parent, atts)
        self.product = ExpandedName()
        self.chardata = ""

    def char(self, data):
        # text may arrive in several pieces
        self.chardata += data

    def end(self, name):
        result = self.product
        # NOTE(review): the namespace is always "" here; neither ns_attr
        # nor a prefix in the text is consulted -- confirm this is intended
        result.namespaceURI = ""
        result.NCName = self.chardata
        self.parent.add_nameclass(result)
        T_HandlerBase.end(self, name)


class T_AnyNameHandler(T_HandlerBase):
    """Hands an AnyName name class to the parent for <anyName>."""

    child = T_HandlerBase.child_non_trex

    def __init__(self, parser, parent, atts):
        T_HandlerBase.__init__(self, parser, parent, atts)
        self.product = AnyName()

    def end(self, name):
        self.parent.add_nameclass(self.product)
        T_HandlerBase.end(self, name)

    def char(self, data):
        # any character data at all is rejected
        raise TREXError("anyName should not have character data")


class T_NSNameHandler(T_HandlerBase):
    """Hands an NSName for the ns attribute in scope to the parent."""

    child = T_HandlerBase.child_non_trex

    def __init__(self, parser, parent, atts):
        T_HandlerBase.__init__(self, parser, parent, atts)
        namespace = self.ns_attr
        self.product = NSName(namespace)

    def end(self, name):
        self.parent.add_nameclass(self.product)
        T_HandlerBase.end(self, name)

    def char(self, data):
        # any character data at all is rejected
        raise TREXError("nsName should not have character data")


class T_EmptyHandler(T_HandlerBase):
    """Hands a T_Empty pattern to the parent for <empty>."""

    child = T_HandlerBase.child_non_trex

    def __init__(self, parser, parent, atts):
        T_HandlerBase.__init__(self, parser, parent, atts)
        self.product = T_Empty()

    def end(self, name):
        self.parent.add_pattern(self.product)
        T_HandlerBase.end(self, name)

    def char(self, data):
        # any character data at all is rejected
        raise TREXError("empty should not have character data")


class T_NotAllowedHandler(T_HandlerBase):
    """Hands a T_NotAllowed pattern to the parent for <notAllowed>."""

    child = T_HandlerBase.child_non_trex

    def __init__(self, parser, parent, atts):
        T_HandlerBase.__init__(self, parser, parent, atts)
        self.product = T_NotAllowed()

    def end(self, name):
        self.parent.add_pattern(self.product)
        T_HandlerBase.end(self, name)

    def char(self, data):
        # any character data at all is rejected
        raise TREXError("notAllowed should not have character data")


class T_AnyStringHandler(T_HandlerBase):
    """Hands a T_AnyString pattern to the parent for <anyString>."""

    child = T_HandlerBase.child_non_trex

    def __init__(self, parser, parent, atts):
        T_HandlerBase.__init__(self, parser, parent, atts)
        self.product = T_AnyString()

    def end(self, name):
        self.parent.add_pattern(self.product)
        T_HandlerBase.end(self, name)

    def char(self, data):
        # any character data at all is rejected
        raise TREXError("anyString should not have character data")


class T_StringHandler(T_HandlerBase):
    """Builds a T_String from the text content of a <string> element.

    The optional whiteSpace attribute ("normalize", the default, or
    "preserve") is passed on to T_String as 1 or 0.
    """

    child = T_HandlerBase.child_non_trex

    def __init__(self, parser, parent, atts):
        T_HandlerBase.__init__(self, parser, parent, atts)
        self.chardata = ""
        mode = atts.get("whiteSpace", "normalize")
        if mode=="normalize":
            self.whitespace_normalize = 1
        elif mode=="preserve":
            self.whitespace_normalize = 0
        else:
            raise TREXError("whiteSpace attribute on string must be normalize or preserve, not %s" % mode)

    def char(self, data):
        # text may arrive in several pieces
        self.chardata += data

    def end(self, name):
        literal = T_String(self.chardata, self.whitespace_normalize)
        self.parent.add_pattern(literal)
        T_HandlerBase.end(self, name)


class T_DataHandler(T_HandlerBase):
    """Builds a T_Data pattern from a <data type="..."/> element.

    The type is a QName: a prefixed name is resolved through the
    namespace declarations in scope, an unprefixed one uses the ns
    attribute in scope.  Raises TREXError when the type attribute is
    missing or its prefix is not declared.
    """

    def __init__(self, parser, parent, atts):
        T_HandlerBase.__init__(self, parser, parent, atts)
        if "type" not in atts:
            raise TREXError("data must have type attribute")
        type_name = atts["type"]
        if ":" in type_name:
            # QName
            prefix, ncname = type_name.split(":")
            if prefix not in self.ns_decls:
                # report the offending type name (this used to refer to
                # an undefined variable and died with a NameError)
                raise TREXError("QName %s has unknown prefix" % type_name)
            ns = self.ns_decls[prefix]
        else:
            ns = self.ns_attr
            ncname = type_name
        self.type_namespace = ns
        self.type_ncname = ncname

    def char(self, data):
        raise TREXError("data should not have character data")

    child = T_HandlerBase.child_non_trex

    def end(self, name):
        self.parent.add_pattern(T_Data(self.type_namespace, self.type_ncname))
        T_HandlerBase.end(self, name)


class T_IncludeHandler(T_HandlerBase):
    """Handles <include href="..."/> used as a pattern.

    The referenced document is parsed with parse_TREX straight away and
    the resulting pattern is handed to the parent when the element ends.
    Raises TREXError when the href attribute is missing.
    """

    def __init__(self, parser, parent, atts):
        T_HandlerBase.__init__(self, parser, parent, atts)
        if "href" not in atts:
            raise TREXError("include must have href attribute")
        from urlparse import urljoin
        # a relative href is relative to the including document, whose
        # location parse_TREX registered as the parser's base; an
        # absolute href comes through urljoin unchanged
        location = urljoin(parser.GetBase(), atts["href"])
        self.product = parse_TREX(location)

    def char(self, data):
        raise TREXError("include should not have character data")

    child = T_HandlerBase.child_non_trex

    def end(self, name):
        self.parent.add_pattern(self.product)
        T_HandlerBase.end(self, name)
    

class T_ZeroOrMoreHandler(T_HandlerBase):
    """Builds the pattern for <zeroOrMore>: empty | oneOrMore(content).

    Several child patterns are combined with T_Group, as the element
    handler does (previously all but the last were silently dropped).
    Raises TREXError when the element has no child pattern.
    """

    def __init__(self, parser, parent, atts):
        T_HandlerBase.__init__(self, parser, parent, atts)
        self.pattern = None     # content seen so far

    child = T_HandlerBase.child_pattern

    def end(self, name):
        if self.pattern is None:
            raise TREXError("zeroOrMore must contain a pattern")
        self.parent.add_pattern(self.product)
        T_HandlerBase.end(self, name)

    def add_pattern(self, pattern):
        if self.pattern is None:
            self.pattern = pattern
        else:
            self.pattern = T_Group(self.pattern, pattern)
        self.product = T_Choice(T_Empty(), T_OneOrMore(self.pattern))


class T_MixedHandler(T_HandlerBase):
    """Builds the pattern for <mixed>: anyString interleaved with content.

    Several child patterns are combined with T_Group, as the element
    handler does (previously all but the last were silently dropped).
    Raises TREXError when the element has no child pattern.
    """

    def __init__(self, parser, parent, atts):
        T_HandlerBase.__init__(self, parser, parent, atts)
        self.pattern = None     # content seen so far

    child = T_HandlerBase.child_pattern

    def end(self, name):
        if self.pattern is None:
            raise TREXError("mixed must contain a pattern")
        self.parent.add_pattern(self.product)
        T_HandlerBase.end(self, name)

    def add_pattern(self, pattern):
        if self.pattern is None:
            self.pattern = pattern
        else:
            self.pattern = T_Group(self.pattern, pattern)
        self.product = T_Interleave(T_AnyString(), self.pattern)


class T_OneOrMoreHandler(T_HandlerBase):
    """Builds the pattern for <oneOrMore>.

    Several child patterns are combined with T_Group, as the element
    handler does (previously all but the last were silently dropped).
    Raises TREXError when the element has no child pattern.
    """

    def __init__(self, parser, parent, atts):
        T_HandlerBase.__init__(self, parser, parent, atts)
        self.pattern = None     # content seen so far

    child = T_HandlerBase.child_pattern

    def end(self, name):
        if self.pattern is None:
            raise TREXError("oneOrMore must contain a pattern")
        self.parent.add_pattern(self.product)
        T_HandlerBase.end(self, name)

    def add_pattern(self, pattern):
        if self.pattern is None:
            self.pattern = pattern
        else:
            self.pattern = T_Group(self.pattern, pattern)
        self.product = T_OneOrMore(self.pattern)


class T_OptionalHandler(T_HandlerBase):
    """Builds the pattern for <optional>: empty | content.

    Several child patterns are combined with T_Group, as the element
    handler does (previously all but the last were silently dropped).
    Raises TREXError when the element has no child pattern.
    """

    def __init__(self, parser, parent, atts):
        T_HandlerBase.__init__(self, parser, parent, atts)
        self.pattern = None     # content seen so far

    child = T_HandlerBase.child_pattern

    def end(self, name):
        if self.pattern is None:
            raise TREXError("optional must contain a pattern")
        self.parent.add_pattern(self.product)
        T_HandlerBase.end(self, name)

    def add_pattern(self, pattern):
        if self.pattern is None:
            self.pattern = pattern
        else:
            self.pattern = T_Group(self.pattern, pattern)
        self.product = T_Choice(T_Empty(), self.pattern)


class T_ChoiceHandler(T_HandlerBase):
    """Builds the pattern for a <choice> of patterns.

    A single child is passed through unchanged; further children are
    folded to the left: T_Choice(T_Choice(p1, p2), p3) and so on.
    Raises TREXError (instead of failing with an AttributeError) when the
    element has no child pattern.
    """

    def __init__(self, parser, parent, atts):
        T_HandlerBase.__init__(self, parser, parent, atts)
        self.product = None

    child = T_HandlerBase.child_pattern

    def end(self, name):
        if self.product is None:
            raise TREXError("choice must contain a pattern")
        self.parent.add_pattern(self.product)
        T_HandlerBase.end(self, name)

    def add_pattern(self, pattern):
        if self.product is None:
            self.product = pattern
        else:
            self.product = T_Choice(self.product, pattern)


class T_ConcurHandler(T_HandlerBase):
    """Builds the pattern for <concur>.

    A single child is passed through unchanged; further children are
    folded to the left: T_Concur(T_Concur(p1, p2), p3) and so on.
    Raises TREXError (instead of failing with an AttributeError) when the
    element has no child pattern.
    """

    def __init__(self, parser, parent, atts):
        T_HandlerBase.__init__(self, parser, parent, atts)
        self.product = None

    child = T_HandlerBase.child_pattern

    def end(self, name):
        if self.product is None:
            raise TREXError("concur must contain a pattern")
        self.parent.add_pattern(self.product)
        T_HandlerBase.end(self, name)

    def add_pattern(self, pattern):
        if self.product is None:
            self.product = pattern
        else:
            self.product = T_Concur(self.product, pattern)


class T_NameClass_ChoiceHandler(T_HandlerBase):
    """Builds the name class for a <choice> of name classes.

    A single child is passed through unchanged; further children are
    folded to the left with NameClassChoice.  Raises TREXError (instead
    of failing with an AttributeError) when there is no child name class.
    """

    def __init__(self, parser, parent, atts):
        T_HandlerBase.__init__(self, parser, parent, atts)
        self.product = None

    child = T_HandlerBase.child_nameclass

    def end(self, name):
        if self.product is None:
            raise TREXError("choice must contain a name class")
        self.parent.add_nameclass(self.product)
        T_HandlerBase.end(self, name)

    def add_nameclass(self, nameclass):
        if self.product is None:
            self.product = nameclass
        else:
            self.product = NameClassChoice(self.product, nameclass)


class T_NotHandler(T_HandlerBase):
    """Builds the name class for <not>: Difference(AnyName(), child).

    Raises TREXError (instead of failing with an AttributeError) when
    there is no child name class.  If several children are given the
    last one wins, as before.
    """

    def __init__(self, parser, parent, atts):
        T_HandlerBase.__init__(self, parser, parent, atts)
        self.product = None

    child = T_HandlerBase.child_nameclass

    def end(self, name):
        if self.product is None:
            raise TREXError("not must contain a name class")
        self.parent.add_nameclass(self.product)
        T_HandlerBase.end(self, name)

    def add_nameclass(self, nameclass):
        self.product = Difference(AnyName(), nameclass)
    

class T_DifferenceHandler(T_HandlerBase):
    """Builds the name class for <difference>.

    A single child is passed through unchanged; further children are
    folded to the left: Difference(Difference(n1, n2), n3) and so on.
    Raises TREXError (instead of failing with an AttributeError) when
    there is no child name class.
    """

    def __init__(self, parser, parent, atts):
        T_HandlerBase.__init__(self, parser, parent, atts)
        self.product = None

    child = T_HandlerBase.child_nameclass

    def end(self, name):
        if self.product is None:
            raise TREXError("difference must contain a name class")
        self.parent.add_nameclass(self.product)
        T_HandlerBase.end(self, name)

    def add_nameclass(self, nameclass):
        if self.product is None:
            self.product = nameclass
        else:
            self.product = Difference(self.product, nameclass)


class T_InterleaveHandler(T_HandlerBase):
    """Builds the pattern for <interleave>.

    A single child is passed through unchanged; further children are
    folded to the left with T_Interleave.  Raises TREXError (instead of
    failing with an AttributeError) when the element has no child
    pattern.
    """

    def __init__(self, parser, parent, atts):
        T_HandlerBase.__init__(self, parser, parent, atts)
        self.product = None

    child = T_HandlerBase.child_pattern

    def end(self, name):
        if self.product is None:
            raise TREXError("interleave must contain a pattern")
        self.parent.add_pattern(self.product)
        T_HandlerBase.end(self, name)

    def add_pattern(self, pattern):
        if self.product is None:
            self.product = pattern
        else:
            self.product = T_Interleave(self.product, pattern)


class T_GroupHandler(T_HandlerBase):
    """Builds the pattern for <group>.

    A single child is passed through unchanged; further children are
    folded to the left with T_Group.  Raises TREXError (instead of
    failing with an AttributeError) when the element has no child
    pattern.
    """

    def __init__(self, parser, parent, atts):
        T_HandlerBase.__init__(self, parser, parent, atts)
        self.product = None

    child = T_HandlerBase.child_pattern

    def end(self, name):
        if self.product is None:
            raise TREXError("group must contain a pattern")
        self.parent.add_pattern(self.product)
        T_HandlerBase.end(self, name)

    def add_pattern(self, pattern):
        if self.product is None:
            self.product = pattern
        else:
            self.product = T_Group(self.product, pattern)


class T_GrammarHandler(T_HandlerBase):
    """Builds a T_Grammar from <grammar> and its start/define/include
    children, which report back through set_start and add_definition."""

    def __init__(self, parser, parent, atts):
        T_HandlerBase.__init__(self, parser, parent, atts)
        self.product = T_Grammar()

    def child(self, name, atts):
        # built per call: the handler classes are defined further down
        factories = {
            "start": T_StartHandler,
            "define": T_DefineHandler,
            "include": T_IncludeGrammarHandler,
        }
        factory = factories.get(trex_ncname(name, self.using_trex_ns))
        if factory is None:
            self.child_non_trex(name, atts)
        else:
            factory(self.parser, self, atts)

    def end(self, name):
        if self.product.start==None:
            raise TREXError("grammar must have a start")
        self.parent.add_pattern(self.product)
        T_HandlerBase.end(self, name)

    def set_start(self, pattern, combine=None):
        """Set the start pattern, or merge with an existing one.

        With a start already present, combine selects the merge:
        "replace" or None overwrite it, "choice"/"group"/"interleave"
        wrap old and new; "concur" and unknown values raise TREXError.
        """
        binary = {"choice": T_Choice, "group": T_Group,
                  "interleave": T_Interleave}
        # combine==None overwrites too (original carried a TODO on this)
        if self.product.start==None or combine=="replace" or combine==None:
            self.product.start = pattern
        elif combine in binary:
            self.product.start = binary[combine](self.product.start, pattern)
        elif combine=="concur":
            raise TREXError("combine='%s' not supported yet" % combine)
        else:
            raise TREXError("unknown value %s for combine" % combine)

    def add_definition(self, name, pattern, combine=None):
        """Register a named pattern, or merge with an existing one.

        With a definition already present, combine selects the merge:
        "replace" overwrites it, "choice"/"group"/"interleave" wrap old
        and new; "concur", None and unknown values raise TREXError.
        """
        binary = {"choice": T_Choice, "group": T_Group,
                  "interleave": T_Interleave}
        known = self.product.definitions
        if not known.has_key(name) or combine=="replace":
            self.product.add_definition(name, pattern)
        elif combine in binary:
            self.product.add_definition(name, binary[combine](known[name], pattern))
        elif combine=="concur":
            raise TREXError("combine='%s' not supported yet" % combine)
        elif combine==None:
            raise TREXError("overriding '%s' of grammar" % name)
        else:
            raise TREXError("unknown value %s for combine" % combine)


class T_IncludeGrammarHandler(T_HandlerBase):
    """Handles <include href="..."/> inside a grammar.

    The referenced document is parsed with parse_TREX straight away; when
    the element ends its start pattern and all of its definitions are
    merged into the enclosing grammar handler.  Raises TREXError when the
    href attribute is missing.
    """

    def __init__(self, parser, parent, atts):
        T_HandlerBase.__init__(self, parser, parent, atts)
        if "href" not in atts:
            raise TREXError("include must have href attribute")
        from urlparse import urljoin
        # a relative href is relative to the including document, whose
        # location parse_TREX registered as the parser's base; an
        # absolute href comes through urljoin unchanged
        location = urljoin(parser.GetBase(), atts["href"])
        self.product = parse_TREX(location)

    def char(self, data):
        raise TREXError("include should not have character data")

    child = T_HandlerBase.child_non_trex

    def end(self, name):
        # no combine value is passed, so set_start / add_definition apply
        # their combine=None behaviour
        self.parent.set_start(self.product.start)
        for definition_name in self.product.definitions.keys():
            self.parent.add_definition(definition_name, self.product.definitions[definition_name])
        T_HandlerBase.end(self, name)


class T_StartHandler(T_HandlerBase):
    """Handles <start> inside a grammar.

    The child pattern becomes the grammar's start pattern and, when a
    name attribute is given, is registered as a definition as well.  The
    combine attribute now applies to both; it used to be ignored for the
    start pattern even though set_start accepts it.
    Raises TREXError when the element has no child pattern.
    """

    def __init__(self, parser, parent, atts):
        T_HandlerBase.__init__(self, parser, parent, atts)
        self.product = None
        self.name = atts.get("name")
        self.combine = atts.get("combine")

    child = T_HandlerBase.child_pattern

    def end(self, name):
        if self.product is None:
            raise TREXError("start must contain a pattern")
        self.parent.set_start(self.product, self.combine)
        if self.name is not None:
            self.parent.add_definition(self.name, self.product, self.combine)
        T_HandlerBase.end(self, name)

    def add_pattern(self, pattern):
        self.product = pattern


class T_RefHandler(T_HandlerBase):
    """Builds a T_Ref from <ref name="..." parent="true|false"/>.

    The parent attribute is optional and is passed to T_Ref as 1 or 0
    (0 when absent).
    """

    child = T_HandlerBase.child_non_trex

    def __init__(self, parser, parent, atts):
        T_HandlerBase.__init__(self, parser, parent, atts)
        if "name" not in atts:
            raise TREXError("ref must have name attribute")
        setting = atts.get("parent", "false")
        if setting == "true":
            in_parent = 1
        elif setting == "false":
            in_parent = 0
        else:
            raise TREXError("ref parent attribute must be 'true' or 'false', not '%s'" % setting)
        self.product = T_Ref(atts["name"], in_parent)

    def end(self, name):
        self.parent.add_pattern(self.product)
        T_HandlerBase.end(self, name)

    def char(self, data):
        # any character data at all is rejected
        raise TREXError("ref should not have character data")


class T_DefineHandler(T_HandlerBase):
    """Handles <define name="..." combine="..."> inside a grammar.

    Child patterns (several are combined with T_Group) are registered
    with the enclosing grammar handler under the given name.  Raises
    TREXError when the name attribute is missing or, rather than
    registering None as the definition, when there is no child pattern.
    """

    def __init__(self, parser, parent, atts):
        T_HandlerBase.__init__(self, parser, parent, atts)
        self.pattern = None
        if "name" not in atts:
            raise TREXError("define must have a name")
        self.name = atts["name"]
        self.combine = atts.get("combine")

    child = T_HandlerBase.child_pattern

    def end(self, name):
        if self.pattern is None:
            raise TREXError("define must contain a pattern")
        self.parent.add_definition(self.name, self.pattern, self.combine)
        T_HandlerBase.end(self, name)

    def add_pattern(self, pattern):
        if self.pattern is None:
            self.pattern = pattern
        else:
            self.pattern = T_Group(self.pattern, pattern)



########################################################################
### TREX REPRESENTATION / VALIDATION

def validate(trex, instance):
    """Match the children of ``instance`` against the pattern ``trex``.

    Returns whatever trex.M returns when called with two fresh, empty
    dictionaries around instance.children.
    """
    first = {}
    last = {}
    content = instance.children
    return trex.M(first, content, last)


class Pattern:
    # Documentation-only class: it defines no behaviour itself and lists
    # the methods every pattern is expected to provide.
    # (presumably a = attributes, c = child nodes, e = environment;
    # validate() passes {}, instance.children, {} -- confirm)
    #
    # display()
    #   prints a string representation of the pattern
    #   (recursing over components)
    #
    # M(a,c,e)
    #   returns a Match object indicating whether the match succeeded or,
    #   if not, why not
    #
    # M_consume(a,c,e)
    #   similar to M above but allows the match to consume only part of
    #   a and c. Because of non-determinism, several consumptions may be
    #   possible, so the Match object returned contains a list of the
    #   possible remainders unless no match is possible
    #
    # M_interleave(a,c,e)
    #   similar to M_consume but implements interleaving by allowing
    #   consumption from any part of the given c
    pass


class Match:
    # returned by M and M_consume
    def __init__(self, remainder=None):
        if remainder==None:
            self.remainders = []
        else:
            self.remainders = [remainder]

    def add(self, match):
        self.remainders.extend(match.remainders)

    def isError(self):
        return 0

    def display(self):
        print "(MATCH [",
        for remainder in self.remainders:
            remainder.display()
        print "] )",

    def __repr__(self):
        return "<match %s>" % self.remainders

    def __cmp__(self, other):
        if self.remainders == other.remainders:
            return 0
        else:
            return -1

class Error(Match):
    """A failed match.

    message  -- short description of why the match failed
    children -- tuple of the nested results passed in as underlying causes
    """

    def __init__(self, message, *children):
        # Initialise the Match part as well: without a 'remainders' list the
        # inherited add() and __cmp__ raise AttributeError when handed an
        # Error. A failure offers no remainders.
        Match.__init__(self)
        self.message = message
        self.children = children
        
    def isError(self):
        return 1

    def display(self):
        print "(ERROR",
        print self.message,
        for error in self.children:
            error.display()
        print ")",

    def __repr__(self):
        # Match.__repr__ would describe the (empty) remainders; show the
        # failure message instead
        return "<error %s>" % (self.message,)


class Remainder:
    def __init__(self, a, c):
        self.a = a
        self.c = c

    def display(self):
        print "(",self.a, "[",
        for node in self.c:
            node.display()
        print "] )",

    def __repr__(self):
        return "<%s,%s>" % (self.a, self.c)

    def __cmp__(self, other):
        if other==None:
            return -1
        if self.a != other.a:
            return -1
        if self.c != other.c:
            return -1
        return 0


class Environment:
    """A scope of named pattern definitions.

    e      -- dictionary mapping definition names to patterns
    parent -- the enclosing Environment, or None
    """

    def __init__(self, e=None, parent=None):
        # A literal {} default would be created once and shared by every
        # Environment built without an explicit dictionary, so a fresh
        # dictionary is made per instance instead.
        if e is None:
            e = {}
        self.e = e
        self.parent = parent


def normalize(s):
    """Return s with whitespace normalized.

    Leading and trailing whitespace is removed and every internal run of
    whitespace becomes a single space. Only tab, line feed, carriage
    return and space count as whitespace.
    """
    whitespace = (chr(9), chr(10), chr(13), chr(32))
    words = []
    current = []
    for ch in s:
        if ch in whitespace:
            # a whitespace character ends the word in progress, if any
            if current:
                words.append("".join(current))
                current = []
        else:
            current.append(ch)
    if current:
        words.append("".join(current))
    return " ".join(words)


# maps "<namespace-uri>^<ncname>" to the test function for that datatype
datatype_registry = {}


def register_datatype(namespace_uri, ncname, test_function):
    """Register test_function as the checker for a datatype.

    test_function must take a string and return a boolean (ie 1 or 0).
    An existing registration under the same namespace and name is replaced.
    """
    key = namespace_uri + "^" + ncname
    datatype_registry[key] = test_function


def allows(namespace_uri, ncname, s):
    """Check the string s against a registered datatype.

    Returns a Match when the registered test function accepts s, and an
    Error when it rejects s or when no datatype is registered under the
    given namespace URI and name.
    """
    key = namespace_uri + "^" + ncname
    if not datatype_registry.has_key(key):
        return Error("unknown datatype '%s' in '%s'" % (ncname, namespace_uri))
    test_function = datatype_registry[key]
    if test_function(s):
        return Match()
    return Error("'%s' not allowed by '%s' in '%s'" % (s, ncname, namespace_uri))


# sample datatype test function (used by tests)
def is_integer(cdata):
    """Return 1 when int() accepts cdata, 0 when it raises ValueError."""
    try:
        int(cdata)
    except ValueError:
        accepted = 0
    else:
        accepted = 1
    return accepted

# sample datatype registration: makes is_integer available as the
# "integer" datatype under the PyTREX namespace URI
register_datatype("http://pytrex.sourceforge.net/2001/03", "integer", is_integer)


class T_Element(Pattern):
    def __init__(self, name_class=None, pattern=None):
        self.name_class = name_class
        self.pattern = pattern

    def display(self):
        print "(ELEMENT",
        self.name_class.display()
        self.pattern.display()
        print ")",

    def M(self, a, c, e):
        if len(a) > 0:
            return Error("has attributes")
        c_state=0
        for node in c:
            if node.is_whitespace():
                continue
            if node.is_element():
                if c_state==1:
                    return Error("second element")
                n = node.expanded_name
                a_1 = node.attributes
                c_1 = node.children
                c_state=1
        if c_state==0:
            return Error("no element")
        match = self.name_class.C(n)
        if match.isError():
            return Error("name doesn't match", match)
        match = self.pattern.M(a_1,c_1,e)
        if match.isError():
            return Error("pattern doesn't match", match)
        return Match()

    def M_consume(self, a, c, e):
        c_state=0
        for pos in range(0,len(c)):
            if c[pos].is_whitespace():
                continue
            if c[pos].is_element():
                if c_state==1:
                    return Match(Remainder(a, c[pos:]))
                n = c[pos].expanded_name
                a_1 = c[pos].attributes
                c_1 = c[pos].children
                c_state=1
                match = self.name_class.C(n)
                if match.isError():
                    return Error("name doesn't match", match)
                match = self.pattern.M(a_1, c_1, e)
                if match.isError():
                    return Error("pattern doesn't match", match)
        if c_state==0:
            return Error("no element")
        match = self.name_class.C(n)
        if match.isError():
            return Error("name doesn't match", match)
        match = self.pattern.M(a_1, c_1, e)
        if match.isError():
            return Error("pattern doesn't match", match)
        return Match(Remainder(a, []))

    def M_interleave(self, a, c, e):
        c_2 = []
        taken = 0
        for pos in range(0,len(c)):
            if c[pos].is_element():
                n = c[pos].expanded_name
                a_1 = c[pos].attributes
                c_1 = c[pos].children
                match = self.name_class.C(n)
                if match.isError():
                    c_2.append(c[pos])
                    continue
                match = self.pattern.M(a_1, c_1, e)
                if match.isError():
                    c_2.append(c[pos])
                    continue
                taken = 1
            else:
                c_2.append(c[pos])
        if taken:
            return Match(Remainder(a, c_2))
        else:
            return Error("element in interleave did not match")


class T_Attribute(Pattern):
    def __init__(self, name_class=None, pattern=None):
        self.name_class = name_class
        self.pattern = pattern

    def display(self):
        print "(ATTRIBUTE",
        self.name_class.display()
        self.pattern.display()
        print ")",

    def M(self, a, c, e):
        if len(c)>0:
            return Error("has children when should be empty")
        if len(a)!=1:
            return Error("incorrect number of attributes")
        n = a[0].expanded_name
        v = a[0].value
        match_1 = self.name_class.C(n)
        match_2 = self.pattern.M({}, v, e)
        if (not match_1.isError()) and (not match_2.isError()):
            return Match()
        return Error("attribute did not match")

    def M_consume(self, a, c, e):
        for attr in a:
            n = attr.expanded_name
            v = attr.value
            match_1 = self.name_class.C(n)
            match_2 = self.pattern.M({}, v, e)
            if (not match_1.isError()) and (not match_2.isError()):
                a_2 = []
                for attr2 in a:
                    if attr2 != attr:
                        a_2.append(attr2)
                return Match(Remainder(a_2,c))
        return Error("attribute didn't match") # or should this be Match(Remainder(a,c))

    M_interleave = M_consume


class T_Empty(Pattern):
    def display(self):
        print "(EMPTY)",
        
    def M(self, a, c, e):
        if len(a) > 0:
            return Error("has attributes")
        if len(c) > 0:
            return Error("has children when should be empty")
        return Match()

    def M_consume(self, a, c, e):
        return Match(Remainder(a,c))

    M_interleave = M_consume


class T_NotAllowed(Pattern):
    def display(self):
        print "(NOT-ALLOWED)",
        
    def M(self, a, c, e):
        return Error("not allowed")

    M_consume = M
    M_interleave = M


class T_AnyString(Pattern):
    def display(self):
        print "(ANY-STRING)",
        
    def M(self, a, c, e):
        if len(a) > 0:
            return Error("has attributes")
        for node in c:
            if node.is_element():
                return Error("anyString but got element")
        return Match()

    def M_consume(self, a, c, e):
        if len(a) > 0:
            return Error("has attributes")
        if len(c)==0:
            return Error("anyString but no children")
        for pos in range(0,len(c)):
            if c[pos].is_element():
                if pos==0:
                    return Error("element where string required")
                else:
                    return Match(Remainder(a,c[pos:]))
        return Match(Remainder(a,[]))

    def M_interleave(self, a, c, e):
        c_2 = []
        taken = 0
        for pos in range(0, len(c)):
            if c[pos].is_element():
                c_2.append(c[pos])
            else:
                taken=1
        if taken:
            return Match(Remainder(a, c_2))
        else:
            return Error("anyString but no characters") # TODO maybe this is okay!?!?


class T_String(Pattern):
    def __init__(self, chardata, whitespace_normalize):
        self.chardata = chardata
        self.whitespace_normalize = whitespace_normalize
        
    def display(self):
        print "(STRING '%s')" % self.chardata

    def M(self, a, c, e):
        if len(a) > 0:
            return Error("has attributes")
        cdata = ""
        for node in c:
            if node.is_element():
                return Error("string but got element")
            else:
                 cdata = cdata + node.data
        if self.whitespace_normalize:
            if normalize(cdata) == normalize(self.chardata):
                return Match()
            else:
                return Error("character data '%s' did not match string '%s'" % (normalize(cdata), normalize(self.chardata)))
        else:
            if cdata == self.chardata:
                return Match()
            else:
                return Error("character data '%s' did not match string '%s'" % (cdata, self.chardata))

    # TODO should flag an error in the following cases as string shouldn't
    # appear in group or interleave
    M_consume = M
    M_interleave = M


class T_Data(Pattern):
    def __init__(self, type_namespace, type_ncname):
        self.type_namespace = type_namespace
        self.type_ncname = type_ncname
        
    def display(self):
        print "(DATA '%s' '%s')" % (self.type_namespace, self.type_ncname)

    def M(self, a, c, e):
        if len(a) > 0:
            return Error("has attributes")
        cdata = ""
        for node in c:
            if node.is_element():
                return Error("string but got element")
            else:
                 cdata = cdata + node.data
        return allows(self.type_namespace, self.type_ncname, cdata)

    # TODO should flag an error in the following cases as data shouldn't
    # appear in group or interleave
    M_consume = M
    M_interleave = M


class T_Choice(Pattern):
    def __init__(self, pattern_1=None, pattern_2=None):
        self.pattern_1 = pattern_1
        self.pattern_2 = pattern_2

    def display(self):
        print "(CHOICE",
        self.pattern_1.display()
        self.pattern_2.display()
        print ")",
        
    def M(self, a, c, e):
        match_1 = self.pattern_1.M(a, c ,e)
        if not match_1.isError():
            return Match()
        match_2 = self.pattern_2.M(a, c, e)
        if not match_2.isError():
            return Match()
        return Error("both items of a choice failed", match_1, match_2)

    def M_consume(self, a, c, e):
        match = Match()
        match_1 = self.pattern_1.M_consume(a, c ,e)
        if not match_1.isError():
            match.add(match_1)
        match_2 = self.pattern_2.M_consume(a, c, e)
        if not match_2.isError():
            match.add(match_2)
        if match_1.isError() and match_2.isError():
            return Error("both items of a choice failed", match_1, match_2)
        return match

    def M_interleave(self, a, c, e):
        match = Match()
        match_1 = self.pattern_1.M_interleave(a, c ,e)
        if not match_1.isError():
            match.add(match_1)
        match_2 = self.pattern_2.M_interleave(a, c, e)
        if not match_2.isError():
            match.add(match_2)
        if match_1.isError() and match_2.isError():
            return Error("both items of a choice failed", match_1, match_2)
        return match


class T_Concur(Pattern):
    def __init__(self, pattern_1=None, pattern_2=None):
        self.pattern_1 = pattern_1
        self.pattern_2 = pattern_2

    def display(self):
        print "(CONCUR",
        self.pattern_1.display()
        self.pattern_2.display()
        print ")",
        
    def M(self, a, c, e):
        match_1 = self.pattern_1.M(a, c ,e)
        if match_1.isError():
            return match_1
        match_2 = self.pattern_2.M(a, c, e)
        if match_2.isError():
            return match_2
        return Match()

    def M_consume(self, a, c, e):
        match_1 = self.pattern_1.M_consume(a, c ,e)
        if match_1.isError():
            return match_1
        match_2 = self.pattern_2.M_consume(a, c, e)
        if match_2.isError():
            return match_2
        if match_1 == match_2:
            return match_1
        else:
            return Error("two patterns of concur consumed different amounts")

    def M_interleave(self, a, c, e):
        match_1 = self.pattern_1.M_interleave(a, c ,e)
        if match_1.isError():
            return match_1
        match_2 = self.pattern_2.M_interleave(a, c, e)
        if match_2.isError():
            return match_2
        if match_1 == match_2:
            return match_1
        else:
            return Error("two patterns of concur interleaved different amounts")


class T_Interleave(Pattern):
    def __init__(self, pattern_1=None, pattern_2=None):
        self.pattern_1 = pattern_1
        self.pattern_2 = pattern_2

    def display(self):
        print "(INTERLEAVE",
        self.pattern_1.display()
        self.pattern_2.display()
        print ")",

    def M(self, a, c, e):
        match_1 = self.pattern_1.M_interleave(a,c,e)
        if match_1.isError():
            return Error("first pattern of interleave failed", match_1)
        match = Match()
      for remainder in match_1.remainders:
          a_2 = remainder.a
            c_2 = remainder.c
            match = self.pattern_2.M(a_2, c_2, e)
            if not match.isError():
                return Match()
        return Error("second pattern of interleave failed", match)

    def M_consume(self, a, c, e):
        match_1 = self.pattern_1.M_interleave(a,c,e)
        if match_1.isError():
            return Error("first pattern of interleave failed", match_1)
        match = Match()
      for remainder in match_1.remainders:
          a_2 = remainder.a
            c_2 = remainder.c
            match = self.pattern_2.M_consume(a_2, c_2, e)
            if not match.isError():
                return match
        return Error("second pattern of interleave failed", match)

    def M_interleave(self, a, c, e):
        match_1 = self.pattern_1.M_interleave(a,c,e)
        if match_1.isError():
            return Error("first pattern of interleave failed", match_1)
        match = Match()
      for remainder in match_1.remainders:
          a_2 = remainder.a
            c_2 = remainder.c
            match_2 = self.pattern_2.M_interleave(a_2, c_2, e)
            if not match.isError():
                match.add(match_2)
        return match
        

class T_OneOrMore(Pattern):
    def __init__(self, pattern=None):
        self.pattern = pattern

    def display(self):
        print "(ONE-OR-MORE",
        self.pattern.display()
        print ")",

    def M(self, a, c, e):
        group = T_Group(self.pattern, T_Choice(T_Empty(), T_OneOrMore(self.pattern)))
        match = group.M(a, c, e)
        if match.isError():
            return Error("oneOrMore failed")
        return Match()

    def M_consume(self, a, c, e):
        group = T_Group(self.pattern, T_Choice(T_Empty(), T_OneOrMore(self.pattern)))
        return group.M_consume(a, c, e)

    def M_interleave(self, a, c, e):
        group = T_Group(self.pattern, T_Choice(T_Empty(), T_OneOrMore(self.pattern)))
        return group.M_interleave(a, c, e)
   

class T_Group(Pattern):
    def __init__(self, pattern_1=None, pattern_2=None):
        self.pattern_1 = pattern_1
        self.pattern_2 = pattern_2

    def display(self):
        print "(GROUP",
        self.pattern_1.display()
        self.pattern_2.display()
        print ")",

    def M(self, a, c, e):
        match_1 = self.pattern_1.M_consume(a,c,e)
        if match_1.isError():
            return Error("first pattern of group failed", match_1)
        match = Match()
      for remainder in match_1.remainders:
          a_2 = remainder.a
            c_2 = remainder.c
            match = self.pattern_2.M(a_2, c_2, e)
            if not match.isError():
                return Match()
        return Error("second pattern of group failed", match)

    def M_consume(self, a, c, e):
        match_1 = self.pattern_1.M_consume(a,c,e)
        if match_1.isError():
            return Error("first pattern of group failed", match_1)
        match = Match()
        for remainder in match_1.remainders:
            a_2 = remainder.a
            c_2 = remainder.c
            match_2 = self.pattern_2.M_consume(a_2, c_2, e)
            if not match_2.isError():
                match.add(match_2)
        return match

    def M_interleave(self, a, c, e):
        # TODO I'm not 100% what it means to interleave a group (eg does order matter?)
        match_1 = self.pattern_1.M_interleave(a,c,e)
        if match_1.isError():
            return Error("first pattern of group failed", match_1)
        match = Match()
        for remainder in match_1.remainders:
            a_2 = remainder.a
            c_2 = remainder.c
            match_2 = self.pattern_2.M_interleave(a_2, c_2, e)
            if not match_2.isError():
                match.add(match_2)
        return match


class T_Grammar(Pattern):
    def __init__(self):
        self.start = None
        self.definitions = {}

    def display(self):
        print "(GRAMMAR",
        self.start.display()
        for definition in self.definitions.keys():
            print "(%s=" % definition,
            self.definitions[definition].display()
            print ")",
        print ")",

    def add_definition(self, name, definition):
        self.definitions[name] = definition

    def M(self, a, c, e):
        return self.start.M(a, c, Environment(self.definitions, e))

    def M_consume(self, a, c, e):
        return self.start.M_consume(a, c, Environment(self.definitions, e))

    def M_interleave(self, a, c, e):
        return self.start.M_interleave(a, c, Environment(self.defintions, e))


class T_Ref(Pattern):
    def __init__(self, name, parent):
        self.name = name
        self.parent = parent
        
    def display(self):
        print "(REF =%s %s)" % (self.name, self.parent)

    def M(self, a, c, e):
        if self.parent == 0:
            if not e.e.has_key(self.name):
                return Error("ref to unknown pattern '%s'" % self.name)
            else:
                pattern = e.e[self.name]
                return pattern.M(a, c, e)
        else:
            if not e.parent.e.has_key(self.name):
                return Error("ref to unknown pattern '%s'" % self.name)
            else:
                pattern = e.parent.e[self.name]
                return pattern.M(a, c, e.parent)
            

    def M_consume(self, a, c, e):
        if self.parent == 0:
            if not e.e.has_key(self.name):
                return Error("ref to unknown pattern '%s'" % self.name)
            else:
                pattern = e.e[self.name]
                return pattern.M_consume(a, c, e)
        else:
            if not e.parent.e.has_key(self.name):
                return Error("ref to unknown pattern '%s'" % self.name)
            else:
                pattern = e.parent.e[self.name]
                return pattern.M_consume(a, c, e.parent)

    def M_interleave(self, a, c, e):
        if self.parent == 0:
            if not e.e.has_key(self.name):
                return Error("ref to unknown pattern '%s'" % self.name)
            else:
                pattern = e.e[self.name]
                return pattern.M_interleave(a, c, e)
        else:
            if not e.parent.e.has_key(self.name):
                return Error("ref to unknown pattern '%s'" % self.name)
            else:
                pattern = e.parent.e[self.name]
                return pattern.M_interleave(a, c, e.parent)


class NameClass:
    # Common base class of the name classes below. Each subclass provides
    # display() and C(n), where C returns a Match or an Error for the
    # expanded name n.
    pass


class ExpandedName(NameClass):
    def __init__(self, namespaceURI=None, NCName=None):
        self.namespaceURI = namespaceURI
        self.NCName = NCName

    def display(self):
        print "(EXPANDED-NAME '%s' '%s')" % (self.namespaceURI, self.NCName),

    def C(self, n):
        if self.namespaceURI==n.namespaceURI and self.NCName==n.localName:
            return Match()
        else:
            return Error("expanded name doesn't match: %s^%s != %s^%s" % (self.namespaceURI, self.NCName, n.namespaceURI, n.localName))


class AnyName(NameClass):
    # Name class that accepts every name.
    def display(self):
        print "(ANY-NAME)",

    def C(self, n):
        # n is not inspected; the result is always a successful Match
        return Match()


class NSName(NameClass):
    def __init__(self, namespaceURI):
        self.namespaceURI = namespaceURI

    def display(self):
        print "(NS-NAME '%s')" % self.namespaceURI

    def C(self, n):
        if self.namespaceURI==n.namespaceURI:
            return Match()
        else:
            return Error("namespace doesn't match: %s != %s" % (self.namespaceURI, n.namespaceURI))


class NameClassChoice(NameClass):
    def __init__(self, nameclass_1, nameclass_2):
        self.nameclass_1 = nameclass_1
        self.nameclass_2 = nameclass_2

    def display(self):
        print "(CHOICE",
        self.nameclass_1.display()
        self.nameclass_2.display()
        print ")",

    def C(self, n):
        match_1 = self.nameclass_1.C(n)
        if not match_1.isError():
            return Match()
        match_2 = self.nameclass_2.C(n)
        if not match_2.isError():
            return Match()
        return Error("both items of a choice failed", match_1, match_2)


class Difference(NameClass):
    def __init__(self, nameclass_1, nameclass_2):
        self.nameclass_1 = nameclass_1
        self.nameclass_2 = nameclass_2

    def display(self):
        print "(DIFFERENCE",
        self.nameclass_1.display()
        self.nameclass_2.display()
        print ")",

    def C(self, n):
        match = self.nameclass_1.C(n)
        if match.isError():
            return Error("first name-class of a difference failed", match)
        match = self.nameclass_2.C(n)
        if not match.isError():
            return Error("second name-class of a difference failed", match)
        return Match()



########################################################################
### INSTANCE REPRESENTATION
#
# Basically the instance data model from section 2
#

class I_Node:
    # Common base class of the instance node classes below; it defines
    # nothing itself.
    pass


class I_Root(I_Node):
    # Root of an instance tree; it only holds a list of child nodes.
    def __init__(self):
        self.children = []

    def add_child(self, node):
        # children are kept in the order they are added
        self.children.append(node)
        
    def is_whitespace(self):
        return 0

    def is_element(self):
        return 0

    def display(self):
        # unlike the other display methods this one ends the output line
        print "(ROOT",
        for child in self.children:
            child.display()
        print ")"


class I_ExpandedName:
    # Name of an instance element or attribute: a namespace URI paired
    # with a local name. Both values are stored as given.
    def __init__(self, namespaceURI, localName):
        self.namespaceURI = namespaceURI
        self.localName = localName


class I_Element(I_Node):
    # Element node of an instance tree.
    def __init__(self):
        # expanded_name is assigned by whoever builds the element; display()
        # and __repr__ read its localName
        self.expanded_name = None
        self.attributes = []
        self.children = []

    def add_child(self, node):
        # children are kept in the order they are added
        self.children.append(node)

    def add_attribute(self, node):
        # attributes are kept in the order they are added
        self.attributes.append(node)

    def is_whitespace(self):
        return 0

    def is_element(self):
        return 1

    def display(self):
        # prints the local name, then the attributes, then the children
        print "(%s" % self.expanded_name.localName,
        for attr in self.attributes:
            attr.display()
        for child in self.children:
            child.display()
        print ")",

    def __repr__(self):
        return "<%s>" % self.expanded_name.localName


class I_Attribute(I_Node):
    def __init__(self, expanded_name=None, value=None):
        self.expanded_name = expanded_name
        self.value = value

    def is_whitespace(self):
        return 0

    def is_element(self):
        return 1

    def display(self):
        print "(@%s" % self.expanded_name.localName,
        self.value[0].display()
        print ")",

    def __repr__(self):
        return "<%s=%s>" % (self.expanded_name.localName, self.value)


class I_CharData(I_Node):
    def __init__(self, data):
        self.data = data
        
    def is_whitespace(self):
        for char in self.data:
            if char not in [chr(9),chr(10),chr(13),chr(32)]:
                return 0
        return 1

    def is_element(self):
        return 0

    def display(self):
        print "'%s'" % self.data,

    def __repr__(self):
        return "'%s'" % self.data
    

########################################################################
### INSTANCE PARSING

# TODO wellformedness errors don't seem to get reported

def parse_Instance(location, baseURI=None):
    """Parse the XML document at location into an instance tree.

    location -- URL handed to urllib2.urlopen
    baseURI  -- base set on the expat parser; defaults to location

    Returns the product of the I_RootHandler attached to the parser. An
    expat parse error is reported on stderr and not raised, so whatever
    was built before the error is still returned.
    """
    if baseURI is None:
        baseURI = location

    import sys
    import xml.parsers.expat
    from urllib2 import urlopen

    parser = xml.parsers.expat.ParserCreate(namespace_separator="^")
    parser.SetBase(baseURI)
    parser.returns_unicode = 1

    i = I_RootHandler(parser)

    f = urlopen(location)
    # try/except nested inside try/finally (rather than one combined
    # statement) so the handle is closed whatever ParseFile raises while
    # staying valid on old Python 2 releases
    try:
        try:
            parser.ParseFile(f)
        except xml.parsers.expat.error:
            sys.stderr.write("Error parsing file at line '%s' and column '%s'\n" % (parser.ErrorLineNumber, parser.ErrorColumnNumber) )
            sys.stderr.flush()
    finally:
        f.close()

    return i.product


class I_RootHandler(HandlerBase):
    # Handler that builds the I_Root of an instance document.
    def __init__(self, parser, parent = None, atts = None):
        HandlerBase.__init__(self, parser, parent, atts)
        # the tree under construction; parse_Instance returns it
        self.product = I_Root()

    def child(self, name, atts):
        # The new handler gets this handler as its parent; it reports its
        # finished element back through add_child().
        I_ElementHandler(self.parser, self, name, atts)

    def char(self, data):
        # character data becomes an I_CharData child of the root
        self.product.add_child(I_CharData(data))

    def end(self, name):
        HandlerBase.end(self, name)

    def add_child(self, node):
        self.product.add_child(node)


class I_ElementHandler(HandlerBase):
    """Handler that builds one I_Element, including its attributes.

    Names arrive as "namespaceURI^localName", or as a bare local name
    when there is no "^" in them.
    """

    def __init__(self, parser, parent, name, atts):
        HandlerBase.__init__(self, parser, parent, atts)
        self.product = I_Element()
        self.product.expanded_name = self._expand(name)
        for attr in atts.keys():
            # each attribute value is wrapped in a one-item list of I_CharData
            value = [I_CharData(atts[attr])]
            self.product.add_attribute(I_Attribute(self._expand(attr), value))

    def _expand(self, qname):
        # Split a name on "^" into an I_ExpandedName; a name without "^"
        # gets the empty string as its namespace URI.
        import string
        parts = string.split(qname,"^")
        if len(parts)==1:
            return I_ExpandedName("", parts[0])
        return I_ExpandedName(parts[0], parts[1])

    def child(self, name, atts):
        # the nested handler reports its element back through add_child()
        I_ElementHandler(self.parser, self, name, atts)

    def char(self, data):
        self.product.add_child(I_CharData(data))

    def end(self, name):
        # hand the finished element to the parent before the base-class
        # end processing
        self.parent.add_child(self.product)
        HandlerBase.end(self, name)

    def add_child(self, node):
        self.product.add_child(node)



########################################################################
### MAIN LINE

if __name__ == "__main__":
    import sys
    if len(sys.argv)==3:
        match = validate(parse_TREX(sys.argv[1]),parse_Instance(sys.argv[2]))
        if match.isError():
            match.display()
        else:
            print "match"
    else:
        print "usage: python pytrex.py <trex-file> <instance-file>"

# Generated by Doxygen 1.6.0 -- Back to index