# N3 parser
#--------------------------------------
#
# Author : G.Naudts  E-mail : naudts_vannoten@yahoo.com
# Address : Secretarisdreef 5 2288 Bouwel Belgium
# The parser is based on the grammar at the back, which has been taken from :
# http://2001/blindfold/sample/n3.bnf
# (URL as found in this file; its host part appears to be missing)
# and the parser also uses the structures defined in the N3 primer :
# http://www.w3.org/2000/10/swap/Primer.htm
# The output data structure consists basically of triples
# (id, short value, full value). The full value gives the complete URI.
# The tree structure of N3 is kept intact, but dummy subjects and verbs
# are introduced where these are missing (_subject and _verb).
# Points (the "." that ends a statement) are eliminated.
# In ":a is :b of :c" the "of" is eliminated and the verb :b is preceded
# by "Reverse", meaning subject and object have to be reversed.
# In ":a has :b of :c" both "has" and "of" are eliminated.
#
# Anonymous nodes get an anonymous subject value = _T$$$1 ... _T$$$n where
# n is the index of the last anonymous node.
# The parser is basically recursive descent with look-ahead features.
# The prelude used is standard.pre .
# When an error occurs the stream is synchronized on the next point
# and an error message is included in the stream.
# With thanks to Mark P.Jones for his inspiring prolog interpreter.
#
# I give here a bnf of the output :
#
# ParserOutput ::= Triple (ParserOutput)*|
#                  TripleSet (ParserOutput)*|
#                  AnonSet (ParserOutput)*
#
# Triple ::= "Triple" Sep Subject Verb Object
#
# AnonTriple ::= AnonSubject Verb Object
#
# TripleSet ::= Sep Triple* "EndOfSet" Sep
#
# AnonSet ::= Sep AnonTriple* "EndOfSet" Sep
#
# Subject ::= "Subject" Sep String Sep|
#             "Set" Sep TripleSet|
#             "AnonSet" Sep AnonSet
#
# AnonSubject ::= "Subject" Sep "_T$$$" n Sep
#
# Verb ::= ["Reverse" sep] "Verb" Sep String Sep|
#              (Reverse means subject and object must be reversed)
#          "Set" Sep TripleList|
#          "AnonSet" Sep AnonSet
#
# Object ::= "Object" Sep String Sep|
#            "ObjectSet" Sep TripleSet|
#            "AnonSet" Sep AnonSet
#
# n ::= (digit)*
#
# Sep ::= Separator
#
# Prefix ::= "Prefix" Sep String Sep
#
# The separator is defined in the source code; might be : /@/
# The output is defined by constants that are defined in a section
# indicated by the header : Constants .
# _subject and _verb refer to the latest subject and verb.
#
# NOTE(review): the parsing code in this file emits the marker "Inverse"
# where this description says "Reverse" -- confirm which one consumers expect.
#
# To do: better synchronization && error messages.
#
import UtilsN3, sys, PVa, N3Http, string


class N3Parser:
    # Recursive-descent parser for N3 text. Every parse method takes the
    # output built so far plus the remaining input and returns the extended
    # output together with the unparsed rest.

    # a class that contains utility functions
    utils = UtilsN3.Utils()
    # the dictionary that contains the prefixes; ":" is the default prefix.
    # NOTE: defined on the class, so it is shared by all instances.
    prefixDictionary = {":":""}
    # a counter for numbering anonymous subjects (see createAnonSubject)
    anonCounter = 0
    # a debug flag
    debug = 1
    # flag for directing the output (0 = screen, 1 = disk)
    saveOnDisk = 0
    # the name of the file that is being parsed
    fileName = ""
    # get the file prefix (read when the class body is executed)
    pers = PVa.PersistentString("pathPrefix.txt")
    filePrefix = pers.get()
    # get the proxy
    # proxy with authentication: put the proxy address and port in
    # "proxy.txt" and the userid and password (userid:password) in
    # the file "userPass.txt"
    # for proxy without authentication put "noAuth"
    # in the file "proxy.txt" and put the address and port number of
    # the proxy in the environment variable "http-proxy".
    # both files should exist.
    # if no proxy is required, leave the files empty.
    pr = PVa.PersistentString("proxy.txt")
    proxy = pr.get()
    # get the proxy userid and password
    pr1 = PVa.PersistentString("userPass.txt")
    userPass = pr1.get()
    # proxy flag 0 = proxy with authentication; 1 = no proxy;
    # 2 = proxy without authentication
    authProxy = 0
    # the internet access object
    http = N3Http.N3Http()
    # the list of testcases
    testCases = [
        "http://www.w3.org/2002/03owlt/ontAx.n3",
        "animal-result.n3", "animal-simple.n3", "animal.n3",
        "authen.axiom.n3", "authen.lemma.n3", "authen.proof.n3",
        "danb-query.n3", "danb-result.n3", "danb.n3",
        "danc-query.n3", "danc-result.n3", "danc.n3",
        "etc.n3",
        "gedcom-facts.n3", "gedcom-proof.n3", "gedcom-query.n3",
        "gedcom-relations-result.n3", "gedcom-relations-test.n3",
        "gedcom-relations.n3",
        "graph.axiom.n3", "graph.lemma.n3", "graph.proof.n3",
        "janet-result.n3", "janet-test.n3", "janet.n3",
        "lists-query.n3", "lists-result.n3", "lists.n3",
        "vogel.q.n3",
        "rdf-facts.n3", "rdf-query.n3", "rdf-result.n3", "rdf-rules.n3",
        "rdfc25May-result.n3", "rdfc25May-test.n3", "rdfc25May.n3",
        "rdfs-query.n3", "rdfs-result.n3", "rdfs-rules.n3",
        "russell.axiom.n3", "russell.lemma.n3", "russell.proof.n3",
        "subclass-query.n3", "subclass-result.n3", "subclass.n3",
        "subprop-query.n3", "subprop-result.n3", "subprop.n3",
        "test-result.n3", "test-test.n3", "test.n3",
        "tpoint-all.n3", "tpoint-facts.n3", "tpoint-query.n3",
        "tpoint-result.n3", "tpoint.n3",
        "varprop-query.n3", "varprop-result.n3", "varprop.n3",
        "ziv-query.n3", "ziv-result.n3", "ziv.n3",
        "wol-facts.n3", "wol-query.n3", "wol-rules.n3",
        "VOGEL.N3", "vogel.l.n3",
        "boole.lemma.n3", "boole.axiom.n3",
        "induction.axiom.n3", "induction.query.n3",
        "allValuesFrom.n3", "Owls.n3"]

    # derive the proxy mode from the content of "proxy.txt":
    # empty (or a single blank) = no proxy, "noAuth" = proxy without
    # authentication, anything else keeps the default (authentication).
    def __init__(self):
        if self.proxy == " " or self.proxy == "":
            self.authProxy = 1
        elif self.proxy == "noAuth":
            self.authProxy = 2

    # read a N3 file into the string self.n3String
    def readN3(self, fileName):
        inFile = open(fileName,"r")
        try:
            self.n3String = inFile.read()
        finally:
            # release the handle even when reading fails
            inFile.close()

    # function for entering a number
    # the input string is
displayed as prompt def enterNumber(self, s): done = 0 while (not done): try: i = input(s) done = 1 except: print("Please enter a number.\n") return i # interactive part def interact(self): self.printMenu() i = -1 while (not i == 0): try: i = self.enterNumber("") if i == 0: print ("\nThe program is stopped.") return elif i == 1: print "The testcases must be in a directory named \"testCases\"." print "The path prefix for this directory must be in the file:" print "\"pathPrefix.txt\" (with trailing slash)." elif i == 2: print "Please enter the name of the file to be parsed:" print "For access to the web please enter a complete url." print "For a file just give the absolute or relative path." self.fileName = raw_input("") if (self.utils.startsWith(self.fileName,"http://")): self.readUrl(self.fileName) else: self.readN3(self.fileName) li = self.parseN3("", self.n3String, 0) self.writeOutput(li) elif i == 3: print "Do you want to save the output to disk?" print "The filename will be: input_filename + .pr." print "Answer Y or N." s = raw_input("") done = 0 while (not done ): if s == "Y": self.saveOnDisk = 1 print ("!!!! Output will be to disk. !!!!!") done = 1 elif s == "N": self.saveOnDisk = 0 done = 1 print ("!!!! Output will be to the screen. !!!!!") else: s = raw_input("Please enter Y or N.\n") elif i == 4: self.choiceTestCases() i = self.enterNumber("") li = self.testFiles(i) self.writeOutput(li) elif i == 5: print "Proxy with authentication: put the proxy address and port in" print " \"proxy.txt\" and the userid and password (userid:password) in" print " the file \"userPass.txt\"." print " For a proxy without authentication put \"noAuth\"" print " in the file \"proxy.txt\" and put the address and port" print " number of the proxy in the environmnet variable:" print " \"http-proxy\"." print " If no proxy is required leave the files empty." print " Both files should exist." elif i == 6: print "Create the parameter files (see items 1 and 5 of the menu)." 
print " 1) create the file \"pathPrefix.txt\"" print " 2) create the file \"proxy.txt\"" print " 3) create the file \"userPass.txt\"" print " 4) stop" print "Please enter a number." i = self.enterNumber("") print "Please enter the content of the file:" while i < 4: if i == 1: print "pathPrefix.txt" fileName = "pathPrefix.txt" elif i == 2: print "proxy.txt" fileName = "proxy.txt" elif i == 3: print "userPass.txt" fileName = "userPass.txt" s = raw_input("") if s == "": s = " " file = PVa.PersistentString(fileName) file.modify(s) self.printMenu() except IOError, (errno, strerror): print "I/O error(%s): %s" % (errno, strerror) print("Please retry.") self.printMenu() def testParseAFile(self): print "testParseAFile" print self.parseAFile(self.filePrefix + "testCases/authen.axiom.n3") def parseAFile(self, fileName): if (self.utils.startsWith(fileName,"http://")): self.readUrl(fileName) else: self.readN3(fileName) li = self.parseN3("", self.n3String, 0) return li[0] # write the output to screen or to disk def writeOutput(self, li): if self.saveOnDisk == 0: print ("Dictionary:",self.prefixDictionary) print ("\nResults:\n" + li[0]) else: outFile = open(self.fileName + ".pr", "w") outFile.write("Dictionary:" + `self.prefixDictionary`) outFile.write("\nResults:\n" + li[0]) outFile.close() # handling of the test cases def testFiles(self, i): web = 0 n = 0 n1 = len(self.testCases) done = 0 while not done and n synchronize on next point. elif self.utils.checkCharacter ("}", sd[1:]) == "F": return (out + "Error Missing }" + self.sep, self.utils.skipTillCharacter(".", self.utils.skipBlancs (sd[1:])), newLen) # no point found -- last statement without point or # something is fundamentally wrong -- return empty rest string. elif ((first == "{" )and (self.utils.checkCharacter ("}", sd[1:]) == "F") and ( self.utils.checkCharacter (".", sd[1:])== "F")): return (out + "Error Missing Point" + self.sep, sd[1:], newLen) # must be the next triple : parseSubject and call parsePropertyList. 
else: out1, rest, len1 = self.parseTriple(out, s, oldLen) return self.parseTripleSet(out1, rest, len1) # parseTriple parses a singel triple. def parseTriple(self, out, s, oldLen): if s == "": return (out, "", oldLen) else: sd = self.utils.skipBlancs(s) first = sd[0] if self.debug: newLen = len (sd) else: newLen = 0 # higher separators -- return. if first == "]" or first == "}": return (out, sd, newLen) # end of triple return elif first == ".": return (out, sd[1:], newLen) # parse a propertylist now. else: out1, rest, len1 = self.parseSubject (out, sd, newLen) return self.parsePropertyList (out1, rest, len1) # parseTripleSpecial parses a singel triple for an anonymous set. def parseTripleSpecial(self, out, s, oldLen): if s == "": return (out, "", oldLen) else: sd = self.utils.skipBlancs(s) first = sd[0] if self.debug: newLen = len (sd) else: newLen = 0 # higher separators -- return. if first == "]" or first == "}": return (out, sd, newLen) # end of triple return elif first == ".": return (out, sd[1:], newLen) # embedded anonymous set. elif first == "[": out1, rest, len1 = self.parseSubject (out, sd, newLen) return self.parsePropertyList (out1, rest, len1) # parse a propertylist now else: out1, rest, len1 = self.parsePropertyList (out, sd, newLen) return self.parseTriple(out1, rest, len1) # test the function parseAnonSet. def testParseAnonSet(self): print ("\ntestParseAnonSet\n****************\n") print(("", "[:b :c.].", 0), self.parseObject("", "[:b :c.].", 0)) print(( "", "[ :b :c; :e :f; :g :h, :i, :p]", 0), self.parseObject( "", "[ :b :c; :e :f; :g :h, :i, :p]", 0)) print(( "", "[ :b :c; :d [:e :f]; :g :h]", 0), self.parseObject ("", "[ :b :c; :d [:e :f]; :g :h]", 0)) # parse a set of anonymous triples : insert "AnonSet ". # and call parsePropertyList. # Then call recusively parseAnon ; then insert "EndOfSet ". 
# Parse the inside of an anonymous set "[ ... ]".
#   out    : the output built so far
#   s      : the input still to be parsed; parseSubject, parseVerb and
#            parseObject replace the opening "[" by the marker "&"
#            before calling this method
#   oldLen : length reported by the caller, returned unchanged when
#            there is nothing to parse
# Returns the tuple (output, rest string, length); the length is
# len(rest of input after skipping blanks) in debug mode and 0 otherwise.
# Error markers are followed by the separator, like every other token.
def parseAnonSet(self, out, s, oldLen):
    if s == "":
        return (out, "", oldLen)
    sd = self.utils.skipBlancs(s)
    if sd == "":
        # nothing but blanks left (assuming skipBlancs strips them);
        # avoids indexing into an empty string below
        return (out, "", oldLen)
    first = sd[0]
    if self.debug:
        newLen = len(sd)
    else:
        newLen = 0
    tail = sd[1:]
    # "." found recall parsePropertyList.
    if first == ".":
        out1, rest, len1 = self.parsePropertyList(
            out, self.utils.skipBlancs(tail), newLen)
        return self.parseAnonSet(out1, rest, len1)
    # "]" found - insert "EndOfSet" and return.
    if first == "]":
        return (out + "EndOfSet" + self.sep,
                self.utils.skipBlancs(tail), newLen)
    # "{" found call parseTripleSet
    if first == "{":
        return self.parseTripleSet(out, sd, newLen)
    if first == "&":
        # start of the set: a closing "]" must exist somewhere ahead.
        if self.utils.checkCharacter("]", tail) == "T":
            # assign an anonymous subject and parse the first triple.
            out1, rest, len1 = self.parseTripleSpecial(
                out + "AnonSet" + self.sep + "Subject" + self.sep
                + self.createAnonSubject() + self.sep,
                tail, newLen)
            return self.parseAnonSet(out1, rest, len1)
        # "]" not found ==> synchronize on next point.
        if self.utils.checkCharacter(".", tail) == "T":
            return (out + "Error Missing ]" + self.sep,
                    self.utils.skipTillCharacter(
                        ".", self.utils.skipBlancs(tail)),
                    newLen)
        # no point found -- last statement without point or
        # something is fundamentally wrong.
        return (out + "Error Missing Point" + self.sep, sd, newLen)
    return (out + "Unknown error parsing anonset" + self.sep, sd, newLen)

# test the function parsePropertyList.
def testParsePropertyList(self): print("\ntestParsePropertyList\n*******************\n") print(( "", ":b :c; :d :e; :f :g.", 0), self.parsePropertyList( "", ":b :c; :d :e; :f :g.", 0)) print(( "", ":b :c.", 0), self.parsePropertyList( "", ":b :c.", 0)) print(( "", ":b :c; :d :e; :f :g:", 0), self.parsePropertyList("", ":b :c; :d :e; :f :g:", 0)) print(( "", ":a, :b,:c", 0), self.parsePropertyList("", ":a, :b,:c", 0)) # parses one or more properties. def parsePropertyList (self, out, s, oldLen): if s == "": return (out, "", oldLen) else: sd = self.utils.skipBlancs(s) first = sd[0] if self.debug: newLen = len (sd) else: newLen = 0 # end of propertyList if sd[0] == "}": return (out, sd, newLen) # end of propertyList elif sd[0] == ".": return (out, sd, newLen) # end of anonymous set elif sd[0] == "]": return (out, sd, newLen) # propertylist with subject already defined. elif sd[0] == ";": out1, rest, len1 = self.parseProperty (out + "_subject" + self.sep, sd[1:], newLen) return self.parsePropertyList(out1, rest, len1) else: out1, rest, len1 = self.parseProperty (out, sd, newLen) return self.parsePropertyList (out1, rest, len1) # parse a single property. def parseProperty(self, out, s, oldLen): if s == "": return (out, "", oldLen) else: sd = self.utils.skipBlancs(s) first = sd[0] if self.debug: newLen = len (sd) else: newLen = 0 if first == "}": # end of property return (out, sd, newLen) elif first == "]": # end of property return (out, sd, newLen) elif first == ".": # end of property return (out, sd, newLen) else: # out1, rest, len1 = self.parseVerb (out, sd, newLen) return self.parseNodeList(out1, rest, len1) # test the function parseNodeList. 
def testParseNodeList(self): print ("\ntestParseNodeList\n***********************\n") print (("", ":a, :b,:c.", 0), "\n", self.parseNodeList("", ":a, :b,:c.", 0)) print (("", ":a.", 0), "\n", self.parseNodeList("", ":a.", 0)) print (("", ":a", 0), "\n", self.parseNodeList("", ":a", 0)) print (("", ":a, :b,:c", 0), "\n", self.parseNodeList("", ":a, :b,:c", 0)) # parses nodes separated by , . # The tokens _subject and _verb are placed as placeholders # for the real subject and verb. def parseNodeList(self, out, s, oldLen): if s == "": return (out, "", oldLen) else: sd = self.utils.skipBlancs(s) first = sd[0] if self.debug: newLen = len (sd) else: newLen = 0 if first == "." or first == ";" or first == "}" or first == "]": # higher separators -- return return (out, sd, newLen) elif first == ",": # parse the next node (= object) out1, rest, len1 = self.parseObject ("", sd[1:], newLen) return self.parseNodeList (out + "_subject" + self.sep + "_verb" + self.sep + out1, self.utils.skipBlancs(rest), len1) else: # parse the first (and possibly last) node out1, rest, len1 = self.parseObject (out, sd, newLen) return self.parseNodeList (out1, rest, len1) def testParseSubject(self): print ("Test parseSubject: ", self.parseSubject("", "dc:c ", 1)) # parse a subject . 
def parseSubject(self, out, s, oldLen): if s == "": return (out, "", oldLen) else: sd = self.utils.skipBlancs(s) first = sd[0] if self.debug: newLen = len (sd) else: newLen = 0 if first == "{": # embedded sets return self.parseTripleSet (out, "&" + sd[1:], newLen) elif first == "[": # embedded anonymous sets return self.parseAnonSet (out, "&" + sd[1:], newLen) else: # parse a subject name, value, rest = self.parseNode (self.utils.skipBlancs (sd)) rest1, bool = self.testResult (name, value, rest) if bool == "T": return (out + "Subject" + self.sep + value + self.sep, self.utils.skipBlancs (rest1), newLen) else: return (out + name + self.sep + value + self.sep, self.utils.skipBlancs (rest1), newLen) def testParseVerb(self): print ("Test parseVerb: ", self.parseVerb("", "dc:c ", 1)) # parse a verb. def parseVerb(self, out, s, oldLen): if s == "": return (out, "", oldLen) else: sd = self.utils.skipBlancs(s) first = sd[0] if self.debug: newLen = len (sd) else: newLen = 0 if first == "{": # embedded sets return self.parseTripleSet (out, "&" + sd[1:], newLen) elif first == "[": # embedded anonymous sets return self.parseAnonSet (out, "&" + sd[1:], newLen) # "is" detected, insert "Inverse" elif (sd[0:2] == "is"): sd1 = self.utils.skipBlancs(sd[2:]) return self.parseVerb(out + "Inverse" + self.sep, sd1, newLen) elif sd[0:3] == "has": # has detected - must be skipped sd1 = self.utils.skipBlancs(sd[3:]) return self.parseVerb(out, "&" + sd1[1:], newLen) else: # parse a verb name, value, rest = self.parseNode (sd) rest1, bool = self.testResult (name, value, rest) if bool == "T": return (out + "Verb" + self.sep + value + self.sep, self.utils.skipBlancs (rest1), newLen) else: return (out + name + self.sep + value + self.sep, self.utils.skipBlancs (rest1), newLen) def testParseObject(self): print ("Test parseObject: ", self.parseObject("", "dc:c ", 1)) # parse an object. 
def parseObject(self, out, s, oldLen): if s == "": return (out, "", oldLen) else: sd = self.utils.skipBlancs(s) first = sd[0] if self.debug: newLen = len (sd) else: newLen = 0 if first == "{": # embedded sets return self.parseTripleSet (out, "&" + sd[1:], newLen) elif first == "[": # embedded anonymous sets return self.parseAnonSet (out, "&" + sd[1:], newLen) elif sd[0:2] == "of": # of detected - must be skipped sd1 = self.utils.skipBlancs(sd[2:]) return self.parseObject(out,sd1, newLen) else: # parse an object name, value, rest = self.parseNode (self.utils.skipBlancs (sd)) rest1, bool = self.testResult (name, value, rest) if bool == "T": return (out + "Object" + self.sep + value + self.sep, self.utils.skipBlancs (rest1), newLen) else: return (out + name + self.sep + value + self.sep, self.utils.skipBlancs (rest1), newLen) # create an anonymous subject; form= _T$$$n . def createAnonSubject(self): self.anonCounter = self.anonCounter + 1 s = "_T$$$" + `self.anonCounter` return s + "/@/" + s # function for parsing nodes. # input is the string to be parsed. # returns a multiple that exists a node, the value and # the rest string. # Formats of a node (=URI): # <#...> # <> # :... # prefix:... # # ".." (constant) def parseNode(self, s): if s == "": return ("", "", "") else: sd = self.utils.skipBlancs(s) y = sd[0] # node in a nodelist if y == ";": return self.parseNode (sd[1:]) # starts with ":" This refers to the parsed document. elif y == ":": return self.parseNodeThis (sd) # starts with "<" Three cases : <> <#..> elif y == "<": return self.parseNodeLesser(sd[1:]) # starts with "?:" This is a variable. elif y == "?": return self.parseVariableQ(sd) # starts with "_:" This is a variable. 
elif y == "_": return self.parseVariable(sd) # intercept special comment """ elif self.utils.startsWith (sd, "\"\"\"") == "T": return self.parseSpecialComment(sd[3:]) # starts with '"' Constant elif y == '"': return self.parseConstant(sd[1:]) # = is a special case elif y == "=": return ("Node", "=" +self.sep + self.daml + "equivalent", self.utils.skipBlancs(sd[1:])) # the verb is "a" elif self.utils.startsWith (sd, "a") == "T": return ("Node","a" + self.sep + self.rdf + "type", self.utils.skipBlancs(sd[1:])) # skip "of" normally detected in parseObject elif self.utils.startsWith (sd, "of ") == "T": return self.parseNode (self.utils.skipBlancs (sd[2:])) # "has" detected skip this normally dtected in parseVerb elif self.utils.startsWith (sd, "has ") == "T": return self.parseNode (self.utils.skipBlancs (sd[3:])) # "this" detected elif self.utils.startsWith (sd, "this") == "T": return ("Node", "this" + self.sep + "this", self.utils.skipBlancs (sd[4:])) # "is" detected, insert "Inverse" normally detected in parseVerb elif self.utils.startsWith (sd, "is ") == "T": type, value, rest = self.parseNode (self.utils.skipBlancs(sd[2:])) return (type, "Inverse" + self.sep + value, self.utils.skipBlancs (rest)) # lonely : detected. Must be format: prefix:postfix. # The prefix must be known. else: return self.parseNodePrefix(s) # test the function parseNode. def testParseNode(self): print ("<> :a :b", self.parseNode ("<> :a :b")) print (":]", self.parseNode (":]")) print (":a :b :c .", self.parseNode (":a :b :c .")) print ("dc:a dc:b dc:c . { dc:ho \"oke\".}", self.parseNode ("dc:a dc:b dc:c . { dc:ho \"oke\".}")) print (" dc:b dc:c .", self.parseNode (" dc:b dc:c .")) print (";<#pat> :a :b.", self.parseNode (";<#pat> :a :b.")) print ("<#pat> :a :b", self.parseNode ("<#pat> :a :b")) print ("\"Hallo\" dc:b dc:c . { dc:ho \"oke\".}", self.parseNode ("\"Hallo\" dc:b dc:c . 
{ dc:ho \"oke\".}")) print ("<>", self.parseNode ("<> :a :b")) print (" :a :b.", self.parseNode (" :a :b.")) print ("\"\"\" hhh\nelklklke\"\"\"", self.parseNode ("\"\"\" hhh\nelklklke\"\"\"")) return ("Test OK") # parse a node with format prefix:postfix def parseNodePrefix(self, s): sd = self.utils.skipBlancs(s) if sd == "": return ("", "", "") else: bool1, prefix, rest1 = self.utils.parseUntil (":", sd) # format (Bool, String, String) bool2, postfix, rest2 = self.utils.parseUntilDelim (self.utils.delimNode, self.utils.skipBlancs(rest1)) # format (Bool, String, String) pre = self.getFromDictionary(prefix + ":") # normal case if bool1 == "T" and bool2 == "T" and pre != "Error": return ("Object", prefix + ":" + postfix + self.sep + pre + postfix, rest2) else: # error return ("Error parsing prefix:postfix : ", "", s) # parse a constant def parseConstant (self, inString): if inString == "": return ("","","") else: bool1, const, rest1 = self.utils.parseUntil ("\"", self.utils.skipBlancs (inString)) # format (Bool, String, String) if bool1 == "T": # " found return ("Constant", const + self.sep + const, self.utils.skipBlancs(rest1)) else: return ("Error parsing constant: ", "", self.utils.skipBlancs(inString)) # parse a special comment (starts with """) def parseSpecialComment (self, inString): if inString == "": return ("", "", "") else: bool, parsed, rest = self.utils.parseUntilString(inString, "\"\"\"") if bool == "T": return ("Constant", parsed + self.sep + parsed, self.utils.skipBlancs(rest[3:])) else: return ("Error parsing special constant: ", "", self.utils.skipBlancs(inString)) # parse a node that starts with _: (variable) def parseVariable(self, s): sd = self.utils.skipBlancs(s) if sd == "": return ("","","") else: bool1, node1, rest1 = self.utils.parseUntilDelim(self.utils.delimNode, sd) if bool1 == "T": # normal case return ("Node", node1 + self.sep + node1, self.utils.skipBlancs(rest1)) else: # error return ("Error parsing variable (_:xxx) : ", "", sd) # 
# parse a node that starts with ?: (variable)
def parseVariableQ(self, s):
    sd = self.utils.skipBlancs(s)
    if sd == "":
        return ("","","")
    found, node1, rest1 = self.utils.parseUntilDelim(self.utils.delimNode, sd)
    if found != "T":
        # error
        return ("Error parsing variable (?xxx) : ", "", sd)
    # normal case: short value and full value are the same text
    return ("Node", node1 + self.sep + node1, self.utils.skipBlancs(rest1))

# parse a node that starts with :
# The full value is built from the dictionary entry for ":" followed by
# the node name without its leading ":" and a closing ">".
def parseNodeThis(self, s):
    if s == "":
        return ("", "", "")
    found, node1, rest1 = self.utils.parseUntilDelim(self.utils.delimNode, s)
    if found != "T":
        # an error has happened
        return ("Error parsing : node " + s + " ", "Error", s)
    fullValue = self.getFromDictionary(":") + node1[1:] + ">"
    return ("Node", node1 + self.sep + fullValue, self.utils.skipBlancs(rest1))

# parse a node that starts with < (the "<" itself has already been
# removed by the caller). Three forms are tried in turn:
# <> , <#...> and <...>.
def parseNodeLesser(self, s):
    if s == "":
        return ("", "", "")
    closed, afterClose = self.utils.takec(">", self.utils.skipBlancs(s))
    if closed == "T":
        # parse <> = the parsed document
        return ("Node", "<>" + self.sep + self.getFromDictionary(":"), afterClose)
    hashFound, afterHash = self.utils.takec("#", self.utils.skipBlancs1(s))
    nameFound, node1, rest3 = self.utils.parseUntil(
        ">", self.utils.skipBlancs (afterHash))
    if hashFound == "T" and nameFound == "T":
        # parse <#...>; the last character of the ":" entry is dropped
        # before "#" and the name are appended
        base = self.getFromDictionary(":")[:-1]
        return ("Node",
                "<#" + node1 + ">" + self.sep + base + "#" + node1 + ">",
                rest3)
    uriFound, node2, rest4 = self.utils.parseUntil(
        ">", self.utils.skipBlancs(s))
    if uriFound == "T":
        # parse <...>
        return ("Node", "<" + node2 + ">" + self.sep + node2, rest4)
    return ("Error missing > :", "", s)

# function for parsing prefixes.
# input are the string to be parsed.
# returns a multiple that exists of an identifier, the value and
# the rest string.
# format of a prefix :
#   @prefix prefix: <uri>.
# the prefix is added in the global prefix dictionary as
# ("prefix:", "uri")
# Returns ("Prefix", normalized directive, rest string) on success and
# an error tuple with the unparsed input otherwise.
def parsePrefix(self, s):
    if s == "":
        return ("", "", "")
    sd = self.utils.skipBlancs( s)
    bool1, rest1 = self.utils.parseString( "@prefix", sd)
    bool2, prefix, rest2 = self.utils.parseUntil (
        ":", self.utils.skipBlancs (rest1))
    bool3, rest3 = self.utils.takec ("<", self.utils.skipBlancs(rest2))
    bool4, uri, rest4 = self.utils.parseUntil (
        ">", self.utils.skipBlancs(rest3))
    bool5, rest5 = self.utils.takec (".", self.utils.skipBlancs (rest4))
    if (bool1 == "T" and bool2 == "T" and bool3 == "T"
            and bool4 == "T" and bool5 == "T"):
        self.prefixDictionary [prefix + ":"] = uri
        return ("Prefix", "@prefix " + prefix + ": <" + uri + ">.", rest5)
    # an error has happened
    return ("Error parsing prefix" + self.sep, "", sd)

# parse the bind directive; has become obsolete.
# Same layout as @prefix but introduced by "bind"; the result is reported
# as an @prefix directive.
def parseBind(self, s):
    sd = self.utils.skipBlancs( s)
    bool1, rest1 = self.utils.parseString ("bind", sd)
    bool2, prefix, rest2 = self.utils.parseUntil (
        ":", self.utils.skipBlancs (rest1))
    bool3, rest3 = self.utils.takec ("<", self.utils.skipBlancs (rest2))
    bool4, uri, rest4 = self.utils.parseUntil (
        ">", self.utils.skipBlancs (rest3))
    bool5, rest5 = self.utils.takec (".", self.utils.skipBlancs (rest4))
    # normal case
    if (bool1 == "T" and bool2 == "T" and bool3 == "T"
            and bool4 == "T" and bool5 == "T"):
        self.prefixDictionary [prefix + ":"] = uri
        # rest5, not rest4: the terminating "." has been consumed, exactly
        # as in parsePrefix
        return ("Prefix", "@prefix " + prefix + ": <" + uri + ">.", rest5)
    # an error has happened
    return ("Error parsing bind: " + self.sep, "", sd)

# get an entry from the dictionary; an unknown key yields a
# "not found" text (followed by the separator) instead of an exception.
def getFromDictionary(self, s):
    try:
        return self.prefixDictionary [s]
    except KeyError:
        return "entry " + s + " not found in dictionary" + self.sep

# ************* known uri's ******************
rdf = "http://www.w3.org/1999/02/22-rdf-syntax-ns#"
rdfs = "http://www.w3.org/2000/01/rdf-schema#"
daml = "http://www.daml.org/2001/03/daml+oil#"
# ************* ---------------
# **************
# the separator placed after every token of the parser output
sep = "/@/"

# Sample inputs for the commented-out test calls at the end of the file.
# NOTE(review): several samples look as if a <uri> part was lost from this
# file (e.g. "@prefix dc: ." has no URI); the values are kept exactly as
# found -- confirm against the original before relying on them.

# test of node parsing 1
baseURI = "http://www/w3.org"
p = "<#pat> :a :b"
# test with parseAtom g p
# test of node parsing 2
p1 = "<> :a :b"
# test with parseAtom g p1
# test of prefix parsing
p2 = "@prefix dc: . @prefix : <#authen>."
# test of comment parsing
p2a = "# blabla \r\n @prefix dc: ."
# test of node with :...
p3 = ":a :b :c . { dc:ho \"oke\".}"
p3a = "{ dc:ho \"oke\".}"
# test of node with prefix:...
p4 = "dc:a dc:b dc:c . { dc:ho \"oke\".}"
# test of variable ?xxx
p4a = "?a :b :c."
# test of node with
p5 = " dc:b dc:c . { dc:ho \"oke\".}"
p5a = " dc:ho \"oke\".}"
# test of node with ".." (constant)
p6 = "\"Hallo\" dc:b dc:c . { dc:ho \"oke\".}"
# test of comment
p7 = "# ddddddd \r\n :a :b :c"
# test of parseNodeList
p8 = ":a, :b, :c."
# test of parsePropertyList
p9 = ":a :b; :c :d; :e :f."
# test of tripleset
p10 = "{:a :b :c. :d :e :f.}"
p10a = ":a :b :c . :d :e :f ."
# test of embedded triplesets
p11 = ("{{:person :member :institution. " +
       ":institution :w3cmember ." +
       ":institution :subscribed :mailinglist} :implies " +
       "{:person :authenticated :mailinglist}} a :Truth;" +
       ":forAll :person, :mailinglist, :institution.")
# test of embedded anonymous sets
p11a = ("[[[:member :institution; " +
        ":w3cmember .]" +
        ":institution [:subscribed :mailinglist]] :implies " +
        "[:authenticated :mailinglist]] a :Truth.")
# a complete document: id comment, prefixes, facts and one rule
p12 = ("# $Id: authen.axiom.n3,v 1.2 2001/10/01 00:12:34 amdus Exp $\n" +
       " \n" +
       "@prefix log: .\n" +
       "@prefix : .\n" +
       " \n" +
       " :member .\n" +
       " :w3cmember .\n" +
       " :subscribed .\n" +
       " \n" +
       "{{:person :member :institution.\n" +
       ":institution :w3cmember .\n" +
       ":institution :subscribed :mailinglist} log:implies\n" +
       "{:person :authenticated :mailinglist}} a log:Truth; log:forAll :person, :mailinglist, :institution.\n")
# only the rule of p12
p12a = ("{{:person :member :institution.\n" +
        ":institution :w3cmember .\n" +
        ":institution :subscribed :mailinglist} log:implies\n" +
        "{:person :authenticated :mailinglist}} a log:Truth; log:forAll :person, :mailinglist, :institution.\n")
# "is ... of" with an anonymous set as object
p12b = ":a is :b of [:c :d]."
# only the facts of p12
p13 = (" :member .\n" +
       " :w3cmember .\n" +
       " :subscribed .\n" +
       " \n")
p14 = "@prefix : ."
p15 = "<#QA> :includes :b."
# test of bind
p16 = "bind ds: ."
#
# The bnf grammar
#----------------------
#
# Taken from
# on 2001-08-03 (version of 2001-04-10)
#
# Modifications:
#
# $Log: n3.bnf,v $
# Revision 1.4 2001/08/06 20:56:21 sandro
# added space* and space+ in several places
# removed "#" from forbidden chars in URI_Reference
# handles comments
# made directives actually part of the grammar (!)
# allowed nprefix to be zero-length # # Revision 1.3 2001/08/03 13:44:43 sandro # filled in remaining non-terminals # # Revision 1.2 2001/08/03 13:02:48 sandro # standardized BNF so blindfold can compile it # added ::= for each rule # added | for branches # added ; at end of rule # added # before comments # put quotes around literals # turn hypen into underscore in identifiers # rename prefix to nprefix (hack around blindfold keyword for now) # # Revision 1.1 2001/08/03 12:34:38 sandro # added opening comments # # # # document ::= void # | statementlist; # # space ::= " " | "\n" | "\r" | comment; # # comment ::= "#" [^\r\n]*; # # statement ::= subject space+ property_list # | directive # ; # # statementlist ::= (statement space* ("." space*)?)* ; # # subject ::= node; # # verb ::= ">-" prop "->" # has xxx of # | "<-" prop "<-" # is xxx of # # | operator # has operator:xxx of??? NOT IMPLMENTED # | prop # has xxx of -- shorthand # | "has" prop # has xxx of # | "is" prop "of" # is xxx of # | "a" # has rdf:type of # | "=" # has daml:equivalent of # ; # # prop ::= node; # # node ::= uri_ref2 # | anonnode # | "this" # | node # ; # # nodelist ::= void # (used in lists) # | node # | node nodelist # ; # # anonnode ::= "[" property_list "]" # something which ... # | "{" statementlist "}" # the statementlist itself as a resource # | "(" nodelist ")" # short for eg [ n3:first node1; n3:rest [ n3:first node2; n3:rest: n3:null ]] # ; # # property_list ::= void # to allow [...]. # | verb space+ object_list # | verb space+ object_list space+ ";" space+ property_list # | ":-" anonnode #to allow two anonymous forms to be given eg [ a :Truth; :- { :sky :color :blue } ] ) # | ":-" anonnode ";" property_list # ; # # object_list ::= object # | object "," object_list # ; # # uri_ref2 ::= qname # | "<" URI_Reference ">" # ; # # qname ::= nprefix ":" localname; # ??? 
Allow omit colon when prefix void - keyword clash # # object ::= subject # | string1 # " constant-value-with-escaping " # | string2 # """ constant value with escaping including single or double occurences of quotes and/or newlines """ # # well-formed-xml-element ???? legacy or structured stuff - not implemented or distinguished # ; # # directive ::= "bind" space+ nprefix ":" uri_ref2 # Namespace declartion. Trailing "#" is omitted & assumed. Obsolete. # | "@prefix" space+ nprefix ":" space+ uri_ref2 # Namespace declaration # ; # ## operator ::= (Not implemented) ## + >- operator:plus -> ## - >- operator:minus -> ## / >- operator:slash-> ## * >- operator:star-> (etc? @@) # # fragid ::= alpha alphanumeric* ; # # alpha ::= [a-zA-Z]; # # alphanumeric ::= alpha | [0-9] | "_"; # # void ::= "" ; # nothing # # URI_Reference ::= [^{}<>]*; # short version # # nprefix ::= "" | ((alpha | "_") alphanumeric*); # # localname ::= fragid; # # string1 ::= '"' string1_char* '"'; # # string1_char ::= '\\"' | [^\"] ; # should disallow some other characters, etc. 
# # string2 ::= '"""' string2_char* '"""'; # # string2_char ::= [^"] | ([^] [^] [^"]); # something like this; need to think about it some more # #-----------------------------------------------------------------------} # test of this class n3Parser = N3Parser() #n3Parser.readN3("c:/Python/Euler/authen.axiom.n3") #print (n3Parser.n3String) #print ("Test parsePrefix: ",n3Parser.parsePrefix(p2)) #print ("Test parseBind: ",n3Parser.parseBind(p16)) #print ("Test parseNodeLesser: ",n3Parser.parseNodeLesser(p[1:])) #print ("Test parseNodeLesser: ",n3Parser.parseNodeLesser(p1[1:])) #print ("Test parseNodeLesser: ",n3Parser.parseNodeLesser(p5[1:])) #print ("Test parseNodeLesser: ",n3Parser.parseNodeLesser(p5a[1:])) #print ("Test parseNodeThis: ",n3Parser.parseNodeThis(p3)) #print ("Test parseVariableQ: ",n3Parser.parseVariableQ(p4a)) #print ("Test parseConstant: ",n3Parser.parseConstant(p6[1:])) #print ("Test node: ", n3Parser.testParseNode()) #print ("Test createAnonSubject: ",n3Parser.createAnonSubject()) #n3Parser.testParseVerb() #n3Parser.testParseSubject() #n3Parser.testParseObject() #n3Parser.testParseNodeList() #n3Parser.testParsePropertyList() #n3Parser.testParseAnonSet() #n3Parser.testParseTripleSet() #n3Parser.testParseN3() #n3Parser.fileName = "test" #n3Parser.saveOnDisk = 1 #n3Parser.writeOutput(("test test test", "test")) #print n3Parser.proxy,n3Parser.userPass #n3Parser.testParseAFile() #n3Parser.testParseTriple() n3Parser.interact() #print ("Dictionary: ",n3Parser.prefixDictionary)