import "Utils" import "IO.hs" {-- -- N3 parser -------------------------------------- -- -- Author : G.Naudts E-mail : naudts_vannoten@yahoo.com -- Address : Secretarisdreef 5 2288 Bouwel Belgium -- The parser is based on the grammar at the back and has been token from : -- http://2001/blindfold/sample/n3.bnf -- and the parser uses also the structures defined in N3 primer : -- http://www.w3.org/200/10/swap/Primer.htm -- The output data structure consists basically of pairs (id,value) . -- All abbreviations of N3 are resolved in the output . -- Anonymous nodes get an anonymous subject value = T1 ... Tn where -- n is the index of the last anonymous node . -- The parser is a look-ahead parser . -- The prelude used is standard.pre . -- When an error occurs the stream is synchronized on the next point . -- With thanks to Mark P.Jones for his inspiring prolog interpreter . -- Download Gofer at : http://www.cse.ogi.edu/~mpj/goferarc/ -- -- I give here a bnf of the output : -- -- ParserOutput ::= ParsedString "End" -- -- ParsedString ::= Triple (ParserOutput)*| -- TripleSet (ParserOutput)* -- -- Triple ::= "Triple/" Subject Verb Object -- -- AnonTriple ::= "AnonTriple/" AnonSubject Verb Object -- -- TripleSet ::= "TripleSet/" Triple* AnonTriple* "EndOfSet/" -- -- Subject ::= "Subject/" String "/"| -- TripleSet -- -- AnonSubject ::= "AnonSubject/T" n "/" -- -- Verb ::= "Verb/" String "/" -- -- Object ::= "Object/" String "/"| -- TripleSet -- -- n ::= (digit)* -- -- The output is defined by constants that are defined in a section -- indicated by the header : Constants . module N3Parser where -- readN3 reads and parses a N3 file and puts the output on sysout --readN3 :: String -> Dialogue --readN3 f = readFile f abort -- (\input -> appendChan stdout (n3Parser input) abort done)-} -- n3Parser get an input string and starts the parsing proces with the -- global list; first spaces are skipped n3Parser :: String -> String n3Parser s = parseN3 (g, "", skipBlancs s) -- testResult gets a parsed item; checks for errors ; -- if error skips till the next point and -- continues parsing ; returns the global list and the rest string testResult :: (Globals, String, String, String) -> (Globals, String) testResult (g, s1, s2, s3) | startsWith s1 "Error" = (g, skipTillCharacter '.' s3) | otherwise = (g, s3) -- function parseN3 : top level of the parser -- Takes the global list and a string (without leading spaces) -- and returns a string consisting of a list of identifier-value pairs -- separated by blancs -- e.g. "Subject :a Verb :b Object :c" parseN3 :: ParserData -> String parseN3 (g, out, "") = "" parseN3 (g, out, s@(x:xs)) -- parse a prefix -- no output | x == '@' = parseN3 (g2, out, skipBlancs s4) -- parse a comment | x == '#' = parseN3 (g, out, skipBlancs s5) -- anonymous node | x == '[' = parseN3 (parseAnonSet (g, out, s)) -- must be a triple(set) parse triplelist | otherwise = parseN3 (parseTripleSet (g, out, s)) where (g1, s1, s2, s3) = parsePrefix g s (g2, s4) = testResult (g1, s1, s2, s3) s5 = parseComment s -- parse a set of triples : insert "TripleSet " and call parsePropertyList -- Then call recusively parseTripelSet ; then insert "EndOfSet " parseTripleSet :: ParserData -> ParserData parseTripleSet (g, out, "") = (g, out, "") parseTripleSet p@(g, out, s@(x:xs)) -- begin of set detected; check for end of set and call recursively | y == '{' && bv1 = parseTripleSet (parseTriple (g, out ++ " " ++ "TripleSet", skipBlancs ys)) -- '}' found - insert "EndOfSet " and return | y == '}' = (g, out ++ " " ++ "EndOfSet ", skipBlancs ys) -- '.' found ; end of triple; return if single triple | y == '.' && not bv1 = parseTriple (g, out,skipBlancs ys) -- '.' found and analyzing set; recall parseTripleSet and skip point | y == '.' && bv1 = parseTripleSet (g, out, skipBlancs ys) -- '}' not found ==> synchronize on next point. | y == '{' && not bv1 && bv3 = (g, "Error Missing }", skipTillCharacter '.' (skipBlancs ys)) -- no point found -- last statement without point or -- something is fundamentally wrong -- return empty rest string | y == '{' && not bv1 && not bv3 = (g, "Error Missing Point", "") -- must be a single triple : parseSubject and call parsePropertyList | otherwise = parseTriple p where bv1 = checkCharacter '}' ys bv3 = checkCharacter '.' ys s1@(y:ys) = skipBlancs s -- parseTriple parses a singel triple parseTriple :: ParserData -> ParserData parseTriple (g, out, "") = (g, out, "") parseTriple (g, out, s) = parsePropertyList (parseSubject (g, out, (skipBlancs s))) -- parse a set of anonymous triples : insert "AnonSet " -- and call parsePropertyList -- Then call recusively parseAnon ; then insert "EndOfSet " parseAnonSet :: ParserData -> ParserData parseAnonSet (g, out, "") = (g, out, "") parseAnonSet (g, out, s@(x:xs)) -- parse a set of anonymous triples: assign a subject -- and then call parsePropertyList | bv2 = parseAnonSet (parsePropertyList (g3, out ++ " " ++ "AnonSet" ++ s5, skipBlancs s)) -- ']' found - insert "EndOfSet " and return | x == ']' = (g, out ++ " " ++ "EndOfSet ", skipBlancs xs) -- '{' found call parseTripleSet | x == '{' = parseTripleSet (g, out, s) -- ']' not found ==> synchronize on next point. | x == '[' && not bv2 && bv3 = (g, "Error Missing ]", skipTillCharacter '.' (skipBlancs xs)) -- no point found -- last statement without point or -- something is fundamentally wrong -- return empty rest string | x == '[' && not bv2 && not bv3 = (g, "Error Missing Point", "") -- nothing appropriate return | otherwise = (g, out, s) where bv2 = checkCharacter ']' xs bv3 = checkCharacter '.' xs (g3, s5) = createAnonSubject g -- parses a verb and then calls parseNodeList -- accumulates in second parameter --parsePropertyList :: ParserData -> ParserData parsePropertyList (g, out, "") = (g, out, "") parsePropertyList (g, out, s) -- end of propertyList | y == '.' = (g, out, s1) -- propertylist with subject already defined | y == ';' = parsePropertyList (parseProperty (g, out ++ " " ++ subject, ys)) | otherwise = parsePropertyList (parseProperty (g, out, s1)) where subject = "Subject " ++ (getStringParam g "LastSubject") s1@(y:ys) = skipBlancs s -- parse a single property --parseProperty :: ParserData -> ParserData parseProperty (g, out, "") = (g, out, "") parseProperty (g, out, s) = (parseNodeList (parseVerb (g, out, (skipBlancs s)))) -- test the function parseNodeList -- testParseNodeList :: [ParserData] testParseNodeList = stringToFile (composed testList) "testParseNodeList.tst" where testList = [(g2, "", ":a, :b,:c."), (g2, "", ":a."), (g2, "", ":a"), (g2, "", ":a, :b,:c")] -- compose the output list composed [] = [] composed (x:xs) = "\n\nInput:" ++ tripleToString x ++ "\n\nOutput" ++ tripleToString (parseNodeList x) ++ composed xs -- display ParserData in a more or less readable format displayTriple :: ParserData -> IO () displayTriple (g, out, rest) = putStr ("\n" ++ show g ++ "\n" ++ "out: " ++ out ++ "\n" ++ "rest: " ++ rest) -- display a list of ParserData displayTripleList :: [ParserData] -> IO () displayTripleList [] = putStr "Empty list" displayTripleList triple = putStr (tripleListToString triple) -- transforms ParserData in string format tripleToString :: ParserData -> String tripleToString (g, out, rest) = "\n\n" ++ show g ++ "\n" ++ "out: " ++ out ++ "\n" ++ "rest: " ++ rest -- transform a list of ParserData to String tripleListToString :: [ParserData] -> String tripleListToString [] = [] tripleListToString (x:xs) = tripleToString x ++ tripleListToString xs -- save a string to a file of which the name is indicated -- by the second string -- stringToFile :: (String, String) -> IO stringToFile s fileName = do toHandle <- openFile fileName WriteMode hPutStr toHandle s hClose toHandle putStr "Done." -- parses nodes separated by , . -- Subject and verb are retrieved from the globals . --parseNodeList :: ParserData -> ParserData parseNodeList (g, out, "") = (g, out, "") parseNodeList (g, out, s) | y == '.' = (g, out, sd) | y == ';' = (g, out, sd) | y == '{' = parseNodeList (parseTripleSet (g, out, sd)) | y == '[' = parseNodeList (parseAnonSet (g, out, sd)) | y == ',' = parseNodeList (g3, out ++ " " ++ subject ++ " " ++ verb ++ " " ++ s5 ++ " " ++ s6, skipBlancs s7) | otherwise = parseNodeList (parseObject (g, out, sd)) where sd@(y:ys) = skipBlancs s subject = "Subject " ++ getStringParam g "LastSubject" verb = "Verb " ++ getStringParam g "LastVerb" (g3, s5, s6, s7) = parseNode g ys -- parse a subject . --parseSubject :: ParserData -> ParserData parseSubject (g, out, "") = (g, out, "") parseSubject (g, out, s) = (g2, out ++ " " ++ s1 ++ " " ++ s2, skipBlancs s4) where (g1, s1, s2, s3) = saveSubject (parseNode g (skipBlancs sd)) (g2, s4) = testResult (g1, s1, s2, s3) sd = skipBlancs s -- parse a verb --parseVerb :: ParserData -> ParserData parseVerb (g, out, "") = (g, out, "") parseVerb (g, out, s) = (g2, out ++ " " ++ s1 ++ " " ++ s2, skipBlancs s4) where (g1, s1, s2, s3) = saveVerb (parseNode g (skipBlancs sd)) (g2, s4) = testResult (g1, s1, s2, s3) sd = skipBlancs s -- parse an object --parseObject :: ParserData -> ParserData parseObject (g, out, "") = (g, out, "") parseObject (g, out, s) = (g2, out ++ " " ++ s1 ++ " " ++ s2, skipBlancs s4) where (g1, s1, s2, s3) = parseNode g (skipBlancs sd) (g2, s4) = testResult (g1, s1, s2, s3) sd = skipBlancs s -- save a subject in the global list saveSubject :: (Globals, String, String, String) -> (Globals, String, String, String) saveSubject (g, s1, s2, s3) = (g1, "Subject", s2, s3) where g1 = setStringParam g "LastSubject" s2 -- save a verb in the global list saveVerb :: (Globals, String, String, String) -> (Globals, String, String, String) saveVerb (g, s1, s2, s3) = (g1, "Verb", s2, s3) where g1 = setStringParam g "LastVerb" s2 createAnonSubject :: Globals -> (Globals, String) createAnonSubject g = (g1, s) where i = getIntParam g "AnonCounter" g1 = setIntParam g "AnonCounter" (i+1) s = "T" ++ intToString (i+1) -- test the function parseNode -- testParseNode :: testParseNode = stringToFile (composeNodes testList) "testParseNode.tst" where testList = [ "<> :a :b", ":a :b :c .", "@prefix dc: .", "dc:a dc:b dc:c . { dc:ho \"oke\".}", " dc:b dc:c . { dc:ho \"oke\".}", ";<#pat> :a :b.", -- "<#pat> :a :b", "\"Hallo\" dc:b dc:c . { dc:ho \"oke\".}"] -- compose the output list composeNodes [] = [] composeNodes (x:xs) = "\n\nInput: " ++ x ++ "\n\nOutput " ++ quadToString (parseNode g x) ++ composeNodes xs -- transforms output of parseNode in string format quadToString :: (Globals, String, String, String) -> String quadToString (g, s1, s2, s3) = "\n\n" ++ show g ++ "\n" ++ "item: " ++ s1 ++ " value: " ++ s2 ++ " rest: " ++ s3 -- transforms a pair (g, s) to String pairToString :: (Globals, String) -> String pairToString (g, s) = "\n\n" ++ show g ++ "\n" ++ "input: " ++ s -- function for parsing nodes -- input are the global data and the string to be parsed -- returns a multiple that exists of global list, a node, the value and -- the rest string . -- Formats of a node (=URI): -- <#...> -- <> -- :... -- prefix:... -- -- ".." (constant) parseNode :: Globals -> String -> (Globals, String, String, String) parseNode g "" = (g, "", "", "") parseNode g s -- node in a node list |y == ';' = parseNode g ys -- <#...> |bv1 && bv2 && bv3 = (g, "Object", (getStringParam g "BaseURI") ++ "#" ++ node, post3) -- <> |bv1 && bv4 = (g, "Object",getStringParam g "BaseURI",post4) -- prefix:... |bv7 && bv8 && bv9 = (g, "Object", uri ++"#" ++ postfix, post8) -- "..." constant |bv10 && bv11 = (g, "Constant", const, post11) -- |bv1 && bv12 = (g, "Object", uria, post12) -- :... |bv5 && bv6 = (g, "Object", getStringParam g "BaseURI"++"#" ++ node2, post6) -- otherwise error |otherwise = (g, "Error in parsing node: "++ s,"", sd) -- secondary definitions where (bv1, post1) = takec '<' sd (bv2, post2) = takec '#' (skipBlancs post1) (bv3, node, post3) = parseUntil '>' (skipBlancs post2) (bv4, post4) = takec '>' (skipBlancs post1) (bv5, post5) = takec ':' sd (bv6, node2, post6) = parseUntilDelim delimNode (skipBlancs post5) (bv7, prefix, post7) = parseUntil ':' sd (bv8, postfix, post8) = parseUntilDelim delimNode (skipBlancs post7) uri = getStringParam g prefix bv9 = uri /= "Error" (bv10 , post10) = takec '"' sd (bv11 , const, post11) = parseUntil '"' (skipBlancs post10) (bv12 , uria , post12) = parseUntil '>' (skipBlancs post1) sd@(y:ys) = skipBlancs s -- function for parsing prefixes -- input are the global data and the string to be parsed -- returns a multiple that exists of an identifier, the value -- the rest string and the global list. -- format of a prefix : -- @prefix ...: -- the prefix is added in the global list as ("prefix","value-of-prefix"); -- the returned value = "" parsePrefix :: Globals -> String -> (Globals, String, String, String) parsePrefix g "" = (g, "", "", "") parsePrefix g s |bv1 && bv2 && bv3 && bv4 = (g, "Prefix", "", post4) |otherwise = (g, "Error", "", s ) where (bv1, post1) = parseString "@prefix" s (bv2, prefix, post2 ) = parseUntil ':' (skipBlancs post1) (bv3, uri, post3) = parseUntil '>' (skipBlancs post2) (bv4, post4) = takec '.' (skipBlancs post3) g' = setStringParam g prefix uri -- add the prefix -- to the global list -- ************* basic constants ************** -- g = [("BaseURI", baseURI),("AnonCounter", "0"), ("LastSubject", ""), ("LastVerb", "")] -- ************* --------------- ************** -- globals for testing g2 = [("BaseURI", baseURI),("AnonCounter", "1"), ("LastSubject", ":last_subject"), ("LastVerb", ":last_verb")] -- test of node parsing 1 baseURI = "http://www/w3.org" g1 = [("BaseURI", baseURI),("dc","http://gn.org/")] p = "<#pat> :a :b" -- test with parseAtom g p -- test of node parsing 2 p1 = "<> :a :b" -- test with parseAtom g p1 -- test of prefix parsing p2 = "@prefix dc: ." -- test of node with :... p3 = ":a :b :c . { dc:ho \"oke\".}" -- test of node with prefix:... p4 = "dc:a dc:b dc:c . { dc:ho \"oke\".}" -- test of node with p5 = " dc:b dc:c . { dc:ho \"oke\".}" -- test of node with ".." (constant) p6 = "\"Hallo\" dc:b dc:c . { dc:ho \"oke\".}" -- test of comment p7 = "# ddddddd \r\n :a :b :c" -- test of parseNodeList p8 = ":a, :b, :c." -- test of parsePropertyList p9 = ":a :b; :c :d; :e :f." -- test of tripleset p10 = "{:a :b :c. :d :e :f.}" {- -- The bnf grammar ---------------------- --# --# Taken from --# on 2001-08-03 (version of 2001-04-10) --# --# Modifications: --# --# $Log: n3.bnf,v $ --# Revision 1.4 2001/08/06 20:56:21 sandro --# added space* and space+ in several places --# removed "#" from forbidden chars in URI_Reference --# handles comments --# made directives actually part of the grammar (!) --# allowed nprefix to be zero-length --# --# Revision 1.3 2001/08/03 13:44:43 sandro --# filled in remaining non-terminals --# --# Revision 1.2 2001/08/03 13:02:48 sandro --# standardized BNF so blindfold can compile it --# added ::= for each rule --# added | for branches --# added ; at end of rule --# added # before comments --# put quotes around literals --# turn hypen into underscore in identifiers --# rename prefix to nprefix (hack around blindfold keyword for now) --# --# Revision 1.1 2001/08/03 12:34:38 sandro --# added opening comments --# -- -- --document ::= void -- | statementlist; -- --space ::= " " | "\n" | "\r" | comment; -- --comment ::= "#" [^\r\n]*; -- --statement ::= subject space+ property_list -- | directive -- ; -- --statementlist ::= (statement space* ("." space*)?)* ; -- --subject ::= node; -- --verb ::= ">-" prop "->" # has xxx of -- | "<-" prop "<-" # is xxx of -- # | operator # has operator:xxx of??? NOT IMPLMENTED -- | prop # has xxx of -- shorthand -- | "has" prop # has xxx of -- | "is" prop "of" # is xxx of -- | "a" # has rdf:type of -- | "=" # has daml:equivaent of -- ; -- --prop ::= node; -- --node ::= uri_ref2 -- | anonnode -- | "this" -- | node -- ; -- --nodelist ::= void # (used in lists) -- | node -- | node nodelist -- ; -- --anonnode ::= "[" property_list "]" # something which ... -- | "{" statementlist "}" # the statementlist itself as a resource -- | "(" nodelist ")" # short for eg [ n3:first node1; n3:rest [ n3:first node2; n3:rest: n3:null ]] -- ; -- --property_list ::= void # to allow [...]. -- | verb space+ object_list -- | verb space+ object_list space+ ";" space+ property_list -- | ":-" anonnode #to allow two anonymous forms to be given eg [ a :Truth; :- { :sky :color :blue } ] ) -- | ":-" anonnode ";" property_list -- ; -- --object_list ::= object -- | object "," object_list -- ; -- --uri_ref2 ::= qname -- | "<" URI_Reference ">" -- ; -- --qname ::= nprefix ":" localname; # ??? Allow omit colon when prefix void - keyword clash -- --object ::= subject -- | string1 # " constant-value-with-escaping " -- | string2 # """ constant value with escaping including single or double occurences of quotes and/or newlines """ -- # well-formed-xml-element ???? legacy or structured stuff - not implemented or distinguished -- ; -- --directive ::= "bind" space+ nprefix ":" uri_ref2 # Namespace declartion. Trailing "#" is omitted & assumed. Obsolete. -- | "@prefix" space+ nprefix ":" space+ uri_ref2 # Namespace declaration -- ; -- --# operator ::= (Not implemented) --# + >- operator:plus -> --# - >- operator:minus -> --# / >- operator:slash-> --# * >- operator:star-> (etc? @@) -- --fragid ::= alpha alphanumeric* ; -- --alpha ::= [a-zA-Z]; -- --alphanumeric ::= alpha | [0-9] | "_"; -- --void ::= "" ; # nothing -- --URI_Reference ::= [^{}<>]*; # short version -- --nprefix ::= "" | ((alpha | "_") alphanumeric*); -- --localname ::= fragid; -- --string1 ::= '"' string1_char* '"'; -- --string1_char ::= '\\"' | [^\"] ; # should disallow some other characters, etc. -- --string2 ::= '"""' string2_char* '"""'; -- --string2_char ::= [^"] | ([^] [^] [^"]); # something like this; need to think about it some more -- -----------------------------------------------------------------------}