import "Utils"
import "IO.hs"
{--
-- N3 parser
--------------------------------------
--
-- Author : G.Naudts E-mail : naudts_vannoten@yahoo.com
-- Address : Secretarisdreef 5 2288 Bouwel Belgium
-- The parser is based on the grammar at the back and has been token from :
-- http://2001/blindfold/sample/n3.bnf
-- and the parser uses also the structures defined in N3 primer :
-- http://www.w3.org/200/10/swap/Primer.htm
-- The output data structure consists basically of pairs (id,value) .
-- All abbreviations of N3 are resolved in the output .
-- Anonymous nodes get an anonymous subject value = T1 ... Tn where
-- n is the index of the last anonymous node .
-- The parser is a look-ahead parser .
-- The prelude used is standard.pre .
-- When an error occurs the stream is synchronized on the next point .
-- With thanks to Mark P.Jones for his inspiring prolog interpreter .
-- Download Gofer at : http://www.cse.ogi.edu/~mpj/goferarc/
--
-- I give here a bnf of the output :
--
-- ParserOutput ::= ParsedString "End"
--
-- ParsedString ::= Triple (ParserOutput)*|
-- TripleSet (ParserOutput)*
--
-- Triple ::= "Triple/" Subject Verb Object
--
-- AnonTriple ::= "AnonTriple/" AnonSubject Verb Object
--
-- TripleSet ::= "TripleSet/" Triple* AnonTriple* "EndOfSet/"
--
-- Subject ::= "Subject/" String "/"|
-- TripleSet
--
-- AnonSubject ::= "AnonSubject/T" n "/"
--
-- Verb ::= "Verb/" String "/"
--
-- Object ::= "Object/" String "/"|
-- TripleSet
--
-- n ::= (digit)*
--
-- The output is defined by constants that are defined in a section
-- indicated by the header : Constants .
module N3Parser where
-- readN3 reads and parses a N3 file and puts the output on sysout
--readN3 :: String -> Dialogue
--readN3 f = readFile f abort
-- (\input -> appendChan stdout (n3Parser input) abort done)-}
-- n3Parser get an input string and starts the parsing proces with the
-- global list; first spaces are skipped
n3Parser :: String -> String
n3Parser s = parseN3 (g, "", skipBlancs s)
-- testResult gets a parsed item; checks for errors ;
-- if error skips till the next point and
-- continues parsing ; returns the global list and the rest string
testResult :: (Globals, String, String, String) -> (Globals, String)
testResult (g, s1, s2, s3)
| startsWith s1 "Error" = (g, skipTillCharacter '.' s3)
| otherwise = (g, s3)
-- function parseN3 : top level of the parser
-- Takes the global list and a string (without leading spaces)
-- and returns a string consisting of a list of identifier-value pairs
-- separated by blancs
-- e.g. "Subject :a Verb :b Object :c"
parseN3 :: ParserData -> String
parseN3 (g, out, "") = ""
parseN3 (g, out, s@(x:xs))
-- parse a prefix -- no output
| x == '@' = parseN3 (g2, out, skipBlancs s4)
-- parse a comment
| x == '#' = parseN3 (g, out, skipBlancs s5)
-- anonymous node
| x == '[' = parseN3 (parseAnonSet (g, out, s))
-- must be a triple(set) parse triplelist
| otherwise = parseN3 (parseTripleSet (g, out, s))
where (g1, s1, s2, s3) = parsePrefix g s
(g2, s4) = testResult (g1, s1, s2, s3)
s5 = parseComment s
-- parse a set of triples : insert "TripleSet " and call parsePropertyList
-- Then call recusively parseTripelSet ; then insert "EndOfSet "
parseTripleSet :: ParserData -> ParserData
parseTripleSet (g, out, "") = (g, out, "")
parseTripleSet p@(g, out, s@(x:xs))
-- begin of set detected; check for end of set and call recursively
| y == '{' && bv1 = parseTripleSet
(parseTriple (g, out ++ " " ++ "TripleSet", skipBlancs ys))
-- '}' found - insert "EndOfSet " and return
| y == '}' = (g, out ++ " " ++ "EndOfSet ", skipBlancs ys)
-- '.' found ; end of triple; return if single triple
| y == '.' && not bv1 = parseTriple (g, out,skipBlancs ys)
-- '.' found and analyzing set; recall parseTripleSet and skip point
| y == '.' && bv1 = parseTripleSet (g, out, skipBlancs ys)
-- '}' not found ==> synchronize on next point.
| y == '{' && not bv1 && bv3 =
(g, "Error Missing }", skipTillCharacter '.' (skipBlancs ys))
-- no point found -- last statement without point or
-- something is fundamentally wrong -- return empty rest string
| y == '{' && not bv1 && not bv3 = (g, "Error Missing Point", "")
-- must be a single triple : parseSubject and call parsePropertyList
| otherwise = parseTriple p
where bv1 = checkCharacter '}' ys
bv3 = checkCharacter '.' ys
s1@(y:ys) = skipBlancs s
-- parseTriple parses a singel triple
parseTriple :: ParserData -> ParserData
parseTriple (g, out, "") = (g, out, "")
parseTriple (g, out, s) =
parsePropertyList (parseSubject (g, out, (skipBlancs s)))
-- parse a set of anonymous triples : insert "AnonSet "
-- and call parsePropertyList
-- Then call recusively parseAnon ; then insert "EndOfSet "
parseAnonSet :: ParserData -> ParserData
parseAnonSet (g, out, "") = (g, out, "")
parseAnonSet (g, out, s@(x:xs))
-- parse a set of anonymous triples: assign a subject
-- and then call parsePropertyList
| bv2 = parseAnonSet (parsePropertyList (g3, out ++ " " ++
"AnonSet" ++ s5, skipBlancs s))
-- ']' found - insert "EndOfSet " and return
| x == ']' = (g, out ++ " " ++ "EndOfSet ", skipBlancs xs)
-- '{' found call parseTripleSet
| x == '{' = parseTripleSet (g, out, s)
-- ']' not found ==> synchronize on next point.
| x == '[' && not bv2 && bv3 =
(g, "Error Missing ]", skipTillCharacter '.' (skipBlancs xs))
-- no point found -- last statement without point or
-- something is fundamentally wrong -- return empty rest string
| x == '[' && not bv2 && not bv3 = (g, "Error Missing Point", "")
-- nothing appropriate return
| otherwise = (g, out, s)
where bv2 = checkCharacter ']' xs
bv3 = checkCharacter '.' xs
(g3, s5) = createAnonSubject g
-- parses a verb and then calls parseNodeList
-- accumulates in second parameter
--parsePropertyList :: ParserData -> ParserData
parsePropertyList (g, out, "") = (g, out, "")
parsePropertyList (g, out, s)
-- end of propertyList
| y == '.' = (g, out, s1)
-- propertylist with subject already defined
| y == ';' = parsePropertyList (parseProperty (g, out ++ " "
++ subject, ys))
| otherwise = parsePropertyList (parseProperty (g, out, s1))
where subject = "Subject " ++ (getStringParam g "LastSubject")
s1@(y:ys) = skipBlancs s
-- parse a single property
--parseProperty :: ParserData -> ParserData
parseProperty (g, out, "") = (g, out, "")
parseProperty (g, out, s) = (parseNodeList (parseVerb (g, out, (skipBlancs s))))
-- test the function parseNodeList
-- testParseNodeList :: [ParserData]
testParseNodeList =
stringToFile
(composed testList)
"testParseNodeList.tst"
where testList = [(g2, "", ":a, :b,:c."),
(g2, "", ":a."),
(g2, "", ":a"),
(g2, "", ":a, :b,:c")]
-- compose the output list
composed [] = []
composed (x:xs) = "\n\nInput:" ++ tripleToString x ++
"\n\nOutput" ++ tripleToString (parseNodeList x) ++ composed xs
-- display ParserData in a more or less readable format
displayTriple :: ParserData -> IO ()
displayTriple (g, out, rest) =
putStr ("\n" ++ show g ++ "\n" ++ "out: " ++ out ++ "\n" ++ "rest: " ++ rest)
-- display a list of ParserData
displayTripleList :: [ParserData] -> IO ()
displayTripleList [] = putStr "Empty list"
displayTripleList triple = putStr (tripleListToString triple)
-- transforms ParserData in string format
tripleToString :: ParserData -> String
tripleToString (g, out, rest) =
"\n\n" ++ show g ++ "\n" ++ "out: " ++ out ++ "\n" ++ "rest: " ++ rest
-- transform a list of ParserData to String
tripleListToString :: [ParserData] -> String
tripleListToString [] = []
tripleListToString (x:xs) = tripleToString x ++ tripleListToString xs
-- save a string to a file of which the name is indicated
-- by the second string
-- stringToFile :: (String, String) -> IO
stringToFile s fileName = do toHandle <- openFile fileName WriteMode
hPutStr toHandle s
hClose toHandle
putStr "Done."
-- parses nodes separated by , .
-- Subject and verb are retrieved from the globals .
--parseNodeList :: ParserData -> ParserData
parseNodeList (g, out, "") = (g, out, "")
parseNodeList (g, out, s)
| y == '.' = (g, out, sd)
| y == ';' = (g, out, sd)
| y == '{' = parseNodeList (parseTripleSet (g, out, sd))
| y == '[' = parseNodeList (parseAnonSet (g, out, sd))
| y == ',' = parseNodeList (g3, out ++ " " ++ subject ++ " " ++ verb
++ " " ++ s5 ++ " " ++ s6, skipBlancs s7)
| otherwise = parseNodeList (parseObject (g, out, sd))
where sd@(y:ys) = skipBlancs s
subject = "Subject " ++ getStringParam g "LastSubject"
verb = "Verb " ++ getStringParam g "LastVerb"
(g3, s5, s6, s7) = parseNode g ys
-- parse a subject .
--parseSubject :: ParserData -> ParserData
parseSubject (g, out, "") = (g, out, "")
parseSubject (g, out, s) = (g2, out ++ " " ++ s1 ++ " " ++ s2, skipBlancs s4)
where (g1, s1, s2, s3) = saveSubject (parseNode g (skipBlancs sd))
(g2, s4) = testResult (g1, s1, s2, s3)
sd = skipBlancs s
-- parse a verb
--parseVerb :: ParserData -> ParserData
parseVerb (g, out, "") = (g, out, "")
parseVerb (g, out, s) = (g2, out ++ " " ++ s1 ++ " " ++ s2, skipBlancs s4)
where (g1, s1, s2, s3) = saveVerb (parseNode g (skipBlancs sd))
(g2, s4) = testResult (g1, s1, s2, s3)
sd = skipBlancs s
-- parse an object
--parseObject :: ParserData -> ParserData
parseObject (g, out, "") = (g, out, "")
parseObject (g, out, s) = (g2, out ++ " " ++ s1 ++ " " ++ s2, skipBlancs s4)
where (g1, s1, s2, s3) = parseNode g (skipBlancs sd)
(g2, s4) = testResult (g1, s1, s2, s3)
sd = skipBlancs s
-- save a subject in the global list
saveSubject :: (Globals, String, String, String) -> (Globals, String, String, String)
saveSubject (g, s1, s2, s3) = (g1, "Subject", s2, s3)
where g1 = setStringParam g "LastSubject" s2
-- save a verb in the global list
saveVerb :: (Globals, String, String, String) -> (Globals, String, String, String)
saveVerb (g, s1, s2, s3) = (g1, "Verb", s2, s3)
where g1 = setStringParam g "LastVerb" s2
createAnonSubject :: Globals -> (Globals, String)
createAnonSubject g = (g1, s)
where i = getIntParam g "AnonCounter"
g1 = setIntParam g "AnonCounter" (i+1)
s = "T" ++ intToString (i+1)
-- test the function parseNode
-- testParseNode ::
testParseNode =
stringToFile
(composeNodes testList)
"testParseNode.tst"
where testList = [ "<> :a :b",
":a :b :c .",
"@prefix dc: .",
"dc:a dc:b dc:c . { dc:ho \"oke\".}",
" dc:b dc:c . { dc:ho \"oke\".}",
";<#pat> :a :b.",
-- "<#pat> :a :b",
"\"Hallo\" dc:b dc:c . { dc:ho \"oke\".}"]
-- compose the output list
composeNodes [] = []
composeNodes (x:xs) = "\n\nInput: " ++ x ++
"\n\nOutput " ++ quadToString (parseNode g x)
++ composeNodes xs
-- transforms output of parseNode in string format
quadToString :: (Globals, String, String, String) -> String
quadToString (g, s1, s2, s3) =
"\n\n" ++ show g ++ "\n" ++ "item: " ++ s1 ++ " value: " ++ s2 ++
" rest: " ++ s3
-- transforms a pair (g, s) to String
pairToString :: (Globals, String) -> String
pairToString (g, s) =
"\n\n" ++ show g ++ "\n" ++ "input: " ++ s
-- function for parsing nodes
-- input are the global data and the string to be parsed
-- returns a multiple that exists of global list, a node, the value and
-- the rest string .
-- Formats of a node (=URI):
-- <#...>
-- <>
-- :...
-- prefix:...
--
-- ".." (constant)
parseNode :: Globals -> String -> (Globals, String, String, String)
parseNode g "" = (g, "", "", "")
parseNode g s
-- node in a node list
|y == ';' = parseNode g ys
-- <#...>
|bv1 && bv2 && bv3 =
(g, "Object", (getStringParam g "BaseURI") ++ "#" ++ node, post3)
-- <>
|bv1 && bv4 =
(g, "Object",getStringParam g "BaseURI",post4)
-- prefix:...
|bv7 && bv8 && bv9 = (g, "Object", uri ++"#" ++ postfix, post8)
-- "..." constant
|bv10 && bv11 = (g, "Constant", const, post11)
--
|bv1 && bv12 = (g, "Object", uria, post12)
-- :...
|bv5 && bv6 = (g, "Object", getStringParam g "BaseURI"++"#" ++ node2, post6)
-- otherwise error
|otherwise = (g, "Error in parsing node: "++ s,"", sd)
-- secondary definitions
where (bv1, post1) = takec '<' sd
(bv2, post2) = takec '#' (skipBlancs post1)
(bv3, node, post3) = parseUntil '>' (skipBlancs post2)
(bv4, post4) = takec '>' (skipBlancs post1)
(bv5, post5) = takec ':' sd
(bv6, node2, post6) = parseUntilDelim delimNode (skipBlancs post5)
(bv7, prefix, post7) = parseUntil ':' sd
(bv8, postfix, post8) = parseUntilDelim delimNode (skipBlancs post7)
uri = getStringParam g prefix
bv9 = uri /= "Error"
(bv10 , post10) = takec '"' sd
(bv11 , const, post11) = parseUntil '"' (skipBlancs post10)
(bv12 , uria , post12) = parseUntil '>' (skipBlancs post1)
sd@(y:ys) = skipBlancs s
-- function for parsing prefixes
-- input are the global data and the string to be parsed
-- returns a multiple that exists of an identifier, the value
-- the rest string and the global list.
-- format of a prefix :
-- @prefix ...:
-- the prefix is added in the global list as ("prefix","value-of-prefix");
-- the returned value = ""
parsePrefix :: Globals -> String -> (Globals, String, String, String)
parsePrefix g "" = (g, "", "", "")
parsePrefix g s
|bv1 && bv2 && bv3 && bv4 =
(g, "Prefix", "", post4)
|otherwise = (g, "Error", "", s )
where (bv1, post1) = parseString "@prefix" s
(bv2, prefix, post2 ) = parseUntil ':' (skipBlancs post1)
(bv3, uri, post3) = parseUntil '>' (skipBlancs post2)
(bv4, post4) = takec '.' (skipBlancs post3)
g' = setStringParam g prefix uri -- add the prefix
-- to the global list
-- ************* basic constants **************
--
g = [("BaseURI", baseURI),("AnonCounter", "0"), ("LastSubject", ""),
("LastVerb", "")]
-- ************* --------------- **************
-- globals for testing
g2 = [("BaseURI", baseURI),("AnonCounter", "1"), ("LastSubject", ":last_subject"),
("LastVerb", ":last_verb")]
-- test of node parsing 1
baseURI = "http://www/w3.org"
g1 = [("BaseURI", baseURI),("dc","http://gn.org/")]
p = "<#pat> :a :b" -- test with parseAtom g p
-- test of node parsing 2
p1 = "<> :a :b" -- test with parseAtom g p1
-- test of prefix parsing
p2 = "@prefix dc: ."
-- test of node with :...
p3 = ":a :b :c . { dc:ho \"oke\".}"
-- test of node with prefix:...
p4 = "dc:a dc:b dc:c . { dc:ho \"oke\".}"
-- test of node with
p5 = " dc:b dc:c . { dc:ho \"oke\".}"
-- test of node with ".." (constant)
p6 = "\"Hallo\" dc:b dc:c . { dc:ho \"oke\".}"
-- test of comment
p7 = "# ddddddd \r\n :a :b :c"
-- test of parseNodeList
p8 = ":a, :b, :c."
-- test of parsePropertyList
p9 = ":a :b; :c :d; :e :f."
-- test of tripleset
p10 = "{:a :b :c. :d :e :f.}"
{-
-- The bnf grammar
----------------------
--#
--# Taken from
--# on 2001-08-03 (version of 2001-04-10)
--#
--# Modifications:
--#
--# $Log: n3.bnf,v $
--# Revision 1.4 2001/08/06 20:56:21 sandro
--# added space* and space+ in several places
--# removed "#" from forbidden chars in URI_Reference
--# handles comments
--# made directives actually part of the grammar (!)
--# allowed nprefix to be zero-length
--#
--# Revision 1.3 2001/08/03 13:44:43 sandro
--# filled in remaining non-terminals
--#
--# Revision 1.2 2001/08/03 13:02:48 sandro
--# standardized BNF so blindfold can compile it
--# added ::= for each rule
--# added | for branches
--# added ; at end of rule
--# added # before comments
--# put quotes around literals
--# turn hypen into underscore in identifiers
--# rename prefix to nprefix (hack around blindfold keyword for now)
--#
--# Revision 1.1 2001/08/03 12:34:38 sandro
--# added opening comments
--#
--
--
--document ::= void
-- | statementlist;
--
--space ::= " " | "\n" | "\r" | comment;
--
--comment ::= "#" [^\r\n]*;
--
--statement ::= subject space+ property_list
-- | directive
-- ;
--
--statementlist ::= (statement space* ("." space*)?)* ;
--
--subject ::= node;
--
--verb ::= ">-" prop "->" # has xxx of
-- | "<-" prop "<-" # is xxx of
-- # | operator # has operator:xxx of??? NOT IMPLMENTED
-- | prop # has xxx of -- shorthand
-- | "has" prop # has xxx of
-- | "is" prop "of" # is xxx of
-- | "a" # has rdf:type of
-- | "=" # has daml:equivaent of
-- ;
--
--prop ::= node;
--
--node ::= uri_ref2
-- | anonnode
-- | "this"
-- | node
-- ;
--
--nodelist ::= void # (used in lists)
-- | node
-- | node nodelist
-- ;
--
--anonnode ::= "[" property_list "]" # something which ...
-- | "{" statementlist "}" # the statementlist itself as a resource
-- | "(" nodelist ")" # short for eg [ n3:first node1; n3:rest [ n3:first node2; n3:rest: n3:null ]]
-- ;
--
--property_list ::= void # to allow [...].
-- | verb space+ object_list
-- | verb space+ object_list space+ ";" space+ property_list
-- | ":-" anonnode #to allow two anonymous forms to be given eg [ a :Truth; :- { :sky :color :blue } ] )
-- | ":-" anonnode ";" property_list
-- ;
--
--object_list ::= object
-- | object "," object_list
-- ;
--
--uri_ref2 ::= qname
-- | "<" URI_Reference ">"
-- ;
--
--qname ::= nprefix ":" localname; # ??? Allow omit colon when prefix void - keyword clash
--
--object ::= subject
-- | string1 # " constant-value-with-escaping "
-- | string2 # """ constant value with escaping including single or double occurences of quotes and/or newlines """
-- # well-formed-xml-element ???? legacy or structured stuff - not implemented or distinguished
-- ;
--
--directive ::= "bind" space+ nprefix ":" uri_ref2 # Namespace declartion. Trailing "#" is omitted & assumed. Obsolete.
-- | "@prefix" space+ nprefix ":" space+ uri_ref2 # Namespace declaration
-- ;
--
--# operator ::= (Not implemented)
--# + >- operator:plus ->
--# - >- operator:minus ->
--# / >- operator:slash->
--# * >- operator:star-> (etc? @@)
--
--fragid ::= alpha alphanumeric* ;
--
--alpha ::= [a-zA-Z];
--
--alphanumeric ::= alpha | [0-9] | "_";
--
--void ::= "" ; # nothing
--
--URI_Reference ::= [^{}<>]*; # short version
--
--nprefix ::= "" | ((alpha | "_") alphanumeric*);
--
--localname ::= fragid;
--
--string1 ::= '"' string1_char* '"';
--
--string1_char ::= '\\"' | [^\"] ; # should disallow some other characters, etc.
--
--string2 ::= '"""' string2_char* '"""';
--
--string2_char ::= [^"] | ([^] [^] [^"]); # something like this; need to think about it some more
--
-----------------------------------------------------------------------}