import "Utils"
import "IO.hs"


{--
--  N3 parser
--------------------------------------
--
-- Author : G.Naudts E-mail : naudts_vannoten@yahoo.com
-- Address : Secretarisdreef 5 2288 Bouwel Belgium 
-- The parser is based on the grammar at the back  and has been token from :
-- http://2001/blindfold/sample/n3.bnf
-- and the parser uses also the structures defined in N3 primer :
-- http://www.w3.org/200/10/swap/Primer.htm
-- The output data structure consists basically of pairs (id,value) .
-- All abbreviations of N3 are resolved in the output .
-- Anonymous nodes get an anonymous subject value = T1 ... Tn where
-- n is the index of the last anonymous node .
-- The parser is a look-ahead parser .
-- The prelude used is standard.pre .
-- When an error occurs the stream is synchronized on the next point . 
-- With thanks to Mark P.Jones for his inspiring prolog interpreter .
-- Download Gofer at : http://www.cse.ogi.edu/~mpj/goferarc/
--
-- I give here a bnf of the output :
--
-- ParserOutput ::= ParsedString "End"
--
-- ParsedString ::= Triple (ParserOutput)*|
--                  TripleSet (ParserOutput)*
--
-- Triple ::= "Triple/" Subject Verb Object 
--
-- AnonTriple ::= "AnonTriple/" AnonSubject Verb Object 
--
-- TripleSet ::= "TripleSet/" Triple* AnonTriple* "EndOfSet/"
--
-- Subject ::= "Subject/" String "/"|
--             TripleSet 
--
-- AnonSubject ::= "AnonSubject/T" n "/"
--
-- Verb ::= "Verb/" String "/"
--
-- Object ::= "Object/" String "/"|
--            TripleSet
--
-- n ::= (digit)*
--
-- The output is defined by constants that are defined in a section
-- indicated by the header : Constants .

module N3Parser where

-- readN3 reads and parses a N3 file and puts the output on sysout
--readN3 :: String -> Dialogue
--readN3  f = readFile f  abort
--   (\input -> appendChan stdout (n3Parser input) abort done)-}

-- n3Parser get an input string and starts the parsing proces with the
-- global list; first spaces are skipped
n3Parser :: String -> String
n3Parser s = parseN3 (g, "", skipBlancs s)  

-- testResult gets a parsed item; checks for errors ;
-- if error skips till the next point and
-- continues parsing ; returns the global list and the rest string
testResult :: (Globals, String, String, String)  -> (Globals, String)
testResult (g, s1, s2, s3)
   | startsWith s1 "Error" = (g,  skipTillCharacter '.' s3)
   | otherwise = (g, s3)



-- function parseN3  : top level of the parser
-- Takes the global list and a string (without leading spaces)
-- and returns a string consisting of a list of identifier-value pairs
-- separated by blancs
--  e.g. "Subject :a Verb :b Object :c"
parseN3 :: ParserData ->  String
parseN3 (g, out, "") = ""
parseN3 (g, out, s@(x:xs))
   -- parse a prefix -- no output
   | x == '@'  =  parseN3 (g2, out, skipBlancs s4)
   -- parse a comment
   | x == '#'  = parseN3 (g, out, skipBlancs s5)
   -- anonymous node 
   | x == '[' =  parseN3 (parseAnonSet (g, out, s))
   -- must be a triple(set) parse triplelist
   | otherwise =  parseN3 (parseTripleSet (g, out, s))
   where (g1, s1, s2, s3) = parsePrefix g s
         (g2, s4) = testResult (g1, s1, s2, s3) 
         s5 = parseComment s

-- parse a set of triples : insert "TripleSet " and call parsePropertyList
-- Then call recusively parseTripelSet ; then insert "EndOfSet "
parseTripleSet :: ParserData -> ParserData
parseTripleSet (g, out, "") = (g, out, "")
parseTripleSet p@(g, out, s@(x:xs))
   -- begin of set detected; check for end of set and call recursively
   | y == '{' && bv1 = parseTripleSet 
           (parseTriple (g, out ++ " " ++ "TripleSet", skipBlancs ys))
   -- '}' found - insert "EndOfSet " and return
   | y == '}' = (g, out ++ " " ++ "EndOfSet ", skipBlancs ys)
   -- '.' found ; end of triple; return if single triple
   | y == '.' && not bv1 = parseTriple (g, out,skipBlancs ys)
   -- '.' found and analyzing set; recall parseTripleSet and skip point
   | y == '.' && bv1 = parseTripleSet (g, out, skipBlancs ys)
   -- '}' not found ==> synchronize on next point.
   | y == '{' && not bv1 && bv3 =
      (g, "Error Missing }", skipTillCharacter '.' (skipBlancs ys))
   -- no point found -- last statement without point or
   -- something is fundamentally wrong -- return empty rest string
   | y == '{' && not bv1 && not bv3 = (g, "Error Missing Point", "")
   -- must be a single triple : parseSubject and call parsePropertyList
   | otherwise = parseTriple p
   where bv1 = checkCharacter '}' ys  
         bv3 = checkCharacter '.' ys
         s1@(y:ys) = skipBlancs s

-- parseTriple parses a singel triple
parseTriple :: ParserData -> ParserData
parseTriple (g, out, "") = (g, out, "")
parseTriple (g, out, s) =
   parsePropertyList (parseSubject (g, out, (skipBlancs s)))

-- parse a set of anonymous triples : insert "AnonSet "
-- and call parsePropertyList
-- Then call recusively parseAnon ; then insert "EndOfSet "
parseAnonSet :: ParserData -> ParserData
parseAnonSet (g, out, "") = (g, out, "")
parseAnonSet (g, out, s@(x:xs))
   -- parse a set of anonymous triples: assign a subject
   -- and then call parsePropertyList
   | bv2 = parseAnonSet (parsePropertyList (g3, out ++ " " ++
      "AnonSet" ++ s5, skipBlancs s))
   -- ']' found - insert "EndOfSet " and return
   | x == ']' = (g, out ++ " " ++  "EndOfSet ", skipBlancs xs)
   -- '{' found call parseTripleSet
   | x == '{' = parseTripleSet (g, out, s)
   -- ']' not found ==> synchronize on next point.
   | x == '[' && not bv2 && bv3 =
      (g, "Error Missing ]", skipTillCharacter '.' (skipBlancs xs))
   -- no point found -- last statement without point or
   -- something is fundamentally wrong -- return empty rest string
   | x == '[' && not bv2 && not bv3 = (g, "Error Missing Point", "")
   -- nothing appropriate return
   | otherwise = (g, out, s)
   where  bv2 = checkCharacter ']' xs 
          bv3 = checkCharacter '.' xs
          (g3, s5) =  createAnonSubject g

-- parses a verb and then calls parseNodeList                                                      
-- accumulates in second parameter
--parsePropertyList :: ParserData -> ParserData
parsePropertyList (g, out, "") = (g, out, "")
parsePropertyList (g, out, s)
   -- end of propertyList
   | y == '.' = (g, out, s1)
   -- propertylist with subject already defined
   | y == ';' =  parsePropertyList (parseProperty (g, out ++ " "
                 ++ subject,  ys))
   | otherwise = parsePropertyList (parseProperty (g, out, s1))
   where subject = "Subject " ++ (getStringParam g "LastSubject")
         s1@(y:ys) = skipBlancs s


-- parse a single property
--parseProperty :: ParserData -> ParserData
parseProperty (g, out, "") = (g, out, "")
parseProperty (g, out, s) =  (parseNodeList (parseVerb (g, out, (skipBlancs s))))

-- test the function parseNodeList
-- testParseNodeList :: [ParserData]
testParseNodeList =
   stringToFile
       (composed testList)
       "testParseNodeList.tst"
   where testList = [(g2, "", ":a, :b,:c."),
                     (g2, "", ":a."),
                     (g2, "", ":a"),
                     (g2, "", ":a, :b,:c")]

-- compose the output list
composed [] = []
composed (x:xs) = "\n\nInput:" ++ tripleToString x ++
                  "\n\nOutput" ++ tripleToString (parseNodeList x) ++ composed xs

-- display ParserData in a more or less readable format 
displayTriple :: ParserData -> IO ()
displayTriple (g, out, rest) =
   putStr ("\n" ++ show g ++ "\n" ++ "out: " ++ out ++ "\n" ++ "rest: " ++ rest)

-- display a list of ParserData
displayTripleList :: [ParserData] -> IO ()
displayTripleList [] = putStr "Empty list"
displayTripleList triple =  putStr (tripleListToString triple)

-- transforms ParserData in string format
tripleToString :: ParserData -> String
tripleToString (g, out, rest) =
   "\n\n" ++ show g ++ "\n" ++ "out: " ++ out ++ "\n" ++ "rest: " ++ rest

-- transform a list of ParserData to String
tripleListToString :: [ParserData] -> String
tripleListToString [] = []
tripleListToString (x:xs) = tripleToString x ++ tripleListToString xs

-- save a string to a file of which the name is indicated
-- by the second string
-- stringToFile :: (String, String) -> IO
stringToFile s fileName = do toHandle <- openFile fileName WriteMode 
                             hPutStr toHandle s
                             hClose toHandle
                             putStr "Done."

--  parses nodes separated by , .
-- Subject and verb are retrieved from the globals .
--parseNodeList :: ParserData -> ParserData
parseNodeList (g, out, "") = (g, out, "")
parseNodeList (g, out, s)
   | y == '.' = (g, out, sd)
   | y == ';' = (g, out, sd)
   | y == '{' = parseNodeList (parseTripleSet (g, out, sd))
   | y == '[' = parseNodeList (parseAnonSet (g, out, sd))
   | y == ',' = parseNodeList (g3, out ++ " " ++  subject ++ " "  ++ verb
                ++ " " ++ s5 ++ " " ++ s6, skipBlancs s7)
   | otherwise = parseNodeList (parseObject (g, out, sd))
   where sd@(y:ys) = skipBlancs s
         subject = "Subject "  ++ getStringParam g "LastSubject"
         verb = "Verb " ++ getStringParam g "LastVerb"
         (g3, s5, s6, s7) = parseNode g ys

-- parse a subject .
--parseSubject :: ParserData -> ParserData
parseSubject (g, out, "") = (g, out, "")
parseSubject (g, out, s) = (g2, out ++ " " ++ s1 ++ " " ++ s2, skipBlancs s4)
     where (g1, s1, s2, s3) = saveSubject (parseNode g (skipBlancs sd))
           (g2, s4) = testResult (g1, s1, s2, s3)
           sd = skipBlancs s
                      
-- parse a verb
--parseVerb :: ParserData -> ParserData
parseVerb (g, out, "") = (g, out, "")
parseVerb (g, out, s) =  (g2, out ++ " " ++ s1 ++ " " ++ s2, skipBlancs s4)
     where (g1, s1, s2, s3) = saveVerb (parseNode g (skipBlancs sd))
           (g2, s4) = testResult (g1, s1, s2, s3)
           sd = skipBlancs s

-- parse an object
--parseObject :: ParserData -> ParserData
parseObject (g, out, "") = (g, out, "")
parseObject (g, out, s) = (g2, out ++ " " ++ s1 ++ " " ++ s2, skipBlancs s4)
     where (g1, s1, s2, s3) = parseNode g (skipBlancs sd)
           (g2, s4) = testResult (g1, s1, s2, s3)
           sd = skipBlancs s

-- save a subject in the global list
saveSubject :: (Globals, String, String, String) -> (Globals, String, String, String)
saveSubject (g, s1, s2, s3) = (g1, "Subject", s2, s3)
   where g1 = setStringParam g "LastSubject" s2


-- save a verb in the global list
saveVerb :: (Globals, String, String, String) -> (Globals, String, String, String)
saveVerb (g, s1, s2, s3) = (g1, "Verb", s2, s3)
   where g1 = setStringParam g "LastVerb" s2

createAnonSubject :: Globals -> (Globals, String)
createAnonSubject g = (g1, s)
   where i = getIntParam g "AnonCounter"
         g1 = setIntParam g "AnonCounter" (i+1)
         s = "T" ++ intToString (i+1)

-- test the function parseNode
-- testParseNode ::
testParseNode =
   stringToFile
       (composeNodes testList)
       "testParseNode.tst"
   where testList = [ "<> :a :b",
                      ":a :b :c .",
                      "@prefix dc: <http://gn.org/> .",
                      "dc:a dc:b dc:c . {<http://www.w3.org> dc:ho \"oke\".}",
                      "<http://www.guido> dc:b dc:c . {<http://www.w3.org> dc:ho \"oke\".}",
                      ";<#pat> :a :b.",
                 --     "<#pat> :a :b",
                      "\"Hallo\" dc:b dc:c . {<http://www.w3.org> dc:ho \"oke\".}"]

-- compose the output list
composeNodes [] = []
composeNodes (x:xs) = "\n\nInput: " ++ x ++
                  "\n\nOutput " ++ quadToString (parseNode g x)
                  ++ composeNodes xs


-- transforms output of parseNode in string format
quadToString :: (Globals, String, String, String) -> String
quadToString (g, s1, s2, s3) =
   "\n\n" ++ show g ++ "\n" ++ "item: " ++ s1 ++ " value: " ++ s2 ++
      " rest: " ++ s3

-- transforms a pair (g, s) to String
pairToString :: (Globals, String) -> String
pairToString (g, s) =
   "\n\n" ++ show g ++ "\n" ++ "input: " ++ s

-- function for parsing nodes
-- input are the global data and the string to be parsed
-- returns a multiple that exists of global list, a node, the value and
-- the rest string .
-- Formats of a node (=URI):
-- <#...>
-- <>
-- :...
-- prefix:...
-- <URI>
-- ".."    (constant)
parseNode :: Globals -> String ->  (Globals, String, String, String)
parseNode g "" = (g, "", "", "")
parseNode g s
   -- node in a node list
   |y == ';' = parseNode g ys
   -- <#...>
   |bv1 && bv2 && bv3 =
       (g, "Object", (getStringParam g "BaseURI") ++ "#" ++ node, post3)
   -- <>
   |bv1 && bv4 =
       (g, "Object",getStringParam g "BaseURI",post4)
   -- prefix:...
   |bv7 && bv8 && bv9 = (g, "Object", uri ++"#" ++ postfix, post8)
   -- "..." constant 
   |bv10 && bv11 = (g, "Constant", const, post11)
   -- <URI>
   |bv1 && bv12 = (g, "Object", uria, post12)
   -- :...
   |bv5 && bv6 = (g, "Object", getStringParam g "BaseURI"++"#" ++ node2, post6)
   -- otherwise error 
   |otherwise = (g, "Error in parsing node: "++ s,"", sd)
   -- secondary definitions 
   where  (bv1, post1) = takec '<' sd
          (bv2, post2) = takec '#' (skipBlancs post1)
          (bv3, node, post3) = parseUntil '>' (skipBlancs post2)
          (bv4, post4) = takec '>' (skipBlancs post1)
          (bv5, post5) = takec ':' sd
          (bv6, node2, post6) = parseUntilDelim delimNode (skipBlancs post5)
          (bv7, prefix, post7) = parseUntil ':' sd
          (bv8, postfix, post8) = parseUntilDelim delimNode (skipBlancs post7)
          uri = getStringParam g prefix
          bv9 = uri /= "Error"
          (bv10 , post10) = takec '"'  sd
          (bv11 , const, post11) = parseUntil '"'  (skipBlancs post10)
          (bv12 , uria , post12) = parseUntil '>' (skipBlancs post1)
          sd@(y:ys) = skipBlancs s
          
-- function for parsing prefixes
-- input are the global data and the string to be parsed
-- returns a multiple that exists of an identifier, the value
-- the rest string and the global list.
-- format of a prefix :
-- @prefix ...: <URI>
-- the prefix is added in the global list as ("prefix","value-of-prefix");
-- the returned value = ""
parsePrefix :: Globals -> String ->  (Globals, String, String, String)
parsePrefix g "" = (g, "", "", "")
parsePrefix g s
   |bv1 && bv2 && bv3 && bv4 =
       (g, "Prefix", "", post4)
   |otherwise = (g, "Error", "", s )
   where (bv1, post1) = parseString "@prefix" s
         (bv2, prefix, post2 ) = parseUntil ':' (skipBlancs post1)
         (bv3, uri, post3) = parseUntil '>' (skipBlancs post2)
         (bv4, post4) = takec '.' (skipBlancs post3)
         g' = setStringParam g prefix uri   -- add the prefix
         -- to the global list
-- ************* basic constants **************
--
g = [("BaseURI", baseURI),("AnonCounter", "0"), ("LastSubject", ""),
   ("LastVerb", "")]

-- ************* --------------- **************
-- globals for testing
g2 = [("BaseURI", baseURI),("AnonCounter", "1"), ("LastSubject", ":last_subject"),
   ("LastVerb", ":last_verb")]

-- test of node parsing 1
baseURI = "http://www/w3.org"
g1 = [("BaseURI", baseURI),("dc","http://gn.org/")]
p = "<#pat> :a :b" -- test with parseAtom g p

-- test of node parsing 2
p1 = "<> :a :b"   -- test with parseAtom g p1

-- test of prefix parsing
p2 = "@prefix dc: <http://gn.org/> ."

-- test of node with :...
p3 = ":a :b :c . {<http://www.w3.org> dc:ho \"oke\".}"

-- test of node with prefix:...
p4 = "dc:a dc:b dc:c . {<http://www.w3.org> dc:ho \"oke\".}"

-- test of node with <URI>
p5 = "<http://www.guido> dc:b dc:c . {<http://www.w3.org> dc:ho \"oke\".}"

-- test of node with ".."    (constant)
p6 = "\"Hallo\" dc:b dc:c . {<http://www.w3.org> dc:ho \"oke\".}"

-- test of comment
p7 = "# ddddddd \r\n :a :b :c"

-- test of parseNodeList
p8 = ":a, :b, :c."

-- test of parsePropertyList
p9 = ":a :b; :c :d; :e :f."

-- test of tripleset
p10 = "{:a :b :c. :d :e :f.}"

{-
-- The bnf grammar
----------------------
--#
--#  Taken from <http://www.w3.org/DesignIssues/Notation3.html>
--#  on 2001-08-03 (version of 2001-04-10)
--#
--#  Modifications:
--#     
--# $Log: n3.bnf,v $
--# Revision 1.4  2001/08/06 20:56:21  sandro
--# added space* and space+ in several places
--# removed "#" from forbidden chars in URI_Reference
--# handles comments
--# made directives actually part of the grammar (!)
--# allowed nprefix to be zero-length
--#
--# Revision 1.3  2001/08/03 13:44:43  sandro
--# filled in remaining non-terminals
--#
--# Revision 1.2  2001/08/03 13:02:48  sandro
--# standardized BNF so blindfold can compile it
--#    added ::= for each rule
--#    added | for branches
--#    added ; at end of rule
--#    added # before comments
--#    put quotes around literals
--#    turn hypen into underscore in identifiers
--#    rename prefix to nprefix (hack around blindfold keyword for now)
--#
--# Revision 1.1  2001/08/03 12:34:38  sandro
--# added opening comments
--#
--
--
--document ::= void 
--           | statementlist;
--
--space ::= " " | "\n" | "\r" | comment;
--
--comment ::= "#" [^\r\n]*;
--
--statement ::= subject space+ property_list
--            | directive
--            ;
--
--statementlist ::= (statement space* ("." space*)?)* ;
--
--subject ::= node;
--
--verb ::= ">-" prop "->"    # has xxx of 
--       | "<-" prop "<-"    # is xxx of 
--    #  | operator          # has operator:xxx of??? NOT IMPLMENTED
--       | prop              # has xxx of -- shorthand
--       | "has" prop        # has xxx of
--       | "is" prop "of"    # is xxx of
--       | "a"               # has rdf:type of
--       | "="               # has daml:equivaent of
--       ;
--
--prop ::= node;
--
--node ::= uri_ref2 
--       | anonnode
--       | "this"
--       | node 
--       ;
--
--nodelist ::= void    # (used in lists) 
--           | node
--           | node nodelist
--           ;
--
--anonnode ::= "[" property_list "]"  # something which ...
--           | "{" statementlist "}"  # the statementlist itself as a resource
--           | "(" nodelist ")"       # short for eg [ n3:first node1; n3:rest [ n3:first node2; n3:rest: n3:null ]]
--           ;
--
--property_list ::= void   # to allow [...]. 
--                | verb space+ object_list
--                | verb space+ object_list space+ ";" space+ property_list
--                | ":-" anonnode  #to allow two anonymous forms to be given eg [ a :Truth; :- { :sky :color :blue } ] )
--                | ":-" anonnode ";" property_list
--                ;
--
--object_list ::= object 
--              | object "," object_list
--              ;
--
--uri_ref2 ::= qname 
--           | "<" URI_Reference ">"
--           ;
--
--qname ::= nprefix ":" localname; # ??? Allow omit colon when prefix void - keyword clash 
--
--object ::= subject 
--         | string1 # " constant-value-with-escaping "
--         | string2 # """ constant value with escaping including single or double occurences of quotes and/or newlines """
--         #      well-formed-xml-element ???? legacy or structured stuff - not implemented or distinguished
--         ;
--
--directive ::= "bind" space+ nprefix ":" uri_ref2   # Namespace declartion. Trailing "#" is omitted & assumed. Obsolete.
--            | "@prefix" space+ nprefix ":" space+ uri_ref2  # Namespace declaration
--            ;
--
--# operator ::=  (Not implemented) 
--#      + >- operator:plus -> 
--#      - >- operator:minus ->
--#      / >- operator:slash->
--#      * >- operator:star-> (etc? @@)
--
--fragid ::= alpha alphanumeric* ;
--
--alpha ::= [a-zA-Z];
--
--alphanumeric ::= alpha | [0-9] | "_";
--
--void ::= "" ;   # nothing
--
--URI_Reference ::= [^{}<>]*;    # short version
--
--nprefix ::= "" | ((alpha | "_") alphanumeric*);
--
--localname ::= fragid;
--
--string1 ::= '"' string1_char* '"';
--
--string1_char ::= '\\"' | [^\"] ;           # should disallow some other characters, etc.
--
--string2 ::= '"""' string2_char* '"""';
--
--string2_char ::= [^"] | ([^] [^] [^"]);    # something like this; need to think about it some more
--
-----------------------------------------------------------------------}