module N3Parser where
import "Utils"
import "IO"
import "Interact"
import "Observe"

{--
--  N3 parser
--------------------------------------
--
-- Author : G.Naudts E-mail : naudts_vannoten@yahoo.com
-- Address : Secretarisdreef 5 2288 Bouwel Belgium 
-- The parser is based on the grammar at the back  and has been token from :
-- http://2001/blindfold/sample/n3.bnf
-- and the parser uses also the structures defined in N3 primer :
-- http://www.w3.org/200/10/swap/Primer.htm
-- The output data structure consists basically of triples
-- (id,short value, full value) . The full value gives the complete URI.
-- The tree structure of N3 is kept intact, but dummy subjects and verbs
-- are introduced where these are missing (_subject and _verb)
-- Points are eliminated.
-- In ":a is :b of :c" of is eliminated and the verb :b is preceded
-- by "Reverse" meaning subject and object have to be reversed.
-- In ":a has :b of :c" has and of are eliminated. 
-- 
-- Anonymous nodes get an anonymous subject value = _T$$$1 ... _T$$$n where
-- n is the index of the last anonymous node .
-- The parser is basically recursive descent with look-ahead features.
-- The prelude used is standard.pre .
-- When an error occurs the stream is synchronized on the next point
-- and an error message is included in the stream.
-- With thanks to Mark P.Jones for his inspiring prolog interpreter.
-- Download hugs at : http://www.haskell.org/hugs
--
-- I give here a bnf of the output :
--
-- ParserOutput ::= Triple (ParserOutput)*|
--                  TripleSet (ParserOutput)*|
--                  AnonSet (ParserOutput)*  ;
--
-- Triple ::= "Triple" Sep Subject Verb Object ; 
--
-- AnonTriple ::= AnonSubject Verb Object ;
--
-- TripleSet ::=  Sep Triple* "EndOfSet" Sep ;
--
-- AnonSet ::= Sep AnonTriple* "EndOfSet" Sep ;
--
-- Subject ::= "Subject" Sep String Sep|
--             "Set" Sep TripleSet| 
--             "AnonSet" Sep AnonSet ;
--
-- AnonSubject ::= "Subject" Sep "_T$$$" n Sep ;
--
-- Verb ::= ["Reverse" sep] "Verb" Sep String Sep| #(Reverse means subject
--                                                 #and object must be reversed)   
--          "Set" Sep TripleList|
--          "AnonSet" Sep AnonSet ;
--
-- Object ::= "Object" Sep String Sep|
--            "ObjectSet" Sep TripleSet|
--            "AnonSet" Sep AnonSet ;
--
-- n ::= (digit)*;
--
-- Sep ::= Separator ;
--
-- Prefix ::= "Prefix" Sep String Sep ;
--
-- The separator is defined in the source code; might be : /@/
-- The output is defined by constants that are defined in a section
-- indicated by the header : Constants .
-- _subject and _verb refer to the latest subject and verb.
--
-- To do: better synchronization && error messages.
-}

-- read extern permits using the parser from another module.
-- a function with type String -> IO () is passed in the call.
-- This function will be called with the parsed string as a parameter.
readExternN3 :: String -> (String -> IO ()) -> IO ()
readExternN3 fileName function =
   do {s <- readFile fileName; function (n3Parser s)}

-- readN3 reads and parses a N3 file and puts the output on sysout
readN3 :: String -> IO ()
readN3 fileName =  do {s  <- readFile fileName; putStr (n3Parser s)}

-- interpreteN3 reads a file and executes triple per triple
interPreteN3 :: String -> IO ()
interPreteN3 fileName = do {s <- readFile fileName; interpreter s}

-- interpreteNode reads a file and executes node per node
interPreteNode :: String -> IO ()
interPreteNode fileName = do {s <- readFile fileName; interpreteN s}

-- saveN3 reads and parses a N3 file and saves the output in a file
-- with suffix pr
saveN3 :: String -> IO ()
saveN3 fileName = do {s <- readFile fileName;
                 (n3ParseAndSave s (fileName ++ ".pr"))}

-- n3ParseAndSave get an input string and starts the parsing proces with the
-- global list; first spaces are skipped; the output is put in the file
-- indicated by the second parameter.
n3ParseAndSave :: String -> String -> IO ()
n3ParseAndSave s fileName = stringToFile --(filterSep
                (getSecondQ (parseN3 (g, "", skipBlancs s, newLen))) fileName
   where newLen
            | debug = length s
            | otherwise = 0

-- n3Parser get an input string and starts the parsing proces with the
-- global list; first spaces are skipped
n3Parser :: String -> String
n3Parser s =  getSecondQ (parseN3 (g, "", skipBlancs s, newLen))
   where newLen
            | debug = length s + 1
            | otherwise = 0

-- change the separator to something more readable
filterSep :: String -> String
filterSep [] = []
filterSep s@(x:xs)
   | startsWith s sep = " || " ++ filterSep s1
   | otherwise = x:filterSep xs
   where s1 = drop 3 s

-- getsecond gets the second element from a triple
getSecond :: (a, b, c) -> b
getSecond (a, b, c) = b

-- getSecondQ gets the second element from a quadruple
getSecondQ :: (a, b, c, d) -> b
getSecondQ (a, b, c, d) = b


-- testResult gets a parsed item; checks for errors ;
-- if error skips till the next point and
-- continues parsing ; returns the global list, the rest string and a boolean value.
testResult :: (Globals, String, String, String)  -> (Globals, String, Bool)
testResult (g, s1, s2, s3)
   | startsWith s1 "Error" = (g,  skipTillCharacter '.' s3, False)
   | otherwise = (g, s3, True)

-- setting the debug flag to true will detect loops
debug = True

-- function parseN3  : top level of the parser
-- Takes the global list and a string (without leading spaces)
-- and returns a string consisting of a list of identifier-value pairs
-- separated by the separator
--  e.g. "Subject :a Verb :b Object :c"
parseN3 :: ParserData ->  ParserData
parseN3 (g, out, "", oldLen) = (g, out, "", oldLen)
parseN3 (g, out, s, oldLen)
   -- s is really blanc
   | sd == "" = (g, out, "", oldLen)
   -- point found = end of triple - just skip
   | y == '.' = parseN3 (g, out, skipBlancs ys, newLen)
   -- loop test
   | bv1 && debug = (g, out ++ "Loop error length: " ++ intToString oldLen ++ 
                      " String:" ++ s ++ " in parseN3  /@/", s, newLen)
   -- intercept bind   (obsolete)
   | startsWith sd "bind" = parseN3 (g4, out ++ s5 ++ sep ++ s6 ++ sep,
                            skipBlancs s8, newLen)
   -- parse a prefix -- no output
   | y == '@' = parseN3 (g2, out ++ s1 ++ sep ++ s2 ++ sep, skipBlancs s4,
                        newLen)
   -- must be a triple parse triple
   | otherwise = parseN3 (parseTriple (g, out, sd, newLen))
   where (g1, s1, s2, s3) = parsePrefix g sd
         (g2, s4, bool) = testResult (g1, s1, s2, s3) 
         sd@(y:ys) = skipBlancs s
         (g3, s5, s6, s7) = parseBind g sd
         (g4, s8, bool1) = testResult (g3,s5, s6, s7)
         bv1 = newLen == oldLen
         newLen
            | debug = length sd
            | otherwise = 0


-- parse a set of triples : insert "Set " and call parsePropertyList
-- Then call recusively parseTripelSet ; then insert "EndOfSet "
parseTripleSet :: ParserData -> ParserData
parseTripleSet (g, out, "", oldLen) = (g, out, "", oldLen)
parseTripleSet p@(g, out, s@(x:xs), oldLen)
   -- missing input
   | bv4 = (g, "Error no input" ++ sep, "", 0)
   -- begin of set detected; check for end of set and call recursively
   | y == '&' && bv1 = parseTripleSet (parseTriple 
                       (g, out ++ "Set" ++ sep, skipBlancs ys, newLen))
   -- '}' found - insert "EndOfSet " and return
   | y == '}' = (g, out ++ "EndOfSet" ++ sep, skipBlancs ys, newLen)
   -- '}' not found ==> synchronize on next point.
   | y == '{' && not bv1  =
      (g, "Error Missing }" ++ sep,skipTillCharacter '.' ys, newLen)
   -- no point found -- last statement without point or
   -- something is fundamentally wrong -- return empty rest string
   | y == '{' && not bv1 && not bv3 = (g, "Error Missing Point" ++ sep, (skipBlancs ys),
                                      newLen)
   -- must be the next triple : parseSubject and call parsePropertyList
   | otherwise = parseTripleSet (parseTriple p)
   where bv1 = checkCharacter '}' ys  
         bv3 = checkCharacter '.' ys
         bv4 = length (skipBlancs s) == 0
         s1@(y:ys) = skipBlancs s
         newLen
            | debug = length s1
            | otherwise = 0

-- parseTriple parses a singel triple
parseTriple :: ParserData -> ParserData
parseTriple (g, out, "", oldLen) = (g, out, "", oldLen)
parseTriple (g, out, s, oldLen)
   -- no input
   | bv1 = (g, "Error no input" ++ sep, "", 0)
   -- higher separators -- return
   | y == ']' = (g, out, s1, newLen)
   | y == '}' = (g, out, s1, newLen)
   -- end of triple return
   | y == '.' = (g, out, ys, newLen)
   -- parse a propertylist now
   | otherwise = (parsePropertyList  (parseSubject (g, out, s1, newLen)))
   where s1@(y:ys) = skipBlancs s
         bv1 = length (skipBlancs s) == 0
         newLen
            | debug = length s1
            | otherwise = 0

-- parseTripleSpecial parses a singel triple for an anonymous set
parseTripleSpecial :: ParserData -> ParserData
parseTripleSpecial (g, out, "", oldLen) = (g, out, "", oldLen)
parseTripleSpecial (g, out, s, oldLen)
   -- higher separators -- return
   | y == ']' = (g, out, s1, newLen)
   | y == '}' = (g, out, s1, newLen)
   -- end of triple return
   | y == '.' = (g, out, ys, newLen)
   -- embedded anonymous set
   | y == '[' = (parsePropertyList (parseSubject (g, out, s1, newLen))) 
   -- parse a propertylist now
   | otherwise = parseTriple (parsePropertyList (g, out, s1, newLen))
   where s1@(y:ys) = skipBlancs s
         newLen
            | debug = length s1
            | otherwise = 0


-- parse a set of anonymous triples : insert "AnonSet "
-- and call parsePropertyList
-- Then call recusively parseAnon ; then insert "EndOfSet "
parseAnonSet :: ParserData -> ParserData
parseAnonSet (g, out, "", oldLen) = (g, out, "", oldLen)      
parseAnonSet (g, out, s, newLen)
   -- . found recall parsePropertyList
   | y == '.' = parseAnonSet (parsePropertyList (g3, out, skipBlancs ys,
                          newLen))
   -- parse a set of anonymous triples: assign a subject
   -- and then call parsePropertyList
   | y == '&' && bv2 = parseAnonSet (parseTripleSpecial (g3, out ++ 
      "AnonSet" ++ sep ++ "Subject" ++ sep ++ s5 ++ sep, ys, newLen))
   -- ']' found - insert "EndOfSet " and return
   | y == ']' = (g, out ++ "EndOfSet" ++ sep, skipBlancs ys, newLen)
   -- '{' found call parseTripleSet
   | y == '{' = parseTripleSet (g, out, sd, newLen)
   -- ']' not found ==> synchronize on next point.
   | y == '&' && not bv2 && bv3 =
      (g, "Error Missing ]", skipTillCharacter '.' (skipBlancs ys), newLen)
   -- no point found -- last statement without point or
   -- something is fundamentally wrong -- return empty rest string
   | y == '&' && not bv2 && not bv3 = (g, "Error Missing Point", "", newLen)
   -- nothing appropriate return
   | otherwise = (g, out, sd, newLen)
   where  bv2 = checkCharacter ']' ys 
          bv3 = checkCharacter '.' ys
          (g3, s5) =  createAnonSubject g
          sd@(y:ys) = skipBlancs s
          newLen
             | debug = length sd
             | otherwise = 0


-- parses a verb and then calls parseNodeList                                                      
-- accumulates in second parameter
parsePropertyList :: ParserData -> ParserData
parsePropertyList (g, out, "", oldLen) = (g, out, "", oldLen)
parsePropertyList (g, out, s, oldLen)
   -- end of propertyList
   | y == '}' = (g, out, s1, newLen)
   -- end of propertyList
   | y == '.' = (g, out, s1, newLen)
   -- end of anonymous set
   | y == ']' = (g, out, s1, newLen)
   -- propertylist with subject already defined
   | y == ';' =  parsePropertyList (parseProperty (g, out 
                 ++ "_subject" ++ sep,  ys, newLen))
   | otherwise = parsePropertyList (parseProperty (g, out, s1, newLen))
   where s1@(y:ys) = skipBlancs s
         newLen
            | debug = length s1
            | otherwise = 0


-- parse a single property
parseProperty :: ParserData -> ParserData
parseProperty (g, out, "", oldLen) = (g, out, "", oldLen)
parseProperty (g, out, s, oldLen)
   -- end of property
   | y == '}' = (g, out, s1, newLen)
   | y == ']' = (g, out, s1, newLen)
   -- end of property
   | y == '.' = (g, out, s1, newLen)
   | otherwise =  (parseNodeList (parseVerb (g, out, s1, newLen)))
   where s1@(y:ys) = skipBlancs s
         newLen
            | debug = length s1
            | otherwise = 0

-- parses nodes separated by , .
-- Subject and verb are retrieved from the globals .
parseNodeList :: ParserData -> ParserData
parseNodeList (g, out, "", oldLen) = (g, out, "", oldLen)
parseNodeList (g, out, s, oldLen)
   -- higher separators return
   | y == '.' = (g, out, sd, newLen)
   | y == ';' = (g, out, sd, newLen)
   | y == '}' = (g, out, sd, newLen)
   | y == ']' = (g, out, sd, newLen)
   -- parse the next node (=object)
   | y == ',' = parseNodeList (g3, out ++ "_subject" ++ sep  ++ "_verb"
                ++ sep ++ out1, skipBlancs s5, newLen)
   -- parse the first (possibly last) node
   | otherwise = parseNodeList (parseObject (g, out, sd, newLen))
   where sd@(y:ys) = skipBlancs s
         (g3, out1, s5, len) = parseObject (g, "", ys, newLen)
         newLen
            | debug = length sd
            | otherwise = 0

-- parse a subject .
parseSubject :: ParserData -> ParserData
parseSubject (g, out, "", oldLen) = (g, out, "", oldLen)
parseSubject (g, out, s, oldLen)
     -- embedded sets 
     | y == '{' = parseTripleSet (g, out, '&':ys, newLen)
     | y == '[' = parseAnonSet (g, out, '&':ys, newLen)
     -- parse a subject
     | bool = (g2, out ++ "Subject" ++ sep ++ s2 ++ sep, skipBlancs s4, newLen)
     | otherwise = (g2, out ++ s1 ++ sep ++ s2 ++ sep, skipBlancs s4, newLen)
     where (g1, s1, s2, s3) =  (parseNode g (skipBlancs sd))
           (g2, s4, bool) = testResult (g1, s1, s2, s3)
           sd@(y:ys) = skipBlancs s
           newLen
              | debug = length sd
              | otherwise = 0
                      
-- parse a verb
parseVerb :: ParserData -> ParserData
parseVerb (g, out, "", oldLen) = (g, out, "", oldLen)
parseVerb (g, out, s, oldLen)
     -- embedded sets 
     | y == '{' = parseTripleSet (g,out, '&':ys, newLen)
     | y == '[' = parseAnonSet (g, out, '&':ys, newLen)
     -- is detected
     |bv2 = parseVerb (g, out ++ "Inverse" ++ sep, sd2, newLen)
     -- has detected
     | bv1 = parseVerb (g, out, sd1, newLen)
     -- parse a verb
     | bool =  (g2, out ++ "Verb" ++ sep ++ s2 ++ sep, skipBlancs s4,
                     newLen)
     | otherwise = (g2, out ++ s1 ++ sep ++ s2 ++ sep, skipBlancs s4,
                     newLen)
     where (g1, s1, s2, s3) = parseNode g (skipBlancs sd)
           (g2, s4, bool) = testResult (g1, s1, s2, s3)
           sd@(y:ys) = skipBlancs s
           newLen
              | debug = length sd
              | otherwise = 0
           -- has detected - must be skipped
           bv1 = startsWith sd "has"
           bv2 = startsWith sd "is"
           sd1 = skipBlancs (drop 3 sd)
           sd2 = skipBlancs (drop 2 sd)

-- parse an object
parseObject :: ParserData -> ParserData
parseObject (g, out, "", oldLen) = (g, out, "", oldLen)
parseObject (g, out, s, oldLen)
     -- embedded sets 
     | y == '{' = parseTripleSet (g,out, '&':ys, newLen)
     | y == '[' = parseAnonSet (g, out, '&':ys, newLen)
     -- of detected
     | bv1 = parseObject (g, out, sd1, newLen)
     -- parse a subject
     | bool =  (g2, out ++ "Object" ++ sep ++ s2 ++ sep, skipBlancs s4, newLen)
     | otherwise = (g2, out ++ s1 ++ sep ++ s2 ++ sep, skipBlancs s4, newLen)
     where (g1, s1, s2, s3) =  (parseNode g (skipBlancs sd))
           (g2, s4, bool) = testResult (g1, s1, s2, s3)
           sd@(y:ys) = skipBlancs s
           newLen
              | debug = length sd
              | otherwise = 0
           -- of detected - must be skipped
           bv1 = startsWith sd "of"
           sd1 = skipBlancs (drop 2 sd)

-- create an anonymous subject; form= _T$$$n 
createAnonSubject :: Globals -> (Globals, String)
createAnonSubject g = (g1, s1)
   where i = getIntParam g "AnonCounter"
         g1 = setIntParam g "AnonCounter" (i+1)
         s = "_T$$$" ++ intToString (i+1)
         s1 = s ++ "/@/" ++ s


-- function for parsing nodes
-- input are the global data and the string to be parsed
-- returns a multiple that exists of global list, a node, the value and
-- the rest string .
-- Formats of a node (=URI):
-- <#...>
-- <>
-- :...
-- prefix:...
-- <URI>
-- ".."    (constant)
parseNode :: Globals -> String ->  (Globals, String, String, String)
parseNode g "" = (g, "", "", "")
parseNode g s
   -- node in a node list
   | y == ';' = parseNode g ys
   -- starts with ':' This refers to the parsed document
   | y == ':' = parseNodeThis g sd
   -- starts with '<' Three cases : <> <#..> <URI>
   | y == '<' = parseNodeLesser g ys
   -- starts with '?:' This is a variable.
   | y == '?' = parseVariableQ g sd
   -- starts with '_:' This is a variable.
   | y == '_' = parseVariable g sd
   -- intercept special comment """
   | bv6 = parseSpecialComment g sd1
   -- starts with '"' Constant
   | y == '"' = parseConstant g ys
   -- = is a special case
   | y == '=' = (g, "Node", "=" ++ sep ++ daml ++ "equivalent>", skipBlancs ys)
   -- the verb is "a"
   | bv1 = (g, "Node" ,"a" ++ sep ++ rdf ++ "type>", skipBlancs ys)
   -- skip "of"
   | bv2 = parseNode g (skipBlancs (drop 2 sd))
   -- "is" detected, insert "Inverse"
   | bv3 = (g1, s1, "Inverse" ++ sep ++ s2, skipBlancs (s3))
   -- "has" detected skip this
   | bv4 = parseNode g (skipBlancs (drop 3 sd))
   -- "this" detected
   | bv5 = (g, "Node", "this" ++ sep ++ "this", skipBlancs (drop 4 sd))
   -- lonely : detected
   -- must be format : prefix:postfix The prefix must be known
   | otherwise = parseNodePrefix g s 
   -- first skip all blancs
   where  sd@(y:ys) = skipBlancs s
          y1:ys1 = ys
          y2:ys2 = ys1
          y3:ys3 = ys2
          y4:ys4 = ys3
          bv1 = startsWith sd "a" && testBlancs y1
          bv2 = startsWith sd "of" && testBlancs y2
          bv3 = startsWith sd "is" && testBlancs y2
          bv4 = startsWith sd "has" && testBlancs y3
          bv5 = startsWith sd "this" && testBlancs y4
          bv6 = startsWith sd "\"\"\""
          (g1, s1, s2, s3) = parseNode g (skipBlancs (drop 2 sd))
          sd1 = drop 3 sd

-- parse a special comment (starts with """)
parseSpecialComment :: Globals -> String -> (Globals, String, String, String)
parseSpecialComment g "" = (g, "", "", "")
parseSpecialComment g s
        |bool = (g, "Constant", "literal" ++ sep ++ parsed,
                         skipBlancs rest1)
        |otherwise = (g, "Error parsing special constant: ", "",
                         skipBlancs s)
        where (bool, parsed, rest) = parseUntilString s "\"\"\""
              rest1 = drop 3 rest

-- parse a constant
parseConstant :: Globals -> String -> (Globals, String, String, String)
parseConstant g "" = (g, "", "", "")
parseConstant g s
    -- normal case
    | bv1 = (g, "Constant", "literal" ++ sep ++ const, skipBlancs post1)
    -- error
    | otherwise = (g, "Error parsing constant:", "", sd)
    where (bv1, const, post1) = parseUntil '"' sd
          sd@(y:ys) = skipBlancs s

-- parse a node that starts with _: (variable)
parseVariable :: Globals -> String -> (Globals, String, String, String)
parseVariable g s
    |sd == "" = (g, "", "", "")
    -- normal case
    | bv1 = (g, "Node", node ++ sep ++ node, skipBlancs post1)
    -- error
    | otherwise = (g, "Error parsing variable (_:xxx) : ", "", s)
    where (bv1, node, post1) = parseUntilDelim delimNode sd
          sd = skipBlancs s 

-- parse a node that starts with _: (variable)
parseVariableQ :: Globals -> String -> (Globals, String, String, String)
parseVariableQ g s
    -- normal case
    | bv1 = (g, "Node", node ++ sep ++ node, skipBlancs post1)
    -- error
    | otherwise = (g, "Error parsing variable (?xxx) : ", "", s)
    where (bv1, node, post1) = parseUntilDelim delimNode s
   
-- parse a node that starts with :
parseNodeThis :: Globals -> String -> (Globals, String, String, String)
parseNodeThis g "" = (g, "", "", "")
parseNodeThis g s
    -- normal case
    | bv1 = (g, "Node", node ++ sep ++
               "<" ++ (getStringParam g ":") ++ ys
             ++ ">", skipBlancs post1)
    -- error
    | otherwise = (g, "Error parsing :node " ++ s ++ " ", "Error", s)
    where (bv1, node, post1) = parseUntilDelim delimNode s
          (y:ys) = node

-- parse a node that starts with <
parseNodeLesser :: Globals -> String -> (Globals, String, String, String)
parseNodeLesser g "" = (g, "", "", "")
parseNodeLesser g s 
    -- case <> = the parsed document
    | bv1 = (g, "Node", "<>" ++ sep ++ "<" ++
                getStringParam g ":" ++ ">", post1)
    -- case <#...>
    | bv2 && bv3 = (g, "Node", "<#" ++ node ++ ">" ++ sep
                    ++ getStringParam g ":"  ++ "<#"
                    ++ node ++ ">", post3)
    -- case <URI>
    | bv4 = (g, "Node", "<" ++ node1 ++ ">" ++ sep ++ "<" ++ node1 ++ ">", post4)
    -- missing >
    | otherwise = (g, "Error missing > :", "", s)
    where (bv1, post1) = takec '>' (skipBlancs s)
          (bv2, post2) = takec '#' (skipBlancs1 s)
          (bv3, node, post3) = parseUntil '>' (skipBlancs post2)
          (bv4, node1, post4) = parseUntil '>' (skipBlancs s)

-- parse a node with format prefix:postfix
parseNodePrefix :: Globals -> String -> (Globals, String, String, String)
parseNodePrefix g "" = (g, "", "", "")
parseNodePrefix g s
    -- normal case
    | bv1 && bv2 && bv3 = (g, "Object", prefix ++ ":" ++ postfix ++
                           sep ++ "<" ++ pre ++ postfix ++ ">", post2)
    -- error
    | otherwise = (g, "Error parsing prefix:postfix : ", "", s)
    where (bv1, prefix, post1) = parseUntil ':' (skipBlancs s)
          (bv2, postfix, post2) = parseUntilDelim delimNode (skipBlancs post1)
          pre = getStringParam g (prefix ++ ":")
          bv3 = pre /= "Error"
          
-- function for parsing prefixes
-- input are the global data and the string to be parsed
-- returns a multiple that exists of an identifier, the value
-- the rest string and the global list.
-- format of a prefix :
-- @prefix ...: <URI>
-- the prefix is added in the global list as ("prefix","value-of-prefix");
-- the returned value = ""
parsePrefix :: Globals -> String ->  (Globals, String, String, String)
parsePrefix g "" = (g, "", "", "")
parsePrefix g s
   -- normal case
   |bv1 && bv2 && bv2a && bv3 && bv4 =
       (g', "Prefix", "@prefix " ++ prefix ++ ":" ++ " " ++
                             "<" ++ uri ++ ">.", post4)
   -- error
   |not bv4 = (g, "/ Error parsing prefix no point" ++ sep, "", post4)
   |otherwise = (g, "Error parsing prefix" ++ sep, "", post4)
   where (bv1, post1) = parseString "@prefix" sd
         (bv2, prefix, post2 ) = parseUntil ':' (skipBlancs post1)
         (bv2a, post2a) = takec '<' (skipBlancs post2)
         (bv3, uri, post3) = parseUntil '>' (skipBlancs1 post2a)
         (bv4, post4) = takec '.' (skipBlancs post3)
         g' = setStringParam g (prefix ++ ":") uri   -- add the prefix
         -- to the global list
         sd = skipBlancs s

-- parse the bind directive
parseBind g s
   -- normal case
   | bv1 && bv2 && bv3 && bv4 =
       (g1, "Prefix", "@prefix " ++ prefix ++ ":" ++ " " ++
                             "<" ++ uri ++ ">.", post4)
   -- error
   |otherwise = (g, "Error parsing bind" ++ sep, "", s )
   where (bv1, post1) = parseString "bind" s
         (bv2, prefix, post2) = parseUntil ':' (skipBlancs post1)
         (bv3, post3) = takec '<' (skipBlancs post2)
         (bv4, uri, post4) = parseUntil '>' (skipBlancs post3)
         g1 = setStringParam g (prefix ++ ":") uri  -- add the prefix

-- ************* Test functions  **************

-- parses a string but asks for confirmation ('c') after each element =
-- triple, tripleset or anonymous set.
-- When finished enter 'q' to stop.
interpreter      :: String -> IO ()
interpreter s   =  do is <- getContents
                      putStr ((loop g1 s) is)

-- this reads a line in a loop and then executes exec
loop             :: Globals -> String -> Interact
loop g s         = readLine "> " (exec g s)

-- this function interprets the input string
exec             :: Globals -> String -> String -> Interact
exec g [] _       = writeStr "Bye"  end
exec g _ []       = writeStr "Bye"  end 
exec g s "q"      = writeStr "Bye"  end
exec g s "c"      = writeStr ("result: " ++ out ++ "\n") (loop g1 s1)
                    where (g1, out, s1, len) = parseSingle (g, "", s, 0)

-- parses a string but asks for confirmation ('c') after each element =
-- node.
-- When finished enter 'q' to stop.
interpreteN      :: String -> IO ()
interpreteN s   =  do is <- getContents
                      putStr ((loop1 g1 s) is)

-- this reads a line in a loop and then executes exec1
loop1             :: Globals -> String -> Interact
loop1 g s         = readLine "> " (exec1 g s)

-- this function interprets the input string
exec1             :: Globals -> String -> String -> Interact
exec1 g [] _       = writeStr "Bye"  end
exec1 g _ []       = writeStr "Bye"  end 
exec1 g s "q"      = writeStr "Bye"  end
exec1 g s "c"      = writeStr ("result: " ++ out ++ "\n")
                              (loop1 g1 s1)
                    where (g1, out, s1, len) = parseNodeList (g, "", s, 0)

-- function parseSingle  : top level of the parser
-- Takes the global list and a string (without leading spaces)
-- and returns a string consisting of a list of identifier-value pairs
-- separated by the separator
--  e.g. "Subject :a Verb :b Object :c"
-- parses only a single prefix definition or a triple
-- for testing !!!
parseSingle :: ParserData ->  ParserData
parseSingle (g, out, "", oldLen) = (g, out, "", oldLen)
parseSingle (g, out, s, oldLen)
   | (skipBlancs s) == "" = (g, out, "", newLen)
   -- intercept bind   (obsolete)
   | startsWith sd "bind" =  (g4, out ++ s5 ++ sep ++ s6 ++ sep,
                            skipBlancs s8, newLen)
   -- skip point
   | y == '.' = parseSingle (g, out, ys, newLen)
   -- parse a prefix -- no output
   | y == '@' =  (g2, out ++ s1 ++ sep ++ s2 ++ sep, skipBlancs s4, newLen)
   -- must be a triple parse triple
   | otherwise = (parseSingleTriple (g, out, sd, newLen))
   where (g1, s1, s2, s3) = parsePrefix g sd
         (g2, s4, bool1) = testResult (g1, s1, s2, s3) 
         sd@(y:ys) = skipBlancs s
         (g3, s5, s6, s7) = parseBind g sd
         (g4, s8, bool) = testResult (g3,s5, s6, s7)
         newLen
            | debug = length s
            | otherwise = 0

-- parseSingleTriple parses a singel triple -- for testing !!!
parseSingleTriple :: ParserData -> ParserData
parseSingleTriple (g, out, "", oldLen) = (g, out, "", oldLen)
parseSingleTriple (g, out, s, oldLen)
   | y == '}' = (g, out, s1, newLen)
   | y == '.' = (g, out, ys, newLen)
   | otherwise = (parsePropertyList (parseSubject (g, out, s1, newLen)))
   where s1@(y:ys) = skipBlancs s
         newLen
            | debug = length s1
            | otherwise = 0

-- test the function parseTripleSet
-- testParseTripleSet :: [ParserData]
testParseTripleSet =
   stringToFile
       (composedTriple testList)
       "testParseTripleSet.tst"
   where testList = [(g2, "", "{:a :b :c.}", 0),
                     (g2, "", "{{:a :b :c. :d :d :f} :g" ++
                       " {:h :i :j. :k :l :m}}", 0),
                     (g2, "", "{{:a :b :c} :d :e}", 0),
                     (g2, "", "{:a :b :c}", 0), 
                     (g2, "", "{:a :b :c. :d :e :f.}", 0),
                     (g2, "", "{:: :b :c. :d :e :f.", 0),
                     (g2, "", ":a :b :c.", 0),
                     (g2, "", ":a is :b of :c.", 0),
                     (g2, "", ":a has :b of :c.", 0),
                     (g2, "", ":a :b :c", 0)]

-- compose the output list
composedTriple [] = []
composedTriple (x:xs) = "\n\nInput:" ++ tripleToString x ++
                  "\n\nOutput" ++ tripleToString (parseTripleSet x)
                  ++ composedTriple xs

-- test the function parseAnonSet
-- testParseAnonSet :: [ParserData]
testParseAnonSet =
   stringToFile
       (composedAnon testList)
       "testParseAnonSet.tst"
   where testList = [(g2, "", "[:b :c.].", 0),
                     (g2, "", "[ :b :c; :e :f; :g :h, :i, :p]", 0),
                     (g2, "", "[ :b :c; :d [:e :f]; :g :h]", 0)]

-- compose the output list
composedAnon [] = []
composedAnon (x:xs) = "\n\nInput:" ++ tripleToString x ++
                  "\n\nOutput" ++ tripleToString (parseAnonSet x)
                  ++ composedAnon xs

-- test the function parsePropertyList
-- testParsePropertyList :: [ParserData]
testParsePropertyList =
   stringToFile
       (composedProperty testList)
       "testParsePropertyList.tst"
   where testList = [(g2, "", ":b :c; :d :e; :f :g.", 0),
                     (g2, "", ":b :c.", 0)] --,
--                     (g2, "", ":b :c; :d :e; :f :g:", 0),
--                     (g2, "", ":a, :b,:c", 0)]

-- compose the output list
composedProperty [] = []
composedProperty (x:xs) = "\n\nInput:" ++ tripleToString x ++
                  "\n\nOutput" ++ tripleToString (parsePropertyList x)
                  ++ composedProperty xs

-- test the function parseNodeList
-- testParseNodeList :: [ParserData]
testParseNodeList =
   stringToFile
       (composed testList)
       "testParseNodeList.tst"
   where testList = [(g2, "", ":a, :b,:c.", 0),
                     (g2, "", ":a.", 0) ,
                     (g2, "", ":a", 0),
                     (g2, "", ":a, :b,:c", 0)]

-- compose the output list
composed [] = []
composed (x:xs) = "\n\nInput:" ++ tripleToString x ++
                  "\n\nOutput" ++ tripleToString (parseNodeList x) ++ composed xs

-- display ParserData in a more or less readable format 
displayTriple :: ParserData -> IO ()
displayTriple (g, out, rest, oldLen) =
   putStr ("\n" ++ show g ++ "\n" ++ "out: " ++ out ++ "\n" ++ "rest: " ++ rest)

-- display a list of ParserData
displayTripleList :: [ParserData] -> IO ()
displayTripleList [] = putStr "Empty list"
displayTripleList triple =  putStr (tripleListToString triple)

-- transforms ParserData in string format
tripleToString :: ParserData -> String
tripleToString (g, out, rest, len) =
   "\n\n" ++ show g ++ "\n" ++ "out: " ++ out ++ "\n" ++ "rest: " ++ rest

-- transform a list of ParserData to String
tripleListToString :: [ParserData] -> String
tripleListToString [] = []
tripleListToString (x:xs) = tripleToString x ++ tripleListToString xs

-- test the function parseNode
-- testParseNode ::
testParseNode =
   stringToFile
       (composeNodes testList)
       "testParseNode.tst"
   where testList = [ "<> :a :b",
                      ":]",
                      ":a :b :c .",
                      "@prefix dc: <http://gn.org/> .",
                      "dc:a dc:b dc:c . {<http://www.w3.org> dc:ho \"oke\".}",
                      "<http://www.guido> dc:b dc:c . {<http://www.w3.org> dc:ho \"oke\".}",
                      ";<#pat> :a :b.",
                 --     "<#pat> :a :b",
                      "\"Hallo\" dc:b dc:c . {<http://www.w3.org> dc:ho \"oke\".}"]

-- compose the output list
composeNodes [] = []
composeNodes (x:xs) = "\n\nInput: " ++ x ++
                  "\n\nOutput " ++ quadToString (parseNode g1 x)
                  ++ composeNodes xs

-- transforms output of parseNode in string format
quadToString :: (Globals, String, String, String) -> String
quadToString (g, s1, s2, s3) =
   "\n\n" ++ show g ++ "\n" ++ "item: " ++ s1 ++ " value: " ++ s2 ++
      " rest: " ++ s3

-- transforms a pair (g, s) to String
pairToString :: (Globals, String) -> String
pairToString (g, s) =
   "\n\n" ++ show g ++ "\n" ++ "input: " ++ s

-- ************* basic constants **************
--
g = [("BaseURI", baseURI),("AnonCounter", "0"), ("LastSubject", ""),
   ("LastVerb", ""), (":","")]


sep = "/@/"

-- ************* known uri's ******************
rdf = "<http://www.w3.org/1999/02/22-rdf-syntax-ns#"
rdfs = "<http://www.w3.org/2000/01/rdf-schema#"
daml = "<http://www.daml.org/2001/03/daml+oil#"

-- ************* --------------- **************
-- globals for testing
g2 = [("BaseURI", baseURI),("AnonCounter", "1"), ("LastSubject", "last_subject"),
   ("LastVerb", "last_verb"),(":","")]

-- test of node parsing 1
baseURI = "http://www/w3.org"
g1 = [(":", ""),("dc:","http://gn.org/"),("AnonCounter", "1")]
p = "<#pat> :a :b" -- test with parseAtom g p

-- test of node parsing 2
p1 = "<> :a :b"   -- test with parseAtom g p1

-- test of prefix parsing
p2 = "@prefix dc: <http://gn.org/> ."

-- test of comment parseing
p2a = "# blabla \r\n @prefix dc: <http://www.gn>."


-- test of node with :...
p3 = ":a :b :c . {<http://www.w3.org> dc:ho \"oke\".}"
p3a = "{<http://www.w3.org> dc:ho \"oke\".}"

-- test of node with prefix:...
p4 = "dc:a dc:b dc:c . {<http://www.w3.org> dc:ho \"oke\".}"

-- test of node with <URI>
p5 = "<http://www.guido> dc:b dc:c . {<http://www.w3.org> dc:ho \"oke\".}"

-- test of node with ".."    (constant)
p6 = "\"Hallo\" dc:b dc:c . {<http://www.w3.org> dc:ho \"oke\".}"

-- test of comment
p7 = "# ddddddd \r\n :a :b :c"

-- test of parseNodeList
p8 = ":a, :b, :c."

-- test of parsePropertyList
p9 = ":a :b; :c :d; :e :f."

-- test of tripleset
p10 = "{:a :b :c. :d :e :f.}"

p10a = ":a :b :c . :d :e :f ."

-- test of embedded triplesets
p11 = "{{:person :member :institution. " ++
  ":institution :w3cmember <http://www.w3.org>." ++
  ":institution :subscribed :mailinglist} :implies "++
  "{:person :authenticated :mailinglist}} a :Truth; :forAll :person, :mailinglist, :institution."

-- test of embedded anonymous sets
p11a = "[[[:member :institution; " ++
  ":w3cmember <http://www.w3.org>.]" ++
  ":institution [:subscribed :mailinglist]] :implies "++
  "[:authenticated :mailinglist]] a :Truth."

p12 =
   "# $Id: authen.axiom.n3,v 1.2 2001/10/01 00:12:34 amdus Exp $\n" ++
   " \n" ++
   "@prefix log: <http://www.w3.org/2000/10/swap/log#>.\n" ++
   "@prefix : <authen#>.\n" ++
   "  \n" ++
   "<mailto:jos.deroo.jd@belgium.agfa.com> :member <http://www.agfa.com>.\n" ++
   "<http://www.agfa.com> :w3cmember <http://www.w3.org>.\n" ++
   "<http://www.agfa.com> :subscribed <mailto:w3c-ac-forum@w3.org/>.\n" ++
   " \n" ++
   "{{:person :member :institution.\n" ++
   ":institution :w3cmember <http://www.w3.org>.\n" ++
   ":institution :subscribed :mailinglist} log:implies\n" ++
   "{:person :authenticated :mailinglist}} a log:Truth; log:forAll :person, :mailinglist, :institution.\n"

p13 =
   "<mailto:jos.deroo.jd@belgium.agfa.com> :member <http://www.agfa.com>.\n" ++
   "<http://www.agfa.com> :w3cmember <http://www.w3.org>.\n" ++
   "<http://www.agfa.com> :subscribed <mailto:w3c-ac-forum@w3.org/>.\n" ++
   " \n" 

p14 = "@prefix : <danc#>."

p15 = "<#QA> :includes :b."

t1 = readN3 "http://www.w3.org/2002/03owlt/ontAx.n3"
t2 = readN3 "animal-result.n3"                   -- ok
t3 = readN3 "animal-simple.n3"                   -- ok
t4 = readN3 "animal.n3"                          -- ok
t5 = readN3 "authen.axiom.n3"                    -- ok
t6 = readN3 "authen.lemma.n3"                    -- ok
t7 = readN3 "authen.proof.n3"                    -- ok
t8 = readN3 "danb-query.n3"                      -- ok 
t9 = readN3 "danb-result.n3"                     -- ok
t10 = readN3 "danb.n3"                           -- ok
t11 = readN3 "danc-query.n3"                     -- ok
t12 = readN3 "danc-result.n3"                    -- ok
t13 = readN3 "danc.n3"                           -- ok
t14 = readN3 "etc.n3"                            -- ok 
t15 = readN3 "gedcom-facts.n3"                   -- ok
t16 = readN3 "gedcom-proof.n3"                   -- contains syntax error
t17 = readN3 "gedcom-query.n3"                   -- ok
t18 = readN3 "gedcom-relations-result.n3"        -- ok 
t19 = readN3 "gedcom-relations-test.n3"          -- ok
t20 = readN3 "gedcom-relations.n3"               -- ok
t21 = readN3 "graph.axiom.n3"                    -- ok
t22 = readN3 "graph.lemma.n3"                    -- ok
t23 = readN3 "graph.proof.n3"                    -- syntax error
t24 = readN3 "janet-result.n3"                   -- ok
t25 = readN3 "janet-test.n3"                     -- ok
t26 = readN3 "janet.n3"                          -- ok
t27 = readN3 "lists-query.n3"                    -- ok
t28 = readN3 "lists-result.n3"                   -- ok
t29 = readN3 "lists.n3"                          -- ok
t30 = readN3 "vogel.q.n3"                        -- ok
t40 = readN3 "rdf-facts.n3"                      -- ok
t41 = readN3 "rdf-query.n3"                      -- ok
t42 = readN3 "rdf-result.n3"                     -- ok
t43 = readN3 "rdf-rules.n3"                      -- ok
t44 = readN3 "rdfc25May-result.n3"               -- ok
t45 = readN3 "rdfc25May-test.n3"                 -- ok
t46 = readN3 "rdfc25May.n3"                      -- ok
t47 = readN3 "rdfs-query.n3"                     -- ok
t48 = readN3 "rdfs-result.n3"                    -- ok
t49 = readN3 "rdfs-rules.n3"                     -- ok
t50 = readN3 "russell.axiom.n3"                  -- ok
t51 = readN3 "russell.lemma.n3"                  -- ok
t52 = readN3 "russell.proof.n3"                 -- ok
t53 = readN3 "subclass-query.n3"                -- ok
t54 = readN3 "subclass-result.n3"               -- ok
t55 = readN3 "subclass.n3"                      -- ok
t56 = readN3 "subprop-query.n3"                 -- ok
t57 = readN3 "subprop-result.n3"                -- ok
t58 = readN3 "subprop.n3"                       -- ok
t59 = readN3 "test-result.n3"                   -- syntax error in source
t60 = readN3 "test-test.n3"                     -- ok
t61 = readN3 "test.n3"                          -- ok
t62 = readN3 "tpoint-all.n3"                    -- ok
t63 = readN3 "tpoint-facts.n3"                  -- ok
t64 = readN3 "tpoint-query.n3"                  -- ok
t65 = readN3 "tpoint-result.n3"                 -- ok
t66 = readN3 "tpoint.n3"                        -- ok 
t67 = readN3 "varprop-query.n3"                 -- ok
t68 = readN3 "varprop-result.n3"                -- ok
t69 = readN3 "varprop.n3"                       -- ok
t70 = readN3 "ziv-query.n3"                     -- ok
t71 = readN3 "ziv-result.n3"                    -- ok
t72 = readN3 "ziv.n3"                           -- ok
t73 = readN3 "wol-facts.n3"                     -- ok
t74 = readN3 "wol-query.n3"                     -- ok
t75 = readN3 "wol-rules.n3"                     -- ok
t76 = readN3 "VOGEL.N3"                         -- ok
t77 = readN3 "vogel.l.n3"                       -- ok
t78 = readN3 "boole.lemma.n3"                   -- ok
t79 = readN3 "boole.axiom.n3"                   -- ok
t80 = readN3 "induction.axiom.n3"               -- ok
t81 = readN3 "induction.query.n3"               -- ok
t82 = readN3 "allValuesFrom.n3"                 -- ok
t83 = readN3 "Owls.n3"                          -- ok
t86 = readN3 "ontology2.axiom.n3"
t87 = readN3 "eq.a.n3"
t88 = readN3 "eq.q.n3"
t89 = readN3 "graph.axiom1.n3"
t90 = readN3 "hosts.a.n3"

--
-- The bnf grammar
----------------------
--
-- #Taken from <http://www.w3.org/DesignIssues/Notation3.html>
-- #on 2001-08-03 (version of 2001-04-10)
--
-- #Modifications:
--     
-- #Log: n3.bnf,v $
-- #Revision 1.4  2001/08/06 20:56:21  sandro
-- #added space* and space+ in several places
-- #removed "#" from forbidden chars in URI_Reference
-- #handles comments
-- #made directives actually part of the grammar (!)
-- #allowed nprefix to be zero-length
--
-- #Revision 1.3  2001/08/03 13:44:43  sandro
-- #filled in remaining non-terminals
--
-- #Revision 1.2  2001/08/03 13:02:48  sandro
-- #standardized BNF so blindfold can compile it
-- #   added ::= for each rule
-- #   added | for branches
-- #   added ; at end of rule
-- #   added # before comments
-- #   put quotes around literals
-- #   turn hypen into underscore in identifiers
-- #   rename prefix to nprefix (hack around blindfold keyword for now)
-- #
-- # Revision 1.1  2001/08/03 12:34:38  sandro
-- # added opening comments
-- #
--
--
-- document ::= void 
--           | statementlist;
--
-- space ::= " " | "\n" | "\r" | comment;
--
-- comment ::= "#" [^\r\n]*;
--
-- statement ::= subject space+ property_list
--            | directive
--            ;
--
-- statementlist ::= (statement space* ("." space*)?)* ;
--
-- subject ::= node;
--
-- verb ::= ">-" prop "->"    # has xxx of 
--       | "<-" prop "<-"    # is xxx of 
--    #  | operator          # has operator:xxx of??? NOT IMPLMENTED
--       | prop              # has xxx of -- shorthand
--       | "has" prop        # has xxx of
--       | "is" prop "of"    # is xxx of
--       | "a"               # has rdf:type of
--       | "="               # has daml:equivalent of
--       ;
--
-- prop ::= node;
--
-- node ::= uri_ref2 
--       | anonnode
--       | "this"
--       | node 
--       ;
--
-- nodelist ::= void    # (used in lists) 
--           | node
--           | node nodelist
--           ;
--
-- anonnode ::= "[" property_list "]"  # something which ...
--           | "{" statementlist "}"  # the statementlist itself as a resource
--           | "(" nodelist ")"       # short for eg [ n3:first node1; n3:rest [ n3:first node2; n3:rest: n3:null ]]
--           ;
--
-- property_list ::= void   # to allow [...]. 
--                | verb space+ object_list
--                | verb space+ object_list space+ ";" space+ property_list
--                | ":-" anonnode  #to allow two anonymous forms to be given eg [ a :Truth; :- { :sky :color :blue } ] )
--                | ":-" anonnode ";" property_list
--                ;
--
-- object_list ::= object 
--              | object "," object_list
--              ;
--
-- uri_ref2 ::= qname 
--           | "<" URI_Reference ">"
--           ;
--
-- qname ::= nprefix ":" localname; # ??? Allow omit colon when prefix void - keyword clash 
--
-- object ::= subject 
--         | string1 # " constant-value-with-escaping "
--         | string2 # """ constant value with escaping including single or double occurences of quotes and/or newlines """
--         #      well-formed-xml-element ???? legacy or structured stuff - not implemented or distinguished
--         ;
--
-- directive ::= "bind" space+ nprefix ":" uri_ref2   # Namespace declartion. Trailing "#" is omitted & assumed. Obsolete.
--            | "@prefix" space+ nprefix ":" space+ uri_ref2  # Namespace declaration
--            ;
--
-- # operator ::=  (Not implemented) 
-- #      + >- operator:plus -> 
-- #      - >- operator:minus ->
-- #      / >- operator:slash->
-- #      * >- operator:star-> (etc? @@)
--
-- fragid ::= alpha alphanumeric* ;
--
-- alpha ::= [a-zA-Z];
--
-- alphanumeric ::= alpha | [0-9] | "_";
--
-- void ::= "" ;   # nothing
--
-- URI_Reference ::= [^{}<>]*;    # short version
--
-- nprefix ::= "" | ((alpha | "_") alphanumeric*);
--
-- localname ::= fragid;
--
-- string1 ::= '"' string1_char* '"';
--
-- string1_char ::= '\\"' | [^\"] ;           # should disallow some other characters, etc.
--
-- string2 ::= '"""' string2_char* '"""';
--
-- string2_char ::= [^"] | ([^] [^] [^"]);    # something like this; need to think about it some more
--
-----------------------------------------------------------------------}