module GenerateDB where

import "Utils"
import "XML"
import "N3Parser"
import "Array"
import "LoadTree"
import "Observe"

-- Generate a database for the resolution engine.
-- Input is the output from LoadTree.hs; possibly several load
-- structures are fused.
-- All variables are detected and receive a unique number within their scope.
-- Universal local variables receive the tag "Var" and existential local
-- variables receive the tag "EVar"; universal global variables receive
-- the tag "GVar" and existential global variables receive the tag "GEVar".
-- The output has three parts per input file:
-- the prefix list, the list of variables and the triple database.
-- Author: G.Naudts. Mail: naudts_vannoten@yahoo.com.
--
-- NOTE(review): the empty string literals ("") in the grammars below look
-- like tag names that were lost from this copy of the file -- confirm
-- against the original source.
--
-- BNF for rules (in N3):
-- rule ::= "{" triplelist verbimplies triplelist "}" "a" objectTruth ";"
--          verbforall|verbforsome objectforall;
-- ruleSubject ::= triplelist;
-- triplelist ::= "{" triple* "}";
-- triple ::= as usual;
-- verbimplies ::= "";
-- objectTruth ::= "";
-- verbforall ::= "";
-- verbforsome ::= "";
-- objectforall ::= URI ["," URI]*
--
-- BNF of the database:
--
-- database ::= clause*;
-- clause ::= rule | tripleset;
-- tripleset ::= "{" triple* "}";
-- triple ::= subject verb object [number] [ref1] [ref2];
-- subject ::= triplelist | "" content "";
-- content ::= URI | var | vare | gvar | gvare;
-- ** The first string is the abbreviated URI; the second is the full URI.
-- ** For a var the tag uri is simply changed into the tag var.
-- ** var is a universal variable; vare is an existential variable.
-- ** gvar and gvare are global variables.
-- NOTE(review): the empty string literals ("") in the grammar below look
-- like tag names that were lost from this copy of the file -- confirm
-- against the original source.
-- URI ::= "" String String "";
-- var ::= "" String String "";
-- vare ::= "" String String "";
-- gvar ::= "" String String "";
-- gvare ::= "" String String "";
-- verb ::= "" content "";
-- object ::= triplelist | "" content "";
-- rule ::= tripleset " "
--          "log:implies "
--          " " triple " "
--          " "
--          " a http://www.w3.org/1999/02/22-rdf-syntax-ns#type"
--          " "
--          "log:Truth http://www.w3.org/2000/10/swap/log#Truth"
--          " "
--          " log:forAll http://www.w3.org/2000/10/swap/log#forAll"
--          "" objectlist " "
-- objectlist ::= ("" content "")*
--
-- log:forAll may be replaced by log:forSome.
--
-- On the scope of variables:
-- If variables are declared with a separate triple like:
--      this log:forAll :a, :b, :c.
-- their scope is global. Beware!! Global variables can give unexpected
-- results with a resolution engine.
-- When they are declared within a tripleset their scope is local.
-- There are existential and universal variables, giving the following
-- variable tags: Var, EVar, GVar and GEVar.
-- Anonymous variables (_T$$$X) have type EVar in the query but
-- not in axiom files (otherwise they would never unify).

-- Root node of the generated database: a "semTree" tag without children
-- and without content. mergeInput receives it as the tree to extend.
semTree = Tag("semTree", [], "")

-- This generates a database.
-- Input is a list of filenames: every file is read, giving one string
-- per file. The strings are merged by mergeInput, and the resulting tree
-- is printed as XML on standard output.
generateDB :: [String] -> IO()
generateDB filenames = do
    contents <- mapM readFile filenames
    putStr (printXml (mergeInput contents semTree) "")

-- merge the input files; the last one is the query file.
-- The input files are in the list inputFiles; output is in
-- the db.
-- inputs  : the contents of the input files. The last element is the
--           query file; every element before it is an axiom file.
-- semTree : the tree that is extended with the result.
-- addTree is applied to semTree four times, in this order: the marked
-- axioms, the axiom prefixes, the marked query and the query prefixes.
-- An empty input list is not handled (there is no equation for it).
mergeInput inputs semTree = withQuery
  where
    -- the query is the last input file
    (queryText:_) = reverse inputs
    (_, queryPrefixes, queryTree, _) =
        loadString (n3Parser queryText, emptyQueryPrefixes, emptyQuery, pa)

    -- the empty containers handed to loadString
    emptyAxioms        = Tag("axioms", [], "")
    emptyPrefixes      = Tag("prefixes", [], "")
    emptyQueryPrefixes = Tag("prefixesQ", [], "")
    emptyQuery         = Tag("query", [], "")

    (axiomTree, axiomPrefixes) = fillAxioms inputs (emptyAxioms, emptyPrefixes)

    -- Load every file except the last one (the query file) and collect
    -- the children of each load result in the accumulated trees.
    fillAxioms [_] (axioms, prefixes) = (axioms, prefixes)
    fillAxioms (x:xs) (axioms, prefixes) = fillAxioms xs (axioms', prefixes')
      where
        (_, loadedPrefixes, loadedAxioms, _) =
            loadString (n3Parser x, prefixes, axioms, pa)
        axioms'   = addTreeList axioms (getChildren loadedAxioms)
        prefixes' = addTreeList prefixes (getChildren loadedPrefixes)

    -- True marks an axiom file, False marks the query file
    markedAxioms = markAllVariables axiomTree (getVariables axiomTree) True
    withAxioms   = addTree (addTree semTree markedAxioms) axiomPrefixes
    markedQuery  = markAllVariables queryTree (getVariables queryTree) False
    withQuery    = addTree (addTree withAxioms markedQuery) queryPrefixes

-- create a test db = semTest
semTest = generateDB ["authen.axiom.n3", "authen.lemma.n3"]

-- test data from LoadTree.hs
(_, prefixTest, dbTest, _) = loadString (ppa7, prefixList, db, pa)

-- print the test prefixes followed by the test database
testTestData = putStr (printXml prefixTest "" ++ printXml dbTest "")

-- the variables found in the test database
varLTest = getVariables dbTest

testGetVariables = putStr (printTreeList varLTest "")

-- get the variables from a tree
-- returns a list of Var and EVar tags.
-- Collect the variable declarations of a tree.
-- The candidate nodes are selected with f1 (through selectFromTree);
-- getVarList turns every candidate into zero or more variable tags:
--   * a "global" node whose subject is "this": one tag per Object,
--     "GVar" when the verb is logForAll, "GEVar" otherwise;
--   * a Verb node: one tag per URI child after the first one,
--     "Var" or "EVar";
--   * a Subject node whose URI child starts with "_T$$$": one "EVar" tag.
-- A candidate that fits none of these cases is skipped.
getVariables :: XMLTree -> [XMLTree]
getVariables Empty = []
getVariables tree = getVarList (selectFromTree tree f1)
  where
    getVarList [] = []
    getVarList (x:xs)
      | isGlobal && subjectIsThis && verbIsForAll =
            getVarsFromObjects objects "GVar" ++ getVarList xs
      | isGlobal && subjectIsThis =
            getVarsFromObjects objects "GEVar" ++ getVarList xs
      | isVerb =
            getFromUriList declared quantifier ++ getVarList xs
      | isSubject && isAnonymous =
            Tag("EVar", uriChildren, uriContent) : getVarList xs
        -- Without this case a candidate that fits no category aborted
        -- the whole run with a pattern-match failure.
      | otherwise = getVarList xs
      where
        Tag(name, _, _) = x
        -- the first URI child of the candidate; only inspected lazily
        Tag(_, uriChildren, uriContent) = getChildByName x "URI"

        -- global variables are detected if their declaration
        -- has a subject "this" and one verb log:forAll or log:forSome.
        isGlobal = name == "global"
        (_, subjectName, _) = parseUntil ' ' uriContent
        subjectIsThis = subjectName == "this"
        Tag(_, _, verbContent) = getChildByName (getChildByName x "Verb") "URI"
        (_, _, verbUri) = parseUntil ' ' verbContent
        verbIsForAll = verbUri == logForAll
        objects = getChildrenByName x "Object"
        -- copy the URI child of every object into a tag named tagName
        getVarsFromObjects objs tagName = map retag objs
          where
            retag obj = Tag(tagName, children, content)
              where Tag(_, children, content) = getChildByName obj "URI"

        -- the tag is a verb: its first URI child names the quantifier,
        -- the remaining URI children are the declared variables
        isVerb = name == "Verb"
        (quantifierUri:declared) = getChildrenByName x "URI"
        Tag(_, _, quantifierContent) = quantifierUri
        (_, _, quantifier) = parseUntil ' ' quantifierContent
        -- "Var" when the content could be split at a blank and the
        -- quantifier is logForAll; "EVar" in every other case
        getFromUriList uris quant = map mark uris
          where
            mark uri
              | parsed && quant == logForAll = Tag("Var", children, content)
              | otherwise = Tag("EVar", children, content)
              where
                Tag(_, children, content) = uri
                (parsed, _, _) = parseUntil ' ' content

        -- the tag is a subject
        isSubject = name == "Subject"
        isAnonymous = startsWith uriContent "_T$$$"

    -- the function for selecting from the tree
    -- It keeps:
    --   * a Subject whose content starts with "_T$$$";
    --   * a Subject whose content starts with "this" and that has exactly
    --     one Verb child whose URI content could be split at a blank;
    --     such a node is re-tagged "global";
    --   * a Verb whose URI is logForAll or logForSome.
    f1 :: XMLTree -> [XMLTree]
    f1 Empty = []
    f1 node@(Tag(name, children, content))
      | isSubject && startsWith content "_T$$$" = [node]
      | isSubject && startsWith content "this" && singleVerbParsed =
            [Tag("global", children, content)]
      | name == "Verb" && uriParsed && isQuantifier = [node]
      | otherwise = []
      where
        isSubject = name == "Subject"
        Tag(_, _, uriContent) = getChildByName node "URI"
        (uriParsed, _, uriFull) = parseUntil ' ' uriContent
        isQuantifier = uriFull == logForAll || uriFull == logForSome
        -- A case on the verb list instead of a (y:ys) pattern: a "this"
        -- subject without any Verb child is rejected instead of crashing.
        singleVerbParsed = case getChildrenByName node "Verb" of
            [verb] -> verbParsed verb
            _      -> False
        verbParsed verb = parsed
          where
            Tag(_, _, verbContent) = getChildByName verb "URI"
            (parsed, _, _) = parseUntil ' ' verbContent

testMarkAllVariables = putStr (printXml (markAllVariables dbTest varLTest True) "")

-- this function marks all variables; the tag URI is changed to
-- Var, EVar, GVar or GEVar.
-- Bool indicates whether this is a query or an axiom file
-- True = axiom file; False = query file.
-- This is necessary for anonymous nodes.
--
-- The result keeps the name and content of db; its children are the
-- direct TripleSet children of db, and each of those keeps only its
-- direct Subject children (rewritten with f2 through walkATree).
-- Numbering: the first triple set starts at 1, every following set starts
-- one higher; within a set every following subject is one higher.
markAllVariables :: XMLTree -> [XMLTree] -> Bool -> XMLTree
markAllVariables Empty _ _ = Empty
markAllVariables db varList boolIn =
    Tag(dbName, zipWith markSet [1 ..] tripleSets, dbContent)
  where
    Tag(dbName, _, dbContent) = db
    tripleSets = getDirectChildrenByName db "TripleSet"

    markSet i set = Tag(setName, zipWith markSubject [i ..] subjects, setContent)
      where
        Tag(setName, _, setContent) = set
        subjects = getDirectChildrenByName set "Subject"

    markSubject i subject = walkATree subject f2
      where
        -- rewrite one node; everything that is not a URI tag is kept
        f2 :: XMLTree -> XMLTree
        f2 node@(Tag(name, children, content))
          | name /= "URI" = node
            -- "_:" names become global existential variables
          | startsWith content "_:" = Tag("GEVar", children, "_" ++ content)
            -- anonymous nodes are variables in a query file only
          | startsWith content "_T$$$" && not boolIn =
                Tag("EVar", children, content)
            -- "?" names are local universal variables, numbered with i
          | startsWith content "?" =
                Tag("Var", children, "_" ++ intToString i ++ content)
            -- declared local variables get the number i as prefix
          | declared && (varTag == "Var" || varTag == "EVar") =
                Tag(varTag, children, intToString i ++ "_" ++ content)
            -- declared global variables keep their content
          | declared && (varTag == "GVar" || varTag == "GEVar") =
                Tag(varTag, children, content)
          | otherwise = node
          where
            (declared, varTag) = isVarIn varList node
        f2 t = t

-- checks whether local variables are defined in this subject i.e.
-- the verbs logForSome or logForAll are present.
checkVars :: XMLTree -> Bool
checkVars Empty = False
checkVars subject = any declaresVariables (getChildrenByName subject "Verb")
  where
    declaresVariables verb =
        parsed && (fullUri == logForAll || fullUri == logForSome)
      where
        Tag(_, _, content) = getChildByName verb "URI"
        (parsed, _, fullUri) = parseUntil ' ' content

-- test whether the variable defined in the xml tree is
-- present in the list of trees.
-- Returns (True, tag) for the first list entry that carries one of the
-- four variable tags and has the same children and content as the tree;
-- returns (False, "") when there is no such entry.
isVarIn :: [XMLTree] -> XMLTree -> (Bool, String)
isVarIn [] _ = (False, "")
isVarIn (Tag(varTag, varChildren, varContent):rest) tree@(Tag(_, children, content))
  | isVariableTag && varChildren == children && varContent == content =
        (True, varTag)
  | otherwise = isVarIn rest tree
  where
    isVariableTag = varTag `elem` ["Var", "EVar", "GVar", "GEVar"]

-- Test runs: every entry generates the database for one set of input
-- files; the last file of each list is the query file.
-- NOTE(review): "-- ok" presumably marks the runs the author verified.
tg1 = generateDB ["animal.n3", "animal-simple.n3"] -- ok
tg2 = generateDB ["authen.axiom.n3","authen.lemma.n3"] -- ok
tg3 = generateDB ["danb.n3", "danb-query.n3"] -- ok
tg4 = generateDB ["danc.n3", "danc-query.n3"] -- ok
tg5 = generateDB ["gedcom-facts.n3", "gedcom-relations.n3", "gedcom-query.n3"] -- ok
tg6 = generateDB ["graph.axiom.n3", "graph.lemma.n3"] -- ok
tg7 = generateDB ["lists.n3", "lists-query.n3"] -- ok
tg8 = generateDB ["rdf-facts.n3", "rdf-rules.n3", "rdf-query.n3"] -- ok
tg9 = generateDB ["rdfc25May-test.n3", "rdfc25May.n3"]
tg10 = generateDB ["rdfs-rules.n3", "rdfs-query.n3"] -- ok
tg11 = generateDB ["russell.axiom.n3", "russell.lemma.n3"] -- ok
tg12 = generateDB ["subclass.n3", "subclass-query.n3"] -- ok
tg13 = generateDB ["subprop.n3", "subprop-query.n3"] -- ok
tg14 = generateDB ["test-test.n3", "test.n3"] -- ok
tg15 = generateDB ["tpoint-all.n3", "tpoint-facts.n3", "tpoint.n3", "tpoint-query.n3"]
tg16 = generateDB ["varprop.n3", "varprop-query.n3"] -- ok
tg17 = generateDB ["ziv.n3", "ziv-query.n3"] -- ok
tg18 = generateDB ["wol-facts.n3", "wol-rules.n3", "wol-query.n3"] -- ok
tg19 = generateDB ["vogel.l.n3", "vogel.q.n3"] -- ok
tg20 = generateDB ["boole.axiom.n3", "boole.lemma.n3"] -- ok
tg21 = generateDB ["induction.axiom.n3", "induction.query.n3"] -- ok
tg22 = generateDB ["Owls.n3", "owls.query.n3"] -- ok
tg23 = generateDB ["logic.a.n3", "logic.q.n3"] -- ok
tg24 = generateDB ["unif.n3","unif.q.n3"] -- ok
tg29 = generateDB ["ontology2.axiom.n3", "ontology.query.n3"]