// identity.js (18705B)
// Identity management and indexing for RDF
//
// This file provides IndexedFormula, a formula (set of triples) which is
// indexed by subject, predicate, object and provenance ("why").
//
// It "smushes" (merges into a single node) things which are identical
// according to owl:sameAs or an owl:InverseFunctionalProperty
// or an owl:FunctionalProperty
//
//
//  2005-10 Written Tim Berners-Lee
//  2007    Changed so as not to munge statements from documents when smushing
//
//
/*jsl:option explicit*/
// Turn on JavaScriptLint variable declaration checking
$rdf.IndexedFormula = function () {

    var owl_ns = "http://www.w3.org/2002/07/owl#";
    // var link_ns = "http://www.w3.org/2007/ont/link#";
    /* hashString functions are used as array indices. This is done to avoid
    ** conflict with existing properties of arrays such as length and map.
    ** See issue 139.
    */
    $rdf.Literal.prototype.hashString = $rdf.Literal.prototype.toNT;
    $rdf.Symbol.prototype.hashString = $rdf.Symbol.prototype.toNT;
    $rdf.BlankNode.prototype.hashString = $rdf.BlankNode.prototype.toNT;
    $rdf.Collection.prototype.hashString = $rdf.Collection.prototype.toNT;


    /**
     * An indexed store of statements which merges ("smushes") equivalent nodes.
     *
     * All the tables below are arrays used as associative arrays keyed by
     * term.hashString().
     *
     * @param features  Optional list of smushing features to enable. When
     *                  undefined, "sameAs", "InverseFunctionalProperty" and
     *                  "FunctionalProperty" are all enabled.
     */
    $rdf.IndexedFormula = function (features) {
        this.statements = []; // As in Formula
        this.optional = [];
        this.propertyActions = []; // Array of functions to call when getting statement with {s X o}
        //maps <uri> to [f(F,s,p,o),...]
        this.classActions = []; // Array of functions to call when adding { s type X }
        this.redirections = []; // redirect to lexically smaller equivalent symbol
        this.aliases = []; // reverse mapping to redirection: aliases for this
        this.HTTPRedirects = []; // redirections we got from HTTP
        this.subjectIndex = []; // Array of statements with this X as subject
        this.predicateIndex = []; // Array of statements with this X as predicate
        this.objectIndex = []; // Array of statements with this X as object
        this.whyIndex = []; // Array of statements with X as provenance
        // Order matters: positions 0..3 are subject, predicate, object, why.
        this.index = [this.subjectIndex, this.predicateIndex, this.objectIndex, this.whyIndex];
        this.namespaces = {}; // Dictionary of namespace prefixes
        if(features === undefined) {
            features = ["sameAs", "InverseFunctionalProperty", "FunctionalProperty"];
        }
        // this.features = features
        // Callbackify?

        // Property action for rdf:type: notifies formula.typeCallback (if set)
        // and runs the class actions registered for the object class.
        // Returns true if the statement given is not needed in the store.
        function handleRDFType(formula, subj, pred, obj, why) {
            if(formula.typeCallback != undefined) {
                formula.typeCallback(formula, obj, why);
            }

            var x = formula.classActions[obj.hashString()];
            var done = false;
            if(x) {
                for(var i = 0; i < x.length; i++) {
                    // NB: short-circuits, so once an action has returned
                    // true the remaining actions are not called.
                    done = done || x[i](formula, subj, pred, obj, why);
                }
            }
            return done; // statement given is not needed if true
        } //handleRDFType

        //If the predicate is #type, use handleRDFType to create a typeCallback on the object
        this.propertyActions['<http://www.w3.org/1999/02/22-rdf-syntax-ns#type>'] = [handleRDFType];

        // Assumption: these terms are not redirected @@fixme
        if($rdf.Util.ArrayIndexOf(features, "sameAs") >= 0) {
            this.propertyActions['<http://www.w3.org/2002/07/owl#sameAs>'] = [
                function (formula, subj, pred, obj, why) {
                    // tabulator.log.warn("Equating "+subj.uri+" sameAs "+obj.uri); //@@
                    formula.equate(subj, obj);
                    return true; // true if statement given is NOT needed in the store
                }
            ]; //sameAs -> equate & don't add to index
        }
        if($rdf.Util.ArrayIndexOf(features, "InverseFunctionalProperty") >= 0) {
            this.classActions["<" + owl_ns + "InverseFunctionalProperty>"] = [
                function (formula, subj, pred, obj, why) {
                    return formula.newPropertyAction(subj, handle_IFP); // yes subj not pred!
                }
            ]; //IFP -> handle_IFP, do add to index
        }
        if($rdf.Util.ArrayIndexOf(features, "FunctionalProperty") >= 0) {
            this.classActions["<" + owl_ns + "FunctionalProperty>"] = [
                function (formula, subj, pred, obj, why) {
                    return formula.newPropertyAction(subj, handle_FP);
                }
            ]; //FP => handleFP, do add to index
        }

        // Inverse functional property: an existing subject with the same
        // predicate and object is equated with the new subject.
        function handle_IFP(formula, subj, pred, obj) {
            var s1 = formula.any(undefined, pred, obj);
            if(s1 == undefined) return false; // First time with this value
            // tabulator.log.warn("Equating "+s1.uri+" and "+subj.uri + " because IFP "+pred.uri); //@@
            formula.equate(s1, subj);
            return true;
        } //handle_IFP

        // Functional property: an existing object for the same subject and
        // predicate is equated with the new object.
        function handle_FP(formula, subj, pred, obj) {
            var o1 = formula.any(subj, pred, undefined);
            if(o1 == undefined) return false; // First time with this value
            // tabulator.log.warn("Equating "+o1.uri+" and "+obj.uri + " because FP "+pred.uri); //@@
            formula.equate(o1, obj);
            return true;
        } //handle_FP
    }; /* end IndexedFormula */

    $rdf.IndexedFormula.prototype = new $rdf.Formula();
    $rdf.IndexedFormula.prototype.constructor = $rdf.IndexedFormula;
    $rdf.IndexedFormula.SuperClass = $rdf.Formula;

    /**
     * Register an action for a predicate and apply it to the statements with
     * that predicate which are already in the store.
     *
     * @param pred    The predicate term.
     * @param action  function(formula, subj, pred, obj) returning true when
     *                the statement is not needed in the store.
     * @returns true if the action returned true for an existing statement.
     */
    $rdf.IndexedFormula.prototype.newPropertyAction = function newPropertyAction(pred, action) {
        //$rdf.log.debug("newPropertyAction: "+pred);
        var hash = pred.hashString();
        if(this.propertyActions[hash] == undefined) {
            this.propertyActions[hash] = [];
        }
        this.propertyActions[hash].push(action);
        // Now apply the function to statements already in the store
        var toBeFixed = this.statementsMatching(undefined, pred, undefined);
        var done = false;
        for(var i = 0; i < toBeFixed.length; i++) { // NOT optimized - sort toBeFixed etc
            // NB: short-circuits once an application has returned true.
            done = done || action(this, toBeFixed[i].subject, pred, toBeFixed[i].object);
        }
        return done;
    };

    /** Associate a namespace prefix with a namespace URI. **/
    $rdf.IndexedFormula.prototype.setPrefixForURI = function (prefix, nsuri) {
        //TODO:This is a hack for our own issues, which ought to be fixed post-release
        //See http://dig.csail.mit.edu/cgi-bin/roundup.cgi/$rdf/issue227
        if(prefix == "tab" && this.namespaces["tab"]) {
            return; // an existing "tab" prefix is never overwritten
        }
        this.namespaces[prefix] = nsuri;
    };

    // Deprecated ... name too generic
    // Unlike setPrefixForURI, this always overwrites the prefix.
    $rdf.IndexedFormula.prototype.register = function (prefix, nsuri) {
        this.namespaces[prefix] = nsuri;
    };


    /** simplify graph in store when we realize two identifiers are equivalent

    We replace the bigger with the smaller (as ordered by compareTerm).

    Returns true, meaning the equating statement need not be stored.
    */
    $rdf.IndexedFormula.prototype.equate = function (u1, u2) {
        // tabulator.log.warn("Equating "+u1+" and "+u2); // @@
        //@@JAMBO Must canonicalize the uris to prevent errors from a=b=c
        //03-21-2010
        u1 = this.canon(u1);
        u2 = this.canon(u2);
        var d = u1.compareTerm(u2);
        if(!d) return true; // No information in {a = a}
        if(d < 0) { // u1 less than u2
            return this.replaceWith(u2, u1);
        } else {
            return this.replaceWith(u1, u2);
        }
    };

    // Replace big with small, obsoleted with obsoleting.
    //
    // Moves every index entry, class action and property action keyed by big
    // over to small, and records the redirection (and, for terms with a uri,
    // the alias back link). Always returns true.
    $rdf.IndexedFormula.prototype.replaceWith = function (big, small) {
        //$rdf.log.debug("Replacing "+big+" with "+small) // @@
        var oldhash = big.hashString();
        var newhash = small.hashString();
        var i;

        // Merge the list stored under oldhash into the one under newhash.
        var moveIndex = function (ix) {
            var oldlist = ix[oldhash];
            if(oldlist == undefined) return; // none to move
            var newlist = ix[newhash];
            if(newlist == undefined) {
                ix[newhash] = oldlist;
            } else {
                ix[newhash] = oldlist.concat(newlist);
            }
            delete ix[oldhash];
        };

        // the canonical one carries all the indexes
        for(i = 0; i < 4; i++) {
            moveIndex(this.index[i]);
        }

        this.redirections[oldhash] = small;
        if(big.uri) {
            //@@JAMBO: must update redirections,aliases from sub-items, too.
            if(this.aliases[newhash] == undefined) {
                this.aliases[newhash] = [];
            }
            this.aliases[newhash].push(big); // Back link
            if(this.aliases[oldhash]) {
                // Anything which redirected to big now redirects to small.
                for(i = 0; i < this.aliases[oldhash].length; i++) {
                    this.redirections[this.aliases[oldhash][i].hashString()] = small;
                    this.aliases[newhash].push(this.aliases[oldhash][i]);
                }
            }

            //this.add(small, this.sym('http://www.w3.org/2007/ont/link#uri'), big.uri)

            // If two things are equal, and one is requested, we should request the other.
            if(this.sf) {
                this.sf.nowKnownAs(big, small);
            }
        }

        moveIndex(this.classActions);
        moveIndex(this.propertyActions);

        $rdf.log.debug("Equate done. "+big+" now links to "+small);
        return true; // true means the statement does not need to be put in
    };

    // Return the symbol with canonical URI as smushed
    // (the term itself when it has no redirection; undefined for undefined).
    $rdf.IndexedFormula.prototype.canon = function (term) {
        if(term == undefined) return term;
        var y = this.redirections[term.hashString()];
        if(y == undefined) return term;
        return y;
    };

    // Compare by canonical term as smushed.
    //
    // The canonical terms are compared with sameTerm rather than by .uri:
    // comparing .uri reported any two terms without a uri as the same thing,
    // since undefined == undefined.
    $rdf.IndexedFormula.prototype.sameThings = function (x, y) {
        if(x.sameTerm(y)) return true;
        var x1 = this.canon(x);
        if(x1 == undefined) return false;
        var y1 = this.canon(y);
        if(y1 == undefined) return false;
        return x1.sameTerm(y1);
    };

    // A list of all the URIs by which this thing is known
    // (canonical URI first; empty if the canonical term has no uri).
    $rdf.IndexedFormula.prototype.uris = function (term) {
        var cterm = this.canon(term);
        if(!cterm.uri) return [];
        var terms = this.aliases[cterm.hashString()];
        var res = [cterm.uri];
        if(terms != undefined) {
            for(var i = 0; i < terms.length; i++) {
                res.push(terms[i].uri);
            }
        }
        return res;
    };

    // On input parameters, convert constants to terms
    //
    // Strings and numbers become plain literals, booleans become "1"/"0"
    // literals typed with $rdf.Symbol.prototype.XSDboolean, undefined stays
    // undefined and objects are passed through untouched. Any other
    // primitive throws a string. The formula and canonicalize parameters are
    // not used here.
    function RDFMakeTerm(formula, val, canonicalize) {
        if(typeof val != 'object') {
            if(typeof val == 'string')
                return new $rdf.Literal(val);
            if(typeof val == 'number')
                return new $rdf.Literal(val); // @@ different types
            if(typeof val == 'boolean')
                return new $rdf.Literal(val ? "1" : "0", undefined, $rdf.Symbol.prototype.XSDboolean);
            if(typeof val == 'undefined')
                return undefined;
            else // @@ add converting of dates and numbers
                throw "Can't make Term from " + val + " of type " + typeof val;
        }
        return val;
    }

    // Add a triple to the store
    //
    //  Returns the statement added
    // (would it be better to return the original formula for chaining?)
    //
    // When why is undefined it defaults to this.fetcher.appNode if there is
    // a fetcher, and to the symbol chrome:theSession otherwise.
    $rdf.IndexedFormula.prototype.add = function (subj, pred, obj, why) {
        var actions, st, i;
        if(why == undefined) why = this.fetcher ? this.fetcher.appNode : this.sym("chrome:theSession"); //system generated
        //defined in source.js, is this OK with identity.js only user?
        subj = RDFMakeTerm(this, subj);
        pred = RDFMakeTerm(this, pred);
        obj = RDFMakeTerm(this, obj);
        why = RDFMakeTerm(this, why);

        if(this.predicateCallback != undefined) {
            this.predicateCallback(this, pred, why);
        }

        // Action return true if the statement does not need to be added
        actions = this.propertyActions[this.canon(pred).hashString()];
        var done = false;
        if(actions) {
            for(i = 0; i < actions.length; i++) {
                // NB: short-circuits once an action has returned true.
                done = done || actions[i](this, subj, pred, obj, why);
            }
        }

        //If we are tracking provenance, every thing should be loaded into the store
        //if (done) return new Statement(subj, pred, obj, why); // Don't put it in the store
        // still return this statement for owl:sameAs input

        // The hashes are taken after the actions have run, so that they
        // reflect any redirection the actions have just set up.
        var hash = [this.canon(subj).hashString(), this.canon(pred).hashString(),
                    this.canon(obj).hashString(), this.canon(why).hashString()];
        st = new $rdf.Statement(subj, pred, obj, why);
        for(i = 0; i < 4; i++) {
            var ix = this.index[i];
            var h = hash[i];
            if(ix[h] == undefined) ix[h] = [];
            ix[h].push(st); // Set of things with this as subject, etc
        }

        //tabulator.log.debug("ADDING    {"+subj+" "+pred+" "+obj+"} "+why);
        this.statements.push(st);
        return st;
    }; //add

    // Find out whether a given URI is used as symbol in the formula
    // (as subject, object or predicate; provenance is not considered)
    $rdf.IndexedFormula.prototype.mentionsURI = function (uri) {
        var hash = '<' + uri + '>';
        return (!!this.subjectIndex[hash]
                || !!this.objectIndex[hash]
                || !!this.predicateIndex[hash]);
    };

    // Find an unused id for a file being edited: return a symbol
    // (Note: Slow iff a lot of them -- could be O(log(k)) )
    $rdf.IndexedFormula.prototype.nextSymbol = function (doc) {
        for(var i = 0;; i++) {
            var uri = doc.uri + '#n' + i;
            if(!this.mentionsURI(uri)) return this.sym(uri);
        }
    };


    // Return one statement matching the pattern, or undefined if none does.
    $rdf.IndexedFormula.prototype.anyStatementMatching = function (subj, pred, obj, why) {
        var x = this.statementsMatching(subj, pred, obj, why, true);
        // (x == [] compares object identity and can never be true,
        // so the emptiness test has to look at the length.)
        if(!x || x.length === 0) return undefined;
        return x[0];
    };


    // Return statements matching a pattern
    // ALL CONVENIENCE LOOKUP FUNCTIONS RELY ON THIS!
    //
    // An undefined subj, pred, obj or why is a wildcard. When justOne is
    // true and at least one position is given, at most one statement is
    // returned.
    //
    // NB: with no position given this returns this.statements itself, and
    // with exactly one position given it may return the live index array.
    // Callers which add or remove statements while iterating must copy first.
    $rdf.IndexedFormula.prototype.statementsMatching = function (subj, pred, obj, why, justOne) {
        //$rdf.log.debug("Matching {"+subj+" "+pred+" "+obj+"}");
        var pat = [subj, pred, obj, why];
        var pattern = [];
        var hash = [];
        var wild = []; // wildcards
        var given = []; // Not wild
        var p, i, list;
        for(p = 0; p < 4; p++) {
            pattern[p] = this.canon(RDFMakeTerm(this, pat[p]));
            if(pattern[p] == undefined) {
                wild.push(p);
            } else {
                given.push(p);
                hash[p] = pattern[p].hashString();
            }
        }
        if(given.length == 0) {
            return this.statements;
        }
        if(given.length == 1) { // Easy too, we have an index for that
            p = given[0];
            list = this.index[p][hash[p]];
            if(list && justOne) {
                if(list.length > 1)
                    list = list.slice(0, 1);
            }
            return list == undefined ? [] : list;
        }

        // Now given.length is 2, 3 or 4.
        // We hope that the scale-free nature of the data will mean we tend to get
        // a short index in there somewhere!
        var best = 1e10; // really bad
        var best_i;
        for(i = 0; i < given.length; i++) {
            p = given[i]; // Which part we are dealing with
            list = this.index[p][hash[p]];
            if(list == undefined) return []; // No occurrences
            if(list.length < best) {
                best = list.length;
                best_i = i; // (not p!)
            }
        }

        // Ok, we have picked the shortest index but now we have to filter it
        var best_p = given[best_i];
        var possibles = this.index[best_p][hash[best_p]];
        var check = given.slice(0, best_i).concat(given.slice(best_i + 1)); // remove best_i
        var results = [];
        var parts = ['subject', 'predicate', 'object', 'why'];
        for(var j = 0; j < possibles.length; j++) {
            var st = possibles[j];
            for(i = 0; i < check.length; i++) { // for each position to be checked
                p = check[i];
                if(!this.canon(st[parts[p]]).sameTerm(pattern[p])) {
                    st = null;
                    break;
                }
            }
            if(st != null) {
                results.push(st);
                if(justOne)
                    break;
            }
        }
        return results;
    }; // statementsMatching

    /** remove a particular statement from the bank **/
    $rdf.IndexedFormula.prototype.remove = function (st) {
        //$rdf.log.debug("entering remove w/ st=" + st);
        var term = [st.subject, st.predicate, st.object, st.why];
        for(var p = 0; p < 4; p++) {
            var c = this.canon(term[p]);
            var h = c.hashString();
            if(this.index[p][h] == undefined) {
                //$rdf.log.warn ("Statement removal: no index '+p+': "+st);
            } else {
                $rdf.Util.RDFArrayRemove(this.index[p][h], st);
            }
        }
        $rdf.Util.RDFArrayRemove(this.statements, st);
    }; //remove

    /** remove all statements matching args (within limit) **/
    $rdf.IndexedFormula.prototype.removeMany = function (subj, pred, obj, why, limit) {
        //$rdf.log.debug("entering removeMany w/ subj,pred,obj,why,limit = " + subj +", "+ pred+", " + obj+", " + why+", " + limit);
        var sts = this.statementsMatching(subj, pred, obj, why, false);
        //This is a subtle bug that occurred in updateCenter.js too.
        //The fact is, this.statementsMatching returns this.whyIndex instead of a copy of it
        //but for performance consideration, it's better to just do that
        //so make a copy here.
        var statements = [];
        var i;
        for(i = 0; i < sts.length; i++) statements.push(sts[i]);
        if(limit) statements = statements.slice(0, limit);
        for(i = 0; i < statements.length; i++) this.remove(statements[i]);
    }; //removeMany

    /** Utility**/

    /*  @method: copyTo
        @description: replace @template with @target and add appropriate triples (no triple removed)
                      one-direction replication

        For every statement with @template as subject, a statement with
        @target as subject is added. Symbol objects are shared; literal,
        bnode and collection objects are copied with their copy() method.

        @flags is an optional array of strings:
          'two-direction'  also replicate the statements which have @template
                           as object, as (subject, predicate, @target).
                           NOTE(review): the original code discarded the
                           result of concat(), so this flag never had any
                           effect; the mirrored form is the reviewer's reading
                           of the intent -- confirm against callers.
          'delete'         remove each original statement after copying it.
    */
    $rdf.IndexedFormula.prototype.copyTo = function (template, target, flags) {
        if(!flags) flags = [];
        var twoDirection = $rdf.Util.ArrayIndexOf(flags, 'two-direction') != -1;
        var deleteOriginal = $rdf.Util.ArrayIndexOf(flags, 'delete') != -1;
        var templateTerm = this.canon(RDFMakeTerm(this, template));
        var i, st;

        // statementsMatching may hand back a live index array, and the add()
        // and remove() calls below change the indexes, so work on snapshots.
        var outgoing = this.statementsMatching(template).slice();
        var incoming = [];
        if(twoDirection && templateTerm != undefined) {
            incoming = this.statementsMatching(undefined, undefined, template).slice();
        }

        for(i = 0; i < outgoing.length; i++) {
            st = outgoing[i];
            switch(st.object.termType) {
            case 'symbol':
                this.add(target, st.predicate, st.object);
                break;
            case 'literal':
            case 'bnode':
            case 'collection':
                this.add(target, st.predicate, st.object.copy(this));
            }
            if(deleteOriginal) this.remove(st);
        }

        for(i = 0; i < incoming.length; i++) {
            st = incoming[i];
            // A statement from the template to itself was already handled
            // (and possibly removed) in the outgoing pass above.
            if(this.canon(st.subject).sameTerm(templateTerm)) continue;
            this.add(st.subject, st.predicate, target);
            if(deleteOriginal) this.remove(st);
        }
    };

    //for the case when you alter this.value (text modified in userinput.js)
    $rdf.Literal.prototype.copy = function () {
        return new $rdf.Literal(this.value, this.lang, this.datatype);
    };

    // Copy a blank node: a fresh blank node which receives, through copyTo,
    // a copy of the statements the original has as subject in the formula.
    $rdf.BlankNode.prototype.copy = function (formula) { //depends on the formula
        var bnodeNew = new $rdf.BlankNode();
        formula.copyTo(this, bnodeNew);
        return bnodeNew;
    };

    /**  Full N3 bits  -- placeholders only to allow parsing, no functionality! **/

    $rdf.IndexedFormula.prototype.newUniversal = function (uri) {
        var x = this.sym(uri);
        if(!this._universalVariables) this._universalVariables = [];
        this._universalVariables.push(x);
        return x;
    };

    $rdf.IndexedFormula.prototype.newExistential = function (uri) {
        if(!uri) return this.bnode();
        var x = this.sym(uri);
        return this.declareExistential(x);
    };

    $rdf.IndexedFormula.prototype.declareExistential = function (x) {
        if(!this._existentialVariables) this._existentialVariables = [];
        this._existentialVariables.push(x);
        return x;
    };

    // Create a new, empty IndexedFormula with the given features.
    $rdf.IndexedFormula.prototype.formula = function (features) {
        return new $rdf.IndexedFormula(features);
    };

    $rdf.IndexedFormula.prototype.close = function () {
        return this;
    };

    $rdf.IndexedFormula.prototype.hashString = $rdf.IndexedFormula.prototype.toNT;

    return $rdf.IndexedFormula;

}();
// ends