www

Unnamed repository; edit this file 'description' to name the repository.
Log | Files | Refs | Submodules | README | LICENSE

identity.js (18705B)


      1 //  Identity management and indexing for RDF
      2 //
      3 // This file provides IndexedFormula, a formula (set of triples) which is
      4 // indexed by subject, predicate, object and provenance.
      5 //
      6 // It "smushes"  (merges into a single node) things which are identical 
      7 // according to owl:sameAs or an owl:InverseFunctionalProperty
      8 // or an owl:FunctionalProperty
      9 //
     10 //
     11 //  2005-10 Written Tim Berners-Lee
     12 //  2007    Changed so as not to munge statements from documents when smushing
     13 //
     14 // 
     15 /*jsl:option explicit*/
     16 // Turn on JavaScriptLint variable declaration checking
     17 $rdf.IndexedFormula = function () {
     18 
     19   var owl_ns = "http://www.w3.org/2002/07/owl#";
     20   // var link_ns = "http://www.w3.org/2007/ont/link#";
     21   /* hashString functions are used as array indeces. This is done to avoid
     22    ** conflict with existing properties of arrays such as length and map.
     23    ** See issue 139.
     24    */
     25   $rdf.Literal.prototype.hashString = $rdf.Literal.prototype.toNT;
     26   $rdf.Symbol.prototype.hashString = $rdf.Symbol.prototype.toNT;
     27   $rdf.BlankNode.prototype.hashString = $rdf.BlankNode.prototype.toNT;
     28   $rdf.Collection.prototype.hashString = $rdf.Collection.prototype.toNT;
     29 
     30 
     31   //Stores an associative array that maps URIs to functions
     32   $rdf.IndexedFormula = function (features) {
     33     this.statements = []; // As in Formula
     34     this.optional = [];
     35     this.propertyActions = []; // Array of functions to call when getting statement with {s X o}
     36     //maps <uri> to [f(F,s,p,o),...]
     37     this.classActions = []; // Array of functions to call when adding { s type X }
     38     this.redirections = []; // redirect to lexically smaller equivalent symbol
     39     this.aliases = []; // reverse mapping to redirection: aliases for this
     40     this.HTTPRedirects = []; // redirections we got from HTTP
     41     this.subjectIndex = []; // Array of statements with this X as subject
     42     this.predicateIndex = []; // Array of statements with this X as subject
     43     this.objectIndex = []; // Array of statements with this X as object
     44     this.whyIndex = []; // Array of statements with X as provenance
     45     this.index = [this.subjectIndex, this.predicateIndex, this.objectIndex, this.whyIndex];
     46     this.namespaces = {} // Dictionary of namespace prefixes
     47     if(features === undefined) features = ["sameAs",
     48                                 "InverseFunctionalProperty", "FunctionalProperty"];
     49     //    this.features = features
     50     // Callbackify?
     51     function handleRDFType(formula, subj, pred, obj, why) {
     52       if(formula.typeCallback != undefined)
     53         formula.typeCallback(formula, obj, why);
     54 
     55       var x = formula.classActions[obj.hashString()];
     56       var done = false;
     57       if(x) {
     58         for(var i = 0; i < x.length; i++) {
     59           done = done || x[i](formula, subj, pred, obj, why);
     60         }
     61       }
     62       return done; // statement given is not needed if true
     63     } //handleRDFType
     64     //If the predicate is #type, use handleRDFType to create a typeCallback on the object
     65     this.propertyActions['<http://www.w3.org/1999/02/22-rdf-syntax-ns#type>'] = [handleRDFType];
     66 
     67     // Assumption: these terms are not redirected @@fixme
     68     if($rdf.Util.ArrayIndexOf(features, "sameAs") >= 0)
     69       this.propertyActions['<http://www.w3.org/2002/07/owl#sameAs>'] = [
     70       function (formula, subj, pred, obj, why) {
     71       // tabulator.log.warn("Equating "+subj.uri+" sameAs "+obj.uri);  //@@
     72       formula.equate(subj, obj);
     73       return true; // true if statement given is NOT needed in the store
     74     }]; //sameAs -> equate & don't add to index
     75     if($rdf.Util.ArrayIndexOf(features, "InverseFunctionalProperty") >= 0)
     76       this.classActions["<" + owl_ns + "InverseFunctionalProperty>"] = [
     77       function (formula, subj, pred, obj, addFn) {
     78       return formula.newPropertyAction(subj, handle_IFP); // yes subj not pred!
     79     }]; //IFP -> handle_IFP, do add to index
     80     if($rdf.Util.ArrayIndexOf(features, "FunctionalProperty") >= 0)
     81       this.classActions["<" + owl_ns + "FunctionalProperty>"] = [
     82         function (formula, subj, proj, obj, addFn) {
     83          return formula.newPropertyAction(subj, handle_FP);
     84         }
     85       ]; //FP => handleFP, do add to index
     86     function handle_IFP(formula, subj, pred, obj) {
     87       var s1 = formula.any(undefined, pred, obj);
     88       if(s1 == undefined) return false; // First time with this value
     89       // tabulator.log.warn("Equating "+s1.uri+" and "+subj.uri + " because IFP "+pred.uri);  //@@
     90       formula.equate(s1, subj);
     91       return true;
     92     } //handle_IFP
     93     function handle_FP(formula, subj, pred, obj) {
     94       var o1 = formula.any(subj, pred, undefined);
     95       if(o1 == undefined) return false; // First time with this value
     96       // tabulator.log.warn("Equating "+o1.uri+" and "+obj.uri + " because FP "+pred.uri);  //@@
     97       formula.equate(o1, obj);
     98       return true;
     99     } //handle_FP
    100   } /* end IndexedFormula */
    101 
    102   $rdf.IndexedFormula.prototype = new $rdf.Formula();
    103   $rdf.IndexedFormula.prototype.constructor = $rdf.IndexedFormula;
    104   $rdf.IndexedFormula.SuperClass = $rdf.Formula;
    105 
    106   $rdf.IndexedFormula.prototype.newPropertyAction = function newPropertyAction(pred, action) {
    107     //$rdf.log.debug("newPropertyAction:  "+pred);
    108     var hash = pred.hashString();
    109     if(this.propertyActions[hash] == undefined)
    110       this.propertyActions[hash] = [];
    111     this.propertyActions[hash].push(action);
    112     // Now apply the function to to statements already in the store
    113     var toBeFixed = this.statementsMatching(undefined, pred, undefined);
    114     var done = false;
    115     for(var i = 0; i < toBeFixed.length; i++) { // NOT optimized - sort toBeFixed etc
    116       done = done || action(this, toBeFixed[i].subject, pred, toBeFixed[i].object);
    117     }
    118     return done;
    119   }
    120 
    121   $rdf.IndexedFormula.prototype.setPrefixForURI = function (prefix, nsuri) {
    122     //TODO:This is a hack for our own issues, which ought to be fixed post-release
    123     //See http://dig.csail.mit.edu/cgi-bin/roundup.cgi/$rdf/issue227
    124     if(prefix == "tab" && this.namespaces["tab"]) {
    125       return;
    126     }
    127     this.namespaces[prefix] = nsuri
    128   }
    129 
    130   // Deprocated ... name too generic
    131   $rdf.IndexedFormula.prototype.register = function (prefix, nsuri) {
    132     this.namespaces[prefix] = nsuri
    133   }
    134 
    135 
    136   /** simplify graph in store when we realize two identifiers are equivalent
    137 
    138 We replace the bigger with the smaller.
    139 
    140 */
    141   $rdf.IndexedFormula.prototype.equate = function (u1, u2) {
    142     // tabulator.log.warn("Equating "+u1+" and "+u2); // @@
    143     //@@JAMBO Must canonicalize the uris to prevent errors from a=b=c
    144     //03-21-2010
    145     u1 = this.canon(u1);
    146     u2 = this.canon(u2);
    147     var d = u1.compareTerm(u2);
    148     if(!d) return true; // No information in {a = a}
    149     var big, small;
    150     if(d < 0) { // u1 less than u2
    151       return this.replaceWith(u2, u1);
    152     } else {
    153       return this.replaceWith(u1, u2);
    154     }
    155   }
    156 
    157   // Replace big with small, obsoleted with obsoleting.
    158   //
    159   $rdf.IndexedFormula.prototype.replaceWith = function (big, small) {
    160     //$rdf.log.debug("Replacing "+big+" with "+small) // @@
    161     var oldhash = big.hashString();
    162     var newhash = small.hashString();
    163 
    164     var moveIndex = function (ix) {
    165         var oldlist = ix[oldhash];
    166         if(oldlist == undefined) return; // none to move
    167         var newlist = ix[newhash];
    168         if(newlist == undefined) {
    169           ix[newhash] = oldlist;
    170         } else {
    171           ix[newhash] = oldlist.concat(newlist);
    172         }
    173         delete ix[oldhash];
    174       }
    175 
    176       // the canonical one carries all the indexes
    177     for(var i = 0; i < 4; i++) {
    178       moveIndex(this.index[i]);
    179     }
    180 
    181     this.redirections[oldhash] = small;
    182     if(big.uri) {
    183       //@@JAMBO: must update redirections,aliases from sub-items, too.
    184       if(this.aliases[newhash] == undefined)
    185         this.aliases[newhash] = [];
    186       this.aliases[newhash].push(big); // Back link
    187       if(this.aliases[oldhash]) {
    188         for(var i = 0; i < this.aliases[oldhash].length; i++) {
    189           this.redirections[this.aliases[oldhash][i].hashString()] = small;
    190           this.aliases[newhash].push(this.aliases[oldhash][i]);
    191         }
    192       }
    193 
    194       //this.add(small, this.sym('http://www.w3.org/2007/ont/link#uri'), big.uri)
    195 
    196       // If two things are equal, and one is requested, we should request the other.
    197       if(this.sf) {
    198         this.sf.nowKnownAs(big, small)
    199       }
    200     }
    201 
    202     moveIndex(this.classActions);
    203     moveIndex(this.propertyActions);
    204 
    205     $rdf.log.debug("Equate done. "+big+" now links to "+small)    
    206     return true; // true means the statement does not need to be put in
    207   };
    208 
    209   // Return the symbol with canonical URI as smushed
    210   $rdf.IndexedFormula.prototype.canon = function (term) {
    211     if(term == undefined) return term;
    212     var y = this.redirections[term.hashString()];
    213     if(y == undefined) return term;
    214     return y;
    215   }
    216 
    217   // Compare by canonical URI as smushed
    218   $rdf.IndexedFormula.prototype.sameThings = function (x, y) {
    219     if(x.sameTerm(y)) return true;
    220     var x1 = this.canon(x);
    221     //    alert('x1='+x1);
    222     if(x1 == undefined) return false;
    223     var y1 = this.canon(y);
    224     //    alert('y1='+y1); //@@
    225     if(y1 == undefined) return false;
    226     return(x1.uri == y1.uri);
    227   }
    228 
    229   // A list of all the URIs by which this thing is known
    230   $rdf.IndexedFormula.prototype.uris = function (term) {
    231     var cterm = this.canon(term)
    232     var terms = this.aliases[cterm.hashString()];
    233     if(!cterm.uri) return []
    234     var res = [cterm.uri]
    235     if(terms != undefined) {
    236       for(var i = 0; i < terms.length; i++) {
    237         res.push(terms[i].uri)
    238       }
    239     }
    240     return res
    241   }
    242 
    243   // On input parameters, convert constants to terms
    244   // 
    245   function RDFMakeTerm(formula, val, canonicalize) {
    246     if(typeof val != 'object') {
    247       if(typeof val == 'string')
    248         return new $rdf.Literal(val);
    249       if(typeof val == 'number')
    250         return new $rdf.Literal(val); // @@ differet types
    251       if(typeof val == 'boolean')
    252         return new $rdf.Literal(val ? "1" : "0", undefined, $rdf.Symbol.prototype.XSDboolean);
    253       if(typeof val == 'undefined')
    254         return undefined;
    255       else // @@ add converting of dates and numbers
    256       throw "Can't make Term from " + val + " of type " + typeof val;
    257     }
    258     return val;
    259   }
    260 
    261   // Add a triple to the store
    262   //
    263   //  Returns the statement added
    264   // (would it be better to return the original formula for chaining?)
    265   //
    266   $rdf.IndexedFormula.prototype.add = function (subj, pred, obj, why) {
    267     var actions, st;
    268     if(why == undefined) why = this.fetcher ? this.fetcher.appNode : this.sym("chrome:theSession"); //system generated
    269     //defined in source.js, is this OK with identity.js only user?
    270     subj = RDFMakeTerm(this, subj);
    271     pred = RDFMakeTerm(this, pred);
    272     obj = RDFMakeTerm(this, obj);
    273     why = RDFMakeTerm(this, why);
    274 
    275     if(this.predicateCallback != undefined)
    276       this.predicateCallback(this, pred, why);
    277 
    278     // Action return true if the statement does not need to be added
    279     var actions = this.propertyActions[this.canon(pred).hashString()];
    280     var done = false;
    281     if(actions) {
    282       // alert('type: '+typeof actions +' @@ actions='+actions);
    283       for(var i = 0; i < actions.length; i++) {
    284         done = done || actions[i](this, subj, pred, obj, why);
    285       }
    286     }
    287 
    288     //If we are tracking provenanance, every thing should be loaded into the store
    289     //if (done) return new Statement(subj, pred, obj, why); // Don't put it in the store
    290     // still return this statement for owl:sameAs input
    291     var hash = [this.canon(subj).hashString(), this.canon(pred).hashString(),
    292                    this.canon(obj).hashString(), this.canon(why).hashString()];
    293     var st = new $rdf.Statement(subj, pred, obj, why);
    294     for(var i = 0; i < 4; i++) {
    295       var ix = this.index[i];
    296       var h = hash[i];
    297       if(ix[h] == undefined) ix[h] = [];
    298       ix[h].push(st); // Set of things with this as subject, etc
    299     }
    300 
    301     //tabulator.log.debug("ADDING    {"+subj+" "+pred+" "+obj+"} "+why);
    302     this.statements.push(st);
    303     return st;
    304   }; //add
    305   // Find out whether a given URI is used as symbol in the formula
    306   $rdf.IndexedFormula.prototype.mentionsURI = function (uri) {
    307     var hash = '<' + uri + '>';
    308     return (!!this.subjectIndex[hash]
    309       || !!this.objectIndex[hash]
    310       || !!this.predicateIndex[hash]);
    311   }
    312 
    313   // Find an unused id for a file being edited: return a symbol
    314   // (Note: Slow iff a lot of them -- could be O(log(k)) )
    315   $rdf.IndexedFormula.prototype.nextSymbol = function (doc) {
    316     for(var i = 0;; i++) {
    317       var uri = doc.uri + '#n' + i;
    318       if(!this.mentionsURI(uri)) return this.sym(uri);
    319     }
    320   }
    321 
    322 
    323   $rdf.IndexedFormula.prototype.anyStatementMatching = function (subj, pred, obj, why) {
    324     var x = this.statementsMatching(subj, pred, obj, why, true);
    325     if(!x || x == []) return undefined;
    326     return x[0];
    327   };
    328 
    329 
    330   // Return statements matching a pattern
    331   // ALL CONVENIENCE LOOKUP FUNCTIONS RELY ON THIS!
    332   $rdf.IndexedFormula.prototype.statementsMatching = function (subj, pred, obj, why, justOne) {
    333     //$rdf.log.debug("Matching {"+subj+" "+pred+" "+obj+"}");
    334     var pat = [subj, pred, obj, why];
    335     var pattern = [];
    336     var hash = [];
    337     var wild = []; // wildcards
    338     var given = []; // Not wild
    339     for(var p = 0; p < 4; p++) {
    340       pattern[p] = this.canon(RDFMakeTerm(this, pat[p]));
    341       if(pattern[p] == undefined) {
    342         wild.push(p);
    343       } else {
    344         given.push(p);
    345         hash[p] = pattern[p].hashString();
    346       }
    347     }
    348     if(given.length == 0) {
    349       return this.statements;
    350     }
    351     if(given.length == 1) { // Easy too, we have an index for that
    352       var p = given[0];
    353       var list = this.index[p][hash[p]];
    354       if(list && justOne) {
    355         if(list.length > 1)
    356           list = list.slice(0, 1);
    357       }
    358       return list == undefined ? [] : list;
    359     }
    360 
    361     // Now given.length is 2, 3 or 4.
    362     // We hope that the scale-free nature of the data will mean we tend to get
    363     // a short index in there somewhere!
    364     var best = 1e10; // really bad
    365     var best_i;
    366     for(var i = 0; i < given.length; i++) {
    367       var p = given[i]; // Which part we are dealing with
    368       var list = this.index[p][hash[p]];
    369       if(list == undefined) return []; // No occurrences
    370       if(list.length < best) {
    371         best = list.length;
    372         best_i = i; // (not p!)
    373       }
    374     }
    375 
    376     // Ok, we have picked the shortest index but now we have to filter it
    377     var best_p = given[best_i];
    378     var possibles = this.index[best_p][hash[best_p]];
    379     var check = given.slice(0, best_i).concat(given.slice(best_i + 1)) // remove best_i
    380     var results = [];
    381     var parts = ['subject', 'predicate', 'object', 'why'];
    382     for(var j = 0; j < possibles.length; j++) {
    383       var st = possibles[j];
    384       for(var i = 0; i < check.length; i++) { // for each position to be checked
    385         var p = check[i];
    386         if(!this.canon(st[parts[p]]).sameTerm(pattern[p])) {
    387           st = null;
    388           break;
    389         }
    390       }
    391       if(st != null) {
    392         results.push(st);
    393         if(justOne)
    394           break;
    395       }
    396     }
    397     return results;
    398   }; // statementsMatching
    399   /** remove a particular statement from the bank **/
    400   $rdf.IndexedFormula.prototype.remove = function (st) {
    401     //$rdf.log.debug("entering remove w/ st=" + st);
    402     var term = [st.subject, st.predicate, st.object, st.why];
    403     for(var p = 0; p < 4; p++) {
    404       var c = this.canon(term[p]);
    405       var h = c.hashString();
    406       if(this.index[p][h] == undefined) {
    407         //$rdf.log.warn ("Statement removal: no index '+p+': "+st);
    408       } else {
    409         $rdf.Util.RDFArrayRemove(this.index[p][h], st);
    410       }
    411     }
    412     $rdf.Util.RDFArrayRemove(this.statements, st);
    413   }; //remove
    414   /** remove all statements matching args (within limit) **/
    415   $rdf.IndexedFormula.prototype.removeMany = function (subj, pred, obj, why, limit) {
    416     //$rdf.log.debug("entering removeMany w/ subj,pred,obj,why,limit = " + subj +", "+ pred+", " + obj+", " + why+", " + limit);
    417     var sts = this.statementsMatching(subj, pred, obj, why, false);
    418     //This is a subtle bug that occcured in updateCenter.js too.
    419     //The fact is, this.statementsMatching returns this.whyIndex instead of a copy of it
    420     //but for perfromance consideration, it's better to just do that
    421     //so make a copy here.
    422     var statements = [];
    423     for(var i = 0; i < sts.length; i++) statements.push(sts[i]);
    424     if(limit) statements = statements.slice(0, limit);
    425     for(var i = 0; i < statements.length; i++) this.remove(statements[i]);
    426   }; //removeMany
    427   /** Utility**/
    428 
    429   /*  @method: copyTo
    430     @description: replace @template with @target and add appropriate triples (no triple removed)
    431                   one-direction replication 
    432 */
    433   $rdf.IndexedFormula.prototype.copyTo = function (template, target, flags) {
    434     if(!flags) flags = [];
    435     var statList = this.statementsMatching(template);
    436     if($rdf.Util.ArrayIndexOf(flags, 'two-direction') != -1)
    437       statList.concat(this.statementsMatching(undefined, undefined, template));
    438     for(var i = 0; i < statList.length; i++) {
    439       var st = statList[i];
    440       switch(st.object.termType) {
    441       case 'symbol':
    442         this.add(target, st.predicate, st.object);
    443         break;
    444       case 'literal':
    445       case 'bnode':
    446       case 'collection':
    447         this.add(target, st.predicate, st.object.copy(this));
    448       }
    449       if($rdf.Util.ArrayIndexOf(flags, 'delete') != -1) this.remove(st);
    450     }
    451   };
    452   //for the case when you alter this.value (text modified in userinput.js)
    453   $rdf.Literal.prototype.copy = function () {
    454     return new $rdf.Literal(this.value, this.lang, this.datatype);
    455   };
    456   $rdf.BlankNode.prototype.copy = function (formula) { //depends on the formula
    457     var bnodeNew = new $rdf.BlankNode();
    458     formula.copyTo(this, bnodeNew);
    459     return bnodeNew;
    460   }
    461   /**  Full N3 bits  -- placeholders only to allow parsing, no functionality! **/
    462 
    463   $rdf.IndexedFormula.prototype.newUniversal = function (uri) {
    464     var x = this.sym(uri);
    465     if(!this._universalVariables) this._universalVariables = [];
    466     this._universalVariables.push(x);
    467     return x;
    468   }
    469 
    470   $rdf.IndexedFormula.prototype.newExistential = function (uri) {
    471     if(!uri) return this.bnode();
    472     var x = this.sym(uri);
    473     return this.declareExistential(x);
    474   }
    475 
    476   $rdf.IndexedFormula.prototype.declareExistential = function (x) {
    477     if(!this._existentialVariables) this._existentialVariables = [];
    478     this._existentialVariables.push(x);
    479     return x;
    480   }
    481 
    482   $rdf.IndexedFormula.prototype.formula = function (features) {
    483     return new $rdf.IndexedFormula(features);
    484   }
    485 
    486   $rdf.IndexedFormula.prototype.close = function () {
    487     return this;
    488   }
    489 
    490   $rdf.IndexedFormula.prototype.hashString = $rdf.IndexedFormula.prototype.toNT;
    491 
    492   return $rdf.IndexedFormula;
    493 
    494 }();
    495 // ends