rdfparser.js (19970B)
/**
 * @fileoverview
 * TABULATOR RDF PARSER
 *
 * Version 0.1
 * Parser believed to be in full positive RDF/XML parsing compliance
 * with the possible exception of handling deprecated RDF attributes
 * appropriately. Parser is believed to comply fully with other W3C
 * and industry standards where appropriate (DOM, ECMAScript, &c.)
 *
 * Author: David Sheets <dsheets@mit.edu>
 * SVN ID: $Id$
 *
 * W3C® SOFTWARE NOTICE AND LICENSE
 * http://www.w3.org/Consortium/Legal/2002/copyright-software-20021231
 * This work (and included software, documentation such as READMEs, or
 * other related items) is being provided by the copyright holders under
 * the following license. By obtaining, using and/or copying this work,
 * you (the licensee) agree that you have read, understood, and will
 * comply with the following terms and conditions.
 *
 * Permission to copy, modify, and distribute this software and its
 * documentation, with or without modification, for any purpose and
 * without fee or royalty is hereby granted, provided that you include
 * the following on ALL copies of the software and documentation or
 * portions thereof, including modifications:
 *
 * 1. The full text of this NOTICE in a location viewable to users of
 * the redistributed or derivative work.
 * 2. Any pre-existing intellectual property disclaimers, notices, or terms and
 * conditions. If none exist, the W3C Software Short Notice should be
 * included (hypertext is preferred, text is permitted) within the body
 * of any redistributed or derivative code.
 * 3. Notice of any changes or modifications to the files, including the
 * date changes were made. (We recommend you provide URIs to the location
 * from which the code is derived.)
 *
 * THIS SOFTWARE AND DOCUMENTATION IS PROVIDED "AS IS," AND COPYRIGHT
 * HOLDERS MAKE NO REPRESENTATIONS OR WARRANTIES, EXPRESS OR IMPLIED,
 * INCLUDING BUT NOT LIMITED TO, WARRANTIES OF MERCHANTABILITY OR FITNESS
 * FOR ANY PARTICULAR PURPOSE OR THAT THE USE OF THE SOFTWARE OR
 * DOCUMENTATION WILL NOT INFRINGE ANY THIRD PARTY PATENTS, COPYRIGHTS,
 * TRADEMARKS OR OTHER RIGHTS.
 *
 * COPYRIGHT HOLDERS WILL NOT BE LIABLE FOR ANY DIRECT, INDIRECT, SPECIAL
 * OR CONSEQUENTIAL DAMAGES ARISING OUT OF ANY USE OF THE SOFTWARE OR
 * DOCUMENTATION.
 *
 * The name and trademarks of copyright holders may NOT be used in
 * advertising or publicity pertaining to the software without specific,
 * written prior permission. Title to copyright in this software and any
 * associated documentation will at all times remain with copyright
 * holders.
 */
/**
 * @class Class defining an RDFParser resource object tied to an RDFStore
 *
 * The parser walks a DOM tree iteratively (no recursion) and keeps a linked
 * chain of "frames" (see frameFactory) that alternate between NODE and ARC
 * roles. A triple is pushed into the store as soon as a NODE frame sits
 * below an ARC frame that itself sits below a NODE frame.
 *
 * Note: parsing is destructive for the DOM that is passed in. Every RDF and
 * xml/xmlns attribute that has been consumed is removed from its element.
 *
 * @author David Sheets <dsheets@mit.edu>
 * @version 0.1
 *
 * @constructor
 * @param {RDFStore} store An RDFStore object
 */
$rdf.RDFParser = function (store) {
  var RDFParser = {};

  /** Standard namespaces that we know how to handle @final
   *  @member RDFParser
   */
  RDFParser['ns'] = {
    'RDF': "http://www.w3.org/1999/02/22-rdf-syntax-ns#",
    'RDFS': "http://www.w3.org/2000/01/rdf-schema#"
  };
  /** DOM Level 2 node type magic numbers @final
   *  @member RDFParser
   */
  RDFParser['nodeType'] = {
    'ELEMENT': 1,
    'ATTRIBUTE': 2,
    'TEXT': 3,
    'CDATA_SECTION': 4,
    'ENTITY_REFERENCE': 5,
    'ENTITY': 6,
    'PROCESSING_INSTRUCTION': 7,
    'COMMENT': 8,
    'DOCUMENT': 9,
    'DOCUMENT_TYPE': 10,
    'DOCUMENT_FRAGMENT': 11,
    'NOTATION': 12
  };

  /**
   * Frame class for namespace and base URI lookups
   * Base lookups will always resolve because the parser knows
   * the default base.
   *
   * @private
   * @param {Object} parser  The parser that owns the frame (source of
   *                         'store', 'bnodes' and 'why')
   * @param {Object} [parent]  The enclosing frame; absent for the topmost frame
   * @param {Object} [element] The DOM node this frame represents, if any
   * @return {Object} A fresh frame object
   */
  this['frameFactory'] = function (parser, parent, element) {
    return {
      'NODE': 1,
      'ARC': 2,
      'parent': parent,
      'parser': parser,
      'store': parser['store'],
      'element': element,
      'lastChild': 0, // index of the next child of 'element' to visit
      'base': null,
      'lang': null,
      'node': null,
      'nodeType': null, // NODE, ARC or null while still undecided
      'listIndex': 1, // next number to use when expanding rdf:li
      'rdfid': null,
      'datatype': null,
      'collection': false,

      /** Terminate the frame and notify the store that we're done */
      'terminateFrame': function () {
        if (this['collection']) {
          this['node']['close']();
        }
      },

      /** Add a symbol of a certain type to the this frame */
      'addSymbol': function (type, uri) {
        uri = $rdf.Util.uri.join(uri, this['base']);
        this['node'] = this['store']['sym'](uri);
        this['nodeType'] = type;
      },

      /** Load any constructed triples into the store */
      'loadTriple': function () {
        if (this['parent']['parent']['collection']) {
          // the grandparent is a collection: append instead of adding a triple
          this['parent']['parent']['node']['append'](this['node']);
        } else {
          this['store']['add'](this['parent']['parent']['node'],
            this['parent']['node'],
            this['node'],
            this['parser']['why']);
        }
        if (this['parent']['rdfid'] != null) { // reify
          var triple = this['store']['sym'](
            $rdf.Util.uri.join("#" + this['parent']['rdfid'], this['base']));
          this['store']['add'](triple,
            this['store']['sym'](RDFParser['ns']['RDF'] + "type"),
            this['store']['sym'](RDFParser['ns']['RDF'] + "Statement"),
            this['parser']['why']);
          this['store']['add'](triple,
            this['store']['sym'](RDFParser['ns']['RDF'] + "subject"),
            this['parent']['parent']['node'],
            this['parser']['why']);
          this['store']['add'](triple,
            this['store']['sym'](RDFParser['ns']['RDF'] + "predicate"),
            this['parent']['node'],
            this['parser']['why']);
          this['store']['add'](triple,
            this['store']['sym'](RDFParser['ns']['RDF'] + "object"),
            this['node'],
            this['parser']['why']);
        }
      },

      /** Check if it's OK to load a triple */
      'isTripleToLoad': function () {
        return (this['parent'] != null
          && this['parent']['parent'] != null
          && this['nodeType'] == this['NODE']
          && this['parent']['nodeType'] == this['ARC']
          && this['parent']['parent']['nodeType'] == this['NODE']);
      },

      /** Add a symbolic node to this frame */
      'addNode': function (uri) {
        this['addSymbol'](this['NODE'], uri);
        if (this['isTripleToLoad']()) {
          this['loadTriple']();
        }
      },

      /** Add a collection node to this frame */
      'addCollection': function () {
        this['nodeType'] = this['NODE'];
        this['node'] = this['store']['collection']();
        this['collection'] = true;
        if (this['isTripleToLoad']()) {
          this['loadTriple']();
        }
      },

      /** Add a collection arc to this frame */
      'addCollectionArc': function () {
        this['nodeType'] = this['ARC'];
      },

      /** Add a bnode to this frame */
      'addBNode': function (id) {
        if (id != null) {
          // identified bnodes are shared through the parser's 'bnodes' table
          if (this['parser']['bnodes'][id] != null) {
            this['node'] = this['parser']['bnodes'][id];
          } else {
            this['node'] = this['parser']['bnodes'][id] = this['store']['bnode']();
          }
        } else {
          this['node'] = this['store']['bnode']();
        }

        this['nodeType'] = this['NODE'];
        if (this['isTripleToLoad']()) {
          this['loadTriple']();
        }
      },

      /** Add an arc or property to this frame */
      'addArc': function (uri) {
        if (uri == RDFParser['ns']['RDF'] + "li") {
          // rdf:li expands to rdf:_n, numbered per parent frame
          uri = RDFParser['ns']['RDF'] + "_" + this['parent']['listIndex']++;
        }
        this['addSymbol'](this['ARC'], uri);
      },

      /** Add a literal to this frame */
      'addLiteral': function (value) {
        if (this['parent']['datatype']) {
          this['node'] = this['store']['literal'](
            value, "", this['store']['sym'](
              this['parent']['datatype']));
        } else {
          this['node'] = this['store']['literal'](
            value, this['lang']);
        }
        this['nodeType'] = this['NODE'];
        if (this['isTripleToLoad']()) {
          this['loadTriple']();
        }
      }
    };
  };

  /**
   * Namespace-aware attribute lookup with a manual fallback for DOM
   * implementations that lack getAttributeNodeNS.
   * from the OpenLayers source .. needed to get around IE problems.
   *
   * @param {Element} node The element to inspect
   * @param {String} uri   Namespace URI of the attribute
   * @param {String} name  Local name of the attribute
   * @return {Attr} The matching attribute node, or null when the fallback
   *                scan finds none
   */
  this['getAttributeNodeNS'] = function (node, uri, name) {
    var attributeNode = null;
    if (node.getAttributeNodeNS) {
      attributeNode = node.getAttributeNodeNS(uri, name);
    } else {
      var attributes = node.attributes;
      var potentialNode, fullName;
      for (var i = 0; i < attributes.length; ++i) {
        potentialNode = attributes[i];
        if (potentialNode.namespaceURI == uri) {
          fullName = (potentialNode.prefix) ? (potentialNode.prefix + ":" + name) : name;
          if (fullName == potentialNode.nodeName) {
            attributeNode = potentialNode;
            break;
          }
        }
      }
    }
    return attributeNode;
  };

  /** Our triple store reference @private */
  this['store'] = store;
  /** Our identified blank nodes @private */
  this['bnodes'] = {};
  /** A context for context-aware stores @private */
  this['why'] = null;
  /** Reification flag */
  this['reify'] = false;

  /**
   * Build our initial scope frame and parse the DOM into triples
   * @param {DOMTree} document The DOM to parse
   * @param {String} base The base URL to use
   * @param {Object} why The context to which this resource belongs
   * @return {Boolean} true once the DOM has been parsed
   * @throws {Error} when no root element can be found in `document`
   */
  this['parse'] = function (document, base, why) {
    var children = document['childNodes'];

    // clean up for the next run
    this['cleanParser']();

    // figure out the root element
    //var root = document.documentElement; //this is faster, I think, cross-browser issue? well, DOM 2
    var root = null;
    if (document['nodeType'] == RDFParser['nodeType']['DOCUMENT']) {
      for (var c = 0; c < children['length']; c++) {
        if (children[c]['nodeType'] == RDFParser['nodeType']['ELEMENT']) {
          root = children[c];
          break;
        }
      }
    } else if (document['nodeType'] == RDFParser['nodeType']['ELEMENT']) {
      root = document;
    }
    if (root == null) {
      // Covers both an unsupported node type and a document that has no
      // element child; the latter used to fail later with a TypeError.
      throw new Error("RDFParser: can't find root in " + base + ". Halting. ");
    }

    this['why'] = why;

    // our topmost frame
    var f = this['frameFactory'](this);
    this['base'] = base;
    f['base'] = base;
    f['lang'] = '';

    this['parseDOM'](this['buildFrame'](f, root));
    return true;
  };

  /**
   * Walk the DOM below the given frame and emit triples into the store.
   * The walk is iterative: each pass of the outer loop handles the node of
   * the current frame, then the inner "dig dug" loop picks the next DOM node
   * to visit (descending, or climbing back up through parent frames).
   *
   * @param {Object} frame The frame built for the root element; it must have
   *                       a parent frame, which acts as the stop marker
   * @throws {Error} on an element or attribute without a namespace, or on a
   *                 node element that carries both rdf:ID and rdf:about
   */
  this['parseDOM'] = function (frame) {
    // Captured here because elementURI is invoked as a plain function, where
    // `this` is not the parser.
    var documentBase = this['base'];

    // a DOM utility function used in parsing
    var elementURI = function (el) {
      var result = "";
      if (el['namespaceURI'] == null) {
        throw new Error("RDF/XML syntax error: No namespace for "
          + el['localName'] + " in " + documentBase);
      }
      if (el['namespaceURI']) {
        result = result + el['namespaceURI'];
      }
      if (el['localName']) {
        result = result + el['localName'];
      } else if (el['nodeName']) {
        if (el['nodeName'].indexOf(":") >= 0) {
          result = result + el['nodeName'].split(":")[1];
        } else {
          result = result + el['nodeName'];
        }
      }
      return result;
    };
    var dig = true; // if we'll dig down in the tree on the next iter
    while (frame['parent']) {
      var dom = frame['element'];
      var attrs = dom['attributes'];

      if (dom['nodeType'] == RDFParser['nodeType']['TEXT']
        || dom['nodeType'] == RDFParser['nodeType']['CDATA_SECTION']) {
        //we have a literal
        if (frame['parent']['nodeType'] == frame['NODE']) {
          //must have had attributes, store as rdf:value
          frame['addArc'](RDFParser['ns']['RDF'] + 'value');
          frame = this['buildFrame'](frame);
        }
        frame['addLiteral'](dom['nodeValue']);
      } else if (elementURI(dom) != RDFParser['ns']['RDF'] + "RDF") {
        // not root
        if (frame['parent'] && frame['parent']['collection']) {
          // we're a collection element
          frame['addCollectionArc']();
          frame = this['buildFrame'](frame, frame['element']);
          frame['parent']['element'] = null;
        }
        if (!frame['parent'] || !frame['parent']['nodeType']
          || frame['parent']['nodeType'] == frame['ARC']) {
          // we need a node
          var about = this['getAttributeNodeNS'](dom, RDFParser['ns']['RDF'], "about");
          var rdfid = this['getAttributeNodeNS'](dom, RDFParser['ns']['RDF'], "ID");
          if (about && rdfid) {
            throw new Error("RDFParser: " + dom['nodeName']
              + " has both rdf:ID and rdf:about." + " Halting. Only one of these"
              + " properties may be specified on a" + " node.");
          }
          if (about == null && rdfid) {
            frame['addNode']("#" + rdfid['nodeValue']);
            dom['removeAttributeNode'](rdfid);
          } else if (about == null && rdfid == null) {
            var bnid = this['getAttributeNodeNS'](dom, RDFParser['ns']['RDF'], "nodeID");
            if (bnid) {
              frame['addBNode'](bnid['nodeValue']);
              dom['removeAttributeNode'](bnid);
            } else {
              frame['addBNode']();
            }
          } else {
            frame['addNode'](about['nodeValue']);
            dom['removeAttributeNode'](about);
          }

          // Typed nodes
          var rdftype = this['getAttributeNodeNS'](dom, RDFParser['ns']['RDF'], "type");
          if (RDFParser['ns']['RDF'] + "Description" != elementURI(dom)) {
            // a typed node element: the element name itself is the type
            rdftype = {
              'nodeValue': elementURI(dom)
            };
          }
          if (rdftype != null) {
            this['store']['add'](frame['node'],
              this['store']['sym'](RDFParser['ns']['RDF'] + "type"),
              this['store']['sym'](
                $rdf.Util.uri.join(
                  rdftype['nodeValue'],
                  frame['base'])),
              this['why']);
            if (rdftype['nodeName']) {
              // only a real attribute node (not the synthetic object above)
              dom['removeAttributeNode'](rdftype);
            }
          }

          // Property Attributes
          for (var x = attrs['length'] - 1; x >= 0; x--) {
            this['store']['add'](frame['node'],
              this['store']['sym'](elementURI(attrs[x])),
              this['store']['literal'](
                attrs[x]['nodeValue'],
                frame['lang']),
              this['why']);
          }
        } else {
          // we should add an arc (or implicit bnode+arc)
          frame['addArc'](elementURI(dom));

          // save the arc's rdf:ID if it has one
          if (this['reify']) {
            var rdfid = this['getAttributeNodeNS'](dom, RDFParser['ns']['RDF'], "ID");
            if (rdfid) {
              frame['rdfid'] = rdfid['nodeValue'];
              dom['removeAttributeNode'](rdfid);
            }
          }

          var parsetype = this['getAttributeNodeNS'](dom, RDFParser['ns']['RDF'], "parseType");
          var datatype = this['getAttributeNodeNS'](dom, RDFParser['ns']['RDF'], "datatype");
          if (datatype) {
            frame['datatype'] = datatype['nodeValue'];
            dom['removeAttributeNode'](datatype);
          }

          if (parsetype) {
            var nv = parsetype['nodeValue'];
            if (nv == "Literal") {
              frame['datatype'] = RDFParser['ns']['RDF'] + "XMLLiteral";
              // (this.buildFrame(frame)).addLiteral(dom)
              // should work but doesn't
              frame = this['buildFrame'](frame);
              frame['addLiteral'](dom);
              dig = false;
            } else if (nv == "Resource") {
              frame = this['buildFrame'](frame, frame['element']);
              frame['parent']['element'] = null;
              frame['addBNode']();
            } else if (nv == "Collection") {
              frame = this['buildFrame'](frame, frame['element']);
              frame['parent']['element'] = null;
              frame['addCollection']();
            }
            dom['removeAttributeNode'](parsetype);
          }

          if (attrs['length'] != 0) {
            var resource = this['getAttributeNodeNS'](dom, RDFParser['ns']['RDF'], "resource");
            var bnid = this['getAttributeNodeNS'](dom, RDFParser['ns']['RDF'], "nodeID");

            frame = this['buildFrame'](frame);
            if (resource) {
              frame['addNode'](resource['nodeValue']);
              dom['removeAttributeNode'](resource);
            } else {
              if (bnid) {
                frame['addBNode'](bnid['nodeValue']);
                dom['removeAttributeNode'](bnid);
              } else {
                frame['addBNode']();
              }
            }

            // remaining attributes become properties of the object node
            for (var x = attrs['length'] - 1; x >= 0; x--) {
              var f = this['buildFrame'](frame);
              f['addArc'](elementURI(attrs[x]));
              if (elementURI(attrs[x]) == RDFParser['ns']['RDF'] + "type") {
                (this['buildFrame'](f))['addNode'](
                  attrs[x]['nodeValue']);
              } else {
                (this['buildFrame'](f))['addLiteral'](
                  attrs[x]['nodeValue']);
              }
            }
          } else if (dom['childNodes']['length'] == 0) {
            // empty property element: the object is the empty literal
            (this['buildFrame'](frame))['addLiteral']("");
          }
        }
      } // rdf:RDF
      // dig dug
      dom = frame['element'];
      while (frame['parent']) {
        var pframe = frame;
        while (dom == null) {
          // frames without an element borrow the nearest ancestor's element
          frame = frame['parent'];
          dom = frame['element'];
        }
        var candidate = dom['childNodes'][frame['lastChild']];
        if (candidate == null || !dig) {
          // no more children here (or told not to descend): climb up
          frame['terminateFrame']();
          if (!(frame = frame['parent'])) {
            break;
          } // done
          dom = frame['element'];
          dig = true;
        } else if ((candidate['nodeType'] != RDFParser['nodeType']['ELEMENT']
          && candidate['nodeType'] != RDFParser['nodeType']['TEXT']
          && candidate['nodeType'] != RDFParser['nodeType']['CDATA_SECTION'])
          || ((candidate['nodeType'] == RDFParser['nodeType']['TEXT']
            || candidate['nodeType'] == RDFParser['nodeType']['CDATA_SECTION'])
            && dom['childNodes']['length'] != 1)) {
          // skip comments/PIs etc., and text that has sibling nodes
          frame['lastChild']++;
        } else {
          // not a leaf
          frame['lastChild']++;
          frame = this['buildFrame'](pframe, dom['childNodes'][frame['lastChild'] - 1]);
          break;
        }
      }
    } // while
  };

  /**
   * Cleans out state from a previous parse run
   * @private
   */
  this['cleanParser'] = function () {
    this['bnodes'] = {};
    this['why'] = null;
  };

  /**
   * Builds scope frame
   *
   * The new frame inherits base and lang from its parent. For element
   * nodes, xml:base and xml:lang override the inherited values, xmlns:*
   * declarations are registered with the store as prefixes, and every
   * attribute whose name starts with "xml" is removed from the element.
   *
   * @private
   * @param {Object} parent    The enclosing frame
   * @param {Node} [element]   The DOM node for the new frame; may be omitted
   * @return {Object} The new frame
   */
  this['buildFrame'] = function (parent, element) {
    var frame = this['frameFactory'](this, parent, element);
    if (parent) {
      frame['base'] = parent['base'];
      frame['lang'] = parent['lang'];
    }
    if (element == null
      || element['nodeType'] == RDFParser['nodeType']['TEXT']
      || element['nodeType'] == RDFParser['nodeType']['CDATA_SECTION']) {
      // nothing to scan for on text nodes or element-less frames
      return frame;
    }

    var attrs = element['attributes'];

    var base = element['getAttributeNode']("xml:base");
    if (base != null) {
      frame['base'] = base['nodeValue'];
      element['removeAttribute']("xml:base");
    }
    var lang = element['getAttributeNode']("xml:lang");
    if (lang != null) {
      frame['lang'] = lang['nodeValue'];
      element['removeAttribute']("xml:lang");
    }

    // remove all extraneous xml and xmlns attributes
    // (iterate backwards because removal shrinks the attribute list)
    for (var x = attrs['length'] - 1; x >= 0; x--) {
      if (attrs[x]['nodeName']['substr'](0, 3) == "xml") {
        if (attrs[x].name.slice(0, 6) == 'xmlns:') {
          var uri = attrs[x].nodeValue;
          if (this.base) {
            uri = $rdf.Util.uri.join(uri, this.base);
          }
          this.store.setPrefixForURI(attrs[x].name.slice(6), uri);
        }
        element['removeAttributeNode'](attrs[x]);
      }
    }
    return frame;
  };
};