www

Unnamed repository; edit this file 'description' to name the repository.
Log | Files | Refs | Submodules | README | LICENSE

rdfparser.js (19970B)


/**
 * @fileoverview
 * TABULATOR RDF PARSER
 *
 * Version 0.1
 *  Parser believed to be in full positive RDF/XML parsing compliance
 *  with the possible exception of handling deprecated RDF attributes
 *  appropriately. Parser is believed to comply fully with other W3C
 *  and industry standards where appropriate (DOM, ECMAScript, &c.)
 *
 *  Author: David Sheets <dsheets@mit.edu>
 *  SVN ID: $Id$
 *
 * W3C® SOFTWARE NOTICE AND LICENSE
 * http://www.w3.org/Consortium/Legal/2002/copyright-software-20021231
 * This work (and included software, documentation such as READMEs, or
 * other related items) is being provided by the copyright holders under
 * the following license. By obtaining, using and/or copying this work,
 * you (the licensee) agree that you have read, understood, and will
 * comply with the following terms and conditions.
 *
 * Permission to copy, modify, and distribute this software and its
 * documentation, with or without modification, for any purpose and
 * without fee or royalty is hereby granted, provided that you include
 * the following on ALL copies of the software and documentation or
 * portions thereof, including modifications:
 *
 * 1. The full text of this NOTICE in a location viewable to users of
 * the redistributed or derivative work.
 * 2. Any pre-existing intellectual property disclaimers, notices, or terms and
 * conditions. If none exist, the W3C Software Short Notice should be
 * included (hypertext is preferred, text is permitted) within the body
 * of any redistributed or derivative code.
 * 3. Notice of any changes or modifications to the files, including the
 * date changes were made. (We recommend you provide URIs to the location
 * from which the code is derived.)
 *
 * THIS SOFTWARE AND DOCUMENTATION IS PROVIDED "AS IS," AND COPYRIGHT
 * HOLDERS MAKE NO REPRESENTATIONS OR WARRANTIES, EXPRESS OR IMPLIED,
 * INCLUDING BUT NOT LIMITED TO, WARRANTIES OF MERCHANTABILITY OR FITNESS
 * FOR ANY PARTICULAR PURPOSE OR THAT THE USE OF THE SOFTWARE OR
 * DOCUMENTATION WILL NOT INFRINGE ANY THIRD PARTY PATENTS, COPYRIGHTS,
 * TRADEMARKS OR OTHER RIGHTS.
 *
 * COPYRIGHT HOLDERS WILL NOT BE LIABLE FOR ANY DIRECT, INDIRECT, SPECIAL
 * OR CONSEQUENTIAL DAMAGES ARISING OUT OF ANY USE OF THE SOFTWARE OR
 * DOCUMENTATION.
 *
 * The name and trademarks of copyright holders may NOT be used in
 * advertising or publicity pertaining to the software without specific,
 * written prior permission. Title to copyright in this software and any
 * associated documentation will at all times remain with copyright
 * holders.
 */
     55 /**
     56  * @class Class defining an RDFParser resource object tied to an RDFStore
     57  *  
     58  * @author David Sheets <dsheets@mit.edu>
     59  * @version 0.1
     60  * 
     61  * @constructor
     62  * @param {RDFStore} store An RDFStore object
     63  */
     64 $rdf.RDFParser = function (store) {
     65   var RDFParser = {};
     66 
     67   /** Standard namespaces that we know how to handle @final
     68    *  @member RDFParser
     69    */
     70   RDFParser['ns'] = {
     71     'RDF': "http://www.w3.org/1999/02/22-rdf-syntax-ns#",
     72     'RDFS': "http://www.w3.org/2000/01/rdf-schema#"
     73   }
     74   /** DOM Level 2 node type magic numbers @final
     75    *  @member RDFParser
     76    */
     77   RDFParser['nodeType'] = {
     78     'ELEMENT': 1,
     79     'ATTRIBUTE': 2,
     80     'TEXT': 3,
     81     'CDATA_SECTION': 4,
     82     'ENTITY_REFERENCE': 5,
     83     'ENTITY': 6,
     84     'PROCESSING_INSTRUCTION': 7,
     85     'COMMENT': 8,
     86     'DOCUMENT': 9,
     87     'DOCUMENT_TYPE': 10,
     88     'DOCUMENT_FRAGMENT': 11,
     89     'NOTATION': 12
     90   }
     91 
     92   /**
     93    * Frame class for namespace and base URI lookups
     94    * Base lookups will always resolve because the parser knows
     95    * the default base.
     96    *
     97    * @private
     98    */
     99   this['frameFactory'] = function (parser, parent, element) {
    100     return {
    101       'NODE': 1,
    102       'ARC': 2,
    103       'parent': parent,
    104       'parser': parser,
    105       'store': parser['store'],
    106       'element': element,
    107       'lastChild': 0,
    108       'base': null,
    109       'lang': null,
    110       'node': null,
    111       'nodeType': null,
    112       'listIndex': 1,
    113       'rdfid': null,
    114       'datatype': null,
    115       'collection': false,
    116 
    117       /** Terminate the frame and notify the store that we're done */
    118       'terminateFrame': function () {
    119         if(this['collection']) {
    120           this['node']['close']()
    121         }
    122       },
    123 
    124       /** Add a symbol of a certain type to the this frame */
    125       'addSymbol': function (type, uri) {
    126         uri = $rdf.Util.uri.join(uri, this['base'])
    127         this['node'] = this['store']['sym'](uri)
    128         this['nodeType'] = type
    129       },
    130 
    131       /** Load any constructed triples into the store */
    132       'loadTriple': function () {
    133         if(this['parent']['parent']['collection']) {
    134           this['parent']['parent']['node']['append'](this['node'])
    135         } else {
    136           this['store']['add'](this['parent']['parent']['node'],
    137             this['parent']['node'],
    138             this['node'],
    139             this['parser']['why'])
    140         }
    141         if(this['parent']['rdfid'] != null) { // reify
    142           var triple = this['store']['sym'](
    143           $rdf.Util.uri.join("#" + this['parent']['rdfid'], this['base']))
    144           this['store']['add'](triple,
    145             this['store']['sym'](RDFParser['ns']['RDF'] + "type"),
    146             this['store']['sym'](RDFParser['ns']['RDF'] + "Statement"),
    147             this['parser']['why'])
    148           this['store']['add'](triple,
    149             this['store']['sym'](RDFParser['ns']['RDF'] + "subject"),
    150             this['parent']['parent']['node'],
    151             this['parser']['why'])
    152           this['store']['add'](triple,
    153             this['store']['sym'](RDFParser['ns']['RDF'] + "predicate"),
    154             this['parent']['node'],
    155             this['parser']['why'])
    156           this['store']['add'](triple,
    157             this['store']['sym'](RDFParser['ns']['RDF'] + "object"),
    158             this['node'],
    159             this['parser']['why'])
    160         }
    161       },
    162 
    163       /** Check if it's OK to load a triple */
    164       'isTripleToLoad': function () {
    165         return (this['parent'] != null
    166           && this['parent']['parent'] != null
    167           && this['nodeType'] == this['NODE']
    168           && this['parent']['nodeType'] == this['ARC']
    169           && this['parent']['parent']['nodeType'] == this['NODE'])
    170       },
    171 
    172       /** Add a symbolic node to this frame */
    173       'addNode': function (uri) {
    174         this['addSymbol'](this['NODE'], uri)
    175         if(this['isTripleToLoad']()) {
    176           this['loadTriple']()
    177         }
    178       },
    179 
    180       /** Add a collection node to this frame */
    181       'addCollection': function () {
    182         this['nodeType'] = this['NODE']
    183         this['node'] = this['store']['collection']()
    184         this['collection'] = true
    185         if(this['isTripleToLoad']()) {
    186           this['loadTriple']()
    187         }
    188       },
    189 
    190       /** Add a collection arc to this frame */
    191       'addCollectionArc': function () {
    192         this['nodeType'] = this['ARC']
    193       },
    194 
    195       /** Add a bnode to this frame */
    196       'addBNode': function (id) {
    197         if(id != null) {
    198           if(this['parser']['bnodes'][id] != null) {
    199             this['node'] = this['parser']['bnodes'][id]
    200           } else {
    201             this['node'] = this['parser']['bnodes'][id] = this['store']['bnode']()
    202           }
    203         } else {
    204           this['node'] = this['store']['bnode']()
    205         }
    206 
    207         this['nodeType'] = this['NODE']
    208         if(this['isTripleToLoad']()) {
    209           this['loadTriple']()
    210         }
    211       },
    212 
    213       /** Add an arc or property to this frame */
    214       'addArc': function (uri) {
    215         if(uri == RDFParser['ns']['RDF'] + "li") {
    216           uri = RDFParser['ns']['RDF'] + "_" + this['parent']['listIndex']++
    217         }
    218         this['addSymbol'](this['ARC'], uri)
    219       },
    220 
    221       /** Add a literal to this frame */
    222       'addLiteral': function (value) {
    223         if(this['parent']['datatype']) {
    224           this['node'] = this['store']['literal'](
    225           value, "", this['store']['sym'](
    226           this['parent']['datatype']))
    227         } else {
    228           this['node'] = this['store']['literal'](
    229           value, this['lang'])
    230         }
    231         this['nodeType'] = this['NODE']
    232         if(this['isTripleToLoad']()) {
    233           this['loadTriple']()
    234         }
    235       }
    236     }
    237   }
    238 
    239   //from the OpenLayers source .. needed to get around IE problems.
    240   this['getAttributeNodeNS'] = function (node, uri, name) {
    241     var attributeNode = null;
    242     if(node.getAttributeNodeNS) {
    243       attributeNode = node.getAttributeNodeNS(uri, name);
    244     } else {
    245       var attributes = node.attributes;
    246       var potentialNode, fullName;
    247       for(var i = 0; i < attributes.length; ++i) {
    248         potentialNode = attributes[i];
    249         if(potentialNode.namespaceURI == uri) {
    250           fullName = (potentialNode.prefix) ? (potentialNode.prefix + ":" + name) : name;
    251           if(fullName == potentialNode.nodeName) {
    252             attributeNode = potentialNode;
    253             break;
    254           }
    255         }
    256       }
    257     }
    258     return attributeNode;
    259   }
    260 
    261   /** Our triple store reference @private */
    262   this['store'] = store
    263   /** Our identified blank nodes @private */
    264   this['bnodes'] = {}
    265   /** A context for context-aware stores @private */
    266   this['why'] = null
    267   /** Reification flag */
    268   this['reify'] = false
    269 
    270   /**
    271    * Build our initial scope frame and parse the DOM into triples
    272    * @param {DOMTree} document The DOM to parse
    273    * @param {String} base The base URL to use 
    274    * @param {Object} why The context to which this resource belongs
    275    */
    276   this['parse'] = function (document, base, why) {
    277     // alert('parse base:'+base);
    278     var children = document['childNodes']
    279 
    280     // clean up for the next run
    281     this['cleanParser']()
    282 
    283     // figure out the root element
    284     //var root = document.documentElement; //this is faster, I think, cross-browser issue? well, DOM 2
    285     if(document['nodeType'] == RDFParser['nodeType']['DOCUMENT']) {
    286       for(var c = 0; c < children['length']; c++) {
    287         if(children[c]['nodeType'] == RDFParser['nodeType']['ELEMENT']) {
    288           var root = children[c]
    289           break
    290         }
    291       }
    292     } else if(document['nodeType'] == RDFParser['nodeType']['ELEMENT']) {
    293       var root = document
    294     } else {
    295       throw new Error("RDFParser: can't find root in " + base + ". Halting. ")
    296       return false
    297     }
    298 
    299     this['why'] = why
    300 
    301 
    302     // our topmost frame
    303     var f = this['frameFactory'](this)
    304     this['base'] = base
    305     f['base'] = base
    306     f['lang'] = ''
    307 
    308     this['parseDOM'](this['buildFrame'](f, root))
    309     return true
    310   }
    311   this['parseDOM'] = function (frame) {
    312     // a DOM utility function used in parsing
    313     var elementURI = function (el) {
    314         var result = "";
    315         if(el['namespaceURI'] == null) {
    316           throw new Error("RDF/XML syntax error: No namespace for "
    317             + el['localName'] + " in " + this.base)
    318         }
    319         if(el['namespaceURI']) {
    320           result = result + el['namespaceURI'];
    321         }
    322         if(el['localName']) {
    323           result = result + el['localName'];
    324         } else if(el['nodeName']) {
    325           if(el['nodeName'].indexOf(":") >= 0)
    326             result = result + el['nodeName'].split(":")[1];
    327           else
    328             result = result + el['nodeName'];
    329         }
    330         return result;
    331       }
    332     var dig = true // if we'll dig down in the tree on the next iter
    333     while(frame['parent']) {
    334       var dom = frame['element']
    335       var attrs = dom['attributes']
    336 
    337       if(dom['nodeType'] == RDFParser['nodeType']['TEXT']
    338         || dom['nodeType'] == RDFParser['nodeType']['CDATA_SECTION']) {
    339         //we have a literal
    340         if(frame['parent']['nodeType'] == frame['NODE']) {
    341           //must have had attributes, store as rdf:value
    342           frame['addArc'](RDFParser['ns']['RDF'] + 'value');
    343           frame = this['buildFrame'](frame);
    344         }
    345         frame['addLiteral'](dom['nodeValue'])
    346       } else if(elementURI(dom) != RDFParser['ns']['RDF'] + "RDF") {
    347         // not root
    348         if(frame['parent'] && frame['parent']['collection']) {
    349           // we're a collection element
    350           frame['addCollectionArc']()
    351           frame = this['buildFrame'](frame, frame['element'])
    352           frame['parent']['element'] = null
    353         }
    354         if(!frame['parent'] || !frame['parent']['nodeType']
    355           || frame['parent']['nodeType'] == frame['ARC']) {
    356           // we need a node
    357           var about = this['getAttributeNodeNS'](dom, RDFParser['ns']['RDF'], "about")
    358           var rdfid = this['getAttributeNodeNS'](dom, RDFParser['ns']['RDF'], "ID")
    359           if(about && rdfid) {
    360             throw new Error("RDFParser: " + dom['nodeName']
    361               + " has both rdf:id and rdf:about." + " Halting. Only one of these"
    362               + " properties may be specified on a" + " node.");
    363           }
    364           if(about == null && rdfid) {
    365             frame['addNode']("#" + rdfid['nodeValue'])
    366             dom['removeAttributeNode'](rdfid)
    367           } else if(about == null && rdfid == null) {
    368             var bnid = this['getAttributeNodeNS'](dom, RDFParser['ns']['RDF'], "nodeID")
    369             if(bnid) {
    370               frame['addBNode'](bnid['nodeValue'])
    371               dom['removeAttributeNode'](bnid)
    372             } else {
    373               frame['addBNode']()
    374             }
    375           } else {
    376             frame['addNode'](about['nodeValue'])
    377             dom['removeAttributeNode'](about)
    378           }
    379 
    380           // Typed nodes
    381           var rdftype = this['getAttributeNodeNS'](dom, RDFParser['ns']['RDF'], "type")
    382           if(RDFParser['ns']['RDF'] + "Description" != elementURI(dom)) {
    383             rdftype = {
    384               'nodeValue': elementURI(dom)
    385             }
    386           }
    387           if(rdftype != null) {
    388             this['store']['add'](frame['node'],
    389               this['store']['sym'](RDFParser['ns']['RDF'] + "type"),
    390               this['store']['sym'](
    391                 $rdf.Util.uri.join(
    392                   rdftype['nodeValue'],
    393                   frame['base'])),
    394               this['why'])
    395             if(rdftype['nodeName']) {
    396               dom['removeAttributeNode'](rdftype)
    397             }
    398           }
    399 
    400           // Property Attributes
    401           for(var x = attrs['length'] - 1; x >= 0; x--) {
    402             this['store']['add'](frame['node'],
    403               this['store']['sym'](elementURI(attrs[x])),
    404               this['store']['literal'](
    405                 attrs[x]['nodeValue'],
    406                 frame['lang']),
    407               this['why'])
    408           }
    409         } else {
    410           // we should add an arc (or implicit bnode+arc)
    411           frame['addArc'](elementURI(dom))
    412 
    413           // save the arc's rdf:ID if it has one
    414           if(this['reify']) {
    415             var rdfid = this['getAttributeNodeNS'](dom, RDFParser['ns']['RDF'], "ID")
    416             if(rdfid) {
    417               frame['rdfid'] = rdfid['nodeValue']
    418               dom['removeAttributeNode'](rdfid)
    419             }
    420           }
    421 
    422           var parsetype = this['getAttributeNodeNS'](dom, RDFParser['ns']['RDF'], "parseType")
    423           var datatype = this['getAttributeNodeNS'](dom, RDFParser['ns']['RDF'], "datatype")
    424           if(datatype) {
    425             frame['datatype'] = datatype['nodeValue']
    426             dom['removeAttributeNode'](datatype)
    427           }
    428 
    429           if(parsetype) {
    430             var nv = parsetype['nodeValue']
    431             if(nv == "Literal") {
    432               frame['datatype'] = RDFParser['ns']['RDF'] + "XMLLiteral"
    433               // (this.buildFrame(frame)).addLiteral(dom)
    434               // should work but doesn't
    435               frame = this['buildFrame'](frame)
    436               frame['addLiteral'](dom)
    437               dig = false
    438             } else if(nv == "Resource") {
    439               frame = this['buildFrame'](frame, frame['element'])
    440               frame['parent']['element'] = null
    441               frame['addBNode']()
    442             } else if(nv == "Collection") {
    443               frame = this['buildFrame'](frame, frame['element'])
    444               frame['parent']['element'] = null
    445               frame['addCollection']()
    446             }
    447             dom['removeAttributeNode'](parsetype)
    448           }
    449 
    450           if(attrs['length'] != 0) {
    451             var resource = this['getAttributeNodeNS'](dom, RDFParser['ns']['RDF'], "resource")
    452             var bnid = this['getAttributeNodeNS'](dom, RDFParser['ns']['RDF'], "nodeID")
    453 
    454             frame = this['buildFrame'](frame)
    455             if(resource) {
    456               frame['addNode'](resource['nodeValue'])
    457               dom['removeAttributeNode'](resource)
    458             } else {
    459               if(bnid) {
    460                 frame['addBNode'](bnid['nodeValue'])
    461                 dom['removeAttributeNode'](bnid)
    462               } else {
    463                 frame['addBNode']()
    464               }
    465             }
    466 
    467             for(var x = attrs['length'] - 1; x >= 0; x--) {
    468               var f = this['buildFrame'](frame)
    469               f['addArc'](elementURI(attrs[x]))
    470               if(elementURI(attrs[x]) == RDFParser['ns']['RDF'] + "type") {
    471                 (this['buildFrame'](f))['addNode'](
    472                 attrs[x]['nodeValue'])
    473               } else {
    474                 (this['buildFrame'](f))['addLiteral'](
    475                 attrs[x]['nodeValue'])
    476               }
    477             }
    478           } else if(dom['childNodes']['length'] == 0) {
    479             (this['buildFrame'](frame))['addLiteral']("")
    480           }
    481         }
    482       } // rdf:RDF
    483       // dig dug
    484       dom = frame['element']
    485       while(frame['parent']) {
    486         var pframe = frame
    487         while(dom == null) {
    488           frame = frame['parent']
    489           dom = frame['element']
    490         }
    491         var candidate = dom['childNodes'][frame['lastChild']]
    492         if(candidate == null || !dig) {
    493           frame['terminateFrame']()
    494           if(!(frame = frame['parent'])) {
    495             break
    496           } // done
    497           dom = frame['element']
    498           dig = true
    499         } else if((candidate['nodeType'] != RDFParser['nodeType']['ELEMENT']
    500             && candidate['nodeType'] != RDFParser['nodeType']['TEXT']
    501             && candidate['nodeType'] != RDFParser['nodeType']['CDATA_SECTION'])
    502           || ((candidate['nodeType'] == RDFParser['nodeType']['TEXT']
    503               || candidate['nodeType'] == RDFParser['nodeType']['CDATA_SECTION'])
    504             && dom['childNodes']['length'] != 1)) {
    505           frame['lastChild']++
    506         } else {
    507           // not a leaf
    508           frame['lastChild']++;
    509           frame = this['buildFrame'](pframe, dom['childNodes'][frame['lastChild'] - 1])
    510           break
    511         }
    512       }
    513     } // while
    514   }
    515 
    516   /**
    517    * Cleans out state from a previous parse run
    518    * @private
    519    */
    520   this['cleanParser'] = function () {
    521     this['bnodes'] = {}
    522     this['why'] = null
    523   }
    524 
    525   /**
    526    * Builds scope frame 
    527    * @private
    528    */
    529   this['buildFrame'] = function (parent, element) {
    530     var frame = this['frameFactory'](this, parent, element)
    531     if(parent) {
    532       frame['base'] = parent['base']
    533       frame['lang'] = parent['lang']
    534     }
    535     if(element == null
    536       || element['nodeType'] == RDFParser['nodeType']['TEXT']
    537       || element['nodeType'] == RDFParser['nodeType']['CDATA_SECTION']) {
    538       return frame
    539     }
    540 
    541     var attrs = element['attributes']
    542 
    543     var base = element['getAttributeNode']("xml:base")
    544     if(base != null) {
    545       frame['base'] = base['nodeValue']
    546       element['removeAttribute']("xml:base")
    547     }
    548     var lang = element['getAttributeNode']("xml:lang")
    549     if(lang != null) {
    550       frame['lang'] = lang['nodeValue']
    551       element['removeAttribute']("xml:lang")
    552     }
    553 
    554     // remove all extraneous xml and xmlns attributes
    555     for(var x = attrs['length'] - 1; x >= 0; x--) {
    556       if(attrs[x]['nodeName']['substr'](0, 3) == "xml") {
    557         if(attrs[x].name.slice(0, 6) == 'xmlns:') {
    558           var uri = attrs[x].nodeValue;
    559           // alert('base for namespac attr:'+this.base);
    560           if(this.base) uri = $rdf.Util.uri.join(uri, this.base);
    561           this.store.setPrefixForURI(attrs[x].name.slice(6), uri);
    562         }
    563         //		alert('rdfparser: xml atribute: '+attrs[x].name) //@@
    564         element['removeAttributeNode'](attrs[x])
    565       }
    566     }
    567     return frame
    568   }
    569 }