www

Unnamed repository; edit this file 'description' to name the repository.
Log | Files | Refs | Submodules | README | LICENSE

commit 9144b567726b04f1a39cb3693db106cc2963a716
parent 1ce4de835b6ae57d594a51734662b394e868cea5
Author: Simon Kornblith <simon@simonster.com>
Date:   Sat,  5 Aug 2006 20:58:45 +0000

Addresses #131: make import/export symmetrical.
Closes #163: make the translator API allow creator types besides author.

Import and export in the multi-ontology RDF format should now work properly. Collections, notes, and "see also" relations are all preserved. More extensive testing will be necessary later.


Diffstat:
Mchrome/chromeFiles/content/scholar/fileInterface.js | 21++++++++++++++++++---
Mchrome/chromeFiles/content/scholar/ingester/browser.js | 7+++++--
Mchrome/chromeFiles/content/scholar/xpcom/translate.js | 265+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++--------------------
Mscrapers.sql | 422++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++-------------
4 files changed, 576 insertions(+), 139 deletions(-)

diff --git a/chrome/chromeFiles/content/scholar/fileInterface.js b/chrome/chromeFiles/content/scholar/fileInterface.js @@ -1,5 +1,5 @@ Scholar_File_Interface = new function() { - var _unresponsiveScriptPreference; + var _unresponsiveScriptPreference, _importCollection; this.exportFile = exportFile; this.importFile = importFile; @@ -80,11 +80,17 @@ Scholar_File_Interface = new function() { // get translators again, bc now we can check against the file translators = translation.getTranslators(); if(translators.length) { + // create a new collection to take in imported items + var date = new Date(); + _importCollection = Scholar.Collections.add("Imported "+date.toLocaleString()); + + // import items translation.setTranslator(translators[0]); - // show progress indicator translation.setHandler("itemDone", _importItemDone); + translation.setHandler("collectionDone", _importCollectionDone); translation.setHandler("done", _importDone); _disableUnresponsive(); + // show progress indicator Scholar_File_Interface.Progress.show( Scholar.getString("fileInterface.itemsImported"), function() { @@ -100,7 +106,16 @@ Scholar_File_Interface = new function() { */ function _importItemDone(obj, item) { //Scholar_File_Interface.Progress.increment(); - item.save(); + _importCollection.addItem(item.getID()); + } + + /* + * Saves collections after they've been imported. 
Input item is of the type + * outputted by Scholar.Collection.toArray(); only receives top-level + * collections + */ + function _importCollectionDone(obj, collection) { + collection.changeParent(_importCollection.getID()); } /* diff --git a/chrome/chromeFiles/content/scholar/ingester/browser.js b/chrome/chromeFiles/content/scholar/ingester/browser.js @@ -288,8 +288,11 @@ Scholar_Ingester_Interface._itemDone = function(obj, item, collection) { var title = item.getField("title"); var icon = "chrome://scholar/skin/treeitem-"+Scholar.ItemTypes.getName(item.getField("itemTypeID"))+".png" Scholar_Ingester_Interface.Progress.addLines([title], [icon]); - var item = item.save(); - collection.addItem(item); + + // add item to collection, if one was specified + if(collection) { + collection.addItem(item); + } } /* diff --git a/chrome/chromeFiles/content/scholar/xpcom/translate.js b/chrome/chromeFiles/content/scholar/xpcom/translate.js @@ -48,6 +48,7 @@ * immediately when script has finished executing * _sandbox - sandbox in which translators will be executed * _streams - streams that need to be closed when execution is complete + * _IDMap - a map from IDs as specified in Scholar.Item() to IDs of actual items * * WEB-ONLY PRIVATE PROPERTIES: * @@ -144,10 +145,17 @@ Scholar.Translate.prototype.setTranslator = function(translator) { * returns: N/A * * itemDone - * valid: web + * valid: import, web * called: when an item has been processed; may be called asynchronously * passed: an item object (see Scholar.Item) * returns: N/A + * + * collectionDone + * valid: import, web + * called: when a collection has been processed, after all items have been + * added; may be called asynchronously + * passed: a collection object (see Scholar.Collection) + * returns: N/A * * done * valid: all @@ -245,6 +253,7 @@ Scholar.Translate.prototype._loadTranslator = function() { * does the actual translation */ Scholar.Translate.prototype.translate = function() { + this._IDMap = new Array(); 
if(!this.location) { throw("cannot translate: no location specified"); @@ -301,8 +310,13 @@ Scholar.Translate.prototype._generateSandbox = function() { if(this.type == "web" || this.type == "import") { // add routines to add new items this._sandbox.Scholar.Item = Scholar.Translate.ScholarItem; - // attach the function to be run when an item is + // attach the function to be run when an item is done this._sandbox.Scholar.Item.prototype.complete = function() {me._itemDone(this)}; + + // add routines to add new collections + this._sandbox.Scholar.Collection = Scholar.Translate.ScholarCollection; + // attach the function to be run when a collection is done + this._sandbox.Scholar.Collection.prototype.complete = function() {me._collectionDone(this)}; } else if(this.type == "export") { // add routines to retrieve items and collections this._sandbox.Scholar.nextItem = function() { return me._exportGetItem() }; @@ -532,69 +546,126 @@ Scholar.Translate.prototype._closeStreams = function() { * executed when an item is done and ready to be loaded into the database */ Scholar.Translate.prototype._itemDone = function(item) { + Scholar.debug(item); + // Get typeID, defaulting to "website" var type = (item.itemType ? 
item.itemType : "website"); - // makes looping through easier - delete item.itemType, item.complete; - item.itemType = item.complete = undefined; - - var typeID = Scholar.ItemTypes.getID(type); - var newItem = Scholar.Items.getNewItemByType(typeID); - - if(item.date && !item.year) { - // date can serve as a year - var dateID = Scholar.ItemFields.getID("date"); - var yearID = Scholar.ItemFields.getID("year"); - if(!Scholar.ItemFields.isValidForType(dateID, typeID) && Scholar.ItemFields.isValidForType(yearID, typeID)) { - // year is valid but date is not - var yearRe = /[0-9]{4}/; - var m = yearRe.exec(item.date); - if(m) { - item.year = m[0] - item.date = undefined; + Scholar.debug("type is "+type); + if(type == "note") { // handle notes differently + Scholar.debug("handling a note"); + var myID = Scholar.Notes.add(item.note); + // re-retrieve the item + var newItem = Scholar.Items.get(myID); + } else { + // create new item + var typeID = Scholar.ItemTypes.getID(type); + var newItem = Scholar.Items.getNewItemByType(typeID); + + // makes looping through easier + item.itemType = item.complete = undefined; + + if(item.date && !item.year) { + // date can serve as a year + var dateID = Scholar.ItemFields.getID("date"); + var yearID = Scholar.ItemFields.getID("year"); + if(!Scholar.ItemFields.isValidForType(dateID, typeID) && Scholar.ItemFields.isValidForType(yearID, typeID)) { + // year is valid but date is not + var yearRe = /[0-9]{4}/; + var m = yearRe.exec(item.date); + if(m) { + item.year = m[0] + item.date = undefined; + } + } + } else if(!item.date && item.year) { + // the converse is also true + var dateID = Scholar.ItemFields.getID("date"); + var yearID = Scholar.ItemFields.getID("year"); + if(Scholar.ItemFields.isValidForType(dateID, typeID) && !Scholar.ItemFields.isValidForType(yearID, typeID)) { + // date is valid but year is not + item.date = item.year; + item.year = undefined; } } - } else if(!item.date && item.year) { - // the converse is also true - var 
dateID = Scholar.ItemFields.getID("date"); - var yearID = Scholar.ItemFields.getID("year"); - if(Scholar.ItemFields.isValidForType(dateID, typeID) && !Scholar.ItemFields.isValidForType(yearID, typeID)) { - // date is valid but year is not - item.date = item.year; - item.year = undefined; - } - } - - Scholar.debug(item); - - var fieldID, field; - for(var i in item) { - // loop through item fields - data = item[i]; - if(data) { // if field has content - if(i == "creators") { // creators are a special case - for(j in data) { - newItem.setCreator(j, data[j].firstName, data[j].lastName, 1); - } - } else if(i == "title") { // skip checks for title - newItem.setField(i, data); - } else if(i == "tags") { // add tags - for(j in data) { - newItem.addTag(data[j]); - } - } else if(fieldID = Scholar.ItemFields.getID(i)) { - // if field is in db - if(Scholar.ItemFields.isValidForType(fieldID, typeID)) { - // if field is valid for this type - // add field + var fieldID, field; + for(var i in item) { + // loop through item fields + data = item[i]; + + if(data) { // if field has content + if(i == "creators") { // creators are a special case + for(var j in data) { + var creatorType = 1; + // try to assign correct creator type + if(data[j].creatorType) { + try { + var creatorType = Scholar.CreatorTypes.getID(data[j].creatorType); + } catch(e) { + Scholar.debug("invalid creator type "+data[j].creatorType+" for creator index "+j); + } + } + + newItem.setCreator(j, data[j].firstName, data[j].lastName, creatorType); + } + } else if(i == "title") { // skip checks for title newItem.setField(i, data); + } else if(i == "tags") { // add tags + for(var j in data) { + newItem.addTag(data[j]); + } + } else if(i == "seeAlso") { + newItem.translateSeeAlso = data; + } else if(i != "note" && i != "notes" && i != "itemID" && (fieldID = Scholar.ItemFields.getID(i))) { + // if field is in db + if(Scholar.ItemFields.isValidForType(fieldID, typeID)) { + // if field is valid for this type + // add field + 
newItem.setField(i, data); + } else { + Scholar.debug("discarded field "+i+" for item: field not valid for type "+type); + } } else { - Scholar.debug("discarded field "+i+" for item: field not valid for type "+type); + Scholar.debug("discarded field "+i+" for item: field does not exist"); + } + } + } + + // save item + var myID = newItem.save(); + if(myID == true) { + myID = newItem.getID(); + } + + // handle notes + if(item.notes) { + for each(var note in item.notes) { + var noteID = Scholar.Notes.add(note.note, myID); + + // handle see also + if(note.seeAlso) { + var myNote = Scholar.Items.get(noteID); + + for each(var seeAlso in note.seeAlso) { + if(this._IDMap[seeAlso]) { + myNote.addSeeAlso(this._IDMap[seeAlso]); + } + } } - } else { - Scholar.debug("discarded field "+i+" for item: field does not exist"); + } + } + } + + if(item.itemID) { + this._IDMap[item.itemID] = myID; + } + + // handle see also + if(item.seeAlso) { + for each(var seeAlso in item.seeAlso) { + if(this._IDMap[seeAlso]) { + newItem.addSeeAlso(this._IDMap[seeAlso]); } } } @@ -605,6 +676,40 @@ Scholar.Translate.prototype._itemDone = function(item) { } /* + * executed when a collection is done and ready to be loaded into the database + */ +Scholar.Translate.prototype._collectionDone = function(collection) { + Scholar.debug(collection); + var newCollection = this._processCollection(collection, null); + + this._runHandler("collectionDone", newCollection); +} + +/* + * recursively processes collections + */ +Scholar.Translate.prototype._processCollection = function(collection, parentID) { + var newCollection = Scholar.Collections.add(collection.name, parentID); + + for each(child in collection.children) { + if(child.type == "collection") { + // do recursive processing of collections + this._processCollection(child, newCollection.getID()); + } else { + // add mapped items to collection + if(this._IDMap[child.id]) { + Scholar.debug("adding "+this._IDMap[child.id]); + 
newCollection.addItem(this._IDMap[child.id]); + } else { + Scholar.debug("could not map "+child.id+" to an imported item"); + } + } + } + + return newCollection; +} + +/* * calls a handler (see setHandler above) */ Scholar.Translate.prototype._runHandler = function(type, argument) { @@ -791,7 +896,7 @@ Scholar.Translate.prototype._exportGetCollection = function() { collection.type = "collection"; collection.children = returnItem.toArray(); - return returnItem; + return collection; } } @@ -881,12 +986,8 @@ Scholar.Translate.prototype._initializeInternalIO = function() { } } -/* Scholar.Translate.ScholarItem: a class for generating new item from +/* Scholar.Translate.ScholarItem: a class for generating a new item from * inside scraper code - * - * (this must be part of the prototype because it must be able to access - * methods relating to a specific instance of Scholar.Translate yet be called - * as a class) */ Scholar.Translate.ScholarItem = function(itemType) { @@ -898,12 +999,20 @@ Scholar.Translate.ScholarItem = function(itemType) { this.notes = new Array(); // generate tags array this.tags = new Array(); + // generate see also array + this.seeAlso = new Array(); } +/* Scholar.Translate.Collection: a class for generating a new top-level + * collection from inside scraper code + */ + +Scholar.Translate.ScholarCollection = function() {} + /* Scholar.Translate.RDF: a class for handling RDF IO * * If an import/export translator specifies dataMode RDF, this is the interface, - * accessible from model.x + * accessible from model. 
* * In order to simplify things, all classes take in their resource/container * as either the Mozilla native type or a string, but all @@ -951,8 +1060,12 @@ Scholar.Translate.RDF.prototype._deEnumerate = function(enumerator) { // get a resource as an nsIRDFResource, instead of a string Scholar.Translate.RDF.prototype._getResource = function(about) { - if(!(about instanceof Components.interfaces.nsIRDFResource)) { - about = this._RDFService.GetResource(about); + try { + if(!(about instanceof Components.interfaces.nsIRDFResource)) { + about = this._RDFService.GetResource(about); + } + } catch(e) { + throw("invalid RDF resource: "+about); } return about; } @@ -996,15 +1109,20 @@ Scholar.Translate.RDF.prototype.newContainer = function(type, about) { } // adds a new container element (index optional) -Scholar.Translate.RDF.prototype.addContainerElement = function(about, element, index) { +Scholar.Translate.RDF.prototype.addContainerElement = function(about, element, literal, index) { if(!(about instanceof Components.interfaces.nsIRDFContainer)) { about = this._getResource(about); var container = Components.classes["@mozilla.org/rdf/container;1"]. createInstance(Components.interfaces.nsIRDFContainer); container.Init(this._dataSource, about); + about = container; } if(!(element instanceof Components.interfaces.nsIRDFResource)) { - element = this._RDFService.GetResource(element); + if(literal) { + element = this._RDFService.GetLiteral(element); + } else { + element = this._RDFService.GetResource(element); + } } if(index) { @@ -1014,6 +1132,19 @@ Scholar.Translate.RDF.prototype.addContainerElement = function(about, element, i } } +// gets container elements as an array +Scholar.Translate.RDF.prototype.getContainerElements = function(about) { + if(!(about instanceof Components.interfaces.nsIRDFContainer)) { + about = this._getResource(about); + var container = Components.classes["@mozilla.org/rdf/container;1"]. 
+ createInstance(Components.interfaces.nsIRDFContainer); + container.Init(this._dataSource, about); + about = container; + } + + return this._deEnumerate(about.GetElements()); +} + // sets a namespace Scholar.Translate.RDF.prototype.addNamespace = function(prefix, uri) { if(this._serializer) { // silently fail, in case the reason the scraper diff --git a/scrapers.sql b/scrapers.sql @@ -2781,33 +2781,17 @@ Scholar.addOption("exportFileData", true);', function generateCollection(collection) { var collectionResource = "#collection:"+collection.id; Scholar.RDF.addStatement(collectionResource, rdf+"type", n.bib+"Collection", false); + Scholar.RDF.addStatement(collectionResource, n.dc+"title", collection.name, true); - for(var i in collection.children) { - var child = collection.children[i]; - + for each(var child in collection.children) { // add child list items if(child.type == "collection") { - Scholar.RDF.addStatement(collectionResource, n.dc+"hasPart", "#collection:"+child.id, false); + Scholar.RDF.addStatement(collectionResource, n.dcterms+"hasPart", "#collection:"+child.id, false); // do recursive processing of collections generateCollection(child); } else { - Scholar.RDF.addStatement(collectionResource, n.dc+"hasPart", itemResources[child.id], false); - } - } -} - -function getContainerIfExists() { - if(container) { - if(containerElement) { - return containerElement; - } else { - containerElement = Scholar.RDF.newResource(); - // attach container to section (if exists) or resource - Scholar.RDF.addStatement((section ? 
section : resource), n.dcterms+"isPartOf", containerElement, false); - return containerElement; + Scholar.RDF.addStatement(collectionResource, n.dcterms+"hasPart", itemResources[child.id], false); } - } else { - return resource; } } @@ -2831,14 +2815,22 @@ function doExport() { // leave as global itemResources = new Array(); + // keep track of resources already assigned (in case two book items have the + // same ISBN, or something like that) + var usedResources = new Array(); + + var items = new Array(); + // first, map each ID to a resource - for(var i in items) { - item = items[i]; + while(item = Scholar.nextItem()) { + items.push(item); - if(item.ISBN) { + if(item.ISBN && !usedResources["urn:isbn:"+item.ISBN]) { itemResources[item.itemID] = "urn:isbn:"+item.ISBN; - } else if(item.url) { + usedResources[itemResources[item.itemID]] = true; + } else if(item.url && !usedResources[item.url]) { itemResources[item.itemID] = item.url; + usedResources[itemResources[item.itemID]] = true; } else { // just specify a node ID itemResources[item.itemID] = "#item:"+item.itemID; @@ -2849,10 +2841,8 @@ function doExport() { } } - var item; - while(item = Scholar.nextItem()) { + for each(item in items) { // these items are global - item = items[i]; resource = itemResources[item.itemID]; container = null; @@ -2930,7 +2920,7 @@ function doExport() { // attach container to resource Scholar.RDF.addStatement(resource, n.bib+cTag, creatorResource, false); } - Scholar.RDF.addContainerElement(creatorContainers[cTag], creator, true); + Scholar.RDF.addContainerElement(creatorContainers[cTag], creator, false); } /** FIELDS ON NEARLY EVERYTHING BUT NOT A PART OF THE CORE **/ @@ -2962,16 +2952,34 @@ function doExport() { // add relationship to resource Scholar.RDF.addStatement(resource, n.dc+"isPartOf", section, false); } - // use ISSN to set up container element - if(item.ISSN) { - containerElement = "urn:issn:"+item.ISSN; // leave as global + + // generate container + if(container) { + 
if(item.ISSN && !Scholar.RDF.getArcsIn("urn:issn:"+item.ISSN)) { + // use ISSN as container URI if no other item is + containerElement = "urn:issn:"+item.ISSN + } else { + containerElement = Scholar.RDF.newResource(); + } // attach container to section (if exists) or resource Scholar.RDF.addStatement((section ? section : resource), n.dcterms+"isPartOf", containerElement, false); + // add container type + Scholar.RDF.addStatement(containerElement, rdf+"type", n.bib+container, false); + } + + // ISSN + if(item.ISSN) { + Scholar.RDF.addStatement((containerElement ? containerElement : resource), n.dc+"identifier", "ISSN "+item.ISSN, true); + } + + // ISBN + if(item.ISBN) { + Scholar.RDF.addStatement((containerElement ? containerElement : resource), n.dc+"identifier", "ISBN "+item.ISBN, true); } // publication gets linked to container via isPartOf if(item.publication) { - Scholar.RDF.addStatement(getContainerIfExists(), n.dc+"title", item.publication, true); + Scholar.RDF.addStatement((containerElement ? containerElement : resource), n.dc+"title", item.publication, true); } // series also linked in @@ -2982,16 +2990,16 @@ function doExport() { // set series title Scholar.RDF.addStatement(series, n.dc+"title", item.series, true); // add relationship to resource - Scholar.RDF.addStatement(getContainerIfExists(), n.dcterms+"isPartOf", series, false); + Scholar.RDF.addStatement((containerElement ? containerElement : resource), n.dcterms+"isPartOf", series, false); } // volume if(item.volume) { - Scholar.RDF.addStatement(getContainerIfExists(), n.prism+"volume", item.volume, true); + Scholar.RDF.addStatement((containerElement ? containerElement : resource), n.prism+"volume", item.volume, true); } // number if(item.number) { - Scholar.RDF.addStatement(getContainerIfExists(), n.prism+"number", item.number, true); + Scholar.RDF.addStatement((containerElement ? 
containerElement : resource), n.prism+"number", item.number, true); } // edition if(item.edition) { @@ -3069,18 +3077,17 @@ function doExport() { // add note tag Scholar.RDF.addStatement(noteResource, rdf+"type", n.bib+"Memo", false); - // add note description (sorry, couldn''t find a better way of - // representing this data in an existing ontology) - Scholar.RDF.addStatement(noteResource, n.dc+"description", item.notes[j].note, true); + // add note value + Scholar.RDF.addStatement(noteResource, rdf+"value", item.notes[j].note, true); // add relationship between resource and note Scholar.RDF.addStatement(resource, n.dcterms+"isReferencedBy", noteResource, false); // Add see also info to RDF - generateSeeAlso(item.notes[j].itemID, item.notes[j].seeAlso); + generateSeeAlso(resource, item.notes[j].seeAlso); } if(item.note) { - Scholar.RDF.addStatement(resource, n.dc+"description", item.note, true); + Scholar.RDF.addStatement(resource, rdf+"value", item.note, true); } /** TAGS **/ @@ -3090,9 +3097,7 @@ function doExport() { } // Add see also info to RDF - generateSeeAlso(item.itemID, item.seeAlso); - - // ELEMENTS AMBIGUOUSLY ENCODED: callNumber, acccessionType + generateSeeAlso(resource, item.seeAlso); } /** RDF COLLECTION STRUCTURE **/ @@ -3204,14 +3209,18 @@ REPLACE INTO "translators" VALUES ('6e372642-ed9d-4934-b5d1-c11ac758ebb7', '2006 REPLACE INTO "translators" VALUES ('5e3ad958-ac79-463d-812b-a86a9235c28f', '2006-07-15 17:09:00', 1, 'RDF', 'Simon Kornblith', 'rdf', 'Scholar.configure("dataMode", "rdf");', -'function getFirstResults(node, properties, onlyOneString) { +'// gets the first result set for a property that can be encoded in multiple +// ontologies +function getFirstResults(node, properties, onlyOneString) { for(var i=0; i<properties.length; i++) { var result = Scholar.RDF.getTargets(node, properties[i]); if(result) { if(onlyOneString) { // onlyOneString means we won''t return nsIRDFResources, only // actual literals - return result[0]; + 
if(typeof(result[0]) != "object") { + return result[0]; + } } else { return result; } @@ -3220,7 +3229,93 @@ REPLACE INTO "translators" VALUES ('5e3ad958-ac79-463d-812b-a86a9235c28f', '2006 return; // return undefined on failure } +// adds creators to an item given a list of creator nodes +function handleCreators(newItem, creators, creatorType) { + if(!creators) { + return; + } + + if(typeof(creators[0]) != "string") { // see if creators are in a container + try { + var creators = Scholar.RDF.getContainerElements(creators[0]); + } catch(e) {} + } + + if(typeof(creators[0]) == "string") { // support creators encoded as strings + for(var i in creators) { + if(typeof(creators[i]) != "object") { + newItem.creators.push(Scholar.Utilities.cleanAuthor(creators[i], creatorType, true)); + } + } + } else { // also support foaf + for(var i in creators) { + var type = Scholar.RDF.getTargets(creators[i], rdf+"type"); + if(type) { + type = Scholar.RDF.getResourceURI(type[0]); + if(type == n.foaf+"Person") { // author is FOAF type person + var creator = new Array(); + creator.lastName = getFirstResults(creators[i], + [n.foaf+"surname", n.foaf+"family_name"], true); + creator.firstName = getFirstResults(creators[i], + [n.foaf+"givenname", n.foaf+"firstName"], true); + creator.creatorType = creatorType; + newItem.creators.push(creator); + } + } + } + } +} + +// processes collections recursively +function processCollection(node, collection) { + if(!collection) { + collection = new Array(); + } + collection.type = "collection"; + collection.name = getFirstResults(node, [n.dc+"title"], true); + collection.children = new Array(); + + // check for children + var children = getFirstResults(node, [n.dcterms+"hasPart"]); + for each(var child in children) { + var type = Scholar.RDF.getTargets(child, rdf+"type"); + if(type) { + type = Scholar.RDF.getResourceURI(type[0]); + } + + if(type == n.bib+"Collection") { + // for collections, process recursively + 
collection.children.push(processCollection(child)); + } else { + // all other items are added by ID + collection.children.push({id:Scholar.RDF.getResourceURI(child), type:"item"}); + } + } + + return collection; +} + +// gets the node with a given type from an array +function getNodeByType(nodes, type) { + if(!nodes) { + return false; + } + + for each(node in nodes) { + var nodeType = Scholar.RDF.getTargets(node, rdf+"type"); + if(nodeType) { + nodeType = Scholar.RDF.getResourceURI(nodeType[0]); + if(nodeType == type) { // we have a node of the correct type + return node; + } + } + } + return false; +} + function doImport() { + rdf = "http://www.w3.org/1999/02/22-rdf-syntax-ns#"; + n = { bib:"http://purl.org/net/biblio#", dc:"http://purl.org/dc/elements/1.1/", @@ -3230,37 +3325,104 @@ function doImport() { vcard:"http://nwalsh.com/rdf/vCard" }; + callNumberTypes = [ + n.dcterms+"LCC", n.dcterms+"DDC", n.dcterms+"UDC" + ]; + var nodes = Scholar.RDF.getAllResources(); if(!nodes) { return false; } - for(var i in nodes) { - var node = nodes[i]; - - if(Scholar.RDF.getArcsIn(node)) { - // root nodes only, please - continue; - } + // keep track of collections while we''re looping through + var collections = new Array(); + for each(var node in nodes) { var newItem = new Scholar.Item(); + newItem.itemID = Scholar.RDF.getResourceURI(node); + var container = undefined; + + // type + var type = Scholar.RDF.getTargets(node, rdf+"type"); + // also deal with type detection based on parts, so we can differentiate + // magazine and journal articles, and find container elements + var isPartOf = getFirstResults(node, [n.dcterms+"isPartOf"]); + + if(type) { + type = Scholar.RDF.getResourceURI(type[0]); + + if(type == n.bib+"Book") { + newItem.itemType = "book"; + } else if(type == n.bib+"BookSection") { + newItem.itemType = "bookSection"; + container = getNodeByType(isPartOf, n.bib+"Book"); + } else if(type == n.bib+"Article") { // choose between journal, + // newspaper, and magazine 
+ // articles + if(container = getNodeByType(isPartOf, n.bib+"Journal")) { + newItem.itemType = "journalArticle"; + } else if(container = getNodeByType(isPartOf, n.bib+"Periodical")) { + newItem.itemType = "magazineArticle"; + } else if(container = getNodeByType(isPartOf, n.bib+"Newspaper")) { + newItem.itemType = "newspaperArticle"; + } + } else if(type == n.bib+"Thesis") { + newItem.itemType = "thesis"; + } else if(type == n.bib+"Letter") { + newItem.itemType = "letter"; + } else if(type == n.bib+"Manuscript") { + newItem.itemType = "manuscript"; + } else if(type == n.bib+"Interview") { + newItem.itemType = "interview"; + } else if(type == n.bib+"MotionPicture") { + newItem.itemType = "film"; + } else if(type == n.bib+"Illustration") { + newItem.itemType = "illustration"; + } else if(type == n.bib+"Document") { + newItem.itemType = "website"; + } else if(type == n.bib+"Memo") { + // check to see if this note is independent + var arcs = Scholar.RDF.getArcsIn(node); + Scholar.Utilities.debugPrint("working on a note"); + Scholar.Utilities.debugPrint(arcs); + var skip = false; + for each(var arc in arcs) { + arc = Scholar.RDF.getResourceURI(arc); + if(arc != n.dc+"relation" && arc != n.dcterms+"hasPart") { + // related to another item by some arc besides see also + skip = true; + } + } + if(skip) { + continue; + } + + newItem.itemType = "note"; + } else if(type == n.bib+"Collection") { + // skip collections until all the items are done + collections.push(node); + continue; + } else { // default to book + newItem.itemType = "book"; + } + } // title newItem.title = getFirstResults(node, [n.dc+"title"], true); - if(!newItem.title) { // require the title + if(newItem.itemType != "note" && !newItem.title) { // require the title + // (if not a note) continue; } - // creators - var creators = getFirstResults(node, [n.dc+"creator"]); - Scholar.Utilities.debugPrint(creators); - if(creators) { - for(var i in creators) { - if(typeof(creators[i]) != "object") { - 
newItem.creators.push(Scholar.Utilities.cleanAuthor(creators[i], "author", true)); - } - } - } + // regular author-type creators + var creators = getFirstResults(node, [n.bib+"authors", n.dc+"creator"]); + handleCreators(newItem, creators, "author"); + // editors + var creators = getFirstResults(node, [n.bib+"editors"]); + handleCreators(newItem, creators, "editor"); + // contributors + var creators = getFirstResults(node, [n.bib+"contributors"]); + handleCreators(newItem, creators, "contributor"); // source newItem.source = getFirstResults(node, [n.dc+"source"], true); @@ -3268,10 +3430,54 @@ function doImport() { // rights newItem.rights = getFirstResults(node, [n.dc+"rights"], true); + // section + var section = getNodeByType(isPartOf, n.bib+"Part"); + if(section) { + newItem.section = getFirstResults(section, [n.dc+"title"], true); + } + + // publication + if(container) { + newItem.publication = getFirstResults(container, [n.dc+"title"], true); + } + + // series + var series = getNodeByType(isPartOf, n.bib+"Series"); + if(series) { + newItem.series = getFirstResults(container, [n.dc+"title"], true); + } + + // volume + newItem.volume = getFirstResults((container ? container : node), [n.prism+"volume"], true); + + // number + newItem.number = getFirstResults((container ? 
container : node), [n.prism+"number"], true); + + // edition + newItem.edition = getFirstResults(node, [n.prism+"edition"], true); + // publisher - newItem.publisher = getFirstResults(node, [n.dc+"publisher"], true); + var publisher = getFirstResults(node, [n.dc+"publisher"]); + if(publisher) { + if(typeof(publisher[0]) == "string") { + newItem.publisher = publisher[0]; + } else { + var type = Scholar.RDF.getTargets(publisher[0], rdf+"type"); + if(type) { + type = Scholar.RDF.getResourceURI(type[0]); + if(type == n.foaf+"Organization") { // handle foaf organizational publishers + newItem.publisher = getFirstResults(publisher[0], [n.foaf+"name"], true); + var place = getFirstResults(publisher[0], [n.vcard+"adr"]); + if(place) { + newItem.place = getFirstResults(place[0], [n.vcard+"locality"]); + } + } + } + } + } + // (this will get ignored except for films, where we encode distributor as publisher) - newItem.distributor = getFirstResults(node, [n.dc+"publisher"], true); + newItem.distributor = newItem.publisher; // date newItem.date = getFirstResults(node, [n.dc+"date"], true); @@ -3281,6 +3487,18 @@ function doImport() { // identifier var identifiers = getFirstResults(node, [n.dc+"identifier"]); + if(container) { + var containerIdentifiers = getFirstResults(container, [n.dc+"identifier"]); + // concatenate sets of identifiers + if(containerIdentifiers) { + if(identifiers) { + identifiers = identifiers.concat(containerIdentifiers); + } else { + identifiers = containerIdentifiers; + } + } + } + if(identifiers) { for(var i in identifiers) { var firstFour = identifiers[i].substr(0, 4).toUpperCase(); @@ -3289,15 +3507,85 @@ function doImport() { newItem.ISBN = identifiers[i].substr(5).toUpperCase(); } else if(firstFour == "ISSN") { newItem.ISSN = identifiers[i].substr(5).toUpperCase(); + } else if(!newItem.accessionNumber) { + newItem.accessionNumber = identifiers[i]; } } } - // identifier - newItem.coverage = getFirstResults(node, [n.dc+"coverage"]); + // coverage + 
newItem.archiveLocation = getFirstResults(node, [n.dc+"coverage"], true); + + // medium + newItem.medium = getFirstResults(node, [n.dc+"medium"], true); + + // see also + var relations; + if(relations = getFirstResults(node, [n.dc+"relation"])) { + for each(var relation in relations) { + newItem.seeAlso.push(Scholar.RDF.getResourceURI(relation)); + } + } + + /** NOTES **/ + + var referencedBy = Scholar.RDF.getTargets(node, n.dcterms+"isReferencedBy"); + for each(var referentNode in referencedBy) { + var type = Scholar.RDF.getTargets(referentNode, rdf+"type"); + if(type && Scholar.RDF.getResourceURI(type[0]) == n.bib+"Memo") { + // if this is a memo + var note = new Array(); + note.note = getFirstResults(referentNode, [rdf+"value", n.dc+"description"], true); + if(note.note != undefined) { + // handle see also + var relations; + if(relations = getFirstResults(referentNode, [n.dc+"relation"])) { + note.seeAlso = new Array(); + for each(var relation in relations) { + note.seeAlso.push(Scholar.RDF.getResourceURI(relation)); + } + } + + // add note + newItem.notes.push(note); + } + } + } + + if(newItem.itemType == "note") { + // add note for standalone + newItem.note = getFirstResults(node, [rdf+"value", n.dc+"description"], true); + } + + /** TAGS **/ + + var subjects = getFirstResults(node, [n.dc+"subject"]); + for each(var subject in subjects) { + if(typeof(subject) == "string") { // a regular tag + newItem.tags.push(subject); + } else { // a call number + var type = Scholar.RDF.getTargets(subject, rdf+"type"); + if(type) { + type = Scholar.RDF.getResourceURI(type[0]); + if(Scholar.Utilities.inArray(type, callNumberTypes)) { + newItem.callNumber = getFirstResults(subject, [rdf+"value"], true); + } + } + } + } newItem.complete(); } + + /* COLLECTIONS */ + + for each(collection in collections) { + if(!Scholar.RDF.getArcsIn(collection)) { + var newCollection = new Scholar.Collection(); + processCollection(collection, newCollection); + newCollection.complete(); + } + } 
}'); REPLACE INTO "translators" VALUES ('32d59d2d-b65a-4da4-b0a3-bdd3cfb979e7', '2006-06-30 15:36:00', 3, 'RIS', 'Simon Kornblith', 'ris',