commit 9144b567726b04f1a39cb3693db106cc2963a716
parent 1ce4de835b6ae57d594a51734662b394e868cea5
Author: Simon Kornblith <simon@simonster.com>
Date: Sat, 5 Aug 2006 20:58:45 +0000
addresses #131, make import/export symmetrical
closes #163, make translator API allow creator types besides author
Import and export in the multi-ontology RDF format should now work properly. Collections, notes, and "see also" relations are all preserved. More extensive testing will be necessary later.
Diffstat:
4 files changed, 576 insertions(+), 139 deletions(-)
diff --git a/chrome/chromeFiles/content/scholar/fileInterface.js b/chrome/chromeFiles/content/scholar/fileInterface.js
@@ -1,5 +1,5 @@
Scholar_File_Interface = new function() {
- var _unresponsiveScriptPreference;
+ var _unresponsiveScriptPreference, _importCollection;
this.exportFile = exportFile;
this.importFile = importFile;
@@ -80,11 +80,17 @@ Scholar_File_Interface = new function() {
// get translators again, bc now we can check against the file
translators = translation.getTranslators();
if(translators.length) {
+ // create a new collection to take in imported items
+ var date = new Date();
+ _importCollection = Scholar.Collections.add("Imported "+date.toLocaleString());
+
+ // import items
translation.setTranslator(translators[0]);
- // show progress indicator
translation.setHandler("itemDone", _importItemDone);
+ translation.setHandler("collectionDone", _importCollectionDone);
translation.setHandler("done", _importDone);
_disableUnresponsive();
+ // show progress indicator
Scholar_File_Interface.Progress.show(
Scholar.getString("fileInterface.itemsImported"),
function() {
@@ -100,7 +106,16 @@ Scholar_File_Interface = new function() {
*/
function _importItemDone(obj, item) {
//Scholar_File_Interface.Progress.increment();
- item.save();
+ _importCollection.addItem(item.getID());
+ }
+
+ /*
+ * Saves collections after they've been imported. Input item is of the type
+ * outputted by Scholar.Collection.toArray(); only receives top-level
+ * collections
+ */
+ function _importCollectionDone(obj, collection) {
+ collection.changeParent(_importCollection.getID());
}
/*
diff --git a/chrome/chromeFiles/content/scholar/ingester/browser.js b/chrome/chromeFiles/content/scholar/ingester/browser.js
@@ -288,8 +288,11 @@ Scholar_Ingester_Interface._itemDone = function(obj, item, collection) {
var title = item.getField("title");
var icon = "chrome://scholar/skin/treeitem-"+Scholar.ItemTypes.getName(item.getField("itemTypeID"))+".png"
Scholar_Ingester_Interface.Progress.addLines([title], [icon]);
- var item = item.save();
- collection.addItem(item);
+
+ // add item to collection, if one was specified
+ if(collection) {
+ collection.addItem(item);
+ }
}
/*
diff --git a/chrome/chromeFiles/content/scholar/xpcom/translate.js b/chrome/chromeFiles/content/scholar/xpcom/translate.js
@@ -48,6 +48,7 @@
* immediately when script has finished executing
* _sandbox - sandbox in which translators will be executed
* _streams - streams that need to be closed when execution is complete
+ * _IDMap - a map from IDs as specified in Scholar.Item() to IDs of actual items
*
* WEB-ONLY PRIVATE PROPERTIES:
*
@@ -144,10 +145,17 @@ Scholar.Translate.prototype.setTranslator = function(translator) {
* returns: N/A
*
* itemDone
- * valid: web
+ * valid: import, web
* called: when an item has been processed; may be called asynchronously
* passed: an item object (see Scholar.Item)
* returns: N/A
+ *
+ * collectionDone
+ * valid: import, web
+ * called: when a collection has been processed, after all items have been
+ * added; may be called asynchronously
+ * passed: a collection object (see Scholar.Collection)
+ * returns: N/A
*
* done
* valid: all
@@ -245,6 +253,7 @@ Scholar.Translate.prototype._loadTranslator = function() {
* does the actual translation
*/
Scholar.Translate.prototype.translate = function() {
+ this._IDMap = new Array();
if(!this.location) {
throw("cannot translate: no location specified");
@@ -301,8 +310,13 @@ Scholar.Translate.prototype._generateSandbox = function() {
if(this.type == "web" || this.type == "import") {
// add routines to add new items
this._sandbox.Scholar.Item = Scholar.Translate.ScholarItem;
- // attach the function to be run when an item is
+ // attach the function to be run when an item is done
this._sandbox.Scholar.Item.prototype.complete = function() {me._itemDone(this)};
+
+ // add routines to add new collections
+ this._sandbox.Scholar.Collection = Scholar.Translate.ScholarCollection;
+ // attach the function to be run when a collection is done
+ this._sandbox.Scholar.Collection.prototype.complete = function() {me._collectionDone(this)};
} else if(this.type == "export") {
// add routines to retrieve items and collections
this._sandbox.Scholar.nextItem = function() { return me._exportGetItem() };
@@ -532,69 +546,126 @@ Scholar.Translate.prototype._closeStreams = function() {
* executed when an item is done and ready to be loaded into the database
*/
Scholar.Translate.prototype._itemDone = function(item) {
+ Scholar.debug(item);
+
// Get typeID, defaulting to "website"
var type = (item.itemType ? item.itemType : "website");
- // makes looping through easier
- delete item.itemType, item.complete;
- item.itemType = item.complete = undefined;
-
- var typeID = Scholar.ItemTypes.getID(type);
- var newItem = Scholar.Items.getNewItemByType(typeID);
-
- if(item.date && !item.year) {
- // date can serve as a year
- var dateID = Scholar.ItemFields.getID("date");
- var yearID = Scholar.ItemFields.getID("year");
- if(!Scholar.ItemFields.isValidForType(dateID, typeID) && Scholar.ItemFields.isValidForType(yearID, typeID)) {
- // year is valid but date is not
- var yearRe = /[0-9]{4}/;
- var m = yearRe.exec(item.date);
- if(m) {
- item.year = m[0]
- item.date = undefined;
+ Scholar.debug("type is "+type);
+ if(type == "note") { // handle notes differently
+ Scholar.debug("handling a note");
+ var myID = Scholar.Notes.add(item.note);
+ // re-retrieve the item
+ var newItem = Scholar.Items.get(myID);
+ } else {
+ // create new item
+ var typeID = Scholar.ItemTypes.getID(type);
+ var newItem = Scholar.Items.getNewItemByType(typeID);
+
+ // makes looping through easier
+ item.itemType = item.complete = undefined;
+
+ if(item.date && !item.year) {
+ // date can serve as a year
+ var dateID = Scholar.ItemFields.getID("date");
+ var yearID = Scholar.ItemFields.getID("year");
+ if(!Scholar.ItemFields.isValidForType(dateID, typeID) && Scholar.ItemFields.isValidForType(yearID, typeID)) {
+ // year is valid but date is not
+ var yearRe = /[0-9]{4}/;
+ var m = yearRe.exec(item.date);
+ if(m) {
+ item.year = m[0]
+ item.date = undefined;
+ }
+ }
+ } else if(!item.date && item.year) {
+ // the converse is also true
+ var dateID = Scholar.ItemFields.getID("date");
+ var yearID = Scholar.ItemFields.getID("year");
+ if(Scholar.ItemFields.isValidForType(dateID, typeID) && !Scholar.ItemFields.isValidForType(yearID, typeID)) {
+ // date is valid but year is not
+ item.date = item.year;
+ item.year = undefined;
}
}
- } else if(!item.date && item.year) {
- // the converse is also true
- var dateID = Scholar.ItemFields.getID("date");
- var yearID = Scholar.ItemFields.getID("year");
- if(Scholar.ItemFields.isValidForType(dateID, typeID) && !Scholar.ItemFields.isValidForType(yearID, typeID)) {
- // date is valid but year is not
- item.date = item.year;
- item.year = undefined;
- }
- }
-
- Scholar.debug(item);
-
- var fieldID, field;
- for(var i in item) {
- // loop through item fields
- data = item[i];
- if(data) { // if field has content
- if(i == "creators") { // creators are a special case
- for(j in data) {
- newItem.setCreator(j, data[j].firstName, data[j].lastName, 1);
- }
- } else if(i == "title") { // skip checks for title
- newItem.setField(i, data);
- } else if(i == "tags") { // add tags
- for(j in data) {
- newItem.addTag(data[j]);
- }
- } else if(fieldID = Scholar.ItemFields.getID(i)) {
- // if field is in db
- if(Scholar.ItemFields.isValidForType(fieldID, typeID)) {
- // if field is valid for this type
- // add field
+ var fieldID, field;
+ for(var i in item) {
+ // loop through item fields
+ data = item[i];
+
+ if(data) { // if field has content
+ if(i == "creators") { // creators are a special case
+ for(var j in data) {
+ var creatorType = 1;
+ // try to assign correct creator type
+ if(data[j].creatorType) {
+ try {
+ var creatorType = Scholar.CreatorTypes.getID(data[j].creatorType);
+ } catch(e) {
+ Scholar.debug("invalid creator type "+data[j].creatorType+" for creator index "+j);
+ }
+ }
+
+ newItem.setCreator(j, data[j].firstName, data[j].lastName, creatorType);
+ }
+ } else if(i == "title") { // skip checks for title
newItem.setField(i, data);
+ } else if(i == "tags") { // add tags
+ for(var j in data) {
+ newItem.addTag(data[j]);
+ }
+ } else if(i == "seeAlso") {
+ newItem.translateSeeAlso = data;
+ } else if(i != "note" && i != "notes" && i != "itemID" && (fieldID = Scholar.ItemFields.getID(i))) {
+ // if field is in db
+ if(Scholar.ItemFields.isValidForType(fieldID, typeID)) {
+ // if field is valid for this type
+ // add field
+ newItem.setField(i, data);
+ } else {
+ Scholar.debug("discarded field "+i+" for item: field not valid for type "+type);
+ }
} else {
- Scholar.debug("discarded field "+i+" for item: field not valid for type "+type);
+ Scholar.debug("discarded field "+i+" for item: field does not exist");
+ }
+ }
+ }
+
+ // save item
+ var myID = newItem.save();
+ if(myID == true) {
+ myID = newItem.getID();
+ }
+
+ // handle notes
+ if(item.notes) {
+ for each(var note in item.notes) {
+ var noteID = Scholar.Notes.add(note.note, myID);
+
+ // handle see also
+ if(note.seeAlso) {
+ var myNote = Scholar.Items.get(noteID);
+
+ for each(var seeAlso in note.seeAlso) {
+ if(this._IDMap[seeAlso]) {
+ myNote.addSeeAlso(this._IDMap[seeAlso]);
+ }
+ }
}
- } else {
- Scholar.debug("discarded field "+i+" for item: field does not exist");
+ }
+ }
+ }
+
+ if(item.itemID) {
+ this._IDMap[item.itemID] = myID;
+ }
+
+ // handle see also
+ if(item.seeAlso) {
+ for each(var seeAlso in item.seeAlso) {
+ if(this._IDMap[seeAlso]) {
+ newItem.addSeeAlso(this._IDMap[seeAlso]);
}
}
}
@@ -605,6 +676,40 @@ Scholar.Translate.prototype._itemDone = function(item) {
}
/*
+ * executed when a collection is done and ready to be loaded into the database
+ */
+Scholar.Translate.prototype._collectionDone = function(collection) {
+ Scholar.debug(collection);
+ var newCollection = this._processCollection(collection, null);
+
+ this._runHandler("collectionDone", newCollection);
+}
+
+/*
+ * recursively processes collections
+ */
+Scholar.Translate.prototype._processCollection = function(collection, parentID) {
+ var newCollection = Scholar.Collections.add(collection.name, parentID);
+
+ for each(child in collection.children) {
+ if(child.type == "collection") {
+ // do recursive processing of collections
+ this._processCollection(child, newCollection.getID());
+ } else {
+ // add mapped items to collection
+ if(this._IDMap[child.id]) {
+ Scholar.debug("adding "+this._IDMap[child.id]);
+ newCollection.addItem(this._IDMap[child.id]);
+ } else {
+ Scholar.debug("could not map "+child.id+" to an imported item");
+ }
+ }
+ }
+
+ return newCollection;
+}
+
+/*
* calls a handler (see setHandler above)
*/
Scholar.Translate.prototype._runHandler = function(type, argument) {
@@ -791,7 +896,7 @@ Scholar.Translate.prototype._exportGetCollection = function() {
collection.type = "collection";
collection.children = returnItem.toArray();
- return returnItem;
+ return collection;
}
}
@@ -881,12 +986,8 @@ Scholar.Translate.prototype._initializeInternalIO = function() {
}
}
-/* Scholar.Translate.ScholarItem: a class for generating new item from
+/* Scholar.Translate.ScholarItem: a class for generating a new item from
* inside scraper code
- *
- * (this must be part of the prototype because it must be able to access
- * methods relating to a specific instance of Scholar.Translate yet be called
- * as a class)
*/
Scholar.Translate.ScholarItem = function(itemType) {
@@ -898,12 +999,20 @@ Scholar.Translate.ScholarItem = function(itemType) {
this.notes = new Array();
// generate tags array
this.tags = new Array();
+ // generate see also array
+ this.seeAlso = new Array();
}
+/* Scholar.Translate.ScholarCollection: a class for generating a new top-level
+ * collection from inside scraper code (exposed to translators as
+ * Scholar.Collection, with a complete() method attached per translation)
+ */
+
+Scholar.Translate.ScholarCollection = function() {}
+
/* Scholar.Translate.RDF: a class for handling RDF IO
*
* If an import/export translator specifies dataMode RDF, this is the interface,
- * accessible from model.x
+ * accessible from model.
*
* In order to simplify things, all classes take in their resource/container
* as either the Mozilla native type or a string, but all
@@ -951,8 +1060,12 @@ Scholar.Translate.RDF.prototype._deEnumerate = function(enumerator) {
// get a resource as an nsIRDFResource, instead of a string
Scholar.Translate.RDF.prototype._getResource = function(about) {
- if(!(about instanceof Components.interfaces.nsIRDFResource)) {
- about = this._RDFService.GetResource(about);
+ try {
+ if(!(about instanceof Components.interfaces.nsIRDFResource)) {
+ about = this._RDFService.GetResource(about);
+ }
+ } catch(e) {
+ throw("invalid RDF resource: "+about);
}
return about;
}
@@ -996,15 +1109,20 @@ Scholar.Translate.RDF.prototype.newContainer = function(type, about) {
}
// adds a new container element (index optional)
-Scholar.Translate.RDF.prototype.addContainerElement = function(about, element, index) {
+Scholar.Translate.RDF.prototype.addContainerElement = function(about, element, literal, index) {
if(!(about instanceof Components.interfaces.nsIRDFContainer)) {
about = this._getResource(about);
var container = Components.classes["@mozilla.org/rdf/container;1"].
createInstance(Components.interfaces.nsIRDFContainer);
container.Init(this._dataSource, about);
+ about = container;
}
if(!(element instanceof Components.interfaces.nsIRDFResource)) {
- element = this._RDFService.GetResource(element);
+ if(literal) {
+ element = this._RDFService.GetLiteral(element);
+ } else {
+ element = this._RDFService.GetResource(element);
+ }
}
if(index) {
@@ -1014,6 +1132,19 @@ Scholar.Translate.RDF.prototype.addContainerElement = function(about, element, i
}
}
+// gets container elements as an array
+Scholar.Translate.RDF.prototype.getContainerElements = function(about) {
+ if(!(about instanceof Components.interfaces.nsIRDFContainer)) {
+ about = this._getResource(about);
+ var container = Components.classes["@mozilla.org/rdf/container;1"].
+ createInstance(Components.interfaces.nsIRDFContainer);
+ container.Init(this._dataSource, about);
+ about = container;
+ }
+
+ return this._deEnumerate(about.GetElements());
+}
+
// sets a namespace
Scholar.Translate.RDF.prototype.addNamespace = function(prefix, uri) {
if(this._serializer) { // silently fail, in case the reason the scraper
diff --git a/scrapers.sql b/scrapers.sql
@@ -2781,33 +2781,17 @@ Scholar.addOption("exportFileData", true);',
function generateCollection(collection) {
var collectionResource = "#collection:"+collection.id;
Scholar.RDF.addStatement(collectionResource, rdf+"type", n.bib+"Collection", false);
+ Scholar.RDF.addStatement(collectionResource, n.dc+"title", collection.name, true);
- for(var i in collection.children) {
- var child = collection.children[i];
-
+ for each(var child in collection.children) {
// add child list items
if(child.type == "collection") {
- Scholar.RDF.addStatement(collectionResource, n.dc+"hasPart", "#collection:"+child.id, false);
+ Scholar.RDF.addStatement(collectionResource, n.dcterms+"hasPart", "#collection:"+child.id, false);
// do recursive processing of collections
generateCollection(child);
} else {
- Scholar.RDF.addStatement(collectionResource, n.dc+"hasPart", itemResources[child.id], false);
- }
- }
-}
-
-function getContainerIfExists() {
- if(container) {
- if(containerElement) {
- return containerElement;
- } else {
- containerElement = Scholar.RDF.newResource();
- // attach container to section (if exists) or resource
- Scholar.RDF.addStatement((section ? section : resource), n.dcterms+"isPartOf", containerElement, false);
- return containerElement;
+ Scholar.RDF.addStatement(collectionResource, n.dcterms+"hasPart", itemResources[child.id], false);
}
- } else {
- return resource;
}
}
@@ -2831,14 +2815,22 @@ function doExport() {
// leave as global
itemResources = new Array();
+ // keep track of resources already assigned (in case two book items have the
+ // same ISBN, or something like that)
+ var usedResources = new Array();
+
+ var items = new Array();
+
// first, map each ID to a resource
- for(var i in items) {
- item = items[i];
+ while(item = Scholar.nextItem()) {
+ items.push(item);
- if(item.ISBN) {
+ if(item.ISBN && !usedResources["urn:isbn:"+item.ISBN]) {
itemResources[item.itemID] = "urn:isbn:"+item.ISBN;
- } else if(item.url) {
+ usedResources[itemResources[item.itemID]] = true;
+ } else if(item.url && !usedResources[item.url]) {
itemResources[item.itemID] = item.url;
+ usedResources[itemResources[item.itemID]] = true;
} else {
// just specify a node ID
itemResources[item.itemID] = "#item:"+item.itemID;
@@ -2849,10 +2841,8 @@ function doExport() {
}
}
- var item;
- while(item = Scholar.nextItem()) {
+ for each(item in items) {
// these items are global
- item = items[i];
resource = itemResources[item.itemID];
container = null;
@@ -2930,7 +2920,7 @@ function doExport() {
// attach container to resource
Scholar.RDF.addStatement(resource, n.bib+cTag, creatorResource, false);
}
- Scholar.RDF.addContainerElement(creatorContainers[cTag], creator, true);
+ Scholar.RDF.addContainerElement(creatorContainers[cTag], creator, false);
}
/** FIELDS ON NEARLY EVERYTHING BUT NOT A PART OF THE CORE **/
@@ -2962,16 +2952,34 @@ function doExport() {
// add relationship to resource
Scholar.RDF.addStatement(resource, n.dc+"isPartOf", section, false);
}
- // use ISSN to set up container element
- if(item.ISSN) {
- containerElement = "urn:issn:"+item.ISSN; // leave as global
+
+ // generate container
+ if(container) {
+ if(item.ISSN && !Scholar.RDF.getArcsIn("urn:issn:"+item.ISSN)) {
+ // use ISSN as container URI if no other item is
+ containerElement = "urn:issn:"+item.ISSN
+ } else {
+ containerElement = Scholar.RDF.newResource();
+ }
// attach container to section (if exists) or resource
Scholar.RDF.addStatement((section ? section : resource), n.dcterms+"isPartOf", containerElement, false);
+ // add container type
+ Scholar.RDF.addStatement(containerElement, rdf+"type", n.bib+container, false);
+ }
+
+ // ISSN
+ if(item.ISSN) {
+ Scholar.RDF.addStatement((containerElement ? containerElement : resource), n.dc+"identifier", "ISSN "+item.ISSN, true);
+ }
+
+ // ISBN
+ if(item.ISBN) {
+ Scholar.RDF.addStatement((containerElement ? containerElement : resource), n.dc+"identifier", "ISBN "+item.ISBN, true);
}
// publication gets linked to container via isPartOf
if(item.publication) {
- Scholar.RDF.addStatement(getContainerIfExists(), n.dc+"title", item.publication, true);
+ Scholar.RDF.addStatement((containerElement ? containerElement : resource), n.dc+"title", item.publication, true);
}
// series also linked in
@@ -2982,16 +2990,16 @@ function doExport() {
// set series title
Scholar.RDF.addStatement(series, n.dc+"title", item.series, true);
// add relationship to resource
- Scholar.RDF.addStatement(getContainerIfExists(), n.dcterms+"isPartOf", series, false);
+ Scholar.RDF.addStatement((containerElement ? containerElement : resource), n.dcterms+"isPartOf", series, false);
}
// volume
if(item.volume) {
- Scholar.RDF.addStatement(getContainerIfExists(), n.prism+"volume", item.volume, true);
+ Scholar.RDF.addStatement((containerElement ? containerElement : resource), n.prism+"volume", item.volume, true);
}
// number
if(item.number) {
- Scholar.RDF.addStatement(getContainerIfExists(), n.prism+"number", item.number, true);
+ Scholar.RDF.addStatement((containerElement ? containerElement : resource), n.prism+"number", item.number, true);
}
// edition
if(item.edition) {
@@ -3069,18 +3077,17 @@ function doExport() {
// add note tag
Scholar.RDF.addStatement(noteResource, rdf+"type", n.bib+"Memo", false);
- // add note description (sorry, couldn''t find a better way of
- // representing this data in an existing ontology)
- Scholar.RDF.addStatement(noteResource, n.dc+"description", item.notes[j].note, true);
+ // add note value
+ Scholar.RDF.addStatement(noteResource, rdf+"value", item.notes[j].note, true);
// add relationship between resource and note
Scholar.RDF.addStatement(resource, n.dcterms+"isReferencedBy", noteResource, false);
// Add see also info to RDF
- generateSeeAlso(item.notes[j].itemID, item.notes[j].seeAlso);
+ generateSeeAlso(resource, item.notes[j].seeAlso);
}
if(item.note) {
- Scholar.RDF.addStatement(resource, n.dc+"description", item.note, true);
+ Scholar.RDF.addStatement(resource, rdf+"value", item.note, true);
}
/** TAGS **/
@@ -3090,9 +3097,7 @@ function doExport() {
}
// Add see also info to RDF
- generateSeeAlso(item.itemID, item.seeAlso);
-
- // ELEMENTS AMBIGUOUSLY ENCODED: callNumber, acccessionType
+ generateSeeAlso(resource, item.seeAlso);
}
/** RDF COLLECTION STRUCTURE **/
@@ -3204,14 +3209,18 @@ REPLACE INTO "translators" VALUES ('6e372642-ed9d-4934-b5d1-c11ac758ebb7', '2006
REPLACE INTO "translators" VALUES ('5e3ad958-ac79-463d-812b-a86a9235c28f', '2006-07-15 17:09:00', 1, 'RDF', 'Simon Kornblith', 'rdf',
'Scholar.configure("dataMode", "rdf");',
-'function getFirstResults(node, properties, onlyOneString) {
+'// gets the first result set for a property that can be encoded in multiple
+// ontologies
+function getFirstResults(node, properties, onlyOneString) {
for(var i=0; i<properties.length; i++) {
var result = Scholar.RDF.getTargets(node, properties[i]);
if(result) {
if(onlyOneString) {
// onlyOneString means we won''t return nsIRDFResources, only
// actual literals
- return result[0];
+ if(typeof(result[0]) != "object") {
+ return result[0];
+ }
} else {
return result;
}
@@ -3220,7 +3229,93 @@ REPLACE INTO "translators" VALUES ('5e3ad958-ac79-463d-812b-a86a9235c28f', '2006
return; // return undefined on failure
}
+// adds creators to an item given a list of creator nodes
+function handleCreators(newItem, creators, creatorType) {
+ if(!creators) {
+ return;
+ }
+
+ if(typeof(creators[0]) != "string") { // see if creators are in a container
+ try {
+ var creators = Scholar.RDF.getContainerElements(creators[0]);
+ } catch(e) {}
+ }
+
+ if(typeof(creators[0]) == "string") { // support creators encoded as strings
+ for(var i in creators) {
+ if(typeof(creators[i]) != "object") {
+ newItem.creators.push(Scholar.Utilities.cleanAuthor(creators[i], creatorType, true));
+ }
+ }
+ } else { // also support foaf
+ for(var i in creators) {
+ var type = Scholar.RDF.getTargets(creators[i], rdf+"type");
+ if(type) {
+ type = Scholar.RDF.getResourceURI(type[0]);
+ if(type == n.foaf+"Person") { // author is FOAF type person
+ var creator = new Array();
+ creator.lastName = getFirstResults(creators[i],
+ [n.foaf+"surname", n.foaf+"family_name"], true);
+ creator.firstName = getFirstResults(creators[i],
+ [n.foaf+"givenname", n.foaf+"firstName"], true);
+ creator.creatorType = creatorType;
+ newItem.creators.push(creator);
+ }
+ }
+ }
+ }
+}
+
+// processes collections recursively
+function processCollection(node, collection) {
+ if(!collection) {
+ collection = new Array();
+ }
+ collection.type = "collection";
+ collection.name = getFirstResults(node, [n.dc+"title"], true);
+ collection.children = new Array();
+
+ // check for children
+ var children = getFirstResults(node, [n.dcterms+"hasPart"]);
+ for each(var child in children) {
+ var type = Scholar.RDF.getTargets(child, rdf+"type");
+ if(type) {
+ type = Scholar.RDF.getResourceURI(type[0]);
+ }
+
+ if(type == n.bib+"Collection") {
+ // for collections, process recursively
+ collection.children.push(processCollection(child));
+ } else {
+ // all other items are added by ID
+ collection.children.push({id:Scholar.RDF.getResourceURI(child), type:"item"});
+ }
+ }
+
+ return collection;
+}
+
+// gets the node with a given type from an array
+function getNodeByType(nodes, type) {
+ if(!nodes) {
+ return false;
+ }
+
+ for each(node in nodes) {
+ var nodeType = Scholar.RDF.getTargets(node, rdf+"type");
+ if(nodeType) {
+ nodeType = Scholar.RDF.getResourceURI(nodeType[0]);
+ if(nodeType == type) { // we have a node of the correct type
+ return node;
+ }
+ }
+ }
+ return false;
+}
+
function doImport() {
+ rdf = "http://www.w3.org/1999/02/22-rdf-syntax-ns#";
+
n = {
bib:"http://purl.org/net/biblio#",
dc:"http://purl.org/dc/elements/1.1/",
@@ -3230,37 +3325,104 @@ function doImport() {
vcard:"http://nwalsh.com/rdf/vCard"
};
+ callNumberTypes = [
+ n.dcterms+"LCC", n.dcterms+"DDC", n.dcterms+"UDC"
+ ];
+
var nodes = Scholar.RDF.getAllResources();
if(!nodes) {
return false;
}
- for(var i in nodes) {
- var node = nodes[i];
-
- if(Scholar.RDF.getArcsIn(node)) {
- // root nodes only, please
- continue;
- }
+ // keep track of collections while we''re looping through
+ var collections = new Array();
+ for each(var node in nodes) {
var newItem = new Scholar.Item();
+ newItem.itemID = Scholar.RDF.getResourceURI(node);
+ var container = undefined;
+
+ // type
+ var type = Scholar.RDF.getTargets(node, rdf+"type");
+ // also deal with type detection based on parts, so we can differentiate
+ // magazine and journal articles, and find container elements
+ var isPartOf = getFirstResults(node, [n.dcterms+"isPartOf"]);
+
+ if(type) {
+ type = Scholar.RDF.getResourceURI(type[0]);
+
+ if(type == n.bib+"Book") {
+ newItem.itemType = "book";
+ } else if(type == n.bib+"BookSection") {
+ newItem.itemType = "bookSection";
+ container = getNodeByType(isPartOf, n.bib+"Book");
+ } else if(type == n.bib+"Article") { // choose between journal,
+ // newspaper, and magazine
+ // articles
+ if(container = getNodeByType(isPartOf, n.bib+"Journal")) {
+ newItem.itemType = "journalArticle";
+ } else if(container = getNodeByType(isPartOf, n.bib+"Periodical")) {
+ newItem.itemType = "magazineArticle";
+ } else if(container = getNodeByType(isPartOf, n.bib+"Newspaper")) {
+ newItem.itemType = "newspaperArticle";
+ }
+ } else if(type == n.bib+"Thesis") {
+ newItem.itemType = "thesis";
+ } else if(type == n.bib+"Letter") {
+ newItem.itemType = "letter";
+ } else if(type == n.bib+"Manuscript") {
+ newItem.itemType = "manuscript";
+ } else if(type == n.bib+"Interview") {
+ newItem.itemType = "interview";
+ } else if(type == n.bib+"MotionPicture") {
+ newItem.itemType = "film";
+ } else if(type == n.bib+"Illustration") {
+ newItem.itemType = "illustration";
+ } else if(type == n.bib+"Document") {
+ newItem.itemType = "website";
+ } else if(type == n.bib+"Memo") {
+ // check to see if this note is independent
+ var arcs = Scholar.RDF.getArcsIn(node);
+ Scholar.Utilities.debugPrint("working on a note");
+ Scholar.Utilities.debugPrint(arcs);
+ var skip = false;
+ for each(var arc in arcs) {
+ arc = Scholar.RDF.getResourceURI(arc);
+ if(arc != n.dc+"relation" && arc != n.dcterms+"hasPart") {
+ // related to another item by some arc besides see also
+ skip = true;
+ }
+ }
+ if(skip) {
+ continue;
+ }
+
+ newItem.itemType = "note";
+ } else if(type == n.bib+"Collection") {
+ // skip collections until all the items are done
+ collections.push(node);
+ continue;
+ } else { // default to book
+ newItem.itemType = "book";
+ }
+ }
// title
newItem.title = getFirstResults(node, [n.dc+"title"], true);
- if(!newItem.title) { // require the title
+ if(newItem.itemType != "note" && !newItem.title) { // require the title
+ // (if not a note)
continue;
}
- // creators
- var creators = getFirstResults(node, [n.dc+"creator"]);
- Scholar.Utilities.debugPrint(creators);
- if(creators) {
- for(var i in creators) {
- if(typeof(creators[i]) != "object") {
- newItem.creators.push(Scholar.Utilities.cleanAuthor(creators[i], "author", true));
- }
- }
- }
+ // regular author-type creators
+ var creators = getFirstResults(node, [n.bib+"authors", n.dc+"creator"]);
+ handleCreators(newItem, creators, "author");
+ // editors
+ var creators = getFirstResults(node, [n.bib+"editors"]);
+ handleCreators(newItem, creators, "editor");
+ // contributors
+ var creators = getFirstResults(node, [n.bib+"contributors"]);
+ handleCreators(newItem, creators, "contributor");
// source
newItem.source = getFirstResults(node, [n.dc+"source"], true);
@@ -3268,10 +3430,54 @@ function doImport() {
// rights
newItem.rights = getFirstResults(node, [n.dc+"rights"], true);
+ // section
+ var section = getNodeByType(isPartOf, n.bib+"Part");
+ if(section) {
+ newItem.section = getFirstResults(section, [n.dc+"title"], true);
+ }
+
+ // publication
+ if(container) {
+ newItem.publication = getFirstResults(container, [n.dc+"title"], true);
+ }
+
+ // series
+ var series = getNodeByType(isPartOf, n.bib+"Series");
+ if(series) {
+ newItem.series = getFirstResults(container, [n.dc+"title"], true);
+ }
+
+ // volume
+ newItem.volume = getFirstResults((container ? container : node), [n.prism+"volume"], true);
+
+ // number
+ newItem.number = getFirstResults((container ? container : node), [n.prism+"number"], true);
+
+ // edition
+ newItem.edition = getFirstResults(node, [n.prism+"edition"], true);
+
// publisher
- newItem.publisher = getFirstResults(node, [n.dc+"publisher"], true);
+ var publisher = getFirstResults(node, [n.dc+"publisher"]);
+ if(publisher) {
+ if(typeof(publisher[0]) == "string") {
+ newItem.publisher = publisher[0];
+ } else {
+ var type = Scholar.RDF.getTargets(publisher[0], rdf+"type");
+ if(type) {
+ type = Scholar.RDF.getResourceURI(type[0]);
+ if(type == n.foaf+"Organization") { // handle foaf organizational publishers
+ newItem.publisher = getFirstResults(publisher[0], [n.foaf+"name"], true);
+ var place = getFirstResults(publisher[0], [n.vcard+"adr"]);
+ if(place) {
+ newItem.place = getFirstResults(place[0], [n.vcard+"locality"]);
+ }
+ }
+ }
+ }
+ }
+
// (this will get ignored except for films, where we encode distributor as publisher)
- newItem.distributor = getFirstResults(node, [n.dc+"publisher"], true);
+ newItem.distributor = newItem.publisher;
// date
newItem.date = getFirstResults(node, [n.dc+"date"], true);
@@ -3281,6 +3487,18 @@ function doImport() {
// identifier
var identifiers = getFirstResults(node, [n.dc+"identifier"]);
+ if(container) {
+ var containerIdentifiers = getFirstResults(container, [n.dc+"identifier"]);
+ // concatenate sets of identifiers
+ if(containerIdentifiers) {
+ if(identifiers) {
+ identifiers = identifiers.concat(containerIdentifiers);
+ } else {
+ identifiers = containerIdentifiers;
+ }
+ }
+ }
+
if(identifiers) {
for(var i in identifiers) {
var firstFour = identifiers[i].substr(0, 4).toUpperCase();
@@ -3289,15 +3507,85 @@ function doImport() {
newItem.ISBN = identifiers[i].substr(5).toUpperCase();
} else if(firstFour == "ISSN") {
newItem.ISSN = identifiers[i].substr(5).toUpperCase();
+ } else if(!newItem.accessionNumber) {
+ newItem.accessionNumber = identifiers[i];
}
}
}
- // identifier
- newItem.coverage = getFirstResults(node, [n.dc+"coverage"]);
+ // coverage
+ newItem.archiveLocation = getFirstResults(node, [n.dc+"coverage"], true);
+
+ // medium
+ newItem.medium = getFirstResults(node, [n.dc+"medium"], true);
+
+ // see also
+ var relations;
+ if(relations = getFirstResults(node, [n.dc+"relation"])) {
+ for each(var relation in relations) {
+ newItem.seeAlso.push(Scholar.RDF.getResourceURI(relation));
+ }
+ }
+
+ /** NOTES **/
+
+ var referencedBy = Scholar.RDF.getTargets(node, n.dcterms+"isReferencedBy");
+ for each(var referentNode in referencedBy) {
+ var type = Scholar.RDF.getTargets(referentNode, rdf+"type");
+ if(type && Scholar.RDF.getResourceURI(type[0]) == n.bib+"Memo") {
+ // if this is a memo
+ var note = new Array();
+ note.note = getFirstResults(referentNode, [rdf+"value", n.dc+"description"], true);
+ if(note.note != undefined) {
+ // handle see also
+ var relations;
+ if(relations = getFirstResults(referentNode, [n.dc+"relation"])) {
+ note.seeAlso = new Array();
+ for each(var relation in relations) {
+ note.seeAlso.push(Scholar.RDF.getResourceURI(relation));
+ }
+ }
+
+ // add note
+ newItem.notes.push(note);
+ }
+ }
+ }
+
+ if(newItem.itemType == "note") {
+ // add note for standalone
+ newItem.note = getFirstResults(node, [rdf+"value", n.dc+"description"], true);
+ }
+
+ /** TAGS **/
+
+ var subjects = getFirstResults(node, [n.dc+"subject"]);
+ for each(var subject in subjects) {
+ if(typeof(subject) == "string") { // a regular tag
+ newItem.tags.push(subject);
+ } else { // a call number
+ var type = Scholar.RDF.getTargets(subject, rdf+"type");
+ if(type) {
+ type = Scholar.RDF.getResourceURI(type[0]);
+ if(Scholar.Utilities.inArray(type, callNumberTypes)) {
+ newItem.callNumber = getFirstResults(subject, [rdf+"value"], true);
+ }
+ }
+ }
+ }
newItem.complete();
}
+
+ /* COLLECTIONS */
+
+ for each(collection in collections) {
+ if(!Scholar.RDF.getArcsIn(collection)) {
+ var newCollection = new Scholar.Collection();
+ processCollection(collection, newCollection);
+ newCollection.complete();
+ }
+ }
}');
REPLACE INTO "translators" VALUES ('32d59d2d-b65a-4da4-b0a3-bdd3cfb979e7', '2006-06-30 15:36:00', 3, 'RIS', 'Simon Kornblith', 'ris',