commit 216f0c7581d6a486f7a6a27340b97bcdde119767
parent 9e5c15423a0e8248e6b263755d268ad84390d6f6
Author: Simon Kornblith <simon@simonster.com>
Date: Tue, 8 Aug 2006 01:06:33 +0000
closes #83, figure out how to implement OpenURL
closes #76, implement extensible search/retrieval architecture for obtaining metadata
OpenURL COinS lookup is now implemented using a real search architecture system. At the moment, it works with Open WorldCat for books, CrossRef for journal articles (provided the COinS object contains a DOI or an ISSN), and PubMed when a PMID is available.
Diffstat:
4 files changed, 504 insertions(+), 313 deletions(-)
diff --git a/chrome/chromeFiles/content/scholar/xpcom/ingester.js b/chrome/chromeFiles/content/scholar/xpcom/ingester.js
@@ -148,7 +148,6 @@ Scholar.OpenURL = new function() {
this.discoverResolvers = discoverResolvers;
this.createContextObject = createContextObject;
this.parseContextObject = parseContextObject;
- this.lookupContextObject = lookupContextObject;
/*
* Returns a URL to look up an item in the OpenURL resolver
@@ -305,12 +304,16 @@ Scholar.OpenURL = new function() {
/*
* Generates an item in the format returned by item.fromArray() given an
* OpenURL version 1.0 contextObject
+ *
+ * accepts an item array to fill, or creates and returns a new item array
*/
- function parseContextObject(co) {
+ function parseContextObject(co, item) {
var coParts = co.split("&");
- var item = new Array();
- item.creators = new Array();
+ if(!item) {
+ var item = new Array();
+ item.creators = new Array();
+ }
// get type
item.itemType = _determineResourceType(coParts);
@@ -417,157 +420,6 @@ Scholar.OpenURL = new function() {
}
/*
- * Looks up additional information on an item in the format returned by
- * item.fromArray() in CrossRef or Open WorldCat given an OpenURL version
- * 1.0 contextObject
- */
- function lookupContextObject(co, done, error) {
- // CrossRef requires a url_ver to work right
- if(co.indexOf("url_ver=Z39.88-2004") == -1) {
- co = "url_ver=Z39.88-2004&"+co;
- }
-
- var type = _determineResourceType(co.split("&"));
- if(!type) {
- return false;
- }
-
- if(type == "journal") {
- // look up journals in CrossRef
- Scholar.Utilities.HTTP.doGet("http://www.crossref.org/openurl/?"+co+"&noredirect=true", null, function(req) {
- var items = _processCrossRef(req.responseText);
- done(items);
- });
- } else {
- // look up books in Open WorldCat
- Scholar.Utilities.HTTP.processDocuments(null, ["http://partneraccess.oclc.org/wcpa/servlet/OpenUrl?"+co], function(browser) {
- var doc = browser.contentDocument;
- // find new COinS in the Open WorldCat page
- items = _processOWC(doc);
-
- if(items) { // we got a single item page; return the item
- done(items);
- } else { // assume we have a search results page
- var items = new Array();
-
- var namespace = doc.documentElement.namespaceURI;
- var nsResolver = namespace ? function(prefix) {
- if (prefix == 'x') return namespace; else return null;
- } : null;
-
- // first try to get only books
- var elmts = doc.evaluate('//table[@class="tableLayout"]/tbody/tr/td[@class="content"]/table[@class="tableResults"]/tbody/tr[td/img[@alt="Book"]]/td/div[@class="title"]/a', doc, nsResolver, Components.interfaces.nsIDOMXPathResult.ANY_TYPE,null);
- var elmt = elmts.iterateNext();
- if(!elmt) { // if that fails, look for other options
- var elmts = doc.evaluate('//table[@class="tableLayout"]/tbody/tr/td[@class="content"]/table[@class="tableResults"]/tbody/tr[td/img[@alt="Book"]]/td/div[@class="title"]/a', doc, nsResolver, Components.interfaces.nsIDOMXPathResult.ANY_TYPE,null);
- elmt = elmts.iterateNext()
- }
-
- var urlsToProcess = new Array();
- do {
- urlsToProcess.push(elmt.href);
- } while(elmt = elmts.iterateNext());
-
- Scholar.Utilities.HTTP.processDocuments(null, urlsToProcess, function(browser) {
- // per URL
- var newItems = _processOWC(browser.contentDocument);
- if(newItems) {
- items = items.concat(newItems);
- }
- }, function() { // done
- done(items);
- }, function() { // error
- error();
- });
- }
- }, null, function() {
- error();
- });
- }
- }
-
- /*
- * Processes the XML format returned by CrossRef
- */
- function _processCrossRef(xmlOutput) {
- xmlOutput = xmlOutput.replace(/<\?xml[^>]*\?>/, "");
-
- // parse XML with E4X
- var qr = new Namespace("http://www.crossref.org/qrschema/2.0");
- try {
- var xml = new XML(xmlOutput);
- } catch(e) {
- return false;
- }
-
- // ensure status is valid
- var status = xml.qr::body.qr::query.@status.toString();
- if(status != "resolved" && status != "multiresolved") {
- return false;
- }
-
- var query = xml.qr::body.qr::query;
- var item = new Array();
- item.creators = new Array();
-
- // try to get a DOI
- item.DOI = query.qr::doi.(@type=="journal_article").toString();
- if(!item.DOI) {
- item.DOI = query.qr::doi.(@type=="book_title").toString();
- }
- if(!item.DOI) {
- item.DOI = query.qr::doi.(@type=="book_content").toString();
- }
-
- // try to get an ISSN (no print/electronic preferences)
- item.ISSN = query.qr::issn.toString();
- // get title
- item.title = query.qr::article_title.toString();
- // get publicationTitle
- item.publicationTitle = query.qr::journal_title.toString();
- // get author
- item.creators.push(Scholar.Utilities.cleanAuthor(query.qr::author.toString(), "author", true));
- // get volume
- item.volume = query.qr::volume.toString();
- // get issue
- item.issue = query.qr::issue.toString();
- // get year
- item.date = query.qr::year.toString();
- // get edition
- item.edition = query.qr::edition_number.toString();
- // get first page
- item.pages = query.qr::first_page.toString();
-
- return [item];
- }
-
- /*
- * Parses a document object referring to an Open WorldCat entry for its
- * OpenURL contextObject, then returns an item generated from this
- * contextObject
- */
- function _processOWC(doc) {
- var spanTags = doc.getElementsByTagName("span");
- for(var i=0; i<spanTags.length; i++) {
- var spanClass = spanTags[i].getAttribute("class");
- if(spanClass) {
- var spanClasses = spanClass.split(" ");
- if(Scholar.inArray("Z3988", spanClasses)) {
- var spanTitle = spanTags[i].getAttribute("title");
- var item = parseContextObject(spanTitle);
- if(item) {
- return [item];
- } else {
- return false;
- }
- }
- }
- }
-
- return false;
- }
-
- /*
* Determines the type of an OpenURL contextObject
*/
function _determineResourceType(coParts) {
diff --git a/chrome/chromeFiles/content/scholar/xpcom/translate.js b/chrome/chromeFiles/content/scholar/xpcom/translate.js
@@ -13,6 +13,7 @@
* export
* import
* web
+ * search
*
* a typical export process:
* var translatorObj = new Scholar.Translate();
@@ -35,7 +36,10 @@
* location - the location of the target (read-only; set with setLocation)
* for import/export - this is an instance of nsILocalFile
* for web - this is a URL
+ * item - item to be used for searching (read-only; set with setItem)
* path - the path to the target; for web, this is the same as location
+ * saveItem - whether new items should be saved to the database. defaults to
+ * true; set using second argument of constructor.
*
* PRIVATE PROPERTIES:
*
@@ -49,6 +53,10 @@
* _sandbox - sandbox in which translators will be executed
* _streams - streams that need to be closed when execution is complete
* _IDMap - a map from IDs as specified in Scholar.Item() to IDs of actual items
+ * _parentTranslator - set when a translator is called from another translator.
+ * among other things, disables passing of the translate
+ * object to handlers and modifies complete() function on
+ * returned items
*
* WEB-ONLY PRIVATE PROPERTIES:
*
@@ -56,23 +64,41 @@
* an EZProxy
*/
-Scholar.Translate = function(type) {
+Scholar.Translate = function(type, saveItem) {
this.type = type;
- // import = 001 = 1
- // export = 010 = 2
- // web = 100 = 4
+ // import = 0001 = 1
+ // export = 0010 = 2
+ // web = 0100 = 4
+ // search = 1000 = 8
// combination types determined by addition or bitwise AND
// i.e., import+export = 1+2 = 3
- if(type == "import") {
- this._numericTypes = "1,3,5,7";
- } else if(type == "export") {
- this._numericTypes = "2,3,6,7";
- } else if(type == "web") {
- this._numericTypes = "4,5,6,7";
+ this._numericTypes = "";
+ for(var i=0; i<=1; i++) {
+ for(var j=0; j<=1; j++) {
+ for(var k=0; k<=1; k++) {
+ if(type == "import") {
+ this._numericTypes += ","+parseInt(i.toString()+j.toString()+k.toString()+"1", 2);
+ } else if(type == "export") {
+ this._numericTypes += ","+parseInt(i.toString()+j.toString()+"1"+k.toString(), 2);
+ } else if(type == "web") {
+ this._numericTypes += ","+parseInt(i.toString()+"1"+j.toString()+k.toString(), 2);
+ } else if(type == "search") {
+ this._numericTypes += ","+parseInt("1"+i.toString()+j.toString()+k.toString(), 2);
+ } else {
+ throw("invalid import type");
+ }
+ }
+ }
+ }
+ this._numericTypes = this._numericTypes.substr(1);
+
+ if(saveItem === false) { // three equals signs means if it's left
+ // undefined, this.saveItem will still be true
+ this.saveItem = false;
} else {
- throw("invalid import type");
+ this.saveItem = true;
}
this._handlers = new Array();
@@ -88,6 +114,13 @@ Scholar.Translate.prototype.setBrowser = function(browser) {
}
/*
+ * sets the item to be used for searching
+ */
+Scholar.Translate.prototype.setItem = function(item) {
+ this.item = item;
+}
+
+/*
* sets the location to operate upon (file should be an nsILocalFile object or
* web address)
*/
@@ -112,12 +145,41 @@ Scholar.Translate.prototype.setLocation = function(location) {
* accepts either the object from getTranslators() or an ID
*/
Scholar.Translate.prototype.setTranslator = function(translator) {
+ if(!translator) {
+ throw("cannot set translator: invalid value");
+ }
+
if(typeof(translator) == "object") { // passed an object and not an ID
- translator = translator.translatorID;
+ if(translator.translatorID) {
+ translator = [translator.translatorID];
+ } else {
+ // we have an associative array of translators
+ if(this.type != "search") {
+ throw("cannot set translator: a single translator must be specified when doing "+this.type+" translation");
+ }
+ // accept a list of objects
+ for(var i in translator) {
+ if(typeof(translator[i]) == "object") {
+ if(translator[i].translatorID) {
+ translator[i] = translator[i].translatorID;
+ } else {
+ throw("cannot set translator: must specify a single translator or a list of translators");
+ }
+ }
+ }
+ }
+ } else {
+ translator = [translator];
+ }
+
+ var where = "";
+ for(var i in translator) {
+ where += " OR translatorID = ?";
}
+ where = where.substr(4);
- var sql = "SELECT * FROM translators WHERE translatorID = ? AND type IN ("+this._numericTypes+")";
- this.translator = Scholar.DB.rowQuery(sql, [translator]);
+ var sql = "SELECT * FROM translators WHERE "+where+" AND type IN ("+this._numericTypes+")";
+ this.translator = Scholar.DB.query(sql, translator);
if(!this.translator) {
return false;
}
@@ -145,13 +207,13 @@ Scholar.Translate.prototype.setTranslator = function(translator) {
* returns: N/A
*
* itemDone
- * valid: import, web
+ * valid: import, web, search
* called: when an item has been processed; may be called asynchronously
* passed: an item object (see Scholar.Item)
* returns: N/A
*
* collectionDone
- * valid: import, web
+ * valid: import
* called: when a collection has been processed, after all items have been
* added; may be called asynchronously
* passed: a collection object (see Scholar.Collection)
@@ -187,7 +249,7 @@ Scholar.Translate.prototype.getTranslators = function() {
var sql = "SELECT translatorID, label, target, detectCode FROM translators WHERE type IN ("+this._numericTypes+") ORDER BY target IS NULL";
var translators = Scholar.DB.query(sql);
- if(!this.location) {
+ if(!this.location && !this.item) {
return translators; // no need to see which can translate, because
// we don't have a location yet (for export or
// import dialog)
@@ -228,20 +290,21 @@ Scholar.Translate.prototype.displayOptions = function() {
}
Scholar.Translate.prototype._loadTranslator = function() {
- if(!this._sandbox) {
- // create a new sandbox if none exists
+ if(!this._sandbox || this.type == "search") {
+ // create a new sandbox if none exists, or for searching (so that it's
+ // bound to the correct url)
this._generateSandbox();
}
// parse detect code for the translator
- this._parseDetectCode(this.translator);
+ this._parseDetectCode(this.translator[0]);
- Scholar.debug("parsing code for "+this.translator.label);
+ Scholar.debug("parsing code for "+this.translator[0].label);
try {
- Components.utils.evalInSandbox(this.translator.code, this._sandbox);
+ Components.utils.evalInSandbox(this.translator[0].code, this._sandbox);
} catch(e) {
- Scholar.debug(e+' in parsing code for '+this.translator.label);
+ Scholar.debug(e+' in parsing code for '+this.translator[0].label);
this._translationComplete(false);
return false;
}
@@ -254,17 +317,24 @@ Scholar.Translate.prototype._loadTranslator = function() {
*/
Scholar.Translate.prototype.translate = function() {
this._IDMap = new Array();
+ this._complete = false;
- if(!this.location) {
- throw("cannot translate: no location specified");
+ if(!this.translator || !this.translator.length) {
+ throw("cannot translate: no translator specified");
}
- this._complete = false;
+ if(!this.location && this.type != "search") {
+ // searches operate differently, because we could have an array of
+ // translators and have to go through each
+ throw("cannot translate: no location specified");
+ }
if(!this._loadTranslator()) {
return;
}
+ this._sandbox.Scholar.scraperName = this.translator[0].label;
+
var returnValue;
if(this.type == "web") {
returnValue = this._web();
@@ -272,7 +342,10 @@ Scholar.Translate.prototype.translate = function() {
returnValue = this._import();
} else if(this.type == "export") {
returnValue = this._export();
+ } else if(this.type == "search") {
+ returnValue = this._search();
}
+
if(!returnValue) {
// failure
this._translationComplete(false);
@@ -285,12 +358,31 @@ Scholar.Translate.prototype.translate = function() {
/*
* generates a sandbox for scraping/scraper detection
*/
+Scholar.Translate._searchSandboxRegexp = new RegExp();
+Scholar.Translate._searchSandboxRegexp.compile("^http://[\\w.]+/");
Scholar.Translate.prototype._generateSandbox = function() {
var me = this;
- if(this.type == "web") {
- // use real URL, not proxied version, to create sandbox
- this._sandbox = new Components.utils.Sandbox(this.browser.contentDocument.location.href);
+ if(this.type == "web" || this.type == "search") {
+ // get sandbox URL
+ var sandboxURL = "";
+ if(this.type == "web") {
+ // use real URL, not proxied version, to create sandbox
+ sandboxURL = this.browser.contentDocument.location.href;
+ } else {
+ // generate sandbox for search by extracting domain from translator
+ // target, if one exists
+ if(this.translator && this.translator[0] && this.translator[0].target) {
+ // so that web translators work too
+ var tempURL = this.translator[0].target.replace(/\\/g, "").replace(/\^/g, "");
+ var m = Scholar.Translate._searchSandboxRegexp.exec(tempURL);
+ if(m) {
+ sandboxURL = m[0];
+ }
+ }
+ }
+ Scholar.debug("binding sandbox to "+sandboxURL);
+ this._sandbox = new Components.utils.Sandbox(sandboxURL);
this._sandbox.Scholar = new Object();
// add ingester utilities
@@ -300,27 +392,30 @@ Scholar.Translate.prototype._generateSandbox = function() {
// set up selectItems handler
this._sandbox.Scholar.selectItems = function(options) { return me._selectItems(options) };
} else {
- // use null URL to create sanbox
+ // use null URL to create sandbox
this._sandbox = new Components.utils.Sandbox("");
this._sandbox.Scholar = new Object();
this._sandbox.Scholar.Utilities = new Scholar.Utilities();
}
- if(this.type == "web" || this.type == "import") {
+
+ if(this.type == "export") {
+ // add routines to retrieve items and collections
+ this._sandbox.Scholar.nextItem = function() { return me._exportGetItem() };
+ this._sandbox.Scholar.nextCollection = function() { return me._exportGetCollection() }
+ } else {
// add routines to add new items
this._sandbox.Scholar.Item = Scholar.Translate.ScholarItem;
// attach the function to be run when an item is done
this._sandbox.Scholar.Item.prototype.complete = function() {me._itemDone(this)};
- // add routines to add new collections
- this._sandbox.Scholar.Collection = Scholar.Translate.ScholarCollection;
- // attach the function to be run when a collection is done
- this._sandbox.Scholar.Collection.prototype.complete = function() {me._collectionDone(this)};
- } else if(this.type == "export") {
- // add routines to retrieve items and collections
- this._sandbox.Scholar.nextItem = function() { return me._exportGetItem() };
- this._sandbox.Scholar.nextCollection = function() { return me._exportGetCollection() };
+ if(this.type == "import") {
+ // add routines to add new collections
+ this._sandbox.Scholar.Collection = Scholar.Translate.ScholarCollection;
+ // attach the function to be run when a collection is done
+ this._sandbox.Scholar.Collection.prototype.complete = function() {me._collectionDone(this)};
+ }
}
this._sandbox.XPathResult = Components.interfaces.nsIDOMXPathResult;
@@ -334,33 +429,50 @@ Scholar.Translate.prototype._generateSandbox = function() {
this._sandbox.Scholar.addOption = function(option, value) {me._addOption(option, value) };
// for loading other translators and accessing their methods
- var me = this;
this._sandbox.Scholar.loadTranslator = function(type, translatorID) {
- var translation = new Scholar.Translate(type);
- // assign same handlers as for parent, because the done handler won't
- // get called anyway, and the itemDone/selectItems handlers should be
- // the same
- translation._handlers = me._handlers;
- // set the translator
- translation.setTranslator(translatorID);
- // load the translator into our sandbox
- translation._loadTranslator();
- // use internal io
- translation._initializeInternalIO();
- return translation._sandbox;
+ var translation = new Scholar.Translate(type, (translatorID ? true : false));
+ if(translatorID) {
+ // assign same handlers as for parent, because the done handler won't
+ // get called anyway, and the itemDone/selectItems handlers should be
+ // the same
+ translation._handlers = me._handlers;
+ // set the translator
+ translation.setTranslator(translatorID);
+ // load the translator into our sandbox
+ translation._loadTranslator();
+ // use internal io
+ translation._initializeInternalIO();
+ return translation._sandbox;
+ } else {
+ // create a safe translator object, so that scrapers can't get
+ // access to potentially harmful methods.
+ if(type == "import" || type == "export") {
+ throw("you must specify a translatorID for "+type+" translation");
+ }
+
+ var safeTranslator = new Object();
+ safeTranslator.setItem = function(arg) { return translation.setItem(arg) };
+ safeTranslator.setBrowser = function(arg) { return translation.setBrowser(arg) };
+ safeTranslator.setHandler = function(arg1, arg2) { translation.setHandler(arg1, arg2) };
+ safeTranslator.setTranslator = function(arg) { return translation.setTranslator(arg) };
+ safeTranslator.getTranslators = function() { return translation.getTranslators() };
+ safeTranslator.translate = function() { return translation.translate() };
+ translation._parentTranslator = me;
+
+ return safeTranslator;
+ }
}
}
/*
* Check to see if _scraper_ can scrape this document
*/
-Scholar.Translate.prototype._canTranslate = function(translator) {
- var canTranslate = false;
-
+Scholar.Translate.prototype._canTranslate = function(translator) {
// Test location with regular expression
// If this is slow, we could preload all scrapers and compile regular
// expressions, so each check will be faster
- if(translator.target) {
+ if(translator.target && this.type != "search") {
+ var canTranslate = false;
if(this.type == "web") {
var regularExpression = new RegExp(translator.target, "i");
} else {
@@ -370,6 +482,8 @@ Scholar.Translate.prototype._canTranslate = function(translator) {
if(regularExpression.test(this.path)) {
canTranslate = true;
}
+ } else {
+ var canTranslate = true;
}
// Test with JavaScript if available and didn't have a regular expression or
@@ -388,14 +502,21 @@ Scholar.Translate.prototype._canTranslate = function(translator) {
}
}
- if(this._sandbox.detect) {
+ if((this.type == "web" && this._sandbox.detectWeb) ||
+ (this.type == "search" && this._sandbox.detectSearch) ||
+ (this.type == "import" && this._sandbox.detectImport) ||
+ (this.type == "export" && this._sandbox.detectExport)) {
var returnValue;
try {
if(this.type == "web") {
- returnValue = this._sandbox.detect(this.browser.contentDocument, this.location);
+ returnValue = this._sandbox.detectWeb(this.browser.contentDocument, this.location);
+ } else if(this.type == "search") {
+ returnValue = this._sandbox.detectSearch(this.item);
} else if(this.type == "import") {
- returnValue = this._sandbox.detect();
+ returnValue = this._sandbox.detectImport();
+ } else if(this.type == "export") {
+ returnValue = this._sandbox.detectExport();
}
} catch(e) {
Scholar.debug(e+' in executing detectCode for '+translator.label);
@@ -476,7 +597,7 @@ Scholar.Translate.prototype._addOption = function(option, value) {
* called as wait() in translator code
*/
Scholar.Translate.prototype._enableAsynchronous = function() {
- me = this;
+ var me = this;
this._waitForCompletion = true;
this._sandbox.Scholar.done = function() { me._translationComplete(true) };
}
@@ -505,13 +626,20 @@ Scholar.Translate.prototype._translationComplete = function(returnValue) {
if(!this._complete) {
this._complete = true;
- Scholar.debug("translation complete");
-
- // call handler
- this._runHandler("done", returnValue);
-
- // close open streams
- this._closeStreams();
+ if(this.type == "search" && !this._itemsFound && this.translator.length > 1) {
+ // if we're performing a search and didn't get any results, go on
+ // to the next translator
+ this.translator.shift();
+ this.translate();
+ } else {
+ Scholar.debug("translation complete");
+
+ // call handler
+ this._runHandler("done", returnValue);
+
+ // close open streams
+ this._closeStreams();
+ }
}
}
@@ -547,13 +675,23 @@ Scholar.Translate.prototype._closeStreams = function() {
*/
Scholar.Translate.prototype._itemDone = function(item) {
Scholar.debug(item);
+ if(!this.saveItem) { // if we're not supposed to save the item, just
+ // return the item array
+
+ // if a parent sandbox exists, use complete() function from that sandbox
+ if(this._parentTranslator) {
+ var pt = this._parentTranslator;
+ item.complete = function() { pt._itemDone(this) };
+ Scholar.debug("done from parent sandbox");
+ }
+ this._runHandler("itemDone", item);
+ return;
+ }
// Get typeID, defaulting to "website"
var type = (item.itemType ? item.itemType : "website");
- Scholar.debug("type is "+type);
if(type == "note") { // handle notes differently
- Scholar.debug("handling a note");
var myID = Scholar.Notes.add(item.note);
// re-retrieve the item
var newItem = Scholar.Items.get(myID);
@@ -718,7 +856,11 @@ Scholar.Translate.prototype._runHandler = function(type, argument) {
for(var i in this._handlers[type]) {
Scholar.debug("running handler "+i+" for "+type);
try {
- returnValue = this._handlers[type][i](this, argument);
+ if(this._parentTranslator) {
+ returnValue = this._handlers[type][i](null, argument);
+ } else {
+ returnValue = this._handlers[type][i](this, argument);
+ }
} catch(e) {
Scholar.debug(e+' in handler '+i+' for '+type);
}
@@ -734,7 +876,21 @@ Scholar.Translate.prototype._web = function() {
try {
this._sandbox.doWeb(this.browser.contentDocument, this.location);
} catch(e) {
- Scholar.debug(e+' in executing code for '+this.translator.label);
+ Scholar.debug(e+' in executing code for '+this.translator[0].label);
+ return false;
+ }
+
+ return true;
+}
+
+/*
+ * does the actual search translation
+ */
+Scholar.Translate.prototype._search = function() {
+ try {
+ this._sandbox.doSearch(this.item);
+ } catch(e) {
+ Scholar.debug(e+' in executing code for '+this.translator[0].label);
return false;
}
@@ -750,7 +906,7 @@ Scholar.Translate.prototype._import = function() {
try {
this._sandbox.doImport();
} catch(e) {
- Scholar.debug(e+' in executing code for '+this.translator.label);
+ Scholar.debug(e+' in executing code for '+this.translator[0].label);
return false;
}
@@ -830,7 +986,7 @@ Scholar.Translate.prototype._export = function() {
try {
this._sandbox.doExport();
} catch(e) {
- Scholar.debug(e+' in executing code for '+this.translator.label);
+ Scholar.debug(e+' in executing code for '+this.translator[0].label);
return false;
}
diff --git a/chrome/chromeFiles/content/scholar/xpcom/utilities.js b/chrome/chromeFiles/content/scholar/xpcom/utilities.js
@@ -321,8 +321,8 @@ Scholar.Utilities.Ingester.prototype.lookupContextObject = function(co, done, er
return Scholar.OpenURL.lookupContextObject(co, done, error);
}
-Scholar.Utilities.Ingester.prototype.parseContextObject = function(co) {
- return Scholar.OpenURL.parseContextObject(co);
+Scholar.Utilities.Ingester.prototype.parseContextObject = function(co, item) {
+ return Scholar.OpenURL.parseContextObject(co, item);
}
/*
diff --git a/scrapers.sql b/scrapers.sql
@@ -4,7 +4,7 @@
REPLACE INTO "version" VALUES ('repository', STRFTIME('%s', '2006-08-07 01:09:00'));
REPLACE INTO "translators" VALUES ('96b9f483-c44d-5784-cdad-ce21b984fe01', '2006-06-28 23:08:00', 4, 'Amazon.com Scraper', 'Simon Kornblith', '^http://www\.amazon\.com/(?:gp/(?:product|search)/|exec/obidos/search-handle-url/|s/)',
-'function detect(doc, url) {
+'function detectWeb(doc, url) {
var searchRe = new RegExp(''^http://www\.amazon\.com/(gp/search/|exec/obidos/search-handle-url/|s/)'');
if(searchRe.test(doc.location.href)) {
return "multiple";
@@ -123,7 +123,7 @@ function doWeb(doc, url) {
}');
REPLACE INTO "translators" VALUES ('838d8849-4ffb-9f44-3d0d-aa8a0a079afe', '2006-06-26 16:01:00', 4, 'WorldCat Scraper', 'Simon Kornblith', '^http://(?:new)?firstsearch\.oclc\.org/WebZ/',
-'function detect(doc, url) {
+'function detectWeb(doc, url) {
if(doc.title == ''FirstSearch: WorldCat Detailed Record'') {
return "book";
} else if(doc.title == ''FirstSearch: WorldCat List of Records'') {
@@ -288,7 +288,7 @@ REPLACE INTO "translators" VALUES ('838d8849-4ffb-9f44-3d0d-aa8a0a079afe', '2006
}');
REPLACE INTO "translators" VALUES ('88915634-1af6-c134-0171-56fd198235ed', '2006-06-26 21:40:00', 4, 'LOC/Voyager WebVoyage Scraper', 'Simon Kornblith', 'Pwebrecon\.cgi',
-'function detect(doc, url) {
+'function detectWeb(doc, url) {
var export_options = doc.forms.namedItem(''frm'').elements.namedItem(''RD'').options;
for(var i in export_options) {
if(export_options[i].text == ''Latin1 MARC''
@@ -415,7 +415,7 @@ REPLACE INTO "translators" VALUES ('88915634-1af6-c134-0171-56fd198235ed', '2006
}');
REPLACE INTO "translators" VALUES ('d921155f-0186-1684-615c-ca57682ced9b', '2006-06-26 16:01:00', 4, 'JSTOR Scraper', 'Simon Kornblith', '^http://www\.jstor\.org/(?:view|browse|search/)',
-'function detect(doc, url) {
+'function detectWeb(doc, url) {
var namespace = doc.documentElement.namespaceURI;
var nsResolver = namespace ? function(prefix) {
if (prefix == ''x'') return namespace; else return null;
@@ -590,7 +590,7 @@ function doWeb(doc, url) {
}');
REPLACE INTO "translators" VALUES ('e85a3134-8c1a-8644-6926-584c8565f23e', '2006-06-26 16:01:00', 4, 'History Cooperative Scraper', 'Simon Kornblith', '^http://www\.historycooperative\.org/(?:journals/.+/.+/.+\.html$|cgi-bin/search.cgi)',
-'function detect(doc, url) {
+'function detectWeb(doc, url) {
if(doc.title == "History Cooperative: Search Results") {
return "multiple";
} else {
@@ -657,7 +657,7 @@ function doWeb(doc, url) {
}');
REPLACE INTO "translators" VALUES ('4fd6b89b-2316-2dc4-fd87-61a97dd941e8', '2006-08-06 21:45:00', 4, 'InnoPAC Scraper', 'Simon Kornblith', '^http://[^/]+/(?:search/|record=)',
-'function detect(doc, url) {
+'function detectWeb(doc, url) {
// First, check to see if the URL alone reveals InnoPAC, since some sites don''t reveal the MARC button
var matchRegexp = new RegExp(''^(http://[^/]+/search/[^/]+/[^/]+/1\%2C[^/]+/)frameset(.+)$'');
if(matchRegexp.test(doc.location.href)) {
@@ -837,7 +837,7 @@ REPLACE INTO "translators" VALUES ('4fd6b89b-2316-2dc4-fd87-61a97dd941e8', '2006
}');
REPLACE INTO "translators" VALUES ('add7c71c-21f3-ee14-d188-caf9da12728b', '2006-06-26 16:01:00', 4, 'SIRSI 2003+ Scraper', 'Simon Kornblith', '/uhtbin/cgisirsi',
-'function detect(doc, url) {
+'function detectWeb(doc, url) {
var namespace = doc.documentElement.namespaceURI;
var nsResolver = namespace ? function(prefix) {
if (prefix == ''x'') return namespace; else return null;
@@ -964,7 +964,7 @@ function doWeb(doc, url) {
');
REPLACE INTO "translators" VALUES ('a77690cf-c5d1-8fc4-110f-d1fc765dcf88', '2006-06-26 16:01:00', 4, 'ProQuest Scraper', 'Simon Kornblith', '^http://proquest\.umi\.com/pqdweb\?((?:.*\&)?did=.*&Fmt=[0-9]|(?:.*\&)Fmt=[0-9].*&did=|(?:.*\&)searchInterface=)',
-'function detect(doc, url) {
+'function detectWeb(doc, url) {
if(doc.title == "Results") {
return "magazineArticle";
} else {
@@ -1147,7 +1147,7 @@ function doWeb(doc, url) {
}');
REPLACE INTO "translators" VALUES ('6773a9af-5375-3224-d148-d32793884dec', '2006-06-26 16:01:00', 4, 'InfoTrac Scraper', 'Simon Kornblith', '^http://infotrac-college\.thomsonlearning\.com/itw/infomark/',
-'function detect(doc, url) {
+'function detectWeb(doc, url) {
if(doc.title.substring(0, 8) == "Article ") {
return "magazineArticle";
} else doc.title.substring(0, 10) == "Citations ") {
@@ -1273,7 +1273,7 @@ function doWeb(doc, url) {
}');
REPLACE INTO "translators" VALUES ('b047a13c-fe5c-6604-c997-bef15e502b09', '2006-06-26 16:01:00', 4, 'LexisNexis Scraper', 'Simon Kornblith', '^http://web\.lexis-nexis\.com/universe/(?:document|doclist)',
-'function detect(doc, url) {
+'function detectWeb(doc, url) {
var detailRe = new RegExp("^http://[^/]+/universe/document");
if(detailRe.test(doc.location.href)) {
return "newspaperArticle";
@@ -1377,7 +1377,7 @@ function doWeb(doc, url) {
}');
REPLACE INTO "translators" VALUES ('cf87eca8-041d-b954-795a-2d86348999d5', '2006-06-26 16:01:00', 4, 'Aleph Scraper', 'Simon Kornblith', '^http://[^/]+/F(?:/[A-Z0-9\-]+(?:\?.*)?$|\?func=find)',
-'function detect(doc, url) {
+'function detectWeb(doc, url) {
var singleRe = new RegExp("^http://[^/]+/F/[A-Z0-9\-]+\?.*func=full-set-set.*\&format=[0-9]{3}");
if(singleRe.test(doc.location.href)) {
@@ -1468,7 +1468,7 @@ REPLACE INTO "translators" VALUES ('cf87eca8-041d-b954-795a-2d86348999d5', '2006
}');
REPLACE INTO "translators" VALUES ('774d7dc2-3474-2684-392c-f787789ec63d', '2006-06-26 16:01:00', 4, 'Dynix Scraper', 'Simon Kornblith', 'ipac\.jsp\?.*(?:uri=full=[0-9]|menu=search)',
-'function detect(doc, url) {
+'function detectWeb(doc, url) {
var detailsRe = new RegExp(''ipac\.jsp\?.*uri=full=[0-9]'');
if(detailsRe.test(doc.location.href)) {
return "book";
@@ -1556,7 +1556,7 @@ REPLACE INTO "translators" VALUES ('774d7dc2-3474-2684-392c-f787789ec63d', '2006
}');
REPLACE INTO "translators" VALUES ('63a0a351-3131-18f4-21aa-f46b9ac51d87', '2006-06-26 16:01:00', 4, 'VTLS Scraper', 'Simon Kornblith', '/chameleon(?:\?|$)',
-'function detect(doc, url) {
+'function detectWeb(doc, url) {
var node = Scholar.Utilities.getNode(doc, doc, ''//tr[@class="intrRow"]/td/table/tbody/tr[th]'', null);
if(node) {
return "multiple";
@@ -1660,7 +1660,7 @@ REPLACE INTO "translators" VALUES ('63a0a351-3131-18f4-21aa-f46b9ac51d87', '2006
}');
REPLACE INTO "translators" VALUES ('fb12ae9e-f473-cab4-0546-27ab88c64101', '2006-06-26 16:01:00', 4, 'DRA Scraper', 'Simon Kornblith', '/web2/tramp2\.exe/(?:see\_record/|authority\_hits/|goto/.*\?.*screen=Record\.html)',
-'function detect(doc, url) {
+'function detectWeb(doc, url) {
if(doc.location.href.indexOf("/authority_hits") > 0) {
return "multiple";
} else {
@@ -1730,7 +1730,7 @@ REPLACE INTO "translators" VALUES ('fb12ae9e-f473-cab4-0546-27ab88c64101', '2006
REPLACE INTO "translators" VALUES ('c0e6fda6-0ecd-e4f4-39ca-37a4de436e15', '2006-06-26 16:01:00', 4, 'GEAC Scraper', 'Simon Kornblith', '/(?:GeacQUERY|(?:Geac)?FETCH[\:\?].*[&:]next=html/(?:record\.html|geacnffull\.html))',
-'function detect(doc, url) {
+'function detectWeb(doc, url) {
if(doc.location.href.indexOf("/GeacQUERY") > 0) {
return "multiple";
} else {
@@ -1818,7 +1818,7 @@ REPLACE INTO "translators" VALUES ('c0e6fda6-0ecd-e4f4-39ca-37a4de436e15', '2006
}');
REPLACE INTO "translators" VALUES ('5287d20c-8a13-6004-4dcb-5bb2b66a9cc9', '2006-06-26 16:01:00', 4, 'SIRSI -2003 Scraper', 'Simon Kornblith', '/uhtbin/cgisirsi',
-'function detect(doc, url) {
+'function detectWeb(doc, url) {
var namespace = doc.documentElement.namespaceURI;
var nsResolver = namespace ? function(prefix) {
if (prefix == ''x'') return namespace; else return null;
@@ -1954,7 +1954,7 @@ REPLACE INTO "translators" VALUES ('5287d20c-8a13-6004-4dcb-5bb2b66a9cc9', '2006
}');
REPLACE INTO "translators" VALUES ('0f9fc2fc-306e-5204-1117-25bca009dffc', '2006-06-26 16:01:00', 4, 'TLC/YouSeeMore Scraper', 'Simon Kornblith', 'TLCScripts/interpac\.dll\?(?:.*LabelDisplay.*RecordNumber=[0-9]|Search|ItemTitles)',
-'function detect(doc, url) {
+'function detectWeb(doc, url) {
var detailRe = new RegExp("TLCScripts/interpac\.dll\?.*LabelDisplay.*RecordNumber=[0-9]");
if(detailRe.test(doc.location.href)) {
return "book";
@@ -2052,7 +2052,7 @@ REPLACE INTO "translators" VALUES ('0f9fc2fc-306e-5204-1117-25bca009dffc', '2006
}');
REPLACE INTO "translators" VALUES ('c54d1932-73ce-dfd4-a943-109380e06574', '2006-06-26 16:01:00', 4, 'Project MUSE Scraper', 'Simon Kornblith', '^http://muse\.jhu\.edu/(?:journals/[^/]+/[^/]+/[^/]+\.html|search/pia.cgi)',
-'function detect(doc, url) {
+'function detectWeb(doc, url) {
var searchRe = new RegExp("^http://[^/]+/search/pia\.cgi");
if(searchRe.test(url)) {
return "multiple";
@@ -2163,48 +2163,37 @@ REPLACE INTO "translators" VALUES ('c54d1932-73ce-dfd4-a943-109380e06574', '2006
}
}');
-REPLACE INTO "translators" VALUES ('fcf41bed-0cbc-3704-85c7-8062a0068a7a', '2006-06-26 16:01:00', 4, 'PubMed Scraper', 'Simon Kornblith', '^http://www\.ncbi\.nlm\.nih\.gov/entrez/query\.fcgi\?(?:.*db=PubMed.*list_uids=[0-9]|.*list_uids=[0-9].*db=PubMed|.*db=PubMed.*CMD=search|.*CMD=search.*db=PubMed)',
-'function detect(doc, url) {
+REPLACE INTO "translators" VALUES ('fcf41bed-0cbc-3704-85c7-8062a0068a7a', '2006-06-26 16:01:00', 12, 'PubMed Scraper', 'Simon Kornblith', '^http://www\.ncbi\.nlm\.nih\.gov/entrez/query\.fcgi\?(?:.*db=PubMed.*list_uids=[0-9]|.*list_uids=[0-9].*db=PubMed|.*db=PubMed.*CMD=search|.*CMD=search.*db=PubMed)',
+'function detectWeb(doc, url) {
if(doc.location.href.indexOf("list_uids=") >= 0) {
return "journalArticle";
} else {
return "multiple";
}
-}',
-'function doWeb(doc, url) {
- var uri = doc.location.href;
- var ids = new Array();
- var idRegexp = /[\?\&]list_uids=([0-9\,]+)/;
-
- var m = idRegexp.exec(uri);
- if(m) {
- ids.push(m[1]);
- } else {
- var namespace = doc.documentElement.namespaceURI;
- var nsResolver = namespace ? function(prefix) {
- if (prefix == ''x'') return namespace; else return null;
- } : null;
-
- var items = new Array();
- var tableRows = Scholar.Utilities.gatherElementsOnXPath(doc, doc, ''//div[@class="ResultSet"]/table/tbody'', nsResolver);
- // Go through table rows
- for(var i=0; i<tableRows.length; i++) {
- var link = Scholar.Utilities.getNode(doc, tableRows[i], ''.//a'', nsResolver);
- var article = Scholar.Utilities.getNode(doc, tableRows[i], ''./tr[2]/td[2]/text()[1]'', nsResolver);
- items[link.href] = article.nodeValue;
- }
-
- items = Scholar.selectItems(items);
-
- if(!items) {
- return true;
+}
+
+/*
+ * Extracts a PubMed ID from an OpenURL ContextObject string
+ *
+ * co is split on "&"; the first rft_id value that, once unescaped, begins
+ * with "info:pmid/" supplies the PMID, which is returned as a string.
+ * Falls through (returning undefined) when no such rft_id is present.
+ */
+function getPMID(co) {
+	var coParts = co.split("&");
+	// indexed loop with declared variables, so that no implicit global
+	// is created while iterating
+	for(var i=0; i<coParts.length; i++) {
+		var part = coParts[i];
+		if(part.substr(0, 7) == "rft_id=") {
+			var value = unescape(part.substr(7));
+			if(value.substr(0, 10) == "info:pmid/") {
+				return value.substr(10);
+			}
+		}
+	}
+}
+
+/*
+ * Search detection: reports "journalArticle" when the item carries a
+ * contextObject from which getPMID() can extract a PMID, false otherwise
+ */
+function detectSearch(item) {
+	var hasPMID = item.contextObject && getPMID(item.contextObject);
+	return hasPMID ? "journalArticle" : false;
+}',
+'function lookupPMIDs(ids) {
+ Scholar.wait();
var newUri = "http://eutils.ncbi.nlm.nih.gov/entrez/eutils/efetch.fcgi?db=PubMed&retmode=xml&rettype=citation&id="+ids.join(",");
Scholar.Utilities.HTTPUtilities.doGet(newUri, null, function(text) {
@@ -2283,13 +2272,54 @@ REPLACE INTO "translators" VALUES ('fcf41bed-0cbc-3704-85c7-8062a0068a7a', '2006
}
Scholar.done();
- })
+ });
+}
+
+/*
+ * Web entry point: collects PubMed IDs from the current page and passes them
+ * to lookupPMIDs()
+ *
+ * The page address is read from doc.location.href; the url argument is not
+ * used. If the address has a list_uids parameter, its value (digits and
+ * commas) is used directly. Otherwise the result table is scanned and the
+ * entries returned by Scholar.selectItems() are used.
+ */
+function doWeb(doc, url) {
+ var uri = doc.location.href;
+ var ids = new Array();
+ var idRegexp = /[\?\&]list_uids=([0-9\,]+)/;
- Scholar.wait();
+ var m = idRegexp.exec(uri);
+ if(m) {
+ ids.push(m[1]);
+ } else {
+ var namespace = doc.documentElement.namespaceURI;
+ var nsResolver = namespace ? function(prefix) {
+ if (prefix == ''x'') return namespace; else return null;
+ } : null;
+
+ // maps link address -> text of the node matched in the second row
+ var items = new Array();
+ var tableRows = Scholar.Utilities.gatherElementsOnXPath(doc, doc, ''//div[@class="ResultSet"]/table/tbody'', nsResolver);
+ // Go through table rows
+ for(var i=0; i<tableRows.length; i++) {
+ var link = Scholar.Utilities.getNode(doc, tableRows[i], ''.//a'', nsResolver);
+ var article = Scholar.Utilities.getNode(doc, tableRows[i], ''./tr[2]/td[2]/text()[1]'', nsResolver);
+ items[link.href] = article.nodeValue;
+ }
+
+ // presumably lets the user pick entries; a false result ends the run
+ items = Scholar.selectItems(items);
+
+ if(!items) {
+ return true;
+ }
+
+ // keys are the link addresses stored above; pull the IDs out of them
+ // NOTE(review): m is not checked here, so a key without list_uids
+ // would fail on m[1] - confirm every result link carries it
+ for(var i in items) {
+ var m = idRegexp.exec(i);
+ ids.push(m[1]);
+ }
+ }
+
+ lookupPMIDs(ids);
+}
+
+/*
+ * Search entry point: extracts the PMID from the contextObject of the item
+ * with getPMID() and hands it to lookupPMIDs() as a one-element list
+ */
+function doSearch(item) {
+	var pmid = getPMID(item.contextObject);
+	lookupPMIDs([pmid]);
}');
REPLACE INTO "translators" VALUES ('951c027d-74ac-47d4-a107-9c3069ab7b48', '2006-06-26 16:41:00', 4, 'Embedded RDF Scraper', 'Simon Kornblith', NULL,
-'function detect(doc, url) {
+'function detectWeb(doc, url) {
var metaTags = doc.getElementsByTagName("meta");
for(var i=0; i<metaTags.length; i++) {
@@ -2333,7 +2363,7 @@ REPLACE INTO "translators" VALUES ('951c027d-74ac-47d4-a107-9c3069ab7b48', '2006
}');
REPLACE INTO "translators" VALUES ('05d07af9-105a-4572-99f6-a8e231c0daef', '2006-08-07 01:09:00', 4, 'COinS Scraper', 'Simon Kornblith', NULL,
-'function detect(doc, url) {
+'function detectWeb(doc, url) {
var spanTags = doc.getElementsByTagName("span");
var encounteredType = false;
@@ -2348,11 +2378,11 @@ REPLACE INTO "translators" VALUES ('05d07af9-105a-4572-99f6-a8e231c0daef', '2006
// determine if it''s a valid type
var coParts = spanTitle.split("&");
var type = null
- for(var i in coParts) {
- if(coParts[i].substr(0, 12) == "rft_val_fmt=") {
- var format = unescape(coParts[i].substr(12));
+ for(var j in coParts) {
+ if(coParts[j].substr(0, 12) == "rft_val_fmt=") {
+ var format = unescape(coParts[j].substr(12));
if(format == "info:ofi/fmt:kev:mtx:journal") {
- var type = "journal";
+ var type = "journalArticle";
} else if(format == "info:ofi/fmt:kev:mtx:book") {
if(Scholar.Utilities.inArray("rft.genre=bookitem", coParts)) {
var type = "bookSection";
@@ -2384,45 +2414,43 @@ function retrieveNextCOinS(needFullItems, newItems) {
var item = needFullItems.shift();
Scholar.Utilities.debugPrint("looking up contextObject");
- Scholar.Utilities.lookupContextObject(item.contextObject, function(items) {
- Scholar.Utilities.debugPrint(items);
- if(items) {
- newItems = newItems.concat(items);
- }
+ var search = Scholar.loadTranslator("search");
+ search.setHandler("itemDone", function(obj, item) {
+ newItems.push(item);
+ });
+ search.setHandler("done", function() {
retrieveNextCOinS(needFullItems, newItems);
- }, function() {
- Scholar.done(false);
});
+ search.setItem(item);
+
+ // look for translators
+ var translators = search.getTranslators();
+ if(translators) {
+ search.setTranslator(translators);
+ search.translate();
+ } else {
+ retrieveNextCOinS(needFullItems, newItems);
+ }
} else {
completeCOinS(newItems);
Scholar.done(true);
}
}
-// attaches item data to a new Scholar.Item instance (because data returned from
-// Scholar.OpenURL.processContextObject does not have a complete() method)
-function addAsItem(itemArray) {
- var newItem = new Scholar.Item();
- for(var i in itemArray) {
- newItem[i] = itemArray[i];
- }
- newItem.complete();
-}
-
// saves all COinS objects
+// newItems is the list of items gathered from the page. With more than one
+// item, the titles are passed to Scholar.selectItems() and complete() is
+// called for every index present in the array it returns; a single item is
+// completed directly, and an empty list does nothing.
function completeCOinS(newItems) {
if(newItems.length > 1) {
var selectArray = new Array();
for(var i in newItems) {
- selectArray[i] = newItems.title;
+ selectArray[i] = newItems[i].title;
}
+ // presumably returns only the entries that were chosen, keyed by the
+ // same indices - confirm against Scholar.selectItems
selectArray = Scholar.selectItems(selectArray);
for(var i in selectArray) {
- addAsItem(newItems[i]);
+ newItems[i].complete();
}
} else if(newItems.length) {
- addAsItem(newItems[0]);
+ newItems[0].complete();
}
}
@@ -2438,8 +2466,8 @@ function doWeb(doc, url) {
var spanClasses = spanClass.split(" ");
if(Scholar.Utilities.inArray("Z3988", spanClasses)) {
var spanTitle = spanTags[i].getAttribute("title");
- var newItem = Scholar.Utilities.parseContextObject(spanTitle);
- if(newItem) {
+ var newItem = new Scholar.Item();
+ if(Scholar.Utilities.parseContextObject(spanTitle, newItem)) {
if(newItem.title && newItem.creators.length) {
// title and creators are minimum data to avoid looking up
newItems.push(newItem);
@@ -2463,7 +2491,7 @@ function doWeb(doc, url) {
}');
REPLACE INTO "translators" VALUES ('3e684d82-73a3-9a34-095f-19b112d88bbf', '2006-06-26 16:01:00', 4, 'Google Books Scraper', 'Simon Kornblith', '^http://books\.google\.com/books\?(.*vid=.*\&id=.*|.*q=.*)',
-'function detect(doc, url) {
+'function detectWeb(doc, url) {
var re = new RegExp(''^http://books\\.google\\.com/books\\?vid=([^&]+).*\\&id=([^&]+)'', ''i'');
if(re.test(doc.location.href)) {
return "book";
@@ -2553,6 +2581,161 @@ REPLACE INTO "translators" VALUES ('3e684d82-73a3-9a34-095f-19b112d88bbf', '2006
Scholar.wait();
}');
+REPLACE INTO "translators" VALUES ('e07e9b8c-0e98-4915-bb5a-32a08cb2f365', '2006-08-07 11:36:00', 8, 'Open WorldCat', 'Simon Kornblith', 'http://partneraccess.oclc.org/',
+'/*
+ * Search detection: true for items whose itemType is "book" or
+ * "bookSection", false for everything else
+ */
+function detectSearch(item) {
+	return (item.itemType == "book" || item.itemType == "bookSection");
+}',
+'/*
+ * Creates an item from an Open WorldCat document
+ *
+ * Only the first span whose class list contains "Z3988" is considered: its
+ * title attribute is parsed as a ContextObject into a new Scholar.Item.
+ * Returns true if parsing succeeded (the item is then completed), false if
+ * parsing failed or the document has no such span.
+ */
+function processOWC(doc) {
+	var spans = doc.getElementsByTagName("span");
+	for(var idx=0; idx<spans.length; idx++) {
+		var classAttr = spans[idx].getAttribute("class");
+		// skip spans without a class, and spans that are not COinS
+		if(!classAttr || !Scholar.Utilities.inArray("Z3988", classAttr.split(" "))) {
+			continue;
+		}
+		
+		var contextObject = spans[idx].getAttribute("title");
+		var newItem = new Scholar.Item();
+		if(!Scholar.Utilities.parseContextObject(contextObject, newItem)) {
+			return false;
+		}
+		newItem.complete();
+		return true;
+	}
+	
+	return false;
+}
+
+/*
+ * Search entry point: looks the item up in Open WorldCat
+ *
+ * Uses the contextObject of the item when present, otherwise builds one with
+ * Scholar.Utilities.createContextObject(). The resolver page is first treated
+ * as a single-item page; if processOWC() finds nothing usable there, it is
+ * treated as a result list and every linked record page is run through
+ * processOWC(). Completion is signalled with Scholar.done(), and failure
+ * (load error or no results at all) with Scholar.done(false).
+ */
+function doSearch(item) {
+	if(item.contextObject) {
+		var co = item.contextObject;
+	} else {
+		var co = Scholar.Utilities.createContextObject(item);
+	}
+	
+	Scholar.Utilities.processDocuments(null, ["http://partneraccess.oclc.org/wcpa/servlet/OpenUrl?"+co], function(browser) {
+		var doc = browser.contentDocument;
+		// find new COinS in the Open WorldCat page
+		if(processOWC(doc)) {	// we got a single item page
+			Scholar.done();
+			return;
+		}
+		
+		// assume we have a search results page
+		var namespace = doc.documentElement.namespaceURI;
+		var nsResolver = namespace ? function(prefix) {
+			if (prefix == ''x'') return namespace; else return null;
+		} : null;
+		
+		// common prefix of both result queries
+		var resultsPath = ''//table[@class="tableLayout"]/tbody/tr/td[@class="content"]/table[@class="tableResults"]/tbody/'';
+		
+		// first try to get only books
+		var elmts = doc.evaluate(resultsPath+''tr[td/img[@alt="Book"]]/td/div[@class="title"]/a'', doc, nsResolver, Components.interfaces.nsIDOMXPathResult.ANY_TYPE, null);
+		var elmt = elmts.iterateNext();
+		if(!elmt) {	// if that fails, accept result rows of any type
+			elmts = doc.evaluate(resultsPath+''tr/td/div[@class="title"]/a'', doc, nsResolver, Components.interfaces.nsIDOMXPathResult.ANY_TYPE, null);
+			elmt = elmts.iterateNext();
+		}
+		
+		if(!elmt) {	// no results at all; nothing to dereference below
+			Scholar.done(false);
+			return;
+		}
+		
+		var urlsToProcess = new Array();
+		do {
+			urlsToProcess.push(elmt.href);
+		} while(elmt = elmts.iterateNext());
+		
+		Scholar.Utilities.processDocuments(null, urlsToProcess, function(browser) {
+			// per URL
+			processOWC(browser.contentDocument);
+		}, function() {	// done
+			Scholar.done();
+		}, function() {	// error
+			Scholar.done(false);
+		});
+	}, null, function() {	// error loading the resolver page
+		Scholar.done(false);
+	});
+	
+	Scholar.wait();
+}');
+
+REPLACE INTO "translators" VALUES ('11645bd1-0420-45c1-badb-53fb41eeb753', '2006-08-07 18:17:00', 8, 'CrossRef', 'Simon Kornblith', 'http://partneraccess.oclc.org/',
+'/*
+ * Search detection: true for journal article items, false otherwise
+ *
+ * "journalArticle" is the type name used by the other translators in this
+ * file; the older "journal" spelling is still accepted so that callers
+ * passing it keep matching.
+ */
+function detectSearch(item) {
+	if(item.itemType == "journalArticle" || item.itemType == "journal") {
+		return true;
+	}
+	return false;
+}',
+'function processCrossRef(xmlOutput) {
+ // Builds a journalArticle item from a CrossRef query result document
+ // (xmlOutput, a string) and completes it. Returns true when an item was
+ // completed, false when the text could not be parsed as XML or the query
+ // status is neither "resolved" nor "multiresolved".
+
+ // strip the XML declaration before parsing - presumably because the E4X
+ // XML constructor does not accept it; TODO confirm
+ xmlOutput = xmlOutput.replace(/<\?xml[^>]*\?>/, "");
+
+ // parse XML with E4X
+ var qr = new Namespace("http://www.crossref.org/qrschema/2.0");
+ try {
+ var xml = new XML(xmlOutput);
+ } catch(e) {
+ return false;
+ }
+
+ // ensure status is valid
+ var status = xml.qr::query_result.qr::body.qr::query.@status.toString();
+ if(status != "resolved" && status != "multiresolved") {
+ return false;
+ }
+
+ var query = xml.qr::query_result.qr::body.qr::query;
+ var item = new Scholar.Item("journalArticle");
+
+ // try to get a DOI
+ // (doi types are tried in order: journal_article, book_title, book_content)
+ item.DOI = query.qr::doi.(@type=="journal_article").text().toString();
+ if(!item.DOI) {
+ item.DOI = query.qr::doi.(@type=="book_title").text().toString();
+ }
+ if(!item.DOI) {
+ item.DOI = query.qr::doi.(@type=="book_content").text().toString();
+ }
+
+ // try to get an ISSN (no print/electronic preferences)
+ // NOTE(review): issn[0] is used without checking that an issn element
+ // exists - confirm the behaviour for results that carry none
+ item.ISSN = query.qr::issn[0].text().toString();
+ // get title
+ item.title = query.qr::article_title.text().toString();
+ // get publicationTitle
+ item.publicationTitle = query.qr::journal_title.text().toString();
+ // get author
+ // (a creator is pushed unconditionally, whatever the author text is)
+ item.creators.push(Scholar.Utilities.cleanAuthor(query.qr::author.text().toString(), "author", true));
+ // get volume
+ item.volume = query.qr::volume.text().toString();
+ // get issue
+ item.issue = query.qr::issue.text().toString();
+ // get year
+ item.date = query.qr::year.text().toString();
+ // get edition
+ item.edition = query.qr::edition_number.text().toString();
+ // get first page
+ // (only first_page is read, so pages holds the first page alone)
+ item.pages = query.qr::first_page.text().toString();
+ item.complete();
+ return true;
+}
+
+/*
+ * Search entry point: resolves the item through the CrossRef OpenURL service
+ *
+ * Uses the contextObject of the item when present (adding a url_ver key if
+ * it has none), otherwise builds one with
+ * Scholar.Utilities.createContextObject(). The response text is handed to
+ * processCrossRef() and Scholar.done() is called once it returns.
+ */
+function doSearch(item) {
+	if(item.contextObject) {
+		var co = item.contextObject;
+		if(co.indexOf("url_ver=") == -1) {
+			// prepend as a separate key/value pair; without the "&" the
+			// version would run into the first key of the contextObject
+			co = "url_ver=Z39.88-2004&"+co;
+		}
+	} else {
+		var co = Scholar.Utilities.createContextObject(item);
+	}
+	
+	Scholar.Utilities.HTTPUtilities.doGet("http://www.crossref.org/openurl/?"+co+"&noredirect=true", null, function(responseText) {
+		processCrossRef(responseText);
+		Scholar.done();
+	});
+	
+	Scholar.wait();
+}');
+
REPLACE INTO "translators" VALUES ('0e2235e7-babf-413c-9acf-f27cce5f059c', '2006-07-05 23:40:00', 3, 'MODS (XML)', 'Simon Kornblith', 'xml',
'Scholar.addOption("exportNotes", true);
Scholar.addOption("exportFileData", true);',