www

Unnamed repository; edit this file 'description' to name the repository.
Log | Files | Refs | Submodules | README | LICENSE

commit 216f0c7581d6a486f7a6a27340b97bcdde119767
parent 9e5c15423a0e8248e6b263755d268ad84390d6f6
Author: Simon Kornblith <simon@simonster.com>
Date:   Tue,  8 Aug 2006 01:06:33 +0000

closes #83, figure out how to implement OpenURL
closes #76, implement extensible search/retrieval architecture for obtaining metadata

OpenURL COinS lookup is now implemented using a real search architecture. At the moment, it works with Open WorldCat for books, CrossRef for journal articles (provided the COinS object contains a DOI or an ISSN), and PubMed when a PMID is available.


Diffstat:
Mchrome/chromeFiles/content/scholar/xpcom/ingester.js | 162++++---------------------------------------------------------------------------
Mchrome/chromeFiles/content/scholar/xpcom/translate.js | 302++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++-------------------
Mchrome/chromeFiles/content/scholar/xpcom/utilities.js | 4++--
Mscrapers.sql | 349++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++-------------------
4 files changed, 504 insertions(+), 313 deletions(-)

diff --git a/chrome/chromeFiles/content/scholar/xpcom/ingester.js b/chrome/chromeFiles/content/scholar/xpcom/ingester.js @@ -148,7 +148,6 @@ Scholar.OpenURL = new function() { this.discoverResolvers = discoverResolvers; this.createContextObject = createContextObject; this.parseContextObject = parseContextObject; - this.lookupContextObject = lookupContextObject; /* * Returns a URL to look up an item in the OpenURL resolver @@ -305,12 +304,16 @@ Scholar.OpenURL = new function() { /* * Generates an item in the format returned by item.fromArray() given an * OpenURL version 1.0 contextObject + * + * accepts an item array to fill, or creates and returns a new item array */ - function parseContextObject(co) { + function parseContextObject(co, item) { var coParts = co.split("&"); - var item = new Array(); - item.creators = new Array(); + if(!item) { + var item = new Array(); + item.creators = new Array(); + } // get type item.itemType = _determineResourceType(coParts); @@ -417,157 +420,6 @@ Scholar.OpenURL = new function() { } /* - * Looks up additional information on an item in the format returned by - * item.fromArray() in CrossRef or Open WorldCat given an OpenURL version - * 1.0 contextObject - */ - function lookupContextObject(co, done, error) { - // CrossRef requires a url_ver to work right - if(co.indexOf("url_ver=Z39.88-2004") == -1) { - co = "url_ver=Z39.88-2004&"+co; - } - - var type = _determineResourceType(co.split("&")); - if(!type) { - return false; - } - - if(type == "journal") { - // look up journals in CrossRef - Scholar.Utilities.HTTP.doGet("http://www.crossref.org/openurl/?"+co+"&noredirect=true", null, function(req) { - var items = _processCrossRef(req.responseText); - done(items); - }); - } else { - // look up books in Open WorldCat - Scholar.Utilities.HTTP.processDocuments(null, ["http://partneraccess.oclc.org/wcpa/servlet/OpenUrl?"+co], function(browser) { - var doc = browser.contentDocument; - // find new COinS in the Open WorldCat page - items = 
_processOWC(doc); - - if(items) { // we got a single item page; return the item - done(items); - } else { // assume we have a search results page - var items = new Array(); - - var namespace = doc.documentElement.namespaceURI; - var nsResolver = namespace ? function(prefix) { - if (prefix == 'x') return namespace; else return null; - } : null; - - // first try to get only books - var elmts = doc.evaluate('//table[@class="tableLayout"]/tbody/tr/td[@class="content"]/table[@class="tableResults"]/tbody/tr[td/img[@alt="Book"]]/td/div[@class="title"]/a', doc, nsResolver, Components.interfaces.nsIDOMXPathResult.ANY_TYPE,null); - var elmt = elmts.iterateNext(); - if(!elmt) { // if that fails, look for other options - var elmts = doc.evaluate('//table[@class="tableLayout"]/tbody/tr/td[@class="content"]/table[@class="tableResults"]/tbody/tr[td/img[@alt="Book"]]/td/div[@class="title"]/a', doc, nsResolver, Components.interfaces.nsIDOMXPathResult.ANY_TYPE,null); - elmt = elmts.iterateNext() - } - - var urlsToProcess = new Array(); - do { - urlsToProcess.push(elmt.href); - } while(elmt = elmts.iterateNext()); - - Scholar.Utilities.HTTP.processDocuments(null, urlsToProcess, function(browser) { - // per URL - var newItems = _processOWC(browser.contentDocument); - if(newItems) { - items = items.concat(newItems); - } - }, function() { // done - done(items); - }, function() { // error - error(); - }); - } - }, null, function() { - error(); - }); - } - } - - /* - * Processes the XML format returned by CrossRef - */ - function _processCrossRef(xmlOutput) { - xmlOutput = xmlOutput.replace(/<\?xml[^>]*\?>/, ""); - - // parse XML with E4X - var qr = new Namespace("http://www.crossref.org/qrschema/2.0"); - try { - var xml = new XML(xmlOutput); - } catch(e) { - return false; - } - - // ensure status is valid - var status = xml.qr::body.qr::query.@status.toString(); - if(status != "resolved" && status != "multiresolved") { - return false; - } - - var query = xml.qr::body.qr::query; - var 
item = new Array(); - item.creators = new Array(); - - // try to get a DOI - item.DOI = query.qr::doi.(@type=="journal_article").toString(); - if(!item.DOI) { - item.DOI = query.qr::doi.(@type=="book_title").toString(); - } - if(!item.DOI) { - item.DOI = query.qr::doi.(@type=="book_content").toString(); - } - - // try to get an ISSN (no print/electronic preferences) - item.ISSN = query.qr::issn.toString(); - // get title - item.title = query.qr::article_title.toString(); - // get publicationTitle - item.publicationTitle = query.qr::journal_title.toString(); - // get author - item.creators.push(Scholar.Utilities.cleanAuthor(query.qr::author.toString(), "author", true)); - // get volume - item.volume = query.qr::volume.toString(); - // get issue - item.issue = query.qr::issue.toString(); - // get year - item.date = query.qr::year.toString(); - // get edition - item.edition = query.qr::edition_number.toString(); - // get first page - item.pages = query.qr::first_page.toString(); - - return [item]; - } - - /* - * Parses a document object referring to an Open WorldCat entry for its - * OpenURL contextObject, then returns an item generated from this - * contextObject - */ - function _processOWC(doc) { - var spanTags = doc.getElementsByTagName("span"); - for(var i=0; i<spanTags.length; i++) { - var spanClass = spanTags[i].getAttribute("class"); - if(spanClass) { - var spanClasses = spanClass.split(" "); - if(Scholar.inArray("Z3988", spanClasses)) { - var spanTitle = spanTags[i].getAttribute("title"); - var item = parseContextObject(spanTitle); - if(item) { - return [item]; - } else { - return false; - } - } - } - } - - return false; - } - - /* * Determines the type of an OpenURL contextObject */ function _determineResourceType(coParts) { diff --git a/chrome/chromeFiles/content/scholar/xpcom/translate.js b/chrome/chromeFiles/content/scholar/xpcom/translate.js @@ -13,6 +13,7 @@ * export * import * web + * search * * a typical export process: * var translatorObj = new 
Scholar.Translate(); @@ -35,7 +36,10 @@ * location - the location of the target (read-only; set with setLocation) * for import/export - this is an instance of nsILocalFile * for web - this is a URL + * item - item to be used for searching (read-only; set with setItem) * path - the path to the target; for web, this is the same as location + * saveItem - whether new items should be saved to the database. defaults to + * true; set using second argument of constructor. * * PRIVATE PROPERTIES: * @@ -49,6 +53,10 @@ * _sandbox - sandbox in which translators will be executed * _streams - streams that need to be closed when execution is complete * _IDMap - a map from IDs as specified in Scholar.Item() to IDs of actual items + * _parentTranslator - set when a translator is called from another translator. + * among other things, disables passing of the translate + * object to handlers and modifies complete() function on + * returned items * * WEB-ONLY PRIVATE PROPERTIES: * @@ -56,23 +64,41 @@ * an EZProxy */ -Scholar.Translate = function(type) { +Scholar.Translate = function(type, saveItem) { this.type = type; - // import = 001 = 1 - // export = 010 = 2 - // web = 100 = 4 + // import = 0001 = 1 + // export = 0010 = 2 + // web = 0100 = 4 + // search = 1000 = 8 // combination types determined by addition or bitwise AND // i.e., import+export = 1+2 = 3 - if(type == "import") { - this._numericTypes = "1,3,5,7"; - } else if(type == "export") { - this._numericTypes = "2,3,6,7"; - } else if(type == "web") { - this._numericTypes = "4,5,6,7"; + this._numericTypes = ""; + for(var i=0; i<=1; i++) { + for(var j=0; j<=1; j++) { + for(var k=0; k<=1; k++) { + if(type == "import") { + this._numericTypes += ","+parseInt(i.toString()+j.toString()+k.toString()+"1", 2); + } else if(type == "export") { + this._numericTypes += ","+parseInt(i.toString()+j.toString()+"1"+k.toString(), 2); + } else if(type == "web") { + this._numericTypes += ","+parseInt(i.toString()+"1"+j.toString()+k.toString(), 
2); + } else if(type == "search") { + this._numericTypes += ","+parseInt("1"+i.toString()+j.toString()+k.toString(), 2); + } else { + throw("invalid import type"); + } + } + } + } + this._numericTypes = this._numericTypes.substr(1); + + if(saveItem === false) { // three equals signs means if it's left + // undefined, this.saveItem will still be true + this.saveItem = false; } else { - throw("invalid import type"); + this.saveItem = true; } this._handlers = new Array(); @@ -88,6 +114,13 @@ Scholar.Translate.prototype.setBrowser = function(browser) { } /* + * sets the item to be used for searching + */ +Scholar.Translate.prototype.setItem = function(item) { + this.item = item; +} + +/* * sets the location to operate upon (file should be an nsILocalFile object or * web address) */ @@ -112,12 +145,41 @@ Scholar.Translate.prototype.setLocation = function(location) { * accepts either the object from getTranslators() or an ID */ Scholar.Translate.prototype.setTranslator = function(translator) { + if(!translator) { + throw("cannot set translator: invalid value"); + } + if(typeof(translator) == "object") { // passed an object and not an ID - translator = translator.translatorID; + if(translator.translatorID) { + translator = [translator.translatorID]; + } else { + // we have an associative array of translators + if(this.type != "search") { + throw("cannot set translator: a single translator must be specified when doing "+this.type+" translation"); + } + // accept a list of objects + for(var i in translator) { + if(typeof(translator[i]) == "object") { + if(translator[i].translatorID) { + translator[i] = translator[i].translatorID; + } else { + throw("cannot set translator: must specify a single translator or a list of translators"); + } + } + } + } + } else { + translator = [translator]; + } + + var where = ""; + for(var i in translator) { + where += " OR translatorID = ?"; } + where = where.substr(4); - var sql = "SELECT * FROM translators WHERE translatorID = ? 
AND type IN ("+this._numericTypes+")"; - this.translator = Scholar.DB.rowQuery(sql, [translator]); + var sql = "SELECT * FROM translators WHERE "+where+" AND type IN ("+this._numericTypes+")"; + this.translator = Scholar.DB.query(sql, translator); if(!this.translator) { return false; } @@ -145,13 +207,13 @@ Scholar.Translate.prototype.setTranslator = function(translator) { * returns: N/A * * itemDone - * valid: import, web + * valid: import, web, search * called: when an item has been processed; may be called asynchronously * passed: an item object (see Scholar.Item) * returns: N/A * * collectionDone - * valid: import, web + * valid: import * called: when a collection has been processed, after all items have been * added; may be called asynchronously * passed: a collection object (see Scholar.Collection) @@ -187,7 +249,7 @@ Scholar.Translate.prototype.getTranslators = function() { var sql = "SELECT translatorID, label, target, detectCode FROM translators WHERE type IN ("+this._numericTypes+") ORDER BY target IS NULL"; var translators = Scholar.DB.query(sql); - if(!this.location) { + if(!this.location && !this.item) { return translators; // no need to see which can translate, because // we don't have a location yet (for export or // import dialog) @@ -228,20 +290,21 @@ Scholar.Translate.prototype.displayOptions = function() { } Scholar.Translate.prototype._loadTranslator = function() { - if(!this._sandbox) { - // create a new sandbox if none exists + if(!this._sandbox || this.type == "search") { + // create a new sandbox if none exists, or for searching (so that it's + // bound to the correct url) this._generateSandbox(); } // parse detect code for the translator - this._parseDetectCode(this.translator); + this._parseDetectCode(this.translator[0]); - Scholar.debug("parsing code for "+this.translator.label); + Scholar.debug("parsing code for "+this.translator[0].label); try { - Components.utils.evalInSandbox(this.translator.code, this._sandbox); + 
Components.utils.evalInSandbox(this.translator[0].code, this._sandbox); } catch(e) { - Scholar.debug(e+' in parsing code for '+this.translator.label); + Scholar.debug(e+' in parsing code for '+this.translator[0].label); this._translationComplete(false); return false; } @@ -254,17 +317,24 @@ Scholar.Translate.prototype._loadTranslator = function() { */ Scholar.Translate.prototype.translate = function() { this._IDMap = new Array(); + this._complete = false; - if(!this.location) { - throw("cannot translate: no location specified"); + if(!this.translator || !this.translator.length) { + throw("cannot translate: no translator specified"); } - this._complete = false; + if(!this.location && this.type != "search") { + // searches operate differently, because we could have an array of + // translators and have to go through each + throw("cannot translate: no location specified"); + } if(!this._loadTranslator()) { return; } + this._sandbox.Scholar.scraperName = this.translator[0].label; + var returnValue; if(this.type == "web") { returnValue = this._web(); @@ -272,7 +342,10 @@ Scholar.Translate.prototype.translate = function() { returnValue = this._import(); } else if(this.type == "export") { returnValue = this._export(); + } else if(this.type == "search") { + returnValue = this._search(); } + if(!returnValue) { // failure this._translationComplete(false); @@ -285,12 +358,31 @@ Scholar.Translate.prototype.translate = function() { /* * generates a sandbox for scraping/scraper detection */ +Scholar.Translate._searchSandboxRegexp = new RegExp(); +Scholar.Translate._searchSandboxRegexp.compile("^http://[\\w.]+/"); Scholar.Translate.prototype._generateSandbox = function() { var me = this; - if(this.type == "web") { - // use real URL, not proxied version, to create sandbox - this._sandbox = new Components.utils.Sandbox(this.browser.contentDocument.location.href); + if(this.type == "web" || this.type == "search") { + // get sandbox URL + var sandboxURL = ""; + if(this.type == "web") 
{ + // use real URL, not proxied version, to create sandbox + sandboxURL = this.browser.contentDocument.location.href; + } else { + // generate sandbox for search by extracting domain from translator + // target, if one exists + if(this.translator && this.translator[0] && this.translator[0].target) { + // so that web translators work too + var tempURL = this.translator[0].target.replace(/\\/g, "").replace(/\^/g, ""); + var m = Scholar.Translate._searchSandboxRegexp.exec(tempURL); + if(m) { + sandboxURL = m[0]; + } + } + } + Scholar.debug("binding sandbox to "+sandboxURL); + this._sandbox = new Components.utils.Sandbox(sandboxURL); this._sandbox.Scholar = new Object(); // add ingester utilities @@ -300,27 +392,30 @@ Scholar.Translate.prototype._generateSandbox = function() { // set up selectItems handler this._sandbox.Scholar.selectItems = function(options) { return me._selectItems(options) }; } else { - // use null URL to create sanbox + // use null URL to create sandbox this._sandbox = new Components.utils.Sandbox(""); this._sandbox.Scholar = new Object(); this._sandbox.Scholar.Utilities = new Scholar.Utilities(); } - if(this.type == "web" || this.type == "import") { + + if(this.type == "export") { + // add routines to retrieve items and collections + this._sandbox.Scholar.nextItem = function() { return me._exportGetItem() }; + this._sandbox.Scholar.nextCollection = function() { return me._exportGetCollection() } + } else { // add routines to add new items this._sandbox.Scholar.Item = Scholar.Translate.ScholarItem; // attach the function to be run when an item is done this._sandbox.Scholar.Item.prototype.complete = function() {me._itemDone(this)}; - // add routines to add new collections - this._sandbox.Scholar.Collection = Scholar.Translate.ScholarCollection; - // attach the function to be run when a collection is done - this._sandbox.Scholar.Collection.prototype.complete = function() {me._collectionDone(this)}; - } else if(this.type == "export") { - // add 
routines to retrieve items and collections - this._sandbox.Scholar.nextItem = function() { return me._exportGetItem() }; - this._sandbox.Scholar.nextCollection = function() { return me._exportGetCollection() }; + if(this.type == "import") { + // add routines to add new collections + this._sandbox.Scholar.Collection = Scholar.Translate.ScholarCollection; + // attach the function to be run when a collection is done + this._sandbox.Scholar.Collection.prototype.complete = function() {me._collectionDone(this)}; + } } this._sandbox.XPathResult = Components.interfaces.nsIDOMXPathResult; @@ -334,33 +429,50 @@ Scholar.Translate.prototype._generateSandbox = function() { this._sandbox.Scholar.addOption = function(option, value) {me._addOption(option, value) }; // for loading other translators and accessing their methods - var me = this; this._sandbox.Scholar.loadTranslator = function(type, translatorID) { - var translation = new Scholar.Translate(type); - // assign same handlers as for parent, because the done handler won't - // get called anyway, and the itemDone/selectItems handlers should be - // the same - translation._handlers = me._handlers; - // set the translator - translation.setTranslator(translatorID); - // load the translator into our sandbox - translation._loadTranslator(); - // use internal io - translation._initializeInternalIO(); - return translation._sandbox; + var translation = new Scholar.Translate(type, (translatorID ? 
true : false)); + if(translatorID) { + // assign same handlers as for parent, because the done handler won't + // get called anyway, and the itemDone/selectItems handlers should be + // the same + translation._handlers = me._handlers; + // set the translator + translation.setTranslator(translatorID); + // load the translator into our sandbox + translation._loadTranslator(); + // use internal io + translation._initializeInternalIO(); + return translation._sandbox; + } else { + // create a safe translator object, so that scrapers can't get + // access to potentially harmful methods. + if(type == "import" || type == "export") { + throw("you must specify a translatorID for "+type+" translation"); + } + + var safeTranslator = new Object(); + safeTranslator.setItem = function(arg) { return translation.setItem(arg) }; + safeTranslator.setBrowser = function(arg) { return translation.setBrowser(arg) }; + safeTranslator.setHandler = function(arg1, arg2) { translation.setHandler(arg1, arg2) }; + safeTranslator.setTranslator = function(arg) { return translation.setTranslator(arg) }; + safeTranslator.getTranslators = function() { return translation.getTranslators() }; + safeTranslator.translate = function() { return translation.translate() }; + translation._parentTranslator = me; + + return safeTranslator; + } } } /* * Check to see if _scraper_ can scrape this document */ -Scholar.Translate.prototype._canTranslate = function(translator) { - var canTranslate = false; - +Scholar.Translate.prototype._canTranslate = function(translator) { // Test location with regular expression // If this is slow, we could preload all scrapers and compile regular // expressions, so each check will be faster - if(translator.target) { + if(translator.target && this.type != "search") { + var canTranslate = false; if(this.type == "web") { var regularExpression = new RegExp(translator.target, "i"); } else { @@ -370,6 +482,8 @@ Scholar.Translate.prototype._canTranslate = function(translator) { 
if(regularExpression.test(this.path)) { canTranslate = true; } + } else { + var canTranslate = true; } // Test with JavaScript if available and didn't have a regular expression or @@ -388,14 +502,21 @@ Scholar.Translate.prototype._canTranslate = function(translator) { } } - if(this._sandbox.detect) { + if((this.type == "web" && this._sandbox.detectWeb) || + (this.type == "search" && this._sandbox.detectSearch) || + (this.type == "import" && this._sandbox.detectImport) || + (this.type == "export" && this._sandbox.detectExport)) { var returnValue; try { if(this.type == "web") { - returnValue = this._sandbox.detect(this.browser.contentDocument, this.location); + returnValue = this._sandbox.detectWeb(this.browser.contentDocument, this.location); + } else if(this.type == "search") { + returnValue = this._sandbox.detectSearch(this.item); } else if(this.type == "import") { - returnValue = this._sandbox.detect(); + returnValue = this._sandbox.detectImport(); + } else if(this.type == "export") { + returnValue = this._sandbox.detectExport(); } } catch(e) { Scholar.debug(e+' in executing detectCode for '+translator.label); @@ -476,7 +597,7 @@ Scholar.Translate.prototype._addOption = function(option, value) { * called as wait() in translator code */ Scholar.Translate.prototype._enableAsynchronous = function() { - me = this; + var me = this; this._waitForCompletion = true; this._sandbox.Scholar.done = function() { me._translationComplete(true) }; } @@ -505,13 +626,20 @@ Scholar.Translate.prototype._translationComplete = function(returnValue) { if(!this._complete) { this._complete = true; - Scholar.debug("translation complete"); - - // call handler - this._runHandler("done", returnValue); - - // close open streams - this._closeStreams(); + if(this.type == "search" && !this._itemsFound && this.translator.length > 1) { + // if we're performing a search and didn't get any results, go on + // to the next translator + this.translator.shift(); + this.translate(); + } else { + 
Scholar.debug("translation complete"); + + // call handler + this._runHandler("done", returnValue); + + // close open streams + this._closeStreams(); + } } } @@ -547,13 +675,23 @@ Scholar.Translate.prototype._closeStreams = function() { */ Scholar.Translate.prototype._itemDone = function(item) { Scholar.debug(item); + if(!this.saveItem) { // if we're not supposed to save the item, just + // return the item array + + // if a parent sandbox exists, use complete() function from that sandbox + if(this._parentTranslator) { + var pt = this._parentTranslator; + item.complete = function() { pt._itemDone(this) }; + Scholar.debug("done from parent sandbox"); + } + this._runHandler("itemDone", item); + return; + } // Get typeID, defaulting to "website" var type = (item.itemType ? item.itemType : "website"); - Scholar.debug("type is "+type); if(type == "note") { // handle notes differently - Scholar.debug("handling a note"); var myID = Scholar.Notes.add(item.note); // re-retrieve the item var newItem = Scholar.Items.get(myID); @@ -718,7 +856,11 @@ Scholar.Translate.prototype._runHandler = function(type, argument) { for(var i in this._handlers[type]) { Scholar.debug("running handler "+i+" for "+type); try { - returnValue = this._handlers[type][i](this, argument); + if(this._parentTranslator) { + returnValue = this._handlers[type][i](null, argument); + } else { + returnValue = this._handlers[type][i](this, argument); + } } catch(e) { Scholar.debug(e+' in handler '+i+' for '+type); } @@ -734,7 +876,21 @@ Scholar.Translate.prototype._web = function() { try { this._sandbox.doWeb(this.browser.contentDocument, this.location); } catch(e) { - Scholar.debug(e+' in executing code for '+this.translator.label); + Scholar.debug(e+' in executing code for '+this.translator[0].label); + return false; + } + + return true; +} + +/* + * does the actual search translation + */ +Scholar.Translate.prototype._search = function() { + try { + this._sandbox.doSearch(this.item); + } catch(e) { + 
Scholar.debug(e+' in executing code for '+this.translator[0].label); return false; } @@ -750,7 +906,7 @@ Scholar.Translate.prototype._import = function() { try { this._sandbox.doImport(); } catch(e) { - Scholar.debug(e+' in executing code for '+this.translator.label); + Scholar.debug(e+' in executing code for '+this.translator[0].label); return false; } @@ -830,7 +986,7 @@ Scholar.Translate.prototype._export = function() { try { this._sandbox.doExport(); } catch(e) { - Scholar.debug(e+' in executing code for '+this.translator.label); + Scholar.debug(e+' in executing code for '+this.translator[0].label); return false; } diff --git a/chrome/chromeFiles/content/scholar/xpcom/utilities.js b/chrome/chromeFiles/content/scholar/xpcom/utilities.js @@ -321,8 +321,8 @@ Scholar.Utilities.Ingester.prototype.lookupContextObject = function(co, done, er return Scholar.OpenURL.lookupContextObject(co, done, error); } -Scholar.Utilities.Ingester.prototype.parseContextObject = function(co) { - return Scholar.OpenURL.parseContextObject(co); +Scholar.Utilities.Ingester.prototype.parseContextObject = function(co, item) { + return Scholar.OpenURL.parseContextObject(co, item); } /* diff --git a/scrapers.sql b/scrapers.sql @@ -4,7 +4,7 @@ REPLACE INTO "version" VALUES ('repository', STRFTIME('%s', '2006-08-07 01:09:00')); REPLACE INTO "translators" VALUES ('96b9f483-c44d-5784-cdad-ce21b984fe01', '2006-06-28 23:08:00', 4, 'Amazon.com Scraper', 'Simon Kornblith', '^http://www\.amazon\.com/(?:gp/(?:product|search)/|exec/obidos/search-handle-url/|s/)', -'function detect(doc, url) { +'function detectWeb(doc, url) { var searchRe = new RegExp(''^http://www\.amazon\.com/(gp/search/|exec/obidos/search-handle-url/|s/)''); if(searchRe.test(doc.location.href)) { return "multiple"; @@ -123,7 +123,7 @@ function doWeb(doc, url) { }'); REPLACE INTO "translators" VALUES ('838d8849-4ffb-9f44-3d0d-aa8a0a079afe', '2006-06-26 16:01:00', 4, 'WorldCat Scraper', 'Simon Kornblith', 
'^http://(?:new)?firstsearch\.oclc\.org/WebZ/', -'function detect(doc, url) { +'function detectWeb(doc, url) { if(doc.title == ''FirstSearch: WorldCat Detailed Record'') { return "book"; } else if(doc.title == ''FirstSearch: WorldCat List of Records'') { @@ -288,7 +288,7 @@ REPLACE INTO "translators" VALUES ('838d8849-4ffb-9f44-3d0d-aa8a0a079afe', '2006 }'); REPLACE INTO "translators" VALUES ('88915634-1af6-c134-0171-56fd198235ed', '2006-06-26 21:40:00', 4, 'LOC/Voyager WebVoyage Scraper', 'Simon Kornblith', 'Pwebrecon\.cgi', -'function detect(doc, url) { +'function detectWeb(doc, url) { var export_options = doc.forms.namedItem(''frm'').elements.namedItem(''RD'').options; for(var i in export_options) { if(export_options[i].text == ''Latin1 MARC'' @@ -415,7 +415,7 @@ REPLACE INTO "translators" VALUES ('88915634-1af6-c134-0171-56fd198235ed', '2006 }'); REPLACE INTO "translators" VALUES ('d921155f-0186-1684-615c-ca57682ced9b', '2006-06-26 16:01:00', 4, 'JSTOR Scraper', 'Simon Kornblith', '^http://www\.jstor\.org/(?:view|browse|search/)', -'function detect(doc, url) { +'function detectWeb(doc, url) { var namespace = doc.documentElement.namespaceURI; var nsResolver = namespace ? 
function(prefix) { if (prefix == ''x'') return namespace; else return null; @@ -590,7 +590,7 @@ function doWeb(doc, url) { }'); REPLACE INTO "translators" VALUES ('e85a3134-8c1a-8644-6926-584c8565f23e', '2006-06-26 16:01:00', 4, 'History Cooperative Scraper', 'Simon Kornblith', '^http://www\.historycooperative\.org/(?:journals/.+/.+/.+\.html$|cgi-bin/search.cgi)', -'function detect(doc, url) { +'function detectWeb(doc, url) { if(doc.title == "History Cooperative: Search Results") { return "multiple"; } else { @@ -657,7 +657,7 @@ function doWeb(doc, url) { }'); REPLACE INTO "translators" VALUES ('4fd6b89b-2316-2dc4-fd87-61a97dd941e8', '2006-08-06 21:45:00', 4, 'InnoPAC Scraper', 'Simon Kornblith', '^http://[^/]+/(?:search/|record=)', -'function detect(doc, url) { +'function detectWeb(doc, url) { // First, check to see if the URL alone reveals InnoPAC, since some sites don''t reveal the MARC button var matchRegexp = new RegExp(''^(http://[^/]+/search/[^/]+/[^/]+/1\%2C[^/]+/)frameset(.+)$''); if(matchRegexp.test(doc.location.href)) { @@ -837,7 +837,7 @@ REPLACE INTO "translators" VALUES ('4fd6b89b-2316-2dc4-fd87-61a97dd941e8', '2006 }'); REPLACE INTO "translators" VALUES ('add7c71c-21f3-ee14-d188-caf9da12728b', '2006-06-26 16:01:00', 4, 'SIRSI 2003+ Scraper', 'Simon Kornblith', '/uhtbin/cgisirsi', -'function detect(doc, url) { +'function detectWeb(doc, url) { var namespace = doc.documentElement.namespaceURI; var nsResolver = namespace ? 
function(prefix) { if (prefix == ''x'') return namespace; else return null; @@ -964,7 +964,7 @@ function doWeb(doc, url) { '); REPLACE INTO "translators" VALUES ('a77690cf-c5d1-8fc4-110f-d1fc765dcf88', '2006-06-26 16:01:00', 4, 'ProQuest Scraper', 'Simon Kornblith', '^http://proquest\.umi\.com/pqdweb\?((?:.*\&)?did=.*&Fmt=[0-9]|(?:.*\&)Fmt=[0-9].*&did=|(?:.*\&)searchInterface=)', -'function detect(doc, url) { +'function detectWeb(doc, url) { if(doc.title == "Results") { return "magazineArticle"; } else { @@ -1147,7 +1147,7 @@ function doWeb(doc, url) { }'); REPLACE INTO "translators" VALUES ('6773a9af-5375-3224-d148-d32793884dec', '2006-06-26 16:01:00', 4, 'InfoTrac Scraper', 'Simon Kornblith', '^http://infotrac-college\.thomsonlearning\.com/itw/infomark/', -'function detect(doc, url) { +'function detectWeb(doc, url) { if(doc.title.substring(0, 8) == "Article ") { return "magazineArticle"; } else doc.title.substring(0, 10) == "Citations ") { @@ -1273,7 +1273,7 @@ function doWeb(doc, url) { }'); REPLACE INTO "translators" VALUES ('b047a13c-fe5c-6604-c997-bef15e502b09', '2006-06-26 16:01:00', 4, 'LexisNexis Scraper', 'Simon Kornblith', '^http://web\.lexis-nexis\.com/universe/(?:document|doclist)', -'function detect(doc, url) { +'function detectWeb(doc, url) { var detailRe = new RegExp("^http://[^/]+/universe/document"); if(detailRe.test(doc.location.href)) { return "newspaperArticle"; @@ -1377,7 +1377,7 @@ function doWeb(doc, url) { }'); REPLACE INTO "translators" VALUES ('cf87eca8-041d-b954-795a-2d86348999d5', '2006-06-26 16:01:00', 4, 'Aleph Scraper', 'Simon Kornblith', '^http://[^/]+/F(?:/[A-Z0-9\-]+(?:\?.*)?$|\?func=find)', -'function detect(doc, url) { +'function detectWeb(doc, url) { var singleRe = new RegExp("^http://[^/]+/F/[A-Z0-9\-]+\?.*func=full-set-set.*\&format=[0-9]{3}"); if(singleRe.test(doc.location.href)) { @@ -1468,7 +1468,7 @@ REPLACE INTO "translators" VALUES ('cf87eca8-041d-b954-795a-2d86348999d5', '2006 }'); REPLACE INTO "translators" VALUES 
('774d7dc2-3474-2684-392c-f787789ec63d', '2006-06-26 16:01:00', 4, 'Dynix Scraper', 'Simon Kornblith', 'ipac\.jsp\?.*(?:uri=full=[0-9]|menu=search)', -'function detect(doc, url) { +'function detectWeb(doc, url) { var detailsRe = new RegExp(''ipac\.jsp\?.*uri=full=[0-9]''); if(detailsRe.test(doc.location.href)) { return "book"; @@ -1556,7 +1556,7 @@ REPLACE INTO "translators" VALUES ('774d7dc2-3474-2684-392c-f787789ec63d', '2006 }'); REPLACE INTO "translators" VALUES ('63a0a351-3131-18f4-21aa-f46b9ac51d87', '2006-06-26 16:01:00', 4, 'VTLS Scraper', 'Simon Kornblith', '/chameleon(?:\?|$)', -'function detect(doc, url) { +'function detectWeb(doc, url) { var node = Scholar.Utilities.getNode(doc, doc, ''//tr[@class="intrRow"]/td/table/tbody/tr[th]'', null); if(node) { return "multiple"; @@ -1660,7 +1660,7 @@ REPLACE INTO "translators" VALUES ('63a0a351-3131-18f4-21aa-f46b9ac51d87', '2006 }'); REPLACE INTO "translators" VALUES ('fb12ae9e-f473-cab4-0546-27ab88c64101', '2006-06-26 16:01:00', 4, 'DRA Scraper', 'Simon Kornblith', '/web2/tramp2\.exe/(?:see\_record/|authority\_hits/|goto/.*\?.*screen=Record\.html)', -'function detect(doc, url) { +'function detectWeb(doc, url) { if(doc.location.href.indexOf("/authority_hits") > 0) { return "multiple"; } else { @@ -1730,7 +1730,7 @@ REPLACE INTO "translators" VALUES ('fb12ae9e-f473-cab4-0546-27ab88c64101', '2006 REPLACE INTO "translators" VALUES ('c0e6fda6-0ecd-e4f4-39ca-37a4de436e15', '2006-06-26 16:01:00', 4, 'GEAC Scraper', 'Simon Kornblith', '/(?:GeacQUERY|(?:Geac)?FETCH[\:\?].*[&:]next=html/(?:record\.html|geacnffull\.html))', -'function detect(doc, url) { +'function detectWeb(doc, url) { if(doc.location.href.indexOf("/GeacQUERY") > 0) { return "multiple"; } else { @@ -1818,7 +1818,7 @@ REPLACE INTO "translators" VALUES ('c0e6fda6-0ecd-e4f4-39ca-37a4de436e15', '2006 }'); REPLACE INTO "translators" VALUES ('5287d20c-8a13-6004-4dcb-5bb2b66a9cc9', '2006-06-26 16:01:00', 4, 'SIRSI -2003 Scraper', 'Simon Kornblith', 
'/uhtbin/cgisirsi', -'function detect(doc, url) { +'function detectWeb(doc, url) { var namespace = doc.documentElement.namespaceURI; var nsResolver = namespace ? function(prefix) { if (prefix == ''x'') return namespace; else return null; @@ -1954,7 +1954,7 @@ REPLACE INTO "translators" VALUES ('5287d20c-8a13-6004-4dcb-5bb2b66a9cc9', '2006 }'); REPLACE INTO "translators" VALUES ('0f9fc2fc-306e-5204-1117-25bca009dffc', '2006-06-26 16:01:00', 4, 'TLC/YouSeeMore Scraper', 'Simon Kornblith', 'TLCScripts/interpac\.dll\?(?:.*LabelDisplay.*RecordNumber=[0-9]|Search|ItemTitles)', -'function detect(doc, url) { +'function detectWeb(doc, url) { var detailRe = new RegExp("TLCScripts/interpac\.dll\?.*LabelDisplay.*RecordNumber=[0-9]"); if(detailRe.test(doc.location.href)) { return "book"; @@ -2052,7 +2052,7 @@ REPLACE INTO "translators" VALUES ('0f9fc2fc-306e-5204-1117-25bca009dffc', '2006 }'); REPLACE INTO "translators" VALUES ('c54d1932-73ce-dfd4-a943-109380e06574', '2006-06-26 16:01:00', 4, 'Project MUSE Scraper', 'Simon Kornblith', '^http://muse\.jhu\.edu/(?:journals/[^/]+/[^/]+/[^/]+\.html|search/pia.cgi)', -'function detect(doc, url) { +'function detectWeb(doc, url) { var searchRe = new RegExp("^http://[^/]+/search/pia\.cgi"); if(searchRe.test(url)) { return "multiple"; @@ -2163,48 +2163,37 @@ REPLACE INTO "translators" VALUES ('c54d1932-73ce-dfd4-a943-109380e06574', '2006 } }'); -REPLACE INTO "translators" VALUES ('fcf41bed-0cbc-3704-85c7-8062a0068a7a', '2006-06-26 16:01:00', 4, 'PubMed Scraper', 'Simon Kornblith', '^http://www\.ncbi\.nlm\.nih\.gov/entrez/query\.fcgi\?(?:.*db=PubMed.*list_uids=[0-9]|.*list_uids=[0-9].*db=PubMed|.*db=PubMed.*CMD=search|.*CMD=search.*db=PubMed)', -'function detect(doc, url) { +REPLACE INTO "translators" VALUES ('fcf41bed-0cbc-3704-85c7-8062a0068a7a', '2006-06-26 16:01:00', 12, 'PubMed Scraper', 'Simon Kornblith', 
'^http://www\.ncbi\.nlm\.nih\.gov/entrez/query\.fcgi\?(?:.*db=PubMed.*list_uids=[0-9]|.*list_uids=[0-9].*db=PubMed|.*db=PubMed.*CMD=search|.*CMD=search.*db=PubMed)', +'function detectWeb(doc, url) { if(doc.location.href.indexOf("list_uids=") >= 0) { return "journalArticle"; } else { return "multiple"; } -}', -'function doWeb(doc, url) { - var uri = doc.location.href; - var ids = new Array(); - var idRegexp = /[\?\&]list_uids=([0-9\,]+)/; - - var m = idRegexp.exec(uri); - if(m) { - ids.push(m[1]); - } else { - var namespace = doc.documentElement.namespaceURI; - var nsResolver = namespace ? function(prefix) { - if (prefix == ''x'') return namespace; else return null; - } : null; - - var items = new Array(); - var tableRows = Scholar.Utilities.gatherElementsOnXPath(doc, doc, ''//div[@class="ResultSet"]/table/tbody'', nsResolver); - // Go through table rows - for(var i=0; i<tableRows.length; i++) { - var link = Scholar.Utilities.getNode(doc, tableRows[i], ''.//a'', nsResolver); - var article = Scholar.Utilities.getNode(doc, tableRows[i], ''./tr[2]/td[2]/text()[1]'', nsResolver); - items[link.href] = article.nodeValue; - } - - items = Scholar.selectItems(items); - - if(!items) { - return true; +} + +function getPMID(co) { + var coParts = co.split("&"); + for each(part in coParts) { + if(part.substr(0, 7) == "rft_id=") { + var value = unescape(part.substr(7)); + if(value.substr(0, 10) == "info:pmid/") { + return value.substr(10); + } } - - for(var i in items) { - var m = idRegexp.exec(i); - ids.push(m[1]); + } +} + +function detectSearch(item) { + if(item.contextObject) { + if(getPMID(item.contextObject)) { + return "journalArticle"; } } + return false; +}', +'function lookupPMIDs(ids) { + Scholar.wait(); var newUri = "http://eutils.ncbi.nlm.nih.gov/entrez/eutils/efetch.fcgi?db=PubMed&retmode=xml&rettype=citation&id="+ids.join(","); Scholar.Utilities.HTTPUtilities.doGet(newUri, null, function(text) { @@ -2283,13 +2272,54 @@ REPLACE INTO "translators" VALUES 
('fcf41bed-0cbc-3704-85c7-8062a0068a7a', '2006 } Scholar.done(); - }) + }); +} + +function doWeb(doc, url) { + var uri = doc.location.href; + var ids = new Array(); + var idRegexp = /[\?\&]list_uids=([0-9\,]+)/; - Scholar.wait(); + var m = idRegexp.exec(uri); + if(m) { + ids.push(m[1]); + } else { + var namespace = doc.documentElement.namespaceURI; + var nsResolver = namespace ? function(prefix) { + if (prefix == ''x'') return namespace; else return null; + } : null; + + var items = new Array(); + var tableRows = Scholar.Utilities.gatherElementsOnXPath(doc, doc, ''//div[@class="ResultSet"]/table/tbody'', nsResolver); + // Go through table rows + for(var i=0; i<tableRows.length; i++) { + var link = Scholar.Utilities.getNode(doc, tableRows[i], ''.//a'', nsResolver); + var article = Scholar.Utilities.getNode(doc, tableRows[i], ''./tr[2]/td[2]/text()[1]'', nsResolver); + items[link.href] = article.nodeValue; + } + + items = Scholar.selectItems(items); + + if(!items) { + return true; + } + + for(var i in items) { + var m = idRegexp.exec(i); + ids.push(m[1]); + } + } + + lookupPMIDs(ids); +} + +function doSearch(item) { + // pmid was defined earlier in detectSearch + lookupPMIDs([getPMID(item.contextObject)]); }'); REPLACE INTO "translators" VALUES ('951c027d-74ac-47d4-a107-9c3069ab7b48', '2006-06-26 16:41:00', 4, 'Embedded RDF Scraper', 'Simon Kornblith', NULL, -'function detect(doc, url) { +'function detectWeb(doc, url) { var metaTags = doc.getElementsByTagName("meta"); for(var i=0; i<metaTags.length; i++) { @@ -2333,7 +2363,7 @@ REPLACE INTO "translators" VALUES ('951c027d-74ac-47d4-a107-9c3069ab7b48', '2006 }'); REPLACE INTO "translators" VALUES ('05d07af9-105a-4572-99f6-a8e231c0daef', '2006-08-07 01:09:00', 4, 'COinS Scraper', 'Simon Kornblith', NULL, -'function detect(doc, url) { +'function detectWeb(doc, url) { var spanTags = doc.getElementsByTagName("span"); var encounteredType = false; @@ -2348,11 +2378,11 @@ REPLACE INTO "translators" VALUES 
('05d07af9-105a-4572-99f6-a8e231c0daef', '2006 // determine if it''s a valid type var coParts = spanTitle.split("&"); var type = null - for(var i in coParts) { - if(coParts[i].substr(0, 12) == "rft_val_fmt=") { - var format = unescape(coParts[i].substr(12)); + for(var j in coParts) { + if(coParts[j].substr(0, 12) == "rft_val_fmt=") { + var format = unescape(coParts[j].substr(12)); if(format == "info:ofi/fmt:kev:mtx:journal") { - var type = "journal"; + var type = "journalArticle"; } else if(format == "info:ofi/fmt:kev:mtx:book") { if(Scholar.Utilities.inArray("rft.genre=bookitem", coParts)) { var type = "bookSection"; @@ -2384,45 +2414,43 @@ function retrieveNextCOinS(needFullItems, newItems) { var item = needFullItems.shift(); Scholar.Utilities.debugPrint("looking up contextObject"); - Scholar.Utilities.lookupContextObject(item.contextObject, function(items) { - Scholar.Utilities.debugPrint(items); - if(items) { - newItems = newItems.concat(items); - } + var search = Scholar.loadTranslator("search"); + search.setHandler("itemDone", function(obj, item) { + newItems.push(item); + }); + search.setHandler("done", function() { retrieveNextCOinS(needFullItems, newItems); - }, function() { - Scholar.done(false); }); + search.setItem(item); + + // look for translators + var translators = search.getTranslators(); + if(translators) { + search.setTranslator(translators); + search.translate(); + } else { + retrieveNextCOinS(needFullItems, newItems); + } } else { completeCOinS(newItems); Scholar.done(true); } } -// attaches item data to a new Scholar.Item instance (because data returned from -// Scholar.OpenURL.processContextObject does not have a complete() method) -function addAsItem(itemArray) { - var newItem = new Scholar.Item(); - for(var i in itemArray) { - newItem[i] = itemArray[i]; - } - newItem.complete(); -} - // saves all COinS objects function completeCOinS(newItems) { if(newItems.length > 1) { var selectArray = new Array(); for(var i in newItems) { - 
selectArray[i] = newItems.title; + selectArray[i] = newItems[i].title; } selectArray = Scholar.selectItems(selectArray); for(var i in selectArray) { - addAsItem(newItems[i]); + newItems[i].complete(); } } else if(newItems.length) { - addAsItem(newItems[0]); + newItems[0].complete(); } } @@ -2438,8 +2466,8 @@ function doWeb(doc, url) { var spanClasses = spanClass.split(" "); if(Scholar.Utilities.inArray("Z3988", spanClasses)) { var spanTitle = spanTags[i].getAttribute("title"); - var newItem = Scholar.Utilities.parseContextObject(spanTitle); - if(newItem) { + var newItem = new Scholar.Item(); + if(Scholar.Utilities.parseContextObject(spanTitle, newItem)) { if(newItem.title && newItem.creators.length) { // title and creators are minimum data to avoid looking up newItems.push(newItem); @@ -2463,7 +2491,7 @@ function doWeb(doc, url) { }'); REPLACE INTO "translators" VALUES ('3e684d82-73a3-9a34-095f-19b112d88bbf', '2006-06-26 16:01:00', 4, 'Google Books Scraper', 'Simon Kornblith', '^http://books\.google\.com/books\?(.*vid=.*\&id=.*|.*q=.*)', -'function detect(doc, url) { +'function detectWeb(doc, url) { var re = new RegExp(''^http://books\\.google\\.com/books\\?vid=([^&]+).*\\&id=([^&]+)'', ''i''); if(re.test(doc.location.href)) { return "book"; @@ -2553,6 +2581,161 @@ REPLACE INTO "translators" VALUES ('3e684d82-73a3-9a34-095f-19b112d88bbf', '2006 Scholar.wait(); }'); +REPLACE INTO "translators" VALUES ('e07e9b8c-0e98-4915-bb5a-32a08cb2f365', '2006-08-07 11:36:00', 8, 'Open WorldCat', 'Simon Kornblith', 'http://partneraccess.oclc.org/', +'function detectSearch(item) { + if(item.itemType == "book" || item.itemType == "bookSection") { + return true; + } + return false; +}', +'// creates an item from an Open WorldCat document +function processOWC(doc) { + var spanTags = doc.getElementsByTagName("span"); + for(var i=0; i<spanTags.length; i++) { + var spanClass = spanTags[i].getAttribute("class"); + if(spanClass) { + var spanClasses = spanClass.split(" "); + 
if(Scholar.Utilities.inArray("Z3988", spanClasses)) { + var spanTitle = spanTags[i].getAttribute("title"); + var item = new Scholar.Item(); + if(Scholar.Utilities.parseContextObject(spanTitle, item)) { + item.complete(); + return true; + } else { + return false; + } + } + } + } + + return false; +} + +function doSearch(item) { + if(item.contextObject) { + var co = item.contextObject; + } else { + var co = Scholar.Utilities.createContextObject(item); + } + + Scholar.Utilities.processDocuments(null, ["http://partneraccess.oclc.org/wcpa/servlet/OpenUrl?"+co], function(browser) { + var doc = browser.contentDocument; + // find new COinS in the Open WorldCat page + if(processOWC(doc)) { // we got a single item page + Scholar.done(); + } else { // assume we have a search results page + var items = new Array(); + + var namespace = doc.documentElement.namespaceURI; + var nsResolver = namespace ? function(prefix) { + if (prefix == ''x'') return namespace; else return null; + } : null; + + // first try to get only books + var elmts = doc.evaluate(''//table[@class="tableLayout"]/tbody/tr/td[@class="content"]/table[@class="tableResults"]/tbody/tr[td/img[@alt="Book"]]/td/div[@class="title"]/a'', doc, nsResolver, Components.interfaces.nsIDOMXPathResult.ANY_TYPE,null); + var elmt = elmts.iterateNext(); + if(!elmt) { // if that fails, look for other options + var elmts = doc.evaluate(''//table[@class="tableLayout"]/tbody/tr/td[@class="content"]/table[@class="tableResults"]/tbody/tr[td/img[@alt="Book"]]/td/div[@class="title"]/a'', doc, nsResolver, Components.interfaces.nsIDOMXPathResult.ANY_TYPE,null); + elmt = elmts.iterateNext() + } + + var urlsToProcess = new Array(); + do { + urlsToProcess.push(elmt.href); + } while(elmt = elmts.iterateNext()); + + Scholar.Utilities.processDocuments(null, urlsToProcess, function(browser) { + // per URL + processOWC(browser.contentDocument); + }, function() { // done + Scholar.done(); + }, function() { // error + Scholar.done(false); + }); + } + 
}, null, function() { + error(); + }); + + Scholar.wait(); +}'); + +REPLACE INTO "translators" VALUES ('11645bd1-0420-45c1-badb-53fb41eeb753', '2006-08-07 18:17:00', 8, 'CrossRef', 'Simon Kornblith', 'http://partneraccess.oclc.org/', +'function detectSearch(item) { + if(item.itemType == "journal") { + return true; + } + return false; +}', +'function processCrossRef(xmlOutput) { + xmlOutput = xmlOutput.replace(/<\?xml[^>]*\?>/, ""); + + // parse XML with E4X + var qr = new Namespace("http://www.crossref.org/qrschema/2.0"); + try { + var xml = new XML(xmlOutput); + } catch(e) { + return false; + } + + // ensure status is valid + var status = xml.qr::query_result.qr::body.qr::query.@status.toString(); + if(status != "resolved" && status != "multiresolved") { + return false; + } + + var query = xml.qr::query_result.qr::body.qr::query; + var item = new Scholar.Item("journalArticle"); + + // try to get a DOI + item.DOI = query.qr::doi.(@type=="journal_article").text().toString(); + if(!item.DOI) { + item.DOI = query.qr::doi.(@type=="book_title").text().toString(); + } + if(!item.DOI) { + item.DOI = query.qr::doi.(@type=="book_content").text().toString(); + } + + // try to get an ISSN (no print/electronic preferences) + item.ISSN = query.qr::issn[0].text().toString(); + // get title + item.title = query.qr::article_title.text().toString(); + // get publicationTitle + item.publicationTitle = query.qr::journal_title.text().toString(); + // get author + item.creators.push(Scholar.Utilities.cleanAuthor(query.qr::author.text().toString(), "author", true)); + // get volume + item.volume = query.qr::volume.text().toString(); + // get issue + item.issue = query.qr::issue.text().toString(); + // get year + item.date = query.qr::year.text().toString(); + // get edition + item.edition = query.qr::edition_number.text().toString(); + // get first page + item.pages = query.qr::first_page.text().toString(); + item.complete(); + return true; +} + +function doSearch(item) { + 
if(item.contextObject) { + var co = item.contextObject; + if(co.indexOf("url_ver=") == -1) { + co = "url_ver=Z39.88-2004"+co; + } + } else { + var co = Scholar.Utilities.createContextObject(item); + } + + Scholar.Utilities.HTTPUtilities.doGet("http://www.crossref.org/openurl/?"+co+"&noredirect=true", null, function(responseText) { + processCrossRef(responseText); + Scholar.done(); + }); + + Scholar.wait(); +}'); + REPLACE INTO "translators" VALUES ('0e2235e7-babf-413c-9acf-f27cce5f059c', '2006-07-05 23:40:00', 3, 'MODS (XML)', 'Simon Kornblith', 'xml', 'Scholar.addOption("exportNotes", true); Scholar.addOption("exportFileData", true);',