www

Unnamed repository; edit this file 'description' to name the repository.
Log | Files | Refs | Submodules | README | LICENSE

commit f7787305d9738ed2630be88c7b360156cc1483e9
parent 3ce6e429eda54a821a65b9ebc60f94a9bb4393b0
Author: Avram Lyon <ajlyon@gmail.com>
Date:   Sat,  4 Sep 2010 07:53:20 +0000

 - Fix for some InnoPAC installations, per http://forums.zotero.org/discussion/14023
 - Chad's new Rutgers IRIS translator, now moved to standard filename


Diffstat:
D translators/IRIS translator.js | 316 -------------------------------------------------------------------------------
A translators/IRIS.js | 328 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
M translators/Library Catalog (InnoPAC).js | 5 ++---
3 files changed, 330 insertions(+), 319 deletions(-)

diff --git a/translators/IRIS translator.js b/translators/IRIS translator.js @@ -1,315 +0,0 @@ -{ - "translatorID":"8381bf68-11fa-418c-8530-2e00284d3efd", - "translatorType":4, - "label":"IRIS translator", - "creator":"Chad Mills and Michael Berkowitz", - "target":"http://[^/]*www.iris.rutgers.edu[^/]*/", - "minVersion":"1.0.0b4.r5", - "maxVersion":"", - "priority":100, - "inRepository":true, - "lastUpdated":"2008-04-09 00:45:00" -} - -function detectWeb(doc, url) { - if (doc.evaluate('//tr/td[1][@class="searchsum"]', doc, null, XPathResult.ANY_TYPE, null).iterateNext()) { - return "multiple"; - } else if (doc.evaluate('//th[@class="viewmarctags"]', doc, null, XPathResult.ANY_TYPE, null).iterateNext()) { - return "book"; - } -} - -function scrape(doc) { - var namespace = doc.documentElement.namespaceURI; - var nsResolver = namespace ? function(prefix) { - if (prefix == 'x') return namespace; else return null; - } : null; - - var xpath = '//div[@id="panel1"]//tr[th[@class="viewmarctags"]][td[@class="viewmarctags"]]'; - var elmts = doc.evaluate(xpath, doc, nsResolver, XPathResult.ANY_TYPE, null); - var elmt = elmts.iterateNext(); - - if(!elmt) { - return false; - } - - var newItem = new Zotero.Item("book"); - newItem.extra = ""; - - newItem.series = ""; - var seriesItemCount = 0; - - while(elmt) { - try { - var node = doc.evaluate('./TD[1]/A[1]/strong[1]/text()[1]', elmt, nsResolver, XPathResult.ANY_TYPE, null).iterateNext(); - if(!node) { - var node = doc.evaluate('./TD[1]/text()[1]', elmt, nsResolver, XPathResult.ANY_TYPE, null).iterateNext(); - } - if(node) { - var casedField = Zotero.Utilities.superCleanString(doc.evaluate('./TH[1]/text()[1]', elmt, nsResolver, XPathResult.ANY_TYPE, null).iterateNext().nodeValue); - field = casedField.toLowerCase(); - var value = Zotero.Utilities.superCleanString(node.nodeValue); - if(field == "publisher") { - newItem.publisher = value; - } else if(field == "pub date") { - var re = /[0-9]+/; - var m = re.exec(value); - 
newItem.date = m[0]; - } else if(field == "isbn") { - var re = /^[0-9](?:[0-9X]+)/; - var m = re.exec(value); - newItem.ISBN = m[0]; - } else if(field == "title") { - var titleParts = value.split(" / "); - re = /\[(.+)\]/i; - if (re.test(titleParts[0])) { - var ar = re.exec(titleParts[0]); - var itype = ar[1].toLowerCase(); - if(itype== "phonodisc" || itype == "sound recording"){ - newItem.itemType = "audioRecording"; - }else if(itype=="videorecording"){ - newItem.itemType = "videoRecording"; - }else if(itype=="electronic resource"){ - newItem.itemType = "webPage"; - } - } - newItem.title = Zotero.Utilities.capitalizeTitle(titleParts[0]); - }else if(field == "series") {//push onto item, delimit with semicolon when needed - if (seriesItemCount != 0){ - newItem.series += "; " + value; - } - else if(seriesItemCount == 0) { - newItem.series = value; - } - seriesItemCount++;//bump counter - }else if(field == "dissertation note") { - newItem.itemType = "thesis"; - var thesisParts = value.split("--"); - var uniDate = thesisParts[1].split(", "); - newItem.university = uniDate[0]; - newItem.date = uniDate[1]; - }else if(field == "edition") { - newItem.edition = value; - }else if(field == "physical descrip") { - //support - var physParts = value.split(" : "); - var physParts = physParts[0].split(" ; "); - newItem.pages = physParts[0]; - } else if(field == "publication info") { - var pubParts = value.split(" : "); - newItem.place = pubParts[0]; - newItem.publisher = pubParts[1]; - } else if(field == "personal author") { - newItem.creators.push(Zotero.Utilities.cleanAuthor(value, "author", true)); - } else if(field == "performer") { - newItem.creators.push(Zotero.Utilities.cleanAuthor(value, "performer", true)); - } else if(field == "author"){ - newItem.creators.push(Zotero.Utilities.cleanAuthor(value, "author", true)); - } else if(field == "added author") { - newItem.creators.push(Zotero.Utilities.cleanAuthor(value, "contributor", true)); - } else if(field == "conference 
author" || field == "corporate author") { - newItem.creators.push(value); - } else if(field == "subject" || field == "corporate subject" || field == "geographic term") { - var subjects = value.split("--"); - newItem.tags = newItem.tags.concat(subjects); - } else if(field == "personal subject") { - var subjects = value.split(", "); - newItem.tags = newItem.tags.push(value[0]+", "+value[1]); - } else if(value && field != "http") { - newItem.extra += casedField+": "+value+"\n"; - } - } - } catch (e) {} - elmt = elmts.iterateNext(); - } - - if(newItem.extra) { - newItem.extra = newItem.extra.substr(0, newItem.extra.length-1); - } - - var callNumber = doc.evaluate('//tr/td[1][@class="holdingslist"]/strong/text()', doc, nsResolver, XPathResult.ANY_TYPE, null).iterateNext(); - if(callNumber && callNumber.nodeValue) { - newItem.callNumber = callNumber.nodeValue; - } - - var domain = doc.location.href.match(/https?:\/\/([^/]+)/); - newItem.repository = domain[1]+" Library Catalog"; - newItem.accessed = Date(); - newItem.complete(); - return true; -} - -function doWeb(doc, url){ - var namespace = doc.documentElement.namespaceURI; - var nsResolver = namespace ? 
function(prefix) { - if (prefix == 'x') return namespace; else return null; - } : null; - - var sirsiNew = true; //toggle between SIRSI -2003 and SIRSI 2003+ - var xpath = '//td[@class="searchsum"]/table'; - - if(doc.evaluate(xpath, doc, nsResolver, XPathResult.ANY_TYPE, null).iterateNext()) { - Zotero.debug("SIRSI doWeb: searchsum"); - sirsiNew = true; - } else if (doc.evaluate('//form[@name="hitlist"]/table/tbody/tr', doc, nsResolver, XPathResult.ANY_TYPE, null).iterateNext()) { - Zotero.debug("SIRSI doWeb: hitlist"); - sirsiNew = false; - } else if (doc.evaluate('//tr[th[@class="viewmarctags"]][td[@class="viewmarctags"]]', doc, nsResolver, XPathResult.ANY_TYPE, null).iterateNext()) { - Zotero.debug("SIRSI doWeb: viewmarctags"); - sirsiNew = true; - } else if (doc.evaluate('//input[@name="VOPTIONS"]', doc, nsResolver, XPathResult.ANY_TYPE, null).iterateNext()) { - Zotero.debug("SIRSI doWeb: VOPTIONS"); - sirsiNew = false; - } else { - var elmts = doc.evaluate('/html/body/form//text()', doc, nsResolver, XPathResult.ANY_TYPE, null); - while(elmt = elmts.iterateNext()) { - if(Zotero.Utilities.superCleanString(elmt.nodeValue) == "Viewing record") { - Zotero.debug("SIRSI doWeb: Viewing record"); - sirsiNew = false; - } - } - } - - if (sirsiNew) { //executes Simon's SIRSI 2003+ scraper code - Zotero.debug("Running SIRSI 2003+ code"); - if(!scrape(doc)) { - var checkboxes = new Array(); - var urls = new Array(); - var availableItems = new Array(); - //begin IUCAT fixes by Andrew Smith - var iuRe = /^https?:\/\/www\.iucat\.iu\.edu/; - var iu = iuRe.exec(url); - //IUCAT fix 1 of 2 - if (iu){ - var tableRows = doc.evaluate('//td[@class="searchsum"]/table[//input[@class="submitLink"]]', doc, nsResolver, XPathResult.ANY_TYPE, null); - } else{ - var tableRows = doc.evaluate('//td[@class="searchsum"]/table[//input[@value="Details"]]', doc, nsResolver, XPathResult.ANY_TYPE, null); - } - var tableRow = tableRows.iterateNext(); // skip first row - // Go through table rows - 
while(tableRow = tableRows.iterateNext()) { - //IUCAT fix 2 of 2 - if (iu){ - var input = doc.evaluate('.//input[@class="submitLink"]', tableRow, nsResolver, XPathResult.ANY_TYPE, null).iterateNext(); - var text = doc.evaluate('.//label/span', tableRow, nsResolver, XPathResult.ANY_TYPE, null).iterateNext().textContent; - } else { - var input = doc.evaluate('.//input[@value="Details"]', tableRow, nsResolver, XPathResult.ANY_TYPE, null).iterateNext(); - var text = doc.evaluate('.//label/strong', tableRow, nsResolver, XPathResult.ANY_TYPE, null).iterateNext().textContent; - } - //end IUCAT fixes by Andrew Smith - if(text) { - availableItems[input.name] = text; - } - } - var items = Zotero.selectItems(availableItems); - if(!items) { - return true; - } - var hostRe = new RegExp("^http(?:s)?://[^/]+"); - var m = hostRe.exec(doc.location.href); - Zotero.debug("href: " + doc.location.href); - var hitlist = doc.forms.namedItem("hitlist"); - var baseUrl = m[0]+hitlist.getAttribute("action")+"?first_hit="+hitlist.elements.namedItem("first_hit").value+"&last_hit="+hitlist.elements.namedItem("last_hit").value; - var uris = new Array(); - for(var i in items) { - uris.push(baseUrl+"&"+i+"=Details"); - } - Zotero.Utilities.processDocuments(uris, function(doc) { scrape(doc) }, - function() { Zotero.done() }, null); - Zotero.wait(); - } - } else{ //executes Simon's SIRSI -2003 translator code - Zotero.debug("Running SIRSI -2003 code"); - var uri = doc.location.href; - var recNumbers = new Array(); - var xpath = '//form[@name="hitlist"]/table/tbody/tr'; - var elmts = doc.evaluate(xpath, doc, nsResolver, XPathResult.ANY_TYPE, null); - var elmt = elmts.iterateNext(); - if(elmt) { // Search results page - var uriRegexp = /^http:\/\/[^\/]+/; - var m = uriRegexp.exec(uri); - var postAction = doc.forms.namedItem("hitlist").getAttribute("action"); - var newUri = m[0]+postAction.substr(0, postAction.length-1)+"40"; - var titleRe = /<br>\s*(.*[^\s])\s*<br>/i; - var items = new Array(); - do { 
- var checkbox = doc.evaluate('.//input[@type="checkbox"]', elmt, nsResolver, XPathResult.ANY_TYPE, null).iterateNext(); - // Collect title - var title = doc.evaluate("./td[2]", elmt, nsResolver, XPathResult.ANY_TYPE, null).iterateNext().textContent; - if(checkbox && title) { - items[checkbox.name] = Zotero.Utilities.cleanString(title); - } - } while(elmt = elmts.iterateNext()); - items = Zotero.selectItems(items); - - if(!items) { - return true; - } - - for(var i in items) { - recNumbers.push(i); - } - } else {// Normal page - // this regex will fail about 1/100,000,000 tries - var uriRegexp = /^((.*?)\/([0-9]+?))\//; - var m = uriRegexp.exec(uri); - var newUri = m[1]+"/40" - var elmts = doc.evaluate('/html/body/form', doc, nsResolver, XPathResult.ANY_TYPE, null); - while(elmt = elmts.iterateNext()) { - var initialText = doc.evaluate('.//text()[1]', elmt, nsResolver, XPathResult.ANY_TYPE, null).iterateNext(); - if(initialText && initialText.nodeValue && Zotero.Utilities.superCleanString(initialText.nodeValue) == "Viewing record") { - recNumbers.push(doc.evaluate('./b[1]/text()[1]', elmt, nsResolver, XPathResult.ANY_TYPE, null).iterateNext().nodeValue); - break; - } - } - } - - var translator = Zotero.loadTranslator("import"); - translator.setTranslator("a6ee60df-1ddc-4aae-bb25-45e0537be973"); - var marc = translator.getTranslatorObject(); - Zotero.Utilities.loadDocument(newUri+'?marks='+recNumbers.join(",")+'&shadow=NO&format=FLAT+ASCII&sort=TITLE&vopt_elst=ALL&library=ALL&display_rule=ASCENDING&duedate_code=l&holdcount_code=t&DOWNLOAD_x=22&DOWNLOAD_y=12&address=&form_type=', function(doc) { - var pre = doc.getElementsByTagName("pre"); - var text = pre[0].textContent; - var documents = text.split("*** DOCUMENT BOUNDARY ***"); - for(var j=1; j<documents.length; j++) { - var uri = 
newUri+"?marks="+recNumbers[j]+"&shadow=NO&format=FLAT+ASCII&sort=TITLE&vopt_elst=ALL&library=ALL&display_rule=ASCENDING&duedate_code=l&holdcount_code=t&DOWNLOAD_x=22&DOWNLOAD_y=12&address=&form_type="; - var lines = documents[j].split("\n"); - var record = new marc.record(); - var tag, content; - var ind = ""; - for(var i=0; i<lines.length; i++) { - var line = lines[i]; - if(line[0] == "." && line.substr(4,2) == ". ") { - if(tag) { - content = content.replace(/\|([a-z])/g, marc.subfieldDelimiter+"$1"); - record.addField(tag, ind, content); - } - } else { - content += " "+line.substr(6); - continue; - } - tag = line.substr(1, 3); - if(tag[0] != "0" || tag[1] != "0") { - ind = line.substr(6, 2); - content = line.substr(8); - } else { - content = line.substr(7); - if(tag == "000") { - tag = undefined; - record.leader = "00000"+content; - Zotero.debug("the leader is: "+record.leader); - } - } - } - - var newItem = new Zotero.Item(); - record.translate(newItem); - var domain = url.match(/https?:\/\/([^/]+)/); - newItem.repository = domain[1]+" Library Catalog"; - newItem.complete(); - } - Zotero.done(); - }); - Zotero.wait(); - } -} -\ No newline at end of file diff --git a/translators/IRIS.js b/translators/IRIS.js @@ -0,0 +1,328 @@ +{ + "translatorID":"8381bf68-11fa-418c-8530-2e00284d3efd", + "translatorType":4, + "label":"IRIS", + "creator":"Chad Mills and Michael Berkowitz", + "target":"https://[^/]*www.iris.rutgers.edu[^/]*/", + "minVersion":"1.0.0b4.r5", + "maxVersion":"", + "priority":100, + "inRepository":true, + "lastUpdated":"2010-09-03 00:45:00" +} + +function detectWeb(doc, url) { + if (doc.evaluate('/html/body/div[@class="columns_container"]/div[contains(@class, "left_column")]/div[@class="content_container"]/div[@class="content"]/form[@id="hitlist"]', doc, null, XPathResult.ANY_TYPE, null).iterateNext()) { + return "multiple"; + } else if (doc.evaluate('/html/body/div[@class="columns_container"]/div[contains(@class, 
"left_column")]/form[@name="item_view"]/div[@class="content_container item_details"]/div[@class="content"]/ul[contains(@class, "detail_page")]/li/div/table', doc, null, XPathResult.ANY_TYPE, null).iterateNext()) { + return "book"; + } +} + +function scrape(doc) { + var namespace = doc.documentElement.namespaceURI; + var nsResolver = namespace ? function(prefix) { + if (prefix == 'x') return namespace; else return null; + } : null; + + var xpath = '/html/body/div[@class="columns_container"]/div[contains(@class, "left_column")]/form[@name="item_view"]/div[@class="content_container item_details"]/div[@class="content"]/ul[contains(@class, "detail_page")]/li/div/table//tr[th[@class="viewmarctags1"]][td[@class="viewmarctags"]]'; + + var elmts = doc.evaluate(xpath, doc, nsResolver, XPathResult.ANY_TYPE, null); + + var elmt = elmts.iterateNext(); + + if(!elmt) { + return false; + } + + var newItem = new Zotero.Item("book"); + newItem.extra = ""; + + newItem.series = ""; + var seriesItemCount = 0; + + while(elmt) { + try { + var node = doc.evaluate('./TD[1]/A[1]/text()[1]', elmt, nsResolver, XPathResult.ANY_TYPE, null).iterateNext(); + if(!node) { + var node = doc.evaluate('./TD[1]/text()[1]', elmt, nsResolver, XPathResult.ANY_TYPE, null).iterateNext(); + } + if(node) { + var casedField = Zotero.Utilities.superCleanString(doc.evaluate('./TH[1]/text()', elmt, nsResolver, XPathResult.ANY_TYPE, null).iterateNext().nodeValue); + field = casedField.toLowerCase(); + + var value = Zotero.Utilities.superCleanString(node.nodeValue); + + if(field == "publisher") { + newItem.publisher = value; + }else if(field == "pub date") { + var re = /[0-9]+/; + var m = re.exec(value); + newItem.date = m[0]; + }else if(field == "isbn") { + var re = /^[0-9](?:[0-9X]+)/; + var m = re.exec(value); + newItem.ISBN = m[0]; + }else if(field == "title") { + Zotero.debug(value); + var titleParts = value.split(" / "); + re = /\[(.+)\]/i; + if (re.test(titleParts[0])) { + var ar = re.exec(titleParts[0]); + 
var itype = ar[1].toLowerCase(); + if(itype== "phonodisc" || itype == "sound recording"){ + newItem.itemType = "audioRecording"; + }else if(itype=="videorecording"){ + newItem.itemType = "videoRecording"; + }else if(itype=="electronic resource"){ + newItem.itemType = "webPage"; + } + } + newItem.title = Zotero.Utilities.capitalizeTitle(titleParts[0]); + }else if(field == "series") {//push onto item, delimit with semicolon when needed + if (seriesItemCount != 0){ + newItem.series += "; " + value; + }else if(seriesItemCount == 0) { + newItem.series = value; + } + seriesItemCount++;//bump counter + }else if(field == "dissertation note") { + newItem.itemType = "thesis"; + var thesisParts = value.split("--"); + var uniDate = thesisParts[1].split(", "); + newItem.university = uniDate[0]; + newItem.date = uniDate[1]; + }else if(field == "edition") { + newItem.edition = value; + }else if(field == "physical descrip") { + //support + var physParts = value.split(" : "); + var physParts = physParts[0].split(" ; "); + //determine pages, split on " p." 
+ var physPages = value.split(" p."); + //break off anything in the beginning before the numbers + var pageParts = physPages[0].split(" "); + newItem.numPages = pageParts[pageParts.length-1]; + }else if(field == "publication info") { + var pubParts = value.split(" : "); + newItem.place = pubParts[0]; + //drop off first part of array and recombine + pubParts.shift(); + var i; + var publisherInfo; + for (i in pubParts) { + if (i == 0) { + publisherInfo = pubParts[i] + " : "; + } else { + publisherInfo = publisherInfo + pubParts[i] + " : "; + } + }//END for + //drop off last colon + publisherInfo = publisherInfo.substring(0,(publisherInfo.length - 3)); + //break apart publication parts into Publisher and Date + var publisherParts = publisherInfo.split(","); + newItem.publisher = publisherParts[0]; + //check that first character isn't a 'c', if so drop it + if (publisherParts[1].substring(1,2) == "c") { + newItem.date = publisherParts[1].substring(2); + } + else { + newItem.date = publisherParts[1]; + } + }else if(field == "personal author") { + newItem.creators.push(Zotero.Utilities.cleanAuthor(value, "author", true)); + }else if(field == "performer") { + newItem.creators.push(Zotero.Utilities.cleanAuthor(value, "performer", true)); + }else if(field == "author"){ + newItem.creators.push(Zotero.Utilities.cleanAuthor(value, "author", true)); + }else if(field == "added author") { + newItem.creators.push(Zotero.Utilities.cleanAuthor(value, "contributor", true)); + }else if(field == "conference author" || field == "corporate author") { + newItem.creators.push(value); + }else if(field == "subject" || field == "corporate subject" || field == "geographic term") { + var subjects = value.split("--"); + newItem.tags = newItem.tags.concat(subjects); + }else if(field == "personal subject") { + var subjects = value.split(", "); + newItem.tags = newItem.tags.push(value[0]+", "+value[1]); + }else if(value && field != "http") { + newItem.extra += casedField+": "+value+"\n"; + } + } + 
} catch (e) {} + elmt = elmts.iterateNext(); + }//END if node + + if(newItem.extra) { + newItem.extra = newItem.extra.substr(0, newItem.extra.length-1); + } + + var callNumber = doc.evaluate('//tr/td[1][@class="holdingslist"]/strong/text()', doc, nsResolver, XPathResult.ANY_TYPE, null).iterateNext(); + + if(callNumber && callNumber.nodeValue) { + newItem.callNumber = callNumber.nodeValue; + } + + var domain = doc.location.href.match(/https?:\/\/([^/]+)/); + newItem.repository = domain[1]+" Library Catalog"; + newItem.accessed = Date(); + newItem.complete(); + return true; + }//END try + + function doWeb(doc, url){ + var namespace = doc.documentElement.namespaceURI; + var nsResolver = namespace ? function(prefix) { + if (prefix == 'x') return namespace; else return null; + } : null; + + var sirsiNew = true; //toggle between SIRSI -2003 and SIRSI 2003+ + + var xpath = '/html/body/div[@class="columns_container"]/div[contains(@class, "left_column")]/div[@class="content_container"]/div[@class="content"]/form[@id="hitlist"]/ul[@class="hit_list"]/li/ul[starts-with(@class, "hit_list_row")]/li[@class="hit_list_item_info"]/dl'; + + if(doc.evaluate(xpath, doc, nsResolver, XPathResult.ANY_TYPE, null).iterateNext()) { + Zotero.debug("SIRSI doWeb: searchsum"); + sirsiNew = true; + }else if (doc.evaluate('//form[@name="hitlist"]/table/tbody/tr', doc, nsResolver, XPathResult.ANY_TYPE, null).iterateNext()) { + Zotero.debug("SIRSI doWeb: hitlist"); + sirsiNew = false; + }else if (doc.evaluate('//tr[th[@class="viewmarctags"]][td[@class="viewmarctags"]]', doc, nsResolver, XPathResult.ANY_TYPE, null).iterateNext()) { + Zotero.debug("SIRSI doWeb: viewmarctags"); + sirsiNew = true; + }else if (doc.evaluate('//input[@name="VOPTIONS"]', doc, nsResolver, XPathResult.ANY_TYPE, null).iterateNext()) { + Zotero.debug("SIRSI doWeb: VOPTIONS"); + sirsiNew = false; + }else { + var elmts = doc.evaluate('/html/body/form//text()', doc, nsResolver, XPathResult.ANY_TYPE, null); + //var elmts = 
doc.evaluate(' ', doc, nsResolver, XPathResult.ANY_TYPE, null); + while(elmt = elmts.iterateNext()) { + if(Zotero.Utilities.superCleanString(elmt.nodeValue) == "Viewing record") { + Zotero.debug("SIRSI doWeb: Viewing record"); + sirsiNew = false; + } + }//END while elmts + }//END FUNCTION doWeb + + if (sirsiNew) { //executes Simon's SIRSI 2003+ scraper code + if(!scrape(doc)) { + var checkboxes = new Array(); + var urls = new Array(); + var availableItems = new Array(); + //pull items + var tableRows = doc.evaluate('//ul[@class="hit_list"]/li/ul[contains(@class, "hit_list_row")][//input[@value="Details"]]', doc, nsResolver, XPathResult.ANY_TYPE, null); + + // Go through table rows + while(tableRow = tableRows.iterateNext()) { + var input = doc.evaluate('.//input[@value="Details"]', tableRow, nsResolver, XPathResult.ANY_TYPE, null).iterateNext(); + var text = doc.evaluate('.//strong', tableRow, nsResolver, XPathResult.ANY_TYPE, null).iterateNext().textContent; + if(text) { + availableItems[input.name] = text; + } + }//END while + var items = Zotero.selectItems(availableItems); + if(!items) { + return true; + } + var hostRe = new RegExp("^http(?:s)?://[^/]+"); + var m = hostRe.exec(doc.location.href); + Zotero.debug("href: " + doc.location.href); + var hitlist = doc.forms.namedItem("hitlist"); + var baseUrl = m[0]+hitlist.getAttribute("action")+"?first_hit="+hitlist.elements.namedItem("first_hit").value+"&last_hit="+hitlist.elements.namedItem("last_hit").value; + var uris = new Array(); + for(var i in items) { + uris.push(baseUrl+"&"+i+"=Details"); + } + Zotero.Utilities.processDocuments(uris, function(doc) { scrape(doc) }, function() { Zotero.done() }, null); + Zotero.wait(); + }//END if not scrape(doc) + }else{ //executes Simon's SIRSI -2003 translator code + Zotero.debug("Running SIRSI -2003 code"); + var uri = doc.location.href; + var recNumbers = new Array(); + var xpath = '//form[@name="hitlist"]/table/tbody/tr'; + var elmts = doc.evaluate(xpath, doc, 
nsResolver, XPathResult.ANY_TYPE, null); + var elmt = elmts.iterateNext(); + if(elmt) { // Search results page + var uriRegexp = /^http:\/\/[^\/]+/; + var m = uriRegexp.exec(uri); + var postAction = doc.forms.namedItem("hitlist").getAttribute("action"); + var newUri = m[0]+postAction.substr(0, postAction.length-1)+"40"; + var titleRe = /<br>\s*(.*[^\s])\s*<br>/i; + var items = new Array(); + do { + var checkbox = doc.evaluate('.//input[@type="checkbox"]', elmt, nsResolver, XPathResult.ANY_TYPE, null).iterateNext(); + // Collect title + var title = doc.evaluate("./td[2]", elmt, nsResolver, XPathResult.ANY_TYPE, null).iterateNext().textContent; + if(checkbox && title) { + items[checkbox.name] = Zotero.Utilities.cleanString(title); + } + } while(elmt = elmts.iterateNext()); + items = Zotero.selectItems(items); + if(!items) { + return true; + } + for(var i in items) { + recNumbers.push(i); + } + } else {// Normal page + // this regex will fail about 1/100,000,000 tries + var uriRegexp = /^((.*?)\/([0-9]+?))\//; + var m = uriRegexp.exec(uri); + var newUri = m[1]+"/40" + var elmts = doc.evaluate('/html/body/form', doc, nsResolver, XPathResult.ANY_TYPE, null); + while(elmt = elmts.iterateNext()) { + var initialText = doc.evaluate('.//text()[1]', elmt, nsResolver, XPathResult.ANY_TYPE, null).iterateNext(); + if(initialText && initialText.nodeValue && Zotero.Utilities.superCleanString(initialText.nodeValue) == "Viewing record") { + recNumbers.push(doc.evaluate('./b[1]/text()[1]', elmt, nsResolver, XPathResult.ANY_TYPE, null).iterateNext().nodeValue); + break; + } + } + } + var translator = Zotero.loadTranslator("import"); + translator.setTranslator("a6ee60df-1ddc-4aae-bb25-45e0537be973"); + var marc = translator.getTranslatorObject(); + Zotero.Utilities.loadDocument(newUri+'?marks='+recNumbers.join(",")+'&shadow=NO&format=FLAT+ASCII&sort=TITLE&vopt_elst=ALL&library=ALL&display_rule=ASCENDING&duedate_code=l&holdcount_code=t&DOWNLOAD_x=22&DOWNLOAD_y=12&address=&form_type=', 
function(doc) { + var pre = doc.getElementsByTagName("pre"); + var text = pre[0].textContent; + var documents = text.split("*** DOCUMENT BOUNDARY ***"); + for(var j=1; j<documents.length; j++) { + var uri = newUri+"?marks="+recNumbers[j]+"&shadow=NO&format=FLAT+ASCII&sort=TITLE&vopt_elst=ALL&library=ALL&display_rule=ASCENDING&duedate_code=l&holdcount_code=t&DOWNLOAD_x=22&DOWNLOAD_y=12&address=&form_type="; + var lines = documents[j].split("\n"); + var record = new marc.record(); + var tag, content; + var ind = ""; + for(var i=0; i<lines.length; i++) { + var line = lines[i]; + if(line[0] == "." && line.substr(4,2) == ". ") { + if(tag) { + content = content.replace(/\|([a-z])/g, marc.subfieldDelimiter+"$1"); + record.addField(tag, ind, content); + } + } else { + content += " "+line.substr(6); + continue; + } + tag = line.substr(1, 3); + if(tag[0] != "0" || tag[1] != "0") { + ind = line.substr(6, 2); + content = line.substr(8); + } else { + content = line.substr(7); + if(tag == "000") { + tag = undefined; + record.leader = "00000"+content; + Zotero.debug("the leader is: "+record.leader); + } + } + }//end FOR + var newItem = new Zotero.Item(); + record.translate(newItem); + var domain = url.match(/https?:\/\/([^/]+)/); + newItem.repository = domain[1]+" Library Catalog"; + newItem.complete(); + }//end FOR + Zotero.done(); + }); + Zotero.wait(); + }//END while +}//END scrape function diff --git a/translators/Library Catalog (InnoPAC).js b/translators/Library Catalog (InnoPAC).js @@ -3,7 +3,7 @@ "translatorType":4, "label":"Library Catalog (InnoPAC)", "creator":"Simon Kornblith and Michael Berkowitz", - "target":"(search~|\\/search\\?|(a|X|t|Y|w)\\?|\\?(searchtype|searchscope)|frameset&FF|record=b[0-9]+~S[0-9]|/search/q\\?)", + "target":"(search~|\\/search\\?|(a|X|t|Y|w)\\?|\\?(searchtype|searchscope)|frameset&FF|record=b[0-9]+~?S?[0-9]?|/search/q\\?)", "minVersion":"1.0.0b3.r1", "maxVersion":"", "priority":200, @@ -224,4 +224,4 @@ function doWeb(doc, url) { } 
Zotero.wait(); -} -\ No newline at end of file +}