commit f7787305d9738ed2630be88c7b360156cc1483e9
parent 3ce6e429eda54a821a65b9ebc60f94a9bb4393b0
Author: Avram Lyon <ajlyon@gmail.com>
Date: Sat, 4 Sep 2010 07:53:20 +0000
- Fix for some InnoPAC installations, per http://forums.zotero.org/discussion/14023
- Chad's new Rutgers IRIS translator, now moved to standard filename
Diffstat:
3 files changed, 330 insertions(+), 319 deletions(-)
diff --git a/translators/IRIS translator.js b/translators/IRIS translator.js
@@ -1,315 +0,0 @@
-{
- "translatorID":"8381bf68-11fa-418c-8530-2e00284d3efd",
- "translatorType":4,
- "label":"IRIS translator",
- "creator":"Chad Mills and Michael Berkowitz",
- "target":"http://[^/]*www.iris.rutgers.edu[^/]*/",
- "minVersion":"1.0.0b4.r5",
- "maxVersion":"",
- "priority":100,
- "inRepository":true,
- "lastUpdated":"2008-04-09 00:45:00"
-}
-
-function detectWeb(doc, url) {
- if (doc.evaluate('//tr/td[1][@class="searchsum"]', doc, null, XPathResult.ANY_TYPE, null).iterateNext()) {
- return "multiple";
- } else if (doc.evaluate('//th[@class="viewmarctags"]', doc, null, XPathResult.ANY_TYPE, null).iterateNext()) {
- return "book";
- }
-}
-
-function scrape(doc) {
- var namespace = doc.documentElement.namespaceURI;
- var nsResolver = namespace ? function(prefix) {
- if (prefix == 'x') return namespace; else return null;
- } : null;
-
- var xpath = '//div[@id="panel1"]//tr[th[@class="viewmarctags"]][td[@class="viewmarctags"]]';
- var elmts = doc.evaluate(xpath, doc, nsResolver, XPathResult.ANY_TYPE, null);
- var elmt = elmts.iterateNext();
-
- if(!elmt) {
- return false;
- }
-
- var newItem = new Zotero.Item("book");
- newItem.extra = "";
-
- newItem.series = "";
- var seriesItemCount = 0;
-
- while(elmt) {
- try {
- var node = doc.evaluate('./TD[1]/A[1]/strong[1]/text()[1]', elmt, nsResolver, XPathResult.ANY_TYPE, null).iterateNext();
- if(!node) {
- var node = doc.evaluate('./TD[1]/text()[1]', elmt, nsResolver, XPathResult.ANY_TYPE, null).iterateNext();
- }
- if(node) {
- var casedField = Zotero.Utilities.superCleanString(doc.evaluate('./TH[1]/text()[1]', elmt, nsResolver, XPathResult.ANY_TYPE, null).iterateNext().nodeValue);
- field = casedField.toLowerCase();
- var value = Zotero.Utilities.superCleanString(node.nodeValue);
- if(field == "publisher") {
- newItem.publisher = value;
- } else if(field == "pub date") {
- var re = /[0-9]+/;
- var m = re.exec(value);
- newItem.date = m[0];
- } else if(field == "isbn") {
- var re = /^[0-9](?:[0-9X]+)/;
- var m = re.exec(value);
- newItem.ISBN = m[0];
- } else if(field == "title") {
- var titleParts = value.split(" / ");
- re = /\[(.+)\]/i;
- if (re.test(titleParts[0])) {
- var ar = re.exec(titleParts[0]);
- var itype = ar[1].toLowerCase();
- if(itype== "phonodisc" || itype == "sound recording"){
- newItem.itemType = "audioRecording";
- }else if(itype=="videorecording"){
- newItem.itemType = "videoRecording";
- }else if(itype=="electronic resource"){
- newItem.itemType = "webPage";
- }
- }
- newItem.title = Zotero.Utilities.capitalizeTitle(titleParts[0]);
- }else if(field == "series") {//push onto item, delimit with semicolon when needed
- if (seriesItemCount != 0){
- newItem.series += "; " + value;
- }
- else if(seriesItemCount == 0) {
- newItem.series = value;
- }
- seriesItemCount++;//bump counter
- }else if(field == "dissertation note") {
- newItem.itemType = "thesis";
- var thesisParts = value.split("--");
- var uniDate = thesisParts[1].split(", ");
- newItem.university = uniDate[0];
- newItem.date = uniDate[1];
- }else if(field == "edition") {
- newItem.edition = value;
- }else if(field == "physical descrip") {
- //support
- var physParts = value.split(" : ");
- var physParts = physParts[0].split(" ; ");
- newItem.pages = physParts[0];
- } else if(field == "publication info") {
- var pubParts = value.split(" : ");
- newItem.place = pubParts[0];
- newItem.publisher = pubParts[1];
- } else if(field == "personal author") {
- newItem.creators.push(Zotero.Utilities.cleanAuthor(value, "author", true));
- } else if(field == "performer") {
- newItem.creators.push(Zotero.Utilities.cleanAuthor(value, "performer", true));
- } else if(field == "author"){
- newItem.creators.push(Zotero.Utilities.cleanAuthor(value, "author", true));
- } else if(field == "added author") {
- newItem.creators.push(Zotero.Utilities.cleanAuthor(value, "contributor", true));
- } else if(field == "conference author" || field == "corporate author") {
- newItem.creators.push(value);
- } else if(field == "subject" || field == "corporate subject" || field == "geographic term") {
- var subjects = value.split("--");
- newItem.tags = newItem.tags.concat(subjects);
- } else if(field == "personal subject") {
- var subjects = value.split(", ");
- newItem.tags = newItem.tags.push(value[0]+", "+value[1]);
- } else if(value && field != "http") {
- newItem.extra += casedField+": "+value+"\n";
- }
- }
- } catch (e) {}
- elmt = elmts.iterateNext();
- }
-
- if(newItem.extra) {
- newItem.extra = newItem.extra.substr(0, newItem.extra.length-1);
- }
-
- var callNumber = doc.evaluate('//tr/td[1][@class="holdingslist"]/strong/text()', doc, nsResolver, XPathResult.ANY_TYPE, null).iterateNext();
- if(callNumber && callNumber.nodeValue) {
- newItem.callNumber = callNumber.nodeValue;
- }
-
- var domain = doc.location.href.match(/https?:\/\/([^/]+)/);
- newItem.repository = domain[1]+" Library Catalog";
- newItem.accessed = Date();
- newItem.complete();
- return true;
-}
-
-function doWeb(doc, url){
- var namespace = doc.documentElement.namespaceURI;
- var nsResolver = namespace ? function(prefix) {
- if (prefix == 'x') return namespace; else return null;
- } : null;
-
- var sirsiNew = true; //toggle between SIRSI -2003 and SIRSI 2003+
- var xpath = '//td[@class="searchsum"]/table';
-
- if(doc.evaluate(xpath, doc, nsResolver, XPathResult.ANY_TYPE, null).iterateNext()) {
- Zotero.debug("SIRSI doWeb: searchsum");
- sirsiNew = true;
- } else if (doc.evaluate('//form[@name="hitlist"]/table/tbody/tr', doc, nsResolver, XPathResult.ANY_TYPE, null).iterateNext()) {
- Zotero.debug("SIRSI doWeb: hitlist");
- sirsiNew = false;
- } else if (doc.evaluate('//tr[th[@class="viewmarctags"]][td[@class="viewmarctags"]]', doc, nsResolver, XPathResult.ANY_TYPE, null).iterateNext()) {
- Zotero.debug("SIRSI doWeb: viewmarctags");
- sirsiNew = true;
- } else if (doc.evaluate('//input[@name="VOPTIONS"]', doc, nsResolver, XPathResult.ANY_TYPE, null).iterateNext()) {
- Zotero.debug("SIRSI doWeb: VOPTIONS");
- sirsiNew = false;
- } else {
- var elmts = doc.evaluate('/html/body/form//text()', doc, nsResolver, XPathResult.ANY_TYPE, null);
- while(elmt = elmts.iterateNext()) {
- if(Zotero.Utilities.superCleanString(elmt.nodeValue) == "Viewing record") {
- Zotero.debug("SIRSI doWeb: Viewing record");
- sirsiNew = false;
- }
- }
- }
-
- if (sirsiNew) { //executes Simon's SIRSI 2003+ scraper code
- Zotero.debug("Running SIRSI 2003+ code");
- if(!scrape(doc)) {
- var checkboxes = new Array();
- var urls = new Array();
- var availableItems = new Array();
- //begin IUCAT fixes by Andrew Smith
- var iuRe = /^https?:\/\/www\.iucat\.iu\.edu/;
- var iu = iuRe.exec(url);
- //IUCAT fix 1 of 2
- if (iu){
- var tableRows = doc.evaluate('//td[@class="searchsum"]/table[//input[@class="submitLink"]]', doc, nsResolver, XPathResult.ANY_TYPE, null);
- } else{
- var tableRows = doc.evaluate('//td[@class="searchsum"]/table[//input[@value="Details"]]', doc, nsResolver, XPathResult.ANY_TYPE, null);
- }
- var tableRow = tableRows.iterateNext(); // skip first row
- // Go through table rows
- while(tableRow = tableRows.iterateNext()) {
- //IUCAT fix 2 of 2
- if (iu){
- var input = doc.evaluate('.//input[@class="submitLink"]', tableRow, nsResolver, XPathResult.ANY_TYPE, null).iterateNext();
- var text = doc.evaluate('.//label/span', tableRow, nsResolver, XPathResult.ANY_TYPE, null).iterateNext().textContent;
- } else {
- var input = doc.evaluate('.//input[@value="Details"]', tableRow, nsResolver, XPathResult.ANY_TYPE, null).iterateNext();
- var text = doc.evaluate('.//label/strong', tableRow, nsResolver, XPathResult.ANY_TYPE, null).iterateNext().textContent;
- }
- //end IUCAT fixes by Andrew Smith
- if(text) {
- availableItems[input.name] = text;
- }
- }
- var items = Zotero.selectItems(availableItems);
- if(!items) {
- return true;
- }
- var hostRe = new RegExp("^http(?:s)?://[^/]+");
- var m = hostRe.exec(doc.location.href);
- Zotero.debug("href: " + doc.location.href);
- var hitlist = doc.forms.namedItem("hitlist");
- var baseUrl = m[0]+hitlist.getAttribute("action")+"?first_hit="+hitlist.elements.namedItem("first_hit").value+"&last_hit="+hitlist.elements.namedItem("last_hit").value;
- var uris = new Array();
- for(var i in items) {
- uris.push(baseUrl+"&"+i+"=Details");
- }
- Zotero.Utilities.processDocuments(uris, function(doc) { scrape(doc) },
- function() { Zotero.done() }, null);
- Zotero.wait();
- }
- } else{ //executes Simon's SIRSI -2003 translator code
- Zotero.debug("Running SIRSI -2003 code");
- var uri = doc.location.href;
- var recNumbers = new Array();
- var xpath = '//form[@name="hitlist"]/table/tbody/tr';
- var elmts = doc.evaluate(xpath, doc, nsResolver, XPathResult.ANY_TYPE, null);
- var elmt = elmts.iterateNext();
- if(elmt) { // Search results page
- var uriRegexp = /^http:\/\/[^\/]+/;
- var m = uriRegexp.exec(uri);
- var postAction = doc.forms.namedItem("hitlist").getAttribute("action");
- var newUri = m[0]+postAction.substr(0, postAction.length-1)+"40";
- var titleRe = /<br>\s*(.*[^\s])\s*<br>/i;
- var items = new Array();
- do {
- var checkbox = doc.evaluate('.//input[@type="checkbox"]', elmt, nsResolver, XPathResult.ANY_TYPE, null).iterateNext();
- // Collect title
- var title = doc.evaluate("./td[2]", elmt, nsResolver, XPathResult.ANY_TYPE, null).iterateNext().textContent;
- if(checkbox && title) {
- items[checkbox.name] = Zotero.Utilities.cleanString(title);
- }
- } while(elmt = elmts.iterateNext());
- items = Zotero.selectItems(items);
-
- if(!items) {
- return true;
- }
-
- for(var i in items) {
- recNumbers.push(i);
- }
- } else {// Normal page
- // this regex will fail about 1/100,000,000 tries
- var uriRegexp = /^((.*?)\/([0-9]+?))\//;
- var m = uriRegexp.exec(uri);
- var newUri = m[1]+"/40"
- var elmts = doc.evaluate('/html/body/form', doc, nsResolver, XPathResult.ANY_TYPE, null);
- while(elmt = elmts.iterateNext()) {
- var initialText = doc.evaluate('.//text()[1]', elmt, nsResolver, XPathResult.ANY_TYPE, null).iterateNext();
- if(initialText && initialText.nodeValue && Zotero.Utilities.superCleanString(initialText.nodeValue) == "Viewing record") {
- recNumbers.push(doc.evaluate('./b[1]/text()[1]', elmt, nsResolver, XPathResult.ANY_TYPE, null).iterateNext().nodeValue);
- break;
- }
- }
- }
-
- var translator = Zotero.loadTranslator("import");
- translator.setTranslator("a6ee60df-1ddc-4aae-bb25-45e0537be973");
- var marc = translator.getTranslatorObject();
- Zotero.Utilities.loadDocument(newUri+'?marks='+recNumbers.join(",")+'&shadow=NO&format=FLAT+ASCII&sort=TITLE&vopt_elst=ALL&library=ALL&display_rule=ASCENDING&duedate_code=l&holdcount_code=t&DOWNLOAD_x=22&DOWNLOAD_y=12&address=&form_type=', function(doc) {
- var pre = doc.getElementsByTagName("pre");
- var text = pre[0].textContent;
- var documents = text.split("*** DOCUMENT BOUNDARY ***");
- for(var j=1; j<documents.length; j++) {
- var uri = newUri+"?marks="+recNumbers[j]+"&shadow=NO&format=FLAT+ASCII&sort=TITLE&vopt_elst=ALL&library=ALL&display_rule=ASCENDING&duedate_code=l&holdcount_code=t&DOWNLOAD_x=22&DOWNLOAD_y=12&address=&form_type=";
- var lines = documents[j].split("\n");
- var record = new marc.record();
- var tag, content;
- var ind = "";
- for(var i=0; i<lines.length; i++) {
- var line = lines[i];
- if(line[0] == "." && line.substr(4,2) == ". ") {
- if(tag) {
- content = content.replace(/\|([a-z])/g, marc.subfieldDelimiter+"$1");
- record.addField(tag, ind, content);
- }
- } else {
- content += " "+line.substr(6);
- continue;
- }
- tag = line.substr(1, 3);
- if(tag[0] != "0" || tag[1] != "0") {
- ind = line.substr(6, 2);
- content = line.substr(8);
- } else {
- content = line.substr(7);
- if(tag == "000") {
- tag = undefined;
- record.leader = "00000"+content;
- Zotero.debug("the leader is: "+record.leader);
- }
- }
- }
-
- var newItem = new Zotero.Item();
- record.translate(newItem);
- var domain = url.match(/https?:\/\/([^/]+)/);
- newItem.repository = domain[1]+" Library Catalog";
- newItem.complete();
- }
- Zotero.done();
- });
- Zotero.wait();
- }
-}
-\ No newline at end of file
diff --git a/translators/IRIS.js b/translators/IRIS.js
@@ -0,0 +1,328 @@
+{
+ "translatorID":"8381bf68-11fa-418c-8530-2e00284d3efd",
+ "translatorType":4,
+ "label":"IRIS",
+ "creator":"Chad Mills and Michael Berkowitz",
+ "target":"https://[^/]*www.iris.rutgers.edu[^/]*/",
+ "minVersion":"1.0.0b4.r5",
+ "maxVersion":"",
+ "priority":100,
+ "inRepository":true,
+ "lastUpdated":"2010-09-03 00:45:00"
+}
+
+function detectWeb(doc, url) {
+ if (doc.evaluate('/html/body/div[@class="columns_container"]/div[contains(@class, "left_column")]/div[@class="content_container"]/div[@class="content"]/form[@id="hitlist"]', doc, null, XPathResult.ANY_TYPE, null).iterateNext()) {
+ return "multiple";
+ } else if (doc.evaluate('/html/body/div[@class="columns_container"]/div[contains(@class, "left_column")]/form[@name="item_view"]/div[@class="content_container item_details"]/div[@class="content"]/ul[contains(@class, "detail_page")]/li/div/table', doc, null, XPathResult.ANY_TYPE, null).iterateNext()) {
+ return "book";
+ }
+}
+
+function scrape(doc) {
+ var namespace = doc.documentElement.namespaceURI;
+ var nsResolver = namespace ? function(prefix) {
+ if (prefix == 'x') return namespace; else return null;
+ } : null;
+
+ var xpath = '/html/body/div[@class="columns_container"]/div[contains(@class, "left_column")]/form[@name="item_view"]/div[@class="content_container item_details"]/div[@class="content"]/ul[contains(@class, "detail_page")]/li/div/table//tr[th[@class="viewmarctags1"]][td[@class="viewmarctags"]]';
+
+ var elmts = doc.evaluate(xpath, doc, nsResolver, XPathResult.ANY_TYPE, null);
+
+ var elmt = elmts.iterateNext();
+
+ if(!elmt) {
+ return false;
+ }
+
+ var newItem = new Zotero.Item("book");
+ newItem.extra = "";
+
+ newItem.series = "";
+ var seriesItemCount = 0;
+
+ while(elmt) {
+ try {
+ var node = doc.evaluate('./TD[1]/A[1]/text()[1]', elmt, nsResolver, XPathResult.ANY_TYPE, null).iterateNext();
+ if(!node) {
+ var node = doc.evaluate('./TD[1]/text()[1]', elmt, nsResolver, XPathResult.ANY_TYPE, null).iterateNext();
+ }
+ if(node) {
+ var casedField = Zotero.Utilities.superCleanString(doc.evaluate('./TH[1]/text()', elmt, nsResolver, XPathResult.ANY_TYPE, null).iterateNext().nodeValue);
+ field = casedField.toLowerCase();
+
+ var value = Zotero.Utilities.superCleanString(node.nodeValue);
+
+ if(field == "publisher") {
+ newItem.publisher = value;
+ }else if(field == "pub date") {
+ var re = /[0-9]+/;
+ var m = re.exec(value);
+ newItem.date = m[0];
+ }else if(field == "isbn") {
+ var re = /^[0-9](?:[0-9X]+)/;
+ var m = re.exec(value);
+ newItem.ISBN = m[0];
+ }else if(field == "title") {
+ Zotero.debug(value);
+ var titleParts = value.split(" / ");
+ re = /\[(.+)\]/i;
+ if (re.test(titleParts[0])) {
+ var ar = re.exec(titleParts[0]);
+ var itype = ar[1].toLowerCase();
+ if(itype== "phonodisc" || itype == "sound recording"){
+ newItem.itemType = "audioRecording";
+ }else if(itype=="videorecording"){
+ newItem.itemType = "videoRecording";
+ }else if(itype=="electronic resource"){
+ newItem.itemType = "webPage";
+ }
+ }
+ newItem.title = Zotero.Utilities.capitalizeTitle(titleParts[0]);
+ }else if(field == "series") {//push onto item, delimit with semicolon when needed
+ if (seriesItemCount != 0){
+ newItem.series += "; " + value;
+ }else if(seriesItemCount == 0) {
+ newItem.series = value;
+ }
+ seriesItemCount++;//bump counter
+ }else if(field == "dissertation note") {
+ newItem.itemType = "thesis";
+ var thesisParts = value.split("--");
+ var uniDate = thesisParts[1].split(", ");
+ newItem.university = uniDate[0];
+ newItem.date = uniDate[1];
+ }else if(field == "edition") {
+ newItem.edition = value;
+ }else if(field == "physical descrip") {
+ //support
+ var physParts = value.split(" : ");
+ var physParts = physParts[0].split(" ; ");
+ //determine pages, split on " p."
+ var physPages = value.split(" p.");
+ //break off anything in the beginning before the numbers
+ var pageParts = physPages[0].split(" ");
+ newItem.numPages = pageParts[pageParts.length-1];
+ }else if(field == "publication info") {
+ var pubParts = value.split(" : ");
+ newItem.place = pubParts[0];
+ //drop off first part of array and recombine
+ pubParts.shift();
+ var i;
+ var publisherInfo;
+ for (i in pubParts) {
+ if (i == 0) {
+ publisherInfo = pubParts[i] + " : ";
+ } else {
+ publisherInfo = publisherInfo + pubParts[i] + " : ";
+ }
+ }//END for
+ //drop off last colon
+ publisherInfo = publisherInfo.substring(0,(publisherInfo.length - 3));
+ //break apart publication parts into Publisher and Date
+ var publisherParts = publisherInfo.split(",");
+ newItem.publisher = publisherParts[0];
+ //check that first character isn't a 'c', if so drop it
+ if (publisherParts[1].substring(1,2) == "c") {
+ newItem.date = publisherParts[1].substring(2);
+ }
+ else {
+ newItem.date = publisherParts[1];
+ }
+ }else if(field == "personal author") {
+ newItem.creators.push(Zotero.Utilities.cleanAuthor(value, "author", true));
+ }else if(field == "performer") {
+ newItem.creators.push(Zotero.Utilities.cleanAuthor(value, "performer", true));
+ }else if(field == "author"){
+ newItem.creators.push(Zotero.Utilities.cleanAuthor(value, "author", true));
+ }else if(field == "added author") {
+ newItem.creators.push(Zotero.Utilities.cleanAuthor(value, "contributor", true));
+ }else if(field == "conference author" || field == "corporate author") {
+ newItem.creators.push(value);
+ }else if(field == "subject" || field == "corporate subject" || field == "geographic term") {
+ var subjects = value.split("--");
+ newItem.tags = newItem.tags.concat(subjects);
+ }else if(field == "personal subject") {
+ var subjects = value.split(", ");
+ newItem.tags = newItem.tags.push(value[0]+", "+value[1]);
+ }else if(value && field != "http") {
+ newItem.extra += casedField+": "+value+"\n";
+ }
+ }
+ } catch (e) {}
+ elmt = elmts.iterateNext();
+ }//END if node
+
+ if(newItem.extra) {
+ newItem.extra = newItem.extra.substr(0, newItem.extra.length-1);
+ }
+
+ var callNumber = doc.evaluate('//tr/td[1][@class="holdingslist"]/strong/text()', doc, nsResolver, XPathResult.ANY_TYPE, null).iterateNext();
+
+ if(callNumber && callNumber.nodeValue) {
+ newItem.callNumber = callNumber.nodeValue;
+ }
+
+ var domain = doc.location.href.match(/https?:\/\/([^/]+)/);
+ newItem.repository = domain[1]+" Library Catalog";
+ newItem.accessed = Date();
+ newItem.complete();
+ return true;
+ }//END try
+
+ function doWeb(doc, url){
+ var namespace = doc.documentElement.namespaceURI;
+ var nsResolver = namespace ? function(prefix) {
+ if (prefix == 'x') return namespace; else return null;
+ } : null;
+
+ var sirsiNew = true; //toggle between SIRSI -2003 and SIRSI 2003+
+
+ var xpath = '/html/body/div[@class="columns_container"]/div[contains(@class, "left_column")]/div[@class="content_container"]/div[@class="content"]/form[@id="hitlist"]/ul[@class="hit_list"]/li/ul[starts-with(@class, "hit_list_row")]/li[@class="hit_list_item_info"]/dl';
+
+ if(doc.evaluate(xpath, doc, nsResolver, XPathResult.ANY_TYPE, null).iterateNext()) {
+ Zotero.debug("SIRSI doWeb: searchsum");
+ sirsiNew = true;
+ }else if (doc.evaluate('//form[@name="hitlist"]/table/tbody/tr', doc, nsResolver, XPathResult.ANY_TYPE, null).iterateNext()) {
+ Zotero.debug("SIRSI doWeb: hitlist");
+ sirsiNew = false;
+ }else if (doc.evaluate('//tr[th[@class="viewmarctags"]][td[@class="viewmarctags"]]', doc, nsResolver, XPathResult.ANY_TYPE, null).iterateNext()) {
+ Zotero.debug("SIRSI doWeb: viewmarctags");
+ sirsiNew = true;
+ }else if (doc.evaluate('//input[@name="VOPTIONS"]', doc, nsResolver, XPathResult.ANY_TYPE, null).iterateNext()) {
+ Zotero.debug("SIRSI doWeb: VOPTIONS");
+ sirsiNew = false;
+ }else {
+ var elmts = doc.evaluate('/html/body/form//text()', doc, nsResolver, XPathResult.ANY_TYPE, null);
+ //var elmts = doc.evaluate(' ', doc, nsResolver, XPathResult.ANY_TYPE, null);
+ while(elmt = elmts.iterateNext()) {
+ if(Zotero.Utilities.superCleanString(elmt.nodeValue) == "Viewing record") {
+ Zotero.debug("SIRSI doWeb: Viewing record");
+ sirsiNew = false;
+ }
+ }//END while elmts
+ }//END FUNCTION doWeb
+
+ if (sirsiNew) { //executes Simon's SIRSI 2003+ scraper code
+ if(!scrape(doc)) {
+ var checkboxes = new Array();
+ var urls = new Array();
+ var availableItems = new Array();
+ //pull items
+ var tableRows = doc.evaluate('//ul[@class="hit_list"]/li/ul[contains(@class, "hit_list_row")][//input[@value="Details"]]', doc, nsResolver, XPathResult.ANY_TYPE, null);
+
+ // Go through table rows
+ while(tableRow = tableRows.iterateNext()) {
+ var input = doc.evaluate('.//input[@value="Details"]', tableRow, nsResolver, XPathResult.ANY_TYPE, null).iterateNext();
+ var text = doc.evaluate('.//strong', tableRow, nsResolver, XPathResult.ANY_TYPE, null).iterateNext().textContent;
+ if(text) {
+ availableItems[input.name] = text;
+ }
+ }//END while
+ var items = Zotero.selectItems(availableItems);
+ if(!items) {
+ return true;
+ }
+ var hostRe = new RegExp("^http(?:s)?://[^/]+");
+ var m = hostRe.exec(doc.location.href);
+ Zotero.debug("href: " + doc.location.href);
+ var hitlist = doc.forms.namedItem("hitlist");
+ var baseUrl = m[0]+hitlist.getAttribute("action")+"?first_hit="+hitlist.elements.namedItem("first_hit").value+"&last_hit="+hitlist.elements.namedItem("last_hit").value;
+ var uris = new Array();
+ for(var i in items) {
+ uris.push(baseUrl+"&"+i+"=Details");
+ }
+ Zotero.Utilities.processDocuments(uris, function(doc) { scrape(doc) }, function() { Zotero.done() }, null);
+ Zotero.wait();
+ }//END if not scrape(doc)
+ }else{ //executes Simon's SIRSI -2003 translator code
+ Zotero.debug("Running SIRSI -2003 code");
+ var uri = doc.location.href;
+ var recNumbers = new Array();
+ var xpath = '//form[@name="hitlist"]/table/tbody/tr';
+ var elmts = doc.evaluate(xpath, doc, nsResolver, XPathResult.ANY_TYPE, null);
+ var elmt = elmts.iterateNext();
+ if(elmt) { // Search results page
+ var uriRegexp = /^http:\/\/[^\/]+/;
+ var m = uriRegexp.exec(uri);
+ var postAction = doc.forms.namedItem("hitlist").getAttribute("action");
+ var newUri = m[0]+postAction.substr(0, postAction.length-1)+"40";
+ var titleRe = /<br>\s*(.*[^\s])\s*<br>/i;
+ var items = new Array();
+ do {
+ var checkbox = doc.evaluate('.//input[@type="checkbox"]', elmt, nsResolver, XPathResult.ANY_TYPE, null).iterateNext();
+ // Collect title
+ var title = doc.evaluate("./td[2]", elmt, nsResolver, XPathResult.ANY_TYPE, null).iterateNext().textContent;
+ if(checkbox && title) {
+ items[checkbox.name] = Zotero.Utilities.cleanString(title);
+ }
+ } while(elmt = elmts.iterateNext());
+ items = Zotero.selectItems(items);
+ if(!items) {
+ return true;
+ }
+ for(var i in items) {
+ recNumbers.push(i);
+ }
+ } else {// Normal page
+ // this regex will fail about 1/100,000,000 tries
+ var uriRegexp = /^((.*?)\/([0-9]+?))\//;
+ var m = uriRegexp.exec(uri);
+ var newUri = m[1]+"/40"
+ var elmts = doc.evaluate('/html/body/form', doc, nsResolver, XPathResult.ANY_TYPE, null);
+ while(elmt = elmts.iterateNext()) {
+ var initialText = doc.evaluate('.//text()[1]', elmt, nsResolver, XPathResult.ANY_TYPE, null).iterateNext();
+ if(initialText && initialText.nodeValue && Zotero.Utilities.superCleanString(initialText.nodeValue) == "Viewing record") {
+ recNumbers.push(doc.evaluate('./b[1]/text()[1]', elmt, nsResolver, XPathResult.ANY_TYPE, null).iterateNext().nodeValue);
+ break;
+ }
+ }
+ }
+ var translator = Zotero.loadTranslator("import");
+ translator.setTranslator("a6ee60df-1ddc-4aae-bb25-45e0537be973");
+ var marc = translator.getTranslatorObject();
+ Zotero.Utilities.loadDocument(newUri+'?marks='+recNumbers.join(",")+'&shadow=NO&format=FLAT+ASCII&sort=TITLE&vopt_elst=ALL&library=ALL&display_rule=ASCENDING&duedate_code=l&holdcount_code=t&DOWNLOAD_x=22&DOWNLOAD_y=12&address=&form_type=', function(doc) {
+ var pre = doc.getElementsByTagName("pre");
+ var text = pre[0].textContent;
+ var documents = text.split("*** DOCUMENT BOUNDARY ***");
+ for(var j=1; j<documents.length; j++) {
+ var uri = newUri+"?marks="+recNumbers[j]+"&shadow=NO&format=FLAT+ASCII&sort=TITLE&vopt_elst=ALL&library=ALL&display_rule=ASCENDING&duedate_code=l&holdcount_code=t&DOWNLOAD_x=22&DOWNLOAD_y=12&address=&form_type=";
+ var lines = documents[j].split("\n");
+ var record = new marc.record();
+ var tag, content;
+ var ind = "";
+ for(var i=0; i<lines.length; i++) {
+ var line = lines[i];
+ if(line[0] == "." && line.substr(4,2) == ". ") {
+ if(tag) {
+ content = content.replace(/\|([a-z])/g, marc.subfieldDelimiter+"$1");
+ record.addField(tag, ind, content);
+ }
+ } else {
+ content += " "+line.substr(6);
+ continue;
+ }
+ tag = line.substr(1, 3);
+ if(tag[0] != "0" || tag[1] != "0") {
+ ind = line.substr(6, 2);
+ content = line.substr(8);
+ } else {
+ content = line.substr(7);
+ if(tag == "000") {
+ tag = undefined;
+ record.leader = "00000"+content;
+ Zotero.debug("the leader is: "+record.leader);
+ }
+ }
+ }//end FOR
+ var newItem = new Zotero.Item();
+ record.translate(newItem);
+ var domain = url.match(/https?:\/\/([^/]+)/);
+ newItem.repository = domain[1]+" Library Catalog";
+ newItem.complete();
+ }//end FOR
+ Zotero.done();
+ });
+ Zotero.wait();
+ }//END while
+}//END scrape function
diff --git a/translators/Library Catalog (InnoPAC).js b/translators/Library Catalog (InnoPAC).js
@@ -3,7 +3,7 @@
"translatorType":4,
"label":"Library Catalog (InnoPAC)",
"creator":"Simon Kornblith and Michael Berkowitz",
- "target":"(search~|\\/search\\?|(a|X|t|Y|w)\\?|\\?(searchtype|searchscope)|frameset&FF|record=b[0-9]+~S[0-9]|/search/q\\?)",
+ "target":"(search~|\\/search\\?|(a|X|t|Y|w)\\?|\\?(searchtype|searchscope)|frameset&FF|record=b[0-9]+~?S?[0-9]?|/search/q\\?)",
"minVersion":"1.0.0b3.r1",
"maxVersion":"",
"priority":200,
@@ -224,4 +224,4 @@ function doWeb(doc, url) {
}
Zotero.wait();
-}
-\ No newline at end of file
+}