commit 43025f971d39ce29bb9762ffb74400554fa545f8
parent 78a88f749f0ae1854c13c6df884612a818218150
Author: Dan Stillman <dstillman@zotero.org>
Date: Thu, 17 Dec 2009 09:33:04 +0000
Pushed National Archives of Australia
Deleted NAA RecordSearch (which it replaces)
Diffstat:
4 files changed, 207 insertions(+), 75 deletions(-)
diff --git a/deleted.txt b/deleted.txt
@@ -1,4 +1,4 @@
-2 # Increment number when modifying file
+3 # Increment number when modifying file
96b9f483-c44d-5784-cdad-ce21b984 # Amazon
add7c71c-21f3-ee14-d188-caf9da12 # SIRSI 2003+
@@ -13,3 +13,4 @@ e07e9b8c-0e98-4915-bb5a-32a08cb2f365 # Open WorldCat (Search)
dd149efc-7f0e-43e4-b3df-b6d15e171717 # Persée
8c1f42d5-02fa-437b-b2b2-73afc768eb07 # PNAS (replaced by HighWire 2.0)
56ea09bc-57ee-4f50-976e-cf7cb1f6c6d8 # Royal Society Publishing (replaced by HighWire 2.0)
+83538f48-906f-40ef-bdb3-e94f63676307 # NAA RecordSearch (replaced by National Archives of Australia)
diff --git a/repotime.txt b/repotime.txt
@@ -1 +1 @@
-2009-12-17 08:10:00
+2009-12-17 09:35:00
diff --git a/translators/NAA RecordSearch.js b/translators/NAA RecordSearch.js
@@ -1,72 +0,0 @@
-{
- "translatorID":"83538f48-906f-40ef-bdb3-e94f63676307",
- "translatorType":4,
- "label":"NAA RecordSearch",
- "creator":"Tim Sherratt",
- "target":"http://naa12.naa.gov.au/scripts/",
- "minVersion":"1.0.0b4.r5",
- "maxVersion":"",
- "priority":100,
- "inRepository":true,
- "lastUpdated":"2009-01-08 08:19:07"
-}
-
-function detectWeb(doc, url) {
- if (url.match(/Items_listing.asp/i)) {
- return "multiple";
- } else if (url.match(/ItemDetail.asp/i)) {
- return "manuscript";
- }
-}
-
-function doWeb(doc, url) {
- var namespace = doc.documentElement.namespaceURI;
- var nsResolver = namespace ? function(prefix) {
- if (prefix == 'x') return namespace; else return null;
- } : null;
- if (detectWeb(doc, url) == "multiple") {
- var records = new Array();
- var items = new Object();
- var titles = doc.evaluate('//form[2]/table/tbody/tr/td[b="Title"]', doc, nsResolver, XPathResult.ANY_TYPE, null);
- var links = doc.evaluate('//form[2]/table/tbody/tr/td[b="Control symbol"]/a', doc, nsResolver, XPathResult.ANY_TYPE, null);
- var title;
- var link;
- while ((title = titles.iterateNext()) && (link = links.iterateNext())) {
- items[link.href] = Zotero.Utilities.trimInternal(title.lastChild.textContent);
- }
- items = Zotero.selectItems(items);
- for (var i in items) {
- records.push(i);
- }
- } else {
- records = [url];
- }
- Zotero.Utilities.processDocuments(records, function(doc) {
- var title = Zotero.Utilities.cleanString(doc.evaluate('//table/tbody/tr/td[b="Title"]', doc, null, XPathResult.ANY_TYPE, null).iterateNext().lastChild.textContent);
- var series = Zotero.Utilities.cleanString(doc.evaluate('//table/tbody/tr/td[b="Series number"]', doc, null, XPathResult.ANY_TYPE, null).iterateNext().lastChild.textContent);
- var control = Zotero.Utilities.cleanString(doc.evaluate('//table/tbody/tr/td[b="Control symbol"]', doc, null, XPathResult.ANY_TYPE, null).iterateNext().lastChild.textContent);
- var date = Zotero.Utilities.cleanString(doc.evaluate('//table/tbody/tr/td[b="Contents date range"]', doc, null, XPathResult.ANY_TYPE, null).iterateNext().lastChild.textContent);
- var access = Zotero.Utilities.cleanString(doc.evaluate('//table/tbody/tr/td[b="Access status"]', doc, null, XPathResult.ANY_TYPE, null).iterateNext().lastChild.textContent);
- var location = Zotero.Utilities.cleanString(doc.evaluate('//table/tbody/tr/td[b="Location"]', doc, null, XPathResult.ANY_TYPE, null).iterateNext().lastChild.textContent);
- var barcode = Zotero.Utilities.cleanString(doc.evaluate('//table/tbody/tr/td[b="Barcode"]', doc, null, XPathResult.ANY_TYPE, null).iterateNext().lastChild.textContent);
- if (doc.body.innerHTML.match("View digital copy")) {
- var digitised = "yes";
- } else {
- var digitised = "no";
- }
- var repository ="National Archives of Australia, " + location;
- var url = "http://www.aa.gov.au/cgi-bin/Search?O=I&Number=" + barcode;
- var ref_number = series + ", " + control;
- var type = "file";
- var item = new Zotero.Item("manuscript");
- item.title = title;
- item.archiveLocation = ref_number;
- item.url = url;
- item.date = date;
- item.manuscriptType = type;
- item.extra = "Access: " + access + "\nDigitised: " + digitised;
- item.repository = repository;
- item.complete();
-
- }, function() {Zotero.done();});
-}
-\ No newline at end of file
diff --git a/translators/National Archives of Australia.js b/translators/National Archives of Australia.js
@@ -0,0 +1,204 @@
+{
+ "translatorID":"50a4cf3f-92ef-4e9f-ab15-815229159b16",
+ "translatorType":4,
+ "label":"National Archives of Australia",
+ "creator":"Tim Sherratt",
+ "target":"^http://[^/]*naa.gov.au/",
+ "minVersion":"1.0",
+ "maxVersion":"",
+ "priority":90,
+ "inRepository":false,
+ "lastUpdated":"2009-12-17 09:35:00"
+}
+
+/**
+ * Classify the current National Archives of Australia page from its URL.
+ *
+ * @param {Document} doc  The loaded page (not inspected by this function).
+ * @param {String} url    URL of the page.
+ * @return {String|undefined} "multiple" for RecordSearch item/series listings
+ *     and PhotoSearch result pages; "manuscript" for series, item, photo and
+ *     digital-image detail pages; undefined for any other URL.
+ */
+function detectWeb(doc, url) {
+    // The dot is escaped so that only a literal ".asp" extension is accepted.
+    // RecordSearch - items and series - or PhotoSearch results
+    if (/(Series_listing|Items_listing|PhotoSearchSearchResults)\.asp/i.test(url)) {
+        return "multiple";
+    }
+    // Detail pages; "PhotoSearchItemDetail.asp" is also covered by "ItemDetail".
+    if (/(SeriesDetail|ItemDetail|PhotoSearchItemDetail|imagine)\.asp/i.test(url)) {
+        return "manuscript";
+    }
+}
+/**
+ * Save the record shown on the current page, or the records the user selects
+ * from a results listing, as Zotero "manuscript" items.
+ *
+ * Four kinds of record are handled, chosen by URL: a single digitised folio
+ * (imagine.asp), a PhotoSearch photograph, a series, and a file (item).
+ * Records are processed one at a time; each scraper calls finish() once its
+ * item is complete, which starts the next queued record. Zotero.done() is
+ * called when the queue is empty.
+ *
+ * @param {Document} doc  The page the translator was invoked on.
+ * @param {String} url    URL of that page.
+ * @return {Boolean|undefined} true when the selection dialog yields nothing.
+ */
+function doWeb(doc, url) {
+    var namespace = doc.documentElement.namespaceURI;
+    var nsResolver = namespace ? function (prefix) {
+        return prefix == 'x' ? namespace : null;
+    } : null;
+
+    // To avoid cross-domain errors, follow-up requests use the sub-domain the
+    // page came from. The variable is local and always has a value, so pages
+    // served from any other naa.gov.au host fall back to recordsearch.
+    var baseURL = "http://recordsearch.naa.gov.au/scripts/";
+    if (url.match(/naa12/i)) {
+        baseURL = "http://naa12.naa.gov.au/scripts/";
+    }
+
+    var records = [];
+
+    // First node matched by an XPath expression, or null.
+    function firstNode(context, xpath) {
+        return context.evaluate(xpath, context, nsResolver,
+            XPathResult.FIRST_ORDERED_NODE_TYPE, null).singleNodeValue;
+    }
+
+    // Whitespace-normalised text of the last child of the cell whose bold
+    // label is `label`.
+    function cellText(context, label) {
+        return Zotero.Utilities.trimInternal(
+            firstNode(context, '//td[b="' + label + '"]').lastChild.textContent);
+    }
+
+    // Whitespace-normalised value attribute of the input with the given id.
+    function inputValue(context, id) {
+        return Zotero.Utilities.trimInternal(
+            firstNode(context, '//input[@id="' + id + '"]/@value').textContent);
+    }
+
+    // Subject heading linked after `label` on a photo page, or null when the
+    // label is missing, has no link, or is marked "Not Assigned". Only the text
+    // up to the next bold label is examined, so a "Not Assigned" belonging to a
+    // later field cannot suppress this one.
+    function subjectTag(text, label) {
+        var field = text.match(new RegExp("<b>" + label + " :<\\/b>((?:(?!<b>).)*)"));
+        if (!field || /Not Assigned/.test(field[1])) {
+            return null;
+        }
+        var link = field[1].match(/<a href.*?>(.*?)<\/a>/);
+        return link ? link[1] : null;
+    }
+
+    // Save the finished item and move on to the next queued record.
+    function finish(item) {
+        item.complete();
+        processNext();
+    }
+
+    // Scrape a digital image - ie a single folio - from the page in `doc`.
+    function scrapeFolio(item, record) {
+        var barcode, folio, count;
+        if (doc.body.innerHTML.match(/Digital copy of NAA:/)) {
+            // The page has been rewritten by the author's Greasemonkey script
+            var src = firstNode(doc, '//img[@id="fileimage"]/@src').textContent;
+            var parts = src.match(/B=(\d+)&S=(\d+)&/);
+            barcode = parts ? parts[1] : "";
+            folio = parts ? parts[2] : "";
+            count = inputValue(doc, "printto");
+        } else {
+            // The original RecordSearch interface
+            barcode = inputValue(doc, "Hidden1");
+            folio = inputValue(doc, "Text1");
+            count = inputValue(doc, "Hidden3");
+        }
+        var postString = "B=" + barcode + "&C=" + count + "&F=1&I=" + folio + "&L=Y&M=R&MX=Y&S=Y&SE=1&X=N";
+        Zotero.Utilities.HTTP.doPost(record, postString, function (text) {
+            var itemBarcode = text.match(/Digital copy of item with barcode\s+(\d+)/)[1];
+            Zotero.debug(itemBarcode);
+            item.pages = text.match(/NAME="I" VALUE="(\d+)"/)[1];
+            item.numPages = text.match(/NAME="C" VALUE="(\d+)"/)[1];
+            item.url = "http://naa16.naa.gov.au/rs_images/ShowImage.php?B=" + itemBarcode + "&S=" + item.pages + "&T=P";
+            item.manuscriptType = 'folio';
+            // The series number and control symbol live on the parent file's page
+            var itemURL = baseURL + "ItemDetail.asp?M=0&B=" + itemBarcode;
+            Zotero.Utilities.processDocuments(itemURL, function (itemDoc) {
+                var refNumber = cellText(itemDoc, "Series number") + ", " + cellText(itemDoc, "Control symbol");
+                item.archiveLocation = refNumber;
+                item.title = "Page " + item.pages + " of NAA: " + refNumber;
+                item.shortTitle = "NAA: " + refNumber;
+                item.attachments = [{url: item.url, title: "Digital image of NAA: " + refNumber + ", page " + item.pages, mimeType: "image/jpeg"}];
+                finish(item);
+            });
+        });
+    }
+
+    // Scrape photo details from the raw HTML of a PhotoSearch item page.
+    function scrapePhoto(item, record) {
+        Zotero.Utilities.HTTP.doGet(record, function (text) {
+            // Clean up unpredictable linebreaks and tabs
+            text = text.replace(/[\n\r\t]/g, "");
+            item.title = Zotero.Utilities.trimInternal(text.match(/<b>Title :<\/b>(.*?)<br/)[1]);
+            item.date = Zotero.Utilities.trimInternal(text.match(/<b>Date :<\/b>(.*?)<br/)[1]);
+            item.archiveLocation = Zotero.Utilities.trimInternal(text.match(/<b>Image no. :<\/b>(.*?)<br/)[1]);
+            var barcode = Zotero.Utilities.trimInternal(text.match(/<b>Barcode : <\/b>(.*?)<br/)[1]);
+            var labels = ["Primary subject", "Secondary subject"];
+            for (var n = 0; n < labels.length; n++) {
+                var tag = subjectTag(text, labels[n]);
+                if (tag) {
+                    item.tags.push(Zotero.Utilities.trimInternal(tag).toLowerCase());
+                }
+            }
+            Zotero.debug(item.tags);
+            item.url = "http://www.naa.gov.au/cgi-bin/Search?O=PSI&Number=" + barcode;
+            item.manuscriptType = "photograph";
+            // Save a copy of the photo
+            var imgURL = "http://naa16.naa.gov.au/rs_images/ShowImage.php?B=" + barcode + "&T=P&S=1";
+            item.attachments = [{url: imgURL, title: "Digital image of NAA: " + item.archiveLocation, mimeType: "image/jpeg"}];
+            finish(item);
+        });
+    }
+
+    // Scrape series details, then look up how many of its items are described.
+    function scrapeSeries(item, record) {
+        Zotero.Utilities.processDocuments(record, function (seriesDoc) {
+            item.title = cellText(seriesDoc, "Title");
+            item.archiveLocation = cellText(seriesDoc, "Series number");
+            item.date = cellText(seriesDoc, "Accumulation dates");
+            var location = null;
+            var locationCell = firstNode(seriesDoc, '//td[b="Quantity and location"]');
+            if (locationCell) {
+                location = locationCell.textContent.replace(/Quantity and location/i, "").replace(/\s([\w]+)([\d]+\.*\d*)/gi, " $1; $2");
+            }
+            Zotero.debug(location);
+            var agencies = seriesDoc.evaluate('//td[b="Agency / person recording"]/table/tbody/tr/td[2]', seriesDoc, nsResolver, XPathResult.ANY_TYPE, null);
+            var agency;
+            while ((agency = agencies.iterateNext())) {
+                item.creators.push({lastName: agency.textContent, creatorType: "creator"});
+            }
+            item.url = "http://www.naa.gov.au/cgi-bin/Search?Number=" + item.archiveLocation;
+            item.manuscriptType = "series";
+            // Find out how many items from this series have been described on RecordSearch
+            var itemsURL = baseURL + "SearchOF.asp?DP=2&Q=SER_SERIES_NO=QT" + item.archiveLocation + "QT";
+            Zotero.Utilities.processDocuments(itemsURL, function (itemDoc) {
+                var numItems = Zotero.Utilities.trimInternal(firstNode(itemDoc, '//tr[2]/td[2]').textContent);
+                Zotero.debug(numItems);
+                if (numItems == "No records found") {
+                    numItems = "none";
+                }
+                // Leave the location line out when the page did not have one
+                var extra = [];
+                if (location) {
+                    extra.push("Quantity and location: " + location);
+                }
+                extra.push("Number of items described: " + numItems);
+                item.extra = extra.join("\n");
+                finish(item);
+            });
+        });
+    }
+
+    // Scrape file (item) details; for digitised files also fetch the page count.
+    function scrapeFile(item, record) {
+        Zotero.Utilities.processDocuments(record, function (fileDoc) {
+            item.title = cellText(fileDoc, "Title");
+            var series = cellText(fileDoc, "Series number");
+            var control = cellText(fileDoc, "Control symbol");
+            item.date = cellText(fileDoc, "Contents date range");
+            var access = cellText(fileDoc, "Access status");
+            var location = cellText(fileDoc, "Location");
+            var barcode = cellText(fileDoc, "Barcode");
+            // Has the file been digitised?
+            var digitised = fileDoc.body.innerHTML.indexOf("View digital copy") != -1 ? "yes" : "no";
+            item.url = "http://www.naa.gov.au/cgi-bin/Search?O=I&Number=" + barcode;
+            item.archiveLocation = series + ", " + control;
+            item.manuscriptType = "file";
+            item.extra = "Location: " + location + "\nAccess: " + access + "\nDigitised: " + digitised;
+            if (digitised != "yes") {
+                finish(item);
+                return;
+            }
+            // It's digitised, so find out how many pages are in the digitised file
+            var itemURL = baseURL + "imagine.asp?B=" + barcode + "&I=1&SE=1";
+            Zotero.Utilities.processDocuments(itemURL, function (itemDoc) {
+                var pages = inputValue(itemDoc, "Hidden3");
+                item.numPages = "1-" + pages;
+                item.pages = "1-" + pages;
+                finish(item);
+            });
+        });
+    }
+
+    // Take the next record off the queue and hand it to the matching scraper.
+    function processNext() {
+        if (!records.length) {
+            Zotero.done();
+            return;
+        }
+        var item = new Zotero.Item("manuscript");
+        item.repository = "National Archives of Australia";
+        var record = records.shift();
+        Zotero.debug(record);
+        // All tests are case-insensitive, as they are in detectWeb
+        if (record.match(/Imagine\.asp/i)) {
+            scrapeFolio(item, record);
+        } else if (record.match(/PhotoSearchItemDetail\.asp/i)) {
+            scrapePhoto(item, record);
+        } else if (record.match(/SeriesDetail\.asp/i)) {
+            scrapeSeries(item, record);
+        } else if (record.match(/ItemDetail\.asp/i)) {
+            scrapeFile(item, record);
+        } else {
+            // Unrecognised URL: skip it so the queue still drains to Zotero.done()
+            processNext();
+        }
+    }
+
+    if (detectWeb(doc, url) == "multiple") {
+        var titleXPath, linkXPath;
+        if (url.match(/Items_listing\.asp/i)) {
+            // Files
+            titleXPath = '//td[b="Title"]';
+            linkXPath = '//td[b="Control symbol"]/a';
+        } else if (url.match(/PhotoSearchSearchResults\.asp/i)) {
+            // Photos: the same anchor supplies both the title and the link
+            titleXPath = '//td[b="Title :"]/a[1]';
+            linkXPath = '//td[b="Title :"]/a[1]';
+        } else {
+            // Series (the only other listing that detectWeb reports)
+            titleXPath = '//td[b="Title"]';
+            linkXPath = '//td[b="Series number"]/a';
+        }
+        var titles = doc.evaluate(titleXPath, doc, nsResolver, XPathResult.ANY_TYPE, null);
+        var links = doc.evaluate(linkXPath, doc, nsResolver, XPathResult.ANY_TYPE, null);
+        var items = {};
+        var title, link;
+        while ((title = titles.iterateNext()) && (link = links.iterateNext())) {
+            items[link.href] = Zotero.Utilities.trimInternal(title.lastChild.textContent);
+            Zotero.debug(title.lastChild.textContent);
+        }
+        items = Zotero.selectItems(items);
+        // NOTE(review): selectItems is expected to return a false value when the
+        // user cancels the dialog - confirm against the Zotero translator API.
+        if (!items) {
+            return true;
+        }
+        for (var href in items) {
+            records.push(href);
+        }
+    } else {
+        records = [url];
+    }
+
+    // Enter asynchronous mode before starting, so that a Zotero.done() reached
+    // synchronously (e.g. for an unrecognised URL) still comes after Zotero.wait().
+    Zotero.wait();
+    processNext();
+}
+