commit f4d759ebf436ecc4c7ae43d5d46969dbcea5cba2
parent a0ecb645a8dbc6271c28e901fe9159a075e0ff6b
Author: Avram Lyon <ajlyon@gmail.com>
Date: Sun, 15 Aug 2010 10:32:24 +0000
New version by Tim Sherratt
Diffstat:
1 file changed, 226 insertions(+), 179 deletions(-)
diff --git a/translators/National Archives of Australia.js b/translators/National Archives of Australia.js
@@ -1,19 +1,19 @@
{
- "translatorID":"50a4cf3f-92ef-4e9f-ab15-815229159b16",
- "translatorType":4,
- "label":"National Archives of Australia",
- "creator":"Tim Sherratt",
- "target":"^http://[^/]*naa.gov.au/",
- "minVersion":"1.0",
- "maxVersion":"",
- "priority":90,
- "inRepository":false,
- "lastUpdated":"2009-12-17 09:35:00"
+ "translatorID":"50a4cf3f-92ef-4e9f-ab15-815229159b16",
+ "label":"National Archives of Australia",
+ "creator":"Tim Sherratt",
+ "target":"^http://[^/]*naa\\.gov\\.au/",
+ "minVersion":"1.0",
+ "maxVersion":"",
+ "priority":100,
+ "inRepository":true,
+ "translatorType":4,
+ "lastUpdated":"2010-08-12 15:38:20"
}
function detectWeb(doc, url) {
//RecordSearch - items and series - or Photosearch results
- if (url.match(/Series_listing.asp/i) || url.match(/Items_listing.asp/i) || url.match(/PhotoSearchSearchResults.asp/i)) {
+ if (url.match(/SeriesListing.asp/i) || url.match(/ItemsListing.asp/i) || url.match(/PhotoSearchSearchResults.asp/i)) {
return "multiple";
} else if (url.match(/SeriesDetail.asp/i) || url.match(/ItemDetail.asp/i) || url.match(/PhotoSearchItemDetail.asp/i) || url.match(/imagine.asp/i)) {
return "manuscript";
@@ -24,181 +24,228 @@ function doWeb(doc, url) {
var nsResolver = namespace ? function(prefix) {
if (prefix == 'x') return namespace; else return null;
} : null;
-
- // To avoid cross domain errors make sure links match current sub-domain
- if (url.match(/naa12/i)) {
- baseURL = "http://naa12.naa.gov.au/scripts/";
- } else if (url.match(/recordsearch/i)) {
- baseURL = "http://recordsearch.naa.gov.au/scripts/";
- }
- var records = new Array();
- var titles, links, title, link;
- if (detectWeb(doc, url) == "multiple") {
- var items = new Object();
- // Files
- if (url.match(/Items_listing.asp/i)) {
- titles = doc.evaluate('//td[b="Title"]', doc, nsResolver, XPathResult.ANY_TYPE, null);
- links = doc.evaluate('//td[b="Control symbol"]/a', doc, nsResolver, XPathResult.ANY_TYPE, null);
- // Photos
- } else if (url.match(/PhotoSearchSearchResults.asp/i)) {
- titles = doc.evaluate('//td[b="Title :"]/a[1]', doc, nsResolver, XPathResult.ANY_TYPE, null);
- links = doc.evaluate('//td[b="Title :"]/a[1]', doc, nsResolver, XPathResult.ANY_TYPE, null);
- //Series
- } else if (url.match(/Series_listing.asp/i)) {
- titles = doc.evaluate('//td[b="Title"]', doc, nsResolver, XPathResult.ANY_TYPE, null);
- links = doc.evaluate('//td[b="Series number"]/a', doc, nsResolver, XPathResult.ANY_TYPE, null);
- }
- while ((title = titles.iterateNext()) && (link = links.iterateNext())) {
- items[link.href] = Zotero.Utilities.trimInternal(title.lastChild.textContent);
- Zotero.debug(title.lastChild.textContent);
- }
- items = Zotero.selectItems(items);
- for (var i in items) {
- records.push(i);
- }
+ // If it's a single page of a digitised file, then send it to be processed directly.
+ // This is because digitised pages, after the first, are retrieved via POST, thus if you feed the url to processDocuments
+ // you'll only ever get the first page.
+ if (url.match(/imagine.asp/i)) {
+ processFolio(doc);
+ Zotero.done();
+ // Everything else can be handled normally.
} else {
- records = [url];
- }
- var setupCallback = function () {
- if (records.length) {
- var item = new Zotero.Item("manuscript");
- item.repository = "National Archives of Australia";
- var record = records.shift();
- Zotero.debug(record);
- var postString;
- // Scrape digital image - ie a single folio - details
- if (record.match(/Imagine.asp/i)) {
- // You're using my Greasemonkey script to view images
- var b, i, c;
- if (doc.body.innerHTML.match(/Digital copy of NAA:/)) {
- doc.evaluate('//img[@id="fileimage"]/@src', doc, nsResolver, XPathResult.FIRST_ORDERED_NODE_TYPE, null).singleNodeValue.textContent.match(/B=(\d+)&S=(\d+)&/);
- b = RegExp.$1;
- i = RegExp.$2;
- c = Zotero.Utilities.trimInternal(doc.evaluate('//input[@id="printto"]/@value', doc, nsResolver, XPathResult.FIRST_ORDERED_NODE_TYPE, null).singleNodeValue.textContent);
- // You're using the original RS interface
+ // To avoid cross domain errors find baseurl
+ var baseURL = doc.location.href.match(/(http:\/\/[a-z0-9]+\.naa\.gov\.au)/)[1];
+ var records = new Array();
+ var titles, links, title, link;
+ if (detectWeb(doc, url) == "multiple") {
+ var items = new Object();
+ // Files
+ if (url.match(/ItemsListing.asp/i)) {
+ titles = doc.evaluate('//td[4][@title="Go to Item details"]', doc, nsResolver, XPathResult.ANY_TYPE, null);
+ links = doc.evaluate('//td[4][@title="Go to Item details"]/@onclick', doc, nsResolver, XPathResult.ANY_TYPE, null);
+ // Photos
+ } else if (url.match(/PhotoSearchSearchResults.asp/i)) {
+ titles = doc.evaluate('//td[b="Title :"]/a[1]', doc, nsResolver, XPathResult.ANY_TYPE, null);
+ links = doc.evaluate('//td[b="Title :"]/a[1]', doc, nsResolver, XPathResult.ANY_TYPE, null);
+ //Series
+ } else if (url.match(/SeriesListing.asp/i)) {
+ titles = doc.evaluate('//td[3][@title="Go to Series details"]', doc, nsResolver, XPathResult.ANY_TYPE, null);
+ links = doc.evaluate('//td[3][@title="Go to Series details"]/@onclick', doc, nsResolver, XPathResult.ANY_TYPE, null);
+ }
+ while ((title = titles.iterateNext()) && (link = links.iterateNext())) {
+ if (url.match(/PhotoSearchSearchResults.asp/i)) {
+ items[link.href] = Zotero.Utilities.trimInternal(title.lastChild.textContent);
} else {
- b = Zotero.Utilities.trimInternal(doc.evaluate('//input[@id="Hidden1"]/@value', doc, nsResolver, XPathResult.FIRST_ORDERED_NODE_TYPE, null).singleNodeValue.textContent);
- i = Zotero.Utilities.trimInternal(doc.evaluate('//input[@id="Text1"]/@value', doc, nsResolver, XPathResult.FIRST_ORDERED_NODE_TYPE, null).singleNodeValue.textContent);
- c = Zotero.Utilities.trimInternal(doc.evaluate('//input[@id="Hidden3"]/@value', doc, nsResolver, XPathResult.FIRST_ORDERED_NODE_TYPE, null).singleNodeValue.textContent);
+ items[baseURL + '/SearchNRetrieve/Interface' + link.textContent.match(/window\.location = '\.\.(.+?)'/)[1]] = Zotero.Utilities.trimInternal(title.firstChild.textContent);
}
- postString = "B=" + b + "&C=" + c + "&F=1&I=" + i + "&L=Y&M=R&MX=Y&S=Y&SE=1&X=N";
- Zotero.Utilities.HTTP.doPost(record, postString, function (text) {
- // This is a digital image -- ie a folio
- var barcode = text.match(/Digital copy of item with barcode\s+(\d+)/)[1];
- Zotero.debug(barcode);
- item.pages = text.match(/NAME="I" VALUE="(\d+)"/)[1];
- item.numPages = text.match(/NAME="C" VALUE="(\d+)"/)[1];
- item.url = "http://naa16.naa.gov.au/rs_images/ShowImage.php?B=" + barcode + "&S=" + item.pages + "&T=P";
- var itemURL = baseURL + "ItemDetail.asp?M=0&B=" + barcode;
- item.manuscriptType = 'folio';
- Zotero.Utilities.processDocuments(itemURL, function(itemDoc) {
- var series = Zotero.Utilities.trimInternal(itemDoc.evaluate('//td[b="Series number"]', itemDoc, nsResolver, XPathResult.FIRST_ORDERED_NODE_TYPE, null).singleNodeValue.lastChild.textContent);
- var control = Zotero.Utilities.trimInternal(itemDoc.evaluate('//td[b="Control symbol"]', itemDoc, nsResolver, XPathResult.FIRST_ORDERED_NODE_TYPE, null).singleNodeValue.lastChild.textContent);
- var refNumber = series + ", " + control;
- item.archiveLocation = refNumber;
- item.title = "Page " + item.pages + " of NAA: "+refNumber;
- item.shortTitle = "NAA: " + refNumber;
- item.attachments = [{url:item.url, title:"Digital image of NAA: " + refNumber + ", page " + item.pages, mimeType:"image/jpeg" }];
- item.complete();
- setupCallback();
- });
- });
- // Scrape photo details
- } else if (record.match(/PhotoSearchItemDetail.asp/)) {
- Zotero.Utilities.HTTP.doGet(record, function (text) {
- // Clean up unpredictable linebreaks and tabs
- text = text.replace(/\n/gm, "");
- text = text.replace(/\r/gm, "");
- text = text.replace(/\t/gm, "");
- item.title = Zotero.Utilities.trimInternal(text.match(/<b>Title :<\/b>(.*?)<br/)[1]);
- item.date = Zotero.Utilities.trimInternal(text.match(/<b>Date :<\/b>(.*?)<br/)[1]);
- item.archiveLocation = Zotero.Utilities.trimInternal(text.match(/<b>Image no. :<\/b>(.*?)<br/)[1]);
- var barcode = Zotero.Utilities.trimInternal(text.match(/<b>Barcode : <\/b>(.*?)<br/)[1]);
- var location = Zotero.Utilities.trimInternal(text.match(/<b>Location : <\/b>(.*?)<br/)[1]);
- if (!text.match(/<b>Primary subject :<\/b>.*?Not Assigned/)) { var tag1 = text.match(/<b>Primary subject :<\/b>.*?<a href.*?>(.*?)<\/a>/)[1]};
- if (!text.match(/<b>Secondary subject :<\/b>.*?Not Assigned/)) { var tag2 = text.match(/<b>Secondary subject :<\/b>.*?<a href.*?>(.*?)<\/a>/)[1]};
- if (tag1) { item.tags.push(Zotero.Utilities.trimInternal(tag1).toLowerCase()) };
- if (tag2) { item.tags.push(Zotero.Utilities.trimInternal(tag2).toLowerCase()) };
- var imgURL = "http://naa16.naa.gov.au/rs_images/ShowImage.php?B=" + barcode + "&T=P&S=1";
- item.url = "http://www.naa.gov.au/cgi-bin/Search?O=PSI&Number=" + barcode;
- item.manuscriptType = "photograph";
- Zotero.debug(item.tags);
- // Save a copy of the photo
- item.attachments = [{url:imgURL, title:"Digital image of NAA: "+ item.archiveLocation, mimeType:"image/jpeg" }];
- item.complete();
- setupCallback();
- });
- // Scrape series details
- } else if (record.match(/SeriesDetail.asp/i)) {
- Zotero.Utilities.processDocuments(record, function (doc) {
- item.title = Zotero.Utilities.trimInternal(doc.evaluate('//td[b="Title"]', doc, nsResolver, XPathResult.FIRST_ORDERED_NODE_TYPE, null).singleNodeValue.lastChild.textContent);
- item.archiveLocation = Zotero.Utilities.trimInternal(doc.evaluate('//td[b="Series number"]', doc, nsResolver, XPathResult.FIRST_ORDERED_NODE_TYPE, null).singleNodeValue.lastChild.textContent);
- item.date = Zotero.Utilities.trimInternal(doc.evaluate('//td[b="Accumulation dates"]', doc, nsResolver, XPathResult.FIRST_ORDERED_NODE_TYPE, null).singleNodeValue.lastChild.textContent);
- var location = doc.evaluate('//td[b="Quantity and location"]', doc, nsResolver, XPathResult.FIRST_ORDERED_NODE_TYPE, null).singleNodeValue;
- if (location) {
- location = location.textContent.replace(/Quantity and location/i, "").replace(/\s([\w]+)([\d]+\.*\d*)/gi, " $1; $2");
- }
- Zotero.debug(location);
- var agencies = doc.evaluate('//td[b="Agency / person recording"]/table/tbody/tr/td[2]', doc, nsResolver, XPathResult.ANY_TYPE, null);
- while (agency = agencies.iterateNext()) {
- item.creators.push({lastName: agency.textContent, creatorType: "creator"});
- }
- item.url = "http://www.naa.gov.au/cgi-bin/Search?Number=" + item.archiveLocation;
- item.manuscriptType = "series";
- // Find out how many items from this series have been described on RecordSearch
- var itemsURL = baseURL + "SearchOF.asp?DP=2&Q=SER_SERIES_NO=QT" + item.archiveLocation + "QT";
- Zotero.Utilities.processDocuments(itemsURL, function(itemDoc) {
- var numItems = Zotero.Utilities.trimInternal(itemDoc.evaluate('//tr[2]/td[2]', itemDoc, nsResolver, XPathResult.FIRST_ORDERED_NODE_TYPE, null).singleNodeValue.textContent);
- Zotero.debug(numItems);
- if (numItems == "No records found") {
- numItems = "none";
- }
- item.extra = "Quantity and location: " + location + "\nNumber of items described: " + numItems;
- item.complete();
- setupCallback();
- });
- });
- // Scrape file details
- } else if (record.match(/ItemDetail.asp/i)) {
- Zotero.Utilities.processDocuments(record, function (doc) {
- item.title = Zotero.Utilities.trimInternal(doc.evaluate('//td[b="Title"]', doc, nsResolver, XPathResult.FIRST_ORDERED_NODE_TYPE, null).singleNodeValue.lastChild.textContent);
- var series = Zotero.Utilities.trimInternal(doc.evaluate('//td[b="Series number"]', doc, nsResolver, XPathResult.FIRST_ORDERED_NODE_TYPE, null).singleNodeValue.lastChild.textContent);
- var control = Zotero.Utilities.trimInternal(doc.evaluate('//td[b="Control symbol"]', doc, nsResolver, XPathResult.FIRST_ORDERED_NODE_TYPE, null).singleNodeValue.lastChild.textContent);
- item.date = Zotero.Utilities.trimInternal(doc.evaluate('//td[b="Contents date range"]', doc, nsResolver, XPathResult.FIRST_ORDERED_NODE_TYPE, null).singleNodeValue.lastChild.textContent);
- var access = Zotero.Utilities.trimInternal(doc.evaluate('//td[b="Access status"]', doc, nsResolver, XPathResult.FIRST_ORDERED_NODE_TYPE, null).singleNodeValue.lastChild.textContent);
- var location = Zotero.Utilities.trimInternal(doc.evaluate('//td[b="Location"]', doc, nsResolver, XPathResult.FIRST_ORDERED_NODE_TYPE, null).singleNodeValue.lastChild.textContent);
- var barcode = Zotero.Utilities.trimInternal(doc.evaluate('//td[b="Barcode"]', doc, nsResolver, XPathResult.FIRST_ORDERED_NODE_TYPE, null).singleNodeValue.lastChild.textContent);
- // Has the file been digitised?
- if (doc.body.innerHTML.match("View digital copy")) {
- var digitised = "yes";
- } else {
- var digitised = "no";
- }
- item.url = "http://www.naa.gov.au/cgi-bin/Search?O=I&Number=" + barcode;
- item.archiveLocation = series + ", " + control;
- item.manuscriptType = "file";
- item.extra = "Location: " + location + "\nAccess: " + access + "\nDigitised: " + digitised;
- // If it's digitised find out how many pages in the digitised file
- itemURL = baseURL + "imagine.asp?B=" + barcode + "&I=1&SE=1";
- if (digitised == "yes") {
- Zotero.Utilities.processDocuments(itemURL, function(itemDoc) {
- var pages = Zotero.Utilities.trimInternal(itemDoc.evaluate('//input[@id="Hidden3"]/@value', itemDoc, nsResolver, XPathResult.FIRST_ORDERED_NODE_TYPE, null).singleNodeValue.textContent);
- item.numPages = "1-" + pages;
- item.pages = "1-" + pages;
- item.complete();
- setupCallback();
- });
- } else {
- item.complete();
- setupCallback();
- }
- });
+ }
+ items = Zotero.selectItems(items);
+ for (var i in items) {
+ records.push(i);
}
} else {
- Zotero.done();
+ records = [url];
}
+ Zotero.Utilities.processDocuments(records, scrape, function(){Zotero.done();});
+ Zotero.wait();
}
- setupCallback();
+}
+/**
+ * Save a single digitised page (folio) as a Zotero "manuscript" item.
+ *
+ * Works on the document that is already loaded rather than on its URL. The
+ * barcode, page number and page count are read from the page, then the parent
+ * file's ItemDetail.aspx page is fetched with Zotero.Utilities.retrieveDocument
+ * to build the "series, control symbol" reference used in the title, the
+ * archive location, the attachment title and the machine tags.
+ *
+ * @param {Document} doc - the loaded digitised-page document
+ * @throws {Error} when the Greasemonkey-style page's image URL carries no
+ *     "B=<digits>&S=<digits>&" parameters
+ */
+function processFolio(doc) {
+    var namespace = doc.documentElement.namespaceURI;
+    var nsResolver = namespace ? function(prefix) {
+        if (prefix == 'x') return namespace; else return null;
+    } : null;
+    // First node matching xpath inside context, or null when nothing matches.
+    function firstNode(context, xpath) {
+        return context.evaluate(xpath, context, nsResolver, XPathResult.FIRST_ORDERED_NODE_TYPE, null).singleNodeValue;
+    }
+    // To avoid cross-domain problems, find the base url
+    var baseURL = doc.location.href.match(/(http:\/\/[a-z0-9]+\.naa\.gov\.au)/)[1];
+    var item = new Zotero.Item("manuscript");
+    item.archive = "National Archives of Australia";
+    item.libraryCatalog = "RecordSearch";
+    var barcode, page, numPages;
+    // Using my Greasemonkey interface
+    if (doc.body.innerHTML.match(/Digital copy of NAA:/)) {
+        var imageSrc = firstNode(doc, '//img[@id="fileimage"]/@src').textContent;
+        // Keep the match result: the static RegExp.$1/$2 would still hold the
+        // captures of the baseURL match above if this pattern did not match.
+        var imageParams = imageSrc.match(/B=(\d+)&S=(\d+)&/);
+        if (!imageParams) {
+            throw new Error("National Archives of Australia: cannot read barcode and page from image URL " + imageSrc);
+        }
+        barcode = imageParams[1];
+        page = imageParams[2];
+        numPages = Zotero.Utilities.trimInternal(firstNode(doc, '//input[@id="printto"]/@value').textContent);
+    // Using the original RS interface
+    } else {
+        barcode = Zotero.Utilities.trimInternal(firstNode(doc, '//input[@id="Hidden1"]/@value').textContent);
+        page = Zotero.Utilities.trimInternal(firstNode(doc, '//input[@id="Text1"]/@value').textContent);
+        numPages = Zotero.Utilities.trimInternal(firstNode(doc, '//input[@id="Hidden3"]/@value').textContent);
+    }
+    item.manuscriptType = 'folio';
+    item.pages = page;
+    item.numPages = numPages;
+    // The link to the image file - there's no way to link to the image in the context of the file
+    item.url = 'http://recordsearch.naa.gov.au/NaaMedia/ShowImage.asp?B=' + barcode + '&S=' + item.pages + '&T=P';
+    // Retrieve file details and extract reference details
+    var itemURL = baseURL + '/SearchNRetrieve/Interface/DetailsReports/ItemDetail.aspx?Barcode=' + barcode;
+    var itemDoc = Zotero.Utilities.retrieveDocument(itemURL);
+    var series = Zotero.Utilities.trimInternal(firstNode(itemDoc, '//td[@class="field"][. ="Series number"]/following-sibling::td/a').lastChild.textContent);
+    var control = Zotero.Utilities.trimInternal(firstNode(itemDoc, '//td[@class="field"][. ="Control symbol"]/following-sibling::td').lastChild.textContent);
+    var refNumber = series + ", " + control;
+    item.title = 'Page ' + page + ' of NAA: ' + refNumber;
+    item.archiveLocation = refNumber;
+    // Save a copy of the image
+    item.attachments = [{url:item.url, title:'Digital copy of NAA: ' + refNumber + ', p. ' + page, mimeType:"image/jpeg" }];
+    // MACHINE TAGS
+    // The file of which this page is a part.
+    item.tags.push('dcterms:isPartOf="http://www.naa.gov.au/cgi-bin/Search?O=I&Number=' + barcode + '"');
+    // Citation
+    item.tags.push('dcterms:bibliographicCitation="NAA: ' + refNumber + ', p. ' + page + '"');
+    item.tags.push('xmlns:dcterms="http://purl.org/dc/terms/"');
+    item.complete();
+    // NOTE(review): the Zotero.wait() below is an unchanged context line of this
+    // diff; doWeb calls Zotero.done() right after this function returns - confirm
+    // the wait is still wanted here.
Zotero.wait();
}
+/**
+ * Build and save one Zotero "manuscript" item from a detail page.
+ *
+ * The page type is chosen from doc.location.href:
+ *   - PhotoSearchItemDetail.asp -> manuscriptType "photograph"
+ *   - SeriesDetail.asp          -> manuscriptType "series"
+ *   - ItemDetail.asp            -> manuscriptType "file"
+ * Any other URL still produces an item carrying only the archive name.
+ * Extra metadata is stored as machine tags of the form prefix:name="value".
+ *
+ * Fields read without a null guard (titles, reference numbers, barcode, series
+ * and item dates, access status) raise a TypeError when the page lacks them;
+ * the guarded ones (photo date/location, subjects, notes, formats, item counts,
+ * page count) are simply left out.
+ *
+ * @param {Document} doc - the detail page to scrape
+ */
+function scrape(doc) {
+    var namespace = doc.documentElement.namespaceURI;
+    var nsResolver = namespace ? function(prefix) {
+        if (prefix == 'x') return namespace; else return null;
+    } : null;
+    // First node matching xpath inside context, or null when nothing matches.
+    function firstNode(context, xpath) {
+        return context.evaluate(xpath, context, nsResolver, XPathResult.FIRST_ORDERED_NODE_TYPE, null).singleNodeValue;
+    }
+    // Text content of a node, passed through Zotero.Utilities.trimInternal.
+    function textOf(node) {
+        return Zotero.Utilities.trimInternal(node.textContent);
+    }
+    // Same, but for the node's last child (the value part of a RecordSearch cell).
+    function lastChildText(node) {
+        return Zotero.Utilities.trimInternal(node.lastChild.textContent);
+    }
+    // Value cell following the RecordSearch field cell whose text equals label, or null.
+    function valueCell(label) {
+        return firstNode(doc, '//td[@class="field"][. ="' + label + '"]/following-sibling::td');
+    }
+    var pageURL = doc.location.href;
+    var item = new Zotero.Item("manuscript");
+    item.archive = "National Archives of Australia";
+    var barcode, series, control, refNumber, node, i;
+    // Photosearch item. This test has to come before the ItemDetail.asp one,
+    // because "PhotoSearchItemDetail.asp" also contains "ItemDetail.asp".
+    if (pageURL.match(/PhotoSearchItemDetail.asp/i)) {
+        item.libraryCatalog = "PhotoSearch";
+        item.title = textOf(firstNode(doc, '//b[. ="Title :"]/following-sibling::text()[1]'));
+        item.manuscriptType = "photograph";
+        barcode = textOf(firstNode(doc, '//b[. ="Barcode : "]/following-sibling::text()[1]'));
+        series = textOf(firstNode(doc, '//b[. ="Find other items in this series :"]/following-sibling::a/text()[1]'));
+        refNumber = textOf(firstNode(doc, '//b[. ="Image no. :"]/following-sibling::text()[1]'));
+        item.archiveLocation = refNumber;
+        item.url = "http://www.naa.gov.au/cgi-bin/Search?O=PSI&Number=" + barcode;
+        node = firstNode(doc, '//b[. ="Date :"]/following-sibling::text()[1]');
+        if (node != null) {
+            item.date = textOf(node);
+        }
+        node = firstNode(doc, '//b[. ="Location : "]/following-sibling::text()[1]');
+        if (node != null) {
+            item.place = textOf(node);
+        }
+        // Save subjects as tags; a missing or empty subject is skipped.
+        var subjectPaths = [
+            '//b[. ="Primary subject :"]/following-sibling::*[1]',
+            '//b[. ="Secondary subject :"]/following-sibling::*[1]'
+        ];
+        for (i = 0; i < subjectPaths.length; i++) {
+            node = firstNode(doc, subjectPaths[i]);
+            var subject = (node != null) ? textOf(node).toLowerCase() : '';
+            if (subject != '') {
+                item.tags.push(subject);
+            }
+        }
+        // Citation
+        item.tags.push('dcterms:bibliographicCitation="NAA: ' + refNumber + '"');
+        // Save barcode as identifier
+        item.tags.push('dcterms:identifier="' + barcode + '"');
+        // Series of which this is a member
+        item.tags.push('dcterms:isPartOf="http://www.naa.gov.au/cgi-bin/Search?Number=' + series + '"');
+        // Same file in RecordSearch
+        item.tags.push('owl:sameAs="http://www.naa.gov.au/cgi-bin/Search?O=I&Number=' + barcode + '"');
+        // Namespace declarations
+        item.tags.push('xmlns:dcterms="http://purl.org/dc/terms/"');
+        item.tags.push('xmlns:owl="http://www.w3.org/2002/07/owl#"');
+        // Attach copy of photo as attachment
+        var imgURL = "http://recordsearch.naa.gov.au/NaaMedia/ShowImage.asp?B=" + barcode + "&S=1&T=P";
+        item.attachments = [{url:imgURL, title:"Digital image of NAA: "+ item.archiveLocation, mimeType:"image/jpeg" }];
+    } else if (pageURL.match(/SeriesDetail.asp/i)) {
+        item.libraryCatalog = "RecordSearch";
+        item.title = lastChildText(valueCell('Title'));
+        refNumber = lastChildText(valueCell('Series number'));
+        item.archiveLocation = refNumber;
+        item.manuscriptType = "series";
+        // Link into RecordSearch
+        item.url = "http://www.naa.gov.au/cgi-bin/Search?Number=" + refNumber;
+        // Contents dates (the trailing space is part of the field label being matched)
+        item.date = lastChildText(valueCell('Contents dates '));
+        // Agencies recording into this series
+        var agencies = doc.evaluate('//div[@id="provenanceRecording"]/ul/li/div[@class="linkagesInfo"]', doc, nsResolver, XPathResult.ANY_TYPE, null);
+        var agency;
+        while ((agency = agencies.iterateNext())) {
+            item.creators.push({lastName: agency.textContent, creatorType: "creator"});
+        }
+        // Save series note as abstract
+        node = firstNode(doc, '//div[@id="notes"]');
+        if (node != null) {
+            item.abstractNote = Zotero.Utilities.cleanTags(textOf(node));
+        }
+        // MACHINE TAGS
+        // Format - test the matched node itself, so a series without this row is skipped
+        node = firstNode(doc, '//td[@class="field"][div="Predominant physical format"]/following-sibling::td');
+        if (node != null) {
+            item.tags.push('dcterms:format="' + lastChildText(node) + '"');
+        }
+        // Number of items described on RecordSearch (the link may be absent or empty)
+        node = firstNode(doc, '//td[@class="field"][. ="Items in this series on RecordSearch"]/following-sibling::td/a');
+        if (node != null && node.textContent != '') {
+            item.tags.push('dcterms:extent="' + textOf(node) + ' items described"');
+        }
+        // Quantities and locations
+        var quantities = doc.evaluate('//td[@class="field"][. ="Quantity and location"]/following-sibling::td/ul/li', doc, nsResolver, XPathResult.ANY_TYPE, null);
+        var quantity;
+        while ((quantity = quantities.iterateNext())) {
+            item.tags.push('dcterms:extent="' +quantity.textContent + '"');
+        }
+        // Citation
+        item.tags.push('dcterms:bibliographicCitation="NAA: ' + refNumber + '"');
+        // Declare dcterms namespace
+        item.tags.push('xmlns:dcterms="http://purl.org/dc/terms/"');
+    } else if (pageURL.match(/ItemDetail.asp/i)) {
+        item.manuscriptType = 'file';
+        item.libraryCatalog = "RecordSearch";
+        item.title = lastChildText(valueCell('Title'));
+        // Here the series number is wrapped in a link, so read the <a> inside the cell
+        series = lastChildText(firstNode(doc, '//td[@class="field"][. ="Series number"]/following-sibling::td/a'));
+        control = lastChildText(valueCell('Control symbol'));
+        refNumber = series + ', ' + control;
+        item.archiveLocation = refNumber;
+        barcode = lastChildText(valueCell('Item barcode'));
+        // Link into RecordSearch
+        item.url = "http://www.naa.gov.au/cgi-bin/Search?O=I&Number=" + barcode;
+        // Contents dates
+        item.date = lastChildText(valueCell('Contents date range'));
+        // Location
+        node = valueCell('Location');
+        if (node != null) {
+            item.place = lastChildText(node);
+        }
+        // Save item note as abstract
+        node = firstNode(doc, '//div[@id="notes"]');
+        if (node != null) {
+            item.abstractNote = Zotero.Utilities.cleanTags(textOf(node));
+        }
+        // MACHINE TAGS
+        // The series this item belongs to
+        item.tags.push('dcterms:isPartOf="http://www.naa.gov.au/cgi-bin/Search?Number=' + series + '"');
+        // Citation
+        item.tags.push('dcterms:bibliographicCitation="NAA: ' + refNumber + '"');
+        // Save the barcode as an identifier
+        item.tags.push('dcterms:identifier="' + barcode + '"');
+        // Access status
+        item.tags.push('dcterms:accessRights="' + lastChildText(valueCell('Access status')) + '"');
+        // Format
+        node = firstNode(doc, '//td[@class="field"][div="Physical format"]/following-sibling::td');
+        if (node != null) {
+            item.tags.push('dcterms:format="' + lastChildText(node) + '"');
+        }
+        // Is there a digital copy? - if so find the number of pages in the digitised file
+        if (firstNode(doc, '//a[. ="View digital copy "]') != null) {
+            // To avoid cross-domain problems, request the digitised file from the
+            // same host as this page; the base url is only needed in this branch.
+            var baseURL = pageURL.match(/(http:\/\/[a-z0-9]+\.naa\.gov\.au)/)[1];
+            var itemURL = baseURL + "/scripts/Imagine.asp?B=" + barcode;
+            // Retrieve the digitised file
+            var itemDoc = Zotero.Utilities.retrieveDocument(itemURL);
+            // The page count is optional extra data, so a viewer page without it is tolerated
+            node = itemDoc ? firstNode(itemDoc, '//input[@id="Hidden3"]/@value') : null;
+            if (node != null) {
+                item.numPages = textOf(node);
+            }
+        }
+        // Declare dcterms namespace
+        item.tags.push('xmlns:dcterms="http://purl.org/dc/terms/"');
+    }
+    item.complete();
+}