www

Unnamed repository; edit this file 'description' to name the repository.
Log | Files | Refs | Submodules | README | LICENSE

commit 0753d7891020c73b17f11678a1fc6ef29f08d45a
parent 9bcaad5946c7e1811c9ae4604ba5bd65e78b9ece
Author: Simon Kornblith <simon@simonster.com>
Date:   Tue,  6 Jun 2006 21:35:23 +0000

- Add VTLS scraper
- Fix loadDocument/processDocuments (broken by r145)



Diffstat:
M chrome/chromeFiles/content/scholar/ingester/browser.xul | 9 ++++++---
M scrapers.sql | 45 +++++++++++++++++++++++++++++++++++++++++++--
2 files changed, 49 insertions(+), 5 deletions(-)

diff --git a/chrome/chromeFiles/content/scholar/ingester/browser.xul b/chrome/chromeFiles/content/scholar/ingester/browser.xul @@ -19,7 +19,10 @@ <image id="scholar-status-image" width="16" height="16" onclick="Scholar.Ingester.Interface.scrapeThisPage()" /> </statusbarpanel> </statusbar> - <box style="visibility: collapse"> - <browser id="scholar-hidden-browser" /> - </box> + + <window id="main-window"> + <box style="visibility: collapse"> + <browser id="scholar-hidden-browser" /> + </box> + </window> </overlay> diff --git a/scrapers.sql b/scrapers.sql @@ -944,7 +944,6 @@ utilities.loadDocument(newUri, browser, function(newBrowser) { wait();'); - INSERT INTO "scrapers" VALUES(12, NULL, NULL, 20060603002000, 'Dynix Scraper', 'Simon Kornblith', 'ipac\.jsp\?.*uri=full=[0-9]', NULL, 'var prefixRDF = ''http://www.w3.org/1999/02/22-rdf-syntax-ns#''; var prefixDC = ''http://purl.org/dc/elements/1.1/''; @@ -988,7 +987,7 @@ utilities.loadDocument(newUri, browser, function(newBrowser) { var elmt = elmts[i]; var field = stringTrimmer(getNode(doc, elmt, ''./TD[1]/A[1]/text()[1]'', nsResolver).nodeValue); var value = getNodeString(doc, elmt, ''./TD[2]/TABLE[1]/TBODY[1]/TR[1]/TD[1]/A[1]//text()'', nsResolver); - var value = value.replace(/\$([a-z]) /g, record.subfield_delimiter+"$1"); + value = value.replace(/\$([a-z]) /g, record.subfield_delimiter+"$1"); if(field != "FMT" && field != "LDR") { var ind1 = ""; @@ -1011,4 +1010,46 @@ utilities.loadDocument(newUri, browser, function(newBrowser) { }, function() {}) wait();'); + +INSERT INTO "scrapers" VALUES(13, NULL, NULL, 20060603002000, 'VTLS Scraper', 'Simon Kornblith', 'chameleon\?.*function=(?:CARDSCR|INITREQ)', NULL, +'var prefixRDF = ''http://www.w3.org/1999/02/22-rdf-syntax-ns#''; +var prefixDC = ''http://purl.org/dc/elements/1.1/''; +var prefixDCMI = ''http://purl.org/dc/dcmitype/''; +var prefixDummy = ''http://chnm.gmu.edu/firefox-scholar/''; + +var uri = doc.location.href; +var newUri = uri.replace(/function=[A-Z]{7}/, 
"function=MARCSCR"); +utilities.debugPrint(newUri); + +var getNode = function(doc, contextNode, xpath, nsResolver) { + return doc.evaluate(xpath, contextNode, nsResolver, XPathResult.ANY_TYPE,null).iterateNext(); +} + +utilities.loadDocument(newUri, browser, function(newBrowser) { + newDoc = newBrowser.contentDocument; + + var namespace = newDoc.documentElement.namespaceURI; + var nsResolver = namespace ? function(prefix) { + if (prefix == ''x'') return namespace; else return null; + } : null; + + var xpath = ''//table[@class="outertable"]/tbody/tr[td[4]]''; + var elmts = utilities.gatherElementsOnXPath(newDoc, newDoc, xpath, nsResolver); + var record = new MARC_Record(); + for(var i=0; i<elmts.length; i++) { + var elmt = elmts[i]; + var field = getNode(doc, elmt, ''./TD[1]/text()[1]'', nsResolver).nodeValue; + var ind1 = getNode(doc, elmt, ''./TD[2]/text()[1]'', nsResolver).nodeValue; + var ind2 = getNode(doc, elmt, ''./TD[3]/text()[1]'', nsResolver).nodeValue; + var value = getNode(doc, elmt, ''./TD[4]/text()[1]'', nsResolver).nodeValue; + value = value.replace(/\\([a-z]) /g, record.subfield_delimiter+"$1"); + + record.add_field(field, ind1, ind2, value); + } + + model = utilities.importMARCRecord(record, uri, model); + done(); +}, function() {}) + +wait();'); COMMIT; \ No newline at end of file