www

Unnamed repository; edit this file 'description' to name the repository.
Log | Files | Refs | Submodules | README | LICENSE

commit 303c6ee68d8c2e4536359192565175779a1c0863
parent d73127b1b33aa37da77071e1b5e41f8dac7ec6bd
Author: Simon Kornblith <simon@simonster.com>
Date:   Mon, 26 Jun 2006 01:08:59 +0000

closes #41, get library call number


Diffstat:
M chrome/chromeFiles/content/scholar/xpcom/ingester.js | 67 ++++++++++++++++++++++++++++++++++++++++++++++---------------------
M scrapers.sql | 15 +++++++++------
2 files changed, 55 insertions(+), 27 deletions(-)

diff --git a/chrome/chromeFiles/content/scholar/xpcom/ingester.js b/chrome/chromeFiles/content/scholar/xpcom/ingester.js @@ -412,8 +412,8 @@ Scholar.Ingester.Utilities.prototype.cleanAuthor = function(author) { * Cleans whitespace off a string and replaces multiple spaces with one */ Scholar.Ingester.Utilities.prototype.cleanString = function(s) { - s = this.trimString(s); - return s.replace(/[ \xA0]+/g, " "); + s = s.replace(/[ \xA0]+/g, " "); + return this.trimString(s); } /* @@ -523,14 +523,18 @@ Scholar.Ingester.Utilities.prototype._MARCAssociateField = function(record, uri, Scholar.debug('Found '+field.length+' matches for '+fieldNo+part); if(field) { for(i in field) { - if(field[i][part]) { - var value = field[i][part]; - Scholar.debug(value); - if(fieldNo == '245') { // special case - title + subtitle - if(field[i]['b']) { - value += ' '+field[i]['b']; + var value; + for(var j=0; j<part.length; j++) { + var myPart = part.substr(j, 1); + if(field[i][myPart]) { + if(value) { + value += " "+field[i][myPart]; + } else { + value = field[i][myPart]; } } + } + if(value) { if(execMe) { value = execMe(value); } @@ -550,6 +554,7 @@ Scholar.Ingester.Utilities.prototype.importMARCRecord = function(record, uri, mo var prefixDC = 'http://purl.org/dc/elements/1.1/'; var prefixDCMI = 'http://purl.org/dc/dcmitype/'; var prefixDummy = 'http://chnm.gmu.edu/firefox-scholar/'; + var prefixRDF = 'http://www.w3.org/1999/02/22-rdf-syntax-ns#'; // Extract ISBNs model = this._MARCAssociateField(record, uri, model, '020', prefixDC + 'identifier', this._MARCCleanNumber, 'ISBN '); @@ -570,7 +575,7 @@ Scholar.Ingester.Utilities.prototype.importMARCRecord = function(record, uri, mo } } // Extract title - model = this._MARCAssociateField(record, uri, model, '245', prefixDC + 'title', this._MARCCleanString); + model = this._MARCAssociateField(record, uri, model, '245', prefixDC + 'title', this._MARCCleanString, '', 'ab'); // Extract edition model = this._MARCAssociateField(record, uri, 
model, '250', prefixDC + 'hasVersion', this._MARCCleanString); // Extract place info @@ -581,6 +586,16 @@ Scholar.Ingester.Utilities.prototype.importMARCRecord = function(record, uri, mo model = this._MARCAssociateField(record, uri, model, '260', prefixDC + 'year', this._MARCPullYear, '', 'c'); // Extract series model = this._MARCAssociateField(record, uri, model, '440', prefixDummy + 'series', this._MARCCleanString); + // Extract call number + model = this._MARCAssociateField(record, uri, model, '050', prefixDC + 'identifier', this._MARCCleanString, 'LCC ', 'ab'); + model = this._MARCAssociateField(record, uri, model, '060', prefixDC + 'identifier', this._MARCCleanString, 'NLM ', 'ab'); + model = this._MARCAssociateField(record, uri, model, '070', prefixDC + 'identifier', this._MARCCleanString, 'NAL ', 'ab'); + model = this._MARCAssociateField(record, uri, model, '080', prefixDC + 'identifier', this._MARCCleanString, 'UDC ', 'ab'); + model = this._MARCAssociateField(record, uri, model, '082', prefixDC + 'identifier', this._MARCCleanString, 'DDC ', 'a'); + model = this._MARCAssociateField(record, uri, model, '084', prefixDC + 'identifier', this._MARCCleanString, 'CN ', 'ab'); + + // Set type + model = model.addStatement(uri, prefixRDF + 'type', prefixDummy + "book", true); } /* @@ -912,6 +927,9 @@ Scholar.Ingester.Document.prototype._updateDatabase = function() { var prefixDCMI = 'http://purl.org/dc/dcmitype/'; var prefixDummy = 'http://chnm.gmu.edu/firefox-scholar/'; + // Call number fields, in order of preference + var callNumbers = new Array("LCC", "DDC", "UDC", "NLM", "NAL", "CN"); + try { for(var uri in this.model.data) { // Get typeID, defaulting to "website" @@ -991,22 +1009,29 @@ Scholar.Ingester.Document.prototype._updateDatabase = function() { } } - // Handle ISBNs/ISSNs + // Handle ISBNs/ISSNs/Call Numbers if(this.model.data[uri][prefixDC + 'identifier']) { + var oldIndex = -1; var needISSN = 
Scholar.ItemFields.isValidForType(Scholar.ItemFields.getID("ISSN"), typeID); var needISBN = Scholar.ItemFields.isValidForType(Scholar.ItemFields.getID("ISBN"), typeID); - if(needISSN || needISBN) { - for(i in this.model.data[uri][prefixDC + 'identifier']) { - firstFour = this.model.data[uri][prefixDC + 'identifier'][i].substring(0, 4); - if(needISSN && firstFour == 'ISSN') { - newItem.setField("ISSN", this.model.data[uri][prefixDC + 'identifier'][0].substring(5)); - break; - } - if(needISBN && firstFour == 'ISBN') { - newItem.setField("ISBN", this.model.data[uri][prefixDC + 'identifier'][0].substring(5)); - break; - } + for(i in this.model.data[uri][prefixDC + 'identifier']) { + prefix = this.model.data[uri][prefixDC + 'identifier'][i].substr(0, this.model.data[uri][prefixDC + 'identifier'][i].indexOf(" ")); + if(needISSN && prefix == 'ISSN') { + newItem.setField("ISSN", this.model.data[uri][prefixDC + 'identifier'][i].substring(5)); + needISSN = false; + } + if(needISBN && prefix == 'ISBN') { + newItem.setField("ISBN", this.model.data[uri][prefixDC + 'identifier'][i].substring(5)); + needISBN = false; } + var newIndex = Scholar.arraySearch(prefix, callNumbers); + if(newIndex && newIndex > oldIndex) { + oldIndex = newIndex; + var callNumber = this.model.data[uri][prefixDC + 'identifier'][i].substring(prefix.length+1); + } + } + if(callNumber) { + newItem.setField("callNumber", callNumber); } } diff --git a/scrapers.sql b/scrapers.sql @@ -1,7 +1,7 @@ --- 23 +-- 24 -- Set the following timestamp to the most recent scraper update date -REPLACE INTO "version" VALUES ('repository', STRFTIME('%s', '2006-06-25 18:00:00')); +REPLACE INTO "version" VALUES ('repository', STRFTIME('%s', '2006-06-25 21:06:00')); REPLACE INTO "scrapers" VALUES('96b9f483-c44d-5784-cdad-ce21b984fe01', '2006-06-22 22:58:00', 'Amazon.com Scraper', 'Simon Kornblith', '^http://www\.amazon\.com/(?:gp/(?:product|search)/|exec/obidos/search-handle-url/)', NULL, 'var prefixRDF = 
''http://www.w3.org/1999/02/22-rdf-syntax-ns#''; var prefixDC = ''http://purl.org/dc/elements/1.1/''; @@ -809,7 +809,7 @@ if(newUri) { wait();'); -REPLACE INTO "scrapers" VALUES('add7c71c-21f3-ee14-d188-caf9da12728b', '2006-06-25 15:32:00', 'SIRSI 2003+ Scraper', 'Simon Kornblith', '/uhtbin/cgisirsi', +REPLACE INTO "scrapers" VALUES('add7c71c-21f3-ee14-d188-caf9da12728b', '2006-06-25 21:06:00', 'SIRSI 2003+ Scraper', 'Simon Kornblith', '/uhtbin/cgisirsi', 'var namespace = doc.documentElement.namespaceURI; var nsResolver = namespace ? function(prefix) { if (prefix == ''x'') return namespace; else return null; @@ -911,6 +911,11 @@ function scrape(doc) { } catch (e) {} } + var callNumber = utilities.getNode(doc, doc, ''//tr/td[1][@class="holdingslist"]/text()'', nsResolver); + if(callNumber && callNumber.nodeValue) { + model.addStatement(uri, prefixDC + "identifier", "CN "+callNumber.nodeValue, true); + } + model.addStatement(uri, prefixRDF + "type", prefixDummy + "book", false); return true; } @@ -1343,7 +1348,7 @@ if(detailRe.test(doc.location.href)) { wait(); }'); -REPLACE INTO "scrapers" VALUES('cf87eca8-041d-b954-795a-2d86348999d5', '2006-06-23 13:34:00', 'Aleph Scraper', 'Simon Kornblith', '^http://[^/]+/F(?:/[A-Z0-9\-]+(?:\?.*)?$|\?func=find)', +REPLACE INTO "scrapers" VALUES('cf87eca8-041d-b954-795a-2d86348999d5', '2006-06-25 20:51:00', 'Aleph Scraper', 'Simon Kornblith', '^http://[^/]+/F(?:/[A-Z0-9\-]+(?:\?.*)?$|\?func=find)', 'var singleRe = new RegExp("^http://[^/]+/F/[A-Z0-9\-]+\?.*func=full-set-set.*\&format=[0-9]{3}"); if(singleRe.test(doc.location.href)) { @@ -1425,8 +1430,6 @@ utilities.processDocuments(browser, null, newUris, function(newBrowser) { record.add_field(code, ind1, ind2, value); } } - - model.addStatement(uri, prefixRDF + "type", prefixDummy + "book", false); utilities.importMARCRecord(record, uri, model); }, function() { done(); }, function() {});