commit 303c6ee68d8c2e4536359192565175779a1c0863
parent d73127b1b33aa37da77071e1b5e41f8dac7ec6bd
Author: Simon Kornblith <simon@simonster.com>
Date: Mon, 26 Jun 2006 01:08:59 +0000
closes #41, get library call number
Diffstat:
2 files changed, 55 insertions(+), 27 deletions(-)
diff --git a/chrome/chromeFiles/content/scholar/xpcom/ingester.js b/chrome/chromeFiles/content/scholar/xpcom/ingester.js
@@ -412,8 +412,8 @@ Scholar.Ingester.Utilities.prototype.cleanAuthor = function(author) {
* Cleans whitespace off a string and replaces multiple spaces with one
*/
Scholar.Ingester.Utilities.prototype.cleanString = function(s) {
- s = this.trimString(s);
- return s.replace(/[ \xA0]+/g, " ");
+ s = s.replace(/[ \xA0]+/g, " ");
+ return this.trimString(s);
}
/*
@@ -523,14 +523,18 @@ Scholar.Ingester.Utilities.prototype._MARCAssociateField = function(record, uri,
Scholar.debug('Found '+field.length+' matches for '+fieldNo+part);
if(field) {
for(i in field) {
- if(field[i][part]) {
- var value = field[i][part];
- Scholar.debug(value);
- if(fieldNo == '245') { // special case - title + subtitle
- if(field[i]['b']) {
- value += ' '+field[i]['b'];
+ var value;
+ for(var j=0; j<part.length; j++) {
+ var myPart = part.substr(j, 1);
+ if(field[i][myPart]) {
+ if(value) {
+ value += " "+field[i][myPart];
+ } else {
+ value = field[i][myPart];
}
}
+ }
+ if(value) {
if(execMe) {
value = execMe(value);
}
@@ -550,6 +554,7 @@ Scholar.Ingester.Utilities.prototype.importMARCRecord = function(record, uri, mo
var prefixDC = 'http://purl.org/dc/elements/1.1/';
var prefixDCMI = 'http://purl.org/dc/dcmitype/';
var prefixDummy = 'http://chnm.gmu.edu/firefox-scholar/';
+ var prefixRDF = 'http://www.w3.org/1999/02/22-rdf-syntax-ns#';
// Extract ISBNs
model = this._MARCAssociateField(record, uri, model, '020', prefixDC + 'identifier', this._MARCCleanNumber, 'ISBN ');
@@ -570,7 +575,7 @@ Scholar.Ingester.Utilities.prototype.importMARCRecord = function(record, uri, mo
}
}
// Extract title
- model = this._MARCAssociateField(record, uri, model, '245', prefixDC + 'title', this._MARCCleanString);
+ model = this._MARCAssociateField(record, uri, model, '245', prefixDC + 'title', this._MARCCleanString, '', 'ab');
// Extract edition
model = this._MARCAssociateField(record, uri, model, '250', prefixDC + 'hasVersion', this._MARCCleanString);
// Extract place info
@@ -581,6 +586,16 @@ Scholar.Ingester.Utilities.prototype.importMARCRecord = function(record, uri, mo
model = this._MARCAssociateField(record, uri, model, '260', prefixDC + 'year', this._MARCPullYear, '', 'c');
// Extract series
model = this._MARCAssociateField(record, uri, model, '440', prefixDummy + 'series', this._MARCCleanString);
+ // Extract call number
+ model = this._MARCAssociateField(record, uri, model, '050', prefixDC + 'identifier', this._MARCCleanString, 'LCC ', 'ab');
+ model = this._MARCAssociateField(record, uri, model, '060', prefixDC + 'identifier', this._MARCCleanString, 'NLM ', 'ab');
+ model = this._MARCAssociateField(record, uri, model, '070', prefixDC + 'identifier', this._MARCCleanString, 'NAL ', 'ab');
+ model = this._MARCAssociateField(record, uri, model, '080', prefixDC + 'identifier', this._MARCCleanString, 'UDC ', 'ab');
+ model = this._MARCAssociateField(record, uri, model, '082', prefixDC + 'identifier', this._MARCCleanString, 'DDC ', 'a');
+ model = this._MARCAssociateField(record, uri, model, '084', prefixDC + 'identifier', this._MARCCleanString, 'CN ', 'ab');
+
+ // Set type
+ model = model.addStatement(uri, prefixRDF + 'type', prefixDummy + "book", true);
}
/*
@@ -912,6 +927,9 @@ Scholar.Ingester.Document.prototype._updateDatabase = function() {
var prefixDCMI = 'http://purl.org/dc/dcmitype/';
var prefixDummy = 'http://chnm.gmu.edu/firefox-scholar/';
+ // Call number fields, in order of preference
+ var callNumbers = new Array("LCC", "DDC", "UDC", "NLM", "NAL", "CN");
+
try {
for(var uri in this.model.data) {
// Get typeID, defaulting to "website"
@@ -991,22 +1009,29 @@ Scholar.Ingester.Document.prototype._updateDatabase = function() {
}
}
- // Handle ISBNs/ISSNs
+ // Handle ISBNs/ISSNs/Call Numbers
if(this.model.data[uri][prefixDC + 'identifier']) {
+ var oldIndex = -1;
var needISSN = Scholar.ItemFields.isValidForType(Scholar.ItemFields.getID("ISSN"), typeID);
var needISBN = Scholar.ItemFields.isValidForType(Scholar.ItemFields.getID("ISBN"), typeID);
- if(needISSN || needISBN) {
- for(i in this.model.data[uri][prefixDC + 'identifier']) {
- firstFour = this.model.data[uri][prefixDC + 'identifier'][i].substring(0, 4);
- if(needISSN && firstFour == 'ISSN') {
- newItem.setField("ISSN", this.model.data[uri][prefixDC + 'identifier'][0].substring(5));
- break;
- }
- if(needISBN && firstFour == 'ISBN') {
- newItem.setField("ISBN", this.model.data[uri][prefixDC + 'identifier'][0].substring(5));
- break;
- }
+ for(i in this.model.data[uri][prefixDC + 'identifier']) {
+ prefix = this.model.data[uri][prefixDC + 'identifier'][i].substr(0, this.model.data[uri][prefixDC + 'identifier'][i].indexOf(" "));
+ if(needISSN && prefix == 'ISSN') {
+ newItem.setField("ISSN", this.model.data[uri][prefixDC + 'identifier'][i].substring(5));
+ needISSN = false;
+ }
+ if(needISBN && prefix == 'ISBN') {
+ newItem.setField("ISBN", this.model.data[uri][prefixDC + 'identifier'][i].substring(5));
+ needISBN = false;
}
+ var newIndex = Scholar.arraySearch(prefix, callNumbers);
+ if(newIndex && newIndex > oldIndex) {
+ oldIndex = newIndex;
+ var callNumber = this.model.data[uri][prefixDC + 'identifier'][i].substring(prefix.length+1);
+ }
+ }
+ if(callNumber) {
+ newItem.setField("callNumber", callNumber);
}
}
diff --git a/scrapers.sql b/scrapers.sql
@@ -1,7 +1,7 @@
--- 23
+-- 24
-- Set the following timestamp to the most recent scraper update date
-REPLACE INTO "version" VALUES ('repository', STRFTIME('%s', '2006-06-25 18:00:00'));
+REPLACE INTO "version" VALUES ('repository', STRFTIME('%s', '2006-06-25 21:06:00'));
REPLACE INTO "scrapers" VALUES('96b9f483-c44d-5784-cdad-ce21b984fe01', '2006-06-22 22:58:00', 'Amazon.com Scraper', 'Simon Kornblith', '^http://www\.amazon\.com/(?:gp/(?:product|search)/|exec/obidos/search-handle-url/)', NULL, 'var prefixRDF = ''http://www.w3.org/1999/02/22-rdf-syntax-ns#'';
var prefixDC = ''http://purl.org/dc/elements/1.1/'';
@@ -809,7 +809,7 @@ if(newUri) {
wait();');
-REPLACE INTO "scrapers" VALUES('add7c71c-21f3-ee14-d188-caf9da12728b', '2006-06-25 15:32:00', 'SIRSI 2003+ Scraper', 'Simon Kornblith', '/uhtbin/cgisirsi',
+REPLACE INTO "scrapers" VALUES('add7c71c-21f3-ee14-d188-caf9da12728b', '2006-06-25 21:06:00', 'SIRSI 2003+ Scraper', 'Simon Kornblith', '/uhtbin/cgisirsi',
'var namespace = doc.documentElement.namespaceURI;
var nsResolver = namespace ? function(prefix) {
if (prefix == ''x'') return namespace; else return null;
@@ -911,6 +911,11 @@ function scrape(doc) {
} catch (e) {}
}
+ var callNumber = utilities.getNode(doc, doc, ''//tr/td[1][@class="holdingslist"]/text()'', nsResolver);
+ if(callNumber && callNumber.nodeValue) {
+ model.addStatement(uri, prefixDC + "identifier", "CN "+callNumber.nodeValue, true);
+ }
+
model.addStatement(uri, prefixRDF + "type", prefixDummy + "book", false);
return true;
}
@@ -1343,7 +1348,7 @@ if(detailRe.test(doc.location.href)) {
wait();
}');
-REPLACE INTO "scrapers" VALUES('cf87eca8-041d-b954-795a-2d86348999d5', '2006-06-23 13:34:00', 'Aleph Scraper', 'Simon Kornblith', '^http://[^/]+/F(?:/[A-Z0-9\-]+(?:\?.*)?$|\?func=find)',
+REPLACE INTO "scrapers" VALUES('cf87eca8-041d-b954-795a-2d86348999d5', '2006-06-25 20:51:00', 'Aleph Scraper', 'Simon Kornblith', '^http://[^/]+/F(?:/[A-Z0-9\-]+(?:\?.*)?$|\?func=find)',
'var singleRe = new RegExp("^http://[^/]+/F/[A-Z0-9\-]+\?.*func=full-set-set.*\&format=[0-9]{3}");
if(singleRe.test(doc.location.href)) {
@@ -1425,8 +1430,6 @@ utilities.processDocuments(browser, null, newUris, function(newBrowser) {
record.add_field(code, ind1, ind2, value);
}
}
-
- model.addStatement(uri, prefixRDF + "type", prefixDummy + "book", false);
utilities.importMARCRecord(record, uri, model);
}, function() { done(); }, function() {});