commit 85d8153024e114938b4802f9001fd5bd039eefa6
parent bd840a4c58237ade0f7b6637d3d8889f8d78aa13
Author: Simon Kornblith <simon@simonster.com>
Date: Sat, 3 Jun 2006 22:26:01 +0000
Add library, hooks for scraping MARC records.
Diffstat:
4 files changed, 622 insertions(+), 20 deletions(-)
diff --git a/chrome/chromeFiles/content/scholar/ingester/browser.js b/chrome/chromeFiles/content/scholar/ingester/browser.js
@@ -263,7 +263,7 @@ Scholar.Ingester.Interface.Progress = function(myWindow, myDocument, headline) {
this.div.style.right = '20px';
this.div.style.top = '20px';
this.div.style.width = '200px';
- this.div.style.height = '120px';
+ this.div.style.height = '150px';
this.div.style.backgroundColor = '#7eadd9'
this.div.style.color = '#000';
this.div.style.padding = '5px';
diff --git a/chrome/chromeFiles/content/scholar/xpcom/ingester.js b/chrome/chromeFiles/content/scholar/xpcom/ingester.js
@@ -27,7 +27,10 @@ Scholar.Ingester.Model = function() {
// RDF URI and thus this is unnecessary.
Scholar.Ingester.Model.prototype.addStatement = function(uri, rdfUri, literal) {
if(!this.data[uri]) this.data[uri] = new Object();
- this.data[uri][rdfUri] = literal;
+ if(!this.data[uri][rdfUri]) {
+ this.data[uri][rdfUri] = new Array();
+ }
+ this.data[uri][rdfUri].push(literal);
Scholar.debug(rdfUri+" for "+uri+" is "+literal);
}
@@ -200,6 +203,92 @@ Scholar.Ingester.Utilities.prototype.collectURLsWithSubstring = function(doc, su
// essential components for Scholar and would take a great deal of effort to
// implement. We can, however, always implement them later.
+// These functions are for use by importMARCRecord. They're private because,
+// while they are useful, it's also nice if as many of our scrapers as possible
+// are PiggyBank compatible, and if our scrapers called these functions
+// directly, that would break compatibility.
+Scholar.Ingester.Utilities.prototype._MARCCleanString = function(author) { // strips MARC punctuation from both ends
+    var withoutLeading = author.replace(/^[\s\.\,\/\[\]\:]+/, ''); // leading whitespace and . , / [ ] :
+    return withoutLeading.replace(/[\s\.\,\/\[\]\:]+$/, ''); // the same characters at the end
+}
+
+Scholar.Ingester.Utilities.prototype._MARCCleanAuthor = function(author) {
+    // Strips whitespace and . , / [ ] : from both ends of author, then turns
+    // "Last, First" into "First Last". Only the first two ", "-separated
+    // pieces are used; a name without ", " is returned as cleaned.
+    var cleaned = author.replace(/^[\s\.\,\/\[\]\:]+/, '').replace(/[\s\.\,\/\[\]\:]+$/, '');
+    var nameParts = cleaned.split(', ');
+    if(nameParts.length <= 1) return cleaned;
+    return nameParts[1]+' '+nameParts[0];
+}
+
+Scholar.Ingester.Utilities.prototype._MARCAssociateField = function(record, uri, model, fieldNo, rdfUri, execMe, prefix, part) {
+    // Adds a statement (uri, rdfUri, value) to model for every occurrence of MARC
+    // field fieldNo in record that has subfield `part`.
+    //   record - object providing get_field_subfields(tag); model - object providing addStatement()
+    //   execMe - optional function applied to each value before it is stored
+    //   prefix - optional string prepended to the value after execMe has run
+    //   part   - subfield code to read; defaults to 'a'
+    // For field 245, subfield b (the subtitle) is appended to the value. Returns model.
+    if(!part) part = 'a';
+    var field = record.get_field_subfields(fieldNo) || [];
+    Scholar.debug('Found '+field.length+' matches for '+fieldNo+part);
+    // a declared counter: a bare i, as in for(i in field), would create a global variable
+    for(var i = 0; i < field.length; i++) {
+        if(!field[i][part]) continue;
+        var value = field[i][part];
+        Scholar.debug(value);
+        // special case - title + subtitle
+        if(fieldNo == '245' && field[i]['b']) {
+            value += ' '+field[i]['b'];
+        }
+        if(execMe) value = execMe(value);
+        if(prefix) {
+            value = prefix + value;
+        }
+        model.addStatement(uri, rdfUri, value);
+    }
+    return model;
+}
+
+// This is an extension to PiggyBank's architecture. It's here so that we don't
+// need an enormous library for each scraper that wants to use MARC records
+Scholar.Ingester.Utilities.prototype.importMARCRecord = function(text, format, uri, model) {
+    // Parses text as a MARC record in the given format (a format name accepted
+    // by Scholar.Ingester.MARC_Record.load) and adds statements about uri to
+    // model: identifiers, creators, title, edition, place, publisher and series.
+    // Nothing is returned; model is modified through its addStatement method.
+    var prefixDC = 'http://purl.org/dc/elements/1.1/';
+    var prefixDummy = 'http://chnm.gmu.edu/firefox-scholar/';
+    var record = new Scholar.Ingester.MARC_Record();
+    record.load(text, format);
+    // Extract ISBNs
+    model = this._MARCAssociateField(record, uri, model, '020', prefixDC + 'identifier', this._MARCCleanString, 'ISBN ');
+    // Extract ISSNs (labelled 'ISSN ' so that they are not mistaken for ISBNs)
+    model = this._MARCAssociateField(record, uri, model, '022', prefixDC + 'identifier', this._MARCCleanString, 'ISSN ');
+    // Extract creators
+    model = this._MARCAssociateField(record, uri, model, '100', prefixDC + 'creator', this._MARCCleanAuthor);
+    model = this._MARCAssociateField(record, uri, model, '110', prefixDC + 'creator', this._MARCCleanString);
+    model = this._MARCAssociateField(record, uri, model, '111', prefixDC + 'creator', this._MARCCleanString);
+    model = this._MARCAssociateField(record, uri, model, '130', prefixDC + 'creator', this._MARCCleanString);
+    // No creator found: fall back to subfield a of the first 600 field, if there is one
+    var subjects = record.get_field_subfields('600');
+    if((!model.data[uri] || !model.data[uri][prefixDC + 'creator']) && subjects.length && subjects[0]['a']) {
+        model.addStatement(uri, prefixDC + 'creator', this._MARCCleanAuthor(subjects[0]['a']));
+    }
+    // Extract title
+    model = this._MARCAssociateField(record, uri, model, '245', prefixDC + 'title', this._MARCCleanString);
+    // Extract edition
+    model = this._MARCAssociateField(record, uri, model, '250', prefixDC + 'edition', this._MARCCleanString);
+    // Extract place info
+    model = this._MARCAssociateField(record, uri, model, '260', prefixDummy + 'place', this._MARCCleanString, '', 'a');
+    // Extract publisher info
+    model = this._MARCAssociateField(record, uri, model, '260', prefixDC + 'publisher', this._MARCCleanString, '', 'b');
+    // Extract series
+    model = this._MARCAssociateField(record, uri, model, '440', prefixDummy + 'series', this._MARCCleanString);
+}
+
+
// These are front ends for XMLHttpRequest. XMLHttpRequest can't actually be
// accessed outside the sandbox, and even if it could, it wouldn't let scripts
// access across domains, so everything's replicated here.
@@ -361,13 +450,15 @@ Scholar.Ingester.Document.prototype.canScrape = function(currentScraper) {
// passed regular expression test
if((!currentScraper.urlPattern || canScrape)
&& currentScraper.scraperDetectCode) {
+ Scholar.debug("Checking scraperDetectCode");
var scraperSandbox = this.sandbox;
try {
- canScrape = this.evalInSandbox("(function(){\n" +
+ canScrape = Components.utils.evalInSandbox("(function(){\n" +
currentScraper.scraperDetectCode +
"\n})()", scraperSandbox);
} catch(e) {
- throw e+' in scraperDetectCode for '+currentScraper.label;
+ Scholar.debug(e+' in scraperDetectCode for '+currentScraper.label);
+ canScrape = false;
}
}
return canScrape;
@@ -385,11 +476,10 @@ Scholar.Ingester.Document.prototype.scrapePage = function(callback) {
Scholar.debug("Scraping "+this.browser.contentDocument.location.href);
var scraperSandbox = this.sandbox;
-
try {
Components.utils.evalInSandbox(this.scraper.scraperJavaScript, scraperSandbox);
} catch(e) {
- throw e+' in scraperJavaScript for '+this.scraper.label;
+ Scholar.debug(e+' in scraperJavaScript for '+this.scraper.label);
this._scrapePageComplete();
}
@@ -465,36 +555,48 @@ Scholar.Ingester.Document.prototype._updateDatabase = function() {
var newItem = Scholar.Items.getNewItemByType(1);
newItem.setField("source", uri);
if(this.model.data[uri][prefixDC + 'title']) {
- newItem.setField("title", this.model.data[uri][prefixDC + 'title']);
+ newItem.setField("title", this.model.data[uri][prefixDC + 'title'][0]);
}
if(this.model.data[uri][prefixDC + 'publisher']) {
- newItem.setField("publisher", this.model.data[uri][prefixDC + 'publisher']);
+ newItem.setField("publisher", this.model.data[uri][prefixDC + 'publisher'][0]);
}
if(this.model.data[uri][prefixDC + 'year']) {
if(this.model.data[uri][prefixDC + 'year'].length == 4) {
- newItem.setField("year", this.model.data[uri][prefixDC + 'year']);
+ newItem.setField("year", this.model.data[uri][prefixDC + 'year'][0]);
} else {
try {
- newItem.setField(this.model.data[uri][prefixDC + 'year'].substring(
- this.model.data[uri][prefixDC + 'year'].lastIndexOf(" ")+1,
- this.model.data[uri][prefixDC + 'year'].length));
+ newItem.setField(this.model.data[uri][prefixDC + 'year'][0].substring(
+ this.model.data[uri][prefixDC + 'year'][0].lastIndexOf(" ")+1,
+ this.model.data[uri][prefixDC + 'year'][0].length));
} catch(e) {}
}
}
if(this.model.data[uri][prefixDC + 'edition']) {
- newItem.setField("edition", this.model.data[uri][prefixDC + 'edition']);
+ newItem.setField("edition", this.model.data[uri][prefixDC + 'edition'][0]);
+ }
+ if(this.model.data[uri][prefixDummy + 'series']) {
+ newItem.setField("series", this.model.data[uri][prefixDummy + 'series'][0]);
+ }
+ if(this.model.data[uri][prefixDummy + 'place']) {
+ newItem.setField("place", this.model.data[uri][prefixDummy + 'place'][0]);
}
if(this.model.data[uri][prefixDC + 'identifier']) {
- newItem.setField("ISBN", this.model.data[uri][prefixDC + 'identifier'].substring(5));
+ for(i in this.model.data[uri][prefixDC + 'identifier']) {
+ if(this.model.data[uri][prefixDC + 'identifier'][i].substring(0, 4) == 'ISBN') {
+ newItem.setField("ISBN", this.model.data[uri][prefixDC + 'identifier'][0].substring(5));
+ break;
+ }
+ }
}
if(this.model.data[uri][prefixDC + 'creator']) {
- var creator = this.model.data[uri][prefixDC + 'creator'];
+ for(i in this.model.data[uri][prefixDC + 'creator']) {
+ var creator = this.model.data[uri][prefixDC + 'creator'][i];
+ var spaceIndex = creator.lastIndexOf(" ");
+ var lastName = creator.substring(spaceIndex+1, creator.length);
+ var firstName = creator.substring(0, spaceIndex);
- var spaceIndex = creator.lastIndexOf(" ");
- var lastName = creator.substring(spaceIndex+1, creator.length);
- var firstName = creator.substring(0, spaceIndex);
-
- newItem.setCreator(0, firstName, lastName);
+ newItem.setCreator(i, firstName, lastName);
+ }
}
newItem.save();
diff --git a/chrome/chromeFiles/content/scholar/xpcom/marc.js b/chrome/chromeFiles/content/scholar/xpcom/marc.js
@@ -0,0 +1,495 @@
+/*
+* Scholar.Ingester.MARC_Record.js
+* Stefano Bargioni, Pontificia Università della Santa Croce - Biblioteca
+* Trattamento di record MARC in JavaScript
+*
+* Original version copyright (C) 2005 Stefano Bargioni, licensed under the LGPL
+* (Available at http://www.pusc.it/bib/mel/Scholar.Ingester.MARC_Record.js)
+*
+* This library is free software; you can redistribute it or
+* modify it under the terms of the GNU Lesser General Public
+* License as published by the Free Software Foundation; either
+* version 2.1 of the License, or (at your option) any later version.
+*
+* This library is distributed in the hope that it will be useful,
+* but WITHOUT ANY WARRANTY; without even the implied warranty of
+* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+* Lesser General Public License for more details.
+*/
+
+Scholar.Ingester.MARC_Record = function() { // new MARC record
+    this.VERSIONE = '2.6.6b'; // library version, reported by version()
+    this.VERSIONE_data ='2005-05-10'; // date reported together with the version
+    // default leader; show_leader() joins the values in the order they are set here
+    var defaultLeader = {};
+    defaultLeader.record_length = '00000';
+    defaultLeader.record_status = 'n'; // acdnp
+    defaultLeader.type_of_record = ' ';
+    defaultLeader.bibliographic_level = ' ';
+    defaultLeader.type_of_control = ' ';
+    defaultLeader.character_coding_scheme = ' ';
+    defaultLeader.indicator_count = '2';
+    defaultLeader.subfield_code_length = '2';
+    defaultLeader.base_address_of_data = '00000';
+    defaultLeader.encoding_level = ' ';
+    defaultLeader.descriptive_cataloging_form = ' ';
+    defaultLeader.linked_record_requirement = ' ';
+    defaultLeader.entry_map = '4500';
+    this.leader = defaultLeader; // 24 chars
+    // separator characters, then an empty directory and field list
+    this.field_terminator = '\x1E';
+    this.record_terminator = '\x1D';
+    this.subfield_delimiter = '\x1F';
+    this.directory = '';
+    this.directory_terminator = this.field_terminator;
+    this.variable_fields = [];
+    return this;
+}
+
+Scholar.Ingester.MARC_Record.prototype.load = function(s,f) { // loads record s given in format f ('binary', 'MARC_Harvard', 'MARC_BNI' or 'MARC_Loc'); returns this
+ if (f == 'binary') {
+ this.leader.record_length = '00000'; // recomputed by update_record_length() at the end
+ this.leader.record_status = s.substr(5,1);
+ this.leader.type_of_record = s.substr(6,1);
+ this.leader.bibliographic_level = s.substr(7,1);
+ this.leader.type_of_control = s.substr(8,1);
+ this.leader.character_coding_scheme = s.substr(9,1);
+ this.leader.indicator_count = '2';
+ this.leader.subfield_code_length = '2';
+ this.leader.base_address_of_data = '00000'; // recomputed by update_base_address_of_data() at the end
+ this.leader.encoding_level = s.substr(17,1);
+ this.leader.descriptive_cataloging_form = s.substr(18,1);
+ this.leader.linked_record_requirement = s.substr(19,1);
+ this.leader.entry_map = '4500';
+
+ this.directory = '';
+ this.directory_terminator = this.field_terminator;
+ this.variable_fields = new Array();
+
+ // loads fields
+ var campi = s.split(this.field_terminator); // "campi" = fields
+ var k;
+ for (k=1; k<-1+campi.length; k++) { // the first and the last pieces are not fields
+ // the first is the header + directory, the last is the this.record_terminator
+ var tag = campi[0].substr(24+(k-1)*12,3); // tag comes from the k-th 12-character directory entry after the 24-character leader
+ var ind1 = ''; var ind2 = ''; var value = campi[k];
+ if (tag.substr(0,2) != '00') { // tags other than 00x: the first two characters are the indicators
+ ind1 = campi[k].substr(0,1);
+ ind2 = campi[k].substr(1,1);
+ value = campi[k].substr(2);
+ }
+ this.add_field(tag,ind1,ind2,value);
+ }
+ }
+ if (f == 'MARC_Harvard') {
+ var linee = s.split('\n'); // "linee" = lines
+ for (var i=0; i<linee.length; i++) {
+ linee[i] = this._trim(linee[i]);
+ if (linee[i] == '') continue; // skips empty lines
+ // linee[i] = linee[i].replace(/\t/g,' ');
+ linee[i] = linee[i].replace(/ \t/g,'\t');
+ linee[i] = linee[i].replace(/\xA0/g,' '); // in some browsers, nbsp is copied as xA0
+ var tranche = linee[i].split('|a '); // the text before the first '|a ' holds tag and indicators
+ var tag, ind1, ind2, value;
+ if (tranche.length == 1) { // no '|a ' on the line; NOTE(review): ind1/ind2 are not assigned on this path
+ tag = linee[i].substr(0,3);
+ value = linee[i].substr(4);
+ }
+ else {
+ tag = tranche[0].substr(0,3);
+ ind1 = tranche[0].substr(3,1);
+ ind2 = tranche[0].substr(4,1);
+ value = tranche[1];
+ value = this._trim(value);
+ var replacer = this.subfield_delimiter+'$1';
+ value = value.replace(/\|(.) /g,replacer); // '|x ' becomes subfield delimiter + x
+ }
+ if (tag == 'LDR') { // leader line: copies the leader and empties directory and field list
+ this.leader.record_length = '00000';
+ this.leader.record_status = value.substr(5,1);
+ this.leader.type_of_record = value.substr(6,1);
+ this.leader.bibliographic_level = value.substr(7,1);
+ this.leader.type_of_control = value.substr(8,1);
+ this.leader.character_coding_scheme = value.substr(9,1);
+ this.leader.indicator_count = '2';
+ this.leader.subfield_code_length = '2';
+ this.leader.base_address_of_data = '00000';
+ this.leader.encoding_level = value.substr(17,1);
+ this.leader.descriptive_cataloging_form = value.substr(18,1);
+ this.leader.linked_record_requirement = value.substr(19,1);
+ this.leader.entry_map = '4500';
+
+ this.directory = '';
+ this.directory_terminator = this.field_terminator;
+ this.variable_fields = new Array();
+ }
+ else if (tag > '008' && tag < '899') { // skips low and high tags, also H03 and the like
+ if (tag != '040') this.add_field(tag,ind1,ind2,value); // field 040 is not imported
+ }
+ }
+ this.add_field_005(); // adds a 005 field with the current date and time
+ }
+ if (f == 'MARC_BNI') {
+ var linee = s.split('\n');
+ for (var i=0; i<linee.length; i++) {
+ linee[i] = this._trim(linee[i]);
+ if (linee[i] == '') continue; // skips empty lines
+ linee[i] = linee[i].replace(/\xA0/g,' '); // in some browsers, nbsp is copied as xA0
+ linee[i] = linee[i].replace(/\|/g,' ');
+ linee[i] = linee[i].replace(/_/g,' ');
+ linee[i] = linee[i].replace(/\$/g,this.subfield_delimiter);
+ var tranche = linee[i].split('\t'); // tab-separated columns: tag, indicators, value
+ var tag = tranche[0];
+ var ind1 = tranche[1].substr(0,1); // NOTE(review): throws when a non-empty line has no tab - confirm input always has three columns
+ var ind2 = tranche[1].substr(1,1);
+ var value = this._trim(tranche[2]);
+ if (tag == 'LEA') { // leader line: copies the leader and empties directory and field list
+ this.leader.record_length = '00000';
+ this.leader.record_status = value.substr(5,1);
+ this.leader.type_of_record = value.substr(6,1);
+ this.leader.bibliographic_level = value.substr(7,1);
+ this.leader.type_of_control = value.substr(8,1);
+ this.leader.character_coding_scheme = value.substr(9,1);
+ this.leader.indicator_count = '2';
+ this.leader.subfield_code_length = '2';
+ this.leader.base_address_of_data = '00000';
+ this.leader.encoding_level = value.substr(17,1);
+ this.leader.descriptive_cataloging_form = value.substr(18,1);
+ this.leader.linked_record_requirement = value.substr(19,1);
+ this.leader.entry_map = '4500';
+
+ this.directory = '';
+ this.directory_terminator = this.field_terminator;
+ this.variable_fields = new Array();
+ }
+ else if (tag > '008' && tag < '899') { // skips low and high tags
+ if (tag != '040') this.add_field(tag,ind1,ind2,value); // field 040 is not imported
+ }
+ }
+ this.add_field_005(); // adds a 005 field with the current date and time
+ }
+ if (f == 'MARC_Loc') { // MARC copied from the browser from the catalog.loc.gov site
+ var linee = s.split('\n');
+ for (var i=0; i<linee.length; i++) {
+ linee[i] = this._trim(linee[i]);
+ if (linee[i] == '') continue; // skips empty lines
+ linee[i] = linee[i].replace(/\xA0/g,' '); // in some browsers, nbsp is copied as xA0
+ linee[i] = linee[i].replace(/_/g,' ');
+ linee[i] = linee[i].replace(/\t/g,'');
+ var replacer = this.subfield_delimiter+'$1';
+ linee[i] = linee[i].replace(/\|(.) /g,replacer); // '|x ' becomes subfield delimiter + x
+ linee[i] = linee[i].replace(/\|/g,this.subfield_delimiter); // any remaining '|' becomes a bare delimiter
+ var tag = linee[i].substr(0,3);
+ var ind1 = linee[i].substr(4,1);
+ var ind2 = linee[i].substr(5,1);
+ var value = linee[i].substr(7);
+ if (tag == '000') { // leader line: copies the leader and empties directory and field list
+ linee[i] = linee[i].replace(/ /,' '); // NOTE(review): as written this replaces a space with a space - the intended pattern may have been lost, confirm
+ value = linee[i].substr(4);
+ this.leader.record_length = '00000';
+ this.leader.record_status = value.substr(5,1);
+ this.leader.type_of_record = value.substr(6,1);
+ this.leader.bibliographic_level = value.substr(7,1);
+ this.leader.type_of_control = value.substr(8,1);
+ this.leader.character_coding_scheme = value.substr(9,1);
+ this.leader.indicator_count = '2';
+ this.leader.subfield_code_length = '2';
+ this.leader.base_address_of_data = '00000';
+ this.leader.encoding_level = value.substr(17,1);
+ this.leader.descriptive_cataloging_form = value.substr(18,1);
+ this.leader.linked_record_requirement = value.substr(19,1);
+ this.leader.entry_map = '4500';
+
+ this.directory = '';
+ this.directory_terminator = this.field_terminator;
+ this.variable_fields = new Array();
+ }
+ else if (tag > '008' && tag < '899') { // skips low and high tags
+ if (tag != '040') this.add_field(tag,ind1,ind2,value); // field 040 is not imported
+ }
+ }
+ this.add_field_005(); // adds a 005 field with the current date and time
+ }
+
+ this.update_record_length();
+ this.update_base_address_of_data();
+ return this;
+}
+
+Scholar.Ingester.MARC_Record.prototype.update_base_address_of_data = function() { // updates the base_address
+    var base = 24 + this.variable_fields.length*12 + 1; // 24, plus 12 per field, plus 1
+    return (this.leader.base_address_of_data = this._zero_fill(base,5)); // stored and returned as 5 digits
+}
+
+Scholar.Ingester.MARC_Record.prototype.update_displacements = function() { // rebuilds the directory
+    // one entry per field: tag + 4-digit length + 5-digit running offset; always returns true
+    var offset = 0;
+    var entries = [];
+    for (var n=0; n<this.variable_fields.length; n++) {
+        var fld = this.variable_fields[n];
+        var size = fld.value.length + 1 + fld.ind1.length + fld.ind2.length;
+        entries.push(fld.tag + this._zero_fill(size,4) + this._zero_fill(offset,5));
+        offset += size;
+    }
+    this.directory = entries.join('');
+    return true;
+}
+Scholar.Ingester.MARC_Record.prototype.update_record_length = function() { // updates total record length
+    // total = 24 + directory length + 1 + (indicators + value + 1 for every field) + 1
+    var dataLength = 0;
+    for (var k=0; k<this.variable_fields.length; k++) {
+        dataLength += this.variable_fields[k].ind1.length + this.variable_fields[k].ind2.length + this.variable_fields[k].value.length + 1;
+    }
+    this.leader.record_length = this._zero_fill(24 + this.directory.length + 1 + dataLength + 1, 5);
+}
+
+Scholar.Ingester.MARC_Record.prototype.sort_directory = function() { // sorts directory and array variable_fields by tag and occ
+    // Always returns true. With at most one 12-character directory entry there is nothing to sort.
+    if (this.directory.length <= 12) { return true; } // already sorted
+    var directory_entries = [];
+    for (var i=0; i<this.directory.length; i=i+12) directory_entries.push(this.directory.substr(i,12));
+    directory_entries.sort();
+    this.directory = directory_entries.join('');
+    // Fields are ordered by tag first and by occurrence only within a tag; adding the two differences would let occ outweigh tag.
+    this.variable_fields.sort(function(a,b) {
+        if (a.tag != b.tag) return a.tag < b.tag ? -1 : 1;
+        return a.occ - b.occ;
+    });
+    return true;
+}
+
+Scholar.Ingester.MARC_Record.prototype.show_leader = function() { // leader values joined in property order
+    var text = '', key;
+    for (key in this.leader) text = text + this.leader[key];
+    return text;
+}
+
+Scholar.Ingester.MARC_Record.prototype.show_fields = function() {
+    // Returns all fields as one string: for each field its indicators and
+    // value, followed by the field terminator.
+    var out = '';
+    for (var n=0; n<this.variable_fields.length; n++) {
+        var fld = this.variable_fields[n];
+        out += fld.ind1 + fld.ind2 + fld.value + this.field_terminator;
+    }
+    return out;
+}
+
+Scholar.Ingester.MARC_Record.prototype.show_directory = function() {
+    // one text line per 12-character directory entry, split 3 + 4 + 5 with blanks in between
+    var lines = '';
+    for (var pos = 0; pos<this.directory.length; pos+=12) {
+        var entry = this.directory.substr(pos,12);
+        lines += entry.substr(0,3) + ' ' + entry.substr(3,4) + ' ' + entry.substr(7,5) + '\n';
+    }
+    return lines;
+}
+
+Scholar.Ingester.MARC_Record.prototype.add_field_005 = function() { // adds field 005 with the current local time (YYYYMMDDhhmmss.0) and returns that string
+    var d = new Date();
+    var stamp = d.getFullYear() +
+        this._zero_fill(d.getMonth()+1,2) +
+        this._zero_fill(d.getDate(),2) +
+        this._zero_fill(d.getHours(),2) +
+        this._zero_fill(d.getMinutes(),2) +
+        this._zero_fill(d.getSeconds(),2) + '.0';
+    this.add_field('005','','',stamp);
+    return stamp;
+}
+
+Scholar.Ingester.MARC_Record.prototype.count_occ = function(tag) { // counts occ of tag
+    var matches = 0;
+    for (var idx=this.variable_fields.length-1; idx>=0; idx--) {
+        if (this.variable_fields[idx].tag == tag) matches += 1;
+    }
+    return matches;
+}
+
+Scholar.Ingester.MARC_Record.prototype.exists = function(tag) { // field existence
+    // true when at least one field with this tag is present, false otherwise
+    return this.count_occ(tag) > 0;
+}
+
+function MARC_field(rec,tag,ind1,ind2,value) { // new MARC field
+    // rec is the record the field is created for; it is only asked how often tag already occurs
+    this.tag = tag;
+    this.occ = rec.count_occ(tag)+1; // occurrence order no.
+    var isControlField = (tag.substr(0,2) == '00');
+    // 00x fields get empty indicators; elsewhere an empty indicator is stored as a blank
+    this.ind1 = isControlField ? '' : (ind1 == '' ? ' ' : ind1);
+    this.ind2 = isControlField ? '' : (ind2 == '' ? ' ' : ind2);
+    this.value = value;
+    return this;
+}
+
+Scholar.Ingester.MARC_Record.prototype.display = function(type) { // displays record in format type
+    // type (case-insensitive): 'binary' (leader, directory, fields), 'html' (table), 'xml' (MARC21 slim,
+    // values are not XML-escaped) or 'xml-html' (the XML escaped and coloured for an HTML page).
+    // Returns the string, or false for any other type. Blanks and angle brackets are written as entities.
+    var s, i, ind1, ind2;
+    type = type.toLowerCase();
+    if (type == 'binary') return this.show_leader() +
+        this.directory +
+        this.field_terminator +
+        this.show_fields() +
+        this.record_terminator;
+    if (type == 'html') {
+        s = '<table class="record_table">';
+        s += '<tr><td class="tag">000</td><td class="ind"></td><td class="ind"></td><td class="record_value">'+this.show_leader()+'</td></tr>'; // leader row; uses this, no global R is defined
+        for (i=0; i<this.variable_fields.length; i++) {
+            ind1 = this.variable_fields[i].ind1; if (ind1 == ' ') { ind1 = '&nbsp;'; }
+            ind2 = this.variable_fields[i].ind2; if (ind2 == ' ') { ind2 = '&nbsp;'; }
+            s += '<tr>';
+            s += '<td class="tag">'+this.variable_fields[i].tag+'</td>';
+            s += '<td class="ind">'+ind1+'</td>';
+            s += '<td class="ind">'+ind2+'</td>';
+            var v = this.variable_fields[i].value;
+            if (this.variable_fields[i].tag == '008') v = v.replace(/ /g,'&nbsp;'); // keeps the blanks of field 008 from collapsing
+            s += '<td class="record_value">'+this._ddagger(v)+'</td>';
+            s += '</tr>';
+        }
+        s += '</table>';
+        return s;
+    }
+    if (type == 'xml') {
+        s = '<?xml version="1.0" encoding="iso-8859-1"?><collection xmlns="http://www.loc.gov/MARC21/slim"><record>';
+        s += '<leader>'+this.show_leader()+'</leader>';
+        for (i=0; i<this.variable_fields.length; i++) {
+            ind1 = this.variable_fields[i].ind1; if (ind1 != '') ind1 = ' ind1="'+ind1+'"';
+            ind2 = this.variable_fields[i].ind2; if (ind2 != '') ind2 = ' ind2="'+ind2+'"';
+            if (this.variable_fields[i].tag.substr(0,2) == '00') s += '<controlfield tag="'+this.variable_fields[i].tag+'">'+this.variable_fields[i].value+'</controlfield>';
+            else {
+                var subfields = this.variable_fields[i].value.split(this.subfield_delimiter);
+                // a value without subfield delimiters is written as a single subfield with code '?'
+                if (subfields.length == 1) subfields[1] = '?'+this.variable_fields[i].value;
+                var sf = '';
+                for (var j=1; j<subfields.length; j++) {
+                    sf += '<subfield code="'+subfields[j].substr(0,1)+'">'+subfields[j].substr(1)+'</subfield>';
+                }
+                s += '<datafield tag="' + this.variable_fields[i].tag + '"' + ind1 + ind2 + '>' + sf + '</datafield>';
+            }
+        }
+        s += '</record></collection>';
+        return s;
+    }
+    if (type == 'xml-html') {
+        s = this.display('xml');
+        // pretty-printing: line breaks before the elements
+        s = s.replace(/\<leader\>/,'\n <leader>');
+        s = s.replace(/\<controlfield/g,'\n <controlfield');
+        s = s.replace(/\<datafield/g,'\n <datafield');
+        s = s.replace(/\<collection/g,'\n<collection');
+        s = s.replace(/\<record/g,'\n<record');
+        s = s.replace(/\<\/datafield/g,'\n </datafield');
+        s = s.replace(/\<\/collection/g,'\n</collection');
+        s = s.replace(/\<\/record/g,'\n</record');
+        s = s.replace(/\<subfield/g,'\n <subfield');
+        // escape chars < and > first, so that the dagger markup added next is not escaped as well
+        s = s.replace(/\</g,'&lt;');
+        s = s.replace(/\>/g,'&gt;');
+        s = s.replace(/\x1F/g,'%1F'); s = this._ddagger(s);
+        // colour the keywords
+        s = s.replace(/(controlfield|datafield|collection|record|leader|subfield)/g,'<span class="cdfield">$1</span>');
+        s = s.replace(/(tag|code|ind1|ind2)=/g,'<span class="attrib">$1=</span>');
+        return s;
+    }
+    return false;
+}
+
+Scholar.Ingester.MARC_Record.prototype.get_field = function(tag) { // returns an array of values, one for each occurrence
+    // Each entry is ind1 + ind2 + value of one matching field, in the order
+    // the fields are stored; the array is empty when tag does not occur.
+    var found = [];
+    for (var n=0; n<this.variable_fields.length; n++) {
+        var fld = this.variable_fields[n];
+        if (fld.tag != tag) continue;
+        found.push(fld.ind1 + fld.ind2 + fld.value);
+    }
+    return found;
+}
+
+// This function added by Simon Kornblith
+Scholar.Ingester.MARC_Record.prototype.get_field_subfields = function(tag) { // returns a two-dimensional array of values
+    // One object per occurrence of tag, mapping subfield code -> value. A value
+    // without subfield delimiters is stored whole under '?'; when a code
+    // repeats within one field, the last value wins.
+    var field = this.get_field(tag);
+    var return_me = [];
+    for (var i=0; i<field.length; i++) { // index loop: for-in would also visit inherited enumerable properties
+        var subfields = field[i].split(this.subfield_delimiter);
+        return_me[i] = {};
+        if (subfields.length == 1) return_me[i]['?'] = field[i];
+        for (var j=1; j<subfields.length; j++) { // piece 0 precedes the first delimiter and is skipped
+            return_me[i][subfields[j].substr(0,1)] = subfields[j].substr(1);
+        }
+    }
+    return return_me;
+}
+
+Scholar.Ingester.MARC_Record.prototype.add_field = function(tag,ind1,ind2,value) { // adds a field to the record
+    // Returns the new MARC_field, or false when tag is not exactly 3 characters long.
+    if (tag.length != 3) { return false; }
+    var created = new MARC_field(this,tag,ind1,ind2,value);
+    this.variable_fields.push(created); // adds pointer to list of fields
+    // adds the entry to the directory; its offset is filled in by update_displacements()
+    var entryLength = created.ind1.length + created.ind2.length + created.value.length + 1;
+    this.directory += created.tag + this._zero_fill(entryLength,4) + '00000';
+    this.sort_directory(); // sorts the directory
+    // updates lengths
+    this.update_base_address_of_data();
+    this.update_displacements();
+    this.update_record_length();
+    return created;
+}
+
+Scholar.Ingester.MARC_Record.prototype.delete_field = function(tag,occurrence) {
+    // Removes occurrence number `occurrence` (the value stored in field.occ,
+    // counted from 1) of field `tag`.
+    // Returns false when no such field exists, true once it has been removed.
+    var fields = this.variable_fields;
+    var i;
+    for (i=0; i<fields.length; i++) {
+        if (fields[i].tag == tag && fields[i].occ == occurrence) break;
+    }
+    if (i==fields.length) return false; // field not found
+    fields.splice(i,1);
+
+    // Renumber the later occurrences of the same tag so that occ keeps running
+    // from 1 to count_occ(tag). Without this, MARC_field (which numbers a new
+    // field count_occ(tag)+1) would hand out an occurrence number that is
+    // still in use.
+    for (var j=0; j<fields.length; j++) {
+        if (fields[j].tag == tag && fields[j].occ > occurrence) fields[j].occ--;
+    }
+
+    // The directory entry is not cut out of this.directory by hand:
+    // update_displacements() below rebuilds the whole directory from
+    // variable_fields. (A manual search could also end in a call to alert(),
+    // which is a window function and may not exist in this scope.)
+    // updates lengths
+    this.update_base_address_of_data();
+    this.update_displacements();
+    this.update_record_length();
+    return true;
+}
+
+Scholar.Ingester.MARC_Record.prototype._ddagger = function(s) { // display doubledagger in html code
+    // NOTE(review): the class name "this._ddagger" looks like a rename artifact - confirm against the stylesheet
+    var markup = "<span class=\"this._ddagger\">‡$1</span>";
+    return s.replace(/\%1F(.)/g, markup).replace(/\x1F(.)/g, markup);
+}
+
+Scholar.Ingester.MARC_Record.prototype._trim = function(s) { // eliminates blanks from both sides
+    // trailing whitespace is removed first, then leading whitespace
+    return s.replace(/\s+$/,'').replace(/^\s+/,'');
+}
+
+Scholar.Ingester.MARC_Record.prototype._zero_fill = function(s,l) { // left '0' padding of s, up to l (l<=15)
+    // 15 zeros are put in front of s and the last l characters are kept
+    var padded = '000000000000000' + s;
+    return padded.substr(padded.length-l,l);
+}
+
+Scholar.Ingester.MARC_Record.prototype.version = function() { // returns version and date
+    return 'MARC Editor Lite '.concat(this.VERSIONE, ' (', this.VERSIONE_data, ')');
+}
+\ No newline at end of file
diff --git a/components/chnmIScholarService.js b/components/chnmIScholarService.js
@@ -35,6 +35,10 @@ Cc["@mozilla.org/moz/jssubscript-loader;1"]
.getService(Ci.mozIJSSubScriptLoader)
.loadSubScript("chrome://scholar/content/xpcom/ingester.js");
+Cc["@mozilla.org/moz/jssubscript-loader;1"]
+ .getService(Ci.mozIJSSubScriptLoader)
+ .loadSubScript("chrome://scholar/content/xpcom/marc.js");
+
/********************************************************************/