commit 438ff82955f96b8ede7908ce69927ee53bd405e1
parent 146b92585dce1823babb500453dc960b6497c194
Author: Simon Kornblith <simon@simonster.com>
Date: Thu, 31 Aug 2006 07:45:03 +0000
- replace storage streams with plain old strings for translate IO. There's not much of a reason to use storage streams now, and they were screwing up non-ASCII characters.
- make EBSCO scraper work better through a proxy
- shorten Accession Number -> Accession No, Journal Abbreviation -> Journal Abbr, Publication Title -> Publication. It does look a bit strange, but it also makes the interface more functional (especially for those of us without giant widescreen LCDs ;-)
Diffstat:
3 files changed, 75 insertions(+), 100 deletions(-)
diff --git a/chrome/chromeFiles/content/scholar/xpcom/scholar.js b/chrome/chromeFiles/content/scholar/xpcom/scholar.js
@@ -737,7 +737,7 @@ Scholar.Date = new function(){
var months = CSL.getMonthStrings("long");
string += months[date.month];
if(date.day) {
- string += ", "+date.day;
+ string += " "+parseInt(date.day, 10).toString()+", ";
} else {
string += " ";
}
diff --git a/chrome/chromeFiles/content/scholar/xpcom/translate.js b/chrome/chromeFiles/content/scholar/xpcom/translate.js
@@ -241,15 +241,9 @@ Scholar.Translate.prototype.setLocation = function(location) {
* sets the string to be used as a file
*/
Scholar.Translate.prototype.setString = function(string) {
- this.string = string;
- this._createStorageStream();
-
- this._storageStreamLength = string.length;
-
- // write string
- var fStream = this._storageStream.getOutputStream(0);
- fStream.write(string, this._storageStreamLength);
- fStream.close();
+ this._storage = string; // keep the input as a plain string instead of copying it into a storage stream
+ this._storageLength = string.length; // length cached for the end-of-data checks in Scholar.read()
+ this._storagePointer = 0; // read offset; reading starts at the beginning of the string
}
/*
@@ -467,7 +461,7 @@ Scholar.Translate.prototype.translate = function() {
throw("cannot translate: no translator specified");
}
- if(!this.location && this.type != "search" && !this._storageStream) {
+ if(!this.location && this.type != "search" && !this._storage) {
// searches operate differently, because we could have an array of
// translators and have to go through each
throw("cannot translate: no location specified");
@@ -477,6 +471,12 @@ Scholar.Translate.prototype.translate = function() {
return;
}
+ if(this._storage) {
+ // enable reading from storage, which we can't do until the translator
+ // is loaded
+ this._storageFunctions(true);
+ }
+
// hack to see if there are any options, bc length does not work on objects
if(this.type == "export") {
for(var i in this._displayOptions) {
@@ -1296,17 +1296,11 @@ Scholar.Translate.prototype._import = function() {
* sets up import for IO
*/
Scholar.Translate.prototype._importConfigureIO = function() {
- if(this._storageStream) {
+ if(this._storage) {
if(this._configOptions.dataMode == "rdf") {
this._rdf = new Object();
// read string out of storage stream
- var sStream = Components.classes["@mozilla.org/scriptableinputstream;1"].
- createInstance(Components.interfaces.nsIScriptableInputStream);
- sStream.init(this._storageStream.newInputStream(0));
- var str = sStream.read(this._storageStreamLength);
- sStream.close();
-
var IOService = Components.classes['@mozilla.org/network/io-service;1']
.getService(Components.interfaces.nsIIOService);
this._rdf.dataSource = Components.classes["@mozilla.org/rdf/datasource;1?name=in-memory-datasource"].
@@ -1316,19 +1310,13 @@ Scholar.Translate.prototype._importConfigureIO = function() {
// get URI and parse
var baseURI = (this.location ? IOService.newURI(this.location, "utf-8", null) : null);
- parser.parseString(this._rdf.dataSource, baseURI, str);
+ parser.parseString(this._rdf.dataSource, baseURI, this._storage);
// make an instance of the RDF handler
this._sandbox.Scholar.RDF = new Scholar.Translate.RDF(this._rdf.dataSource);
} else {
- this._storageStreamFunctions(true);
-
- if(this._scriptableStream) {
- // close scriptable stream so functions will be forced to get a
- // new one
- this._scriptableStream.close();
- this._scriptableStream = undefined;
- }
+ this._storageFunctions(true);
+ this._storagePointer = 0;
}
} else {
if(this._configOptions.dataMode == "rdf") {
@@ -1619,37 +1607,25 @@ Scholar.Translate.prototype._initializeInternalIO = function() {
// make an instance of the RDF handler
this._sandbox.Scholar.RDF = new Scholar.Translate.RDF(this._rdf.dataSource);
} else {
- this._createStorageStream();
- this._storageStreamFunctions(true, true);
+ this._storage = "";
+ this._storageLength = 0;
+ this._storagePointer = 0;
+ this._storageFunctions(true, true);
}
}
}
/*
- * creates and returns storage stream
- */
-Scholar.Translate.prototype._createStorageStream = function() {
- // create a storage stream
- this._storageStream = Components.classes["@mozilla.org/storagestream;1"].
- createInstance(Components.interfaces.nsIStorageStream);
- this._storageStream.init(4096, 4294967295, null); // virtually no size limit
-}
-
-/*
* sets up functions for reading/writing to a storage stream
*/
-Scholar.Translate.prototype._storageStreamFunctions = function(read, write) {
+Scholar.Translate.prototype._storageFunctions = function(read, write) {
var me = this;
if(write) {
// set up write() method
- var fStream = this._storageStream.getOutputStream(0);
- this._sandbox.Scholar.write = function(data) { fStream.write(data, data.length) };
-
- // set Scholar.eof() to close the storage stream
- this._sandbox.Scholar.eof = function() {
- fStream.QueryInterface(Components.interfaces.nsIOutputStream);
- fStream.close();
- }
+ this._sandbox.Scholar.write = function(data) {
+ me._storage += data;
+ me._storageLength += data.length;
+ };
}
if(read) {
@@ -1658,51 +1634,45 @@ Scholar.Translate.prototype._storageStreamFunctions = function(read, write) {
var lastCharacter;
this._sandbox.Scholar.read = function() {
- if(!me._scriptableStream) { // allocate an fStream and sStream on the fly
- // otherwise with no data we get an error
- me._scriptableStream = Components.classes["@mozilla.org/scriptableinputstream;1"].
- createInstance(Components.interfaces.nsIScriptableInputStream);
- me._scriptableStream.init(me._storageStream.newInputStream(0));
-
- // attach sStream to stack of streams to close
- me._streams.push(me._scriptableStream);
- }
-
- var character = me._scriptableStream.read(1);
- if(!character) {
+ if(me._storagePointer >= me._storageLength) { // nothing left to read
return false;
}
- var string = "";
- if(lastCharacter == "\r" && character == "\n") {
- // if the last read got a cr, and this first char was
- // an lf, ignore the lf
- character = "";
- }
+ var oldPointer = me._storagePointer; // start of the line being returned
+ var lfIndex = me._storage.indexOf("\n", me._storagePointer);
- while(character != "\n" && character != "\r" && character) {
- string += character;
- character = me._scriptableStream.read(1);
+ if(lfIndex != -1) {
+ // LF-terminated line: resume after the LF, and drop a CR directly before it (CRLF)
+ me._storagePointer = lfIndex+1;
+ if(lfIndex > oldPointer && me._storage.charAt(lfIndex-1) == "\r") {
+ lfIndex--;
+ }
+ return me._storage.substr(oldPointer, lfIndex-oldPointer);
}
- lastCharacter = character;
+ var crIndex = me._storage.indexOf("\r", me._storagePointer);
+ if(crIndex != -1) {
+ me._storagePointer = crIndex+1;
+ return me._storage.substr(oldPointer, crIndex-oldPointer); // whole line up to, not including, the CR
+ }
- return string;
+ me._storagePointer = me._storageLength;
+ return me._storage.substr(oldPointer); // unterminated last line: only the unread remainder
}
- } else { // block reading
+ } else { // block reading
this._sandbox.Scholar.read = function(amount) {
- if(!me._scriptableStream) { // allocate an fStream and
- // sStream on the fly; otherwise
- // with no data we get an error
- me._scriptableStream = Components.classes["@mozilla.org/scriptableinputstream;1"].
- createInstance(Components.interfaces.nsIScriptableInputStream);
- me._scriptableStream.init(me._storageStream.newInputStream(0));
-
- // attach sStream to stack of streams to close
- me._streams.push(me._scriptableStream);
+ if(me._storagePointer >= me._storageLength) { // nothing left to read
+ return false;
+ }
+
+ if((me._storagePointer+amount) > me._storageLength) {
+ // fewer than `amount` characters remain: clamp so the pointer stops at the end
+ amount = me._storageLength-me._storagePointer;
}
- return me._scriptableStream.read(amount);
+ var oldPointer = me._storagePointer;
+ me._storagePointer += amount;
+ return me._storage.substr(oldPointer, amount);
}
}
}
diff --git a/scrapers.sql b/scrapers.sql
@@ -1,4 +1,4 @@
--- 68
+-- 69
-- Set the following timestamp to the most recent scraper update date
REPLACE INTO "version" VALUES ('repository', STRFTIME('%s', '2006-08-15 15:42:00'));
@@ -3038,7 +3038,7 @@ function doWeb(doc, url) {
var urls = new Array();
for(var i in items) {
var m = relatedMatch.exec(relatedLinks[i]);
- urls.push("http://scholar.google.com/scholar.ris?hl=en&lr=&q=info:"+m[1]+"&output=citation&oi=citation");
+ urls.push("http://scholar.google.com/scholar.ris?hl=en&lr=&q=info:"+m[1]+"&oe=UTF-8&output=citation&oi=citation");
if(links[i]) {
attachments.push([{title:"Google Scholar Linked Page", type:"text/html",
url:links[i]}]);
@@ -3150,24 +3150,24 @@ function doWeb(doc, url) {
Scholar.wait();
}');
-REPLACE INTO "translators" VALUES ('d0b1914a-11f1-4dd7-8557-b32fe8a3dd47', '2006-08-18 18:03:00', 4, 'EBSCOhost', 'Simon Kornblith', '^http://web\.ebscohost\.com/ehost/(?:results|detail)',
+REPLACE INTO "translators" VALUES ('d0b1914a-11f1-4dd7-8557-b32fe8a3dd47', '2006-08-18 18:03:00', 4, 'EBSCOhost', 'Simon Kornblith', '^http://[^/]+/ehost/(?:results|detail)',
'function detectWeb(doc, url) {
var namespace = doc.documentElement.namespaceURI;
var nsResolver = namespace ? function(prefix) {
if (prefix == ''x'') return namespace; else return null;
} : null;
- var searchRe = new RegExp("^http://web\\.ebscohost\\.com/ehost/results", "i");
-
// See if this is a seach results page
- if(searchRe.test(url)) {
+ var searchResult = doc.evaluate(''//table[@class="result-list-inner"]'', doc, nsResolver,
+ XPathResult.ANY_TYPE, null).iterateNext();
+ if(searchResult) {
return "multiple";
- } else {
- var persistentLink = doc.evaluate(''//tr[td[@class="left-content-ft"]/text() = "Persistent link to this record:"]/td[@class="right-content-ft"]'',
- doc, nsResolver, XPathResult.ANY_TYPE, null).iterateNext();
- if(persistentLink) {
- return "journalArticle";
- }
+ }
+
+ var persistentLink = doc.evaluate(''//tr[td[@class="left-content-ft"]/text() = "Persistent link to this record:"]/td[@class="right-content-ft"]'',
+ doc, nsResolver, XPathResult.ANY_TYPE, null).iterateNext();
+ if(persistentLink) {
+ return "journalArticle";
}
}',
'function fullEscape(text) {
@@ -3180,6 +3180,10 @@ function doWeb(doc, url) {
if (prefix == ''x'') return namespace; else return null;
} : null;
+ var hostRe = new RegExp("^http://([^/]+)/");
+ var m = hostRe.exec(url);
+ var host = m[1];
+
var queryRe = /\?(.*)$/;
var m = queryRe.exec(url);
var queryString = m[1];
@@ -3191,8 +3195,9 @@ function doWeb(doc, url) {
XPathResult.ANY_TYPE, null).iterateNext();
viewState = fullEscape(viewState.value);
- var searchRe = new RegExp("^http://web\\.ebscohost\\.com/ehost/results", "i");
- if(searchRe.test(url)) {
+ var searchResult = doc.evaluate(''//table[@class="result-list-inner"]'', doc, nsResolver,
+ XPathResult.ANY_TYPE, null).iterateNext();
+ if(searchResult) {
var items = new Object();
var tableRows = doc.evaluate(''//table[@class="cluster-result-record-table"]/tbody/tr'',
@@ -3253,7 +3258,7 @@ function doWeb(doc, url) {
folderBase += "&__EVENTVALIDATION="+fullEscape(folderEventValidation);
var deliverString = "__EVENTTARGET=ctl00%24ctl00%24MainContentArea%24MainContentArea%24btnDelivery%24lnkSave&"+folderBase
- Scholar.Utilities.HTTP.doPost("http://web.ebscohost.com/ehost/"+folderURL,
+ Scholar.Utilities.HTTP.doPost("http://"+host+"/ehost/"+folderURL,
deliverString, function(text) {
var postLocation = /<form name="aspnetForm" method="post" action="([^"]+)"/
var m = postLocation.exec(text);
@@ -3262,13 +3267,13 @@ function doWeb(doc, url) {
var m = viewStateMatch.exec(text);
var downloadString = "__EVENTTARGET=&__EVENTARGUMENT=&__VIEWSTATE="+fullEscape(m[1])+"&ctl00%24ctl00%24MainContentArea%24MainContentArea%24ctl01%24chkRemoveFromFolder=on&ctl00%24ctl00%24MainContentArea%24MainContentArea%24ctl01%24btnSubmit=Save&ctl00%24ctl00%24MainContentArea%24MainContentArea%24ctl01%24BibFormat=1";
- Scholar.Utilities.HTTP.doPost("http://web.ebscohost.com/ehost/"+deliveryURL,
+ Scholar.Utilities.HTTP.doPost("http://"+host+"/ehost/"+deliveryURL,
getString, function(text) {
- Scholar.Utilities.HTTP.doPost("http://web.ebscohost.com/ehost/"+deliveryURL,
+ Scholar.Utilities.HTTP.doPost("http://"+host+"/ehost/"+deliveryURL,
downloadString, function(text) { // get marked
var form = doc.createElement("form");
form.setAttribute("method", "post");
- form.setAttribute("action", "http://web.ebscohost.com/ehost/"+folderURL);
+ form.setAttribute("action", "http://"+host+"/ehost/"+folderURL);
var args = [
["__EVENTARGUMENT", ""],
["__VIEWSTATE", folderViewState],