commit 3edb6e0286495cc8a759e28af49943504ad557a2
parent 9f573794153513a4ffa49eba0d1392444878f241
Author: Simon Kornblith <simon@simonster.com>
Date: Tue, 8 Aug 2006 21:17:07 +0000
closes #86, steal EndNote download links
Scholar should now attempt to process citation information from EndNote download links (MIME types application/x-endnote-refer and application/x-research-info-systems). In situations where Scholar cannot process the information, a standard helper app dialog will appear. This behavior is controlled by the preference extensions.scholar.parseEndNoteMIMETypes.
Diffstat:
5 files changed, 339 insertions(+), 108 deletions(-)
diff --git a/chrome/chromeFiles/content/scholar/ingester/browser.js b/chrome/chromeFiles/content/scholar/ingester/browser.js
@@ -27,6 +27,7 @@ Scholar_Ingester_Interface.init = function() {
Scholar_Ingester_Interface.browserData = new Object();
Scholar_Ingester_Interface._scrapePopupShowing = false;
Scholar.Ingester.ProxyMonitor.init();
+ Scholar.Ingester.MIMEHandler.init();
window.addEventListener("load", Scholar_Ingester_Interface.chromeLoad, false);
window.addEventListener("unload", Scholar_Ingester_Interface.chromeUnload, false);
diff --git a/chrome/chromeFiles/content/scholar/xpcom/ingester.js b/chrome/chromeFiles/content/scholar/xpcom/ingester.js
@@ -457,4 +457,155 @@ Scholar.OpenURL = new function() {
return "";
}
}
+}
+
+Scholar.Ingester.MIMEHandler = new function() {
+ var on = false;
+
+ this.init = init;
+
+ /*
+ * registers URIContentListener to handle MIME types
+ */
+ function init() {
+ if(!on && Scholar.Prefs.get("parseEndNoteMIMETypes")) {
+ var uriLoader = Components.classes["@mozilla.org/uriloader;1"].
+ getService(Components.interfaces.nsIURILoader);
+ uriLoader.registerContentListener(Scholar.Ingester.MIMEHandler.URIContentListener);
+ on = true;
+ }
+ }
+}
+
+/*
+ * Scholar.Ingester.MIMEHandler.URIContentListener: implements
+ * nsIURIContentListener interface to grab MIME types
+ */
+Scholar.Ingester.MIMEHandler.URIContentListener = new function() {
+ var _desiredContentTypes = ["application/x-endnote-refer", "application/x-research-info-systems"];
+
+ this.QueryInterface = QueryInterface;
+ this.canHandleContent = canHandleContent;
+ this.doContent = doContent;
+ this.isPreferred = isPreferred;
+ this.onStartURIOpen = onStartURIOpen;
+
+ function QueryInterface(iid) {
+ if(iid.equals(Components.interfaces.nsISupports)
+ || iid.equals(Components.interfaces.nsISupportsWeakReference)
+ || iid.equals(Components.interfaces.nsIURIContentListener)) {
+ return this;
+ }
+ throw Components.results.NS_ERROR_NO_INTERFACE;
+ }
+
+ function canHandleContent(contentType, isContentPreferred, desiredContentType) {
+ if(Scholar.inArray(contentType, _desiredContentTypes)) {
+ return true;
+ }
+ return false;
+ }
+
+ function doContent(contentType, isContentPreferred, request, contentHandler) {
+ Scholar.debug("doing content for "+request.name);
+ contentHandler.value = new Scholar.Ingester.MIMEHandler.StreamListener(request, contentType);
+ return false;
+ }
+
+ function isPreferred(contentType, desiredContentType) {
+ if(Scholar.inArray(contentType, _desiredContentTypes)) {
+ return true;
+ }
+ return false;
+ }
+
+ function onStartURIOpen(URI) {
+ return true;
+ }
+}
+
+/*
+ * Scholar.Ingester.MIMEHandler.StreamListener: implements nsIStreamListener and
+ * nsIRequestObserver interfaces to download MIME types we've grabbed
+ */
+Scholar.Ingester.MIMEHandler.StreamListener = function(request, contentType) {
+ this._request = request;
+ this._contentType = contentType
+ this._readString = "";
+ this._scriptableStream = null;
+ this._scriptableStreamInput = null
+
+ // get front window
+ var windowWatcher = Components.classes["@mozilla.org/embedcomp/window-watcher;1"].
+ getService(Components.interfaces.nsIWindowWatcher);
+ this._frontWindow = windowWatcher.activeWindow;
+ this._frontWindow.Scholar_Ingester_Interface.Progress.show();
+}
+
+Scholar.Ingester.MIMEHandler.StreamListener.prototype.QueryInterface = function(iid) {
+ if(iid.equals(Components.interfaces.nsISupports)
+ || iid.equals(Components.interfaces.nsIRequestObserver)
+ || iid.equals(Components.interfaces.nsIStreamListener)) {
+ return this;
+ }
+ throw Components.results.NS_ERROR_NO_INTERFACE;
+}
+
+Scholar.Ingester.MIMEHandler.StreamListener.prototype.onStartRequest = function(channel, context) {}
+
+/*
+ * called when there's data available; basicallly, we just want to collect this data
+ */
+Scholar.Ingester.MIMEHandler.StreamListener.prototype.onDataAvailable = function(request, context, inputStream, offset, count) {
+ Scholar.debug(count+" bytes available");
+
+ if(inputStream != this._scriptableStreamInput) { // get storage stream
+ // if there's not one
+ this._scriptableStream = Components.classes["@mozilla.org/scriptableinputstream;1"].
+ createInstance(Components.interfaces.nsIScriptableInputStream);
+ this._scriptableStream.init(inputStream);
+ this._scriptableStreamInput = inputStream;
+ }
+ this._readString += this._scriptableStream.read(count);
+}
+
+/*
+ * called when the request is done
+ */
+Scholar.Ingester.MIMEHandler.StreamListener.prototype.onStopRequest = function(channel, context, status) {
+ Scholar.debug("request finished");
+ var externalHelperAppService = Components.classes["@mozilla.org/uriloader/external-helper-app-service;1"].
+ getService(Components.interfaces.nsIExternalHelperAppService);
+
+ // attempt to import through Scholar.Translate
+ var translation = new Scholar.Translate("import");
+ translation.setLocation(this._request.name);
+ translation.setString(this._readString);
+ translation.setHandler("itemDone", this._frontWindow.Scholar_Ingester_Interface._itemDone);
+ translation.setHandler("done", this._frontWindow.Scholar_Ingester_Interface._finishScraping);
+
+ // attempt to retrieve translators
+ var translators = translation.getTranslators();
+ if(!translators.length) {
+ // we lied. we can't really translate this file. call
+ // nsIExternalHelperAppService with the data
+ this._frontWindow.Scholar_Ingester_Interface.Progress.kill();
+
+ var streamListener;
+ if(streamListener = externalHelperAppService.doContent(this._contentType, this._request, this._frontWindow)) {
+ // create a string input stream
+ var inputStream = Components.classes["@mozilla.org/io/string-input-stream;1"].
+ createInstance(Components.interfaces.nsIStringInputStream);
+ inputStream.setData(this._readString, this._readString.length);
+
+ streamListener.onStartRequest(channel, context);
+ streamListener.onDataAvailable(this._request, context, inputStream, 0, this._readString.length);
+ streamListener.onStopRequest(channel, context, status);
+ }
+ return false;
+ }
+
+ // translate using first available
+ translation.setTranslator(translators[0]);
+ translation.translate();
}
\ No newline at end of file
diff --git a/chrome/chromeFiles/content/scholar/xpcom/translate.js b/chrome/chromeFiles/content/scholar/xpcom/translate.js
@@ -38,6 +38,7 @@
* for web - this is a URL
* item - item to be used for searching (read-only; set with setItem)
* path - the path to the target; for web, this is the same as location
+ * string - the string content to be used as a file.
* saveItem - whether new items should be saved to the database. defaults to
* true; set using second argument of constructor.
*
@@ -57,6 +58,8 @@
* among other things, disables passing of the translate
* object to handlers and modifies complete() function on
* returned items
+ * _storageStream - the storage stream to be used, if one is configured
+ * _storageStreamLength - the length of the storage stream
*
* WEB-ONLY PRIVATE PROPERTIES:
*
@@ -135,11 +138,31 @@ Scholar.Translate.prototype.setLocation = function(location) {
this.path = this.location;
} else {
this.location = location;
- this.path = location.path;
+ if(this.location instanceof Components.interfaces.nsIFile) { // if a file
+ this.path = location.path;
+ } else { // if a url
+ this.path = location;
+ }
}
}
/*
+ * sets the string to be used as a file
+ */
+Scholar.Translate.prototype.setString = function(string) {
+ this.string = string;
+ this._createStorageStream();
+
+ Scholar.debug(string);
+ this._storageStreamLength = string.length;
+
+ // write string
+ var fStream = this._storageStream.getOutputStream(0);
+ fStream.write(string, this._storageStreamLength);
+ fStream.close();
+}
+
+/*
* sets the translator to be used for import/export
*
* accepts either the object from getTranslators() or an ID
@@ -672,7 +695,10 @@ Scholar.Translate.prototype._closeStreams = function() {
try {
stream.QueryInterface(Components.interfaces.nsIFileInputStream);
} catch(e) {
- stream.QueryInterface(Components.interfaces.nsIFileOutputStream);
+ try {
+ stream.QueryInterface(Components.interfaces.nsIFileOutputStream);
+ } catch(e) {
+ }
}
// encase close in try block, because it's possible it's already
@@ -934,52 +960,85 @@ Scholar.Translate.prototype._import = function() {
* sets up import for IO
*/
Scholar.Translate.prototype._importConfigureIO = function() {
- if(this._configOptions.dataMode == "rdf") {
- var IOService = Components.classes['@mozilla.org/network/io-service;1']
- .getService(Components.interfaces.nsIIOService);
- var fileHandler = IOService.getProtocolHandler("file")
- .QueryInterface(Components.interfaces.nsIFileProtocolHandler);
- var URL = fileHandler.getURLSpecFromFile(this.location);
- delete fileHandler, IOService;
-
- var RDFService = Components.classes['@mozilla.org/rdf/rdf-service;1']
- .getService(Components.interfaces.nsIRDFService);
- var dataSource = RDFService.GetDataSourceBlocking(URL);
-
- // make an instance of the RDF handler
- this._sandbox.Scholar.RDF = new Scholar.Translate.RDF(dataSource);
- } else {
- // open file
- var fStream = Components.classes["@mozilla.org/network/file-input-stream;1"]
- .createInstance(Components.interfaces.nsIFileInputStream);
- fStream.init(this.location, 0x01, 0664, 0);
- this._streams.push(fStream);
-
- if(this._configOptions.dataMode == "line") { // line by line reading
- var notEof = true;
- var lineData = new Object();
+ if(this._storageStream) {
+ if(this._configOptions.dataMode == "rdf") {
+ // read string out of storage stream
+ var sStream = Components.classes["@mozilla.org/scriptableinputstream;1"].
+ createInstance(Components.interfaces.nsIScriptableInputStream);
+ sStream.init(this._storageStream.newInputStream(0));
+ var str = sStream.read(this._storageStreamLength);
+ sStream.close();
- fStream.QueryInterface(Components.interfaces.nsILineInputStream);
+ var IOService = Components.classes['@mozilla.org/network/io-service;1']
+ .getService(Components.interfaces.nsIIOService);
+ var dataSource = Components.classes["@mozilla.org/rdf/datasource;1?name=in-memory-datasource"].
+ createInstance(Components.interfaces.nsIRDFDataSource);
+ var parser = Components.classes["@mozilla.org/rdf/xml-parser;1"].
+ createInstance(Components.interfaces.nsIRDFXMLParser);
- this._sandbox.Scholar.read = function() {
- if(notEof) {
- notEof = fStream.readLine(lineData);
- return lineData.value;
- } else {
- return false;
- }
- }
- } else { // block reading
- var sStream = Components.classes["@mozilla.org/scriptableinputstream;1"]
- .createInstance(Components.interfaces.nsIScriptableInputStream);
- sStream.init(fStream);
+ // get URI and parse
+ var baseURI = (this.location ? IOService.newURI(this.location, "utf-8", null) : null);
+ parser.parseString(dataSource, baseURI, str);
- this._sandbox.Scholar.read = function(amount) {
- return sStream.read(amount);
+ // make an instance of the RDF handler
+ this._sandbox.Scholar.RDF = new Scholar.Translate.RDF(dataSource);
+ } else {
+ this._storageStreamFunctions(true);
+
+ if(this._scriptableStream) {
+ // close scriptable stream so functions will be forced to get a
+ // new one
+ this._scriptableStream.close();
+ this._scriptableStream = undefined;
}
+ }
+ } else {
+ if(this._configOptions.dataMode == "rdf") {
+ var IOService = Components.classes['@mozilla.org/network/io-service;1']
+ .getService(Components.interfaces.nsIIOService);
+ var fileHandler = IOService.getProtocolHandler("file")
+ .QueryInterface(Components.interfaces.nsIFileProtocolHandler);
+ var URL = fileHandler.getURLSpecFromFile(this.location);
+
+ var RDFService = Components.classes['@mozilla.org/rdf/rdf-service;1']
+ .getService(Components.interfaces.nsIRDFService);
+ var dataSource = RDFService.GetDataSourceBlocking(URL);
- // attach sStream to stack of streams to close
- this._streams.push(sStream);
+ // make an instance of the RDF handler
+ this._sandbox.Scholar.RDF = new Scholar.Translate.RDF(dataSource);
+ } else {
+ // open file and set read methods
+ var fStream = Components.classes["@mozilla.org/network/file-input-stream;1"]
+ .createInstance(Components.interfaces.nsIFileInputStream);
+ fStream.init(this.location, 0x01, 0664, 0);
+ this._streams.push(fStream);
+
+ if(this._configOptions.dataMode == "line") { // line by line reading
+ var notEof = true;
+ var lineData = new Object();
+
+ fStream.QueryInterface(Components.interfaces.nsILineInputStream);
+
+ this._sandbox.Scholar.read = function() {
+ if(notEof) {
+ notEof = fStream.readLine(lineData);
+ return lineData.value;
+ } else {
+ return false;
+ }
+ }
+ } else { // block reading
+ var sStream = Components.classes["@mozilla.org/scriptableinputstream;1"]
+ .createInstance(Components.interfaces.nsIScriptableInputStream);
+ sStream.init(fStream);
+
+ this._sandbox.Scholar.read = function(amount) {
+ return sStream.read(amount);
+ }
+
+ // attach sStream to stack of streams to close
+ this._streams.push(sStream);
+ }
}
}
}
@@ -1087,73 +1146,90 @@ Scholar.Translate.prototype._initializeInternalIO = function() {
// make an instance of the RDF handler
this._sandbox.Scholar.RDF = new Scholar.Translate.RDF(dataSource);
} else {
- // create a storage stream
- var storageStream = Components.classes["@mozilla.org/storagestream;1"].
- createInstance(Components.interfaces.nsIStorageStream);
- storageStream.init(4096, 4294967295, null); // virtually no size limit
+ this._createStorageStream();
+ this._storageStreamFunctions(true, true);
+ }
+ }
+}
+
+/*
+ * creates and returns storage stream
+ */
+Scholar.Translate.prototype._createStorageStream = function() {
+ // create a storage stream
+ this._storageStream = Components.classes["@mozilla.org/storagestream;1"].
+ createInstance(Components.interfaces.nsIStorageStream);
+ this._storageStream.init(4096, 4294967295, null); // virtually no size limit
+}
+
+/*
+ * sets up functions for reading/writing to a storage stream
+ */
+Scholar.Translate.prototype._storageStreamFunctions = function(read, write) {
+ var me = this;
+ if(write) {
+ // set up write() method
+ var fStream = _storageStream.getOutputStream(0);
+ this._sandbox.Scholar.write = function(data) { fStream.write(data, data.length) };
+
+ // set Scholar.eof() to close the storage stream
+ this._sandbox.Scholar.eof = function() {
+ this._storageStream.QueryInterface(Components.interfaces.nsIOutputStream);
+ this._storageStream.close();
+ }
+ }
+
+ if(read) {
+ // set up read methods
+ if(this._configOptions.dataMode == "line") { // line by line reading
+ var lastCharacter;
- // set up write() method
- var fStream = storageStream.getOutputStream(0);
- this._sandbox.Scholar.write = function(data) { fStream.write(data, data.length) };
+ this._sandbox.Scholar.read = function() {
+ if(!me._scriptableStream) { // allocate an fStream and sStream on the fly
+ // otherwise with no data we get an error
+ me._scriptableStream = Components.classes["@mozilla.org/scriptableinputstream;1"].
+ createInstance(Components.interfaces.nsIScriptableInputStream);
+ me._scriptableStream.init(me._storageStream.newInputStream(0));
- // set up read methods
- var sStream;
- var me = this;
- if(this._configOptions.dataMode == "line") { // line by line reading
- var lastCharacter;
-
- this._sandbox.Scholar.read = function() {
- if(!sStream) { // allocate an fStream and sStream on the fly
- // otherwise with no data we get an error
- sStream = Components.classes["@mozilla.org/scriptableinputstream;1"]
- .createInstance(Components.interfaces.nsIScriptableInputStream);
- sStream.init(fStream.newInputStream(0));
-
- // attach sStream to stack of streams to close
- me._streams.push(sStream);
- }
+ // attach sStream to stack of streams to close
+ me._streams.push(me._scriptableStream);
+ }
+
+ var character = me._scriptableStream.read(1);
+ if(!character) {
+ return false;
+ }
+ var string = "";
- var character = sStream.read(1);
- if(!character) {
- return false;
- }
- var string = "";
-
- if(lastCharacter == "\r" && character == "\n") {
- // if the last read got a cr, and this first char was
- // an lf, ignore the lf
- character = "";
- }
-
- while(character != "\n" && character != "\r" && character) {
- string += character;
- character = sStream.read(1);
- }
-
- lastCharacter = character;
-
- return string;
+ if(lastCharacter == "\r" && character == "\n") {
+ // if the last read got a cr, and this first char was
+ // an lf, ignore the lf
+ character = "";
}
- } else { // block reading
- this._sandbox.Scholar.read = function(amount) {
- if(!sStream) { // allocate an fStream and sStream on the fly
- // otherwise with no data we get an error
- sStream = Components.classes["@mozilla.org/scriptableinputstream;1"]
- .createInstance(Components.interfaces.nsIScriptableInputStream);
- sStream.init(fStream.newInputStream(0));
- // attach sStream to stack of streams to close
- me._streams.push(sStream);
- }
-
- return sStream.read(amount);
+ while(character != "\n" && character != "\r" && character) {
+ string += character;
+ character = me._scriptableStream.read(1);
}
+
+ lastCharacter = character;
+
+ return string;
}
+ } else { // block reading
+ this._sandbox.Scholar.read = function(amount) {
+ if(!me._scriptableStream) { // allocate an fStream and
+ // sStream on the fly; otherwise
+ // with no data we get an error
+ me._scriptableStream = Components.classes["@mozilla.org/scriptableinputstream;1"].
+ createInstance(Components.interfaces.nsIScriptableInputStream);
+ me._scriptableStream.init(me._storageStream.newInputStream(0));
- // set Scholar.eof() to close the storage stream
- this._sandbox.Scholar.eof = function() {
- storageStream.QueryInterface(Components.interfaces.nsIOutputStream);
- storageStream.close();
+ // attach sStream to stack of streams to close
+ me._streams.push(me._scriptableStream);
+ }
+
+ return me._scriptableStream.read(amount);
}
}
}
diff --git a/defaults/preferences/scholar.js b/defaults/preferences/scholar.js
@@ -4,4 +4,5 @@
pref("extensions.scholar.automaticScraperUpdates",true);
pref("extensions.scholar.scholarPaneOnTop",false);
pref("extensions.scholar.openURL.resolver","http://athene.gmu.edu:8888/lfp/LinkFinderPlus/Display");
-pref("extensions.scholar.openURL.version","0.1");
-\ No newline at end of file
+pref("extensions.scholar.openURL.version","0.1");
+pref("extensions.scholar.parseEndNoteMIMETypes",true);
+\ No newline at end of file
diff --git a/scrapers.sql b/scrapers.sql
@@ -1,7 +1,7 @@
--- 39
+-- 40
-- Set the following timestamp to the most recent scraper update date
-REPLACE INTO "version" VALUES ('repository', STRFTIME('%s', '2006-08-07 21:55:00'));
+REPLACE INTO "version" VALUES ('repository', STRFTIME('%s', '2006-08-08 17:12:00'));
REPLACE INTO "translators" VALUES ('96b9f483-c44d-5784-cdad-ce21b984fe01', '2006-06-28 23:08:00', 4, 'Amazon.com Scraper', 'Simon Kornblith', '^http://www\.amazon\.com/(?:gp/(?:product|search)/|exec/obidos/search-handle-url/|s/)',
'function detectWeb(doc, url) {
@@ -4068,12 +4068,12 @@ function doImport() {
}
}');
-REPLACE INTO "translators" VALUES ('32d59d2d-b65a-4da4-b0a3-bdd3cfb979e7', '2006-06-30 15:36:00', 3, 'RIS', 'Simon Kornblith', 'ris',
+REPLACE INTO "translators" VALUES ('32d59d2d-b65a-4da4-b0a3-bdd3cfb979e7', '2006-08-08 17:12:00', 3, 'RIS', 'Simon Kornblith', 'ris',
'Scholar.configure("dataMode", "line");
Scholar.addOption("exportNotes", true);
function detectImport() {
- var line
+ var line;
while(line = Scholar.read()) {
if(line.replace(/\s/g, "") != "") {
if(line.substr(0, 6) == "TY - ") {
@@ -4141,6 +4141,8 @@ var inputTypeMap = {
function processTag(item, tag, value) {
if(fieldMap[tag]) {
item[fieldMap[tag]] = value;
+ } else if(inputFieldMap[tag]) {
+ item[inputFieldMap[tag]] = value;
} else if(tag == "TY") {
// look for type