commit bb57e6ba7dabaa7a69f0ca7062583ab80bed7116
parent 8f344872055eb4bae690da83cbf2197d42e9b864
Author: Simon Kornblith <simon@simonster.com>
Date: Fri, 2 Jun 2006 18:22:34 +0000
Provide visual feedback for scraping
Diffstat:
4 files changed, 219 insertions(+), 95 deletions(-)
diff --git a/chrome/chromeFiles/content/scholar/ingester/browser.js b/chrome/chromeFiles/content/scholar/ingester/browser.js
@@ -1,9 +1,23 @@
// Firefox Scholar Ingester Browser Functions
-// Utilities based on code taken from Greasemonkey
+// Based on code taken from Greasemonkey and PiggyBank
// This code is licensed according to the GPL
+//////////////////////////////////////////////////////////////////////////////
+//
+// Scholar.Ingester.Interface
+//
+//////////////////////////////////////////////////////////////////////////////
+
+// Class to interface with the browser when ingesting data
+
Scholar.Ingester.Interface = function() {}
+//////////////////////////////////////////////////////////////////////////////
+//
+// Public Scholar.Ingester.Interface methods
+//
+//////////////////////////////////////////////////////////////////////////////
+
/*
* Initialize some variables and prepare event listeners for when chrome is done
* loading
@@ -40,63 +54,14 @@ Scholar.Ingester.Interface.chromeUnload = function() {
this.tabBrowser.removeProgressListener(this);
}
-
-/*
- * Gets a document object given a browser window object
- *
- * NOTE: Browser objects are associated with document objects via keys generated
- * from the time the browser object is opened. I'm not sure if this is the
- * appropriate mechanism for handling this, but it's what PiggyBank used and it
- * appears to work.
- */
-Scholar.Ingester.Interface.getDocument = function(browser) {
- try {
- var key = browser.getAttribute("scholar-key");
- if(Scholar.Ingester.Interface.browserDocuments[key]) {
- return Scholar.Ingester.Interface.browserDocuments[key];
- }
- } finally {}
- return false;
-}
-
-/*
- * Creates a new document object for a browser window object, attempts to
- * retrieve appropriate scraper
- */
-Scholar.Ingester.Interface.setDocument = function(browser) {
- try {
- var key = browser.getAttribute("scholar-key");
- } finally {
- if(!key) {
- var key = (new Date()).getTime();
- browser.setAttribute("scholar-key", key);
- }
- }
- Scholar.Ingester.Interface.browserDocuments[key] = new Scholar.Ingester.Document(browser);
- Scholar.Ingester.Interface.browserDocuments[key].retrieveScraper();
-}
-
-/*
- * Deletes the document object associated with a given browser window object
- */
-Scholar.Ingester.Interface.deleteDocument = function(browser) {
- try {
- var key = browser.getAttribute("scholar-key");
- if(Scholar.Ingester.Interface.browserDocuments[key]) {
- delete Scholar.Ingester.Interface.browserDocuments[key];
- return true;
- }
- } finally {}
- return false;
-}
-
/*
* Scrapes a page (called when the capture icon is clicked)
*/
Scholar.Ingester.Interface.scrapeThisPage = function() {
- var document = Scholar.Ingester.Interface.getDocument(Scholar.Ingester.Interface.tabBrowser.selectedBrowser);
- if(document.scraper) {
- document.scrapePage();
+ var documentObject = Scholar.Ingester.Interface._getDocument(Scholar.Ingester.Interface.tabBrowser.selectedBrowser);
+ if(documentObject.scraper) { // _getDocument() may return false; false.scraper is undefined, so that case is skipped too
+ Scholar.Ingester.Interface.scrapeProgress = new Scholar.Ingester.Interface.Progress(window, Scholar.Ingester.Interface.tabBrowser.selectedBrowser.contentDocument, Scholar.getString("ingester.scraping"));
+ documentObject.scrapePage(Scholar.Ingester.Interface._finishScraping); // callback is handed the Document once scraping completes
}
}
@@ -105,11 +70,11 @@ Scholar.Ingester.Interface.scrapeThisPage = function() {
* thereof of the current page
*/
Scholar.Ingester.Interface.updateStatus = function(browser) {
- var document = Scholar.Ingester.Interface.getDocument(browser);
- if(document && document.scraper) {
- this.statusImage.src = "chrome://scholar/skin/capture_colored.png";
+ var documentObject = Scholar.Ingester.Interface._getDocument(browser);
+ if(documentObject && documentObject.scraper) { // colored icon only when a document object with a scraper is stored for this browser
+ Scholar.Ingester.Interface.statusImage.src = "chrome://scholar/skin/capture_colored.png"; // addressed by full name rather than via "this"
} else {
- this.statusImage.src = "chrome://scholar/skin/capture_gray.png";
+ Scholar.Ingester.Interface.statusImage.src = "chrome://scholar/skin/capture_gray.png";
}
}
@@ -122,8 +87,8 @@ Scholar.Ingester.Interface.updateStatus = function(browser) {
* if a tab is loaded behind the currently selected page, the ingester will not
* create a new object for it.
*/
-Scholar.Ingester.Interface.contentLoad = function() {
- Scholar.Ingester.Interface.setDocument(Scholar.Ingester.Interface.tabBrowser.selectedBrowser);
+Scholar.Ingester.Interface.contentLoad = function() {
+ Scholar.Ingester.Interface._setDocument(Scholar.Ingester.Interface.tabBrowser.selectedBrowser); // stores a fresh document object for the selected browser
Scholar.Ingester.Interface.updateStatus(Scholar.Ingester.Interface.tabBrowser.selectedBrowser);
}
@@ -159,7 +124,7 @@ Scholar.Ingester.Interface.Listener.onLocationChange = function() {
Scholar.Ingester.Interface.browsers.splice(i,1);
// To execute if document object does not exist
- Scholar.Ingester.Interface.deleteDocument(browser);
+ Scholar.Ingester.Interface._deleteDocument(browser);
}
}
@@ -185,4 +150,179 @@ Scholar.Ingester.Interface.Listener.onLocationChange = function() {
Scholar.Ingester.Interface.updateStatus(
Scholar.Ingester.Interface.tabBrowser.selectedBrowser
);
-}
-\ No newline at end of file
+}
+
+//////////////////////////////////////////////////////////////////////////////
+//
+// Private Scholar.Ingester.Interface methods
+//
+//////////////////////////////////////////////////////////////////////////////
+
+/*
+ * Looks up the document object stored for a browser window object; returns it,
+ * or false when nothing is stored under the browser's "scholar-key" attribute
+ *
+ * NOTE: Browser objects are associated with document objects via keys generated
+ * from the time the browser object is opened. I'm not sure if this is the
+ * appropriate mechanism for handling this, but it's what PiggyBank used and it
+ * appears to work.
+ */
+Scholar.Ingester.Interface._getDocument = function(browser) {
+	var key = browser.getAttribute("scholar-key");
+	var stored = Scholar.Ingester.Interface.browserDocuments[key];
+
+	// A missing (or otherwise falsy) entry is reported as false
+	return stored ? stored : false;
+}
+
+
+/*
+ * Creates a new document object for a browser window object (overwriting any entry already stored under the same "scholar-key"), attempts to
+ * retrieve appropriate scraper; a browser without a "scholar-key" attribute is first tagged with the current time in milliseconds
+ */
+Scholar.Ingester.Interface._setDocument = function(browser) {
+ try {
+ var key = browser.getAttribute("scholar-key");
+ } finally { // there is no catch: an error thrown by getAttribute() still propagates once this block has run
+ if(!key) {
+ var key = (new Date()).getTime(); // redeclares the same function-scoped variable; only the assignment matters
+ browser.setAttribute("scholar-key", key);
+ }
+ }
+ Scholar.Ingester.Interface.browserDocuments[key] = new Scholar.Ingester.Document(browser);
+ Scholar.Ingester.Interface.browserDocuments[key].retrieveScraper();
+}
+
+/*
+ * Removes the document object stored for a given browser window object; returns
+ * true if an entry was found and deleted, false if there was nothing to delete
+ */
+Scholar.Ingester.Interface._deleteDocument = function(browser) {
+	var key = browser.getAttribute("scholar-key");
+	var documents = Scholar.Ingester.Interface.browserDocuments;
+	if(!documents[key]) {
+		return false;
+	}
+	delete documents[key];
+	return true;
+}
+
+/*
+ * Callback to be executed when scraping is complete; documentObject is the
+ * Scholar.Ingester.Document that scraped. Lists the title, creators and fields
+ * of its first saved item in the progress box, then fades the box out.
+ */
+Scholar.Ingester.Interface._finishScraping = function(documentObject) {
+	// Keep our own reference: a later scrape may replace Interface.scrapeProgress before the timer fires
+	var progress = Scholar.Ingester.Interface.scrapeProgress;
+	var item = documentObject.item;
+	progress.changeHeadline(Scholar.getString("ingester.scrapeComplete"));
+
+	if(item) {	// item is only set once something has actually been saved
+		progress.addResult(Scholar.getString("itemFields.title") + ":", item.getField("title"));
+		var creators = item.numCreators();
+		for(var i=0; i<creators; i++) {
+			var creator = item.getCreator(i);
+			var creatorLabel = Scholar.getString("creatorTypes."+Scholar.CreatorTypes.getTypeName(creator.creatorTypeID)) + ":";
+			progress.addResult(creatorLabel, creator.firstName + ' ' + creator.lastName);
+		}
+
+		var fields = Scholar.ItemFields.getItemTypeFields(item.getField("itemTypeID"));
+		for(var j in fields) {
+			var data = item.getField(fields[j]);
+			if(!data) continue;	// empty fields are not listed
+			var name = Scholar.ItemFields.getName(fields[j]);
+			if(name != "source") {
+				progress.addResult(Scholar.getString("itemFields."+ name) + ":", data);
+			}
+		}
+	}
+
+	setTimeout(function() { progress.fade() }, 2000);
+}
+
+//////////////////////////////////////////////////////////////////////////////
+//
+// Scholar.Ingester.Interface.Progress
+//
+//////////////////////////////////////////////////////////////////////////////
+
+// Displays a fixed-position div inside the page that shows scraping progress
+
+Scholar.Ingester.Interface.Progress = function(myWindow, myDocument, headline) {
+	var doc = myDocument;
+	var boxStyles = [
+		['MozOpacity', '.9'], ['position', 'fixed'], ['right', '20px'],
+		['top', '20px'], ['width', '200px'], ['height', '120px'],
+		['backgroundColor', '#7eadd9'], ['color', '#000'], ['padding', '5px'],
+		['fontFamily', 'Arial, Geneva, Helvetica'], ['overflow', 'hidden']
+	];
+	var headlineStyles = [['textAlign', 'center'], ['fontSize', '22px'], ['marginBottom', '5px']];
+
+	this.window = myWindow;
+	this.document = doc;
+	// Outer box
+	this.div = doc.createElement('div');
+	for(var n=0; n<boxStyles.length; n++) {
+		this.div.style[boxStyles[n][0]] = boxStyles[n][1];
+	}
+	this.div.id = 'firefoxScholarProgressDiv';
+
+	// Headline; a single space is used when no headline is given
+	this.headlineP = doc.createElement("div");
+	for(n=0; n<headlineStyles.length; n++) {
+		this.headlineP.style[headlineStyles[n][0]] = headlineStyles[n][1];
+	}
+	this.headlineP.appendChild(doc.createTextNode(headline ? headline : ' '));
+	this.div.appendChild(this.headlineP);
+
+	// Body: holds the table that addResult() appends rows to
+	this.bodyP = doc.createElement("div");
+	this.table = doc.createElement("table");
+	this.table.style.borderCollapse = 'collapse';
+	this.bodyP.appendChild(this.table);
+	this.div.appendChild(this.bodyP);
+
+	doc.body.appendChild(this.div);
+}
+
+// Swaps the first child of the headline element for a text node holding the given string
+Scholar.Ingester.Interface.Progress.prototype.changeHeadline = function(headline) {
+	var container = this.headlineP;
+	container.removeChild(container.firstChild);
+	container.appendChild(this.document.createTextNode(headline));
+}
+
+// Appends a row to the results table: the label in a 60px-wide cell, the data beside it
+Scholar.Ingester.Interface.Progress.prototype.addResult = function(label, data) {
+	var doc = this.document;
+	var row = doc.createElement("tr");
+	var texts = [label, data];
+
+	for(var col=0; col<texts.length; col++) {
+		var cell = doc.createElement("td");
+		cell.style.fontSize = '10px';
+		if(col == 0) {
+			cell.style.width = '60px';	// only the label column gets a fixed width
+		}
+		cell.appendChild(doc.createTextNode(texts[col]));
+		row.appendChild(cell);
+	}
+	this.table.appendChild(row);
+}
+
+// Fades the box out in 10% opacity steps every 100 ms, then hides and detaches it
+Scholar.Ingester.Interface.Progress.prototype.fade = function() {
+	var me = this;	// the timer does not invoke _fader as a method of this object
+	var tenths = Math.round(parseFloat(me.div.style.MozOpacity) * 10);	// whole steps, no math on the CSS string
+	this._fader = function() {
+		if(tenths > 0) {
+			me.div.style.MozOpacity = (--tenths) / 10;
+			setTimeout(me._fader, 100);
+		} else {	// also taken at once when no opacity is set (tenths is NaN)
+			me.div.style.display = 'none';
+			if(me.div.parentNode) me.div.parentNode.removeChild(me.div);	// avoid piling up hidden boxes
+		}
+	}
+	this._fader();
+}
diff --git a/chrome/chromeFiles/content/scholar/ingester/scrape-progress.xul b/chrome/chromeFiles/content/scholar/ingester/scrape-progress.xul
@@ -1,27 +0,0 @@
-<?xml version="1.0" ?>
-<?xml-stylesheet href="chrome://global/skin/" type="text/css"?>
-<!DOCTYPE overlay SYSTEM "chrome://piggy-bank/locale/load-dom-dialog.dtd">
-
-<window
- xmlns="http://www.mozilla.org/keymaster/gatekeeper/there.is.only.xul"
- xmlns:xul="http://www.mozilla.org/keymaster/gatekeeper/there.is.only.xul"
- xmlns:html="http://www.w3.org/1999/xhtml"
- id="scrape-progress"
- windowtype="Options"
- orient="vertical"
- screenX="10" screenY="10"
- persist="width height screenX screenY sizeMode"
- title="Scraping Page…"
->
-
- <hbox flex="1">
- <vbox flex="1" style="padding: 10px">
- <label value="Scraping Page…" />
- <progressmeter id="progress" mode="undetermined" />
- </vbox>
- <resizer id="window-resizer" dir="bottomright"/>
- <box style="visibility: collapse">
- <tabbrowser id="hidden-browser" />
- </box>
- </hbox>
-</window>
diff --git a/chrome/chromeFiles/content/scholar/xpcom/ingester.js b/chrome/chromeFiles/content/scholar/xpcom/ingester.js
@@ -356,7 +356,7 @@ Scholar.Ingester.Document.prototype.canScrape = function(currentScraper) {
currentScraper.scraperDetectCode +
"\n})()", scraperSandbox);
} catch(e) {
- throw e+' in scraper '+currentScraper.label;
+ throw e+' in scraperDetectCode for '+currentScraper.label;
}
}
return canScrape;
@@ -375,7 +375,11 @@ Scholar.Ingester.Document.prototype.scrapePage = function(callback) {
var scraperSandbox = this.sandbox;
- Components.utils.evalInSandbox(this.scraper.scraperJavaScript, scraperSandbox);
+ try {
+ Components.utils.evalInSandbox(this.scraper.scraperJavaScript, scraperSandbox);
+ } catch(e) {
+ throw e+' in scraperJavaScript for '+this.scraper.label;
+ }
// If synchronous, call _scrapePageComplete();
if(!scraperSandbox._waitForCompletion) {
@@ -413,7 +417,7 @@ Scholar.Ingester.Document.prototype.scrapePage = function(callback) {
Scholar.Ingester.Document.prototype._scrapePageComplete = function() {
this._updateDatabase();
if(this._scrapeCallback) {
- this._scrapeCallback();
+ this._scrapeCallback(this); // the callback now receives this Document as its argument
}
}
@@ -469,5 +473,10 @@ Scholar.Ingester.Document.prototype._updateDatabase = function() {
newItem.setCreator(0, firstName, lastName);
}
newItem.save();
+
+ // First one is stored so as to be accessible
+ if(!this.item) {
+ this.item = newItem;
+ }
}
}
\ No newline at end of file
diff --git a/chrome/chromeFiles/locale/en-US/scholar/scholar.properties b/chrome/chromeFiles/locale/en-US/scholar/scholar.properties
@@ -21,4 +21,7 @@ itemTypes.journalArticle = Journal Article
creatorTypes.author = Author
creatorTypes.contributor = Contributor
-creatorTypes.editor = Editor
-\ No newline at end of file
+creatorTypes.editor = Editor
+
+ingester.scraping = Scraping Page...
+ingester.scrapeComplete = Scraping Complete
+\ No newline at end of file