www

Unnamed repository; edit this file 'description' to name the repository.
Log | Files | Refs | Submodules | README | LICENSE

commit bb57e6ba7dabaa7a69f0ca7062583ab80bed7116
parent 8f344872055eb4bae690da83cbf2197d42e9b864
Author: Simon Kornblith <simon@simonster.com>
Date:   Fri,  2 Jun 2006 18:22:34 +0000

Provide visual feedback for scraping

Diffstat:
M chrome/chromeFiles/content/scholar/ingester/browser.js | 265 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++-------------------
D chrome/chromeFiles/content/scholar/ingester/scrape-progress.xul | 27 ---------------------------
M chrome/chromeFiles/content/scholar/xpcom/ingester.js | 15 ++++++++++++---
M chrome/chromeFiles/locale/en-US/scholar/scholar.properties | 7 +++++--
4 files changed, 219 insertions(+), 95 deletions(-)

diff --git a/chrome/chromeFiles/content/scholar/ingester/browser.js b/chrome/chromeFiles/content/scholar/ingester/browser.js @@ -1,9 +1,23 @@ // Firefox Scholar Ingester Browser Functions -// Utilities based on code taken from Greasemonkey +// Based on code taken from Greasemonkey and PiggyBank // This code is licensed according to the GPL +////////////////////////////////////////////////////////////////////////////// +// +// Scholar.Ingester.Interface +// +////////////////////////////////////////////////////////////////////////////// + +// Class to interface with the browser when ingesting data + Scholar.Ingester.Interface = function() {} +////////////////////////////////////////////////////////////////////////////// +// +// Public Scholar.Ingester.Interface methods +// +////////////////////////////////////////////////////////////////////////////// + /* * Initialize some variables and prepare event listeners for when chrome is done * loading @@ -40,63 +54,14 @@ Scholar.Ingester.Interface.chromeUnload = function() { this.tabBrowser.removeProgressListener(this); } - -/* - * Gets a document object given a browser window object - * - * NOTE: Browser objects are associated with document objects via keys generated - * from the time the browser object is opened. I'm not sure if this is the - * appropriate mechanism for handling this, but it's what PiggyBank used and it - * appears to work. 
- */ -Scholar.Ingester.Interface.getDocument = function(browser) { - try { - var key = browser.getAttribute("scholar-key"); - if(Scholar.Ingester.Interface.browserDocuments[key]) { - return Scholar.Ingester.Interface.browserDocuments[key]; - } - } finally {} - return false; -} - -/* - * Creates a new document object for a browser window object, attempts to - * retrieve appropriate scraper - */ -Scholar.Ingester.Interface.setDocument = function(browser) { - try { - var key = browser.getAttribute("scholar-key"); - } finally { - if(!key) { - var key = (new Date()).getTime(); - browser.setAttribute("scholar-key", key); - } - } - Scholar.Ingester.Interface.browserDocuments[key] = new Scholar.Ingester.Document(browser); - Scholar.Ingester.Interface.browserDocuments[key].retrieveScraper(); -} - -/* - * Deletes the document object associated with a given browser window object - */ -Scholar.Ingester.Interface.deleteDocument = function(browser) { - try { - var key = browser.getAttribute("scholar-key"); - if(Scholar.Ingester.Interface.browserDocuments[key]) { - delete Scholar.Ingester.Interface.browserDocuments[key]; - return true; - } - } finally {} - return false; -} - /* * Scrapes a page (called when the capture icon is clicked) */ Scholar.Ingester.Interface.scrapeThisPage = function() { - var document = Scholar.Ingester.Interface.getDocument(Scholar.Ingester.Interface.tabBrowser.selectedBrowser); - if(document.scraper) { - document.scrapePage(); + var documentObject = Scholar.Ingester.Interface._getDocument(Scholar.Ingester.Interface.tabBrowser.selectedBrowser); + if(documentObject.scraper) { + Scholar.Ingester.Interface.scrapeProgress = new Scholar.Ingester.Interface.Progress(window, Scholar.Ingester.Interface.tabBrowser.selectedBrowser.contentDocument, Scholar.getString("ingester.scraping")); + documentObject.scrapePage(Scholar.Ingester.Interface._finishScraping); } } @@ -105,11 +70,11 @@ Scholar.Ingester.Interface.scrapeThisPage = function() { * thereof of the current 
page */ Scholar.Ingester.Interface.updateStatus = function(browser) { - var document = Scholar.Ingester.Interface.getDocument(browser); - if(document && document.scraper) { - this.statusImage.src = "chrome://scholar/skin/capture_colored.png"; + var documentObject = Scholar.Ingester.Interface._getDocument(browser); + if(documentObject && documentObject.scraper) { + Scholar.Ingester.Interface.statusImage.src = "chrome://scholar/skin/capture_colored.png"; } else { - this.statusImage.src = "chrome://scholar/skin/capture_gray.png"; + Scholar.Ingester.Interface.statusImage.src = "chrome://scholar/skin/capture_gray.png"; } } @@ -122,8 +87,8 @@ Scholar.Ingester.Interface.updateStatus = function(browser) { * if a tab is loaded behind the currently selected page, the ingester will not * create a new object for it. */ -Scholar.Ingester.Interface.contentLoad = function() { - Scholar.Ingester.Interface.setDocument(Scholar.Ingester.Interface.tabBrowser.selectedBrowser); +Scholar.Ingester.Interface.contentLoad = function() { + Scholar.Ingester.Interface._setDocument(Scholar.Ingester.Interface.tabBrowser.selectedBrowser); Scholar.Ingester.Interface.updateStatus(Scholar.Ingester.Interface.tabBrowser.selectedBrowser); } @@ -159,7 +124,7 @@ Scholar.Ingester.Interface.Listener.onLocationChange = function() { Scholar.Ingester.Interface.browsers.splice(i,1); // To execute if document object does not exist - Scholar.Ingester.Interface.deleteDocument(browser); + Scholar.Ingester.Interface._deleteDocument(browser); } } @@ -185,4 +150,179 @@ Scholar.Ingester.Interface.Listener.onLocationChange = function() { Scholar.Ingester.Interface.updateStatus( Scholar.Ingester.Interface.tabBrowser.selectedBrowser ); -} -\ No newline at end of file +} + +////////////////////////////////////////////////////////////////////////////// +// +// Private Scholar.Ingester.Document methods +// +////////////////////////////////////////////////////////////////////////////// + +/* + * Gets a document object given a 
browser window object + * + * NOTE: Browser objects are associated with document objects via keys generated + * from the time the browser object is opened. I'm not sure if this is the + * appropriate mechanism for handling this, but it's what PiggyBank used and it + * appears to work. + */ +Scholar.Ingester.Interface._getDocument = function(browser) { + try { + var key = browser.getAttribute("scholar-key"); + if(Scholar.Ingester.Interface.browserDocuments[key]) { + return Scholar.Ingester.Interface.browserDocuments[key]; + } + } finally {} + return false; +} + +/* + * Creates a new document object for a browser window object, attempts to + * retrieve appropriate scraper + */ +Scholar.Ingester.Interface._setDocument = function(browser) { + try { + var key = browser.getAttribute("scholar-key"); + } finally { + if(!key) { + var key = (new Date()).getTime(); + browser.setAttribute("scholar-key", key); + } + } + Scholar.Ingester.Interface.browserDocuments[key] = new Scholar.Ingester.Document(browser); + Scholar.Ingester.Interface.browserDocuments[key].retrieveScraper(); +} + +/* + * Deletes the document object associated with a given browser window object + */ +Scholar.Ingester.Interface._deleteDocument = function(browser) { + try { + var key = browser.getAttribute("scholar-key"); + if(Scholar.Ingester.Interface.browserDocuments[key]) { + delete Scholar.Ingester.Interface.browserDocuments[key]; + return true; + } + } finally {} + return false; +} + +/* + * Callback to be executed when scraping is complete + */ +Scholar.Ingester.Interface._finishScraping = function(documentObject) { + Scholar.Ingester.Interface.scrapeProgress.changeHeadline(Scholar.getString("ingester.scrapeComplete")); + + var fields = Scholar.ItemFields.getItemTypeFields(documentObject.item.getField("itemTypeID")); + + var titleLabel = Scholar.getString("itemFields.title") + ":" + Scholar.Ingester.Interface.scrapeProgress.addResult(titleLabel, this.item.getField("title")); + var creators = 
documentObject.item.numCreators(); + if(creators) { + for(var i=0; i<creators; i++) { + var creator = documentObject.item.getCreator(i); + var label = Scholar.getString("creatorTypes."+Scholar.CreatorTypes.getTypeName(creator.creatorTypeID)) + ":"; + var data = creator.firstName + ' ' + creator.lastName; + Scholar.Ingester.Interface.scrapeProgress.addResult(label, data); + } + } + + for(i in fields) { + var data = documentObject.item.getField(fields[i]); + if(data) { + var name = Scholar.ItemFields.getName(fields[i]); + if(name != "source") { + var label = Scholar.getString("itemFields."+ name) + ":"; + Scholar.Ingester.Interface.scrapeProgress.addResult(label, data); + } + } + } + + setTimeout(function() { Scholar.Ingester.Interface.scrapeProgress.fade() }, 2000); +} + +////////////////////////////////////////////////////////////////////////////// +// +// Scholar.Ingester.Progress +// +////////////////////////////////////////////////////////////////////////////// + +// Handles the display of a div showing progress in scraping + +Scholar.Ingester.Interface.Progress = function(myWindow, myDocument, headline) { + this.window = myWindow; + this.document = myDocument; + this.div = this.document.createElement('div'); + this.div.style.MozOpacity = '.9'; + this.div.style.position = 'fixed'; + this.div.style.right = '20px'; + this.div.style.top = '20px'; + this.div.style.width = '200px'; + this.div.style.height = '120px'; + this.div.style.backgroundColor = '#7eadd9' + this.div.style.color = '#000'; + this.div.style.padding = '5px'; + this.div.style.fontFamily = 'Arial, Geneva, Helvetica'; + this.div.style.overflow = 'hidden'; + this.div.id = 'firefoxScholarProgressDiv'; + + this.headlineP = this.document.createElement("div"); + this.headlineP.style.textAlign = 'center'; + this.headlineP.style.fontSize = '22px'; + this.headlineP.style.marginBottom = '5px'; + if(!headline) { + headline = '&nbsp;'; + } + var headlineNode = this.document.createTextNode(headline); + 
this.headlineP.appendChild(headlineNode); + this.div.appendChild(this.headlineP); + + this.bodyP = this.document.createElement("div"); + this.table = this.document.createElement("table"); + this.table.style.borderCollapse = 'collapse'; + this.bodyP.appendChild(this.table); + this.div.appendChild(this.bodyP); + + this.document.body.appendChild(this.div); +} + +Scholar.Ingester.Interface.Progress.prototype.changeHeadline = function(headline) { + this.headlineP.removeChild(this.headlineP.firstChild); + + var headlineNode = this.document.createTextNode(headline); + this.headlineP.appendChild(headlineNode); +} + +Scholar.Ingester.Interface.Progress.prototype.addResult = function(label, data) { + var labelNode = this.document.createTextNode(label); + var dataNode = this.document.createTextNode(data); + + var tr = this.document.createElement("tr"); + var labelTd = this.document.createElement("td"); + labelTd.style.fontSize = '10px'; + labelTd.style.width = '60px'; + var dataTd = this.document.createElement("td"); + dataTd.style.fontSize = '10px'; + + labelTd.appendChild(labelNode); + dataTd.appendChild(dataNode); + tr.appendChild(labelTd); + tr.appendChild(dataTd); + this.table.appendChild(tr); +} + +Scholar.Ingester.Interface.Progress.prototype.fade = function() { + // Icky, icky hack to keep objects + var me = this; + this._fader = function() { + if(me.div.style.MozOpacity <= 0) { + me.div.style.display = 'none'; + } else { + me.div.style.MozOpacity -= .1; + setTimeout(me._fader, 100); + } + } + + // Begin fade + this._fader(); +} diff --git a/chrome/chromeFiles/content/scholar/ingester/scrape-progress.xul b/chrome/chromeFiles/content/scholar/ingester/scrape-progress.xul @@ -1,27 +0,0 @@ -<?xml version="1.0" ?> -<?xml-stylesheet href="chrome://global/skin/" type="text/css"?> -<!DOCTYPE overlay SYSTEM "chrome://piggy-bank/locale/load-dom-dialog.dtd"> - -<window - xmlns="http://www.mozilla.org/keymaster/gatekeeper/there.is.only.xul" - 
xmlns:xul="http://www.mozilla.org/keymaster/gatekeeper/there.is.only.xul" - xmlns:html="http://www.w3.org/1999/xhtml" - id="scrape-progress" - windowtype="Options" - orient="vertical" - screenX="10" screenY="10" - persist="width height screenX screenY sizeMode" - title="Scraping Page&#8230;" -> - - <hbox flex="1"> - <vbox flex="1" style="padding: 10px"> - <label value="Scraping Page&#8230;" /> - <progressmeter id="progress" mode="undetermined" /> - </vbox> - <resizer id="window-resizer" dir="bottomright"/> - <box style="visibility: collapse"> - <tabbrowser id="hidden-browser" /> - </box> - </hbox> -</window> diff --git a/chrome/chromeFiles/content/scholar/xpcom/ingester.js b/chrome/chromeFiles/content/scholar/xpcom/ingester.js @@ -356,7 +356,7 @@ Scholar.Ingester.Document.prototype.canScrape = function(currentScraper) { currentScraper.scraperDetectCode + "\n})()", scraperSandbox); } catch(e) { - throw e+' in scraper '+currentScraper.label; + throw e+' in scraperDetectCode for '+currentScraper.label; } } return canScrape; @@ -375,7 +375,11 @@ Scholar.Ingester.Document.prototype.scrapePage = function(callback) { var scraperSandbox = this.sandbox; - Components.utils.evalInSandbox(this.scraper.scraperJavaScript, scraperSandbox); + try { + Components.utils.evalInSandbox(this.scraper.scraperJavaScript, scraperSandbox); + } catch(e) { + throw e+' in scraperJavaScript for '+this.scraper.label; + } // If synchronous, call _scrapePageComplete(); if(!scraperSandbox._waitForCompletion) { @@ -413,7 +417,7 @@ Scholar.Ingester.Document.prototype.scrapePage = function(callback) { Scholar.Ingester.Document.prototype._scrapePageComplete = function() { this._updateDatabase(); if(this._scrapeCallback) { - this._scrapeCallback(); + this._scrapeCallback(this); } } @@ -469,5 +473,10 @@ Scholar.Ingester.Document.prototype._updateDatabase = function() { newItem.setCreator(0, firstName, lastName); } newItem.save(); + + // First one is stored so as to be accessible + if(!this.item) { + 
this.item = newItem; + } } } \ No newline at end of file diff --git a/chrome/chromeFiles/locale/en-US/scholar/scholar.properties b/chrome/chromeFiles/locale/en-US/scholar/scholar.properties @@ -21,4 +21,7 @@ itemTypes.journalArticle = Journal Article creatorTypes.author = Author creatorTypes.contributor = Contributor -creatorTypes.editor = Editor -\ No newline at end of file +creatorTypes.editor = Editor + +ingester.scraping = Scraping Page... +ingester.scrapeComplete = Scraping Complete +\ No newline at end of file