www

Unnamed repository; edit this file 'description' to name the repository.
Log | Files | Refs | Submodules | README | LICENSE

commit 93652a137cf25526c2ed5480f9adfda409128897
parent c42991a5bf204def797fd19d68046b3aebcf6a87
Author: Simon Kornblith <simon@simonster.com>
Date:   Fri,  2 Jun 2006 23:53:42 +0000

Fix issues with asynchronous scraping and XMLHttpRequest



Diffstat:
M chrome/chromeFiles/content/scholar/ingester/browser.js | 60 +++++++++++++++++++++++++++++++++++++++---------------------
M chrome/chromeFiles/content/scholar/xpcom/ingester.js | 102 +++++++++++++++++++++++++++++++++++++++++++++++++------------------------------
M chrome/chromeFiles/locale/en-US/scholar/scholar.properties | 6 ++++--
3 files changed, 106 insertions(+), 62 deletions(-)

diff --git a/chrome/chromeFiles/content/scholar/ingester/browser.js b/chrome/chromeFiles/content/scholar/ingester/browser.js @@ -211,31 +211,36 @@ Scholar.Ingester.Interface._deleteDocument = function(browser) { * Callback to be executed when scraping is complete */ Scholar.Ingester.Interface._finishScraping = function(documentObject) { - Scholar.Ingester.Interface.scrapeProgress.changeHeadline(Scholar.getString("ingester.scrapeComplete")); - - var fields = Scholar.ItemFields.getItemTypeFields(documentObject.item.getField("itemTypeID")); + if(documentObject.item) { + Scholar.Ingester.Interface.scrapeProgress.changeHeadline(Scholar.getString("ingester.scrapeComplete")); - var titleLabel = Scholar.getString("itemFields.title") + ":" - Scholar.Ingester.Interface.scrapeProgress.addResult(titleLabel, this.item.getField("title")); - var creators = documentObject.item.numCreators(); - if(creators) { - for(var i=0; i<creators; i++) { - var creator = documentObject.item.getCreator(i); - var label = Scholar.getString("creatorTypes."+Scholar.CreatorTypes.getTypeName(creator.creatorTypeID)) + ":"; - var data = creator.firstName + ' ' + creator.lastName; - Scholar.Ingester.Interface.scrapeProgress.addResult(label, data); - } - } - - for(i in fields) { - var data = documentObject.item.getField(fields[i]); - if(data) { - var name = Scholar.ItemFields.getName(fields[i]); - if(name != "source") { - var label = Scholar.getString("itemFields."+ name) + ":"; + var fields = Scholar.ItemFields.getItemTypeFields(documentObject.item.getField("itemTypeID")); + + var titleLabel = Scholar.getString("itemFields.title") + ":" + Scholar.Ingester.Interface.scrapeProgress.addResult(titleLabel, this.item.getField("title")); + var creators = documentObject.item.numCreators(); + if(creators) { + for(var i=0; i<creators; i++) { + var creator = documentObject.item.getCreator(i); + var label = Scholar.getString("creatorTypes."+Scholar.CreatorTypes.getTypeName(creator.creatorTypeID)) + ":"; + var data = 
creator.firstName + ' ' + creator.lastName; Scholar.Ingester.Interface.scrapeProgress.addResult(label, data); } } + + for(i in fields) { + var data = documentObject.item.getField(fields[i]); + if(data) { + var name = Scholar.ItemFields.getName(fields[i]); + if(name != "source") { + var label = Scholar.getString("itemFields."+ name) + ":"; + Scholar.Ingester.Interface.scrapeProgress.addResult(label, data); + } + } + } + } else { + Scholar.Ingester.Interface.scrapeProgress.changeHeadline(Scholar.getString("ingester.scrapeError")); + Scholar.Ingester.Interface.scrapeProgress.addDescription(Scholar.getString("ingester.scrapeErrorDescription")); } setTimeout(function() { Scholar.Ingester.Interface.scrapeProgress.fade() }, 2000); @@ -311,6 +316,19 @@ Scholar.Ingester.Interface.Progress.prototype.addResult = function(label, data) this.table.appendChild(tr); } +Scholar.Ingester.Interface.Progress.prototype.addDescription = function(description) { + var descriptionNode = this.document.createTextNode(description); + var tr = this.document.createElement("tr"); + var descriptionTd = this.document.createElement("td"); + descriptionTd.style.fontSize = '10px'; + descriptionTd.style.colspan = '2'; + + descriptionTd.appendChild(descriptionNode); + tr.appendChild(descriptionTd); + this.table.appendChild(tr); +} + + Scholar.Ingester.Interface.Progress.prototype.fade = function() { // Icky, icky hack to keep objects var me = this; diff --git a/chrome/chromeFiles/content/scholar/xpcom/ingester.js b/chrome/chromeFiles/content/scholar/xpcom/ingester.js @@ -200,46 +200,55 @@ Scholar.Ingester.Utilities.prototype.collectURLsWithSubstring = function(doc, su // essential components for Scholar and would take a great deal of effort to // implement. We can, however, always implement them later. -// It looks like these are simple front-ends for XMLHttpRequest. They're a -// component of the Piggy Bank API, so they're implemented here. 
-Scholar.Ingester.Utilities.HTTPUtilities = function() {} +// These are front ends for XMLHttpRequest. XMLHttpRequest can't actually be +// accessed outside the sandbox, and even if it could, it wouldn't let scripts +// access across domains, so everything's replicated here. +Scholar.Ingester.HTTPUtilities = function(contentWindow) { + this.window = contentWindow; +} -Scholar.Ingester.Utilities.HTTPUtilities.prototype.doGet = function(url, onStatus, onDone) { - var xmlhttp = new XMLHttpRequest(); - - xmlhttp.open('GET', url, true); - xmlhttp.overrideMimeType("text/xml"); - xmlhttp.onreadystatechange = function() { - Scholar.Ingester.Utilities.HTTPUtilities.stateChange(xmlhttp, onStatus, onDone); - }; - xmlhttp.send(null); +Scholar.Ingester.HTTPUtilities.prototype.doGet = function(url, onStatus, onDone) { + var xmlhttp = new this.window.XMLHttpRequest(); + + xmlhttp.open('GET', url, true); + xmlhttp.overrideMimeType("text/xml"); + + var me = this; + xmlhttp.onreadystatechange = function() { + me.stateChange(xmlhttp, onStatus, onDone); + }; + xmlhttp.send(null); } -Scholar.Ingester.Utilities.HTTPUtilities.prototype.doPost = function(url, body, onStatus, onDone) { - var xmlhttp = new XMLHttpRequest(); - - xmlhttp.open('POST', url, true); - xmlhttp.overrideMimeType("text/xml"); - xmlhttp.onreadystatechange = function() { - Scholar.Ingester.Utilities.HTTPUtilities.stateChange(xmlhttp, onStatus, onDone); - }; - xmlhttp.send(body); +Scholar.Ingester.HTTPUtilities.prototype.doPost = function(url, body, onStatus, onDone) { + var xmlhttp = new this.window.XMLHttpRequest(); + + xmlhttp.open('POST', url, true); + xmlhttp.overrideMimeType("text/xml"); + + var me = this; + xmlhttp.onreadystatechange = function() { + me.stateChange(xmlhttp, onStatus, onDone); + }; + xmlhttp.send(body); } -Scholar.Ingester.Utilities.HTTPUtilities.prototype.doOptions = function(url, body, onStatus, onDone) { - var xmlhttp = new XMLHttpRequest(); - - xmlhttp.open('OPTIONS', url, true); - 
xmlhttp.overrideMimeType("text/xml"); - xmlhttp.onreadystatechange = function() { - Scholar.Ingester.Utilities.HTTPUtilities.stateChange(xmlhttp, onStatus, onDone); - }; - xmlhttp.send(body); +Scholar.Ingester.HTTPUtilities.prototype.doOptions = function(url, body, onStatus, onDone) { + var xmlhttp = new this.window.XMLHttpRequest(); + + xmlhttp.open('OPTIONS', url, true); + xmlhttp.overrideMimeType("text/xml"); + + var me = this; + xmlhttp.onreadystatechange = function() { + me.stateChange(xmlhttp, onStatus, onDone); + }; + xmlhttp.send(body); } // Possible point of failure; for some reason, this used to be a separate // class, so make sure it works -Scholar.Ingester.Utilities.HTTPUtilities.prototype.stateChange = function(xmlhttp, onStatus, onDone) { +Scholar.Ingester.HTTPUtilities.prototype.stateChange = function(xmlhttp, onStatus, onDone) { switch (xmlhttp.readyState) { // Request not yet made @@ -307,6 +316,8 @@ Scholar.Ingester.Utilities.HTTPUtilities.prototype.stateChange = function(xmlhtt */ Scholar.Ingester.Document = function(browserWindow){ this.browser = browserWindow; + this.appSvc = Cc["@mozilla.org/appshell/appShellService;1"] + .getService(Ci.nsIAppShellService); this.scraper = null this.model = new Scholar.Ingester.Model(); this._generateSandbox(); @@ -379,10 +390,11 @@ Scholar.Ingester.Document.prototype.scrapePage = function(callback) { Components.utils.evalInSandbox(this.scraper.scraperJavaScript, scraperSandbox); } catch(e) { throw e+' in scraperJavaScript for '+this.scraper.label; + this._scrapePageComplete(); } // If synchronous, call _scrapePageComplete(); - if(!scraperSandbox._waitForCompletion) { + if(!this._waitForCompletion) { this._scrapePageComplete(); } } @@ -411,7 +423,7 @@ Scholar.Ingester.Document.prototype.scrapePage = function(callback) { * function before returning */ -/*` +/* * Called when scraping (synchronous or asynchronous) is complete */ Scholar.Ingester.Document.prototype._scrapePageComplete = function() { @@ -420,17 
+432,23 @@ Scholar.Ingester.Document.prototype._scrapePageComplete = function() { this._scrapeCallback(this); } } - + +/* + * Generates a sandbox for scraping/scraper detection + */ Scholar.Ingester.Document.prototype._generateSandbox = function() { this.sandbox = new Components.utils.Sandbox(this.browser.contentDocument.location.href); this.sandbox.browser = this.browser; this.sandbox.doc = this.sandbox.browser.contentDocument; this.sandbox.utilities = new Scholar.Ingester.Utilities; + this.sandbox.utilities.HTTPUtilities = new Scholar.Ingester.HTTPUtilities(this.appSvc.hiddenDOMWindow); + this.sandbox.window = this.window; this.sandbox.model = this.model; this.sandbox.XPathResult = Components.interfaces.nsIDOMXPathResult; - this.sandbox.wait = function(){ this._waitForCompletion = true; }; - this.sandbox.done = function(){ this._scrapePageComplete(); }; + var me = this; + this.sandbox.wait = function(){ me._waitForCompletion = true; }; + this.sandbox.done = function(){ me._scrapePageComplete(); }; } /* @@ -453,9 +471,15 @@ Scholar.Ingester.Document.prototype._updateDatabase = function() { newItem.setField("publisher", this.model.data[uri][prefixDC + 'publisher']); } if(this.model.data[uri][prefixDC + 'year']) { - data.date = this.model.data[uri][prefixDC + 'year'].substring( - this.model.data[uri][prefixDC + 'year'].lastIndexOf(" ")+1, - this.model.data[uri][prefixDC + 'year'].length); + if(this.model.data[uri][prefixDC + 'year'].length == 4) { + newItem.setField("year", this.model.data[uri][prefixDC + 'year']); + } else { + try { + newItem.setField(this.model.data[uri][prefixDC + 'year'].substring( + this.model.data[uri][prefixDC + 'year'].lastIndexOf(" ")+1, + this.model.data[uri][prefixDC + 'year'].length)); + } catch(e) {} + } } if(this.model.data[uri][prefixDC + 'edition']) { newItem.setField("edition", this.model.data[uri][prefixDC + 'edition']); diff --git a/chrome/chromeFiles/locale/en-US/scholar/scholar.properties 
b/chrome/chromeFiles/locale/en-US/scholar/scholar.properties @@ -24,4 +24,6 @@ creatorTypes.contributor = Contributor creatorTypes.editor = Editor ingester.scraping = Scraping Page... -ingester.scrapeComplete = Scraping Complete -\ No newline at end of file +ingester.scrapeComplete = Scraping Complete +ingester.scrapeError = Could Not Scrape +ingester.scrapeErrorDescription = An error occurred while scraping this page. Please try again. If this error persists, contact the scraper author. +\ No newline at end of file