commit e5404f4938eaec2032a727cb9d19a25fa3f43ec4
parent 0ab9e8b36cfcb3cffbc8fc51f2cfb9140378d25e
Author: Simon Kornblith <simon@simonster.com>
Date: Mon, 4 Sep 2006 17:37:07 +0000
closes #269, For some COinS pages "could not save item" error
Diffstat:
2 files changed, 68 insertions(+), 15 deletions(-)
diff --git a/chrome/chromeFiles/content/scholar/xpcom/ingester.js b/chrome/chromeFiles/content/scholar/xpcom/ingester.js
@@ -415,7 +415,7 @@ Scholar.OpenURL = new function() {
item.creators.push({firstName:value});
}
} else if(key == "rft.au") {
- item.creators.push(Scholar.cleanAuthor(value, "author", true));
+ item.creators.push(Scholar.Utilities.prototype.cleanAuthor(value, "author", true));
} else if(key == "rft.aucorp") {
item.creators.push({lastName:value, institutional:true});
} else if(key == "rft.isbn" && !item.ISBN) {
diff --git a/scrapers.sql b/scrapers.sql
@@ -1,4 +1,4 @@
--- 73
+-- 74
-- Set the following timestamp to the most recent scraper update date
REPLACE INTO "version" VALUES ('repository', STRFTIME('%s', '2006-08-31 22:44:00'));
@@ -2792,7 +2792,7 @@ REPLACE INTO "translators" VALUES ('05d07af9-105a-4572-99f6-a8e231c0daef', '2006
}',
'// used to retrieve next COinS object when asynchronously parsing COinS objects
// on a page
-function retrieveNextCOinS(needFullItems, newItems, doc) {
+function retrieveNextCOinS(needFullItems, newItems, couldUseFullItems, doc) {
if(needFullItems.length) {
var item = needFullItems.shift();
@@ -2802,26 +2802,26 @@ function retrieveNextCOinS(needFullItems, newItems, doc) {
newItems.push(item);
});
search.setHandler("done", function() {
- retrieveNextCOinS(needFullItems, newItems, doc);
+ retrieveNextCOinS(needFullItems, newItems, couldUseFullItems, doc);
});
search.setSearch(item);
// look for translators
var translators = search.getTranslators();
- if(translators) {
+ if(translators.length) {
search.setTranslator(translators);
search.translate();
} else {
- retrieveNextCOinS(needFullItems, newItems, doc);
+ retrieveNextCOinS(needFullItems, newItems, couldUseFullItems, doc);
}
} else {
- completeCOinS(newItems, doc);
+ completeCOinS(newItems, couldUseFullItems, doc);
Scholar.done(true);
}
}
// saves all COinS objects
-function completeCOinS(newItems, doc) {
+function completeCOinS(newItems, couldUseFullItems, doc) {
if(newItems.length > 1) {
var selectArray = new Array();
@@ -2829,21 +2829,67 @@ function completeCOinS(newItems, doc) {
selectArray[i] = newItems[i].title;
}
selectArray = Scholar.selectItems(selectArray);
+ // queue the keys of the array returned by Scholar.selectItems for completeItems
+ var useIndices = new Array();
for(var i in selectArray) {
+ useIndices.push(i);
+ }
+ completeItems(newItems, useIndices, couldUseFullItems, doc);
+ } else if(newItems.length) {
+ completeItems(newItems, [0], couldUseFullItems, doc);
+ }
+}
+// completes newItems[i] for each index i queued in useIndices, one index per call; doc is attached to each completed item
+function completeItems(newItems, useIndices, couldUseFullItems, doc) {
+ if(!useIndices.length) {
+ return;
+ }
+ var i = useIndices.shift();
+ // a search translator is tried first for flagged items; the parsed item is the fallback
+ // grab full item if flagged in couldUseFullItems (doWeb flags items that have a title but no creators)
+ if(couldUseFullItems[i]) {
+ Scholar.Utilities.debug("looking up contextObject");
+ var search = Scholar.loadTranslator("search");
+
+ var firstItem = false;
+ search.setHandler("itemDone", function(obj, newItem) {
+ if(!firstItem) {
+ // add doc as attachment; only the first item the search returns is completed
+ newItem.attachments.push({document:doc});
+ newItem.complete();
+ firstItem = true;
+ }
+ });
+ search.setHandler("done", function(obj) {
+ // call next, passing doc along so the remaining items can attach it too
+ completeItems(newItems, useIndices, couldUseFullItems, doc);
+ });
+
+ search.setSearch(newItems[i]);
+ var translators = search.getTranslators();
+ if(translators.length) {
+ search.setTranslator(translators);
+ search.translate();
+ } else {
// add doc as attachment
newItems[i].attachments.push({document:doc});
-
newItems[i].complete();
+ // call next, passing doc along so the remaining items can attach it too
+ completeItems(newItems, useIndices, couldUseFullItems, doc);
}
- } else if(newItems.length) {
- newItems[0].attachments.push({document:doc});
- newItems[0].complete();
+ } else {
+ // not flagged for lookup: add doc as attachment to the item as parsed
+ newItems[i].attachments.push({document:doc});
+ newItems[i].complete();
+ // call next, passing doc along so the remaining items can attach it too
+ completeItems(newItems, useIndices, couldUseFullItems, doc);
}
}
function doWeb(doc, url) {
var newItems = new Array();
var needFullItems = new Array();
+ var couldUseFullItems = new Array();
var spanTags = doc.getElementsByTagName("span");
@@ -2855,7 +2901,13 @@ function doWeb(doc, url) {
var spanTitle = spanTags[i].getAttribute("title");
var newItem = new Scholar.Item();
if(Scholar.Utilities.parseContextObject(spanTitle, newItem)) {
- if(newItem.title && newItem.creators.length) {
+ if(newItem.title) {
+ if(!newItem.creators.length) {
+ // if we have a title but little other identifying
+ // information, say we''ll get full item later
+ couldUseFullItems[newItems.length] = true;
+ }
+
// title and creators are minimum data to avoid looking up
newItems.push(newItem);
} else {
@@ -2868,12 +2920,13 @@ function doWeb(doc, url) {
}
}
+ Scholar.Utilities.debug(needFullItems);
if(needFullItems.length) {
// retrieve full items asynchronously
Scholar.wait();
- retrieveNextCOinS(needFullItems, newItems, doc);
+ retrieveNextCOinS(needFullItems, newItems, couldUseFullItems, doc);
} else {
- completeCOinS(newItems, doc);
+ completeCOinS(newItems, couldUseFullItems, doc);
}
}');