commit 0417a1910d139daeaf76f10c451877b3f5581ad4
parent 6f31a3d31a010fe4d409b91e970ee582d0d2fc60
Author: Simon Kornblith <simon@simonster.com>
Date: Sat, 26 Jan 2013 02:36:23 -0500
Detect CAPTCHA and stop trying to hit Google Scholar
Diffstat:
1 file changed, 18 insertions(+), 7 deletions(-)
diff --git a/chrome/content/zotero/recognizePDF.js b/chrome/content/zotero/recognizePDF.js
@@ -69,7 +69,7 @@ var Zotero_RecognizePDF = new function() {
*/
this.recognize = function(file, libraryID) {
const MAX_PAGES = 7;
- const GOOGLE_SCHOLAR_QUERY_DELAY = 1500; // in ms
+ const GOOGLE_SCHOLAR_QUERY_DELAY = 2000; // in ms
return _extractText(file, MAX_PAGES).then(function(lines) {
// Look for DOI - Use only first 80 lines to avoid catching article references
@@ -138,8 +138,12 @@ var Zotero_RecognizePDF = new function() {
}
}
- var nextLine = 0;
- var queryGoogle = function() {
+ var nextLine = 0,
+ limited = false,
+ queryGoogle = function() {
+ // Once we hit the CAPTCHA once, don't keep trying
+ if(limited) throw new Zotero.Exception.Alert("recognizePDF.limit");
+
// Take the relevant parts of some lines (exclude hyphenated word)
var queryString = "", queryStringWords = 0;
while(queryStringWords < 25) {
@@ -180,11 +184,17 @@ var Zotero_RecognizePDF = new function() {
return Zotero.HTTP.promise("GET", url, {"responseType":"document"})
})
.then(function(xmlhttp) {
- var deferred = Q.defer();
-
- var translate = new Zotero.Translate.Web();
+ if(Zotero.Utilities.xpath(doc, "//form[@action='Captcha']").length) {
+ // Hit CAPTCHA
+ limited = true;
+ throw new Zotero.Exception.Alert("recognizePDF.limit");
+ }
+
+ var doc = xmlhttp.response,
+ deferred = Q.defer(),
+ translate = new Zotero.Translate.Web();
translate.setTranslator("57a00950-f0d1-4b41-b6ba-44ff0fc30289");
- translate.setDocument(Zotero.HTTP.wrapDocument(xmlhttp.response, url));
+ translate.setDocument(Zotero.HTTP.wrapDocument(doc, url));
translate.setHandler("translators", function(translate, detected) {
if(detected.length) {
deferred.resolve(_promiseTranslate(translate, libraryID));
@@ -197,6 +207,7 @@ var Zotero_RecognizePDF = new function() {
return deferred.promise;
}, function(e) {
if(e instanceof Zotero.HTTP.UnexpectedStatusException && e.status == 403) {
+ // Hit hard block
throw new Zotero.Exception.Alert("recognizePDF.limit");
}
throw e;