commit 3976f7afb1a0796ffa10ebb732a4209afb1af964
parent 3b42e71c778d11b7f744c826f33476aa8b689416
Author: Simon Kornblith <simon@simonster.com>
Date: Sat, 31 Dec 2011 18:49:09 +0000
Closes #5, Fix Retrieve Metadata for Google Books
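It doesn't look like Google Books PDFs actually have any OCRed text, nor do they have any obvious identifying information, so we just blacklist them for now: if the first extracted line matches Google's standard "This is a digital copy of a book..." notice, the PDF is reported as having no OCRed text (recognizePDF.noOCR), the same as a PDF with fewer than 20 lines.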
Diffstat:
1 file changed, 2 insertions(+), 4 deletions(-)
diff --git a/chrome/content/zotero/recognizePDF.js b/chrome/content/zotero/recognizePDF.js
@@ -310,7 +310,8 @@ Zotero_RecognizePDF.Recognizer.prototype.recognize = function(file, libraryID, c
// get (not quite) median length
var lineLengthsLength = lineLengths.length;
- if(lineLengthsLength < 20) {
+ if(lineLengthsLength < 20
+ || lines[0] === "This is a digital copy of a book that was preserved for generations on library shelves before it was carefully scanned by Google as part of a project") {
this._callback(false, "recognizePDF.noOCR");
} else {
var sortedLengths = lineLengths.sort();
@@ -329,9 +330,6 @@ Zotero_RecognizePDF.Recognizer.prototype.recognize = function(file, libraryID, c
}
this._startLine = this._iteration = 0;
- }
-
- if(lineLengthsLength >= 20) {
this._queryGoogle();
}
}