commit 45b3cd8a530d60c860b639a5fea849abaa897879
parent 3587bb0f6b9f5fb97a0ba776964c29b1fdb0db97
Author: Dan Stillman <dstillman@zotero.org>
Date: Thu, 7 May 2015 13:41:13 -0400
Replace non-breaking spaces in tested lines in recognizePDF
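Fixes the spurious "PDF does not contain OCRed text" message for
http://pdfserver.amlaw.com/nlj/NSA_ca2_20150507.pdf
The line-matching regex only accepts words separated by ordinary spaces, so
lines whose words are separated by non-breaking spaces (U+00A0) were rejected.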
Diffstat:
1 file changed, 5 insertions(+), 1 deletion(-)
diff --git a/chrome/content/zotero/recognizePDF.js b/chrome/content/zotero/recognizePDF.js
@@ -593,7 +593,11 @@ var Zotero_RecognizePDF = new function() {
const lineRe = /^[\s_]*([^\s]+(?: [^\s_]+)+)/;
var cleanedLines = [], cleanedLineLengths = [];
for(var i=0; i<lines.length && cleanedLines.length<100; i++) {
- var m = lineRe.exec(lines[i]);
+ var m = lineRe.exec(
+ lines[i]
+ // Replace non-breaking spaces
+ .replace(/\xA0/g, ' ')
+ );
if(m && m[1].split(' ').length > 3) {
cleanedLines.push(m[1]);
cleanedLineLengths.push(m[1].length);