www

Unnamed repository; edit this file 'description' to name the repository.
Log | Files | Refs | Submodules | README | LICENSE

commit 45b3cd8a530d60c860b639a5fea849abaa897879
parent 3587bb0f6b9f5fb97a0ba776964c29b1fdb0db97
Author: Dan Stillman <dstillman@zotero.org>
Date:   Thu,  7 May 2015 13:41:13 -0400

Replace non-breaking spaces in tested lines in recognizePDF

Fixes "PDF does not contain OCRed text" message for
http://pdfserver.amlaw.com/nlj/NSA_ca2_20150507.pdf

Diffstat:
M chrome/content/zotero/recognizePDF.js | 6 +++++-
1 file changed, 5 insertions(+), 1 deletion(-)

diff --git a/chrome/content/zotero/recognizePDF.js b/chrome/content/zotero/recognizePDF.js
@@ -593,7 +593,11 @@ var Zotero_RecognizePDF = new function() {
 		const lineRe = /^[\s_]*([^\s]+(?: [^\s_]+)+)/;
 		var cleanedLines = [], cleanedLineLengths = [];
 		for(var i=0; i<lines.length && cleanedLines.length<100; i++) {
-			var m = lineRe.exec(lines[i]);
+			var m = lineRe.exec(
+				lines[i]
+				// Replace non-breaking spaces
+				.replace(/\xA0/g, ' ')
+			);
 			if(m && m[1].split(' ').length > 3) {
 				cleanedLines.push(m[1]);
 				cleanedLineLengths.push(m[1].length);