commit 91c3374d6bd5a50cf8f4c2db38571ae190508601 parent 4ca0e6408cfc98c23b2a48b274286a2fe9e072c1 Author: Simon Kornblith <simon@simonster.com> Date: Mon, 21 Jan 2013 01:04:05 -0500 Merge branch '3.0' and modify recognizePDF.js to use promises Conflicts: chrome/content/zotero/recognizePDF.js install.rdf update.rdf Diffstat:
21 files changed, 552 insertions(+), 447 deletions(-)
diff --git a/chrome/content/zotero/fileInterface.js b/chrome/content/zotero/fileInterface.js @@ -532,12 +532,12 @@ var Zotero_File_Interface = new function() { // generate bibliography try { if(io.method == 'copy-to-clipboard') { - copyItemsToClipboard(items, io.style, false, io.mode === "citation"); + copyItemsToClipboard(items, io.style, false, io.mode === "citations"); } else { var style = Zotero.Styles.get(io.style); var bibliography = Zotero.Cite.makeFormattedBibliographyOrCitationList(style, - items, format, io.mode === "citation"); + items, format, io.mode === "citations"); } } catch(e) { window.alert(Zotero.getString("fileInterface.bibliographyGenerationError")); diff --git a/chrome/content/zotero/recognizePDF.js b/chrome/content/zotero/recognizePDF.js @@ -26,10 +26,12 @@ /** * @fileOverview Tools for automatically retrieving a citation for the given PDF */ -const Zotero_RecognizePDF_SUCCESS_IMAGE = "chrome://zotero/skin/tick.png"; -const Zotero_RecognizePDF_FAILURE_IMAGE = "chrome://zotero/skin/cross.png"; -const Zotero_RecognizePDF_LOADING_IMAGE = "chrome://global/skin/icons/loading_16.png"; - +Components.utils.import("resource://zotero/q.js"); + +const SUCCESS_IMAGE = "chrome://zotero/skin/tick.png"; +const FAILURE_IMAGE = "chrome://zotero/skin/cross.png"; +const LOADING_IMAGE = "chrome://global/skin/icons/loading_16.png"; + /** * Front end for recognizing PDFs * @namespace @@ -60,6 +62,292 @@ var Zotero_RecognizePDF = new function() { if (!items) return; var itemRecognizer = new Zotero_RecognizePDF.ItemRecognizer(); itemRecognizer.recognizeItems(items); + } + + /** + * Retrieves metadata for a PDF and saves it as an item + * + * @param {nsIFile} file The PDF file to retrieve metadata for + * @param {Integer|null} libraryID The library in which to save the PDF + * @return {Promise} A promise resolved when PDF metadata has been retrieved + */ + this.recognize = function(file, libraryID) { + const MAX_PAGES = 7; + + return _extractText(file, MAX_PAGES).then(function(lines) { + // Look for DOI - Use only first 80 lines to avoid catching article references + var allText = lines.join("\n"), + doi = Zotero.Utilities.cleanDOI(lines.slice(0,80).join('\n')), + promise; + Zotero.debug(allText); + + if(doi) { + // Look up DOI + Zotero.debug("RecognizePDF: Found DOI: "+doi); + + var translate = new Zotero.Translate.Search(); + translate.setTranslator("11645bd1-0420-45c1-badb-53fb41eeb753"); + translate.setSearch({"itemType":"journalArticle", "DOI":doi}); + promise = _promiseTranslate(translate, libraryID); + } else { + // Look for ISBNs if no DOI + var isbns = _findISBNs(allText); + if(isbns.length) { + Zotero.debug("RecognizePDF: Found ISBNs: " + isbns); + + var translate = new Zotero.Translate.Search(); + translate.setTranslator("c73a4a8c-3ef1-4ec8-8229-7531ee384cc4"); + translate.setSearch({"itemType":"book", "ISBN":isbns[0]}); + promise = _promiseTranslate(translate, libraryID); + } else { + promise = Q.reject("No ISBN or DOI found"); + } + } + + // If no DOI or ISBN, query Google Scholar + return promise.fail(function(error) { + Zotero.debug("RecognizePDF: "+error); + + // Use only first column from multi-column lines + const lineRe = /^\s*([^\s]+(?: [^\s]+)+)/; + var cleanedLines = [], cleanedLineLengths = []; + for(var i=0; i<lines.length && cleanedLines.length<100; i++) { + var m = lineRe.exec(lines[i]); + if(m && m[1].split(' ').length > 3) { + cleanedLines.push(m[1]); + cleanedLineLengths.push(m[1].length); + } + } + + // get (not quite) median length + var lineLengthsLength = cleanedLineLengths.length; + if(lineLengthsLength < 20 + || cleanedLines[0] === "This is a digital copy of a book that was preserved for generations on library shelves before it was carefully scanned by Google as part of a project") { + throw new Zotero.Exception.Alert("recognizePDF.noOCR"); + } + + var sortedLengths = cleanedLineLengths.sort(), + medianLength = sortedLengths[Math.floor(lineLengthsLength/2)]; + + // pick lines within 6 chars of the median (this is completely arbitrary) + var goodLines = [], + uBound = medianLength + 6, + lBound = medianLength - 6; + for (var i=0; i<lineLengthsLength; i++) { + if(cleanedLineLengths[i] > lBound && cleanedLineLengths[i] < uBound) { + // Strip quotation marks so they don't mess up search query quoting + var line = cleanedLines[i].replace('"', ''); + goodLines.push(line); + } + } + + var nextLine = 0; + var queryGoogle = function() { + // Take the relevant parts of some lines (exclude hyphenated word) + var queryString = "", queryStringWords = 0; + while(queryStringWords < 25) { + if(!goodLines.length) throw new Zotero.Exception.Alert("recognizePDF.noMatches"); + + var words = goodLines.splice(nextLine, 1)[0].split(/\s+/); + // Try to avoid picking adjacent strings so the odds of them appearing in another + // document quoting our document is low. Every 7th line is a magic value + nextLine = (nextLine + 7) % goodLines.length; + + // get rid of first and last words + words.shift(); + words.pop(); + // make sure there are no long words (probably OCR mistakes) + var skipLine = false; + for(var i=0; i<words.length; i++) { + if(words[i].length > 20) { + skipLine = true; + break; + } + } + // add words to query + if(!skipLine && words.length) { + queryStringWords += words.length; + queryString += '"'+words.join(" ")+'" '; + } + } + + Zotero.debug("RecognizePDF: Query string "+queryString); + + // pass query string to Google Scholar and translate + var url = "http://scholar.google.com/scholar?q="+encodeURIComponent(queryString)+"&hl=en&lr=&btnG=Search"; + return Zotero.HTTP.promise("GET", url, {"responseType":"document"}) + .then(function(xmlhttp) { + var deferred = Q.defer(); + + var translate = new Zotero.Translate.Web(); + translate.setTranslator("57a00950-f0d1-4b41-b6ba-44ff0fc30289"); + translate.setDocument(Zotero.HTTP.wrapDocument(xmlhttp.response, url)); + translate.setHandler("translators", function(translate, detected) { + if(detected.length) { + deferred.resolve(_promiseTranslate(translate, libraryID)); + } else { + deferred.reject("Detection with Google Scholar failed"); + } + }); + translate.getTranslators(); + + return deferred.promise; + }, function(e) { + if(e instanceof Zotero.HTTP.UnexpectedStatusException && e.status == 403) { + throw new Zotero.Exception.Alert("recognizePDF.recognizePDF.limit"); + } + throw e; + }); + }; + + return queryGoogle().fail(queryGoogle).fail(queryGoogle); + }); + }); + } + + /** + * Get text from a PDF + * @param {nsIFile} file PDF + * @param {Number} pages Number of pages to extract + * @return {Promise} + */ + function _extractText(file, pages) { + var cacheFile = Zotero.getZoteroDirectory(); + cacheFile.append("recognizePDFcache.txt"); + if(cacheFile.exists()) { + cacheFile.remove(false); + } + + var exec = Zotero.getZoteroDirectory(); + exec.append(Zotero.Fulltext.pdfConverterFileName); + + var args = ['-enc', 'UTF-8', '-nopgbrk', '-layout', '-l', pages]; + args.push(file.path, cacheFile.path); + + Zotero.debug('RecognizePDF: Running pdftotext '+args.join(" ")); + + return Zotero.Utilities.Internal.exec(exec, args).then(function() { + if(!cacheFile.exists()) { + throw new Zotero.Exception.Alert("recognizePDF.couldNotRead"); + } + + try { + var inputStream = Components.classes["@mozilla.org/network/file-input-stream;1"] + .createInstance(Components.interfaces.nsIFileInputStream); + inputStream.init(cacheFile, 0x01, 0664, 0); + try { + var intlStream = Components.classes["@mozilla.org/intl/converter-input-stream;1"] + .createInstance(Components.interfaces.nsIConverterInputStream); + intlStream.init(inputStream, "UTF-8", 65535, + Components.interfaces.nsIConverterInputStream.DEFAULT_REPLACEMENT_CHARACTER); + intlStream.QueryInterface(Components.interfaces.nsIUnicharLineInputStream); + + // get the lines in this sample + var lines = [], str = {}; + while(intlStream.readLine(str)) { + var line = str.value.trim(); + if(line) lines.push(line); + } + } finally { + inputStream.close(); + } + } finally { + cacheFile.remove(false); + } + + return lines; + }, function() { + throw new Zotero.Exception.Alert("recognizePDF.couldNotRead"); + }); + } + + /** + * Attach appropriate handlers to a Zotero.Translate instance and begin translation + * @return {Promise} + */ + function _promiseTranslate(translate, libraryID) { + var deferred = Q.defer(); + translate.setHandler("select", function(translate, items, callback) { + for(var i in items) { + var obj = {}; + obj[i] = items[i]; + callback(obj); + return; + } + }); + translate.setHandler("done", function(translate, success) { + if(success && translate.newItems.length) { + deferred.resolve(translate.newItems[0]); + } else { + deferred.reject("Translation with Google Scholar failed"); + } + }); + translate.translate(libraryID, false); + return deferred.promise; + } + + /** + * Search ISBNs in text + * @private + * @return {String[]} Array of ISBNs + */ + function _findISBNs(x) { + if(typeof(x) != "string") { + throw "findISBNs: argument must be a string"; + } + var isbns = []; + + // Match lines saying "isbn: " or "ISBN-10:" or similar, consider m-dashes and n-dashes as well + var pattern = /(SBN|sbn)[ \u2014\u2013\u2012-]?(10|13)?[: ]*([0-9X][0-9X \u2014\u2013\u2012-]+)/g; + var match; + + while (match = pattern.exec(x)) { + var isbn = match[3]; + isbn = isbn.replace(/[ \u2014\u2013\u2012-]/g, ''); + if(isbn.length==20 || isbn.length==26) { + // Handle the case of two isbns (e.g. paper+hardback) next to each other + isbns.push(isbn.slice(0,isbn.length/2), isbn.slice(isbn.length/2)); + } else if(isbn.length==23) { + // Handle the case of two isbns (10+13) next to each other + isbns.push(isbn.slice(0,10), isbn.slice(10)); + } else if(isbn.length==10 || isbn.length==13) { + isbns.push(isbn); + } + } + + // Validate ISBNs + var validIsbns = []; + for (var i =0; i < isbns.length; i++) { + if(_isValidISBN(isbns[i])) validIsbns.push(isbns[i]); + } + return validIsbns; + } + + /** + * Check whether an ISBNs is valid + * @private + * @return {Boolean} + */ + function _isValidISBN(isbn) { + if(isbn.length == 13) { + // ISBN-13 should start with 978 or 979 i.e. GS1 for book publishing industry + var prefix = isbn.slice(0,3); + if (prefix != "978" && prefix != "979") return false; + // Verify check digit + var check = 0; + for (var i = 0; i < 13; i+=2) check += isbn[i]*1; + for (i = 1; i < 12; i+=2) check += 3 * isbn[i]*1; + return (check % 10 == 0); + } else if(isbn.length == 10) { + // Verify ISBN-10 check digit + var check = 0; + for (var i = 0; i < 9; i++) check += isbn[i]*1 * (10-i); + // last number might be 'X' + if (isbn[9] == 'X' || isbn[9] == 'x') check += 10; + else check += isbn[i]*1; + return (check % 11 == 0); + } + return false; } } @@ -139,431 +427,64 @@ Zotero_RecognizePDF.ItemRecognizer.prototype._recognizeItem = function() { } this._progressIndicator.value = (this._itemTotal-this._items.length)/this._itemTotal*100; - this._item = this._items.shift(); - this._progressWindow.document.getElementById("item-"+this._item.id+"-icon"). - setAttribute("src", Zotero_RecognizePDF_LOADING_IMAGE); + var item = this._items.shift(), + itemIcon = this._progressWindow.document.getElementById("item-"+item.id+"-icon"), + itemTitle = this._progressWindow.document.getElementById("item-"+item.id+"-title"); + itemIcon.setAttribute("src", LOADING_IMAGE); - var file = this._item.getFile(); - if(file) { - var recognizer = new Zotero_RecognizePDF.Recognizer(); - var me = this; - recognizer.recognize(file, this._item.libraryID, function(newItem, error) { me._callback(newItem, error) }); - } else { - this._callback(false, "recognizePDF.fileNotFound"); - } -} - -/** - * Cleans up after items are recognized, disabling the cancel button and making the progress window - * close on blur - */ -Zotero_RecognizePDF.ItemRecognizer.prototype._done = function() { - this._progressIndicator.value = 100; - this._progressWindow.document.getElementById("cancel-button").label = Zotero.getString("recognizePDF.close.label"); - var me = this; - this._progressWindow.addEventListener("blur", - function() { me._progressWindow.setTimeout(function() { me._progressWindow.close() }, 2000) }, false); - this._progressWindow.document.getElementById("label").value = Zotero.getString("recognizePDF.complete.label"); -} - -/** - * Callback function to be executed upon recognition completion - * @param {Zotero.Item|Boolean} newItem The new item created from translation, or false if - * recognition was unsuccessful - * @param {String} [error] The error name, if recognition was unsuccessful. - */ -Zotero_RecognizePDF.ItemRecognizer.prototype._callback = function(newItem, error) { - if(this._stopped) { - if(newItem) Zotero.Items.erase(newItem.id); - return; - } + var file = item.getFile(), me = this; - if(newItem) { + (file + ? Zotero_RecognizePDF.recognize(file, item.libraryID) + : Q.reject(new Zotero.Exception.Alert("recognizePDF.fileNotFound"))) + .then(function(newItem) { + // If already stopped, delete + if(me._stopped) { + Zotero.Items.erase(item.id); + return; + } + // put new item in same collections as the old one - var itemCollections = this._item.getCollections(); + var itemCollections = item.getCollections(); for(var j=0; j<itemCollections.length; j++) { var collection = Zotero.Collections.get(itemCollections[j]); collection.addItem(newItem.id); } // put old item as a child of the new item - this._item.setSource(newItem.id); - this._item.save(); - } + item.setSource(newItem.id); + item.save(); - // add name - this._progressWindow.document.getElementById("item-"+this._item.id+"-title"). - setAttribute("label", (newItem ? newItem.getField("title") : Zotero.getString(error))); - // update icon - this._progressWindow.document.getElementById("item-"+this._item.id+"-icon"). - setAttribute("src", (newItem ? Zotero_RecognizePDF_SUCCESS_IMAGE : Zotero_RecognizePDF_FAILURE_IMAGE)); - - if(error == "recognizePDF.limit") { - // now done, since we hit the query limit - var error = Zotero.getString(error); - for(var i in this._items) { - this._progressWindow.document.getElementById("item-"+this._items[i].id+"-title"). - setAttribute("label", error); - this._progressWindow.document.getElementById("item-"+this._items[i].id+"-icon"). - setAttribute("src", Zotero_RecognizePDF_FAILURE_IMAGE); - } - this._done(); - } else { - // scroll to this item - this._progressWindow.document.getElementById("tree").treeBoxObject.scrollToRow(Math.max(0, this._itemTotal-this._items.length-5)); - // continue recognizing - this._recognizeItem(); - } -} + itemTitle.setAttribute("label", newItem.getField("title")); + itemIcon.setAttribute("src", SUCCESS_IMAGE); + }, function(error) { + Zotero.debug(error); + Zotero.logError(error); -/*Zotero_RecognizePDF.ItemRecognizer.prototype._captchaCallback = function(img) { - var io = {dataIn:img}; - Zotero.debug(img); - this._progressWindow.openDialog("chrome://zotero/content/pdfCaptcha.xul", "", "chrome,modal,resizable=no", io); - - if(io.dataOut) return io.dataOut; - - this.stop(); - this._progressWindow.close(); - return false; -}*/ - -/** - * @class PDF recognizer backend - */ -Zotero_RecognizePDF.Recognizer = function () {} - -/** - * Retrieves metadata for a PDF and saves it as an item - * - * @param {nsIFile} file The PDF file to retrieve metadata for - * @param {Function} callback The function to be executed when recognition is complete - * @param {Function} [captchaCallback] The function to be executed if a CAPTCHA is encountered - * (function will be passed image as URL and must return text of CAPTCHA) - */ -Zotero_RecognizePDF.Recognizer.prototype.recognize = function(file, libraryID, callback, captchaCallback) { - const MAX_PAGES = 7; - - this._libraryID = libraryID; - this._callback = callback; - //this._captchaCallback = captchaCallback; - - var cacheFile = Zotero.getZoteroDirectory(); - cacheFile.append("recognizePDFcache.txt"); - if(cacheFile.exists()) { - cacheFile.remove(false); - } - - var proc = Components.classes["@mozilla.org/process/util;1"]. - createInstance(Components.interfaces.nsIProcess); - var exec = Zotero.getZoteroDirectory(); - exec.append(Zotero.Fulltext.pdfConverterFileName); - proc.init(exec); - - var args = ['-enc', 'UTF-8', '-nopgbrk', '-layout', '-l', MAX_PAGES]; - args.push(file.path, cacheFile.path); - - Zotero.debug('Running pdftotext '+args.join(" ")); - try { - if (!Zotero.isFx36) { - proc.runw(true, args, args.length); - } - else { - proc.run(true, args, args.length); - } - } - catch (e) { - Zotero.debug("Error running pdftotext", 1); - Zotero.debug(e, 1); - } - - if(!cacheFile.exists()) { - this._callback(false, "recognizePDF.couldNotRead"); - return; - } - - var inputStream = Components.classes["@mozilla.org/network/file-input-stream;1"] - .createInstance(Components.interfaces.nsIFileInputStream); - inputStream.init(cacheFile, 0x01, 0664, 0); - var intlStream = Components.classes["@mozilla.org/intl/converter-input-stream;1"] - .createInstance(Components.interfaces.nsIConverterInputStream); - intlStream.init(inputStream, "UTF-8", 65535, - Components.interfaces.nsIConverterInputStream.DEFAULT_REPLACEMENT_CHARACTER); - intlStream.QueryInterface(Components.interfaces.nsIUnicharLineInputStream); - - // get the lines in this sample - var lines = [], - cleanedLines = [], - cleanedLineLengths = [], - str = {}; - while(intlStream.readLine(str)) { - var line = str.value.trim(); - if(line) lines.push(line); - } - - inputStream.close(); - cacheFile.remove(false); - - // look for DOI - Use only first 80 lines to avoid catching article references - var allText = lines.join("\n"); - Zotero.debug(allText); - var m = Zotero.Utilities.cleanDOI(lines.slice(0,80).join('\n')); - if(m) { - this._DOI = m; - } else { // dont look for ISBNs if we found a DOI - var isbns = this._findISBNs(allText); - if(isbns.length > 0) { - this._ISBNs = isbns; - Zotero.debug("Found ISBNs: " + isbns); - } - } - - // Use only first column from multi-column lines - const lineRe = /^\s*([^\s]+(?: [^\s]+)+)/; - for(var i=0; i<lines.length; i++) { - var m = lineRe.exec(lines[i]); - if(m) { - cleanedLines.push(m[1]); - cleanedLineLengths.push(m[1].length); - } - } - - // get (not quite) median length - var lineLengthsLength = cleanedLineLengths.length; - if(lineLengthsLength < 20 - || cleanedLines[0] === "This is a digital copy of a book that was preserved for generations on library shelves before it was carefully scanned by Google as part of a project") { - this._callback(false, "recognizePDF.noOCR"); - } else { - var sortedLengths = cleanedLineLengths.sort(); - var medianLength = sortedLengths[Math.floor(lineLengthsLength/2)]; + itemTitle.setAttribute("label", error instanceof Zotero.Exception.Alert ? error.message : Zotero.getString("recognizePDF.error")); + itemIcon.setAttribute("src", FAILURE_IMAGE); - // pick lines within 4 chars of the median (this is completely arbitrary) - this._goodLines = []; - var uBound = medianLength + 4; - var lBound = medianLength - 4; - for (var i=0; i<lineLengthsLength; i++) { - if(cleanedLineLengths[i] > lBound && cleanedLineLengths[i] < uBound) { - // Strip quotation marks so they don't mess up search query quoting - var line = cleanedLines[i].replace('"', ''); - this._goodLines.push(line); - } + if(error instanceof Zotero.Exception.Alert && error.name === "recognizePDF.limit") { + me._done(); + } else { + me._recognizeItem(); } - - this._startLine = this._iteration = 0; - this._queryGoogle(); - } -} - -/** - * Search ISBNs in text - * @private - * @return array with ISBNs - */ -Zotero_RecognizePDF.Recognizer.prototype._findISBNs = function(x) { - if(typeof(x) != "string") { - throw "findISBNs: argument must be a string"; - } - var isbns = []; - - // Match lines saying "isbn: " or "ISBN-10:" or similar, consider m-dashes and n-dashes as well - var pattern = /(SBN|sbn)[ \u2014\u2013\u2012-]?(10|13)?[: ]*([0-9X][0-9X \u2014\u2013\u2012-]+)/g; - var match; - - while (match = pattern.exec(x)) { - var isbn = match[3]; - isbn = isbn.replace(/[ \u2014\u2013\u2012-]/g, ''); - if(isbn.length==20 || isbn.length==26) { - // Handle the case of two isbns (e.g. paper+hardback) next to each other - isbns.push(isbn.slice(0,isbn.length/2), isbn.slice(isbn.length/2)); - } else if(isbn.length==23) { - // Handle the case of two isbns (10+13) next to each other - isbns.push(isbn.slice(0,10), isbn.slice(10)); - } else if(isbn.length==10 || isbn.length==13) { - isbns.push(isbn); - } - } - - // Validate ISBNs - var validIsbns = []; - for (var i =0; i < isbns.length; i++) { - if(this._isValidISBN(isbns[i])) validIsbns.push(isbns[i]); - } - Zotero.debug("validIsbns: " + validIsbns); - return validIsbns; -} - -Zotero_RecognizePDF.Recognizer.prototype._isValidISBN = function(isbn) { - if(isbn.length == 13) { - // ISBN-13 should start with 978 or 979 i.e. GS1 for book publishing industry - var prefix = isbn.slice(0,3); - if (prefix != "978" && prefix != "979") return false; - // Verify check digit - var check = 0; - for (var i = 0; i < 13; i+=2) check += isbn[i]*1; - for (i = 1; i < 12; i+=2) check += 3 * isbn[i]*1; - return (check % 10 == 0); - } else if(isbn.length == 10) { - // Verify ISBN-10 check digit - var check = 0; - for (var i = 0; i < 9; i++) check += isbn[i]*1 * (10-i); - // last number might be 'X' - if (isbn[9] == 'X' || isbn[9] == 'x') check += 10; - else check += isbn[i]*1; - return (check % 11 == 0); - } - return false; + }).fin(function() { + // scroll to this item + me._progressWindow.document.getElementById("tree").treeBoxObject.scrollToRow(Math.max(0, me._itemTotal-me._items.length-5)); + }).end(); } /** - * Queries Google Scholar for metadata for this PDF - * @private + * Cleans up after items are recognized, disabling the cancel button and making the progress window + * close on blur */ -Zotero_RecognizePDF.Recognizer.prototype._queryGoogle = function() { - if(this._iteration > 3 || this._startLine >= this._goodLines.length) { - try { - if(this._hiddenBrowser) Zotero.Browser.deleteHiddenBrowser(me._hiddenBrowser); - } catch(e) {} - this._callback(false, "recognizePDF.noMatches"); - return; - } - this._iteration++; - - var queryString = ""; +Zotero_RecognizePDF.ItemRecognizer.prototype._done = function() { + this._progressIndicator.value = 100; + this._progressWindow.document.getElementById("cancel-button").label = Zotero.getString("recognizePDF.close.label"); var me = this; - if(this._DOI || this._ISBNs) { - var translate = new Zotero.Translate.Search(); - var item = {}; - if(this._DOI) { - // use CrossRef to look for DOI - translate.setTranslator("11645bd1-0420-45c1-badb-53fb41eeb753"); - item = {"itemType":"journalArticle", "DOI":this._DOI}; - - } - else if(this._ISBNs) { - // use Open WorldCat to look for ISBN - translate.setTranslator("c73a4a8c-3ef1-4ec8-8229-7531ee384cc4"); - item = {"itemType":"book", "ISBN":this._ISBNs[0]}; - } - translate.setSearch(item); - translate.setHandler("itemDone", function(translate, item) { - me._callback(item); - }); - translate.setHandler("select", function(translate, items, callback) { - return me._selectItems(translate, items, callback); - }); - translate.setHandler("done", function(translate, success) { - if(!success) me._queryGoogle(); - }); - translate.translate(this._libraryID, false); - if(this._DOI) delete this._DOI; - else if(this._ISBNs) delete this.ISBNs; - } else { - // take the relevant parts of some lines (exclude hyphenated word) - var queryStringWords = 0; - while(queryStringWords < 25 && this._startLine < this._goodLines.length) { - var words = this._goodLines[this._startLine].split(/\s+/); - // get rid of first and last words - words.shift(); - words.pop(); - // make sure there are no long words (probably OCR mistakes) - var skipLine = false; - for(var i=0; i<words.length; i++) { - if(words[i].length > 20) { - skipLine = true; - break; - } - } - // add words to query - if(!skipLine && words.length) { - queryStringWords += words.length; - queryString += '"'+words.join(" ")+'" '; - } - this._startLine++; - } - - Zotero.debug("RecognizePDF: Query string "+queryString); - - // pass query string to Google Scholar and translate - var url = "http://scholar.google.com/scholar?q="+encodeURIComponent(queryString)+"&hl=en&lr=&btnG=Search"; - if(!this._hiddenBrowser) { - this._hiddenBrowser = Zotero.Browser.createHiddenBrowser(); - this._hiddenBrowser.docShell.allowImages = false; - } - - var translate = new Zotero.Translate.Web(); - var savedItem = false; - translate.setTranslator("57a00950-f0d1-4b41-b6ba-44ff0fc30289"); - translate.setHandler("itemDone", function(translate, item) { - Zotero.Browser.deleteHiddenBrowser(me._hiddenBrowser); - savedItem = true; - me._callback(item); - }); - translate.setHandler("select", function(translate, items, callback) { - me._selectItems(translate, items, callback); - }); - translate.setHandler("done", function(translate, success) { - if(!success || !savedItem) me._queryGoogle(); - }); - translate.setHandler("translators", function(translate, detected) { - if(detected.length) { - translate.translate(me._libraryID, false); - } else { - me._queryGoogle(); - } - }); - - this._hiddenBrowser.addEventListener("pageshow", function() { me._scrape(translate) }, true); - - this._hiddenBrowser.loadURIWithFlags(url, - Components.interfaces.nsIWebNavigation.LOAD_FLAGS_BYPASS_HISTORY, null, null, null); - } -} - -/** - * To be executed when Google Scholar is loaded - * @private - */ -Zotero_RecognizePDF.Recognizer.prototype._scrape = function(/**Zotero.Translate*/ translate) { - if(this._hiddenBrowser.contentDocument.location.href == "about:blank") return; - - if(this._hiddenBrowser.contentDocument.title == "403 Forbidden") { - // hit the captcha - /* - var forms = this._hiddenBrowser.contentDocument.getElementsByTagName("form"); - if(forms.length && forms[0].getAttribute("action") == "Captcha") { - var captchaImage = forms[0].getElementsByTagName("img"); - var captchaBox = this._hiddenBrowser.contentDocument.getElementsByName("captcha"); - if(captchaImage.length && captchaBox.length && this._captchaCallback) { - var text = this._captchaCallback(captchaImage[0].src); - if(text) { - captchaBox[0].value = text; - forms[0].submit(); - return; - } - } - }*/ - this._callback(false, "recognizePDF.limit"); - return; - } - - this._hiddenBrowser.removeEventListener("pageshow", this._scrape.caller, true); - translate.setDocument(this._hiddenBrowser.contentDocument); - - translate.getTranslators(false, true); -} - -/** - * Callback to pick first item in the Google Scholar item list - * @private - * @type Object - */ -Zotero_RecognizePDF.Recognizer.prototype._selectItems = function(/**Zotero.Translate*/ translate, - /**Object*/ items, /**Function**/ callback) { - for(var i in items) { - var obj = {}; - obj[i] = items[i]; - callback(obj); - return; - } -} + this._progressWindow.addEventListener("blur", + function() { me._progressWindow.setTimeout(function() { me._progressWindow.close() }, 2000) }, false); + this._progressWindow.document.getElementById("label").value = Zotero.getString("recognizePDF.complete.label"); +} +\ No newline at end of file diff --git a/chrome/content/zotero/xpcom/connector/translate_item.js b/chrome/content/zotero/xpcom/connector/translate_item.js @@ -106,6 +106,8 @@ Zotero.Translate.ItemSaver.prototype = { }); }, + // ALL CODE BELOW THIS POINT IS EXECUTED ONLY IN NON-FIREFOX ENVIRONMENTS + /** * Polls for updates to attachment progress * @param items Items in Zotero.Item.toArray() format diff --git a/chrome/content/zotero/xpcom/cookieSandbox.js b/chrome/content/zotero/xpcom/cookieSandbox.js @@ -153,8 +153,9 @@ Zotero.CookieSandbox.Observer = new function() { var ir = this.trackedInterfaceRequestors[i].get(); if(!ir) { // The interface requestor is gone, so remove it from the list - this.trackedInterfaceRequestors.splice(i--, 1); - this.trackedInterfaceRequestorSandboxes.splice(i--, 1); + this.trackedInterfaceRequestors.splice(i, 1); + this.trackedInterfaceRequestorSandboxes.splice(i, 1); + i--; } else if(ir == notificationCallbacks) { // We are tracking this interface requestor trackedBy = this.trackedInterfaceRequestorSandboxes[i]; diff --git a/chrome/content/zotero/xpcom/data/creator.js b/chrome/content/zotero/xpcom/data/creator.js @@ -536,8 +536,7 @@ Zotero.Creator.prototype._checkValue = function (field, value) { break; case 'key': - var re = /^[23456789ABCDEFGHIJKMNPQRSTUVWXTZ]{8}$/ - if (!re.test(value)) { + if (!Zotero.ID.isValidKey(value)) { this._invalidValueError(field, value); } break; diff --git a/chrome/content/zotero/xpcom/data/item.js b/chrome/content/zotero/xpcom/data/item.js @@ -2513,7 +2513,10 @@ Zotero.Item.prototype.setNote = function(text) { throw ("text must be a string in Zotero.Item.setNote() (was " + typeof text + ")"); } - text = Zotero.Utilities.trim(text); + text = text + // Strip control characters + .replace(/[\u0000-\u0008\u000B\u000C\u000E-\u001F\u007F]/g, "") + .trim(); var oldText = this.getNote(); if (text == oldText) { diff --git a/chrome/content/zotero/xpcom/duplicates.js b/chrome/content/zotero/xpcom/duplicates.js @@ -191,6 +191,12 @@ Zotero.Duplicates.prototype._findDuplicates = function () { Zotero.ItemFields.getID('ISBN') ] ); + var isbnCache = {}; + if (rows) { + for each(var row in rows) { + isbnCache[row.itemID] = row.value; + } + } processRows(); // DOI @@ -200,8 +206,34 @@ Zotero.Duplicates.prototype._findDuplicates = function () { + "AND itemID NOT IN (SELECT itemID FROM deletedItems) " + "ORDER BY value"; var rows = Zotero.DB.query(sql, [this._libraryID, Zotero.ItemFields.getID('DOI')]); + var doiCache = {}; + if (rows) { + for each(var row in rows) { + doiCache[row.itemID] = row.value; + } + } processRows(); + // Get years + var dateFields = [Zotero.ItemFields.getID('date')].concat( + Zotero.ItemFields.getTypeFieldsFromBase('date') + ); + var sql = "SELECT itemID, SUBSTR(value, 1, 4) AS year FROM items " + + "JOIN itemData USING (itemID) " + + "JOIN itemDataValues USING (valueID) " + + "WHERE libraryID=? AND fieldID IN (" + + dateFields.map(function () '?').join() + ") " + + "AND SUBSTR(value, 1, 4) != '0000' " + + "AND itemID NOT IN (SELECT itemID FROM deletedItems) " + + "ORDER BY value"; + var rows = Zotero.DB.query(sql, [this._libraryID].concat(dateFields)); + var yearCache = {}; + if (rows) { + for each(var row in rows) { + yearCache[row.itemID] = row.year; + } + } + var creatorRowsCache = {}; // Match on normalized title @@ -225,8 +257,29 @@ Zotero.Duplicates.prototype._findDuplicates = function () { return -1; } + // If both items have a DOI and they don't match, it's not a dupe + if (typeof doiCache[a.itemID] != 'undefined' + && typeof doiCache[b.itemID] != 'undefined' + && doiCache[a.itemID] != doiCache[b.itemID]) { + return -1; + } + + // If both items have an ISBN and they don't match, it's not a dupe + if (typeof isbnCache[a.itemID] != 'undefined' + && typeof isbnCache[b.itemID] != 'undefined' + && isbnCache[a.itemID] != isbnCache[b.itemID]) { + return -1; + } + + // If both items have a year and they're off by more than one, it's not a dupe + if (typeof yearCache[a.itemID] != 'undefined' + && typeof yearCache[b.itemID] != 'undefined' + && Math.abs(yearCache[a.itemID] - yearCache[b.itemID]) > 1) { + return -1; + } + // Check for at least one match on last name + first initial of first name - if (creatorRowsCache[a.itemID] != undefined) { + if (typeof creatorRowsCache[a.itemID] != 'undefined') { aCreatorRows = creatorRowsCache[a.itemID]; } else { @@ -239,7 +292,7 @@ Zotero.Duplicates.prototype._findDuplicates = function () { } // Check for at least one match on last name + first initial of first name - if (creatorRowsCache[b.itemID] != undefined) { + if (typeof creatorRowsCache[b.itemID] != 'undefined') { bCreatorRows = creatorRowsCache[b.itemID]; } else { diff --git a/chrome/content/zotero/xpcom/error.js b/chrome/content/zotero/xpcom/error.js @@ -80,7 +80,6 @@ Zotero.Exception.Alert = function(name, params, title, cause) { this.params = params || []; this._title = title || "general.error"; this.cause = cause; - return this; }; Zotero.Exception.Alert.prototype = { @@ -110,7 +109,7 @@ Zotero.Exception.Alert.prototype = { * Gets the error string */ "toString":function() { - return this.cause.toString() || this.message; + return this.cause ? this.cause.toString() : this.message; }, /** diff --git a/chrome/content/zotero/xpcom/http.js b/chrome/content/zotero/xpcom/http.js @@ -35,14 +35,12 @@ Zotero.HTTP = new function() { * @param {nsIURI|String} url URL to request * @param {Object} [options] Options for HTTP request:<ul> * <li>body - The body of a POST request</li> - * <li>responseType - The type of the response. See XHR 2 documentation for - * legal values</li> - * <li>responseCharset - The charset the response should be interpreted as</li> * <li>cookieSandbox - The sandbox from which cookies should be taken</li> - * <li>dontCache - If set, specifies that the request should not be fulfilled - * from the cache</li> - * <li>successCodes - HTTP status codes that are considered successful</li> * <li>debug - Log response text and status code</li> + * <li>dontCache - If set, specifies that the request should not be fulfilled from the cache</li> + * <li>responseType - The type of the response. See XHR 2 documentation for legal values</li> + * <li>responseCharset - The charset the response should be interpreted as</li> + * <li>successCodes - HTTP status codes that are considered successful</li> * </ul> * @param {Zotero.CookieSandbox} [cookieSandbox] Cookie sandbox object * @return {Promise} A promise resolved with the XMLHttpRequest object if the request @@ -112,6 +110,11 @@ Zotero.HTTP = new function() { if(options && options.dontCache) { channel.loadFlags |= Components.interfaces.nsIRequest.LOAD_BYPASS_CACHE; } + + // Set responseType + if(options && options.responseType) { + xmlhttp.responseType = options.responseType; + } // Send headers var headers = {}; @@ -773,4 +776,74 @@ Zotero.HTTP = new function() { break; } } + + /** + * Mimics the window.location/document.location interface, given an nsIURL + * @param {nsIURL} url + */ + this.Location = function(url) { + this._url = url; + this.hash = url.ref ? "#"+url.ref : ""; + this.host = url.hostPort; + this.hostname = url.host; + this.href = url.spec; + this.pathname = url.filePath; + this.port = (url.schemeIs("https") ? 443 : 80); + this.protocol = url.scheme+":"; + this.search = url.query ? "?"+url.query : ""; + }; + this.Location.prototype = { + "toString":function() { + return this.href; + }, + "__exposedProps__":{ + "hash":"r", + "host":"r", + "hostname":"r", + "href":"r", + "pathname":"r", + "port":"r", + "protocol":"r", + "search":"r", + "toString":"r" + } + }; + + /** + * Mimics an HTMLWindow given an nsIURL + * @param {nsIURL} url + */ + this.Window = function(url) { + this._url = url; + this.top = this; + this.location = Zotero.HTTP.Location(url); + }; + this.Window.prototype.__exposedProps__ = { + "top":"r", + "location":"r" + }; + + /** + * Wraps an HTMLDocument object returned by XMLHttpRequest DOMParser to make it look more like it belongs + * to a browser. This is necessary if the document is to be passed to Zotero.Translate. + * @param {HTMLDocument} doc Document returned by + * @param {nsIURL|String} url + */ + this.wrapDocument = function(doc, url) { + if(typeof url !== "object") { + url = Services.io.newURI(url, null, null).QueryInterface(Components.interfaces.nsIURL); + } + + var parser = Components.classes["@mozilla.org/xmlextras/domparser;1"] + .createInstance(Components.interfaces.nsIDOMParser); + var secMan = Components.classes["@mozilla.org/scriptsecuritymanager;1"] + .getService(Components.interfaces.nsIScriptSecurityManager); + parser.init(secMan.getCodebasePrincipal(url), url, url); + return Zotero.Translate.DOMWrapper.wrap(doc, { + "documentURI":{ "enumerable":true, "value":url.spec }, + "URL":{ "enumerable":true, "value":url.spec }, + "location":{ "enumerable":true, "value":(new Zotero.HTTP.Location(url)) }, + "defaultView":{ "enumerable":true, "value":(new Zotero.HTTP.Window(url)) } + }); + } } \ No newline at end of file diff --git a/chrome/content/zotero/xpcom/id.js b/chrome/content/zotero/xpcom/id.js @@ -87,11 +87,18 @@ Zotero.ID_Tracker = function () { function getKey() { - var baseString = "23456789ABCDEFGHIJKMNPQRSTUVWXTZ"; + // TODO: add 'L' and 'Y' after 3.0.11 cut-off + var baseString = "23456789ABCDEFGHIJKMNPQRSTUVWXZ"; return Zotero.randomString(8, baseString); } + this.isValidKey = function () { + var re = /^[23456789ABCDEFGHIJKLMNPQRSTUVWXYZ]{8}$/ + return re.test(value); + } + + function getBigInt(max) { if (!max) { max = 9007199254740991; diff --git a/chrome/content/zotero/xpcom/report.js b/chrome/content/zotero/xpcom/report.js @@ -73,7 +73,14 @@ Zotero.Report = new function() { // If not valid XML, display notes with entities encoded var parser = Components.classes["@mozilla.org/xmlextras/domparser;1"] .createInstance(Components.interfaces.nsIDOMParser); - var doc = parser.parseFromString('<div>' + arr.note.replace(/ /g, " ") + '</div>', "application/xml"); + var doc = parser.parseFromString('<div>' + + arr.note + // isn't valid in HTML + .replace(/ /g, " ") + // Strip control characters (for notes that were + // added before item.setNote() started doing this) + .replace(/[\u0000-\u0008\u000B\u000C\u000E-\u001F\u007F]/g, "") + + '</div>', "application/xml"); if (doc.documentElement.tagName == 'parsererror') { Zotero.debug(doc.documentElement.textContent, 2); content += '<p class="plaintext">' + escapeXML(arr.note) + '</p>\n'; @@ -100,7 +107,13 @@ Zotero.Report = new function() { // If not valid XML, display notes with entities encoded var parser = Components.classes["@mozilla.org/xmlextras/domparser;1"] .createInstance(Components.interfaces.nsIDOMParser); - var doc = parser.parseFromString('<div>' + note.note.replace(/ /g, " ") + '</div>', "application/xml"); + var doc = parser.parseFromString('<div>' + + note.note + .replace(/ /g, " ") + // Strip control characters (for notes that were + // added before item.setNote() started doing this) + .replace(/[\u0000-\u0008\u000B\u000C\u000E-\u001F\u007F]/g, "") + + '</div>', "application/xml"); if (doc.documentElement.tagName == 'parsererror') { Zotero.debug(doc.documentElement.textContent, 2); content += '<p class="plaintext">' + escapeXML(note.note) + '</p>\n'; diff --git a/chrome/content/zotero/xpcom/translation/translate.js b/chrome/content/zotero/xpcom/translation/translate.js @@ -1560,7 +1560,7 @@ Zotero.Translate.Web.prototype._getTranslatorsGetPotentialTranslators = function Zotero.Translate.Web.prototype._getSandboxLocation = function() { if(this._parentTranslator) { return this._parentTranslator._sandboxLocation; - } else if(this.document.defaultView) { + } else if(this.document.defaultView && this.document.defaultView.toString() === "[object Window]") { return this.document.defaultView; } else { return this.document.location.toString(); diff --git a/chrome/content/zotero/xpcom/translation/translate_firefox.js b/chrome/content/zotero/xpcom/translation/translate_firefox.js @@ -447,8 +447,10 @@ Zotero.Translate.SandboxManager.prototype = { * Imports an object into the sandbox * * @param {Object} object Object to be imported (under Zotero) - * @param {Boolean} passTranslateAsFirstArgument Whether the translate instance should be passed - * as the first argument to the function. + * @param {*} [passTranslateAsFirstArgument] An argument to pass + * as the first argument to the function. + * @param {Object} [attachTo] The object to attach `object` to. + * Defaults to this.sandbox.Zotero */ "importObject":function(object, passAsFirstArgument, attachTo) { if(!attachTo) attachTo = this.sandbox.Zotero; diff --git a/chrome/content/zotero/xpcom/utilities.js b/chrome/content/zotero/xpcom/utilities.js @@ -907,7 +907,7 @@ Zotero.Utilities = { for(var i in obj) { if(!obj.hasOwnProperty(i)) continue; - if(typeof obj[i] === "object") { + if(typeof obj[i] === "object" && obj[i] !== null) { obj2[i] = Zotero.Utilities.deepCopy(obj[i]); } else { obj2[i] = obj[i]; @@ -1089,7 +1089,7 @@ Zotero.Utilities = { **/ "randomString":function(len, chars) { if (!chars) { - chars = "0123456789ABCDEFGHIJKLMNOPQRSTUVWXTZabcdefghiklmnopqrstuvwxyz"; + chars = "0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz"; } if (!len) { len = 8; @@ -1159,7 +1159,7 @@ Zotero.Utilities = { closeBrace = ']'; } - dumped_text += level_padding + "'" + item + "' => " + openBrace; + dumped_text += level_padding + "'" + item + "' => " + type + ' ' + openBrace; //only recurse if there's anything in the object, purely cosmetical try { for(var i in value) { diff --git a/chrome/content/zotero/xpcom/utilities_internal.js b/chrome/content/zotero/xpcom/utilities_internal.js @@ -170,6 +170,35 @@ Zotero.Utilities.Internal = { if (index == 1) { setTimeout(function () { buttonCallback(); }, 1); } + }, + + /** + * Launch a process + * @param {nsIFile} cmd Path to command to launch + * @param {String[]} args Arguments given + * @return {Promise} Promise resolved to true if command succeeds, or an error otherwise + */ + "exec":function(cmd, args) { + if(!cmd.isExecutable()) { + return Q.reject(cmd.path+" is not an executable"); + } + + var proc = Components.classes["@mozilla.org/process/util;1"]. + createInstance(Components.interfaces.nsIProcess); + proc.init(cmd); + + var deferred = Q.defer(); + proc.runwAsync(args, args.length, {"observe":function(subject, topic) { + if(topic !== "process-finished") { + deferred.reject(new Error(cmd.path+" failed")); + } else if(proc.exitValue != 0) { + deferred.reject(new Error(cmd.path+" returned exit status "+proc.exitValue)); + } else { + deferred.resolve(true); + } + }}); + + return deferred.promise; } } diff --git a/chrome/content/zotero/xpcom/zotero.js b/chrome/content/zotero/xpcom/zotero.js @@ -35,7 +35,7 @@ const ZOTERO_CONFIG = { API_URL: 'https://api.zotero.org/', PREF_BRANCH: 'extensions.zotero.', BOOKMARKLET_URL: 'https://www.zotero.org/bookmarklet/', - VERSION: "3.0.11.SOURCE" + VERSION: "3.0.12.SOURCE" }; // Commonly used imports accessible anywhere diff --git a/chrome/content/zotero/zoteroPane.js b/chrome/content/zotero/zoteroPane.js @@ -1504,10 +1504,11 @@ var ZoteroPane = new function() /* * Remove, trash, or delete item(s), depending on context * - * @param {Boolean} [force=false] Trash or delete even if in a collection or search, - * or trash without prompt in library + * @param {Boolean} [force=false] Trash or delete even if in a collection or search, + * or trash without prompt in library + * @param {Boolean} [fromMenu=false] If triggered from context menu, which always prompts for deletes */ - this.deleteSelectedItems = function (force) { + this.deleteSelectedItems = function (force, fromMenu) { if (!this.itemsView || !this.itemsView.selection.count) { return; } @@ -1533,7 +1534,7 @@ var ZoteroPane = new function() if (itemGroup.isLibrary(true)) { // In library, don't prompt if meta key was pressed - var prompt = force ? false : toTrash; + var prompt = (force && !fromMenu) ? false : toTrash; } else if (itemGroup.isCollection()) { // In collection, only prompt if trashing diff --git a/chrome/content/zotero/zoteroPane.xul b/chrome/content/zotero/zoteroPane.xul @@ -264,7 +264,7 @@ <menuseparator/> <menuitem label="&zotero.items.menu.duplicateItem;" oncommand="ZoteroPane_Local.duplicateSelectedItem();"/> <menuitem oncommand="ZoteroPane_Local.deleteSelectedItems();"/> - <menuitem oncommand="ZoteroPane_Local.deleteSelectedItems(true);"/> + <menuitem oncommand="ZoteroPane_Local.deleteSelectedItems(true, true);"/> <menuitem label="&zotero.items.menu.restoreToLibrary;" oncommand="ZoteroPane_Local.restoreSelectedItems();"/> <menuitem label="&zotero.items.menu.mergeItems;" oncommand="ZoteroPane_Local.mergeSelectedItems();"/> <menuseparator/> diff --git a/chrome/locale/en-US/zotero/zotero.properties b/chrome/locale/en-US/zotero/zotero.properties @@ -747,6 +747,7 @@ recognizePDF.couldNotRead = Could not read text from PDF. recognizePDF.noMatches = No matching references found. recognizePDF.fileNotFound = File not found. recognizePDF.limit = Query limit reached. Try again later. +recognizePDF.error = An unexpected error occurred. recognizePDF.complete.label = Metadata Retrieval Complete. recognizePDF.close.label = Close diff --git a/install.rdf b/install.rdf @@ -25,7 +25,7 @@ <Description> <em:id>{ec8030f7-c20a-464f-9b0e-13a3a9e97384}</em:id> <em:minVersion>5.0</em:minVersion> - <em:maxVersion>17.*</em:maxVersion> + <em:maxVersion>18.*</em:maxVersion> </Description> </em:targetApplication> diff --git a/update.rdf b/update.rdf @@ -12,7 +12,7 @@ <RDF:Description> <id>{ec8030f7-c20a-464f-9b0e-13a3a9e97384}</id> <minVersion>5.0</minVersion> - <maxVersion>17.*</maxVersion> + <maxVersion>18.*</maxVersion> <updateLink>http://download.zotero.org/extension/zotero.xpi</updateLink> <updateHash>sha1:</updateHash> </RDF:Description>