www

Unnamed repository; edit this file 'description' to name the repository.
Log | Files | Refs | Submodules | README | LICENSE

commit e35b035224244e8522c05815ae253ff31ae6d959
parent 4e717a0934b67fb12fee2831587fec614ee6d4a1
Author: Dan Stillman <dstillman@zotero.org>
Date:   Sat, 21 Oct 2017 03:26:27 -0400

Separate identifier parsing from Add Item by Identifier and search translation

- Move identifier detection to `Zotero.Utilities.Internal.extractIdentifiers()`
  so that it can be used for things other than Add Item by Identifier
  (e.g., translation-server)
- Add a `Zotero.Translate.Search::setIdentifier()` function that takes an
  identifier object produced by `extractIdentifiers()` (`{ DOI: "10/..." }`),
  converts that to the search format expected by translators, and calls setSearch()

Diffstat:
M chrome/content/zotero/lookup.js | 73 ++++---------------------------------------------------------------------
M chrome/content/zotero/xpcom/translation/translate.js | 31 +++++++++++++++++++++++++++++++
M chrome/content/zotero/xpcom/utilities_internal.js | 67 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
M test/tests/utilities_internalTest.js | 53 ++++++++++++++++++++++++++++++++++++++++++++++++++++-
4 files changed, 154 insertions(+), 70 deletions(-)

diff --git a/chrome/content/zotero/lookup.js b/chrome/content/zotero/lookup.js @@ -32,73 +32,8 @@ var Zotero_Lookup = new function () { * Performs a lookup by DOI, PMID, or ISBN */ this.accept = Zotero.Promise.coroutine(function* (textBox) { - var foundIDs = []; //keep track of identifiers to avoid duplicates - var identifier = textBox.value; - //first look for DOIs - var ids = identifier.split(/[\s\u00A0]+/); //whitespace + non-breaking space - var searches = [], doi; - for(var i=0, n=ids.length; i<n; i++) { - if((doi = Zotero.Utilities.cleanDOI(ids[i])) && foundIDs.indexOf(doi) == -1) { - searches.push({ - itemType: "journalArticle", - DOI: doi - }); - foundIDs.push(doi); - } - } - - //then try ISBNs - if (!searches.length) { - //first try replacing dashes - ids = identifier.replace(/[\u002D\u00AD\u2010-\u2015\u2212]+/g, "") //hyphens and dashes - .toUpperCase(); - - var ISBN_RE = /(?:\D|^)(97[89]\d{10}|\d{9}[\dX])(?!\d)/g; - var isbn; - - while(isbn = ISBN_RE.exec(ids)) { - isbn = Zotero.Utilities.cleanISBN(isbn[1]); - if(isbn && foundIDs.indexOf(isbn) == -1) { - searches.push({ - itemType: "book", - ISBN: isbn - }); - foundIDs.push(isbn); - } - } - - //now try spaces - if (!searches.length) { - ids = ids.replace(/[ \u00A0]+/g, ""); //space + non-breaking space - while(isbn = ISBN_RE.exec(ids)) { - isbn = Zotero.Utilities.cleanISBN(isbn[1]); - if(isbn && foundIDs.indexOf(isbn) == -1) { - searches.push({ - itemType: "book", - ISBN: isbn - }); - foundIDs.push(isbn); - } - } - } - } - - //finally try for PMID - if (!searches.length) { - // PMID; right now, the longest PMIDs are 8 digits, so it doesn't - // seem like we will need to discriminate for a fairly long time - var PMID_RE = /(?:\D|^)(\d{1,9})(?!\d)/g; - var pmid; - while((pmid = PMID_RE.exec(identifier)) && foundIDs.indexOf(pmid) == -1) { - searches.push({ - itemType: "journalArticle", - contextObject: "rft_id=info:pmid/" + pmid[1] - }); - foundIDs.push(pmid); - } - } - - if (!searches.length) { + var 
identifiers = Zotero.Utilities.Internal.extractIdentifiers(textBox.value); + if (!identifiers.length) { Zotero.alert( window, Zotero.getString("lookup.failure.title"), @@ -120,9 +55,9 @@ var Zotero_Lookup = new function () { Zotero_Lookup.toggleProgress(true); - for (let search of searches) { + for (let identifier of identifiers) { var translate = new Zotero.Translate.Search(); - translate.setSearch(search); + translate.setIdentifier(identifier); // be lenient about translators let translators = yield translate.getTranslators(); diff --git a/chrome/content/zotero/xpcom/translation/translate.js b/chrome/content/zotero/xpcom/translation/translate.js @@ -2569,6 +2569,37 @@ Zotero.Translate.Search.prototype.setSearch = function(search) { } /** + * Set an identifier to use for searching + * + * @param {Object} identifier - An object with 'DOI', 'ISBN', or 'PMID' + */ +Zotero.Translate.Search.prototype.setIdentifier = function (identifier) { + var search; + if (identifier.DOI) { + search = { + itemType: "journalArticle", + DOI: identifier.DOI + }; + } + else if (identifier.ISBN) { + search = { + itemType: "book", + ISBN: identifier.ISBN + }; + } + else if (identifier.PMID) { + search = { + itemType: "journalArticle", + contextObject: "rft_id=info:pmid/" + identifier.PMID + }; + } + else { + throw new Error("Unrecognized identifier"); + } + this.setSearch(search); +} + +/** * Overloads {@link Zotero.Translate.Base#getTranslators} to always return all potential translators */ Zotero.Translate.Search.prototype.getTranslators = function() { diff --git a/chrome/content/zotero/xpcom/utilities_internal.js b/chrome/content/zotero/xpcom/utilities_internal.js @@ -845,6 +845,73 @@ Zotero.Utilities.Internal = { return item; }, + + extractIdentifiers: function (text) { + var identifiers = []; + var foundIDs = new Set(); // keep track of identifiers to avoid duplicates + + // First look for DOIs + var ids = text.split(/[\s\u00A0]+/); // whitespace + non-breaking space + var doi; + for 
(let id of ids) { + if ((doi = Zotero.Utilities.cleanDOI(id)) && !foundIDs.has(doi)) { + identifiers.push({ + DOI: doi + }); + foundIDs.add(doi); + } + } + + // Then try ISBNs + if (!identifiers.length) { + // First try replacing dashes + let ids = text.replace(/[\u002D\u00AD\u2010-\u2015\u2212]+/g, "") // hyphens and dashes + .toUpperCase(); + let ISBN_RE = /(?:\D|^)(97[89]\d{10}|\d{9}[\dX])(?!\d)/g; + let isbn; + while (isbn = ISBN_RE.exec(ids)) { + isbn = Zotero.Utilities.cleanISBN(isbn[1]); + if (isbn && !foundIDs.has(isbn)) { + identifiers.push({ + ISBN: isbn + }); + foundIDs.add(isbn); + } + } + + // Next try spaces + if (!identifiers.length) { + ids = ids.replace(/[ \u00A0]+/g, ""); // space + non-breaking space + while (isbn = ISBN_RE.exec(ids)) { + isbn = Zotero.Utilities.cleanISBN(isbn[1]); + if(isbn && !foundIDs.has(isbn)) { + identifiers.push({ + ISBN: isbn + }); + foundIDs.add(isbn); + } + } + } + } + + // Finally try for PMID + if (!identifiers.length) { + // PMID; right now, the longest PMIDs are 8 digits, so it doesn't seem like we'll + // need to discriminate for a fairly long time + let PMID_RE = /(?:\D|^)(\d{1,9})(?!\d)/g; + let pmid; + while ((pmid = PMID_RE.exec(text)) && !foundIDs.has(pmid)) { + identifiers.push({ + PMID: pmid[1] + }); + foundIDs.add(pmid); + } + } + + return identifiers; + }, + + /** * Hyphenate an ISBN based on the registrant table available from * https://www.isbn-international.org/range_file_generation diff --git a/test/tests/utilities_internalTest.js b/test/tests/utilities_internalTest.js @@ -1,6 +1,14 @@ "use strict"; describe("Zotero.Utilities.Internal", function () { + var ZUI; + + before(function () { + ZUI = Zotero.Utilities.Internal; + }); + + + describe("#md5()", function () { it("should generate hex string given file path", function* () { var file = OS.Path.join(getTestDataDirectory().path, 'test.png'); @@ -102,5 +110,48 @@ describe("Zotero.Utilities.Internal", function () { assert.isFalse(val); 
assert.isFalse(spy.called); }); - }) + }); + + + describe("#extractIdentifiers()", function () { + it("should extract ISBN-10", async function () { + var id = "0838985890"; + var identifiers = ZUI.extractIdentifiers(id); + assert.lengthOf(identifiers, 1); + assert.lengthOf(Object.keys(identifiers[0]), 1); + assert.propertyVal(identifiers[0], "ISBN", id); + }); + + it("should extract ISBN-13", async function () { + var identifiers = ZUI.extractIdentifiers("978-0838985892"); + assert.lengthOf(identifiers, 1); + assert.lengthOf(Object.keys(identifiers[0]), 1); + assert.propertyVal(identifiers[0], "ISBN", "9780838985892"); + }); + + it("should extract multiple ISBN-13s", async function () { + var identifiers = ZUI.extractIdentifiers("978-0838985892 9781479347711 "); + assert.lengthOf(identifiers, 2); + assert.lengthOf(Object.keys(identifiers[0]), 1); + assert.lengthOf(Object.keys(identifiers[1]), 1); + assert.propertyVal(identifiers[0], "ISBN", "9780838985892"); + assert.propertyVal(identifiers[1], "ISBN", "9781479347711"); + }); + + it("should extract DOI", async function () { + var id = "10.4103/0976-500X.85940"; + var identifiers = ZUI.extractIdentifiers(id); + assert.lengthOf(identifiers, 1); + assert.lengthOf(Object.keys(identifiers[0]), 1); + assert.propertyVal(identifiers[0], "DOI", id); + }); + + it("should extract PMID", async function () { + var id = "24297125"; + var identifiers = ZUI.extractIdentifiers(id); + assert.lengthOf(identifiers, 1); + assert.lengthOf(Object.keys(identifiers[0]), 1); + assert.propertyVal(identifiers[0], "PMID", id); + }); + }); })