Separate identifier parsing from Add Item by Identifier and search translation - www - Unnamed repository; edit this file 'description' to name the repository.

commit e35b035224244e8522c05815ae253ff31ae6d959
parent 4e717a0934b67fb12fee2831587fec614ee6d4a1
Author: Dan Stillman <dstillman@zotero.org>
Date:   Sat, 21 Oct 2017 03:26:27 -0400

Separate identifier parsing from Add Item by Identifier and search translation

- Move identifier detection to `Zotero.Utilities.Internal.extractIdentifiers()`
  so that it can be used for things other than Add Item by Identifier
  (e.g., translation-server)
- Add a `Zotero.Translate.Search::setIdentifier()` function that takes an
  identifier object produced by `extractIdentifiers()` (`{ DOI: "10/..." }`),
  converts that to the search format expected by translators, and calls setSearch()

Diffstat:
M chrome/content/zotero/lookup.js  | 73 ++++---------------------------------------------------------------------
M chrome/content/zotero/xpcom/translation/translate.js  | 31 +++++++++++++++++++++++++++++++
M chrome/content/zotero/xpcom/utilities_internal.js  | 67 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
M test/tests/utilities_internalTest.js  | 53 ++++++++++++++++++++++++++++++++++++++++++++++++++++-

4 files changed, 154 insertions(+), 70 deletions(-)
diff --git a/chrome/content/zotero/lookup.js b/chrome/content/zotero/lookup.js
@@ -32,73 +32,8 @@ var Zotero_Lookup = new function () {
 	 * Performs a lookup by DOI, PMID, or ISBN
 	 */
 	this.accept = Zotero.Promise.coroutine(function* (textBox) {
-		var foundIDs = [];	//keep track of identifiers to avoid duplicates
-		var identifier = textBox.value;
-		//first look for DOIs
-		var ids = identifier.split(/[\s\u00A0]+/);	//whitespace + non-breaking space
-		var searches = [], doi;
-		for(var i=0, n=ids.length; i<n; i++) {
-			if((doi = Zotero.Utilities.cleanDOI(ids[i])) && foundIDs.indexOf(doi) == -1) {
-				searches.push({
-					itemType: "journalArticle",
-					DOI: doi
-				});
-				foundIDs.push(doi);
-			}
-		}
-
-		//then try ISBNs
-		if (!searches.length) {
-			//first try replacing dashes
-			ids = identifier.replace(/[\u002D\u00AD\u2010-\u2015\u2212]+/g, "")	//hyphens and dashes
-											.toUpperCase();
-
-			var ISBN_RE = /(?:\D|^)(97[89]\d{10}|\d{9}[\dX])(?!\d)/g;
-			var isbn;
-
-			while(isbn = ISBN_RE.exec(ids)) {
-				isbn = Zotero.Utilities.cleanISBN(isbn[1]);
-				if(isbn && foundIDs.indexOf(isbn) == -1) {
-					searches.push({
-						itemType: "book",
-						ISBN: isbn
-					});
-					foundIDs.push(isbn);
-				}
-			}
-
-			//now try spaces
-			if (!searches.length) {
-				ids = ids.replace(/[ \u00A0]+/g, "");	//space + non-breaking space
-				while(isbn = ISBN_RE.exec(ids)) {
-					isbn = Zotero.Utilities.cleanISBN(isbn[1]);
-					if(isbn && foundIDs.indexOf(isbn) == -1) {
-						searches.push({
-							itemType: "book",
-							ISBN: isbn
-						});
-						foundIDs.push(isbn);
-					}
-				}
-			}
-		}
-
-		//finally try for PMID
-		if (!searches.length) {
-			// PMID; right now, the longest PMIDs are 8 digits, so it doesn't 
-			// seem like we will need to discriminate for a fairly long time
-			var PMID_RE = /(?:\D|^)(\d{1,9})(?!\d)/g;
-			var pmid;
-			while((pmid = PMID_RE.exec(identifier)) && foundIDs.indexOf(pmid) == -1) {
-				searches.push({
-					itemType: "journalArticle",
-					contextObject: "rft_id=info:pmid/" + pmid[1]
-				});
-				foundIDs.push(pmid);
-			}
-		}
-
-		if (!searches.length) {
+		var identifiers = Zotero.Utilities.Internal.extractIdentifiers(textBox.value);
+		if (!identifiers.length) {
 			Zotero.alert(
 				window,
 				Zotero.getString("lookup.failure.title"),
@@ -120,9 +55,9 @@ var Zotero_Lookup = new function () {
 
 		Zotero_Lookup.toggleProgress(true);
 
-		for (let search of searches) {
+		for (let identifier of identifiers) {
 			var translate = new Zotero.Translate.Search();
-			translate.setSearch(search);
+			translate.setIdentifier(identifier);
 
 			// be lenient about translators
 			let translators = yield translate.getTranslators();
diff --git a/chrome/content/zotero/xpcom/translation/translate.js b/chrome/content/zotero/xpcom/translation/translate.js
@@ -2569,6 +2569,37 @@ Zotero.Translate.Search.prototype.setSearch = function(search) {
 }
 
 /**
+ * Set an identifier to use for searching
+ *
+ * @param {Object} identifier - An object with 'DOI', 'ISBN', or 'PMID'
+ */
+Zotero.Translate.Search.prototype.setIdentifier = function (identifier) {
+	var search;
+	if (identifier.DOI) {
+		search = {
+			itemType: "journalArticle",
+			DOI: identifier.DOI
+		};
+	}
+	else if (identifier.ISBN) {
+		search = {
+			itemType: "book",
+			ISBN: identifier.ISBN
+		};
+	}
+	else if (identifier.PMID) {
+		search = {
+			itemType: "journalArticle",
+			contextObject: "rft_id=info:pmid/" + identifier.PMID
+		};
+	}
+	else {
+		throw new Error("Unrecognized identifier");
+	}
+	this.setSearch(search);
+}
+
+/**
  * Overloads {@link Zotero.Translate.Base#getTranslators} to always return all potential translators
  */
 Zotero.Translate.Search.prototype.getTranslators = function() {
diff --git a/chrome/content/zotero/xpcom/utilities_internal.js b/chrome/content/zotero/xpcom/utilities_internal.js
@@ -845,6 +845,73 @@ Zotero.Utilities.Internal = {
 		return item;
 	},
 	
+	
+	extractIdentifiers: function (text) {
+		var identifiers = [];
+		var foundIDs = new Set(); // keep track of identifiers to avoid duplicates
+		
+		// First look for DOIs
+		var ids = text.split(/[\s\u00A0]+/); // whitespace + non-breaking space
+		var doi;
+		for (let id of ids) {
+			if ((doi = Zotero.Utilities.cleanDOI(id)) && !foundIDs.has(doi)) {
+				identifiers.push({
+					DOI: doi
+				});
+				foundIDs.add(doi);
+			}
+		}
+		
+		// Then try ISBNs
+		if (!identifiers.length) {
+			// First try replacing dashes
+			let ids = text.replace(/[\u002D\u00AD\u2010-\u2015\u2212]+/g, "") // hyphens and dashes
+				.toUpperCase();
+			let ISBN_RE = /(?:\D|^)(97[89]\d{10}|\d{9}[\dX])(?!\d)/g;
+			let isbn;
+			while (isbn = ISBN_RE.exec(ids)) {
+				isbn = Zotero.Utilities.cleanISBN(isbn[1]);
+				if (isbn && !foundIDs.has(isbn)) {
+					identifiers.push({
+						ISBN: isbn
+					});
+					foundIDs.add(isbn);
+				}
+			}
+			
+			// Next try spaces
+			if (!identifiers.length) {
+				ids = ids.replace(/[ \u00A0]+/g, ""); // space + non-breaking space
+				while (isbn = ISBN_RE.exec(ids)) {
+					isbn = Zotero.Utilities.cleanISBN(isbn[1]);
+					if(isbn && !foundIDs.has(isbn)) {
+						identifiers.push({
+							ISBN: isbn
+						});
+						foundIDs.add(isbn);
+					}
+				}
+			}
+		}
+		
+		// Finally try for PMID
+		if (!identifiers.length) {
+			// PMID; right now, the longest PMIDs are 8 digits, so it doesn't seem like we'll
+			// need to discriminate for a fairly long time
+			let PMID_RE = /(?:\D|^)(\d{1,9})(?!\d)/g;
+			let pmid;
+			while ((pmid = PMID_RE.exec(text)) && !foundIDs.has(pmid)) {
+				identifiers.push({
+					PMID: pmid[1]
+				});
+				foundIDs.add(pmid);
+			}
+		}
+		
+		return identifiers;
+	},
+	
+	
 	/**
 	 * Hyphenate an ISBN based on the registrant table available from
 	 * https://www.isbn-international.org/range_file_generation
diff --git a/test/tests/utilities_internalTest.js b/test/tests/utilities_internalTest.js
@@ -1,6 +1,14 @@
 "use strict";
 
 describe("Zotero.Utilities.Internal", function () {
+	var ZUI;
+		
+	before(function () {
+		ZUI = Zotero.Utilities.Internal;
+	});
+	
+	
+	
 	describe("#md5()", function () {
 		it("should generate hex string given file path", function* () {
 			var file = OS.Path.join(getTestDataDirectory().path, 'test.png');
@@ -102,5 +110,48 @@ describe("Zotero.Utilities.Internal", function () {
 			assert.isFalse(val);
 			assert.isFalse(spy.called);
 		});
-	})
+	});
+	
+	
+	describe("#extractIdentifiers()", function () {
+		it("should extract ISBN-10", async function () {
+			var id = "0838985890";
+			var identifiers = ZUI.extractIdentifiers(id);
+			assert.lengthOf(identifiers, 1);
+			assert.lengthOf(Object.keys(identifiers[0]), 1);
+			assert.propertyVal(identifiers[0], "ISBN", id);
+		});
+		
+		it("should extract ISBN-13", async function () {
+			var identifiers = ZUI.extractIdentifiers("978-0838985892");
+			assert.lengthOf(identifiers, 1);
+			assert.lengthOf(Object.keys(identifiers[0]), 1);
+			assert.propertyVal(identifiers[0], "ISBN", "9780838985892");
+		});
+		
+		it("should extract multiple ISBN-13s", async function () {
+			var identifiers = ZUI.extractIdentifiers("978-0838985892 9781479347711 ");
+			assert.lengthOf(identifiers, 2);
+			assert.lengthOf(Object.keys(identifiers[0]), 1);
+			assert.lengthOf(Object.keys(identifiers[1]), 1);
+			assert.propertyVal(identifiers[0], "ISBN", "9780838985892");
+			assert.propertyVal(identifiers[1], "ISBN", "9781479347711");
+		});
+		
+		it("should extract DOI", async function () {
+			var id = "10.4103/0976-500X.85940";
+			var identifiers = ZUI.extractIdentifiers(id);
+			assert.lengthOf(identifiers, 1);
+			assert.lengthOf(Object.keys(identifiers[0]), 1);
+			assert.propertyVal(identifiers[0], "DOI", id);
+		});
+		
+		it("should extract PMID", async function () {
+			var id = "24297125";
+			var identifiers = ZUI.extractIdentifiers(id);
+			assert.lengthOf(identifiers, 1);
+			assert.lengthOf(Object.keys(identifiers[0]), 1);
+			assert.propertyVal(identifiers[0], "PMID", id);
+		});
+	});
 })

	www Unnamed repository; edit this file 'description' to name the repository.
	Log \| Files \| Refs \| Submodules \| README \| LICENSE

M	chrome/content/zotero/lookup.js	\|	73	++++---------------------------------------------------------------------
M	chrome/content/zotero/xpcom/translation/translate.js	\|	31	+++++++++++++++++++++++++++++++
M	chrome/content/zotero/xpcom/utilities_internal.js	\|	67	+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
M	test/tests/utilities_internalTest.js	\|	53	++++++++++++++++++++++++++++++++++++++++++++++++++++-