commit 9165a0247f5c065890b361399f41e16ccb83f43e
parent e6dbd1ed92213b1f190b4a5de1fb3d06df1b404e
Author: Martynas Bagdonas <martbgd@gmail.com>
Date: Mon, 7 May 2018 13:04:11 +0300
Fix PMID and arXiv identifiers extraction (#1498)
Diffstat:
2 files changed, 13 insertions(+), 8 deletions(-)
diff --git a/chrome/content/zotero/xpcom/utilities_internal.js b/chrome/content/zotero/xpcom/utilities_internal.js
@@ -901,7 +901,7 @@ Zotero.Utilities.Internal = {
// arXiv identifiers are extracted without version number
// i.e. 0706.0044v1 is extracted as 0706.0044,
// because arXiv OAI API doesn't allow to access individual versions
- let arXiv_RE = /((?:[^A-Za-z]|^)([\-A-Za-z\.]+\/\d{7})(?:(v[0-9]+)|)(?!\d))|((?:\D|^)(\d{4}.\d{4,5})(?:(v[0-9]+)|)(?!\d))/g;
+ let arXiv_RE = /((?:[^A-Za-z]|^)([\-A-Za-z\.]+\/\d{7})(?:(v[0-9]+)|)(?!\d))|((?:\D|^)(\d{4}\.\d{4,5})(?:(v[0-9]+)|)(?!\d))/g;
let m;
while ((m = arXiv_RE.exec(text))) {
let arXiv = m[2] || m[5];
@@ -916,11 +916,11 @@ Zotero.Utilities.Internal = {
if (!identifiers.length) {
// PMID; right now, the longest PMIDs are 8 digits, so it doesn't seem like we'll
// need to discriminate for a fairly long time
- let PMID_RE = /(?:\D|^)(\d{1,9})(?!\d)/g;
+ let PMID_RE = /(^|\s|,|:)(\d{1,9})(?=\s|,|$)/g;
let pmid;
while ((pmid = PMID_RE.exec(text)) && !foundIDs.has(pmid)) {
identifiers.push({
- PMID: pmid[1]
+ PMID: pmid[2]
});
foundIDs.add(pmid);
}
diff --git a/test/tests/utilities_internalTest.js b/test/tests/utilities_internalTest.js
@@ -147,15 +147,20 @@ describe("Zotero.Utilities.Internal", function () {
});
it("should extract PMID", async function () {
- var id = "24297125";
- var identifiers = ZUI.extractIdentifiers(id);
- assert.lengthOf(identifiers, 1);
+ var identifiers = ZUI.extractIdentifiers("1 PMID:24297125,222 3-4 1234567890, 123456789");
+ assert.lengthOf(identifiers, 4);
assert.lengthOf(Object.keys(identifiers[0]), 1);
- assert.propertyVal(identifiers[0], "PMID", id);
+ assert.lengthOf(Object.keys(identifiers[1]), 1);
+ assert.lengthOf(Object.keys(identifiers[2]), 1);
+ assert.lengthOf(Object.keys(identifiers[3]), 1);
+ assert.propertyVal(identifiers[0], "PMID", "1");
+ assert.propertyVal(identifiers[1], "PMID", "24297125");
+ assert.propertyVal(identifiers[2], "PMID", "222");
+ assert.propertyVal(identifiers[3], "PMID", "123456789");
});
it("should extract multiple old and new style arXivs", async function () {
- var identifiers = ZUI.extractIdentifiers("0706.0044 arXiv:0706.00441v1,hep-ex/9809001v1, math.GT/0309135.");
+ var identifiers = ZUI.extractIdentifiers("0706.0044 arXiv:0706.00441v1,12345678,hep-ex/9809001v1, math.GT/0309135.");
assert.lengthOf(identifiers, 4);
assert.lengthOf(Object.keys(identifiers[0]), 1);
assert.lengthOf(Object.keys(identifiers[1]), 1);