commit 03af0aab6daeb83a23eff998a818ae0c3af2a187
parent 3e49b5b63232cdc7eecc5aedc8edc57741f6ef22
Author: Avram Lyon <ajlyon@gmail.com>
Date: Sat, 12 Feb 2011 18:28:24 +0000
Trans: Change DOI to disallow ampersands
Diffstat:
1 file changed, 9 insertions(+), 3 deletions(-)
diff --git a/translators/DOI.js b/translators/DOI.js
@@ -16,7 +16,14 @@ var selectArray = {};
// builds a list of DOIs
function getDOIs(doc) {
- const DOIre = /\b(10\.[\w.]+\/[^\s]+)\.?\b/igm;
+ // TODO Detect DOIs more correctly.
+ // The actual rules for DOIs are very lax, but this pattern is stricter.
+ // Strictly speaking, we should allow space characters and all Unicode
+ // characters except control characters. Here, we're cheating by also
+ // disallowing ampersands, so that a DOI taken from a URL stops at the
+ // next '&' instead of swallowing whatever follows it.
+ // Description at: http://www.doi.org/handbook_2000/appendix_1.html#A1-4
+ const DOIre = /\b(10\.[\w.]+\/[^\s&]+)\.?\b/igm;
const DOIXPath = "//text()[contains(., '10.')]";
DOIre.lastMatch = 0;
@@ -100,4 +107,4 @@ function doWeb(doc, url) {
// retrieve full items asynchronously
Zotero.wait();
retrieveNextDOI(DOIs, doc);
-}
-\ No newline at end of file
+}