commit 9c31b10de7350de1eadbfb292722d5aac4464498
parent f5a9dd6812f84d2d38af6568567147e7fbda6af7
Author: Dan Stillman <dstillman@zotero.org>
Date: Thu, 17 Dec 2009 08:09:03 +0000
Push PubMed and PubMed Central
Diffstat:
3 files changed, 177 insertions(+), 85 deletions(-)
diff --git a/repotime.txt b/repotime.txt
@@ -1 +1 @@
-2009-11-19 05:05:00
+2009-12-17 08:10:00
diff --git a/translators/NCBI PubMed.js b/translators/NCBI PubMed.js
@@ -8,7 +8,7 @@
"maxVersion":"",
"priority":100,
"inRepository":true,
- "lastUpdated":"2009-10-22 19:00:00"
+ "lastUpdated":"2009-12-17 08:10:00"
}
function detectWeb(doc, url) {
@@ -27,10 +27,10 @@ function detectWeb(doc, url) {
}
}
- var uids = doc.evaluate('//input[@type="checkbox" or @name="uid"]', doc,
+ var uids = doc.evaluate('//input[@type="checkbox" and @name="EntrezSystem2.PEntrez.Pubmed.Pubmed_ResultsPanel.Pubmed_RVDocSum.uid"]', doc,
nsResolver, XPathResult.ANY_TYPE, null);
- if(uids.iterateNext() && doc.title.indexOf("PMC Results") == -1) {
- if (uids.iterateNext() && doc.title.indexOf("PMC Results") == -1){
+ if(uids.iterateNext()) {
+ if (uids.iterateNext()){
return "multiple";
}
return "journalArticle";
diff --git a/translators/PubMed Central.js b/translators/PubMed Central.js
@@ -2,95 +2,187 @@
"translatorID":"27ee5b2c-2a5a-4afc-a0aa-d386642d4eed",
"translatorType":4,
"label":"PubMed Central",
- "creator":"Michael Berkowitz",
+ "creator":"Michael Berkowitz and Rintze Zelle",
"target":"http://[^/]*.nih.gov/",
"minVersion":"1.0.0b4.r5",
"maxVersion":"",
"priority":100,
"inRepository":true,
- "lastUpdated":"2009-10-02 04:50:00"
+ "lastUpdated":"2009-12-17 08:10:00"
}
function detectWeb(doc, url) {
- if (doc.evaluate('//table[@id="ResultPanel"]//td[2]', doc, null, XPathResult.ANY_TYPE, null).iterateNext()) {
- return "multiple";
- } else if (url.indexOf("articlerender") != -1) {
- return "journalArticle";
- }
+    // Classify the page for Zotero:
+    //   - "journalArticle" when the URL is a PMC article URL
+    //     (.../pmc/articles/PMC<digits>) or the page contains exactly one
+    //     div with class "toc-pmcid";
+    //   - "multiple" when the page contains two or more such divs;
+    //   - undefined (no return) when neither applies.
+    var namespace = doc.documentElement.namespaceURI;
+    var nsResolver = namespace ? function (prefix) {
+        if (prefix == 'x') return namespace; else return null;
+    } : null;
+
+    // String.match() returns null when the URL is not an article URL, so
+    // test the result explicitly instead of indexing it inside an empty
+    // try/catch.
+    var articleMatch = url ? url.match(/ncbi\.nlm\.nih\.gov\/pmc\/articles\/PMC([\d]+)/) : null;
+    if (articleMatch) {
+        return "journalArticle";
+    }
+
+    // Each call to iterateNext() consumes one result, so the second call
+    // tells us whether there is more than one entry on the page.
+    var uids = doc.evaluate('//div[@class="toc-pmcid"]', doc, nsResolver, XPathResult.ANY_TYPE, null);
+    if (uids.iterateNext()) {
+        return uids.iterateNext() ? "multiple" : "journalArticle";
+    }
}
+/**
+ * Fetch the records for the given PMCIDs from NCBI efetch (db=pmc, XML) and
+ * save one "journalArticle" item per <article> element in the response.
+ *
+ * Calls Zotero.wait() before issuing the request and Zotero.done() after the
+ * last article of the response has been completed.
+ *
+ * @param {String[]} ids  PMCID digits as extracted by the callers (the part
+ *                        after "PMC").
+ * @param {Document} doc  Not used here; kept so existing callers keep working.
+ */
+function lookupPMCIDs(ids, doc) {
+    Zotero.wait();
+    var newUri = "http://eutils.ncbi.nlm.nih.gov/entrez/eutils/efetch.fcgi?db=pmc&retmode=xml&id=" + ids.join(",");
+    Zotero.debug(newUri);
+    Zotero.Utilities.HTTP.doGet(newUri, function (text) {
+        // Remove the DOCTYPE and the XML declaration before parsing.
+        text = text.replace(/<!DOCTYPE[^>]*>/, "").replace(/<\?xml[^>]*\?>/, "");
+        // Strip hyphens and colons from everything inside a tag (element
+        // names, attribute names AND attribute values) so the names can be
+        // used with plain E4X dot access below (journalmeta, xlinkhref, ...).
+        // This is why the filters below compare against de-hyphenated values
+        // such as "nlmta".
+        // NOTE(review): attribute values lose their hyphens/colons as well,
+        // including the self-uri href used as the PDF file name below.
+        text = text.replace(/(<[^!>][^>]*>)/g, function (tag) {
+            return tag.replace(/[-:]/g, "");
+        });
+        text = Zotero.Utilities.trim(text);
+        XML.prettyPrinting = false;
+        XML.ignoreWhitespace = false;
+        var xml = new XML(text);
+
+        // NOTE(review): ids[i] is paired with the i-th <article>; this
+        // assumes efetch returns one article per id, in request order.
+        for (var i = 0; i < xml.article.length(); i++) {
+            var newItem = new Zotero.Item("journalArticle");
+
+            var journal = xml.article[i].front.journalmeta;
+
+            var abbreviation = journal.journalid.(@journalidtype == "nlmta");
+            if (abbreviation.length()) {
+                newItem.journalAbbreviation = Zotero.Utilities.superCleanString(abbreviation.text().toString());
+            }
+            newItem.publicationTitle = Zotero.Utilities.superCleanString(journal.journaltitle.text().toString());
+
+            // Prefer the print ISSN and fall back to the electronic one.
+            // (Previously the epub value was unconditionally overwritten, so
+            // journals with only an epub ISSN ended up with none.)
+            var issn = journal.issn.(@pubtype == "ppub").text().toString();
+            if (!issn) {
+                issn = journal.issn.(@pubtype == "epub").text().toString();
+            }
+            if (issn) {
+                newItem.ISSN = issn;
+            }
+
+            var article = xml.article[i].front.articlemeta;
+
+            if (article.abstract.p.length()) {
+                newItem.abstractNote = Zotero.Utilities.unescapeHTML(article.abstract.p.toXMLString());
+            }
+
+            var doi = article.articleid.(@pubidtype == "doi");
+            if (doi.length()) {
+                newItem.DOI = doi.text().toString();
+            }
+
+            // Build "extra" from its parts so that a missing PMID never
+            // results in string concatenation onto an unset field.
+            var extraLines = [];
+            var PMID = article.articleid.(@pubidtype == "pmid").text().toString();
+            if (PMID) {
+                extraLines.push("PMID: " + PMID);
+            }
+            extraLines.push("PMCID: " + ids[i]);
+            newItem.extra = extraLines.join("\n");
+
+            // Keep only the part of the title before the first <xref.
+            newItem.title = Zotero.Utilities.unescapeHTML(article.titlegroup.articletitle.toXMLString().split("<xref")[0]);
+            if (article.volume.length()) {
+                newItem.volume = article.volume.text().toString();
+            }
+            if (article.issue.length()) {
+                newItem.issue = article.issue.text().toString();
+            }
+            if (article.lpage.length()) {
+                newItem.pages = article.fpage.text().toString() + "-" + article.lpage.text().toString();
+            } else if (article.fpage.length()) {
+                newItem.pages = article.fpage.text().toString();
+            }
+
+            // Prefer the print publication date and fall back to the
+            // electronic one. An E4X XMLList is an object and therefore
+            // always truthy, so emptiness has to be tested with length().
+            var pubdate = article.pubdate.(@pubtype == "ppub");
+            if (!pubdate.length()) {
+                pubdate = article.pubdate.(@pubtype == "epub");
+            }
+            var year = pubdate.year.text().toString();
+            var month = pubdate.month.text().toString();
+            var day = pubdate.day.text().toString();
+            // Use the most precise form the record provides.
+            if (day != "") {
+                newItem.date = year + "-" + month + "-" + day;
+            } else if (month != "") {
+                newItem.date = year + "-" + month;
+            } else if (year != "") {
+                newItem.date = year;
+            }
+
+            if (article.contribgroup.contrib.length()) {
+                var authors = article.contribgroup.contrib.(@contribtype == "author");
+                for (var j = 0; j < authors.length(); j++) {
+                    var lastName = authors[j].name.surname.text().toString();
+                    var firstName = authors[j].name.givennames.text().toString();
+                    // Skip contributors that carry no personal name at all.
+                    if (firstName || lastName) {
+                        newItem.creators.push({
+                            lastName: lastName,
+                            firstName: firstName
+                        });
+                    }
+                }
+            }
+
+            var linkurl = "http://www.ncbi.nlm.nih.gov/pmc/articles/PMC" + ids[i] + "/";
+            newItem.attachments = [{
+                url: linkurl,
+                title: "PubMed Central Link",
+                mimeType: "text/html",
+                snapshot: false
+            }];
+
+            if (article.selfuri.@xlinkhref.length()) {
+                var pdfFileName = article.selfuri.@xlinkhref.toXMLString();
+                var pdfurl = "http://www.ncbi.nlm.nih.gov/pmc/articles/PMC" + ids[i] + "/pdf/" + pdfFileName;
+                newItem.attachments.push({
+                    title: "PubMed Central Full Text PDF",
+                    mimeType: "application/pdf",
+                    url: pdfurl
+                });
+            }
+
+            newItem.complete();
+        }
+
+        Zotero.done();
+    });
+}
+
+
+
function doWeb(doc, url) {
- var tagMap = {journal_title:"publicationTitle",
- title:"title",
- date:"date",
- issue:"issue",
- volume:"volume",
- doi:"DOI",
- fulltext_html_url:"url"
- };
- var URIs = new Array();
- var items = new Object();
- if (doc.title.indexOf("PMC Results") != -1) {
- var titlex = '//div[@class="toc-entry"]/div/div[@class="toc-title"]';
- var linkx = '//div[@class="toc-entry"]/div/a[@class="toc-link"][1]';
-
- var titles = doc.evaluate(titlex, doc, null, XPathResult.ANY_TYPE, null);
- var next_title = titles.iterateNext();
- var links = doc.evaluate(linkx, doc, null, XPathResult.ANY_TYPE, null);
- var next_link = links.iterateNext();
- while (next_title && next_link) {
- items[next_link.href] = next_title.textContent;
- next_title = titles.iterateNext();
- next_link = links.iterateNext();
- }
- items = Zotero.selectItems(items);
- if(!items) return true;
- for (var i in items) {
- URIs.push(i);
- }
- } else {
- URIs.push(url);
- }
- Zotero.Utilities.HTTP.doGet(URIs, function(text) {
- var tags = new Object();
- var meta = text.match(/<meta[^>]*>/gi);
- for (var i in meta) {
- var item = meta[i].match(/=\"([^"]*)\"/g);
- if (item[0].substring(2, 10) == 'citation') {
- tags[item[0].substring(11, item[0].length - 1)] = item[1].substring(2, item[1].length - 1);
- }
- }
- var newItem = new Zotero.Item("journalArticle");
- for (var tag in tagMap) {
- newItem[tagMap[tag]] = Zotero.Utilities.unescapeHTML(tags[tag]);
- }
- for (var i in meta) {
- if (meta[i].match(/DC.Contributor/)) {
- newItem.creators.push(Zotero.Utilities.cleanAuthor(Zotero.Utilities.unescapeHTML(meta[i].match(/content=\"([^"]*)\">/)[1]), "author"));
- }
- }
- newItem.attachments.push({url:tags["fulltext_html_url"], title:"PubMed Central Snapshot", mimeType:"text/html"});
- if (tags["pdf_url"]) {
- newItem.attachments.push({url:tags["pdf_url"], title:"PubMed Central Full Text PDF", mimeType:"application/pdf"});
- }
- newItem.url = tags["fulltext_html_url"];
- if (!newItem.url) newItem.url = tags["abstract_html_url"];
- try {
- newItem.extra = "PMCID: " + text.match(/PMCID: <\/span>(PMC\d+)/)[1];
- } catch(e){
-
- }
- newItem.journalAbbreviation = text.match(/span class=\"citation-abbreviation\">([^<]+)</)[1];
- newItem.pages = text.match(/span class=\"citation-flpages\">([^<]+)</)[1].replace(/[\.:\s]/g, "");
-
- if (text.match(/Abstract<\/div>([^<]+)</)) {
- var abstract = text.match(/Abstract<\/div>([^<]+)</)[1];
- } else if (text.match(/\"section-content\"><!\-\-article\-meta\-\->([^<]+)/)) {
- var abstract = text.match(/\"section-content\"><!\-\-article\-meta\-\->([^<]+)/)[1];
- }
- if (abstract) newItem.abstractNote = abstract;
- newItem.complete();
- }, function(){ Zotero.done();}
- );
- Zotero.wait();
+    // Collect the PMCID(s) for the current page and hand them to
+    // lookupPMCIDs(). On an article page the id comes from the URL; on a
+    // listing page the ids come from the "toc-pmcid" divs and, when there is
+    // more than one, the user picks via Zotero.selectItems().
+    // Returns true when the selection is cancelled, otherwise nothing.
+    var namespace = doc.documentElement.namespaceURI;
+    var nsResolver = namespace ?
+        function (prefix) {
+            if (prefix == 'x') return namespace;
+            else return null;
+        } : null;
+
+    // match() returns null for non-article URLs; check it explicitly rather
+    // than indexing inside an empty try/catch.
+    var articleMatch = url ? url.match(/ncbi\.nlm\.nih\.gov\/pmc\/articles\/PMC([\d]+)/) : null;
+    if (articleMatch) {
+        lookupPMCIDs([articleMatch[1]], doc);
+        return;
+    }
+
+    var idNodes = doc.evaluate('//div[@class="toc-pmcid"]', doc, nsResolver, XPathResult.ANY_TYPE, null);
+    var titleNodes = doc.evaluate('//div[@class="toc-title"]', doc, nsResolver, XPathResult.ANY_TYPE, null);
+
+    // PMCID digits -> title. A plain object is used because assigning
+    // array["2801612"] would create a huge sparse array.
+    var items = {};
+    var resultsCount = 0;
+    var idNode;
+    while (idNode = idNodes.iterateNext()) {
+        // Advance the title iterator on every pass so ids and titles stay
+        // paired by position, even when an id is skipped below.
+        var titleNode = titleNodes.iterateNext();
+        var idMatch = idNode.textContent.match(/PMC([\d]+)/);
+        if (!idMatch) {
+            continue;
+        }
+        if (!(idMatch[1] in items)) {
+            resultsCount = resultsCount + 1;
+        }
+        // Fall back to the PMCID as label if the page has fewer titles
+        // than ids.
+        items[idMatch[1]] = titleNode ? titleNode.textContent : "PMC" + idMatch[1];
+    }
+
+    // Nothing usable on the page: do not send an efetch request without ids.
+    if (resultsCount == 0) {
+        return;
+    }
+    // A single result is saved directly; only prompt when there is a choice.
+    if (resultsCount > 1) {
+        items = Zotero.selectItems(items);
+        if (!items) {
+            return true;
+        }
+    }
+
+    var pmcids = [];
+    for (var id in items) {
+        pmcids.push(id);
+    }
+    lookupPMCIDs(pmcids, doc);
}
\ No newline at end of file