www

Unnamed repository; edit this file 'description' to name the repository.
Log | Files | Refs | Submodules | README | LICENSE

commit 9c31b10de7350de1eadbfb292722d5aac4464498
parent f5a9dd6812f84d2d38af6568567147e7fbda6af7
Author: Dan Stillman <dstillman@zotero.org>
Date:   Thu, 17 Dec 2009 08:09:03 +0000

Push PubMed and PubMed Central


Diffstat:
M repotime.txt | 2 +-
M translators/NCBI PubMed.js | 8 ++++----
M translators/PubMed Central.js | 252 ++++++++++++++++++++++++++++++++++++++++++++++++++++++-------------------------
3 files changed, 177 insertions(+), 85 deletions(-)

diff --git a/repotime.txt b/repotime.txt @@ -1 +1 @@ -2009-11-19 05:05:00 +2009-12-17 08:10:00 diff --git a/translators/NCBI PubMed.js b/translators/NCBI PubMed.js @@ -8,7 +8,7 @@ "maxVersion":"", "priority":100, "inRepository":true, - "lastUpdated":"2009-10-22 19:00:00" + "lastUpdated":"2009-12-17 08:10:00" } function detectWeb(doc, url) { @@ -27,10 +27,10 @@ function detectWeb(doc, url) { } } - var uids = doc.evaluate('//input[@type="checkbox" or @name="uid"]', doc, + var uids = doc.evaluate('//input[@type="checkbox" and @name="EntrezSystem2.PEntrez.Pubmed.Pubmed_ResultsPanel.Pubmed_RVDocSum.uid"]', doc, nsResolver, XPathResult.ANY_TYPE, null); - if(uids.iterateNext() && doc.title.indexOf("PMC Results") == -1) { - if (uids.iterateNext() && doc.title.indexOf("PMC Results") == -1){ + if(uids.iterateNext()) { + if (uids.iterateNext()){ return "multiple"; } return "journalArticle"; diff --git a/translators/PubMed Central.js b/translators/PubMed Central.js @@ -2,95 +2,187 @@ "translatorID":"27ee5b2c-2a5a-4afc-a0aa-d386642d4eed", "translatorType":4, "label":"PubMed Central", - "creator":"Michael Berkowitz", + "creator":"Michael Berkowitz and Rintze Zelle", "target":"http://[^/]*.nih.gov/", "minVersion":"1.0.0b4.r5", "maxVersion":"", "priority":100, "inRepository":true, - "lastUpdated":"2009-10-02 04:50:00" + "lastUpdated":"2009-12-17 08:10:00" } function detectWeb(doc, url) { - if (doc.evaluate('//table[@id="ResultPanel"]//td[2]', doc, null, XPathResult.ANY_TYPE, null).iterateNext()) { - return "multiple"; - } else if (url.indexOf("articlerender") != -1) { - return "journalArticle"; - } + var namespace = doc.documentElement.namespaceURI; + var nsResolver = namespace ? 
function(prefix) { + if (prefix == 'x') return namespace; else return null; + } : null; + + try {var pmid = url.match(/ncbi\.nlm\.nih\.gov\/pmc\/articles\/PMC([\d]+)/)[1];} catch (e) {} + if (pmid) { + return "journalArticle"; + } + + var uids = doc.evaluate('//div[@class="toc-pmcid"]', doc, nsResolver, XPathResult.ANY_TYPE, null); + if(uids.iterateNext()) { + if (uids.iterateNext()){ + return "multiple"; + } + return "journalArticle"; + } } +function lookupPMCIDs(ids, doc) { + Zotero.wait(); + var newUri = "http://eutils.ncbi.nlm.nih.gov/entrez/eutils/efetch.fcgi?db=pmc&retmode=xml&id=" + ids.join(","); + Zotero.debug(newUri); + Zotero.Utilities.HTTP.doGet(newUri, function (text) { + text = text.replace(/<!DOCTYPE[^>]*>/, "").replace(/<\?xml[^>]*\?>/, ""); // Remove xml parse instruction and doctype + text = text.replace(/(<[^!>][^>]*>)/g, function replacer(str, p1, p2, offset, s) { + return str.replace(/-/gm, ""); + }); //Strip hyphens from element names, attribute names and attribute values + text = text.replace(/(<[^!>][^>]*>)/g, function replacer(str, p1, p2, offset, s) { + return str.replace(/:/gm, ""); + }); //Strip colons from element names, attribute names and attribute values + text = Zotero.Utilities.trim(text); + XML.prettyPrinting = false; + XML.ignoreWhitespace = false; + var xml = new XML(text); + + for (var i = 0; i < xml.article.length(); i++) { + var newItem = new Zotero.Item("journalArticle"); + + var journal = xml.article[i].front.journalmeta; + + if (journal.journalid.(@journalidtype == "nlmta").length()) { + newItem.journalAbbreviation = Zotero.Utilities.superCleanString(journal.journalid.(@journalidtype == "nlmta").text().toString()); + } + newItem.publicationTitle = Zotero.Utilities.superCleanString(journal.journaltitle.text().toString()); + + var issn = journal.issn.(@pubtype == "epub").text().toString(); + var issn = journal.issn.(@pubtype == "ppub").text().toString(); + if (issn) { + newItem.ISSN = issn; + } + + var article = 
xml.article[i].front.articlemeta; + + if (article.abstract.p.length()) { + newItem.abstractNote = Zotero.Utilities.unescapeHTML(article.abstract.p.toXMLString()); + } + + if (article.articleid.(@pubidtype == "doi").length()) { + newItem.DOI = article.articleid.(@pubidtype == "doi").text().toString(); + } + var PMID = article.articleid.(@pubidtype == "pmid").text().toString(); + if (PMID) { + newItem.extra = "PMID: " + PMID + "\n"; + } + newItem.extra = newItem.extra + "PMCID: " + ids[i]; + newItem.title = Zotero.Utilities.unescapeHTML(article.titlegroup.articletitle.toXMLString().split("<xref")[0]); + if (article.volume.length()) { + newItem.volume = article.volume.text().toString(); + } + if (article.issue.length()) { + newItem.issue = article.issue.text().toString(); + } + if (article.lpage.length()) { + newItem.pages = article.fpage.text().toString() + "-" + article.lpage.text().toString(); + } else if (article.fpage.length()) { + newItem.pages = article.fpage.text().toString() + } + + var pubdate = article.pubdate. (@pubtype == "ppub"); + if (!pubdate) { + var pubdate = article.pubdate. (@pubtype == "epub"); + } + if (pubdate) { + if (pubdate.day.text().toString() != "") { + newItem.date = pubdate.year.text().toString() + "-" + pubdate.month.text().toString() + "-" + pubdate.day.text().toString(); + } else if (pubdate.month.text().toString() != "") { + newItem.date = pubdate.year.text().toString() + "-" + pubdate.month.text().toString(); + } else if (pubdate.year.text().toString() != "") { + newItem.date = pubdate.year.text().toString(); + } + } + + if (article.contribgroup.contrib.length()) { + var authors = article.contribgroup.contrib. 
(@contribtype == "author"); + for (var j = 0; j < authors.length(); j++) { + var lastName = authors[j].name.surname.text().toString(); + var firstName = authors[j].name.givennames.text().toString(); + if (firstName || lastName) { + newItem.creators.push({ + lastName: lastName, + firstName: firstName + }); + } + } + } + + var linkurl = "http://www.ncbi.nlm.nih.gov/pmc/articles/PMC" + ids[i] + "/"; + newItem.attachments = [{ + url: linkurl, + title: "PubMed Central Link", + mimeType: "text/html", + snapshot: false + }]; + + if (article.selfuri.@xlinkhref.length()) { + var pdfFileName = article.selfuri.@xlinkhref.toXMLString(); + var pdfurl = "http://www.ncbi.nlm.nih.gov/pmc/articles/PMC" + ids[i] + "/pdf/" + pdfFileName; + newItem.attachments.push({ + title:"PubMed Central Full Text PDF", + mimeType:"application/pdf", + url:pdfurl + }); + } + + newItem.complete(); + } + + Zotero.done(); + }); +} + + + function doWeb(doc, url) { - var tagMap = {journal_title:"publicationTitle", - title:"title", - date:"date", - issue:"issue", - volume:"volume", - doi:"DOI", - fulltext_html_url:"url" - }; - var URIs = new Array(); - var items = new Object(); - if (doc.title.indexOf("PMC Results") != -1) { - var titlex = '//div[@class="toc-entry"]/div/div[@class="toc-title"]'; - var linkx = '//div[@class="toc-entry"]/div/a[@class="toc-link"][1]'; - - var titles = doc.evaluate(titlex, doc, null, XPathResult.ANY_TYPE, null); - var next_title = titles.iterateNext(); - var links = doc.evaluate(linkx, doc, null, XPathResult.ANY_TYPE, null); - var next_link = links.iterateNext(); - while (next_title && next_link) { - items[next_link.href] = next_title.textContent; - next_title = titles.iterateNext(); - next_link = links.iterateNext(); - } - items = Zotero.selectItems(items); - if(!items) return true; - for (var i in items) { - URIs.push(i); - } - } else { - URIs.push(url); - } - Zotero.Utilities.HTTP.doGet(URIs, function(text) { - var tags = new Object(); - var meta = 
text.match(/<meta[^>]*>/gi); - for (var i in meta) { - var item = meta[i].match(/=\"([^"]*)\"/g); - if (item[0].substring(2, 10) == 'citation') { - tags[item[0].substring(11, item[0].length - 1)] = item[1].substring(2, item[1].length - 1); - } - } - var newItem = new Zotero.Item("journalArticle"); - for (var tag in tagMap) { - newItem[tagMap[tag]] = Zotero.Utilities.unescapeHTML(tags[tag]); - } - for (var i in meta) { - if (meta[i].match(/DC.Contributor/)) { - newItem.creators.push(Zotero.Utilities.cleanAuthor(Zotero.Utilities.unescapeHTML(meta[i].match(/content=\"([^"]*)\">/)[1]), "author")); - } - } - newItem.attachments.push({url:tags["fulltext_html_url"], title:"PubMed Central Snapshot", mimeType:"text/html"}); - if (tags["pdf_url"]) { - newItem.attachments.push({url:tags["pdf_url"], title:"PubMed Central Full Text PDF", mimeType:"application/pdf"}); - } - newItem.url = tags["fulltext_html_url"]; - if (!newItem.url) newItem.url = tags["abstract_html_url"]; - try { - newItem.extra = "PMCID: " + text.match(/PMCID: <\/span>(PMC\d+)/)[1]; - } catch(e){ - - } - newItem.journalAbbreviation = text.match(/span class=\"citation-abbreviation\">([^<]+)</)[1]; - newItem.pages = text.match(/span class=\"citation-flpages\">([^<]+)</)[1].replace(/[\.:\s]/g, ""); - - if (text.match(/Abstract<\/div>([^<]+)</)) { - var abstract = text.match(/Abstract<\/div>([^<]+)</)[1]; - } else if (text.match(/\"section-content\"><!\-\-article\-meta\-\->([^<]+)/)) { - var abstract = text.match(/\"section-content\"><!\-\-article\-meta\-\->([^<]+)/)[1]; - } - if (abstract) newItem.abstractNote = abstract; - newItem.complete(); - }, function(){ Zotero.done();} - ); - Zotero.wait(); + var namespace = doc.documentElement.namespaceURI; + var nsResolver = namespace ? 
+ function (prefix) { + if (prefix == 'x') return namespace; + else return null; + } : null; + + var ids = new Array(); + var pmcid; + var resultsCount = 0; + try { + pmcid = url.match(/ncbi\.nlm\.nih\.gov\/pmc\/articles\/PMC([\d]+)/)[1]; + } catch(e) {} + if (pmcid) { + ids.push(pmcid); + lookupPMCIDs(ids, doc); + } else { + var pmcids = doc.evaluate('//div[@class="toc-pmcid"]', doc, nsResolver, XPathResult.ANY_TYPE, null); + var titles = doc.evaluate('//div[@class="toc-title"]', doc, nsResolver, XPathResult.ANY_TYPE, null); + var title; + while (pmcid = pmcids.iterateNext()) { + title = titles.iterateNext(); + ids[pmcid.textContent.match(/PMC([\d]+)/)[1]] = title.textContent; + resultsCount = resultsCount + 1; + } + if (resultsCount > 1) { + ids = Zotero.selectItems(ids); + } + if (!ids) { + return true; + } + + var pmcids = new Array(); + for (var i in ids) { + pmcids.push(i); + } + lookupPMCIDs(pmcids, doc); + } } \ No newline at end of file