Push PubMed and PubMed Central - www - Unnamed repository; edit this file 'description' to name the repository.

commit 9c31b10de7350de1eadbfb292722d5aac4464498
parent f5a9dd6812f84d2d38af6568567147e7fbda6af7
Author: Dan Stillman <dstillman@zotero.org>
Date:   Thu, 17 Dec 2009 08:09:03 +0000

Push PubMed and PubMed Central


Diffstat:
M repotime.txt  | 2 +-
M translators/NCBI PubMed.js  | 8 ++++----
M translators/PubMed Central.js  | 252 ++++++++++++++++++++++++++++++++++++++++++++++++++++++-------------------------

3 files changed, 177 insertions(+), 85 deletions(-)
diff --git a/repotime.txt b/repotime.txt
@@ -1 +1 @@
-2009-11-19 05:05:00
+2009-12-17 08:10:00
diff --git a/translators/NCBI PubMed.js b/translators/NCBI PubMed.js
@@ -8,7 +8,7 @@
 	"maxVersion":"",
 	"priority":100,
 	"inRepository":true,
-	"lastUpdated":"2009-10-22 19:00:00"
+	"lastUpdated":"2009-12-17 08:10:00"
 }
 
 function detectWeb(doc, url) {
@@ -27,10 +27,10 @@ function detectWeb(doc, url) {
 		}
 	}
 
-	var uids = doc.evaluate('//input[@type="checkbox" or @name="uid"]', doc,
+	var uids = doc.evaluate('//input[@type="checkbox" and @name="EntrezSystem2.PEntrez.Pubmed.Pubmed_ResultsPanel.Pubmed_RVDocSum.uid"]', doc,
 			nsResolver, XPathResult.ANY_TYPE, null);
-	if(uids.iterateNext() && doc.title.indexOf("PMC Results") == -1) {
-		if (uids.iterateNext() && doc.title.indexOf("PMC Results") == -1){
+	if(uids.iterateNext()) {
+		if (uids.iterateNext()){
 			return "multiple";
 		}
 		return "journalArticle";
diff --git a/translators/PubMed Central.js b/translators/PubMed Central.js
@@ -2,95 +2,187 @@
 	"translatorID":"27ee5b2c-2a5a-4afc-a0aa-d386642d4eed",
 	"translatorType":4,
 	"label":"PubMed Central",
-	"creator":"Michael Berkowitz",
+	"creator":"Michael Berkowitz and Rintze Zelle",
 	"target":"http://[^/]*.nih.gov/",
 	"minVersion":"1.0.0b4.r5",
 	"maxVersion":"",
 	"priority":100,
 	"inRepository":true,
-	"lastUpdated":"2009-10-02 04:50:00"
+	"lastUpdated":"2009-12-17 08:10:00"
 }
 
 function detectWeb(doc, url) {
-	if (doc.evaluate('//table[@id="ResultPanel"]//td[2]', doc, null, XPathResult.ANY_TYPE, null).iterateNext()) {
-		return "multiple";
-	} else if (url.indexOf("articlerender") != -1) {
-		return "journalArticle";
-	}
+    var namespace = doc.documentElement.namespaceURI;
+    var nsResolver = namespace ? function(prefix) {
+        if (prefix == 'x') return namespace; else return null;
+    } : null;
+    
+    try {var pmid = url.match(/ncbi\.nlm\.nih\.gov\/pmc\/articles\/PMC([\d]+)/)[1];} catch (e) {}
+    if (pmid) {
+        return "journalArticle";
+    }
+    
+    var uids = doc.evaluate('//div[@class="toc-pmcid"]', doc, nsResolver, XPathResult.ANY_TYPE, null);
+    if(uids.iterateNext()) {
+        if (uids.iterateNext()){
+            return "multiple";
+        }
+        return "journalArticle";
+    }
 }
 
+function lookupPMCIDs(ids, doc) {
+    Zotero.wait();
+    var newUri = "http://eutils.ncbi.nlm.nih.gov/entrez/eutils/efetch.fcgi?db=pmc&retmode=xml&id=" + ids.join(",");
+    Zotero.debug(newUri);
+    Zotero.Utilities.HTTP.doGet(newUri, function (text) {
+        text = text.replace(/<!DOCTYPE[^>]*>/, "").replace(/<\?xml[^>]*\?>/, ""); // Remove xml parse instruction and doctype
+        text = text.replace(/(<[^!>][^>]*>)/g, function replacer(str, p1, p2, offset, s) {
+            return str.replace(/-/gm, "");
+        }); //Strip hyphens from element names, attribute names and attribute values
+        text = text.replace(/(<[^!>][^>]*>)/g, function replacer(str, p1, p2, offset, s) {
+            return str.replace(/:/gm, "");
+        }); //Strip colons from element names, attribute names and attribute values
+        text = Zotero.Utilities.trim(text);
+        XML.prettyPrinting = false;
+        XML.ignoreWhitespace = false;
+        var xml = new XML(text);
+
+        for (var i = 0; i < xml.article.length(); i++) {
+            var newItem = new Zotero.Item("journalArticle");
+
+            var journal = xml.article[i].front.journalmeta;
+
+            if (journal.journalid.(@journalidtype == "nlmta").length()) {
+                newItem.journalAbbreviation = Zotero.Utilities.superCleanString(journal.journalid.(@journalidtype == "nlmta").text().toString());
+            }
+            newItem.publicationTitle = Zotero.Utilities.superCleanString(journal.journaltitle.text().toString());
+
+            var issn = journal.issn.(@pubtype == "epub").text().toString();
+            var issn = journal.issn.(@pubtype == "ppub").text().toString();
+            if (issn) {
+                newItem.ISSN = issn;
+            }
+
+            var article = xml.article[i].front.articlemeta;
+
+            if (article.abstract.p.length()) {
+                newItem.abstractNote = Zotero.Utilities.unescapeHTML(article.abstract.p.toXMLString());
+            }
+
+            if (article.articleid.(@pubidtype == "doi").length()) {
+                newItem.DOI = article.articleid.(@pubidtype == "doi").text().toString();
+            }
+            var PMID = article.articleid.(@pubidtype == "pmid").text().toString();
+            if (PMID) {
+                newItem.extra = "PMID: " + PMID + "\n";
+            }
+            newItem.extra = newItem.extra + "PMCID: " + ids[i];
+            newItem.title = Zotero.Utilities.unescapeHTML(article.titlegroup.articletitle.toXMLString().split("<xref")[0]);
+            if (article.volume.length()) {
+                newItem.volume = article.volume.text().toString();
+            }
+            if (article.issue.length()) {
+                newItem.issue = article.issue.text().toString();
+            }
+            if (article.lpage.length()) {
+                newItem.pages = article.fpage.text().toString() + "-" + article.lpage.text().toString();
+            } else if (article.fpage.length()) {
+                newItem.pages = article.fpage.text().toString()
+            }
+
+            var pubdate = article.pubdate. (@pubtype == "ppub");
+            if (!pubdate) {
+                var pubdate = article.pubdate. (@pubtype == "epub");
+            }
+            if (pubdate) {
+                if (pubdate.day.text().toString() != "") {
+                    newItem.date = pubdate.year.text().toString() + "-" + pubdate.month.text().toString() + "-" + pubdate.day.text().toString();
+                } else if (pubdate.month.text().toString() != "") {
+                    newItem.date = pubdate.year.text().toString() + "-" + pubdate.month.text().toString();
+                } else if (pubdate.year.text().toString() != "") {
+                    newItem.date = pubdate.year.text().toString();
+                }
+            }
+
+            if (article.contribgroup.contrib.length()) {
+                var authors = article.contribgroup.contrib. (@contribtype == "author");
+                for (var j = 0; j < authors.length(); j++) {
+                    var lastName = authors[j].name.surname.text().toString();
+                    var firstName = authors[j].name.givennames.text().toString();
+                    if (firstName || lastName) {
+                        newItem.creators.push({
+                            lastName: lastName,
+                            firstName: firstName
+                        });
+                    }
+                }
+            }
+
+            var linkurl = "http://www.ncbi.nlm.nih.gov/pmc/articles/PMC" + ids[i] + "/";
+            newItem.attachments = [{
+                url: linkurl,
+                title: "PubMed Central Link",
+                mimeType: "text/html",
+                snapshot: false
+            }];
+            
+            if (article.selfuri.@xlinkhref.length()) {
+                var pdfFileName = article.selfuri.@xlinkhref.toXMLString();
+                var pdfurl = "http://www.ncbi.nlm.nih.gov/pmc/articles/PMC" + ids[i] + "/pdf/" + pdfFileName;
+                newItem.attachments.push({
+                title:"PubMed Central Full Text PDF",
+                mimeType:"application/pdf",
+                url:pdfurl
+            }); 
+            }
+
+            newItem.complete();
+        }
+
+        Zotero.done();
+    });
+}
+
+
+
 function doWeb(doc, url) {
-	var tagMap = {journal_title:"publicationTitle",
-					title:"title",
-					date:"date",
-					issue:"issue",
-					volume:"volume",
-					doi:"DOI",
-					fulltext_html_url:"url"
-				};
-	var URIs = new Array();
-	var items = new Object();
-	if (doc.title.indexOf("PMC Results") != -1) {
-		var titlex = '//div[@class="toc-entry"]/div/div[@class="toc-title"]';
-		var linkx = '//div[@class="toc-entry"]/div/a[@class="toc-link"][1]';
-		
-		var titles = doc.evaluate(titlex, doc, null, XPathResult.ANY_TYPE, null);
-		var next_title = titles.iterateNext();
-		var links = doc.evaluate(linkx, doc, null, XPathResult.ANY_TYPE, null);
-		var next_link = links.iterateNext();
-		while (next_title && next_link) {
-			items[next_link.href] = next_title.textContent;
-			next_title = titles.iterateNext();
-			next_link = links.iterateNext();
-		}
-		items = Zotero.selectItems(items);
-		if(!items) return true;
-		for (var i in items) {
-			URIs.push(i);
-		}
-	} else {
-		URIs.push(url);
-	}
-		Zotero.Utilities.HTTP.doGet(URIs, function(text) {
-			var tags = new Object();
-			var meta = text.match(/<meta[^>]*>/gi);
-			for (var i in meta) {
-				var item = meta[i].match(/=\"([^"]*)\"/g);
-				if (item[0].substring(2, 10) == 'citation') {
-					tags[item[0].substring(11, item[0].length - 1)] = item[1].substring(2, item[1].length - 1);
-				}
-			}
-			var newItem = new Zotero.Item("journalArticle");
-			for (var tag in tagMap) {
-				newItem[tagMap[tag]] = Zotero.Utilities.unescapeHTML(tags[tag]);
-			}
-			for (var i in meta) {
-				if (meta[i].match(/DC.Contributor/)) {
-					newItem.creators.push(Zotero.Utilities.cleanAuthor(Zotero.Utilities.unescapeHTML(meta[i].match(/content=\"([^"]*)\">/)[1]), "author"));
-				}
-			}
-			newItem.attachments.push({url:tags["fulltext_html_url"], title:"PubMed Central Snapshot", mimeType:"text/html"});
-			if (tags["pdf_url"]) {	
-				newItem.attachments.push({url:tags["pdf_url"], title:"PubMed Central Full Text PDF", mimeType:"application/pdf"});
-			}
-			newItem.url = tags["fulltext_html_url"];
-			if (!newItem.url) newItem.url = tags["abstract_html_url"];
-			try {
-			  newItem.extra = "PMCID: " + text.match(/PMCID: <\/span>(PMC\d+)/)[1];
-			} catch(e){
-			  
-			}
-			newItem.journalAbbreviation = text.match(/span class=\"citation-abbreviation\">([^<]+)</)[1];
-			newItem.pages = text.match(/span class=\"citation-flpages\">([^<]+)</)[1].replace(/[\.:\s]/g, "");
-			
-			if (text.match(/Abstract<\/div>([^<]+)</)) {
-				var abstract = text.match(/Abstract<\/div>([^<]+)</)[1];
-			} else if (text.match(/\"section-content\"><!\-\-article\-meta\-\->([^<]+)/)) {
-				var abstract = text.match(/\"section-content\"><!\-\-article\-meta\-\->([^<]+)/)[1];
-			}
-			if (abstract) newItem.abstractNote = abstract;
-			newItem.complete();
-		}, function(){ Zotero.done();} 
-		);
-	Zotero.wait();
+    var namespace = doc.documentElement.namespaceURI;
+    var nsResolver = namespace ?
+    function (prefix) {
+        if (prefix == 'x') return namespace;
+        else return null;
+    } : null;
+
+    var ids = new Array();
+    var pmcid;
+    var resultsCount = 0;
+    try {
+        pmcid = url.match(/ncbi\.nlm\.nih\.gov\/pmc\/articles\/PMC([\d]+)/)[1];
+    } catch(e) {}
+    if (pmcid) {
+        ids.push(pmcid);
+        lookupPMCIDs(ids, doc);
+    } else {
+        var pmcids = doc.evaluate('//div[@class="toc-pmcid"]', doc, nsResolver, XPathResult.ANY_TYPE, null);
+        var titles = doc.evaluate('//div[@class="toc-title"]', doc, nsResolver, XPathResult.ANY_TYPE, null);
+        var title;
+        while (pmcid = pmcids.iterateNext()) {
+            title = titles.iterateNext();
+            ids[pmcid.textContent.match(/PMC([\d]+)/)[1]] = title.textContent;
+            resultsCount = resultsCount + 1;
+        }
+        if (resultsCount > 1) {
+            ids = Zotero.selectItems(ids);
+        }
+        if (!ids) {
+            return true;
+        }
+
+        var pmcids = new Array();
+        for (var i in ids) {
+            pmcids.push(i);
+        }
+        lookupPMCIDs(pmcids, doc);
+    }
 }
 \ No newline at end of file

	www Unnamed repository; edit this file 'description' to name the repository.
	Log | Files | Refs | Submodules | README | LICENSE

M repotime.txt  | 2 +-
M translators/NCBI PubMed.js  | 8 ++++----
M translators/PubMed Central.js  | 252 ++++++++++++++++++++++++++++++++++++++++++++++++++++++-------------------------