commit 6de78951458ff48315e34888b75135772d46e57e
parent 78951c34ed8169d70f8fc841a3010ce19f6309d1
Author: Matt Burton <mcburton@gmail.com>
Date: Fri, 2 Oct 2009 03:48:13 +0000
Adding Akilesh's PEP Web translator
Diffstat:
1 file changed, 116 insertions(+), 0 deletions(-)
diff --git a/translators/PEP Web.js b/translators/PEP Web.js
@@ -0,0 +1,115 @@
+{
+ "translatorID":"32ad4782-b106-4ccb-8ae1-ff102ba93eef",
+ "translatorType":4,
+ "label":"PEP Web",
+ "creator":"Akilesh Ayyar",
+ "target":"^http?://www.pep-web.org",
+ "minVersion":"1.0.0b3.r1",
+ "maxVersion":"",
+ "priority":100,
+ "inRepository":false,
+ "lastUpdated":"2009-09-26 00:15:00"
+}
+
+//Only works for journal articles, and only for single entries.
+//Author names sometimes omit periods after the first initials.
+
+function detectWeb(doc, url) {
+
+ var namespace = doc.documentElement.namespaceURI;
+ var nsResolver = namespace ? function(prefix) {
+ if (prefix == 'x') return namespace; else return null;
+ } : null;
+
+
+ if (url.match(/document/))
+ return "journalArticle";
+}
+
+
+function scrape(doc, url) {
+ var namespace = doc.documentElement.namespaceURI;
+ var nsResolver = namespace ? function(prefix) {
+ if (prefix == 'x') return namespace; else return null;
+ } : null;
+
+ var newItem = new Zotero.Item("journalArticle");
+ newItem.url = doc.location.href;
+
+ var xPathString = "/html/body/table/tbody/tr[3]/td/table/tbody/tr/td/table/tbody/tr/td[1]/span[@id='maincite']";
+
+ var xPathString2 = "/html/body/table/tbody/tr[3]/td/table/tbody/tr/td/div/p[3]/a";
+
+ var myXPathObject = doc.evaluate(xPathString, doc, nsResolver, XPathResult.ANY_TYPE, null);
+ var myXPathObject2 = doc.evaluate(xPathString2, doc, nsResolver, XPathResult.ANY_TYPE, null);
+
+ var citeString = myXPathObject.iterateNext().textContent;
+ var titleString = myXPathObject2.iterateNext().textContent;
+
+ //authors
+ var authors = citeString.match(/(^.*)\(/)[1].toString();
+ if (authors == ' ') {
+ authors = "Unknown";
+ }
+
+ var currentauthor;
+
+ //grab all but last author, if there are more than one
+ while (authors.match(/^(.*?)(\,)(\s)([A-Z]\.)*\,\s/)) {
+ currentauthor = authors.match(/^(.*?)(\,)(\s)([A-Z]\.)*/)[0].toString();
+ newItem.creators.push(Zotero.Utilities.cleanAuthor(currentauthor, "author", true));
+ authors = authors.replace(/^(.*?)(\,)(\s)([A-Z]\.)*\,\s/, '');
+ }
+
+ //grab remaining author, or sole author if there's only one
+ if (authors != null) {
+ currentauthor = authors;
+ newItem.creators.push(Zotero.Utilities.cleanAuthor(currentauthor, "author", true));
+ }
+
+ //title
+ newItem.title = titleString;
+
+ //year
+ var year = citeString.match(/\([0-9][0-9][0-9][0-9]\)/).toString();
+ year = year.replace(/\(/, '');
+ year = year.replace(/\)/, '');
+ newItem.date = year;
+
+ //publication name
+ var pubname = citeString.match(/(\)\.\s)(.*)(\,)/)[0].toString();
+ var pubminus = citeString.match(/(\)\.\s)(.*?)(\.)/)[0].toString();
+ pubname = pubname.replace(pubminus, '');
+ pubname = pubname.replace(/\,/, '');
+ pubname = pubname.replace(/\.*/, '');
+ pubname = pubname.replace(/^\s*/, '');
+ pubname = pubname.replace(/\s*$/, '')
+ newItem.publicationTitle = pubname;
+
+ //volume
+ var volumeandpages = citeString.match(/[0-9]*\:([0-9]*(\-?)[0-9]*)\.$/)[0].toString();
+ var volume = volumeandpages.match(/[0-9]*\:/)[0].toString();
+ volume = volume.replace(":", '');
+ newItem.volume = volume;
+
+ //pages
+ var pages = volumeandpages.match(/\:([0-9]*)(\-?)([0-9]*)/)[0].toString();
+ pages = pages.replace(":", '');
+ pages = pages.replace(".", '');
+ newItem.pages = pages;
+
+ newItem.attachments.push({url:doc.location.href, title:"PEP Web Snapshot", mimeType:"text/html"});
+
+ newItem.complete();
+}
+
+
+function doWeb(doc, url) {
+
+ var articles = new Array();
+
+ articles = [url];
+
+ Zotero.Utilities.processDocuments(articles, scrape, function(){Zotero.done();});
+ Zotero.wait();
+}
+\ No newline at end of file