commit 7cfa4e8e3aa7f655423deb28b2a7622f5ff06830
parent c63ed295744e77e83fde72a860265f894d3622b4
Author: Matt Burton <mcburton@gmail.com>
Date: Mon, 29 Mar 2010 15:01:31 +0000
Adding Jonas's 03mar10 new translators.
Diffstat:
2 files changed, 222 insertions(+), 0 deletions(-)
diff --git a/translators/Electronic Colloquium on Computational Complexity.js b/translators/Electronic Colloquium on Computational Complexity.js
@@ -0,0 +1,113 @@
+{
+ "translatorID":"09a9599e-c20e-a405-d10d-35ad4130a426",
+ "translatorType":4,
+ "label":"Electronic Colloquium on Computational Complexity",
+ "creator":"Jonas Schrieb",
+ "minVersion":"1.0.0b3.r1",
+ "target":"http://(www.)?eccc.(uni-trier|hpi-web).de/",
+ "maxVersion":"",
+ "priority":100,
+ "inRepository":false,
+ "lastUpdated":"2010-03-03 15:00:00"
+}
+
+function detectWeb(doc, url) {
+ var singleRe = /^http:\/\/(www\.)?eccc\.(uni-trier|hpi-web)\.de\/report\/\d{4}\/\d{3}/;
+ var multipleRe = /^http:\/\/(www\.)?eccc\.(uni-trier|hpi-web)\.de\/(title|year|keyword)\//;
+ if(singleRe.test(url)) {
+ return "report";
+ } else if(multipleRe.test(url)) {
+ return "multiple";
+ }
+}
+
+function scrape(doc) {
+ var newItem = new Zotero.Item("report");
+
+ var namespace = doc.documentElement.namespaceURI;
+ var nsResolver = namespace ? function(prefix) {
+ if (prefix == 'x') return namespace; else return null;
+ } : null;
+
+
+ var url = doc.location.href;
+ var tmp = url.match(/\/(\d{4})\/(\d{3})\/$/);
+ newItem.date = tmp[1];
+ newItem.reportNumber = tmp[2];
+ newItem.url = url;
+
+
+
+ var titleXPath = "id('box')//h4";
+ newItem.title = doc.evaluate(titleXPath, doc, nsResolver, XPathResult.ANY_TYPE, null).iterateNext().textContent;
+
+
+
+ var authorsXPath = "id('box')//a[contains(@href,'author')]";
+ var authors = doc.evaluate(authorsXPath, doc, nsResolver, XPathResult.ANY_TYPE, null);
+ var nextAuthor;
+ while (nextAuthor = authors.iterateNext()) {
+ newItem.creators.push(Zotero.Utilities.cleanAuthor(nextAuthor.textContent, "author"));
+ }
+
+
+
+ var keywordsXPath = "id('box')//a[contains(@href,'keyword')]";
+ var keywords = doc.evaluate(keywordsXPath, doc, nsResolver, XPathResult.ANY_TYPE, null);
+ var nextKeyword;
+ var i = 0;
+ while (nextKeyword = keywords.iterateNext()) {
+ newItem.tags[i++] = nextKeyword.textContent;
+ }
+
+
+
+ var abstractXPath = "id('box')/text()";
+ var abstractLines = doc.evaluate(abstractXPath, doc, nsResolver, XPathResult.ANY_TYPE, null);
+ newItem.abstractNote = "";
+ var nextLine;
+ while(nextLine = abstractLines.iterateNext()) {
+ newItem.abstractNote += nextLine.textContent;
+ }
+
+
+
+ newItem.attachments = [
+ {url:url, title:"ECCC Snapshot", mimeType:"text/html"},
+ {url:url+"download", title:"ECCC Full Text PDF", mimeType:"application/pdf"}
+ ];
+
+ newItem.complete();
+}
+
+function doWeb(doc, url) {
+ var namespace = doc.documentElement.namespaceURI;
+ var nsResolver = namespace ? function(prefix) {
+ if (prefix == 'x') return namespace; else return null;
+ } : null;
+
+ var articles = new Array();
+ var items = new Object();
+ var nextTitle;
+
+ if (detectWeb(doc, url) == "multiple") {
+ var titleXPath = "//a[starts-with(@href,'/report/')]/h4";
+ var linkXPath = "//a[starts-with(@href,'/report/')][h4]";
+
+ var titles = doc.evaluate(titleXPath, doc, nsResolver, XPathResult.ANY_TYPE, null);
+ var links = doc.evaluate(linkXPath, doc, nsResolver, XPathResult.ANY_TYPE, null);
+ while (nextTitle = titles.iterateNext()) {
+ nextLink = links.iterateNext();
+ items[nextLink.href] = nextTitle.textContent;
+ }
+ items = Zotero.selectItems(items);
+ for (var i in items) {
+ articles.push(i);
+ }
+ } else {
+ articles = [url];
+ }
+
+ Zotero.Utilities.processDocuments(articles, scrape, function(){Zotero.done();});
+ Zotero.wait();
+}
diff --git a/translators/ePrint IACR.js b/translators/ePrint IACR.js
@@ -0,0 +1,109 @@
+{
+ "translatorID":"04a23cbe-5f8b-d6cd-8eb1-2e23bcc8ae8f",
+ "translatorType":4,
+ "label":"ePrint IACR",
+ "creator":"Jonas Schrieb",
+ "minVersion":"1.0.0b3.r1",
+ "target":"^http://eprint\\.iacr\\.org/",
+ "maxVersion":"",
+ "priority":100,
+ "inRepository":false,
+ "lastUpdated":"2010-03-03 14:00:00"
+}
+
+function detectWeb(doc, url) {
+ var singleRe = /^http:\/\/eprint\.iacr\.org\/(\d{4}\/\d{3}|cgi-bin\/print\.pl)/;
+ var multipleRe = /^http:\/\/eprint\.iacr\.org\/(complete|curr|\d{4}|cgi-bin\/search\.pl)/;
+ if(singleRe.test(url)) {
+ return "report";
+ } else if(multipleRe.test(url)) {
+ return "multiple";
+ }
+}
+
+function scrape(doc, url) {
+ var namespace = doc.documentElement.namespaceURI;
+ var nsResolver = namespace ? function(prefix) {
+ if (prefix == 'x') return namespace; else return null;
+ } : null;
+
+ var reportNoXPath = "//h2";
+ var titleXPath = "//p[1]/b";
+ var authorsXPath = "//p[2]/i";
+ var abstractXPath = "//p[starts-with(b/text(),\"Abstract\")]/text() | //p[not(*)]";
+ var keywordsXPath = "//p[starts-with(b/text(),\"Category\")]";
+
+ var reportNo = doc.evaluate(reportNoXPath, doc, nsResolver, XPathResult.ANY_TYPE, null).iterateNext().textContent;
+ reportNo = reportNo.match(/(\d{4})\/(\d{3})$/);
+ var year = reportNo[1];
+ var no = reportNo[2];
+
+ var title = doc.evaluate(titleXPath, doc, nsResolver, XPathResult.ANY_TYPE, null).iterateNext().textContent;
+
+ var authors = doc.evaluate(authorsXPath, doc, nsResolver, XPathResult.ANY_TYPE, null).iterateNext().textContent;
+ authors = authors.split(" and ");
+
+ var abstr = "";
+ var abstractLines = doc.evaluate(abstractXPath, doc, nsResolver, XPathResult.ANY_TYPE, null);
+ var nextLine;
+ while(nextLine = abstractLines.iterateNext()) {
+ abstr += nextLine.textContent;
+ }
+
+ var keywords = doc.evaluate(keywordsXPath, doc, nsResolver, XPathResult.ANY_TYPE, null).iterateNext().textContent;
+ var tmp = keywords.match(/Category \/ Keywords: (?:([^\/]*) \/ )?([^\/]*)/);
+ keywords = tmp[2].split(", ")
+ keywords.unshift(tmp[1]);
+
+
+ var newItem = new Zotero.Item("report");
+ newItem.date = year;
+ newItem.reportNumber = no;
+ newItem.url = "http://eprint.iacr.org/"+year+"/"+no;
+ newItem.title = title;
+ newItem.abstractNote = abstr;
+ for (var i in authors) {
+ newItem.creators.push(Zotero.Utilities.cleanAuthor(authors[i], "author"));
+ }
+ for (var i = 0; i < keywords.length; i++) {
+ newItem.tags[i] = keywords[i];
+ }
+ newItem.attachments = [
+ {url:newItem.url, title:"ePrint IACR Snapshot", mimeType:"text/html"},
+ {url:newItem.url+".pdf", title:"ePrint IACR Full Text PDF", mimeType:"application/pdf"}
+ ];
+
+ newItem.complete();
+}
+
+function doWeb(doc, url) {
+ var namespace = doc.documentElement.namespaceURI;
+ var nsResolver = namespace ? function(prefix) {
+ if (prefix == 'x') return namespace; else return null;
+ } : null;
+
+ var articles = new Array();
+ var items = new Object();
+ var nextTitle;
+
+ if (detectWeb(doc, url) == "multiple") {
+ var titleXPath = "//dl/dd/b";
+ var linkXPath = "//dl/dt/a[1]";
+
+ var titles = doc.evaluate(titleXPath, doc, nsResolver, XPathResult.ANY_TYPE, null);
+ var links = doc.evaluate(linkXPath, doc, nsResolver, XPathResult.ANY_TYPE, null);
+ while (nextTitle = titles.iterateNext()) {
+ nextLink = links.iterateNext();
+ items[nextLink.href] = nextTitle.textContent;
+ }
+ items = Zotero.selectItems(items);
+ for (var i in items) {
+ articles.push(i);
+ }
+ } else {
+ articles = [url];
+ }
+
+ Zotero.Utilities.processDocuments(articles, scrape, function(){Zotero.done();});
+ Zotero.wait();
+}