commit 313bbd246d9fb4d54567c294bd25623b9765a227
parent 96f836f58dcd94a8c9378326c00d870330a79cfe
Author: Matt Burton <mcburton@gmail.com>
Date: Mon, 30 Mar 2009 01:28:23 +0000
Adding translators from dev-list
Diffstat:
| A | translators/AllAfrica.js | | | 71 | +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ |
| A | translators/jmlr.js | | | 116 | +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ |
| A | translators/nips.js | | | 84 | +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ |
3 files changed, 271 insertions(+), 0 deletions(-)
diff --git a/translators/AllAfrica.js b/translators/AllAfrica.js
@@ -0,0 +1,70 @@
+{
+ "translatorID":"34B1E0EA-FD02-4069-BAE4-ED4D98674A5E",
+ "translatorType":4,
+ "label":"allAfrica.com",
+ "creator":"Matt Bachtell",
+ "target":"^http://allafrica\\.com/stories/*",
+ "minVersion":"1.0.0b4.r5",
+ "maxVersion":"",
+ "priority":100,
+ "inRepository":false,
+ "lastUpdated":"2009-03-29 12:34:05"
+}
+
+
+function detectWeb (doc, url) {
+
+ return "newspaperArticle";
+
+}
+
+// Translation entry point: forwards the page and its URL to scrape(), which
+// builds the item and completes it.
+function doWeb (doc, url){ scrape(doc, url); }
+
+function scrape(doc, url) {
+ var title = doc.evaluate("/html/body/div[3]/div/h1[@class='headline']", doc, null, XPathResult.ANY_TYPE, null).iterateNext().textContent;
+ var date = doc.evaluate("/html/body/div[3]/div/p[@class='date']", doc, null, XPathResult.ANY_TYPE, null).iterateNext().textContent;
+
+// zotero entry creation code
+ var newItem = new Zotero.Item('newspaperArticle');
+ newItem.title = title;
+ newItem.date = date;
+ newItem.url = url;
+
+ //AUTHORS
+ try{
+ var authors = doc.evaluate("/html/body/div[3]/div/p[@class='reporter']", doc, null, XPathResult.ANY_TYPE,null).iterateNext().textContent;
+ if (authors.match(/ &| And/)){
+ var aus = authors.split(" And");
+ for (var i=0; i < aus.length ; i++){
+ newItem.creators.push(Zotero.Utilities.cleanAuthor(aus[i], "author"));
+ }
+ }
+ else if(authors.match(", ")){
+ var aus = authors.split(/[,| And| & ]/);
+ for (var i=0; i < aus.length; i++){
+ newItem.creators.push(Zotero.Utilities.cleanAuthor(aus[i], "author"));
+ }
+ }
+ else{
+ var author = authors;
+ newItem.creators.push(Zotero.Utilities.cleanAuthor(author, "author"));
+ }
+ }
+ catch(e){
+ // DO NOTHING
+ }
+
+ //SOURCE
+ try{
+ var newspaper_source = doc.evaluate("/html/body/div[3]/div/p/a/img/@alt", doc, null, XPathResult.ANY_TYPE, null).iterateNext().textContent;
+ newItem.publicationTitle = newspaper_source;
+ }
+ catch(e){
+ var newspaper_source = doc.evaluate("/html/body/div[3]/div/p", doc, null, XPathResult.ANY_TYPE, null).iterateNext().textContent;
+ newItem.publicationTitle = newspaper_source;
+ }
+ newItem.complete();
+
+} // end scrape
+\ No newline at end of file
diff --git a/translators/jmlr.js b/translators/jmlr.js
@@ -0,0 +1,116 @@
+{
+ "translatorID":"80bc4fd3-747c-4dc2-86e9-da7b251e1407",
+ "translatorType":4,
+ "label":"Journal of Machine Learning Research",
+ "creator":"Fei Qi",
+ "target":"^http://jmlr\\.csail\\.mit\\.edu/papers",
+ "minVersion":"1.0.0b4.r5",
+ "maxVersion":"",
+ "priority":100,
+ "inRepository":false,
+ "lastUpdated":"2009-03-21 12:34:05"
+}
+
+function detectWeb(doc, url) {
+ var contRe = /(v\d+|topic|special)/;
+ var m = contRe.exec( url );
+ if (m) {
+ if( doc.title.match( "JMLR" ) )
+ return "multiple";
+ else
+ return "journalArticle";
+ }
+ return false;
+}
+
+function scrape( doc, url ) {
+ var n = doc.documentElement.namespaceURI;
+ var ns = n ? function(prefix) {} : null;
+
+ var item = new Zotero.Item( "journalArticle" );
+ item.url = doc.location.href;
+ item.publicationTitle = "Journal of Machine Learning Research";
+
+ // Zotero.debug( 'retrieving title' );
+ var title = doc.evaluate( '//div[@id="content"]/h2', doc, ns,
+ XPathResult.ANY_TYPE, null ).iterateNext();
+ if( title ){
+ var titlecontent = title.textContent.replace( /^\s+/, '' );
+ item.title = titlecontent.replace( /\s+$/, '' );
+ }
+
+ var refline = doc.evaluate( '//div[@id="content"]/p', doc, ns,
+ XPathResult.ANY_TYPE, null ).iterateNext();
+ if( refline ) {
+ var info = refline.textContent.split( ';' );
+ var authors = info[0].split( ',' );
+ for ( var j = 0; j < authors.length; j++ ){
+ item.creators.push( Zotero.Utilities.cleanAuthor( authors[j], "author" ) );
+ }
+ // Zotero.debug( 'retrieving publication info' );
+ var volissRe = /\s*(\d+)\(\s*(\w+)\s*\):\s*(\d+\s*--\s*\d+),\s*(\d+)./;
+ var voliss = info[1].match( volissRe );
+ item.volume = voliss[1];
+ item.date = voliss[2] + ', ' + voliss[4];
+ item.pages = voliss[3];
+ }
+
+ var text = doc.evaluate( '//div[@id="content"]', doc, ns,
+ XPathResult.ANY_TYPE, null ).iterateNext();
+ // Zotero.debug( doc.textContent );
+ var full = text.textContent.split( 'Abstract' );
+ var absatt = full[1].split( '[abs]' );
+ var abs =absatt[0].replace( /^\s+/, '' );
+ item.abstractNote = abs.replace( /\s+$/, '' );
+ //Zotero.debug( item.abstractNote );
+
+ var atts = doc.evaluate( '//div[@id="content"]//a', doc, ns,
+ XPathResult.ANY_TYPE, null );
+ var att = atts.iterateNext();
+ while( att ){
+ // Zotero.debug( att.textContent + ' VS ' + att.href );
+ if( 0 <= att.textContent.search( 'pdf' ) ) {
+ item.attachments = [ {url:att.href,
+ title:item.title,
+ mimeType:"application/pdf"} ];
+ break;
+ }
+ att = atts.iterateNext();
+ }
+ item.complete();
+}
+
+function doWeb( doc, url ) {
+ var arts = new Array();
+ if (detectWeb(doc, url) == "multiple") {
+ var n = doc.documentElement.namespaceURI;
+ var ns = n ? function(prefix) {} : null;
+ // Search page
+ var items = new Object();
+ var titles = doc.evaluate( '//div[@id="content"]//dt', doc, ns,
+ XPathResult.ANY_TYPE, null );
+ var urls = doc.evaluate( '//div[@id="content"]//dd/a', doc, ns,
+ XPathResult.ANY_TYPE, null );
+ if( titles && urls ) {
+ var title = titles.iterateNext();
+ var url = urls.iterateNext();
+ while( title ) {
+ while( 0 > url.textContent.search( 'abs' ) )
+ url = urls.iterateNext();
+ // Zotero.debug( title.textContent + ' AT ' + url.href );
+ items[url.href] = title.textContent;
+ title = titles.iterateNext();
+ url = urls.iterateNext();
+ }
+ }
+ items = Zotero.selectItems(items);
+ for (var item in items) {
+ arts.push(item);
+ }
+ } else {
+ arts.push(url);
+ }
+
+ Zotero.Utilities.processDocuments( arts, scrape, function() {Zotero.done();});
+ Zotero.wait();
+}
diff --git a/translators/nips.js b/translators/nips.js
@@ -0,0 +1,84 @@
+{
+ "translatorID":"c816f8ad-4c73-4f6d-914e-a6e7212746cf",
+ "translatorType":4,
+ "label":"Neural Information Processing Systems",
+ "creator":"Fei Qi",
+ "target":"http://books\\.nips\\.cc/nips\\d+\\.html",
+ "minVersion":"1.0.0b4.r5",
+ "maxVersion":"",
+ "priority":100,
+ "inRepository":false,
+ "lastUpdated":"2009-03-21 11:23:12"
+}
+
+// Always answers "multiple", i.e. the page is treated as a list of items.
+// Neither doc nor url is inspected.
+function detectWeb(doc, url) { return "multiple"; }
+
+function grabCitation( paper ) {
+ // Zotero.debug( paper.title );
+ // Zotero.debug( paper.pdf );
+ // Zotero.debug( paper.bib );
+ Zotero.Utilities.HTTP.doGet( paper.bib, function( text ) {
+ var translator = Zotero.loadTranslator("import");
+ translator.setTranslator("9cb70025-a888-4a29-a210-93ec52da40d4");
+ // Zotero.debug( text );
+ translator.setString( text );
+ translator.setHandler( "itemDone", function( obj, item ) {
+ item.attachments = [{url:paper.pdf, title:paper.title, mimeType:"application/pdf"}];
+ item.complete();
+ } );
+ translator.translate();
+ }, function() {Zotero.done();}, null);
+}
+
+function doWeb( doc, url ) {
+ var n = doc.documentElement.namespaceURI;
+ var ns = n ? function(prefix) {} : null;
+ // if( doc.title.match( "Search" ) ){
+ // var titleRe = '//i';
+ // var urlRe = '//a';
+ //} else {
+ var titleRe = '//table//td/b';
+ var urlRe = '//table//td/a';
+ //}
+ if (detectWeb(doc, url) == "multiple") {
+ // Retrive items
+ var items = new Object();
+ var arts = new Array();
+ var titles = doc.evaluate( titleRe, doc, ns, XPathResult.ANY_TYPE, null);
+ var urls = doc.evaluate( urlRe, doc, ns, XPathResult.ANY_TYPE, null);
+ if( titles ) {
+ var title = titles.iterateNext();
+ var url = urls.iterateNext();
+ var idx = 0;
+ while( title && urls ) {
+ var art = new Object;
+ // Zotero.debug( title.textContent );
+ items[idx] = title.textContent;
+ art.title = items[idx];
+ var urlnum = 0;
+ while( urlnum < 2 && url ) {
+ if( 0 <= url.textContent.search( 'pdf' ) ) {
+ art.pdf = url.href;
+ urlnum++;
+ }
+ if( 0 <= url.textContent.search( 'bib' ) ) {
+ art.bib = url.href;
+ urlnum++;
+ }
+ url = urls.iterateNext();
+ }
+ arts.push( art );
+ idx++;
+ title = titles.iterateNext();
+ url = urls.iterateNext();
+ }
+ }
+ items = Zotero.selectItems( items );
+ for (var item in items) {
+ grabCitation( arts[item] );
+ }
+ }
+ Zotero.wait();
+}