commit d5989a3d1e367d6b960c3d6504543cd6c3beccd4
parent 9568bf6b6f3eabd854967c20af3b02c7281cd7a9
Author: Matt Burton <mcburton@gmail.com>
Date: Thu, 15 Jan 2009 03:34:39 +0000
Addresses #1242: incorporated Santawort's code; there are still some issues with PDF download.
Diffstat:
1 file changed, 82 insertions(+), 91 deletions(-)
diff --git a/translators/ACS Publications.js b/translators/ACS Publications.js
@@ -2,13 +2,13 @@
"translatorID":"938ebe32-2b2e-4349-a5b3-b3a05d3de627",
"translatorType":4,
"label":"ACS Publications",
- "creator":"Sean Takats and Michael Berkowitz",
+ "creator":"Sean Takats and Michael Berkowitz and Santawort",
"target":"http://[^/]*pubs3?.acs.org[^/]*/(?:wls/journals/query/(?:subscriberResults|query)\\.html|acs/journals/toc.page|cgi-bin/(?:article|abstract|sample|asap).cgi)?",
"minVersion":"1.0.0b3.r1",
"maxVersion":"",
"priority":100,
"inRepository":true,
- "lastUpdated":"2008-05-06 08:15:00"
+ "lastUpdated":"2009-01-14 10:15:00"
}
function detectWeb(doc, url) {
@@ -17,110 +17,101 @@ function detectWeb(doc, url) {
if (prefix == 'x') return namespace; else return null;
} : null;
- if(doc.evaluate('//input[@name="jid"]', doc, nsResolver, XPathResult.ANY_TYPE, null).iterateNext()) {
+ if(doc.evaluate('//input[@id="articleListHeader_selectAllToc"]', doc, nsResolver, XPathResult.ANY_TYPE, null).iterateNext()) {
+ Zotero.debug("multiple");
return "multiple";
- } else if (doc.evaluate('//jid', doc, nsResolver, XPathResult.ANY_TYPE, null).iterateNext()) {
+ } else if (doc.evaluate('//div[@id="articleHead"]', doc, nsResolver, XPathResult.ANY_TYPE, null).iterateNext()) {
return "journalArticle";
- }
- return false;
-}
-
-function handleRequests(requests, pdfs) {
- if(requests.length == 0) {
- Zotero.done();
- return;
}
-
- var request = requests.shift();
-
- Zotero.Utilities.HTTP.doGet("http://pubs.acs.org/wls/journals/citation2/Citation?"+request.jid, function() {
- Zotero.Utilities.HTTP.doPost("http://pubs.acs.org/wls/journals/citation2/Citation",
- "includeAbstract=citation-abstract&format=refmgr&submit=1&mode=GET", function(text) {
- // load translator for RIS
- var translator = Zotero.loadTranslator("import");
- translator.setTranslator("32d59d2d-b65a-4da4-b0a3-bdd3cfb979e7");
- translator.setString(text);
- translator.setHandler("itemDone", function(obj, item) {
- var pdf = pdfs.shift();
- if(pdf) {
- item.attachments.push({
- title:"ACS Full Text PDF",
- url:pdf, mimeType:"application/pdf"
- });
- }
- if (!item.attachments[0].title)
- item.attachments[0].title = "ACS Snapshot";
- item.complete();
- });
- translator.translate();
-
- handleRequests(requests);
- });
- });
+ return false;
}
-function doWeb(doc, url) {
+function doWeb(doc, url){
var namespace = doc.documentElement.namespaceURI;
var nsResolver = namespace ? function(prefix) {
if (prefix == 'x') return namespace; else return null;
} : null;
-
- var pdfs = new Array();
- var requests = new Array();
-
- if (detectWeb(doc, url) == "multiple") {
- // search page
- var items = new Array();
- if (doc.evaluate('//form[@name="citationSelect"]//tbody/tr[1]//span[@class="textbold"][1]', doc, nsResolver, XPathResult.ANY_TYPE, null).iterateNext()) {
- var titles = doc.evaluate('//form[@name="citationSelect"]//tbody/tr[1]//span[@class="textbold"][1]', doc, nsResolver, XPathResult.ANY_TYPE, null);
- } else if (doc.evaluate('//form/div[@class="artBox"]/div[@class="artBody"]/div[@class="artTitle"]', doc, nsResolver, XPathResult.ANY_TYPE, null).iterateNext()) {
- var titles = doc.evaluate('//form/div[@class="artBox"]/div[@class="artBody"]/div[@class="artTitle"]', doc, nsResolver, XPathResult.ANY_TYPE, null);
- }
- if (doc.evaluate('//form[@name="citationSelect"]//input[@name="jid"]', doc, nsResolver, XPathResult.ANY_TYPE, null).iterateNext()) {
- var jids = doc.evaluate('//form[@name="citationSelect"]//input[@name="jid"]', doc, nsResolver, XPathResult.ANY_TYPE, null);
- } else if (doc.evaluate('//div[@id="content"]/form/div[@class="artBox"]/div[@class="artHeadBox"]/div[@class="artHeader"]/input', doc, nsResolver, XPathResult.ANY_TYPE, null).iterateNext()) {
- var jids = doc.evaluate('//div[@id="content"]/form/div[@class="artBox"]/div[@class="artHeadBox"]/div[@class="artHeader"]/input', doc, nsResolver, XPathResult.ANY_TYPE, null);
- }
- var links = doc.evaluate('//form[@name="citationSelect"]//tbody/tr[2]//a[@class="link"]', doc, nsResolver, XPathResult.ANY_TYPE, null);
+ var host = 'http://' + doc.location.host + "/";
+ //Zotero.debug(host);
+ var m = url.match(/https?:\/\/[^\/]*\/doi\/(abs|full)\/(.+)/);
+ var dois = new Array();
+ if(detectWeb(doc, url) == "multiple") { //search
+ var doi;
var title;
- var jid;
- var id;
- var link;
- while ((title = titles.iterateNext()) && (jid = jids.iterateNext())){
- id = jid.value
- items[id] = Zotero.Utilities.trimInternal(title.textContent);
-
- var link = doc.evaluate('../../..//a[contains(text(), "PDF")]', title, nsResolver, XPathResult.ANY_TYPE, null).iterateNext();
- if(link) {
- links[id] = link.href.replace("searchRedirect.cgi", "article.cgi");
+ var availableItems = new Array();
+ var xpath = '//div[@class="articleBox"]';
+ if (doc.evaluate(xpath, doc, nsResolver, XPathResult.ANY_TYPE, null).iterateNext()) {
+ elmts = doc.evaluate(xpath, doc, nsResolver, XPathResult.ANY_TYPE, null);
+ var elmt = elmts.iterateNext();
+ do {
+ title = doc.evaluate('./div[@class="articleBoxMeta"]/h2', elmt, nsResolver, XPathResult.ANY_TYPE, null).iterateNext().textContent;
+ doi = doc.evaluate('./div[@class="articleBoxMeta"]/h2/a/@href', elmt, nsResolver, XPathResult.ANY_TYPE, null).iterateNext().textContent.replace("/doi/abs/","");
+ if (doi.indexOf("prevSearch") != -1){
+ doi = doi.substring(0,doi.indexOf("?"));
}
+ availableItems[doi] = title;
+ } while (elmt = elmts.iterateNext())
+ }
+ var items = Zotero.selectItems(availableItems);
+ if(!items) {
+ return true;
}
-
- items = Zotero.selectItems(items);
- if(!items) return true;
-
- var getstring = "";
for(var i in items) {
- getstring = getstring + "jid=" + encodeURIComponent(i) + "&";
- pdfs.push(links[i]+"?sessid=");
+ dois.push(i);
}
- requests.push({jid:getstring});
- } else {
- // single page
- var jid = doc.evaluate('//jid', doc, nsResolver, XPathResult.ANY_TYPE, null).iterateNext().textContent;
- jid = jid.substr(jid.indexOf("/")+1);
- var pdf = doc.evaluate('/html/body//a[contains(text(), "PDF")]', doc, nsResolver, XPathResult.ANY_TYPE, null).iterateNext();
- if (!pdf) {
- var pdf = doc.evaluate('/html/body//a[contains(@href, "/pdf/")]', doc, nsResolver, XPathResult.ANY_TYPE, null).iterateNext();
+ } else if (m){ //single article
+ var doi = m[2];
+ if (doi.match("prevSearch")) {
+ doi = doi.substring(0,doi.indexOf("?"));
}
- if (pdf) {
- pdf = pdf.href;
- pdf = pdf.replace("searchRedirect.cgi", "article.cgi");
- pdfs.push(pdf+"?sessid=");
- }
- var requests = [{jid:"jid=" + encodeURIComponent(jid)}];
+ Zotero.debug("DOI= "+doi);
+ dois.push(doi);
}
- handleRequests(requests, pdfs);
-
+
+ var setupSets = [];
+ for each (doi in dois) {
+ var citUrl = host + 'action/showCitFormats?doi=' + doi;
+ setupSets.push({ doi: doi, citUrl: citUrl });
+ }
+
+ var setupCallback = function () {
+ //get citation export page's source code;
+ if (setupSets.length) {
+ var set = setupSets.shift();
+ Zotero.Utilities.HTTP.doGet(set.citUrl, function(text){
+ //get the exported RIS file name;
+ var downloadFileName = text.match(/name=\"downloadFileName\" value=\"([A-Za-z0-9_]+)\"/)[1];
+ Zotero.debug("downloadfilename= "+downloadFileName);
+ processCallback(set.doi,downloadFileName);
+ });
+ }
+ else {
+ Zotero.done();
+ }
+ }
+ var processCallback = function (doi,downloadFileName) {
+ var baseurl = "http://pubs.acs.org/action/downloadCitation";
+ var post = "doi=" + doi + "&downloadFileName=" + downloadFileName + "&include=abs&format=refman&direct=on&submit=Download+article+citation+data";
+ Zotero.Utilities.HTTP.doPost(baseurl, post,function(text){
+ // Fix the RIS doi mapping
+ text = text.replace("N1 - doi:","M3 - ");
+ Zotero.debug("ris= "+ text);
+ var translator = Zotero.loadTranslator("import");
+ translator.setTranslator("32d59d2d-b65a-4da4-b0a3-bdd3cfb979e7");
+ translator.setString(text);
+ translator.setHandler("itemDone", function(obj, item) {
+ var pdfUrl = host + 'doi/pdf/' + doi;
+ var fullTextUrl = host + 'doi/full/' + doi;
+ item.attachments.push(
+ {title:"ACS Full Text PDF",url:pdfUrl, mimeType:"application/pdf"},
+ {title:"ACS Full Text Snapshot",url:fullTextUrl, mimeType:"text/html"}
+ );
+ item.complete();
+ });
+ translator.translate();
+ setupCallback();
+ });
+ }
+ setupCallback();
Zotero.wait();
}
\ No newline at end of file