www

Unnamed repository; edit this file 'description' to name the repository.
Log | Files | Refs | Submodules | README | LICENSE

commit 0509e10c25836849343e54145a83597c19955546
parent 28ebbc9ba60c3e6cb0bf0c21c9193c6d4d5ffd21
Author: Avram Lyon <ajlyon@gmail.com>
Date:   Sat, 19 Mar 2011 10:31:21 +0000

Trans: Fixes to eLibrary translator to be more resilient


Diffstat:
M translators/eLibrary.ru.js | 155 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++-----------------------
1 file changed, 111 insertions(+), 44 deletions(-)

diff --git a/translators/eLibrary.ru.js b/translators/eLibrary.ru.js @@ -1,19 +1,19 @@ { - "translatorID":"587709d3-80c5-467d-9fc8-ed41c31e20cf", - "label":"eLibrary.ru", - "creator":"Avram Lyon", - "target":"^http://elibrary\\.ru/", - "minVersion":"1.0.0b4.r5", - "maxVersion":"", - "priority":100, - "inRepository":"1", - "translatorType":4, - "lastUpdated":"2011-01-11 04:31:00" + "translatorID": "587709d3-80c5-467d-9fc8-ed41c31e20cf", + "label": "eLibrary.ru", + "creator": "Avram Lyon", + "target": "^http://elibrary\\.ru/", + "minVersion": "1.0.0b4.r5", + "maxVersion": "", + "priority": 100, + "inRepository": "1", + "translatorType": 4, + "lastUpdated": "2011-03-12 22:55:32" } /* eLibrary.ru Translator - Copyright (C) 2010 Avram Lyon, ajlyon@gmail.com + Copyright (C) 2010-2011 Avram Lyon, ajlyon@gmail.com This program is free software: you can redistribute it and/or modify it under the terms of the GNU General Public License as published by @@ -85,27 +85,32 @@ function scrape (doc) { switch (label) { case "Названиепубликации": titleBlock = doc.evaluate('./table['+t+']', datablock, ns, XPathResult.ANY_TYPE, null).iterateNext(); + Zotero.debug("have titleBlock"); break; case "Авторы": authorBlock = doc.evaluate('./table['+t+']', datablock, ns, XPathResult.ANY_TYPE, null).iterateNext(); + Zotero.debug("have authorBlock"); break; case "Журнал": + case "Издательство": metaBlock = doc.evaluate('./table['+t+']', datablock, ns, XPathResult.ANY_TYPE, null).iterateNext(); + Zotero.debug("have metaBlock"); break; case "Коды": codeBlock = doc.evaluate('./table['+t+']', datablock, ns, XPathResult.ANY_TYPE, null).iterateNext(); + Zotero.debug("have codeBlock"); break; case "Ключевыеслова": keywordBlock = doc.evaluate('./table['+t+']', datablock, ns, XPathResult.ANY_TYPE, null).iterateNext(); + Zotero.debug("have keywordBlock"); break; case "Аннотация": abstractBlock = doc.evaluate('./table['+t+']', datablock, ns, XPathResult.ANY_TYPE, null).iterateNext(); - break; - case 
"Коды": - codeBlock = doc.evaluate('./table['+t+']', datablock, ns, XPathResult.ANY_TYPE, null).iterateNext(); + Zotero.debug("have abstractBlock"); break; case "Списоклитературы": referenceBlock = doc.evaluate('./table['+t+']', datablock, ns, XPathResult.ANY_TYPE, null).iterateNext(); + Zotero.debug("have referenceBlock"); break; case "Переводнаяверсия": default: @@ -113,23 +118,8 @@ function scrape (doc) { break; } } - var type = doc.evaluate('.//table[2]//tr[5]/td[4]', metaBlock, ns, XPathResult.ANY_TYPE, null).iterateNext().textContent; - - switch (type) { - case "научная статья": - type = "journalArticle"; - break; - case "учебное пособие": - case "монография": - type = "book"; - break; - default: - Zotero.debug("Unknown type: "+type+". Using 'journalArticle'"); - type = "journalArticle"; - break; - } - var item = new Zotero.Item(type); + var item = new Zotero.Item(); /*var pdf = false; // Now see if we have a free PDF to download var pdfImage = doc.evaluate('//a/img[@src="/images/pdf_green.gif"]', doc, ns, XPathResult.ANY_TYPE, null).iterateNext(); @@ -151,9 +141,13 @@ function scrape (doc) { item.title = doc.title.match(/eLIBRARY.RU - (.*)/)[1]; if (authorBlock) { - var authorNode = doc.evaluate('.//td[2]/font/a', authorBlock, ns, XPathResult.ANY_TYPE, null); + // Sometimes we don't have links, just bold text + var authorNode = doc.evaluate('.//td[2]/font/a | .//td[2]/font/b', authorBlock, ns, XPathResult.ANY_TYPE, null); while ((author = authorNode.iterateNext()) !== null) { - if (!author.href.match(/org_about\.asp/)) { // Remove organizations + // Remove organizations; by URL or by node name + if ((author.href && !author.href.match(/org_about\.asp/) + && !author.href.match(/org_items\.asp/)) + || author.nodeName == "B") { author = author.textContent; var authors = author.split(","); for (var i = 0; i < authors.length; i++) { @@ -175,17 +169,46 @@ function scrape (doc) { } else { Zotero.debug("Skipping presumed affiliation: " + author.textContent) ; } } } - 
- item.publicationTitle = doc.evaluate('.//table[1]//tr[1]/td[2]', metaBlock, ns, XPathResult.ANY_TYPE, null).iterateNext().textContent; - item.publisher = doc.evaluate('.//table[1]//tr[2]/td[2]', metaBlock, ns, XPathResult.ANY_TYPE, null).iterateNext().textContent; - item.date = doc.evaluate('.//table[2]//tr[1]/td[2]', metaBlock, ns, XPathResult.ANY_TYPE, null).iterateNext().textContent; - item.ISSN = doc.evaluate('.//table[2]//tr[1]/td[4]', metaBlock, ns, XPathResult.ANY_TYPE, null).iterateNext().textContent; - item.volume = doc.evaluate('.//table[2]//tr[2]/td[2]', metaBlock, ns, XPathResult.ANY_TYPE, null).iterateNext().textContent; - item.issue = doc.evaluate('.//table[2]//tr[3]/td[2]', metaBlock, ns, XPathResult.ANY_TYPE, null).iterateNext().textContent; - item.pages = doc.evaluate('.//table[2]//tr[4]/td[2]', metaBlock, ns, XPathResult.ANY_TYPE, null).iterateNext().textContent; - item.language = doc.evaluate('.//table[2]//tr[5]/td[2]', metaBlock, ns, XPathResult.ANY_TYPE, null).iterateNext().textContent; + // This is the table of metadata. We could walk through it, but I found it easier + // to just make a 2-d array of XPaths of field names values. 
+ var mapped = false; + var metaPieces = [['.//table[1]//tr[1]/td[1]','.//table[1]//tr[1]/td[2]'], + ['.//table[1]//tr[2]/td[1]','.//table[1]//tr[2]/td[2]'], + ['.//table[2]//tr[1]/td[1]','.//table[2]//tr[1]/td[2]'], + ['.//table[2]//tr[1]/td[3]','.//table[2]//tr[1]/td[4]'], + ['.//table[2]//tr[2]/td[1]','.//table[2]//tr[2]/td[2]'], + ['.//table[2]//tr[2]/td[3]','.//table[2]//tr[2]/td[4]'], + ['.//table[2]//tr[3]/td[1]','.//table[2]//tr[3]/td[2]'], + ['.//table[2]//tr[3]/td[3]','.//table[2]//tr[3]/td[4]'], + ['.//table[2]//tr[4]/td[1]','.//table[2]//tr[4]/td[2]'], + ['.//table[2]//tr[4]/td[3]','.//table[2]//tr[4]/td[4]']] + for (i in metaPieces) { + mapped = mapper(metaPieces[i][0], metaPieces[i][1], metaBlock, doc); + item[mapped[0]] = mapped[1]; + } + if (item.extra) item.extra = "Цитируемость в РИНЦ: " + item.extra; if (abstractBlock) item.abstractNote = doc.evaluate('./tbody/tr/td[2]/table/tbody/tr/td/font', abstractBlock, ns, XPathResult.ANY_TYPE, null).iterateNext().textContent; + + // Set type + switch (item.itemType) { + case "обзорная статья": // Would be "review article" + case "научная статья": + item.itemType = "journalArticle"; + break; + case "учебное пособие": + case "монография": + item.itemType = "book"; + break; + case "публикация в сборнике трудов конференции": + item.itemType = "conferencePaper"; + break; + default: + Zotero.debug("Unknown type: "+item.itemType+". Using 'journalArticle'"); + item.itemType = "journalArticle"; + break; + } + /*if (referenceBlock) { var note = Zotero.Utilities.trimInternal( doc.evaluate('./tbody/tr/td[2]/table', referenceBlock, ns, XPathResult.ANY_TYPE, null) @@ -193,10 +216,14 @@ function scrape (doc) { Zotero.debug(note); item.notes.push(note); }*/ + if (codeBlock) { - item.extra = doc.evaluate('.//td[2]', codeBlock, ns, XPathResult.ANY_TYPE, null).iterateNext().textContent; - var doi = item.extra.match(/DOI: (10\..+?) 
/); - if (doi) item.DOI = doi[1]; + item.extra += ' '+ doc.evaluate('.//td[2]', codeBlock, ns, XPathResult.ANY_TYPE, null).iterateNext().textContent; + var doi = item.extra.match(/DOI: (10\.[^\s]+)/); + if (doi) { + item.DOI = doi[1]; + item.extra = item.extra.replace(/DOI: 10\.[^\s]+/,""); + } } if (keywordBlock) { @@ -205,7 +232,47 @@ function scrape (doc) { item.tags.push(tag.textContent); } + if (item.title.toUpperCase() == item.title) { + Zotero.debug("Trying to fix all-uppers"); + item.title = item.title.substr(0,1) + item.title.toLowerCase().substr(1); + } + //if(pdf) item.attachments.push(pdf); item.complete(); +} + +function mapper (from, to, block, doc) { + var name = doc.evaluate(from, block, null, XPathResult.ANY_TYPE, null).iterateNext(); + var value = doc.evaluate(to, block, null, XPathResult.ANY_TYPE, null).iterateNext(); + if (!name || !value) return false; + var key = false; + switch (name.textContent.trim()) { + case "Журнал": + key = "publicationTitle"; break; + case "Издательство": + key = "publisher"; break; + case "Год издания": + case "Год выпуска": + key = "date"; break; + case "Том": + key = "volume"; break; + case "Номер": + key = "issue"; break; + case "ISSN": + key = "ISSN"; break; + case "Страницы": + key = "pages"; break; + case "Язык": + key = "language"; break; + case "Место издания": + key = "place"; break; + case "Цит. в РИНЦ": + key = "extra"; break; + case "Тип": + key = "itemType"; break; + default: + Zotero.debug("Unmapped field: "+name.textContent.trim()); + } + return [key, value.textContent.trim()]; } \ No newline at end of file