commit 0509e10c25836849343e54145a83597c19955546
parent 28ebbc9ba60c3e6cb0bf0c21c9193c6d4d5ffd21
Author: Avram Lyon <ajlyon@gmail.com>
Date: Sat, 19 Mar 2011 10:31:21 +0000
Trans: Fixes to eLibrary translator to be more resilient
Diffstat:
1 file changed, 111 insertions(+), 44 deletions(-)
diff --git a/translators/eLibrary.ru.js b/translators/eLibrary.ru.js
@@ -1,19 +1,19 @@
{
- "translatorID":"587709d3-80c5-467d-9fc8-ed41c31e20cf",
- "label":"eLibrary.ru",
- "creator":"Avram Lyon",
- "target":"^http://elibrary\\.ru/",
- "minVersion":"1.0.0b4.r5",
- "maxVersion":"",
- "priority":100,
- "inRepository":"1",
- "translatorType":4,
- "lastUpdated":"2011-01-11 04:31:00"
+ "translatorID": "587709d3-80c5-467d-9fc8-ed41c31e20cf",
+ "label": "eLibrary.ru",
+ "creator": "Avram Lyon",
+ "target": "^http://elibrary\\.ru/",
+ "minVersion": "1.0.0b4.r5",
+ "maxVersion": "",
+ "priority": 100,
+ "inRepository": "1",
+ "translatorType": 4,
+ "lastUpdated": "2011-03-12 22:55:32"
}
/*
eLibrary.ru Translator
- Copyright (C) 2010 Avram Lyon, ajlyon@gmail.com
+ Copyright (C) 2010-2011 Avram Lyon, ajlyon@gmail.com
This program is free software: you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
@@ -85,27 +85,32 @@ function scrape (doc) {
switch (label) {
case "Названиепубликации":
titleBlock = doc.evaluate('./table['+t+']', datablock, ns, XPathResult.ANY_TYPE, null).iterateNext();
+ Zotero.debug("have titleBlock");
break;
case "Авторы":
authorBlock = doc.evaluate('./table['+t+']', datablock, ns, XPathResult.ANY_TYPE, null).iterateNext();
+ Zotero.debug("have authorBlock");
break;
case "Журнал":
+ case "Издательство":
metaBlock = doc.evaluate('./table['+t+']', datablock, ns, XPathResult.ANY_TYPE, null).iterateNext();
+ Zotero.debug("have metaBlock");
break;
case "Коды":
codeBlock = doc.evaluate('./table['+t+']', datablock, ns, XPathResult.ANY_TYPE, null).iterateNext();
+ Zotero.debug("have codeBlock");
break;
case "Ключевыеслова":
keywordBlock = doc.evaluate('./table['+t+']', datablock, ns, XPathResult.ANY_TYPE, null).iterateNext();
+ Zotero.debug("have keywordBlock");
break;
case "Аннотация":
abstractBlock = doc.evaluate('./table['+t+']', datablock, ns, XPathResult.ANY_TYPE, null).iterateNext();
- break;
- case "Коды":
- codeBlock = doc.evaluate('./table['+t+']', datablock, ns, XPathResult.ANY_TYPE, null).iterateNext();
+ Zotero.debug("have abstractBlock");
break;
case "Списоклитературы":
referenceBlock = doc.evaluate('./table['+t+']', datablock, ns, XPathResult.ANY_TYPE, null).iterateNext();
+ Zotero.debug("have referenceBlock");
break;
case "Переводнаяверсия":
default:
@@ -113,23 +118,8 @@ function scrape (doc) {
break;
}
}
- var type = doc.evaluate('.//table[2]//tr[5]/td[4]', metaBlock, ns, XPathResult.ANY_TYPE, null).iterateNext().textContent;
-
- switch (type) {
- case "научная статья":
- type = "journalArticle";
- break;
- case "учебное пособие":
- case "монография":
- type = "book";
- break;
- default:
- Zotero.debug("Unknown type: "+type+". Using 'journalArticle'");
- type = "journalArticle";
- break;
- }
- var item = new Zotero.Item(type);
+ var item = new Zotero.Item();
/*var pdf = false;
// Now see if we have a free PDF to download
var pdfImage = doc.evaluate('//a/img[@src="/images/pdf_green.gif"]', doc, ns, XPathResult.ANY_TYPE, null).iterateNext();
@@ -151,9 +141,13 @@ function scrape (doc) {
item.title = doc.title.match(/eLIBRARY.RU - (.*)/)[1];
if (authorBlock) {
- var authorNode = doc.evaluate('.//td[2]/font/a', authorBlock, ns, XPathResult.ANY_TYPE, null);
+ // Sometimes we don't have links, just bold text
+ var authorNode = doc.evaluate('.//td[2]/font/a | .//td[2]/font/b', authorBlock, ns, XPathResult.ANY_TYPE, null);
while ((author = authorNode.iterateNext()) !== null) {
- if (!author.href.match(/org_about\.asp/)) { // Remove organizations
+ // Remove organizations; by URL or by node name
+ if ((author.href && !author.href.match(/org_about\.asp/)
+ && !author.href.match(/org_items\.asp/))
+ || author.nodeName == "B") {
author = author.textContent;
var authors = author.split(",");
for (var i = 0; i < authors.length; i++) {
@@ -175,17 +169,46 @@ function scrape (doc) {
} else { Zotero.debug("Skipping presumed affiliation: " + author.textContent) ; }
}
}
-
- item.publicationTitle = doc.evaluate('.//table[1]//tr[1]/td[2]', metaBlock, ns, XPathResult.ANY_TYPE, null).iterateNext().textContent;
- item.publisher = doc.evaluate('.//table[1]//tr[2]/td[2]', metaBlock, ns, XPathResult.ANY_TYPE, null).iterateNext().textContent;
- item.date = doc.evaluate('.//table[2]//tr[1]/td[2]', metaBlock, ns, XPathResult.ANY_TYPE, null).iterateNext().textContent;
- item.ISSN = doc.evaluate('.//table[2]//tr[1]/td[4]', metaBlock, ns, XPathResult.ANY_TYPE, null).iterateNext().textContent;
- item.volume = doc.evaluate('.//table[2]//tr[2]/td[2]', metaBlock, ns, XPathResult.ANY_TYPE, null).iterateNext().textContent;
- item.issue = doc.evaluate('.//table[2]//tr[3]/td[2]', metaBlock, ns, XPathResult.ANY_TYPE, null).iterateNext().textContent;
- item.pages = doc.evaluate('.//table[2]//tr[4]/td[2]', metaBlock, ns, XPathResult.ANY_TYPE, null).iterateNext().textContent;
- item.language = doc.evaluate('.//table[2]//tr[5]/td[2]', metaBlock, ns, XPathResult.ANY_TYPE, null).iterateNext().textContent;
+ // This is the table of metadata. We could walk through it, but I found it easier
+ // to just make a 2-d array of XPath pairs: [field name, field value].
+ var mapped = false;
+ var metaPieces = [['.//table[1]//tr[1]/td[1]','.//table[1]//tr[1]/td[2]'],
+ ['.//table[1]//tr[2]/td[1]','.//table[1]//tr[2]/td[2]'],
+ ['.//table[2]//tr[1]/td[1]','.//table[2]//tr[1]/td[2]'],
+ ['.//table[2]//tr[1]/td[3]','.//table[2]//tr[1]/td[4]'],
+ ['.//table[2]//tr[2]/td[1]','.//table[2]//tr[2]/td[2]'],
+ ['.//table[2]//tr[2]/td[3]','.//table[2]//tr[2]/td[4]'],
+ ['.//table[2]//tr[3]/td[1]','.//table[2]//tr[3]/td[2]'],
+ ['.//table[2]//tr[3]/td[3]','.//table[2]//tr[3]/td[4]'],
+ ['.//table[2]//tr[4]/td[1]','.//table[2]//tr[4]/td[2]'],
+ ['.//table[2]//tr[4]/td[3]','.//table[2]//tr[4]/td[4]']]
+ for (i in metaPieces) {
+ mapped = mapper(metaPieces[i][0], metaPieces[i][1], metaBlock, doc);
+ item[mapped[0]] = mapped[1];
+ }
+ if (item.extra) item.extra = "Цитируемость в РИНЦ: " + item.extra;
if (abstractBlock)
item.abstractNote = doc.evaluate('./tbody/tr/td[2]/table/tbody/tr/td/font', abstractBlock, ns, XPathResult.ANY_TYPE, null).iterateNext().textContent;
+
+ // Set type
+ switch (item.itemType) {
+ case "обзорная статья": // Would be "review article"
+ case "научная статья":
+ item.itemType = "journalArticle";
+ break;
+ case "учебное пособие":
+ case "монография":
+ item.itemType = "book";
+ break;
+ case "публикация в сборнике трудов конференции":
+ item.itemType = "conferencePaper";
+ break;
+ default:
+ Zotero.debug("Unknown type: "+item.itemType+". Using 'journalArticle'");
+ item.itemType = "journalArticle";
+ break;
+ }
+
/*if (referenceBlock) {
var note = Zotero.Utilities.trimInternal(
doc.evaluate('./tbody/tr/td[2]/table', referenceBlock, ns, XPathResult.ANY_TYPE, null)
@@ -193,10 +216,14 @@ function scrape (doc) {
Zotero.debug(note);
item.notes.push(note);
}*/
+
if (codeBlock) {
- item.extra = doc.evaluate('.//td[2]', codeBlock, ns, XPathResult.ANY_TYPE, null).iterateNext().textContent;
- var doi = item.extra.match(/DOI: (10\..+?) /);
- if (doi) item.DOI = doi[1];
+ item.extra += ' '+ doc.evaluate('.//td[2]', codeBlock, ns, XPathResult.ANY_TYPE, null).iterateNext().textContent;
+ var doi = item.extra.match(/DOI: (10\.[^\s]+)/);
+ if (doi) {
+ item.DOI = doi[1];
+ item.extra = item.extra.replace(/DOI: 10\.[^\s]+/,"");
+ }
}
if (keywordBlock) {
@@ -205,7 +232,47 @@ function scrape (doc) {
item.tags.push(tag.textContent);
}
+ if (item.title.toUpperCase() == item.title) {
+ Zotero.debug("Trying to fix all-uppers");
+ item.title = item.title.substr(0,1) + item.title.toLowerCase().substr(1);
+ }
+
//if(pdf) item.attachments.push(pdf);
item.complete();
+}
+
+function mapper (from, to, block, doc) { // Resolve one label/value node pair (XPaths `from` and `to`, evaluated relative to `block`) into [item field name, value text]
+ var name = doc.evaluate(from, block, null, XPathResult.ANY_TYPE, null).iterateNext(); // first node matching `from`; its text is treated as the field label
+ var value = doc.evaluate(to, block, null, XPathResult.ANY_TYPE, null).iterateNext(); // first node matching `to`; its text is treated as the field value
+ if (!name || !value) return false; // either node missing: return false rather than an array
+ var key = false; // remains false when the label matches none of the cases below
+ switch (name.textContent.trim()) { // dispatch on the whitespace-trimmed label text
+ case "Журнал": // "Journal"
+ key = "publicationTitle"; break;
+ case "Издательство": // "Publisher"
+ key = "publisher"; break;
+ case "Год издания": // "Year of publication"
+ case "Год выпуска": // "Year of issue"
+ key = "date"; break;
+ case "Том": // "Volume"
+ key = "volume"; break;
+ case "Номер": // "Number"
+ key = "issue"; break;
+ case "ISSN":
+ key = "ISSN"; break;
+ case "Страницы": // "Pages"
+ key = "pages"; break;
+ case "Язык": // "Language"
+ key = "language"; break;
+ case "Место издания": // "Place of publication"
+ key = "place"; break;
+ case "Цит. в РИНЦ": // "Cited in RINTs" (abbreviated label)
+ key = "extra"; break;
+ case "Тип": // "Type"
+ key = "itemType"; break;
+ default:
+ Zotero.debug("Unmapped field: "+name.textContent.trim()); // unrecognised label: only logged, key is left as false
+ }
+ return [key, value.textContent.trim()]; // NOTE(review): key is false for unmapped labels, so callers should check it before using it as a property name
}
\ No newline at end of file