commit ef28a3a705eaa12b06e019fa82b1309e12f0be1b
parent e681b4698f1472db27e61eb08bc3c4e571fc6bbb
Author: Avram Lyon <ajlyon@gmail.com>
Date: Mon, 20 Dec 2010 08:21:27 +0000
Trans: New version of NZZ.ch translator by Ibex
Diffstat:
| M | translators/NZZ.ch.js | | | 100 | ++++++++++++++++++++++++++++++++++++++++++------------------------------------- |
1 file changed, 53 insertions(+), 47 deletions(-)
diff --git a/translators/NZZ.ch.js b/translators/NZZ.ch.js
@@ -1,14 +1,14 @@
{
- "translatorID":"61ffe600-55e0-11df-bed9-0002a5d5c51b",
- "translatorType":4,
- "label":"nzz.ch",
- "creator":"ibex",
- "target":"^http://((www\\.)?nzz\\.ch/.)",
- "minVersion":"2.0",
- "maxVersion":"",
- "priority":100,
- "inRepository":false,
- "lastUpdated":"2010-09-08 12:00:00"
+ "translatorID":"61ffe600-55e0-11df-bed9-0002a5d5c51b",
+ "label":"nzz.ch",
+ "creator":"ibex",
+ "target":"^http://((www\\.)?nzz\\.ch/.)",
+ "minVersion":"2.0",
+ "maxVersion":"",
+ "priority":100,
+ "inRepository":"0",
+ "translatorType":4,
+ "lastUpdated":"2010-12-20 11:17:03"
}
/*
@@ -41,20 +41,20 @@ function getXPath(xpath, doc) {
/* Zotero API */
function detectWeb(doc, url) {
- //Zotero.debug("ibex detectWeb URL= "+ url);
- if (doc.title.substr(0, 6) == "Suche " && getXPath('//div[@class = "searchdetails"]', doc)) {
+ //Zotero.debug("ibex detectWeb URL= " + url);
+ if (doc.title.substr(0, 6) == "Suche " && getXPath('//ul[@class = "berichte"]', doc)) {
return "multiple";
- } else if (doc.location.href.match(/\.\d+\.html/) && getXPath('//li[@id = "article"]/div[@class = "article"]', doc)) {
+ } else if (doc.location.href.match(/\.\d+\.html/) && getXPath('/html/body[@class = "artikel"]', doc)) {
return "newspaperArticle";
}
}
/* Zotero API */
function doWeb(doc, url) {
- //Zotero.debug("ibex doWeb URL= "+ url);
+ //Zotero.debug("ibex doWeb URL= " + url);
var urls = new Array();
if (detectWeb(doc, url) == "multiple") {
- var items = Zotero.Utilities.getItemArray(doc, doc.getElementById("searchresult").getElementsByTagName("h3"), '\\.\\d+\\.html');
+ var items = Zotero.Utilities.getItemArray(doc, doc.getElementById("content").getElementsByClassName('berichte'), '\\.\\d+\\.html');
if (!items || countObjectProperties(items) == 0) {
return true;
}
@@ -75,59 +75,65 @@ function doWeb(doc, url) {
/* Three types of articles: "Neue Zürcher Zeitung", "NZZ Online" and "NZZ am Sonntag" */
function scrape(doc) {
- //Zotero.debug("ibex scrape URL = "+ doc.location.href);
- var newArticle = new Zotero.Item('newspaperArticle');
- newArticle.url = doc.location.href;
- newArticle.title = Zotero.Utilities.trimInternal(getXPath('//li[@id = "article"]/div[@class = "article"]/div[@class = "header"]//h1', doc).textContent);
+ //Zotero.debug("ibex scrape URL = " + doc.location.href);
+ var newItem = new Zotero.Item('newspaperArticle');
+ newItem.url = doc.location.href;
+ newItem.title = Zotero.Utilities.trimInternal(getXPath('//div[@id = "content"]//h1', doc).textContent);
- var publ = Zotero.Utilities.trimInternal(getXPath('//li[@id = "article"]/div[@class = "article"]/div[@class = "header"]/div[@class = "pubication"]', doc).textContent);
+ var publ = Zotero.Utilities.trimInternal(getXPath('//div[@id = "content"]//p[@class = "dachzeile"]', doc).textContent);
publ = publ.split(',');
- newArticle.date = Zotero.Utilities.trimInternal(publ[0]);
- newArticle.publicationTitle = Zotero.Utilities.trimInternal(publ[publ.length - 1]);
- if (newArticle.publicationTitle.match(/^\d/)) {
+ newItem.date = Zotero.Utilities.trimInternal(publ[0]);
+
+ newItem.publicationTitle = Zotero.Utilities.trimInternal(publ[publ.length - 1]);
+ if (newItem.publicationTitle.match(/^\d/)) {
//fall back to a generic publication title if the last field starts with a digit (i.e. it is a date, not a title)
- newArticle.publicationTitle = "NZZ";
- } else if (newArticle.publicationTitle == "Neue Zürcher Zeitung") {
- newArticle.ISSN = "0376-6829";
- } else if (newArticle.publicationTitle == "NZZ am Sonntag") {
- newArticle.ISSN = "1660-0851";
+ newItem.publicationTitle = "NZZ";
+ } else if (newItem.publicationTitle == "Neue Zürcher Zeitung") {
+ newItem.ISSN = "0376-6829";
+ } else if (newItem.publicationTitle == "NZZ am Sonntag") {
+ newItem.ISSN = "1660-0851";
}
- var subtitle = getXPath('//li[@id = "article"]/div[@class = "article"]/div[@class = "header"]//h2', doc);
- if (subtitle != null && newArticle.publicationTitle != "NZZ am Sonntag") {
- newArticle.shortTitle = newArticle.title;
- newArticle.title += ": " + Zotero.Utilities.trimInternal(subtitle.textContent);
+ var subtitle = getXPath('//div[@id = "content"]//h2', doc);
+ if ((subtitle != null) && (Zotero.Utilities.trimInternal(subtitle.textContent) != "")) {
+ newItem.shortTitle = newItem.title;
+ newItem.title += ": " + Zotero.Utilities.trimInternal(subtitle.textContent);
}
- var teaser = getXPath('//li[@id = "article"]/div[@class = "article"]//div[@class = "body"]/h5', doc);
- if (teaser != null) {
- newArticle.abstractNote = Zotero.Utilities.trimInternal(teaser.textContent);
+ var teaser = getXPath('//div[@id = "content"]//h3', doc);
+ if ((teaser != null) && (Zotero.Utilities.trimInternal(teaser.textContent) != "")) {
+ newItem.abstractNote = Zotero.Utilities.trimInternal(teaser.textContent);
}
- var authorline = getXPath('//li[@id = "article"]/div[@class = "article"]//div[@class = "body"]/p[contains(@class, "quelle")]', doc);
- authorline = !authorline && newArticle.publicationTitle == "NZZ am Sonntag"? subtitle :authorline; // subtitle in some cases of "NZZ am Sonntag"
+ var authorline = getXPath('//div[@id = "content"]//p[@class = "autor"]', doc);
if (authorline != null) {
authorline = Zotero.Utilities.trimInternal(authorline.textContent);
- //assumption of authorline: "[Interview:|Von ]name1 [und Name2][, location]"
- authorline = authorline.replace(/^.*Von /, "");
- authorline = authorline.replace(/Interview: /, "");
+ //assumption of authorline: "[Interview:|Von ]name1[, name2] [und Name3][, location]"
+ authorline = authorline.replace(/^Von /, "");
+ authorline = authorline.replace(/^Interview: /, "");
+ authorline = authorline.replace(/vor Ort /i, "");
//remove ", location"
- authorline = Zotero.Utilities.trim(authorline.replace(/, .*$/, ""));
+ authorline = Zotero.Utilities.trim(authorline.replace(/, \S*$/, ""));
- var authors = authorline.split(" und ");
+ var authors = authorline.split(/,|und/);
for (var i = 0; i < authors.length && authorline.length > 0; i++) {
- newArticle.creators.push(Zotero.Utilities.cleanAuthor(authors[i], "author"));
+ newItem.creators.push(Zotero.Utilities.cleanAuthor(authors[i], "author"));
}
}
- var section = getXPath('//ul[@id="navContent"]/li/a[@id="navContentSelected"]', doc);
+ var section = getXPath('//ul[@id="navi"]//ul[@id="submenu1"]/li[@class="selected"]/a', doc);
if (section != null) {
- newArticle.section = Zotero.Utilities.trimInternal(section.textContent.replace(/·/,""));
+ newItem.section = Zotero.Utilities.trimInternal(section.textContent);
+ }
+
+ var source = getXPath('//div[@id = "content"]//span[@class="quelle"]', doc);
+ if (source != null) {
+ newItem.extra = Zotero.Utilities.trimInternal(source.textContent).replace(/^\(/,"").replace(/\)$/,"");
}
- newArticle.attachments.push({title:"NZZ Online Article Snapshot", mimeType:"text/html", url:doc.location.href + "?printview=true", snapshot:true});
+ newItem.attachments.push({title:"NZZ Online Article Snapshot", mimeType:"text/html", url:doc.location.href, snapshot:true});
- newArticle.complete();
+ newItem.complete();
}
/* There is no built-in function to count the properties of an object, although objects are often used as associative arrays. */