commit 7eb0781be46324c415ab4a31074a08180f727cd7
parent 23cae9a3edafa2efc9abb93c161f763ada3c9790
Author: Avram Lyon <ajlyon@gmail.com>
Date: Thu, 10 Mar 2011 18:13:46 +0000
Trans: Quick rewrite of Reuters
Diffstat:
1 file changed, 68 insertions(+), 25 deletions(-)
diff --git a/translators/Reuters.js b/translators/Reuters.js
@@ -1,18 +1,42 @@
{
- "translatorID":"83979786-44af-494a-9ddb-46654e0486ef",
- "translatorType":4,
- "label":"Reuters",
- "creator":"Michael Berkowitz",
- "target":"http://(www\\.)?reuters.com/",
- "minVersion":"1.0.0b4.r5",
- "maxVersion":"",
- "priority":100,
- "inRepository":true,
- "lastUpdated":"2008-07-07 14:50:00"
+ "translatorID": "83979786-44af-494a-9ddb-46654e0486ef",
+ "label": "Reuters",
+ "creator": "Avram Lyon, Michael Berkowitz",
+ "target": "^https?://(www\\.)?reuters\\.com/",
+ "minVersion": "2.0",
+ "maxVersion": "",
+ "priority": 100,
+ "inRepository": "1",
+ "translatorType": 4,
+ "lastUpdated": "2011-03-10 21:05:59"
}
+/*
+ Reuters Translator
+ Copyright (C) 2011 Avram Lyon, ajlyon@gmail.com
+
+ This program is free software: you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation, either version 3 of the License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program. If not, see <http://www.gnu.org/licenses/>.
+*/
+
+/*
+ Translator for Reuters. This is a minimal translator, written just to get Reuters working again after a site redesign.
+ Future versions should implement saving of multiple items and attend to the nits that this translator has
+ probably missed.
+*/
+
function detectWeb(doc, url) {
- if (url.match(/article/)) {
+ if (url.match(/^https?:\/\/(www\.)?reuters\.com\/article/)) { // only http(s) URLs under reuters.com/article (optional "www.") are reported; anything else falls through and returns nothing
return "newspaperArticle";
}
}
@@ -20,18 +44,38 @@ function detectWeb(doc, url) {
function doWeb(doc, url) {
var item = new Zotero.Item("newspaperArticle");
- item.title = Zotero.Utilities.trimInternal(doc.evaluate('//div[@class="article primaryContent"]/h1', doc, null, XPathResult.ANY_TYPE, null).iterateNext().textContent);
- item.date = doc.evaluate('//div[@class="timestampHeader"]', doc, null, XPathResult.ANY_TYPE, null).iterateNext().textContent.match(/^.*\d{4}/)[0];
- var byline = doc.evaluate('//div[@id="resizeableText"]/p[1]', doc, null, XPathResult.ANY_TYPE, null).iterateNext().textContent;
- if (byline.match(/^By/)) {
- var authors = byline.substr(3).split(',');
- for each (var aut in authors) {
- item.creators.push(Zotero.Utilities.cleanAuthor(aut, "author"));
- }
- item.abstractNote = doc.evaluate('//div[@id="resizeableText"]/p[2]', doc, null, XPathResult.ANY_TYPE, null).iterateNext().textContent.match(/\-\s+(.*)$/)[1];
- } else {
- item.abstractNote = byline.match(/\-\s+(.*)$/)[1];
+ item.title = doc.evaluate('//meta[@property="og:title"]', doc, null, XPathResult.ANY_TYPE, null).iterateNext().content; // title comes from the og:title meta tag; NOTE(review): this and every lookup below dereferences iterateNext() without a null check, so a missing node throws
+ item.date = doc.evaluate('//meta[@name="REVISION_DATE"]', doc, null, XPathResult.ANY_TYPE, null).iterateNext().content; // date is the raw content of the REVISION_DATE meta tag
+ item.place = doc.evaluate('//div[@id="articleInfo"]//span[@class="location"]', doc, null, XPathResult.ANY_TYPE, null).iterateNext().textContent; // dateline location text; re-cased further down when it is all upper-case
+ var byline = doc.evaluate('//div[@id="articleInfo"]//p[@class="byline"]', doc, null, XPathResult.ANY_TYPE, null).iterateNext().textContent;
+ var authors = byline.substr(3).split(','); // NOTE(review): the first three characters are dropped unconditionally (the removed code checked /^By/ first), and authorFix also strips a leading "By " - confirm bylines always start with "By "
+ for each (var aut in authors) { // "for each ... in" is non-standard, Mozilla-specific syntax that iterates over values
+ item.creators.push(authorFix(aut));
}
- item.url = url;
+ item.abstractNote = doc.evaluate('//span[@class="focusParagraph"]/p', doc, null, XPathResult.ANY_TYPE, null).iterateNext().textContent.replace(/^.*\(Reuters\)\s+-\s+/,""); // first focus paragraph, minus everything up to and including the "(Reuters) - " prefix when present
+ item.url = doc.evaluate('//link[@rel="canonical"]', doc, null, XPathResult.ANY_TYPE, null).iterateNext().href; // use the page's canonical link rather than the url argument
+ item.publicationTitle = "Reuters";
+ if (item.place == item.place.toUpperCase()) // true only when the place contains no lower-case letters
+ item.place = Zotero.Utilities.capitalizeTitle(item.place.toLowerCase(),true);
item.complete();
-}
-\ No newline at end of file
+}
+
+function authorFix(author) { // turns one byline fragment (a string) into the object returned by Zotero.Utilities.cleanAuthor, with extra clean-up
+ // Strip a leading "By " (matched case-insensitively) when the fragment has one
+ if(author.substr(0, 3).toLowerCase() == "by ") {
+ author = author.substr(3);
+ }
+ var cleaned = Zotero.Utilities.cleanAuthor(author, "author");
+ // If we have only one name (firstName came back empty), set the author to one-name mode
+ if (cleaned.firstName == "") {
+ cleaned["fieldMode"] = true;
+ } else {
+ // We can check for all lower-case and capitalize if necessary
+ // All-uppercase is handled by cleanAuthor
+ cleaned.firstName = (cleaned.firstName == cleaned.firstName.toLowerCase()) ? // re-case only when the name has no upper-case letters
+ Zotero.Utilities.capitalizeTitle(cleaned.firstName, true) : cleaned.firstName;
+ cleaned.lastName = (cleaned.lastName == cleaned.lastName.toLowerCase()) ? // same rule for the last name
+ Zotero.Utilities.capitalizeTitle(cleaned.lastName, true) : cleaned.lastName;
+ }
+ return cleaned; // the cleanAuthor result, with fieldMode set or names re-cased as above
+}