commit a0ecb645a8dbc6271c28e901fe9159a075e0ff6b
parent 8bc1c0ad7e29bcf5ed59fd2a8edfc6fe472b86e4
Author: Avram Lyon <ajlyon@gmail.com>
Date: Thu, 12 Aug 2010 15:30:48 +0000
Adding TV by the Numbers translator by odie5533
Diffstat:
1 file changed, 138 insertions(+), 0 deletions(-)
diff --git a/translators/TV by the Numbers.js b/translators/TV by the Numbers.js
@@ -0,0 +1,138 @@
+{
+ "translatorID":"180a62bf-efdd-4d38-8d85-8971af04dd85",
+ "label":"TV by the Numbers",
+ "creator":"odie5533",
+ "target":"^http://tvbythenumbers\\.com",
+ "minVersion":"1.0",
+ "maxVersion":"",
+ "priority":100,
+ "inRepository":"0",
+ "translatorType":4,
+ "lastUpdated":"2010-08-04 03:31:19"
+}
+
+/*
+ TV by the Numbers - translator for Zotero
+ Copyright (C) 2010 odie5533
+
+ This program is free software: you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation, either version 3 of the License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program. If not, see <http://www.gnu.org/licenses/>.
+*/
+
+/*
+ This translator supports saving a snapshot of a single post and saving
+ the citation of many posts at once without visiting each post. Thus, it does
+ not save a snapshot when multiple citations are to be saved.
+*/
+
+
+PUB_TITLE = "TV by the Numbers";
+XPATH_TITLE = "//title";
+XPATH_PAGES = null;
+XPATH_DATE = "substring-after(substring-before(string(//p[@class='posted_on']),' by '), 'on ')";
+RE_DATE = /(.*)/;
+XPATH_AUTHORS = "substring-after(string(//p[@class='posted_on']),' by ')";
+RE_AUTHORS = /(.*)/;
+
+function detectWeb(doc, url) {
+ /* site has lots of garbage, check we're on the right doc */
+ if (!xpath_string(doc, doc, XPATH_TITLE))
+ return;
+ var posts = doc.evaluate("count(//div[@class='post-alt blog'])", doc, null,
+ XPathResult.NUMBER_TYPE, null).numberValue;
+ if (posts == 1)
+ return "webpage";
+ else if (posts > 1)
+ return "multiple";
+}
+
+function xpath_string(doc, node, xpath) {
+ var res = doc.evaluate(xpath, node, null, XPathResult.STRING_TYPE, null);
+ if (!res || !res.stringValue)
+ return null;
+ return Zotero.Utilities.trim(res.stringValue);
+}
+
+function xpre(doc, node, xpath, reg) {
+ var xpmatch = xpath_string(doc, node, xpath);
+ return reg.exec(xpmatch)[1];
+}
+
+function scrape(doc, url) {
+ var items = new Array();
+ var posts = doc.evaluate("//div[@class='post-alt blog']", doc, null,
+ XPathResult.ANY_TYPE, null);
+
+ var post_count = 0;
+
+ while (post = posts.iterateNext()) {
+ var newItem = new Zotero.Item("webpage");
+ newItem.publicationTitle = PUB_TITLE;
+
+ var link = post.getElementsByTagName("a")[0];
+ newItem.url = link.href;
+
+ var title = Zotero.Utilities.unescapeHTML(
+ Zotero.Utilities.cleanTags(link.textContent));
+ title = title.replace(/(\s+)(?:‘|’)|(?:‘|’)(\s+)/g, "$1''$2").replace(/‘|’/g, "'");
+ newItem.title = title;
+
+ if (XPATH_DATE)
+ newItem.date = xpre(doc, post, XPATH_DATE, RE_DATE);
+ if (XPATH_PAGES)
+ newItem.pages = xpath_string(doc, post, XPATH_PAGES);
+
+ //authors
+ var author_text = xpre(doc, post, XPATH_AUTHORS, RE_AUTHORS);
+ var authors = [];
+ if (author_text) {
+ if (author_text.indexOf(" and ") != -1)
+ authors = author_text.split(" and ");
+ else if (author_text.indexOf(";") != -1)
+ authors = author_text.split(";");
+ else
+ authors.push(author_text);
+ }
+ for each(var a in authors)
+ if (a != 'null')
+ newItem.creators.push(
+ Zotero.Utilities.cleanAuthor(a, "author"));
+
+ // attach html
+ if (url == newItem.url)
+ newItem.attachments.push({title:PUB_TITLE+" Snapshot",
+ mimeType:"text/html", url:doc.location.href, snapshot:true});
+
+ newItem.toString = function() { return this.title; };
+ items[newItem.url] = newItem;
+ post_count++;
+ }
+
+ /* a stupidly complex way of calling selectItems, and then completing
+ the items which were selected */
+ if (post_count > 1) {
+ var sel_items = new Object();
+ for each(var i in items)
+ sel_items[i.url] = i.title;
+ sel_items = Zotero.selectItems(sel_items);
+
+ for (var i in sel_items)
+ items[i].complete();
+ } else if (post_count == 1)
+ for each(var i in items)
+ i.complete();
+}
+
+function doWeb(doc, url) {
+ scrape(doc, url);
+}