commit 218cf288f32e0987e5d9e71be21b2f92b111e190
parent dd665ec41f9dff4283b829a93d3b6ea5c0ef026e
Author: Avram Lyon <ajlyon@gmail.com>
Date: Tue, 21 Dec 2010 21:28:41 +0000
Trans: Committing CNKI and Douban changes from Ace Strong
Diffstat:
| M | translators/CNKI.js | | | 81 | +++++++++++++++++++++++++++++++++++++++++++++++-------------------------------- |
| M | translators/Douban.js | | | 101 | ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++--------------- |
2 files changed, 130 insertions(+), 52 deletions(-)
diff --git a/translators/CNKI.js b/translators/CNKI.js
@@ -2,13 +2,13 @@
"translatorID":"5c95b67b-41c5-4f55-b71a-48d5d7183063",
"label":"CNKI",
"creator":"Ace Strong <acestrong@gmail.com> and Heromyth <zxpmyth@yahoo.com.cn>",
- "target":"^https?://(?:(?:(dlib|epub|acad|apj1|law1)\\.cnki\\.net)|(?:[0-9\\.]+))/(?:grid2008|kns50|Kns55|kcms)",
+ "target":"^https?://(?:(?:(dlib|epub|acad|apj1|law1|www)\\.cnki\\.net)|(?:[0-9\\.]+))/(?:grid2008|kns50|Kns55|kcms)",
"minVersion":"2.0rc1",
"maxVersion":"",
"priority":100,
"inRepository":"1",
"translatorType":4,
- "lastUpdated":"2010-10-12 15:25:46"
+ "lastUpdated":"2010-12-10 14:32:46"
}
/*
@@ -56,11 +56,23 @@
// #################################
function detectCode(url) {
- var pattern = /(?:dbcode|dbname)=([A-Z]{4})/i;
+ var pattern = /(?:dbcode|dbname)=([A-Za-z]{4})/i;
if (pattern.test(url)) {
var code = pattern.exec(url)[1];
return code;
+ } else {
+ // parse from source page
+ var page = Zotero.Utilities.retrieveSource(url);
+ pattern = /id="nowdbname"[^>]*?>(.*?)<\/SPAN>/i;
+ if (pattern.test(page)) {
+ var dbname = pattern.exec(page)[1];
+// Zotero.debug(dbname);
+ if (dbname == "中国期刊全文数据库") {
+ return "CJFD";
+ }
+ }
}
+ return "NONE";
}
function getResolver(doc) {
@@ -108,7 +120,7 @@ function scrapeAndParse1(url) {
// Zotero.debug(url);
newItem.url = url;
- // 标题
+ // 标题/Title
pattern = /<span (?:id="chTitle"|class='datatitle')>(.*?)<\/span>/;
if (pattern.test(page)) {
var title = trimTags(pattern.exec(page)[1]);
@@ -116,7 +128,7 @@ function scrapeAndParse1(url) {
// Zotero.debug("title: "+title);
}
- // 作者
+ // 作者/Authors
var authorNames;
pattern = /【作者】(?:[\s\S]*?)GetLinkListEx\('(.*?);','/;
if (pattern.test(page)) {
@@ -139,7 +151,7 @@ function scrapeAndParse1(url) {
// Zotero.debug("authorNames:\n"+authorNames);
}
- // 摘要
+ // 摘要/Abstract
var abst;
pattern = /【摘要】\s*<[^>]*>(.*?)<\/span>/;
if (pattern.test(page)) {
@@ -154,6 +166,7 @@ function scrapeAndParse1(url) {
// Zotero.debug("abstract:\n"+abst);
newItem.abstractNote = Zotero.Utilities.trim(abst);
}
+
pattern = /【Abstract】\s*<[^>]*>(.*?)<\/span>/;
if (pattern.test(page)) {
abst = trimTags(pattern.exec(page)[1]);
@@ -174,13 +187,13 @@ function scrapeAndParse1(url) {
}
// Zotero.debug(newItem.abstractNote);
- // 关键词
+ // 关键词/Keywords
var tags;
pattern = /【关键词】(?:[\s\S]*?)KeywordFilter\('(.*?)'\),'kw'/;
if (pattern.test(page)) {
tags = pattern.exec(page)[1].split(";");
} else {
- pattern = /【中文关键词】([\s\S]*?)<\/tr>/;
+ pattern = /【(?:中文)?关键词】([\s\S]*?)<\/tr>/;
if (pattern.test(page)) {
tags = trimTags(pattern.exec(page)[1]).split(";");
}
@@ -214,7 +227,7 @@ function scrapeAndParse1(url) {
}
// 文献出处 & DOI & 出版时间
- pattern = /【文献出处】([\s\S]*?)<\/a>/;
+ pattern = /【(?:文献出处|刊名)】([\s\S]*?)<\/a>/;
if (pattern.test(page)) {
var publicationTitle = trimTags(pattern.exec(page)[1]);
newItem.publicationTitle = Zotero.Utilities.trim(publicationTitle);
@@ -234,7 +247,7 @@ function scrapeAndParse1(url) {
newItem.DOI = Zotero.Utilities.trim(doi);
// Zotero.debug("doi: "+doi);
}
- pattern = /【文献出处】(?:[\s\S]*?)(\d{4})年\s*(\d{2})(卷|期)/;
+ pattern = /【(?:文献出处|刊名)】(?:[\s\S]*?)(\d{4})年\s*([0-9A-Z]{2})(卷|期)/;
if (pattern.test(page)) {
var date = pattern.exec(page)[1];
newItem.date = date;
@@ -274,7 +287,7 @@ function scrapeAndParse2(url) {
// Zotero.debug(newItem.thesisType);
- // 标题
+ // 标题/Title
pattern = /<span (?:id="chTitle"|class='datatitle')>(.*?)<\/span>/;
if (pattern.test(page)) {
var title = pattern.exec(page)[1];
@@ -284,7 +297,7 @@ function scrapeAndParse2(url) {
// Zotero.debug("title: "+title);
}
- // 作者
+ // 作者/Authors
pattern = /【作者】([\s\S]*?)<\/a>/;
if (pattern.test(page)) {
var authorNames = trimTags(pattern.exec(page)[1]).split(";");
@@ -296,7 +309,7 @@ function scrapeAndParse2(url) {
// Zotero.debug("authorNames:\n"+authorNames);
}
- // 导师
+ // 导师/Advisors
pattern = /【导师】([\s\S]*?)<\/a>/;
if (pattern.test(page)) {
var directors = trimTags(pattern.exec(page)[1]).split(";");
@@ -308,7 +321,7 @@ function scrapeAndParse2(url) {
// Zotero.debug("directors: "+directors);
}
- // 摘要
+ // 摘要/Abstract
var abst;
pattern = /ReplaceFont\('ChDivSummary','(.*?)(?='\);ReplaceFont)/;
if (pattern.test(page)) {
@@ -343,7 +356,7 @@ function scrapeAndParse2(url) {
}
// Zotero.debug(newItem.abstractNote);
- // 关键词
+ // 关键词/Keywords
var tags;
pattern = /【关键词】\s*<span[^>]*>(.*?)<\/a>*<\/span>/;
if (pattern.test(page)) {
@@ -384,26 +397,26 @@ function scrapeAndParse2(url) {
// Zotero.debug(newItem.tags);
// 出版学校 & DOI & 出版时间
- var publisher;
+ var university;
pattern = /【网络出版投稿人】\s*<a[^>]*>(.*?)<\/a>/;
if (pattern.test(page)) {
- publisher = pattern.exec(page)[1];
+ university = pattern.exec(page)[1];
} else {
pattern = /【网络出版投稿人】([\s\S]*?)<\/tr>/;
if (pattern.test(page)) {
- publisher = Zotero.Utilities.trim(
+ university = Zotero.Utilities.trim(
trimTags(pattern.exec(page)[1]));
}
}
- if (publisher) {
+ if (university) {
pattern = /(.*?)((.*?))/;
- if (pattern.test(publisher)) {
- newItem.publisher = pattern.exec(publisher)[1];
- newItem.place = pattern.exec(publisher)[2];
+ if (pattern.test(university)) {
+ newItem.university = pattern.exec(university)[1];
+ newItem.place = pattern.exec(university)[2];
} else {
- newItem.publisher = publisher;
+ newItem.publisher = university;
}
-// Zotero.debug("publisher: "+publisher);
+// Zotero.debug("university: "+university);
}
var doi;
pattern = /【DOI】(.*?)<\/li>/;
@@ -450,7 +463,7 @@ function scrapeAndParse3(url) {
// Zotero.debug(url);
newItem.url = url;
- // 标题
+ // 标题/Title
pattern = /<span id="chTitle">(.*?)<\/span>/;
if (pattern.test(page)) {
var title = trimTags(pattern.exec(page)[1]);
@@ -458,7 +471,7 @@ function scrapeAndParse3(url) {
// Zotero.debug("title: "+title);
}
- // 作者
+ // 作者/Authors
pattern = /【作者】(.*?)<\/p>/;
if (pattern.test(page)) {
var authorNames = trimTags(pattern.exec(page)[1]).split(";");
@@ -471,7 +484,7 @@ function scrapeAndParse3(url) {
// Zotero.debug("authorNames:\n"+authorNames);
}
- // 摘要
+ // 摘要/Abstract
var abst;
pattern = /ReplaceFont\('ChDivSummary','(.*?)(?='\);ReplaceFont)/;
if (pattern.test(page)) {
@@ -502,7 +515,7 @@ function scrapeAndParse3(url) {
}
// Zotero.debug("abst:\n"+newItem.abstractNote);
- // 关键词
+ // 关键词/Keywords
pattern = /【关键词】\s*<span[^>]*>(.*?)<\/a>*<\/span>/;
if (pattern.test(page)) {
var tags = trimTags(pattern.exec(page)[1]).split(";");
@@ -569,7 +582,7 @@ function scrapeAndParse4(url) {
// Zotero.debug(url);
newItem.url = url;
- // 标题
+ // 标题/Title
pattern = /<span id="chTitle">(.*?)<\/span>/;
if (pattern.test(page)) {
var title = trimTags(pattern.exec(page)[1]);
@@ -587,7 +600,7 @@ function scrapeAndParse4(url) {
}
// Zotero.debug(newItem.shortTitle);
- // 作者
+ // 作者/Authors
pattern = /【作\s*者】(.*?)<\/p>/;
if (pattern.test(page)) {
var authorNames = trimTags(pattern.exec(page)[1]).split(";");
@@ -600,7 +613,7 @@ function scrapeAndParse4(url) {
// Zotero.debug("authorNames:\n"+authorNames);
}
- // 正文快照
+ // 正文快照/Body text snapshot (read into abst)
var abst;
pattern = /<p>【正文快照】(.*?)(?=<\/p>)/;
if (pattern.test(page)) {
@@ -654,7 +667,7 @@ function detectWeb(doc, url) {
var pattern = /detail.aspx/;
if (pattern.test(url)) {
- var code = detectCode(url);
+ var code = detectCode(url).toUpperCase();
// Zotero.debug(code);
if (code == "CJFQ" || code == "CJFD") {
return "journalArticle";
@@ -666,9 +679,11 @@ function detectWeb(doc, url) {
return "conferencePaper";
} else if (code == "CCND") {
return "newspaperArticle";
+ } else if (code == "NONE") {
+ Zotero.debug("Not support yet.");
}
}
-
+
pattern = /brief/;
if (pattern.test(url)) {
return "multiple"
diff --git a/translators/Douban.js b/translators/Douban.js
@@ -1,14 +1,14 @@
{
"translatorID":"fc353b26-8911-4c34-9196-f6f567c93901",
"label":"Douban",
- "creator":"Ace Strong <acestrong@gmail.com>",
- "target":"^https?://(www|book)\\.douban\\.com/subject",
+ "creator":"Ace Strong<acestrong@gmail.com>",
+ "target":"^https?://(?:www|book).douban.com/(?:subject|doulist|people/[a-zA-Z._]*/(?:do|wish|collect)|.*?status=(?:do|wish|collect)|group/[0-9]*?/collection|tag)",
"minVersion":"2.0rc1",
"maxVersion":"",
"priority":100,
"inRepository":"1",
"translatorType":4,
- "lastUpdated":"2010-10-10 00:23:10"
+ "lastUpdated":"2010-12-19 20:09:43"
}
/*
@@ -39,6 +39,10 @@
*
* - A search listing of books
* - A book page
+ * - A doulist page
+ * - A do page
+ * - A wish page
+ * - A collect page
*/
// http://book.douban.com/
@@ -70,10 +74,10 @@ function scrapeAndParse(url) {
newItem.url = url;
// 标题
- pattern = /<h1>(.*?)<\/h1>/;
+ pattern = /<h1>([\s\S]*?)<\/h1>/;
if (pattern.test(page)) {
var title = pattern.exec(page)[1];
- newItem.title = title;
+ newItem.title = Zotero.Utilities.trim(trimTags(title));
// Zotero.debug("title: "+title);
}
@@ -169,7 +173,7 @@ function scrapeAndParse(url) {
}
// 简介
- pattern = /<h2[^>]*?>简介[\s\S]*?<\/h2>([\s\S]*?)<\/div>/;
+ pattern = /<h2[^>]*?>(?:内容)?简介[\s\S]*?<\/h2>([\s\S]*?)<\/div>/;
if (pattern.test(page)) {
var intro = pattern.exec(page)[1];
intro = trimTags(intro.replace(/(<br\/>)/g, "\n"));
@@ -218,6 +222,23 @@ function scrapeAndParse(url) {
}
// Zotero.debug("abstractNote: "+newItem.abstractNote);
}
+
+ // 标签/Tags
+ pattern = /<h2\s*?>豆瓣成员常用的标签([\s\S]*?)<\/div>/;
+ if (pattern.test(page)) {
+ var labels = pattern.exec(page)[1];
+ pattern = /<a [^>]*?>(.*?)<\/a>/g;
+
+ var result = labels.match(pattern);
+ for (var i=0; i<result.length; i++) {
+ var label = trimTags(result[i]);
+
+ if (label) {
+ newItem.tags.push(label);
+ }
+// Zotero.debug(label);
+ }
+ }
newItem.complete();
}
@@ -227,7 +248,7 @@ function scrapeAndParse(url) {
// #########################
function detectWeb(doc, url) {
- var pattern = /subject_search/;
+ var pattern = /subject_search|doulist|people\/[a-zA-Z._]*?\/(?:do|wish|collect)|.*?status=(?:do|wish|collect)|group\/[0-9]*?\/collection|tag/;
if (pattern.test(url)) {
return "multiple";
@@ -244,22 +265,64 @@ function doWeb(doc, url) {
if(detectWeb(doc, url) == "multiple") {
// Zotero.debug("Enter multiple.");
- // search page
+ // collect candidate items from the listing page
var items = new Array();
- pattern = /<a class="nbg"\s*([^>]*?)>/g;
- if (pattern.test(page)) {
- var result = page.match(pattern);
-// Zotero.debug(result.length);
-// Zotero.debug(result[1]);
-
- pattern = /href="(.*?)".*?title="(.*?)"/;
- for (var i=0; i<result.length; i++) {
- var res = pattern.exec(result[i]);
- if(res[1]) {
- items[res[1]] = res[2];
+ pattern = /doulist/;
+ if (pattern.test(url)) {
+ // fetch items from doulist
+ pattern = /<table ([\s\S]*?)<\/table>/g;
+ if (pattern.test(page)) {
+ var result = page.match(pattern);
+// Zotero.debug(result.length);
+// Zotero.debug(result[1]);
+
+ pattern = /<div (?:[\s\S]*?)<a href="(.*?)">(.*?)<\/a>\s*?<\/div>/;
+ for (var i=0; i<result.length; i++) {
+ var res = pattern.exec(result[i]);
+ if(res[1]) {
+ items[res[1]] = res[2];
+ }
}
}
+ } else {
+ pattern = /(?:do|wish|collect)$/;
+
+ if (pattern.test(url)) {
+ // fetch items from do/wish/collect list
+ pattern = /<a href="(?:.*?)">\s*<em>(?:.*?)<\/em>\s*<\/a>/g;
+ if (pattern.test(page)) {
+ var result = page.match(pattern);
+// Zotero.debug(result.length);
+// Zotero.debug(result[0]);
+
+ pattern = /<a href="(.*?)">\s*<em>(.*?)<\/em>\s*<\/a>/;
+ for (var i=0; i<result.length; i++) {
+ var res = pattern.exec(result[i]);
+ if(res[1]) {
+ items[res[1]] = res[2];
+ }
+ }
+ }
+ } else {
+ // fetch items from search result or collection or tag
+ pattern = /<a class="nbg"\s*([^>]*?)>/g;
+ if (pattern.test(page)) {
+ var result = page.match(pattern);
+// Zotero.debug(result.length);
+// Zotero.debug(result[1]);
+
+ pattern = /href="(.*?)".*?title="(.*?)"/;
+ for (var i=0; i<result.length; i++) {
+ var res = pattern.exec(result[i]);
+ if(res[1]) {
+ items[res[1]] = res[2];
+ }
+ }
+ }
+ }
+
+
}
// 让用户选择要保存哪些文献