commit 47488d752e5906e3182164e3b389dfbb631bf0e2
parent d8341eb2d12e36bdcd6b059ae6064c31daa5c112
Author: Avram Lyon <ajlyon@gmail.com>
Date: Tue, 12 Oct 2010 19:15:19 +0000
Translator update for CNKI, new translators for Douban Books and Wanfang Data; thanks to Ace Strong for submissions.
Diffstat:
3 files changed, 1154 insertions(+), 488 deletions(-)
diff --git a/translators/CNKI.js b/translators/CNKI.js
@@ -1,14 +1,14 @@
{
"translatorID":"5c95b67b-41c5-4f55-b71a-48d5d7183063",
"label":"CNKI",
- "creator":"Ace Strong<acestrong@gmail.com> and Heromyth<zxpmyth@yahoo.com.cn>",
+ "creator":"Ace Strong <acestrong@gmail.com> and Heromyth <zxpmyth@yahoo.com.cn>",
"target":"^https?://(?:(?:(dlib|epub|acad|apj1|law1)\\.cnki\\.net)|(?:[0-9\\.]+))/(?:grid2008|kns50|Kns55|kcms)",
- "minVersion":"2.0.b4",
+ "minVersion":"2.0rc1",
"maxVersion":"",
"priority":100,
"inRepository":"1",
"translatorType":4,
- "lastUpdated":"2010-10-07 15:58:33"
+ "lastUpdated":"2010-10-12 15:25:46"
}
/*
@@ -99,547 +99,551 @@ function trimMultiline(text) {
// work for journalArticle
function scrapeAndParse1(url) {
// Zotero.debug("journalArticle");
- var page = Zotero.Utilities.retrieveSource(url);
- var pattern;
-
- // 类型 & URL
- var itemType = "journalArticle";
- var newItem = new Zotero.Item(itemType);
-// Zotero.debug(url);
- newItem.url = url;
-
- // 标题
- pattern = /<span (?:id="chTitle"|class='datatitle')>(.*?)<\/span>/;
- if (pattern.test(page)) {
- var title = trimTags(pattern.exec(page)[1]);
- newItem.title = title;
-// Zotero.debug("title: "+title);
- }
+ Zotero.Utilities.doGet(url, function(page) {
+ var pattern;
- // 作者
- var authorNames;
- pattern = /【作者】(?:[\s\S]*?)GetLinkListEx\('(.*?);','/;
- if (pattern.test(page)) {
- authorNames = pattern.exec(page)[1].split(";");
- } else {
- pattern = /【作者】([\s\S]*?)<\/tr>/;
+ // 类型 & URL
+ var itemType = "journalArticle";
+ var newItem = new Zotero.Item(itemType);
+// Zotero.debug(url);
+ newItem.url = url;
+
+ // 标题
+ pattern = /<span (?:id="chTitle"|class='datatitle')>(.*?)<\/span>/;
+ if (pattern.test(page)) {
+ var title = trimTags(pattern.exec(page)[1]);
+ newItem.title = title;
+// Zotero.debug("title: "+title);
+ }
+
+ // 作者
+ var authorNames;
+ pattern = /【作者】(?:[\s\S]*?)GetLinkListEx\('(.*?);','/;
if (pattern.test(page)) {
- authorNames = trimTags(pattern.exec(page)[1]).split(";");
+ authorNames = pattern.exec(page)[1].split(";");
+ } else {
+ pattern = /【作者】([\s\S]*?)<\/tr>/;
+ if (pattern.test(page)) {
+ authorNames = trimTags(pattern.exec(page)[1]).split(";");
+ }
}
- }
- if (authorNames) {
- for (var i=0; i<authorNames.length; i++) {
- var authorName = Zotero.Utilities.trim(authorNames[i]);
- if (authorName.length > 0) {
- newItem.creators.push(
- Zotero.Utilities.cleanAuthor(authorNames[i],
- "author", true));
+ if (authorNames) {
+ for (var i=0; i<authorNames.length; i++) {
+ var authorName = Zotero.Utilities.trim(authorNames[i]);
+ if (authorName.length > 0) {
+ newItem.creators.push(
+ Zotero.Utilities.cleanAuthor(authorNames[i],
+ "author", true));
+ }
}
+// Zotero.debug("authorNames:\n"+authorNames);
}
-// Zotero.debug("authorNames:\n"+authorNames);
- }
-
- // 摘要
- var abst;
- pattern = /【摘要】\s*<[^>]*>(.*?)<\/span>/;
- if (pattern.test(page)) {
- abst = trimTags(pattern.exec(page)[1]);
- } else {
- pattern = /【摘要】([\s\S]*?)<\/tr>/;
+
+ // 摘要
+ var abst;
+ pattern = /【摘要】\s*<[^>]*>(.*?)<\/span>/;
if (pattern.test(page)) {
abst = trimTags(pattern.exec(page)[1]);
+ } else {
+ pattern = /【摘要】([\s\S]*?)<\/tr>/;
+ if (pattern.test(page)) {
+ abst = trimTags(pattern.exec(page)[1]);
+ }
}
- }
- if (abst) {
-// Zotero.debug("abstract:\n"+abst);
- newItem.abstractNote = Zotero.Utilities.trim(abst);
- }
- pattern = /【Abstract】\s*<[^>]*>(.*?)<\/span>/;
- if (pattern.test(page)) {
- abst = trimTags(pattern.exec(page)[1]);
- } else {
- pattern = /【英文摘要】([\s\S]*?)<\/tr>/;
+ if (abst) {
+// Zotero.debug("abstract:\n"+abst);
+ newItem.abstractNote = Zotero.Utilities.trim(abst);
+ }
+ pattern = /【Abstract】\s*<[^>]*>(.*?)<\/span>/;
if (pattern.test(page)) {
abst = trimTags(pattern.exec(page)[1]);
- }
- }
- if (abst) {
-// Zotero.debug("abstract:\n"+abst);
- if (newItem.abstractNote===undefined) {
- newItem.abstractNote = Zotero.Utilities.trim(abst);
} else {
- newItem.abstractNote = newItem.abstractNote + "\n"
- + Zotero.Utilities.trim(abst);
+ pattern = /【英文摘要】([\s\S]*?)<\/tr>/;
+ if (pattern.test(page)) {
+ abst = trimTags(pattern.exec(page)[1]);
+ }
}
- }
-// Zotero.debug(newItem.abstractNote);
-
- // 关键词
- var tags;
- pattern = /【关键词】(?:[\s\S]*?)KeywordFilter\('(.*?)'\),'kw'/;
- if (pattern.test(page)) {
- tags = pattern.exec(page)[1].split(";");
- } else {
- pattern = /【中文关键词】([\s\S]*?)<\/tr>/;
+ if (abst) {
+// Zotero.debug("abstract:\n"+abst);
+ if (newItem.abstractNote===undefined) {
+ newItem.abstractNote = Zotero.Utilities.trim(abst);
+ } else {
+ newItem.abstractNote = newItem.abstractNote + "\n"
+ + Zotero.Utilities.trim(abst);
+ }
+ }
+// Zotero.debug(newItem.abstractNote);
+
+ // 关键词
+ var tags;
+ pattern = /【关键词】(?:[\s\S]*?)KeywordFilter\('(.*?)'\),'kw'/;
if (pattern.test(page)) {
- tags = trimTags(pattern.exec(page)[1]).split(";");
+ tags = pattern.exec(page)[1].split(";");
+ } else {
+ pattern = /【中文关键词】([\s\S]*?)<\/tr>/;
+ if (pattern.test(page)) {
+ tags = trimTags(pattern.exec(page)[1]).split(";");
+ }
}
- }
- if (tags) {
- for (var i=0; i<tags.length; i++) {
- var tag = Zotero.Utilities.trim(tags[i]);
- if (tag.length>0 && newItem.tags.indexOf(tag)<0) {
- newItem.tags.push(tag);
+ if (tags) {
+ for (var i=0; i<tags.length; i++) {
+ var tag = Zotero.Utilities.trim(tags[i]);
+ if (tag.length>0 && newItem.tags.indexOf(tag)<0) {
+ newItem.tags.push(tag);
+ }
}
+// Zotero.debug("tags:\n"+tags);
}
-// Zotero.debug("tags:\n"+tags);
- }
- pattern = /【Key words】(?:[\s\S]*?)GetLinkList\('(.*?)','kw'/;
- if (pattern.test(page)) {
- tags = pattern.exec(page)[1].split(";");
- } else {
- pattern = /【英文关键词】([\s\S]*?)<\/tr>/;
+ pattern = /【Key words】(?:[\s\S]*?)GetLinkList\('(.*?)','kw'/;
if (pattern.test(page)) {
- tags = trimTags(pattern.exec(page)[1]).split(";");
+ tags = pattern.exec(page)[1].split(";");
+ } else {
+ pattern = /【英文关键词】([\s\S]*?)<\/tr>/;
+ if (pattern.test(page)) {
+ tags = trimTags(pattern.exec(page)[1]).split(";");
+ }
}
- }
- if (tags) {
- for (var i=0; i<tags.length; i++) {
- var tag = Zotero.Utilities.trim(tags[i]);
- if (tag.length>0 && newItem.tags.indexOf(tag)<0) {
- newItem.tags.push(tag);
+ if (tags) {
+ for (var i=0; i<tags.length; i++) {
+ var tag = Zotero.Utilities.trim(tags[i]);
+ if (tag.length>0 && newItem.tags.indexOf(tag)<0) {
+ newItem.tags.push(tag);
+ }
}
+// Zotero.debug("tags:\n"+tags);
}
-// Zotero.debug("tags:\n"+tags);
- }
-
- // 文献出处 & DOI & 出版时间
- pattern = /【文献出处】([\s\S]*?)<\/a>/;
- if (pattern.test(page)) {
- var publicationTitle = trimTags(pattern.exec(page)[1]);
- newItem.publicationTitle = Zotero.Utilities.trim(publicationTitle);
-// Zotero.debug("publicationTitle: "+publicationTitle);
- }
- var doi;
- pattern = /【DOI】(.*?)<\/li>/;
- if (pattern.test(page)) {
- doi= pattern.exec(page)[1];
- } else {
- pattern = /【DOI】([\s\S]*?)<\/tr>/;
+
+ // 文献出处 & DOI & 出版时间
+ pattern = /【文献出处】([\s\S]*?)<\/a>/;
if (pattern.test(page)) {
- doi= trimTags(pattern.exec(page)[1]);
+ var publicationTitle = trimTags(pattern.exec(page)[1]);
+ newItem.publicationTitle = Zotero.Utilities.trim(publicationTitle);
+// Zotero.debug("publicationTitle: "+publicationTitle);
}
- }
- if (doi) {
- newItem.DOI = Zotero.Utilities.trim(doi);
-// Zotero.debug("doi: "+doi);
- }
- pattern = /【文献出处】(?:[\s\S]*?)(\d{4})年\s*(\d{2})(卷|期)/;
- if (pattern.test(page)) {
- var date = pattern.exec(page)[1];
- newItem.date = date;
- var val = pattern.exec(page)[2];
- var attr = pattern.exec(page)[3];
- if (attr == "卷") {
- newItem.volume = val;
+ var doi;
+ pattern = /【DOI】(.*?)<\/li>/;
+ if (pattern.test(page)) {
+ doi= pattern.exec(page)[1];
} else {
- newItem.issue = val;
+ pattern = /【DOI】([\s\S]*?)<\/tr>/;
+ if (pattern.test(page)) {
+ doi= trimTags(pattern.exec(page)[1]);
+ }
}
-// Zotero.debug("date: "+date);
-// Zotero.debug("val: "+val);
-// Zotero.debug("attr: "+attr);
- }
-
- newItem.complete();
+ if (doi) {
+ newItem.DOI = Zotero.Utilities.trim(doi);
+// Zotero.debug("doi: "+doi);
+ }
+ pattern = /【文献出处】(?:[\s\S]*?)(\d{4})年\s*(\d{2})(卷|期)/;
+ if (pattern.test(page)) {
+ var date = pattern.exec(page)[1];
+ newItem.date = date;
+ var val = pattern.exec(page)[2];
+ var attr = pattern.exec(page)[3];
+ if (attr == "卷") {
+ newItem.volume = val;
+ } else {
+ newItem.issue = val;
+ }
+// Zotero.debug("date: "+date);
+// Zotero.debug("val: "+val);
+// Zotero.debug("attr: "+attr);
+ }
+
+ newItem.complete();
+ });
}
// work for thesis
function scrapeAndParse2(url) {
// Zotero.debug("thesis");
- var page = Zotero.Utilities.retrieveSource(url);
- var pattern;
-
- // 类型 & URL
- var itemType = "thesis";
- var newItem = new Zotero.Item(itemType);
-// Zotero.debug(url);
- newItem.url = url;
- var code = detectCode(url);
- if (code == "CDFD") {
- newItem.thesisType = "博士论文"
- } else {
- newItem.thesisType = "硕士论文"
- }
-// Zotero.debug(newItem.thesisType);
+ Zotero.Utilities.doGet(url, function(page) {
+ var pattern;
-
- // 标题
- pattern = /<span (?:id="chTitle"|class='datatitle')>(.*?)<\/span>/;
- if (pattern.test(page)) {
- var title = pattern.exec(page)[1];
- pattern = /(<.*?>)/g;
- title = title.replace(pattern, "");
- newItem.title = title;
-// Zotero.debug("title: "+title);
- }
+ // 类型 & URL
+ var itemType = "thesis";
+ var newItem = new Zotero.Item(itemType);
+// Zotero.debug(url);
+ newItem.url = url;
+ var code = detectCode(url);
+ if (code == "CDFD") {
+ newItem.thesisType = "博士论文"
+ } else {
+ newItem.thesisType = "硕士论文"
+ }
+// Zotero.debug(newItem.thesisType);
+
- // 作者
- pattern = /【作者】([\s\S]*?)<\/a>/;
- if (pattern.test(page)) {
- var authorNames = trimTags(pattern.exec(page)[1]).split(";");
- for (var i=0; i<authorNames.length; i++) {
- newItem.creators.push(
- Zotero.Utilities.cleanAuthor(authorNames[i],
- "author", true));
- }
-// Zotero.debug("authorNames:\n"+authorNames);
- }
+ // 标题
+ pattern = /<span (?:id="chTitle"|class='datatitle')>(.*?)<\/span>/;
+ if (pattern.test(page)) {
+ var title = pattern.exec(page)[1];
+ pattern = /(<.*?>)/g;
+ title = title.replace(pattern, "");
+ newItem.title = title;
+// Zotero.debug("title: "+title);
+ }
+
+ // 作者
+ pattern = /【作者】([\s\S]*?)<\/a>/;
+ if (pattern.test(page)) {
+ var authorNames = trimTags(pattern.exec(page)[1]).split(";");
+ for (var i=0; i<authorNames.length; i++) {
+ newItem.creators.push(
+ Zotero.Utilities.cleanAuthor(authorNames[i],
+ "author", true));
+ }
+// Zotero.debug("authorNames:\n"+authorNames);
+ }
+
+ // 导师
+ pattern = /【导师】([\s\S]*?)<\/a>/;
+ if (pattern.test(page)) {
+ var directors = trimTags(pattern.exec(page)[1]).split(";");
+ for (var i=0; i<directors.length; i++) {
+ newItem.creators.push(
+ Zotero.Utilities.cleanAuthor(trimTags(directors[i]),
+ "director", true));
+ }
+// Zotero.debug("directors: "+directors);
+ }
- // 导师
- pattern = /【导师】([\s\S]*?)<\/a>/;
- if (pattern.test(page)) {
- var directors = trimTags(pattern.exec(page)[1]).split(";");
- for (var i=0; i<directors.length; i++) {
- newItem.creators.push(
- Zotero.Utilities.cleanAuthor(trimTags(directors[i]),
- "director", true));
- }
-// Zotero.debug("directors: "+directors);
- }
-
- // 摘要
- var abst;
- pattern = /ReplaceFont\('ChDivSummary','(.*?)(?='\);ReplaceFont)/;
- if (pattern.test(page)) {
- abst = trimTags(pattern.exec(page)[1]);
- } else {
- pattern = /【中文摘要】([\s\S]*?)<\/tr>/;
+ // 摘要
+ var abst;
+ pattern = /ReplaceFont\('ChDivSummary','(.*?)(?='\);ReplaceFont)/;
if (pattern.test(page)) {
abst = trimTags(pattern.exec(page)[1]);
+ } else {
+ pattern = /【中文摘要】([\s\S]*?)<\/tr>/;
+ if (pattern.test(page)) {
+ abst = trimTags(pattern.exec(page)[1]);
+ }
}
- }
- if (abst) {
-// Zotero.debug("abstract:\n"+abst);
- newItem.abstractNote = trimMultiline(abst);
- }
- pattern = /ReplaceFont\('EnDivSummary','(.*?)(?='\);if)/;
- if (pattern.test(page)) {
- abst = trimTags(pattern.exec(page)[1]);
- } else {
- pattern = /【英文摘要】([\s\S]*?)<\/tr>/;
+ if (abst) {
+// Zotero.debug("abstract:\n"+abst);
+ newItem.abstractNote = trimMultiline(abst);
+ }
+ pattern = /ReplaceFont\('EnDivSummary','(.*?)(?='\);if)/;
if (pattern.test(page)) {
abst = trimTags(pattern.exec(page)[1]);
- }
- }
- if (abst) {
-// Zotero.debug("abstract:\n"+abst);
- if (newItem.abstractNote===undefined) {
- newItem.abstractNote = Zotero.Utilities.trim(abst);
} else {
- newItem.abstractNote = newItem.abstractNote + "\n"
- + trimMultiline(abst);
+ pattern = /【英文摘要】([\s\S]*?)<\/tr>/;
+ if (pattern.test(page)) {
+ abst = trimTags(pattern.exec(page)[1]);
+ }
}
- }
-// Zotero.debug(newItem.abstractNote);
-
- // 关键词
- var tags;
- pattern = /【关键词】\s*<span[^>]*>(.*?)<\/a>*<\/span>/;
- if (pattern.test(page)) {
- tags = trimTags(pattern.exec(page)[1]).split(";");
- } else {
- pattern = /【关键词】([\s\S]*?)<\/tr>/;
+ if (abst) {
+// Zotero.debug("abstract:\n"+abst);
+ if (newItem.abstractNote===undefined) {
+ newItem.abstractNote = Zotero.Utilities.trim(abst);
+ } else {
+ newItem.abstractNote = newItem.abstractNote + "\n"
+ + trimMultiline(abst);
+ }
+ }
+// Zotero.debug(newItem.abstractNote);
+
+ // 关键词
+ var tags;
+ pattern = /【关键词】\s*<span[^>]*>(.*?)<\/a>*<\/span>/;
if (pattern.test(page)) {
tags = trimTags(pattern.exec(page)[1]).split(";");
+ } else {
+ pattern = /【关键词】([\s\S]*?)<\/tr>/;
+ if (pattern.test(page)) {
+ tags = trimTags(pattern.exec(page)[1]).split(";");
+ }
}
- }
- if (tags) {
- for (var i=0; i<tags.length; i++) {
- var tag = Zotero.Utilities.trim(tags[i]);
- if (tag.length>0 && newItem.tags.indexOf(tag)<0) {
- newItem.tags.push(tag);
+ if (tags) {
+ for (var i=0; i<tags.length; i++) {
+ var tag = Zotero.Utilities.trim(tags[i]);
+ if (tag.length>0 && newItem.tags.indexOf(tag)<0) {
+ newItem.tags.push(tag);
+ }
}
+// Zotero.debug("tags:\n"+tags);
}
-// Zotero.debug("tags:\n"+tags);
- }
- pattern = /【Key words】\s*<span[^>]*>(.*?)<\/a>*<\/span>/;
- if (pattern.test(page)) {
- tags = trimTags(pattern.exec(page)[1]).split(";");
- } else {
- pattern = /【英文关键词】([\s\S]*?)<\/tr>/;
+ pattern = /【Key words】\s*<span[^>]*>(.*?)<\/a>*<\/span>/;
if (pattern.test(page)) {
tags = trimTags(pattern.exec(page)[1]).split(";");
+ } else {
+ pattern = /【英文关键词】([\s\S]*?)<\/tr>/;
+ if (pattern.test(page)) {
+ tags = trimTags(pattern.exec(page)[1]).split(";");
+ }
}
- }
- if (tags) {
- for (var i=0; i<tags.length; i++) {
- var tag = Zotero.Utilities.trim(tags[i]);
- if (tag.length>0 && newItem.tags.indexOf(tag)<0) {
- newItem.tags.push(tag);
+ if (tags) {
+ for (var i=0; i<tags.length; i++) {
+ var tag = Zotero.Utilities.trim(tags[i]);
+ if (tag.length>0 && newItem.tags.indexOf(tag)<0) {
+ newItem.tags.push(tag);
+ }
}
+// Zotero.debug("tags:\n"+tags);
}
-// Zotero.debug("tags:\n"+tags);
- }
-// Zotero.debug(newItem.tags);
-
- // 出版学校 & DOI & 出版时间
- var publisher;
- pattern = /【网络出版投稿人】\s*<a[^>]*>(.*?)<\/a>/;
- if (pattern.test(page)) {
- publisher = pattern.exec(page)[1];
- } else {
- pattern = /【网络出版投稿人】([\s\S]*?)<\/tr>/;
+// Zotero.debug(newItem.tags);
+
+ // 出版学校 & DOI & 出版时间
+ var publisher;
+ pattern = /【网络出版投稿人】\s*<a[^>]*>(.*?)<\/a>/;
if (pattern.test(page)) {
- publisher = Zotero.Utilities.trim(
- trimTags(pattern.exec(page)[1]));
- }
- }
- if (publisher) {
- pattern = /(.*?)((.*?))/;
- if (pattern.test(publisher)) {
- newItem.publisher = pattern.exec(publisher)[1];
- newItem.place = pattern.exec(publisher)[2];
+ publisher = pattern.exec(page)[1];
} else {
- newItem.publisher = publisher;
+ pattern = /【网络出版投稿人】([\s\S]*?)<\/tr>/;
+ if (pattern.test(page)) {
+ publisher = Zotero.Utilities.trim(
+ trimTags(pattern.exec(page)[1]));
+ }
}
-// Zotero.debug("publisher: "+publisher);
- }
- var doi;
- pattern = /【DOI】(.*?)<\/li>/;
- if (pattern.test(page)) {
- doi= pattern.exec(page)[1];
- } else {
- pattern = /【DOI】([\s\S]*?)<\/tr>/;
+ if (publisher) {
+ pattern = /(.*?)((.*?))/;
+ if (pattern.test(publisher)) {
+ newItem.publisher = pattern.exec(publisher)[1];
+ newItem.place = pattern.exec(publisher)[2];
+ } else {
+ newItem.publisher = publisher;
+ }
+// Zotero.debug("publisher: "+publisher);
+ }
+ var doi;
+ pattern = /【DOI】(.*?)<\/li>/;
if (pattern.test(page)) {
- var doi= trimTags(pattern.exec(page)[1]);
+ doi= pattern.exec(page)[1];
+ } else {
+ pattern = /【DOI】([\s\S]*?)<\/tr>/;
+ if (pattern.test(page)) {
+ var doi= trimTags(pattern.exec(page)[1]);
+ }
}
- }
- if (doi) {
- newItem.DOI = Zotero.Utilities.trim(doi);
-// Zotero.debug("doi: "+doi);
- }
- var date;
- pattern = /【网络出版投稿时间】(.*?)\s*<\/li>/;
- if (pattern.test(page)) {
- date = pattern.exec(page)[1];
- } else {
- pattern = /【网络出版投稿时间】([\s\S]*?)\s*<\/tr>/;
+ if (doi) {
+ newItem.DOI = Zotero.Utilities.trim(doi);
+// Zotero.debug("doi: "+doi);
+ }
+ var date;
+ pattern = /【网络出版投稿时间】(.*?)\s*<\/li>/;
if (pattern.test(page)) {
- date = trimTags(pattern.exec(page)[1]);
+ date = pattern.exec(page)[1];
+ } else {
+ pattern = /【网络出版投稿时间】([\s\S]*?)\s*<\/tr>/;
+ if (pattern.test(page)) {
+ date = trimTags(pattern.exec(page)[1]);
+ }
}
- }
- if (date) {
- newItem.date = Zotero.Utilities.trim(date);
-// Zotero.debug("date: "+date);
- }
-
- newItem.complete();
+ if (date) {
+ newItem.date = Zotero.Utilities.trim(date);
+// Zotero.debug("date: "+date);
+ }
+
+ newItem.complete();
+ });
}
// work for conferencePaper
function scrapeAndParse3(url) {
// Zotero.debug("conferencePaper");
- var page = Zotero.Utilities.retrieveSource(url);
- var pattern;
-
- // 类型 & URL
- var itemType = "conferencePaper";
- var newItem = new Zotero.Item(itemType);
-// Zotero.debug(url);
- newItem.url = url;
-
- // 标题
- pattern = /<span id="chTitle">(.*?)<\/span>/;
- if (pattern.test(page)) {
- var title = trimTags(pattern.exec(page)[1]);
- newItem.title = title;
-// Zotero.debug("title: "+title);
- }
+ Zotero.Utilities.doGet(url, function(page) {
+ var pattern;
- // 作者
- pattern = /【作者】(.*?)<\/p>/;
- if (pattern.test(page)) {
- var authorNames = trimTags(pattern.exec(page)[1]).split(";");
- for (var i=0; i<authorNames.length; i++) {
- newItem.creators.push(
- Zotero.Utilities.cleanAuthor(
- Zotero.Utilities.trim(authorNames[i]),
- "author", true));
+ // 类型 & URL
+ var itemType = "conferencePaper";
+ var newItem = new Zotero.Item(itemType);
+// Zotero.debug(url);
+ newItem.url = url;
+
+ // 标题
+ pattern = /<span id="chTitle">(.*?)<\/span>/;
+ if (pattern.test(page)) {
+ var title = trimTags(pattern.exec(page)[1]);
+ newItem.title = title;
+// Zotero.debug("title: "+title);
}
-// Zotero.debug("authorNames:\n"+authorNames);
- }
-
- // 摘要
- var abst;
- pattern = /ReplaceFont\('ChDivSummary','(.*?)(?='\);ReplaceFont)/;
- if (pattern.test(page)) {
- abst = pattern.exec(page)[1];
-// Zotero.debug("raw:\n"+abst);
- pattern = /(<.*?>)/g;
- abst = abst.replace(pattern, "");
-// Zotero.debug("after:\n"+abst);
- newItem.abstractNote = Zotero.Utilities.trim(abst);
- }
-
- pattern = /ReplaceFont\('EnDivSummary','(.*?)(?='\);if)/;
- if (pattern.test(page)) {
- abst = pattern.exec(page)[1];
-// Zotero.debug("raw:\n"+abst);
- if (abst != undefined && abst != null) {
+
+ // 作者
+ pattern = /【作者】(.*?)<\/p>/;
+ if (pattern.test(page)) {
+ var authorNames = trimTags(pattern.exec(page)[1]).split(";");
+ for (var i=0; i<authorNames.length; i++) {
+ newItem.creators.push(
+ Zotero.Utilities.cleanAuthor(
+ Zotero.Utilities.trim(authorNames[i]),
+ "author", true));
+ }
+// Zotero.debug("authorNames:\n"+authorNames);
+ }
+
+ // 摘要
+ var abst;
+ pattern = /ReplaceFont\('ChDivSummary','(.*?)(?='\);ReplaceFont)/;
+ if (pattern.test(page)) {
+ abst = pattern.exec(page)[1];
+// Zotero.debug("raw:\n"+abst);
pattern = /(<.*?>)/g;
abst = abst.replace(pattern, "");
// Zotero.debug("after:\n"+abst);
+ newItem.abstractNote = Zotero.Utilities.trim(abst);
+ }
- if (newItem.abstractNote===undefined) {
- newItem.abstractNote = Zotero.Utilities.trim(abst);
- } else {
- newItem.abstractNote = newItem.abstractNote + "\n"
- + Zotero.Utilities.trim(abst);
+ pattern = /ReplaceFont\('EnDivSummary','(.*?)(?='\);if)/;
+ if (pattern.test(page)) {
+ abst = pattern.exec(page)[1];
+// Zotero.debug("raw:\n"+abst);
+ if (abst != undefined && abst != null) {
+ pattern = /(<.*?>)/g;
+ abst = abst.replace(pattern, "");
+// Zotero.debug("after:\n"+abst);
+
+ if (newItem.abstractNote===undefined) {
+ newItem.abstractNote = Zotero.Utilities.trim(abst);
+ } else {
+ newItem.abstractNote = newItem.abstractNote + "\n"
+ + Zotero.Utilities.trim(abst);
+ }
}
}
- }
-// Zotero.debug("abst:\n"+newItem.abstractNote);
-
- // 关键词
- pattern = /【关键词】\s*<span[^>]*>(.*?)<\/a>*<\/span>/;
- if (pattern.test(page)) {
- var tags = trimTags(pattern.exec(page)[1]).split(";");
- for (var i=0; i<tags.length; i++) {
- var tag = Zotero.Utilities.trim(tags[i]);
- if (tag.length>0 && newItem.tags.indexOf(tag)<0) {
- newItem.tags.push(tag);
+// Zotero.debug("abst:\n"+newItem.abstractNote);
+
+ // 关键词
+ pattern = /【关键词】\s*<span[^>]*>(.*?)<\/a>*<\/span>/;
+ if (pattern.test(page)) {
+ var tags = trimTags(pattern.exec(page)[1]).split(";");
+ for (var i=0; i<tags.length; i++) {
+ var tag = Zotero.Utilities.trim(tags[i]);
+ if (tag.length>0 && newItem.tags.indexOf(tag)<0) {
+ newItem.tags.push(tag);
+ }
}
+// Zotero.debug("tags:\n"+tags);
}
-// Zotero.debug("tags:\n"+tags);
- }
- pattern = /【Key words】\s*<span[^>]*>(.*?)<\/a>*<\/span>/;
- if (pattern.test(page)) {
- var tags = trimTags(pattern.exec(page)[1]).split(";");
- for (var i=0; i<tags.length; i++) {
- var tag = Zotero.Utilities.trim(tags[i]);
- if (tag.length>0 && newItem.tags.indexOf(tag)<0) {
- newItem.tags.push(tag);
+ pattern = /【Key words】\s*<span[^>]*>(.*?)<\/a>*<\/span>/;
+ if (pattern.test(page)) {
+ var tags = trimTags(pattern.exec(page)[1]).split(";");
+ for (var i=0; i<tags.length; i++) {
+ var tag = Zotero.Utilities.trim(tags[i]);
+ if (tag.length>0 && newItem.tags.indexOf(tag)<0) {
+ newItem.tags.push(tag);
+ }
}
+// Zotero.debug("tags:\n"+tags);
+ }
+// Zotero.debug(newItem.tags);
+
+ // 会议名称 & 会议录名称 & 会议地点 & 会议时间
+ pattern = /【会议名称】(.*?)\s*<\/li>/;
+ if (pattern.test(page)) {
+ var conferenceName = trimTags(pattern.exec(page)[1]);
+ newItem.conferenceName = conferenceName;
+// Zotero.debug("conferenceName: "+conferenceName);
+ }
+ pattern = /【会议录名称】(.*?)\s*<\/li>/;
+ if (pattern.test(page)) {
+ var proceedingsTitle = trimTags(pattern.exec(page)[1]);
+ newItem.proceedingsTitle = proceedingsTitle;
+// Zotero.debug("proceedingsTitle: "+proceedingsTitle);
+ }
+ pattern = /【会议地点】(.*?)\s*<\/li>/;
+ if (pattern.test(page)) {
+ var place = trimTags(pattern.exec(page)[1]);
+ newItem.place = place;
+// Zotero.debug("place: "+place);
+ }
+ pattern = /【会议时间】(.*?)\s*<\/li>/;
+ if (pattern.test(page)) {
+ var date = trimTags(pattern.exec(page)[1]);
+ newItem.date = date;
+// Zotero.debug("date: "+date);
}
-// Zotero.debug("tags:\n"+tags);
- }
-// Zotero.debug(newItem.tags);
-
- // 会议名称 & 会议录名称 & 会议地点 & 会议时间
- pattern = /【会议名称】(.*?)\s*<\/li>/;
- if (pattern.test(page)) {
- var conferenceName = trimTags(pattern.exec(page)[1]);
- newItem.conferenceName = conferenceName;
-// Zotero.debug("conferenceName: "+conferenceName);
- }
- pattern = /【会议录名称】(.*?)\s*<\/li>/;
- if (pattern.test(page)) {
- var proceedingsTitle = trimTags(pattern.exec(page)[1]);
- newItem.proceedingsTitle = proceedingsTitle;
-// Zotero.debug("proceedingsTitle: "+proceedingsTitle);
- }
- pattern = /【会议地点】(.*?)\s*<\/li>/;
- if (pattern.test(page)) {
- var place = trimTags(pattern.exec(page)[1]);
- newItem.place = place;
-// Zotero.debug("place: "+place);
- }
- pattern = /【会议时间】(.*?)\s*<\/li>/;
- if (pattern.test(page)) {
- var date = trimTags(pattern.exec(page)[1]);
- newItem.date = date;
-// Zotero.debug("date: "+date);
- }
- newItem.complete();
+ newItem.complete();
+ });
}
// work for newspaperArticle
function scrapeAndParse4(url) {
// Zotero.debug("newspaperArticle");
- var page = Zotero.Utilities.retrieveSource(url);
- var pattern;
-
- // 类型 & URL
- var itemType = "newspaperArticle";
- var newItem = new Zotero.Item(itemType);
-// Zotero.debug(url);
- newItem.url = url;
-
- // 标题
- pattern = /<span id="chTitle">(.*?)<\/span>/;
- if (pattern.test(page)) {
- var title = trimTags(pattern.exec(page)[1]);
- newItem.title = title;
-// Zotero.debug("title: "+title);
- }
-
- // 副标题/引题
- var shortTitle;
- pattern = /<p>【(?:副标题|引题)】(.*?)(?=<\/p>)/;
- if (pattern.test(page)) {
- shortTitle = pattern.exec(page)[1];
-// Zotero.debug("shortTitle: "+shortTitle);
- newItem.shortTitle = Zotero.Utilities.trimInternal(shortTitle);
- }
-// Zotero.debug(newItem.shortTitle);
-
- // 作者
- pattern = /【作\s*者】(.*?)<\/p>/;
- if (pattern.test(page)) {
- var authorNames = trimTags(pattern.exec(page)[1]).split(";");
- for (var i=0; i<authorNames.length; i++) {
- newItem.creators.push(
- Zotero.Utilities.cleanAuthor(
- Zotero.Utilities.trim(authorNames[i]),
- "author", true));
+ Zotero.Utilities.doGet(url, function(page) {
+ var pattern;
+
+ // 类型 & URL
+ var itemType = "newspaperArticle";
+ var newItem = new Zotero.Item(itemType);
+// Zotero.debug(url);
+ newItem.url = url;
+
+ // 标题
+ pattern = /<span id="chTitle">(.*?)<\/span>/;
+ if (pattern.test(page)) {
+ var title = trimTags(pattern.exec(page)[1]);
+ newItem.title = title;
+// Zotero.debug("title: "+title);
}
-// Zotero.debug("authorNames:\n"+authorNames);
- }
- // 正文快照
- var abst;
- pattern = /<p>【正文快照】(.*?)(?=<\/p>)/;
- if (pattern.test(page)) {
- abst = pattern.exec(page)[1];
-// Zotero.debug("abst:\n"+abst);
- newItem.abstractNote = Zotero.Utilities.trimInternal(abst);
- }
-// Zotero.debug(newItem.abstractNote);
+ // 副标题/引题
+ var shortTitle;
+ pattern = /<p>【(?:副标题|引题)】(.*?)(?=<\/p>)/;
+ if (pattern.test(page)) {
+ shortTitle = pattern.exec(page)[1];
+// Zotero.debug("shortTitle: "+shortTitle);
+ newItem.shortTitle = Zotero.Utilities.trimInternal(shortTitle);
+ }
+// Zotero.debug(newItem.shortTitle);
- // 报纸名称 & DOI & 出版时间 & 版名 & 版号
- pattern = /【报纸名称】\s*<[^>]*>(.*?)<\/a>/;
- if (pattern.test(page)) {
- var publicationTitle = trimTags(pattern.exec(page)[1]);
- newItem.publicationTitle = publicationTitle;
-// Zotero.debug("publicationTitle: "+publicationTitle);
- }
- pattern = /【DOI】\s*(.*?)\s*<\/li>/;
- if (pattern.test(page)) {
- var doi = pattern.exec(page)[1];
- newItem.DOI = doi;
-// Zotero.debug("doi: "+doi);
- }
- pattern = /【报纸日期】\s*(.*?)\s*<\/li>/;
- if (pattern.test(page)) {
- var date = pattern.exec(page)[1];
- newItem.date = date;
-// Zotero.debug("date: "+date);
- }
- pattern = /【版名】\s*(.*?)\s*<\/li>/;
- if (pattern.test(page)) {
- var section = pattern.exec(page)[1];
- newItem.section = section;
-// Zotero.debug("section: "+section);
- }
- pattern = /【版号】\s*(.*?)\s*<\/li>/;
- if (pattern.test(page)) {
- var edition = pattern.exec(page)[1];
- newItem.edition = edition;
-// Zotero.debug("edition: "+edition);
- }
-
- newItem.complete();
+ // 作者
+ pattern = /【作\s*者】(.*?)<\/p>/;
+ if (pattern.test(page)) {
+ var authorNames = trimTags(pattern.exec(page)[1]).split(";");
+ for (var i=0; i<authorNames.length; i++) {
+ newItem.creators.push(
+ Zotero.Utilities.cleanAuthor(
+ Zotero.Utilities.trim(authorNames[i]),
+ "author", true));
+ }
+// Zotero.debug("authorNames:\n"+authorNames);
+ }
+
+ // 正文快照
+ var abst;
+ pattern = /<p>【正文快照】(.*?)(?=<\/p>)/;
+ if (pattern.test(page)) {
+ abst = pattern.exec(page)[1];
+// Zotero.debug("abst:\n"+abst);
+ newItem.abstractNote = Zotero.Utilities.trimInternal(abst);
+ }
+// Zotero.debug(newItem.abstractNote);
+
+ // 报纸名称 & DOI & 出版时间 & 版名 & 版号
+ pattern = /【报纸名称】\s*<[^>]*>(.*?)<\/a>/;
+ if (pattern.test(page)) {
+ var publicationTitle = trimTags(pattern.exec(page)[1]);
+ newItem.publicationTitle = publicationTitle;
+// Zotero.debug("publicationTitle: "+publicationTitle);
+ }
+ pattern = /【DOI】\s*(.*?)\s*<\/li>/;
+ if (pattern.test(page)) {
+ var doi = pattern.exec(page)[1];
+ newItem.DOI = doi;
+// Zotero.debug("doi: "+doi);
+ }
+ pattern = /【报纸日期】\s*(.*?)\s*<\/li>/;
+ if (pattern.test(page)) {
+ var date = pattern.exec(page)[1];
+ newItem.date = date;
+// Zotero.debug("date: "+date);
+ }
+ pattern = /【版名】\s*(.*?)\s*<\/li>/;
+ if (pattern.test(page)) {
+ var section = pattern.exec(page)[1];
+ newItem.section = section;
+// Zotero.debug("section: "+section);
+ }
+ pattern = /【版号】\s*(.*?)\s*<\/li>/;
+ if (pattern.test(page)) {
+ var edition = pattern.exec(page)[1];
+ newItem.edition = edition;
+// Zotero.debug("edition: "+edition);
+ }
+
+ newItem.complete();
+ });
}
// #########################
@@ -675,9 +679,9 @@ function detectWeb(doc, url) {
function doWeb(doc, url) {
var nsResolver = getResolver(doc);
- var urls, tds;
+ var urls, tds, pages;
- Zotero.debug(url);
+// Zotero.debug(url);
if (detectWeb(doc, url) == "multiple") {
// Zotero.debug("Enter multiple.");
@@ -687,35 +691,49 @@ function doWeb(doc, url) {
var xpath = '//iframe[@id="iframeResult"]';
var iframe = doc.evaluate(xpath, doc, nsResolver,
XPathResult.ANY_TYPE, null).iterateNext();
- xpath = '//div[@class="GridTitleDiv"]';
if (iframe) {
- var subdoc = iframe.contentDocument;
- tds = subdoc.evaluate(xpath, subdoc, nsResolver,
- XPathResult.ANY_TYPE, null);
+ // fetch the iframe's page source
+// Zotero.debug(iframe.src);
+ pages = Zotero.Utilities.retrieveSource(iframe.src);
} else {
- tds = doc.evaluate(xpath, doc, nsResolver,
- XPathResult.ANY_TYPE, null);
+ // already in iframe
+// Zotero.debug("url:"+url);
+ pages = Zotero.Utilities.retrieveSource(url);
+ }
+
+ pattern = /<tr class=["']GTContentTitle["']>[\s\S]*?<\/tr>([\s\S]*?)<table class=["']pageBar_bottom["']/;
+ var content;
+ try {
+ content = pattern.exec(pages)[1];
+// Zotero.debug(content);
+ pattern = /<\/table>[\s\S]*?<a href=["'](.*?)["'][^>]*?><script[\s\S]*?(?:Replace[^\(]*?\()'(.*?)'\)/g;
+ } catch (err) {
+ content = pages;
+ pattern = /<div class=["']GridTitleDiv["']>.*?<a href=["'](.*?)["'][^>]*?><script[\s\S]*?(?:Replace[^\(]*?\()'(.*?)'\)/g;
+ }
+ var res = pattern.exec(content);
+ if (!res) {
+ pattern = /<div class=["']GridTitleDiv["']>.*?<a href=["'](.*?)["'][^>]*?>(.*?)<\/a>/g;
+ res = pattern.exec(content);
+ if (!res) {
+ pattern = /<\/table>[\s\S]*?<a href=["'](.*?)["'][^>]*?>(.*?)<\/a>/g;
+ res = pattern.exec(content);
+ }
}
-
- var td = tds.iterateNext();
var link;
var title;
- while (td) {
- var a = td.getElementsByTagName("a")[0];
- title = Zotero.Utilities.cleanTags(a.textContent);
- pattern = /;(.*)/;
- if (pattern.test(title)) {
- title = pattern.exec(title)[1];
- }
- link = a.getAttribute("href");
- if (link) {
- pattern = /^(http:\/\/.*?)\//;
- link = pattern.exec(url)[1] + link;
- items[link] = Zotero.Utilities.trimInternal(title);
-// Zotero.debug("title:"+title);
-// Zotero.debug("link:"+link);
- }
- td = tds.iterateNext();
+ while (res) {
+
+ title = Zotero.Utilities.cleanTags(res[2]);
+ link = res[1];
+
+ patt = /^(http:\/\/.*?)\//;
+ link = patt.exec(url)[1] + link;
+ items[link] = trimTags(title);
+// Zotero.debug("title:"+title);
+// Zotero.debug("link:"+link);
+
+ res = pattern.exec(content);
}
// Zotero.debug(items);
if (items.__count__) {
diff --git a/translators/Douban.js b/translators/Douban.js
@@ -0,0 +1,284 @@
+{
+ "translatorID":"fc353b26-8911-4c34-9196-f6f567c93901",
+ "label":"Douban",
+ "creator":"Ace Strong <acestrong@gmail.com>",
+ "target":"^https?://(www|book)\\.douban\\.com/subject",
+ "minVersion":"2.0rc1",
+ "maxVersion":"",
+ "priority":100,
+ "inRepository":"1",
+ "translatorType":4,
+ "lastUpdated":"2010-10-10 00:23:10"
+}
+
+/*
+ Douban Translator
+ Copyright (C) 2009-2010 TAO Cheng, acestrong@gmail.com
+
+ This program is free software: you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation, either version 3 of the License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program. If not, see <http://www.gnu.org/licenses/>.
+*/
+
+// #######################
+// ##### Sample URLs #####
+// #######################
+
+/*
+ * The starting point for a search is the URL below.
+ * In testing, I tried the following:
+ *
+ * - A search listing of books
+ * - A book page
+ */
+// http://book.douban.com/
+
+
+// #################################
+// #### Local utility functions ####
+// #################################
+
+function trimTags(text) { // Remove every "<...>" span from text (lazy match; "." does not cross newlines).
+ return text.replace(/(<.*?>)/g, "");
+}
+
+function trimMultispace(text) { // Replace each newline plus the whitespace run that follows it with a single "\n".
+ return text.replace(/\n\s+/g, "\n");
+}
+
+// #############################
+// ##### Scraper functions #####
+// #############################
+
+function scrapeAndParse(url) { // Fetch one Douban book page and save it as a "book" item built from regex matches on the HTML.
+ var page = Zotero.Utilities.retrieveSource(url);
+ var pattern;
+
+ // Item type & URL
+ var itemType = "book";
+ var newItem = new Zotero.Item(itemType);
+// Zotero.debug(itemType);
+ newItem.url = url;
+
+ // Title: contents of the first <h1>...</h1> that sits on a single line
+ pattern = /<h1>(.*?)<\/h1>/;
+ if (pattern.test(page)) {
+ var title = pattern.exec(page)[1];
+ newItem.title = title;
+// Zotero.debug("title: "+title);
+ }
+
+ // Alternative title (labelled "又名" on the page), stored as shortTitle
+ pattern = /<span [^>]*?>又名:(.*?)<\/span>/;
+ if (pattern.test(page)) {
+ var shortTitle = pattern.exec(page)[1];
+ newItem.shortTitle = Zotero.Utilities.trim(shortTitle);
+// Zotero.debug("shortTitle: "+shortTitle);
+ }
+
+ // Authors (labelled "作者"): tags and bracketed parts are removed, then names are split on "/"
+ pattern = /<span><span [^>]*?>作者<\/span>:(.*?)<\/span>/;
+ if (pattern.test(page)) {
+ var authorNames = trimTags(pattern.exec(page)[1]);
+ pattern = /(\[.*?\]|\(.*?\)|(.*?))/g;
+ authorNames = authorNames.replace(pattern, "").split("/");
+ // NOTE(review): the last alternative "(.*?)" is a lazy group that only ever matches "", so it strips nothing; presumably full-width parentheses were intended - confirm.
+ for (var i=0; i<authorNames.length; i++) {
+ var useComma = true;
+ pattern = /[A-Za-z]/;
+ if (pattern.test(authorNames[i])) {
+ // Name contains Latin letters: useComma becomes false unless the name also contains a comma
+ pattern = /,/;
+ if (!pattern.test(authorNames[i])) {
+ useComma = false;
+ }
+ }
+ newItem.creators.push(Zotero.Utilities.cleanAuthor(
+ Zotero.Utilities.trim(authorNames[i]),
+ "author", useComma));
+ }
+ }
+
+ // Translators (labelled "译者"): "[...]" parts are removed, then names are split on "/"
+ pattern = /<span><span [^>]*?>译者<\/span>:(.*?)<\/span>/;
+ if (pattern.test(page)) {
+ var translatorNames = trimTags(pattern.exec(page)[1]);
+ pattern = /(\[.*?\])/g;
+ translatorNames = translatorNames.replace(pattern, "").split("/");
+// Zotero.debug(translatorNames);
+ for (var i=0; i<translatorNames.length; i++) {
+ var useComma = true;
+ pattern = /[A-Za-z]/;
+ if (pattern.test(translatorNames[i])) {
+ // Name contains Latin letters: useComma becomes false (no comma check here, unlike authors)
+ useComma = false;
+ }
+ newItem.creators.push(Zotero.Utilities.cleanAuthor(
+ Zotero.Utilities.trim(translatorNames[i]),
+ "translator", useComma));
+ }
+ }
+
+ // ISBN
+ pattern = /<span [^>]*?>ISBN:<\/span>(.*?)<br\/>/;
+ if (pattern.test(page)) {
+ var isbn = pattern.exec(page)[1];
+ newItem.ISBN = Zotero.Utilities.trim(isbn);
+// Zotero.debug("isbn: "+isbn);
+ }
+
+ // Number of pages (labelled "页数")
+ pattern = /<span [^>]*?>页数:<\/span>(.*?)<br\/>/;
+ if (pattern.test(page)) {
+ var numPages = pattern.exec(page)[1];
+ newItem.numPages = Zotero.Utilities.trim(numPages);
+// Zotero.debug("numPages: "+numPages);
+ }
+
+ // Publisher (labelled "出版社")
+ pattern = /<span [^>]*?>出版社:<\/span>(.*?)<br\/>/;
+ if (pattern.test(page)) {
+ var publisher = pattern.exec(page)[1];
+ newItem.publisher = Zotero.Utilities.trim(publisher);
+// Zotero.debug("publisher: "+publisher);
+ }
+
+ // Series (labelled "丛书"); markup inside the value is stripped
+ pattern = /<span [^>]*?>丛书:<\/span>(.*?)<br\/>/;
+ if (pattern.test(page)) {
+ var series = trimTags(pattern.exec(page)[1]);
+ newItem.series = Zotero.Utilities.trim(series);
+// Zotero.debug("series: "+series);
+ }
+
+ // Publication year (labelled "出版年"), stored as date
+ pattern = /<span [^>]*?>出版年:<\/span>(.*?)<br\/>/;
+ if (pattern.test(page)) {
+ var date = pattern.exec(page)[1];
+ newItem.date = Zotero.Utilities.trim(date);
+// Zotero.debug("date: "+date);
+ }
+
+ // Book summary (heading "简介"): text up to the next </div>, with <br/> turned into newlines
+ pattern = /<h2[^>]*?>简介[\s\S]*?<\/h2>([\s\S]*?)<\/div>/;
+ if (pattern.test(page)) {
+ var intro = pattern.exec(page)[1];
+ intro = trimTags(intro.replace(/(<br\/>)/g, "\n"));
+ pattern = /\(展开全部\)([\s\S]*)/;
+ if (pattern.test(intro)) {
+ intro = pattern.exec(intro)[1];
+ }
+ pattern = /\S/;
+ if (pattern.test(intro)) {
+ newItem.abstractNote = "图书简介:\n"
+ + trimMultispace(intro);
+ }
+// Zotero.debug("abstractNote: "+newItem.abstractNote);
+ }
+
+ // Author biography (heading "作者简介"), appended to abstractNote
+ pattern = /<h2[^>]*?>作者简介[\s\S]*?<\/h2>([\s\S]*?)<\/div>/;
+ if (pattern.test(page)) {
+ var intro = pattern.exec(page)[1];
+ intro = trimTags(intro.replace(/(<br\/>)/g, "\n"));
+ pattern = /\(展开全部\)([\s\S]*)/;
+ if (pattern.test(intro)) {
+ intro = pattern.exec(intro)[1];
+ }
+ // NOTE(review): added without the non-blank (/\S/) check that guards the book summary above.
+ if (newItem.abstractNote === undefined) {
+ newItem.abstractNote = "作者简介:\n"
+ + trimMultispace(intro);
+ } else {
+ newItem.abstractNote += "\n作者简介:\n"
+ + trimMultispace(intro);
+ }
+// Zotero.debug("abstractNote: "+newItem.abstractNote);
+ }
+
+ // Series information (heading "丛书信息"), appended to abstractNote
+ pattern = /<h2>丛书信息<\/h2>([\s\S]*?)<\/div>/;
+ if (pattern.test(page)) {
+ var intro = pattern.exec(page)[1];
+ intro = Zotero.Utilities.trimInternal(trimTags(intro));
+
+ if (newItem.abstractNote === undefined) {
+ newItem.abstractNote = "丛书信息:\n" + intro;
+ } else {
+ newItem.abstractNote += "\n丛书信息:\n" + intro;
+ }
+// Zotero.debug("abstractNote: "+newItem.abstractNote);
+ }
+
+ newItem.complete();
+}
+
+// #########################
+// ##### API functions #####
+// #########################
+
+function detectWeb(doc, url) { // Report "multiple" for URLs containing "subject_search", otherwise "book"; doc is not used.
+ var pattern = /subject_search/;
+
+ if (pattern.test(url)) {
+ return "multiple";
+ } else {
+ return "book";
+ }
+ // NOTE(review): unreachable - both branches above return.
+ return false;
+}
+
+function doWeb(doc, url) { // Save the current book page, or the entries the user picks from a search listing.
+ var page = Zotero.Utilities.retrieveSource(url);
+ var pattern, urls;
+
+ if(detectWeb(doc, url) == "multiple") {
+ // Collect link/title pairs from the <a class="nbg" ...> tags found in the page source.
+ // search page
+ var items = new Array();
+
+ pattern = /<a class="nbg"\s*([^>]*?)>/g;
+ if (pattern.test(page)) {
+ var result = page.match(pattern);
+ // NOTE(review): exec() below yields null for a tag lacking href="..." followed by title="...",
+ // and res[1] would then throw - confirm every "nbg" anchor carries both attributes in that order.
+
+ pattern = /href="(.*?)".*?title="(.*?)"/;
+ for (var i=0; i<result.length; i++) {
+ var res = pattern.exec(result[i]);
+ if(res[1]) {
+ items[res[1]] = res[2];
+ }
+ }
+ }
+
+ // Let the user choose which entries to save
+ items = Zotero.selectItems(items);
+ if (!items) return true;
+
+ urls = new Array();
+ for(var url in items) {
+ urls.push(url);
+ }
+ } else {
+ urls = [url];
+ }
+
+ if (urls) {
+// Zotero.debug(urls);
+
+ for (var i=0; i<urls.length; i++) {
+ scrapeAndParse(urls[i]);
+ }
+ }
+}
diff --git a/translators/Wanfang Data.js b/translators/Wanfang Data.js
@@ -0,0 +1,364 @@
+{
+ "translatorID":"eb876bd2-644c-458e-8d05-bf54b10176f3",
+ "label":"Wanfang Data",
+ "creator":"Ace Strong <acestrong@gmail.com>",
+ "target":"^https?://[ds]\\.(?:g\\.)?wanfangdata\\.com\\.cn",
+ "minVersion":"2.0rc1",
+ "maxVersion":"",
+ "priority":100,
+ "inRepository":"1",
+ "translatorType":4,
+ "lastUpdated":"2010-10-12 15:45:49"
+}
+
+/*
+ Wanfang Data Translator
+ Copyright (C) 2010 TAO Cheng, acestrong@gmail.com
+
+ This program is free software: you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation, either version 3 of the License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program. If not, see <http://www.gnu.org/licenses/>.
+*/
+
+// #######################
+// ##### Sample URLs #####
+// #######################
+
+/*
+ * The starting point for a search is the URL below.
+ * In testing, I tried the following:
+ *
+ * - A search listing of journals
+ * - A search listing of theses
+ * - A search listing of conference papers
+ * - A search listing of foreign literature (for Chinese)
+ * - A journal paper page
+ * - A thesis page
+ * - A conference paper page
+ * - A foreign literature page
+ */
+// http://g.wanfangdata.com.cn/Default.aspx
+
+
+// #################################
+// #### Local utility functions ####
+// #################################
+
+function detectCode(url) { // Return the run of letters between the Wanfang host's "/" and the next "_" in url, or null when absent.
+ var pattern = /[ds]\.(?:g\.)?wanfangdata\.com\.cn\/([A-Za-z]*?)_/;
+ if (pattern.test(url)) {
+ var code = pattern.exec(url)[1];
+ return code;
+ }
+ return null;
+}
+
+function detectType(code) { // Map a Wanfang resource code to a Zotero item type string; false for any other code.
+ if (code == "Periodical") {
+ return "journalArticle";
+ } else if (code == "Thesis") {
+ return "thesis";
+ } else if (code == "Conference") {
+ return "conferencePaper";
+ } else if (code == "NSTLHY") {
+ return "conferencePaper";
+ } else if (code == "NSTLQK") {
+ return "journalArticle";
+ } else {
+ return false;
+ }
+}
+
+function getResolver(doc) { // Build an XPath namespace resolver mapping prefix "x" to doc's root namespace; null when doc has none.
+ var namespace, resolver;
+ namespace = doc.documentElement.namespaceURI;
+ if (namespace) {
+ resolver = function(prefix) {
+ if (prefix == 'x') {
+ return namespace;
+ } else {
+ return null;
+ }
+ };
+ } else {
+ resolver = null;
+ }
+ return resolver;
+}
+
+// #############################
+// ##### Scraper functions #####
+// #############################
+
+function scrape(url) { // Save one Wanfang record: fetch url, follow its "export" link, and build an item from the XML embedded there.
+ // Both requests go through Zotero.Utilities.HTTP.doGet callbacks; the item is completed inside the inner callback.
+ Zotero.Utilities.HTTP.doGet(url, function(page) {
+ var pattern = /href=["'](.*?)["'] class="export"/;
+ var newurl = pattern.exec(page)[1];
+ // NOTE(review): exec() is indexed without a null check (throws if the page has no export link); newurl is requested as written.
+ Zotero.Utilities.HTTP.doGet(newurl, function(page) {
+ // scrape from xml data of export page
+ var pattern;
+
+ pattern = /var text='(.*?)';/;
+ if (pattern.test(page)) {
+ var xml = pattern.exec(page)[1].replace(/(\\r\\n)/g, "\n");
+// Zotero.debug(xml);
+
+ var newItem = new Zotero.Item();
+
+ // Item type, from <ResourceCategory>. NOTE(review): exec() result is indexed unguarded, and detectType() yields false for unknown categories.
+ pattern = /<ResourceCategory>(.*?)<\/ResourceCategory>/;
+ var category = pattern.exec(xml)[1];
+ var type = detectType(category);
+
+// Zotero.debug(type);
+ newItem.itemType = type;
+ newItem.url = url;
+
+ // Title: first <Text> inside <Titles>; an optional second <Text> becomes shortTitle. NOTE(review): titles is used without a null check.
+ pattern = /<Titles>[\s\S]*?<Text>(.*?)<\/Text>[\s\S]*?(?:<Text>(.*?)<\/Text>[\s\S]*?)?<\/Titles>/;
+ var titles = pattern.exec(xml);
+
+ newItem.title = titles[1];
+ if (titles[2]) {
+ newItem.shortTitle = titles[2];
+ }
+
+ // Authors. NOTE(review): the first exec() result is indexed without a null check, so XML with no <Creator><Name> throws here.
+ pattern = /<Creator>\s*<Name>(.*?)<\/Name>/g;
+ var author = pattern.exec(xml)[1];
+ while (author) {
+// Zotero.debug(author);
+
+ var patt = /[a-zA-Z]/;
+ var useComma = true;
+ if (patt.test(author)) {
+ patt = /,/;
+ if (!patt.test(author)) {
+ useComma = false;
+ }
+ }
+ newItem.creators.push(
+ Zotero.Utilities.cleanAuthor(
+ author,
+ "author",
+ useComma));
+
+ var res = pattern.exec(xml);
+ if (res) {
+ author = res[1];
+ } else {
+ author = null;
+ }
+ }
+
+ // Page range / page count from <Page>: a value containing "-" is stored as pages, anything else as numPages
+ pattern = /<Page>([0-9,-]*?)[^0-9,-]*?<\/Page>/;
+ if (pattern.test(xml)) {
+ var pages = pattern.exec(xml)[1];
+// Zotero.debug(pages);
+ pattern = /-/;
+ if (pattern.test(pages)) {
+ newItem.pages = pages;
+ } else {
+ newItem.numPages = pages;
+ }
+ }
+
+ // Page count from <PageCount>; overwrites any numPages set from <Page> above
+ pattern = /<PageCount>([0-9]*)<\/PageCount>/;
+ if (pattern.test(xml)) {
+ var pages = pattern.exec(xml)[1];
+// Zotero.debug(pages);
+ newItem.numPages = pages;
+ }
+
+ // Publication date
+ pattern = /<PublishDate>(.*?)<\/PublishDate>/;
+ if (pattern.test(xml)) {
+ newItem.date = pattern.exec(xml)[1];
+ }
+
+ // Keywords, each added as a tag
+ pattern = /<Keyword>(.*?)<\/Keyword>/g;
+ var res = pattern.exec(xml);
+ while (res) {
+ newItem.tags.push(res[1]);
+ res = pattern.exec(xml);
+ }
+
+ // Abstract
+ pattern = /<Abstract>\s*?<Text>([\s\S]*?)<\/Text>/;
+ if (pattern.test(xml)) {
+ newItem.abstractNote = pattern.exec(xml)[1];
+ }
+
+ // Degree (master's / doctoral), stored as thesisType
+ pattern = /<Degree>(.*?)<\/Degree>/;
+ if (pattern.test(xml)) {
+ newItem.thesisType = pattern.exec(xml)[1];
+ }
+
+ // Thesis advisors (<Tutor>), added as creators of type "director"
+ pattern = /<Tutor>(.*?)<\/Tutor>/g;
+ var res = pattern.exec(xml);
+ while (res) {
+ var tutor = res[1];
+ newItem.creators.push(
+ Zotero.Utilities.cleanAuthor(
+ tutor,
+ "director",
+ true));
+ res = pattern.exec(xml);
+ }
+
+ // Degree-granting school, stored as publisher
+ pattern = /<School>(.*?)<\/School>/;
+ if (pattern.test(xml)) {
+ newItem.publisher = pattern.exec(xml)[1];
+ }
+
+ // Journal name; the <NameEn> value is stored as journalAbbreviation
+ pattern = /<Periodical>[\s\S]*?<Name>(.*?)<\/Name>\s*?<NameEn>(.*?)<\/NameEn>/;
+ if (pattern.test(xml)) {
+ var res = pattern.exec(xml);
+ newItem.publicationTitle = res[1];
+ newItem.journalAbbreviation = res[2];
+ }
+
+ // Volume (the pattern matches the tag as spelled "Volum")
+ pattern = /<Volum>([0-9]*?)<\/Volum>/;
+ if (pattern.test(xml)) {
+ newItem.volume = pattern.exec(xml)[1];
+ }
+
+ // Issue
+ pattern = /<Issue>([0-9]*?)<\/Issue>/;
+ if (pattern.test(xml)) {
+ newItem.issue = pattern.exec(xml)[1];
+ }
+
+ // Series, taken from <Column>
+ pattern = /<Column>(.*?)<\/Column>/;
+ if (pattern.test(xml)) {
+ newItem.series = pattern.exec(xml)[1];
+ }
+
+ // Conference name
+ pattern = /<Conference>[\s\S]*?<Name>(.*?)<\/Name>/;
+ if (pattern.test(xml)) {
+ newItem.conferenceName = pattern.exec(xml)[1];
+ }
+
+ // Conference location, stored as place
+ pattern = /<Conference>[\s\S]*?<Locus>(.*?)<\/Locus>/;
+ if (pattern.test(xml)) {
+ newItem.place = pattern.exec(xml)[1];
+ }
+
+ // Conference proceedings title, taken from <Source>
+ pattern = /<Source>(.*?)<\/Source>/;
+ if (pattern.test(xml)) {
+ newItem.proceedingsTitle = pattern.exec(xml)[1];
+ }
+
+ // ISSN
+ pattern = /<ISSN>(.*?)<\/ISSN>/;
+ if (pattern.test(xml)) {
+ newItem.ISSN = pattern.exec(xml)[1];
+ }
+
+ // Language (letters only)
+ pattern = /<Language>([a-zA-Z]*?)<\/Language>/;
+ if (pattern.test(xml)) {
+ newItem.language = Zotero.Utilities.trim(
+ pattern.exec(xml)[1]);
+ }
+
+ newItem.complete();
+ }
+ });
+ });
+}
+
+// #########################
+// ##### API functions #####
+// #########################
+
+function detectWeb(doc, url) { // "multiple" for paper.aspx URLs; otherwise the type detectType() derives from the URL's resource code.
+ var pattern = /paper\.aspx/i;
+ if (pattern.test(url)) {
+ return "multiple"
+ }
+
+ pattern = /[ds]\.(?:g\.)?wanfangdata\.com\.cn/;
+ if (pattern.test(url)) {
+ var code = detectCode(url);
+// Zotero.debug(code);
+ return detectType(code);
+ }
+
+ return false;
+}
+
+function doWeb(doc, url) { // Scrape the current record, or the records the user picks from a search listing.
+ var nsResolver = getResolver(doc);
+ var urls, lis;
+
+ Zotero.debug(url);
+
+ if (detectWeb(doc, url) == "multiple") {
+// Zotero.debug("Enter multiple.");
+ // search page
+ var items = new Array();
+
+ var xpath = '//li[@class="title_li"]';
+ lis = doc.evaluate(xpath, doc, nsResolver, XPathResult.ANY_TYPE, null);
+
+ var li = lis.iterateNext();
+ var link;
+ var title;
+ while (li) {
+ var a = li.getElementsByTagName("a")[0];
+ title = Zotero.Utilities.cleanTags(a.textContent);
+ link = a.getAttribute("href");
+ if (link) {
+ items[link] = Zotero.Utilities.trimInternal(title);
+// Zotero.debug("title:"+title);
+// Zotero.debug("link:"+link);
+ }
+ li = lis.iterateNext();
+ }
+ // NOTE(review): __count__ is a non-standard property; when it is falsy, urls stays undefined and nothing is scraped.
+ if (items.__count__) {
+ // Let the user choose which entries to save
+ items = Zotero.selectItems(items);
+ if (!items) return true;
+
+ urls = new Array();
+ for (var url in items) {
+ urls.push(url);
+ }
+ }
+ } else {
+ urls = [url];
+ }
+
+ if (urls) {
+// Zotero.debug(urls);
+
+ for (var i=0; i<urls.length; i++) {
+ scrape(urls[i]);
+ }
+ }
+}