www

Unnamed repository; edit this file 'description' to name the repository.
Log | Files | Refs | Submodules | README | LICENSE

commit e636bd9c2e8e713ebf594a4859be8bb0b6fae00b
parent c9c5fa79f506719d8be6bb2a9258ed244c7753a6
Author: Avram Lyon <ajlyon@gmail.com>
Date:   Thu,  7 Oct 2010 20:36:57 +0000

Trans: Typo fix to BnF, per Ziche; new version of CNKI by Ace Strong


Diffstat:
M translators/Bibliothèque nationale de France.js | 2 +-
M translators/CNKI.js | 835 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++--------------
2 files changed, 687 insertions(+), 150 deletions(-)

diff --git a/translators/Bibliothèque nationale de France.js b/translators/Bibliothèque nationale de France.js @@ -129,7 +129,7 @@ var BnfClass = function() { //Default case "205": default: - return "contibutor"; + return "contributor"; } }; diff --git a/translators/CNKI.js b/translators/CNKI.js @@ -2,13 +2,13 @@ "translatorID":"5c95b67b-41c5-4f55-b71a-48d5d7183063", "label":"CNKI", "creator":"Ace Strong<acestrong@gmail.com> and Heromyth<zxpmyth@yahoo.com.cn>", - "target":"^https?://(?:(?:(dlib|epub|ckrd)(?:.edu)?.cnki.net)|(?:[0-9.]+))/(?:kns50|grid2008|grid20)", + "target":"^https?://(?:(?:(dlib|epub|acad|apj1|law1)\\.cnki\\.net)|(?:[0-9\\.]+))/(?:grid2008|kns50|Kns55|kcms)", "minVersion":"2.0.b4", "maxVersion":"", "priority":100, - "inRepository":true, + "inRepository":"1", "translatorType":4, - "lastUpdated":"2010-09-26 15:08:45" + "lastUpdated":"2010-10-07 15:58:33" } /* @@ -29,189 +29,726 @@ along with this program. If not, see <http://www.gnu.org/licenses/>. */ +// ####################### +// ##### Sample URLs ##### +// ####################### -function detectWeb(doc, url) { - var articleRe = /detail.aspx/; - var s = articleRe.exec(url); +/* + * The starting point for an search is the URL below. 
+ * In testing, I tried the following: + * + * - A search listing of journals + * - A search listing of phd thesis + * - A search listing of master thesis + * - A search listing of conference papers + * - A search listing of newspaper articles + * - A journal paper page + * - A phd thesis page + * - A master thesis page + * - A conference paper page + * - A newspaper article page + */ +// http://epub.cnki.net/grid2008/index/ZKCALD.htm - if(s) { - return "journalArticle"; - } else { - articleRe = /Brief.aspx/; - s = articleRe.exec(url); - if(s) - return "multiple"; +// ################################# +// #### Local utility functions #### +// ################################# + +function detectCode(url) { + var pattern = /(?:dbcode|dbname)=([A-Z]{4})/i; + if (pattern.test(url)) { + var code = pattern.exec(url)[1]; + return code; } +} - return false; +function getResolver(doc) { + var namespace, resolver; + namespace = doc.documentElement.namespaceURI; + if (namespace) { + resolver = function(prefix) { + if (prefix == 'x') { + return namespace; + } else { + return null; + } + }; + } else { + resolver = null; + } + return resolver; +} + +function trimTags(text) { + var pattern = /(<.*?>)/g; + text = text.replace(pattern, ""); + return text; +} + +function trimMultiline(text) { + var pattern = /(\s{2,})/g; + text = text.replace(pattern, "\n"); + return text; } -function scrape(doc, url) { - //var namespace = doc.documentElement.namespaceURI; - //var nsResolver = namespace ? 
function(prefix) { - // if (prefix == "x") return namespace; else return null; - //} : null; - var nsResolver = null; +// ############################# +// ##### Scraper functions ##### +// ############################# +// work for journalArticle +function scrapeAndParse1(url) { +// Zotero.debug("journalArticle"); + var page = Zotero.Utilities.retrieveSource(url); + var pattern; + + // 类型 & URL var itemType = "journalArticle"; - // TODO: 因为中英文信息都不想丢失,所以存为两个Item,也算是中国特色吧~ - // 但是目前只解析出中文的信息,下个版本中添加英文信息。 var newItem = new Zotero.Item(itemType); - //Zotero.debug(itemType); +// Zotero.debug(url); newItem.url = url; // 标题 - var titles = doc.title.split('-').slice(0,-1); - //Zotero.debug(titles); - var title = titles.join("-"); - Zotero.debug("Title:"+title); - newItem.title = title; - - // 附件,网页快照 - var snapName = title + " (CNKI)"; - //Zotero.debug(snapName); - //newItem.attachments.push({document:doc, title:snapName}); - newItem.attachments.push({url:newItem.url, snapshot:true, title:snapName, mimeType:"text/html"}); - //Zotero.debug(doc); - - // 其他信息,/html/body/table[4]/tbody/tr/td[2]/table/tbody/tr/td[2]/table[2]/tbody - var dataRows = doc.evaluate('//body/table[4]/tbody/tr/td[2]/table/tbody/tr/td[2]/table[2]/tbody/tr', doc, nsResolver, - XPathResult.ANY_TYPE, null); - var dataRow; - while(dataRow = dataRows.iterateNext()) { - var tds = dataRow.getElementsByTagName("td"); - var heading = Zotero.Utilities.trimInternal(tds[0].textContent); - var content = tds[1]; - if(heading == "【作者中文名】" || heading == "【作者】") { - //Zotero.debug("Authors:"); - var as = content.getElementsByTagName("a"); - //Zotero.debug(as.length); - var i=0; - for(i=0;i<as.length;i++) { - var a = as[i]; - newItem.creators.push(Zotero.Utilities.cleanAuthor(a.textContent, "author", true)); - //Zotero.debug(a.textContent); + pattern = /<span (?:id="chTitle"|class='datatitle')>(.*?)<\/span>/; + if (pattern.test(page)) { + var title = trimTags(pattern.exec(page)[1]); + newItem.title = title; +// 
Zotero.debug("title: "+title); + } + + // 作者 + var authorNames; + pattern = /【作者】(?:[\s\S]*?)GetLinkListEx\('(.*?);','/; + if (pattern.test(page)) { + authorNames = pattern.exec(page)[1].split(";"); + } else { + pattern = /【作者】([\s\S]*?)<\/tr>/; + if (pattern.test(page)) { + authorNames = trimTags(pattern.exec(page)[1]).split(";"); + } + } + if (authorNames) { + for (var i=0; i<authorNames.length; i++) { + var authorName = Zotero.Utilities.trim(authorNames[i]); + if (authorName.length > 0) { + newItem.creators.push( + Zotero.Utilities.cleanAuthor(authorNames[i], + "author", true)); + } + } +// Zotero.debug("authorNames:\n"+authorNames); + } + + // 摘要 + var abst; + pattern = /【摘要】\s*<[^>]*>(.*?)<\/span>/; + if (pattern.test(page)) { + abst = trimTags(pattern.exec(page)[1]); + } else { + pattern = /【摘要】([\s\S]*?)<\/tr>/; + if (pattern.test(page)) { + abst = trimTags(pattern.exec(page)[1]); + } + } + if (abst) { +// Zotero.debug("abstract:\n"+abst); + newItem.abstractNote = Zotero.Utilities.trim(abst); + } + pattern = /【Abstract】\s*<[^>]*>(.*?)<\/span>/; + if (pattern.test(page)) { + abst = trimTags(pattern.exec(page)[1]); + } else { + pattern = /【英文摘要】([\s\S]*?)<\/tr>/; + if (pattern.test(page)) { + abst = trimTags(pattern.exec(page)[1]); + } + } + if (abst) { +// Zotero.debug("abstract:\n"+abst); + if (newItem.abstractNote===undefined) { + newItem.abstractNote = Zotero.Utilities.trim(abst); + } else { + newItem.abstractNote = newItem.abstractNote + "\n" + + Zotero.Utilities.trim(abst); + } + } +// Zotero.debug(newItem.abstractNote); + + // 关键词 + var tags; + pattern = /【关键词】(?:[\s\S]*?)KeywordFilter\('(.*?)'\),'kw'/; + if (pattern.test(page)) { + tags = pattern.exec(page)[1].split(";"); + } else { + pattern = /【中文关键词】([\s\S]*?)<\/tr>/; + if (pattern.test(page)) { + tags = trimTags(pattern.exec(page)[1]).split(";"); + } + } + if (tags) { + for (var i=0; i<tags.length; i++) { + var tag = Zotero.Utilities.trim(tags[i]); + if (tag.length>0 && newItem.tags.indexOf(tag)<0) 
{ + newItem.tags.push(tag); + } + } +// Zotero.debug("tags:\n"+tags); + } + pattern = /【Key words】(?:[\s\S]*?)GetLinkList\('(.*?)','kw'/; + if (pattern.test(page)) { + tags = pattern.exec(page)[1].split(";"); + } else { + pattern = /【英文关键词】([\s\S]*?)<\/tr>/; + if (pattern.test(page)) { + tags = trimTags(pattern.exec(page)[1]).split(";"); + } + } + if (tags) { + for (var i=0; i<tags.length; i++) { + var tag = Zotero.Utilities.trim(tags[i]); + if (tag.length>0 && newItem.tags.indexOf(tag)<0) { + newItem.tags.push(tag); } - } else if(heading == "【文献出处】" || heading == "【刊名】") { - //Zotero.debug("Publication:"); - var as = content.getElementsByTagName("a"); - //Zotero.debug(as[0].textContent); - //Zotero.debug(as[3].textContent); - // 出版社 - newItem.publicationTitle = as[0].textContent; - var parts = Zotero.Utilities.trimInternal(as[3].textContent); - // 出版时间 - var year = parts.substr(0,4); - //Zotero.debug(year); - newItem.date = year; - // 卷号或期号 - var pattern = /(.*)(期|卷)/ - var testStr = parts.split(" ")[1]; - //Zotero.debug(testStr); - if (pattern.test(testStr)){ - var attr = pattern.exec(testStr); - //Zotero.debug(attr[1]+":"+attr[2]); - if(attr[2]=="期"){ - newItem.issue = attr[1]; - }else{ - newItem.volume = attr[1]; - } + } +// Zotero.debug("tags:\n"+tags); + } + + // 文献出处 & DOI & 出版时间 + pattern = /【文献出处】([\s\S]*?)<\/a>/; + if (pattern.test(page)) { + var publicationTitle = trimTags(pattern.exec(page)[1]); + newItem.publicationTitle = Zotero.Utilities.trim(publicationTitle); +// Zotero.debug("publicationTitle: "+publicationTitle); + } + var doi; + pattern = /【DOI】(.*?)<\/li>/; + if (pattern.test(page)) { + doi= pattern.exec(page)[1]; + } else { + pattern = /【DOI】([\s\S]*?)<\/tr>/; + if (pattern.test(page)) { + doi= trimTags(pattern.exec(page)[1]); + } + } + if (doi) { + newItem.DOI = Zotero.Utilities.trim(doi); +// Zotero.debug("doi: "+doi); + } + pattern = /【文献出处】(?:[\s\S]*?)(\d{4})年\s*(\d{2})(卷|期)/; + if (pattern.test(page)) { + var date = pattern.exec(page)[1]; 
+ newItem.date = date; + var val = pattern.exec(page)[2]; + var attr = pattern.exec(page)[3]; + if (attr == "卷") { + newItem.volume = val; + } else { + newItem.issue = val; + } +// Zotero.debug("date: "+date); +// Zotero.debug("val: "+val); +// Zotero.debug("attr: "+attr); + } + + newItem.complete(); +} + +// work for thesis +function scrapeAndParse2(url) { +// Zotero.debug("thesis"); + var page = Zotero.Utilities.retrieveSource(url); + var pattern; + + // 类型 & URL + var itemType = "thesis"; + var newItem = new Zotero.Item(itemType); +// Zotero.debug(url); + newItem.url = url; + var code = detectCode(url); + if (code == "CDFD") { + newItem.thesisType = "博士论文" + } else { + newItem.thesisType = "硕士论文" + } +// Zotero.debug(newItem.thesisType); + + + // 标题 + pattern = /<span (?:id="chTitle"|class='datatitle')>(.*?)<\/span>/; + if (pattern.test(page)) { + var title = pattern.exec(page)[1]; + pattern = /(<.*?>)/g; + title = title.replace(pattern, ""); + newItem.title = title; +// Zotero.debug("title: "+title); + } + + // 作者 + pattern = /【作者】([\s\S]*?)<\/a>/; + if (pattern.test(page)) { + var authorNames = trimTags(pattern.exec(page)[1]).split(";"); + for (var i=0; i<authorNames.length; i++) { + newItem.creators.push( + Zotero.Utilities.cleanAuthor(authorNames[i], + "author", true)); + } +// Zotero.debug("authorNames:\n"+authorNames); + } + + // 导师 + pattern = /【导师】([\s\S]*?)<\/a>/; + if (pattern.test(page)) { + var directors = trimTags(pattern.exec(page)[1]).split(";"); + for (var i=0; i<directors.length; i++) { + newItem.creators.push( + Zotero.Utilities.cleanAuthor(trimTags(directors[i]), + "director", true)); + } +// Zotero.debug("directors: "+directors); + } + + // 摘要 + var abst; + pattern = /ReplaceFont\('ChDivSummary','(.*?)(?='\);ReplaceFont)/; + if (pattern.test(page)) { + abst = trimTags(pattern.exec(page)[1]); + } else { + pattern = /【中文摘要】([\s\S]*?)<\/tr>/; + if (pattern.test(page)) { + abst = trimTags(pattern.exec(page)[1]); + } + } + if (abst) { +// 
Zotero.debug("abstract:\n"+abst); + newItem.abstractNote = trimMultiline(abst); + } + pattern = /ReplaceFont\('EnDivSummary','(.*?)(?='\);if)/; + if (pattern.test(page)) { + abst = trimTags(pattern.exec(page)[1]); + } else { + pattern = /【英文摘要】([\s\S]*?)<\/tr>/; + if (pattern.test(page)) { + abst = trimTags(pattern.exec(page)[1]); + } + } + if (abst) { +// Zotero.debug("abstract:\n"+abst); + if (newItem.abstractNote===undefined) { + newItem.abstractNote = Zotero.Utilities.trim(abst); + } else { + newItem.abstractNote = newItem.abstractNote + "\n" + + trimMultiline(abst); + } + } +// Zotero.debug(newItem.abstractNote); + + // 关键词 + var tags; + pattern = /【关键词】\s*<span[^>]*>(.*?)<\/a>*<\/span>/; + if (pattern.test(page)) { + tags = trimTags(pattern.exec(page)[1]).split(";"); + } else { + pattern = /【关键词】([\s\S]*?)<\/tr>/; + if (pattern.test(page)) { + tags = trimTags(pattern.exec(page)[1]).split(";"); + } + } + if (tags) { + for (var i=0; i<tags.length; i++) { + var tag = Zotero.Utilities.trim(tags[i]); + if (tag.length>0 && newItem.tags.indexOf(tag)<0) { + newItem.tags.push(tag); } - } else if(heading == "【摘要】" || heading == "【英文摘要】") { - //Zotero.debug("Abstract:"); - var abstract = null; - if (content.getElementsByTagName("font")[0] != null){ - abstract = content.getElementsByTagName("font")[0].textContent; + } +// Zotero.debug("tags:\n"+tags); + } + pattern = /【Key words】\s*<span[^>]*>(.*?)<\/a>*<\/span>/; + if (pattern.test(page)) { + tags = trimTags(pattern.exec(page)[1]).split(";"); + } else { + pattern = /【英文关键词】([\s\S]*?)<\/tr>/; + if (pattern.test(page)) { + tags = trimTags(pattern.exec(page)[1]).split(";"); + } + } + if (tags) { + for (var i=0; i<tags.length; i++) { + var tag = Zotero.Utilities.trim(tags[i]); + if (tag.length>0 && newItem.tags.indexOf(tag)<0) { + newItem.tags.push(tag); } - else{ - // 有些地方没有字体,直接在td标签下就是摘要。 - abstract = content.textContent; + } +// Zotero.debug("tags:\n"+tags); + } +// Zotero.debug(newItem.tags); + + // 出版学校 & DOI & 出版时间 + 
var publisher; + pattern = /【网络出版投稿人】\s*<a[^>]*>(.*?)<\/a>/; + if (pattern.test(page)) { + publisher = pattern.exec(page)[1]; + } else { + pattern = /【网络出版投稿人】([\s\S]*?)<\/tr>/; + if (pattern.test(page)) { + publisher = Zotero.Utilities.trim( + trimTags(pattern.exec(page)[1])); + } + } + if (publisher) { + pattern = /(.*?)((.*?))/; + if (pattern.test(publisher)) { + newItem.publisher = pattern.exec(publisher)[1]; + newItem.place = pattern.exec(publisher)[2]; + } else { + newItem.publisher = publisher; + } +// Zotero.debug("publisher: "+publisher); + } + var doi; + pattern = /【DOI】(.*?)<\/li>/; + if (pattern.test(page)) { + doi= pattern.exec(page)[1]; + } else { + pattern = /【DOI】([\s\S]*?)<\/tr>/; + if (pattern.test(page)) { + var doi= trimTags(pattern.exec(page)[1]); + } + } + if (doi) { + newItem.DOI = Zotero.Utilities.trim(doi); +// Zotero.debug("doi: "+doi); + } + var date; + pattern = /【网络出版投稿时间】(.*?)\s*<\/li>/; + if (pattern.test(page)) { + date = pattern.exec(page)[1]; + } else { + pattern = /【网络出版投稿时间】([\s\S]*?)\s*<\/tr>/; + if (pattern.test(page)) { + date = trimTags(pattern.exec(page)[1]); + } + } + if (date) { + newItem.date = Zotero.Utilities.trim(date); +// Zotero.debug("date: "+date); + } + + newItem.complete(); +} + +// work for conferencePaper +function scrapeAndParse3(url) { +// Zotero.debug("conferencePaper"); + var page = Zotero.Utilities.retrieveSource(url); + var pattern; + + // 类型 & URL + var itemType = "conferencePaper"; + var newItem = new Zotero.Item(itemType); +// Zotero.debug(url); + newItem.url = url; + + // 标题 + pattern = /<span id="chTitle">(.*?)<\/span>/; + if (pattern.test(page)) { + var title = trimTags(pattern.exec(page)[1]); + newItem.title = title; +// Zotero.debug("title: "+title); + } + + // 作者 + pattern = /【作者】(.*?)<\/p>/; + if (pattern.test(page)) { + var authorNames = trimTags(pattern.exec(page)[1]).split(";"); + for (var i=0; i<authorNames.length; i++) { + newItem.creators.push( + Zotero.Utilities.cleanAuthor( + 
Zotero.Utilities.trim(authorNames[i]), + "author", true)); + } +// Zotero.debug("authorNames:\n"+authorNames); + } + + // 摘要 + var abst; + pattern = /ReplaceFont\('ChDivSummary','(.*?)(?='\);ReplaceFont)/; + if (pattern.test(page)) { + abst = pattern.exec(page)[1]; +// Zotero.debug("raw:\n"+abst); + pattern = /(<.*?>)/g; + abst = abst.replace(pattern, ""); +// Zotero.debug("after:\n"+abst); + newItem.abstractNote = Zotero.Utilities.trim(abst); + } + + pattern = /ReplaceFont\('EnDivSummary','(.*?)(?='\);if)/; + if (pattern.test(page)) { + abst = pattern.exec(page)[1]; +// Zotero.debug("raw:\n"+abst); + if (abst != undefined && abst != null) { + pattern = /(<.*?>)/g; + abst = abst.replace(pattern, ""); +// Zotero.debug("after:\n"+abst); + + if (newItem.abstractNote===undefined) { + newItem.abstractNote = Zotero.Utilities.trim(abst); + } else { + newItem.abstractNote = newItem.abstractNote + "\n" + + Zotero.Utilities.trim(abst); } - //Zotero.debug(abstract); - //Zotero.debug(newItem.abstractNote); - if(newItem.abstractNote===undefined){ - newItem.abstractNote = Zotero.Utilities.trim(abstract); - }else{ - newItem.abstractNote = newItem.abstractNote + "\n" + Zotero.Utilities.trim(abstract); + } + } +// Zotero.debug("abst:\n"+newItem.abstractNote); + + // 关键词 + pattern = /【关键词】\s*<span[^>]*>(.*?)<\/a>*<\/span>/; + if (pattern.test(page)) { + var tags = trimTags(pattern.exec(page)[1]).split(";"); + for (var i=0; i<tags.length; i++) { + var tag = Zotero.Utilities.trim(tags[i]); + if (tag.length>0 && newItem.tags.indexOf(tag)<0) { + newItem.tags.push(tag); } - } else if(heading == "【DOI】") { - //Zotero.debug("DOI:"); - var doi = Zotero.Utilities.trimInternal(content.textContent); - //Zotero.debug(doi); - newItem.DOI = doi; - } else if(heading == "【关键词】"||heading == "【英文关键词】"||heading == "【中文关键词】") { - //Zotero.debug("tags:"); - var as = content.getElementsByTagName("a"); - var i=0; - for(i=0;i<as.length;i++) { - var a = as[i]; - //Zotero.debug(a.textContent); - 
newItem.tags.push(a.textContent); + } +// Zotero.debug("tags:\n"+tags); + } + pattern = /【Key words】\s*<span[^>]*>(.*?)<\/a>*<\/span>/; + if (pattern.test(page)) { + var tags = trimTags(pattern.exec(page)[1]).split(";"); + for (var i=0; i<tags.length; i++) { + var tag = Zotero.Utilities.trim(tags[i]); + if (tag.length>0 && newItem.tags.indexOf(tag)<0) { + newItem.tags.push(tag); } } +// Zotero.debug("tags:\n"+tags); + } +// Zotero.debug(newItem.tags); + + // 会议名称 & 会议录名称 & 会议地点 & 会议时间 + pattern = /【会议名称】(.*?)\s*<\/li>/; + if (pattern.test(page)) { + var conferenceName = trimTags(pattern.exec(page)[1]); + newItem.conferenceName = conferenceName; +// Zotero.debug("conferenceName: "+conferenceName); + } + pattern = /【会议录名称】(.*?)\s*<\/li>/; + if (pattern.test(page)) { + var proceedingsTitle = trimTags(pattern.exec(page)[1]); + newItem.proceedingsTitle = proceedingsTitle; +// Zotero.debug("proceedingsTitle: "+proceedingsTitle); + } + pattern = /【会议地点】(.*?)\s*<\/li>/; + if (pattern.test(page)) { + var place = trimTags(pattern.exec(page)[1]); + newItem.place = place; +// Zotero.debug("place: "+place); + } + pattern = /【会议时间】(.*?)\s*<\/li>/; + if (pattern.test(page)) { + var date = trimTags(pattern.exec(page)[1]); + newItem.date = date; +// Zotero.debug("date: "+date); + } + + newItem.complete(); +} + +// work for newspaperArticle +function scrapeAndParse4(url) { +// Zotero.debug("newspaperArticle"); + var page = Zotero.Utilities.retrieveSource(url); + var pattern; + + // 类型 & URL + var itemType = "newspaperArticle"; + var newItem = new Zotero.Item(itemType); +// Zotero.debug(url); + newItem.url = url; + + // 标题 + pattern = /<span id="chTitle">(.*?)<\/span>/; + if (pattern.test(page)) { + var title = trimTags(pattern.exec(page)[1]); + newItem.title = title; +// Zotero.debug("title: "+title); + } + + // 副标题/引题 + var shortTitle; + pattern = /<p>【(?:副标题|引题)】(.*?)(?=<\/p>)/; + if (pattern.test(page)) { + shortTitle = pattern.exec(page)[1]; +// Zotero.debug("shortTitle: 
"+shortTitle); + newItem.shortTitle = Zotero.Utilities.trimInternal(shortTitle); + } +// Zotero.debug(newItem.shortTitle); + + // 作者 + pattern = /【作\s*者】(.*?)<\/p>/; + if (pattern.test(page)) { + var authorNames = trimTags(pattern.exec(page)[1]).split(";"); + for (var i=0; i<authorNames.length; i++) { + newItem.creators.push( + Zotero.Utilities.cleanAuthor( + Zotero.Utilities.trim(authorNames[i]), + "author", true)); + } +// Zotero.debug("authorNames:\n"+authorNames); + } + + // 正文快照 + var abst; + pattern = /<p>【正文快照】(.*?)(?=<\/p>)/; + if (pattern.test(page)) { + abst = pattern.exec(page)[1]; +// Zotero.debug("abst:\n"+abst); + newItem.abstractNote = Zotero.Utilities.trimInternal(abst); + } +// Zotero.debug(newItem.abstractNote); + + // 报纸名称 & DOI & 出版时间 & 版名 & 版号 + pattern = /【报纸名称】\s*<[^>]*>(.*?)<\/a>/; + if (pattern.test(page)) { + var publicationTitle = trimTags(pattern.exec(page)[1]); + newItem.publicationTitle = publicationTitle; +// Zotero.debug("publicationTitle: "+publicationTitle); + } + pattern = /【DOI】\s*(.*?)\s*<\/li>/; + if (pattern.test(page)) { + var doi = pattern.exec(page)[1]; + newItem.DOI = doi; +// Zotero.debug("doi: "+doi); + } + pattern = /【报纸日期】\s*(.*?)\s*<\/li>/; + if (pattern.test(page)) { + var date = pattern.exec(page)[1]; + newItem.date = date; +// Zotero.debug("date: "+date); } - // download pdf file - // /html/body/table[4]/tbody/tr/td[2]/table/tbody/tr/td[2]/table/tbody/tr/td/table/tbody/tr/td/table/tbody/tr/td/table/tbody/tr/td[2]/a[2] - //var as = table3.getElementsByTagName("a"); - //Zotero.debug(as[0].textContent); - //Zotero.debug(as[1].textContent); - //var pdfurlElmt = as[1]; - //if (pdfurlElmt) { - // newItem.attachments.push({url:pdfurlElmt.href, title:"CNKI Full Text PDF", mimeType:"application/pdf", downloadable:true}); - //} - //Zotero.debug(pdfurlElmt.href); - //Zotero.debug("finished."); + pattern = /【版名】\s*(.*?)\s*<\/li>/; + if (pattern.test(page)) { + var section = pattern.exec(page)[1]; + newItem.section = section; 
+// Zotero.debug("section: "+section); + } + pattern = /【版号】\s*(.*?)\s*<\/li>/; + if (pattern.test(page)) { + var edition = pattern.exec(page)[1]; + newItem.edition = edition; +// Zotero.debug("edition: "+edition); + } + newItem.complete(); } +// ######################### +// ##### API functions ##### +// ######################### + +function detectWeb(doc, url) { + var pattern = /detail.aspx/; + + if (pattern.test(url)) { + var code = detectCode(url); +// Zotero.debug(code); + if (code == "CJFQ" || code == "CJFD") { + return "journalArticle"; + } else if (code == "CDFD") { + return "thesis"; + } else if (code == "CMFD" || code == "CLKM") { + return "thesis"; + } else if (code == "CPFD") { + return "conferencePaper"; + } else if (code == "CCND") { + return "newspaperArticle"; + } + } + + pattern = /brief/; + if (pattern.test(url)) { + return "multiple" + } + + return false; +} + function doWeb(doc, url) { - var namespace = doc.documentElement.namespaceURI; - var nsResolver = null; + var nsResolver = getResolver(doc); + var urls, tds; Zotero.debug(url); - if(detectWeb(doc, url) == "multiple") { - //Zotero.debug("Enter multiple~"); + if (detectWeb(doc, url) == "multiple") { +// Zotero.debug("Enter multiple."); // search page var items = new Array(); - var tableRows = doc.evaluate('//table[4]/tbody/tr/td[4]/table[3]/tbody/tr/td/table/tbody/tr[2]/td/table/tbody/tr', doc, nsResolver, XPathResult.ANY_TYPE, null); - //Zotero.debug("get table rows"); - var tableRow; - //Zotero.debug("begin to fetch multiple title and link"); - while(tableRow = tableRows.iterateNext()) { - //Zotero.debug(tableRow!=null); - var title = ""; - var link = ""; - var as = tableRow.getElementsByTagName("a"); - for each(var a in as) { - if(a.textContent) { - // shoulde only one 'a' here. 
- link = a.href; - title = a.textContent; - } + var xpath = '//iframe[@id="iframeResult"]'; + var iframe = doc.evaluate(xpath, doc, nsResolver, + XPathResult.ANY_TYPE, null).iterateNext(); + xpath = '//div[@class="GridTitleDiv"]'; + if (iframe) { + var subdoc = iframe.contentDocument; + tds = subdoc.evaluate(xpath, subdoc, nsResolver, + XPathResult.ANY_TYPE, null); + } else { + tds = doc.evaluate(xpath, doc, nsResolver, + XPathResult.ANY_TYPE, null); + } + + var td = tds.iterateNext(); + var link; + var title; + while (td) { + var a = td.getElementsByTagName("a")[0]; + title = Zotero.Utilities.cleanTags(a.textContent); + pattern = /;(.*)/; + if (pattern.test(title)) { + title = pattern.exec(title)[1]; } - //Zotero.debug(title); - //Zotero.debug(link); - if(link) { + link = a.getAttribute("href"); + if (link) { + pattern = /^(http:\/\/.*?)\//; + link = pattern.exec(url)[1] + link; items[link] = Zotero.Utilities.trimInternal(title); +// Zotero.debug("title:"+title); +// Zotero.debug("link:"+link); } + td = tds.iterateNext(); } - // 让用户选择要保存哪些文献 - items = Zotero.selectItems(items); - if(!items) return true; - //Zotero.debug("go on processing."); +// Zotero.debug(items); + if (items.__count__) { + // 让用户选择要保存哪些文献 + items = Zotero.selectItems(items); + if (!items) return true; - var urls = new Array(); - for(var url in items) { - urls.push(url); + urls = new Array(); + for (var url in items) { + urls.push(url); + } } } else { - var urls = [url]; + urls = [url]; + } + + if (urls) { +// Zotero.debug(urls); + + for (var i=0; i<urls.length; i++) { + var type = detectWeb(null, urls[i]); +// Zotero.debug(type); + if (type == "journalArticle") { + scrapeAndParse1(urls[i]); + } else if (type == "thesis") { + scrapeAndParse2(urls[i]); + } else if (type == "conferencePaper") { + scrapeAndParse3(urls[i]); + } else if (type == "newspaperArticle") { + scrapeAndParse4(urls[i]); + } else { + Zotero.debug("Not supported type."); + } + } } - //Zotero.debug(urls); - // 下面对每条url进行解析 - 
Zotero.Utilities.processDocuments(urls, scrape, function() { Zotero.done(); },null); - Zotero.wait(); }