www

Unnamed repository; edit this file 'description' to name the repository.
Log | Files | Refs | Submodules | README | LICENSE

commit f672b05d89d69a67e7d00b5b7fe9fa4320ecc623
parent 9aa227db6b47d4be646ca00756aa07a2f3586ff5
Author: Avram Lyon <ajlyon@gmail.com>
Date:   Mon,  9 Aug 2010 00:08:23 +0000

- Adding Sopheak's TVNZ translator.
- Adding reference book support to CrossRef
	per http://forums.zotero.org/discussion/12942
- Changing authorship of NZ Herald to match usual standard.
- Adding support for patent issue dates in RIS import and export
	discussed somewhere; patch has been lying around for some time


Diffstat:
Mtranslators/CrossRef.js | 38+++++++++++++++++++++++++++++++++-----
Mtranslators/New Zealand Herald.js | 2+-
Mtranslators/RIS.js | 50+++++++++++++++++++++++++++++++++++++++++++++++---
Atranslators/TVNZ.js | 215+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
4 files changed, 296 insertions(+), 9 deletions(-)

diff --git a/translators/CrossRef.js b/translators/CrossRef.js @@ -76,9 +76,38 @@ function processCrossRef(xmlOutput) { var metadataXML = xml.doi_record[0].crossref.book.book_series_metadata; item.publicationTitle = metadataXML.series_metadata.titles.title[0].toString(); - } + // Reference book entry + // Example: doi: 10.1002/14651858.CD002966.pub3 + // http://www.crossref.org/openurl/?url_ver=Z39.88-2004&req_dat=usr:pwd&rft_id=info:doi/10.1002/14651858.CD002966.pub3&format=unixref&redirect=false + } else if(xml.doi_record[0].crossref.book.@book_type.length() + && xml.doi_record[0].crossref.book.@book_type == 'reference' + && xml.doi_record[0].crossref.book.content_item.@component_type == 'reference_entry') { + var item = new Zotero.Item("bookSection"); + var refXML = xml.doi_record[0].crossref.book.content_item; + var metadataXML = xml.doi_record[0].crossref.book.book_metadata; + item.publicationTitle = metadataXML.titles.title[0].toString(); + + // Handle book authors + if (metadataXML.contributors.length()) { + for each (var creatorXML in metadataXML.contributors.children()) { + var creator = {creatorType:"bookAuthor"}; + if(creatorXML.@contributor_role == "editor") { + creator.creatorType = "editor"; + } else if(creatorXML.@contributor_role == "translator") { + creator.creatorType = "translator"; + } + if(creatorXML.localName() == "organization") { + creator.fieldMode = 1; + creator.lastName = creatorXML.toString(); + } else if(creatorXML.localName() == "person_name") { + creator.firstName = fixAuthorCapitalization(creatorXML.given_name.toString()); + creator.lastName = fixAuthorCapitalization(creatorXML.surname.toString()); + } + item.creators.push(creator); + } + } // Book - else { + } else { var item = new Zotero.Item("book"); var refXML = xml.doi_record[0].crossref.book.book_metadata; var metadataXML = refXML; @@ -112,7 +141,7 @@ function processCrossRef(xmlOutput) { } item.seriesNumber = seriesXML.series_number.toString(); } - + for each(var creatorXML in 
contributors) { var creator = {creatorType:"author"}; if(creatorXML.@contributor_role == "editor") { @@ -179,4 +208,4 @@ function doSearch(item) { }); Zotero.wait(); -} -\ No newline at end of file +} diff --git a/translators/New Zealand Herald.js b/translators/New Zealand Herald.js @@ -1,7 +1,7 @@ { "translatorID" : "c7830593-807e-48cb-99f2-c3bed2b148c2", "label" : "New Zealand Herald", - "creator" : "Sopheak Hean (University of Waikato, Faculty of Education, New Zealand)", + "creator" : "Sopheak Hean, Michael Berkowitz", "target" : "^http://www\\.nzherald\\.co\\.nz", "minVersion" : "1.0", "maxVersion" : "", diff --git a/translators/RIS.js b/translators/RIS.js @@ -197,7 +197,6 @@ function processTag(item, tag, value) { // the secondary date field can mean two things, a secondary date, or an // invalid EndNote-style date. let's see which one this is. // patent: application (filing) date -- do not append to date field - // for now. Zotero needs a filing date field added to make use of this. var dateParts = value.split("/"); if(dateParts.length != 4 && item.itemType != "patent") { // an invalid date and not a patent. @@ -207,6 +206,29 @@ function processTag(item, tag, value) { value += " " + item.date; } item.date = value; + } else if (item.itemType == "patent") { + // Date-handling code copied from above + if(dateParts.length == 1) { + // technically, if there's only one date part, the file isn't valid + // RIS, but EndNote writes this, so we have to too + // Nick: RIS spec example records also only contain a single part + // even though it says the slashes are not optional (?) 
+ item.filingDate = value; + } else { + // in the case that we have a year and other data, format that way + + var month = parseInt(dateParts[1]); + if(month) { + month--; + } else { + month = undefined; + } + + item.filingDate = Zotero.Utilities.formatDate({year:dateParts[0], + month:month, + day:dateParts[2], + part:dateParts[3]}); + } } // ToDo: Handle correctly formatted Y2 fields (secondary date) } else if(tag == "N1" || tag == "AB") { @@ -243,6 +265,7 @@ function processTag(item, tag, value) { } } else if(tag == "SN") { // ISSN/ISBN - just add both + // TODO We should be able to tell these apart if(!item.ISBN) { item.ISBN = value; } @@ -479,6 +502,28 @@ function doExport() { } addTag("PY", string); } + + // filingDate (patents) + if(item.filingDate) { + var date = Zotero.Utilities.strToDate(item.filingDate); + var string = date.year+"/"; + if(date.month != undefined) { + // deal with javascript months + date.month++; + if(date.month < 10) string += "0"; + string += date.month; + } + string += "/"; + if(date.day != undefined) { + if(date.day < 10) string += "0"; + string += date.day; + } + string += "/"; + if(date.part != undefined) { + string += date.part; + } + addTag("Y2", string); + } // notes if(Zotero.getOption("exportNotes")) { @@ -524,4 +569,4 @@ function doExport() { Zotero.write("ER - \r\n\r\n"); } -} -\ No newline at end of file +} diff --git a/translators/TVNZ.js b/translators/TVNZ.js @@ -0,0 +1,215 @@ +{ + "translatorID" : "649c2836-a94d-4bbe-8e28-6771f283702f", + "label" : "TVNZ", + "creator" : "Sopheak Hean", + "target" : "^http://tvnz\\.co\\.nz", + "minVersion" : "1.0", + "maxVersion" : "", + "priority" : 100, + "inRepository" : true, + "translatorType" : 4, + "lastUpdated":"2010-08-03 10:30:20" +} + +function detectWeb(doc, url) { + var namespace = doc.documentElement.namespaceURI; + var nsResolver = namespace ? 
function(prefix) { + if (prefix == "x" ) return namespace; else return null; + } : null; + + if (doc.location.href.indexOf("/search/") !=-1){ + return "multiple"; + } + else if ((doc.location.href.indexOf("politics-news/") !=-1) && (doc.location.href.indexOf("-video") !=-1) + || (doc.location.href.indexOf("politics-news/") !=-1) && (doc.location.href.indexOf("/video") !=-1) + || (doc.location.href.indexOf("business-news/") !=-1) && (doc.location.href.indexOf("-video") !=-1) + || (doc.location.href.indexOf("national-news/") !=-1) && (doc.location.href.indexOf("-video") !=-1) + || (doc.location.href.indexOf("breakfast-news/") !=-1) && (doc.location.href.indexOf("-video") !=-1) + || (doc.location.href.indexOf("breakfast-news/") !=-1) && (doc.location.href.indexOf("/video") !=-1) + || (doc.location.href.indexOf("world-news/") !=-1) && (doc.location.href.indexOf("-video") !=-1) + || (doc.location.href.indexOf("all-blacks/") !=-1) && (doc.location.href.indexOf("-video") !=-1) + || (doc.location.href.indexOf("weather/") !=-1) && (doc.location.href.indexOf("-video") !=-1) + || (doc.location.href.indexOf("-news/") !=-1) && (doc.location.href.indexOf("-video") !=-1) + || (doc.location.href.indexOf("-news/") !=-1) && (doc.location.href.indexOf("/video") !=-1) + || (doc.location.href.indexOf("on/") !=-1) && (doc.location.href.indexOf("-video") !=-1) + || (doc.location.href.indexOf("up/") !=-1) && (doc.location.href.indexOf("/video") !=-1)){ + return "tvBroadcast"; + } + else if ((doc.location.href.indexOf("news/") !=-1) || (doc.location.href.indexOf("all-blacks/") !=-1) || (doc.location.href.indexOf("up/")!=-1)){ + return "newspaperArticle"; + } +} + +function scrape(doc, url){ + var namespace = doc.documentElement.namespaceURI; + var nsResolver = namespace ? 
function(prefix) { + if (prefix == "x" ) return namespace; else return null; + } : null; + if (detectWeb(doc, url) == "newspaperArticle") { + var newItem = new Zotero.Item('newspaperArticle'); + newItem.url = doc.location.href; + newItem.publicationTitle = "TVNZ"; + newItem.language = "English"; + + var titleXPath = '//h1'; + var titleXPathObject = doc.evaluate(titleXPath, doc, nsResolver, XPathResult.ANY_TYPE, null).iterateNext(); + if (titleXPathObject){ + var titleXPathString = titleXPathObject.textContent; + newItem.title = titleXPathString ; + } + + var dateXPath = '//p[@class="time"]'; + var dateXPathObject = doc.evaluate(dateXPath, doc, nsResolver, XPathResult.ANY_TYPE, null).iterateNext(); + if(dateXPathObject){ + var dateXPathString = dateXPathObject.textContent.replace(/\W\bPublished:\W\d{1,2}:\d{1,2}(AM|PM) (\w)+ /g, ''); + newItem.date = dateXPathString.replace(/^\s*|\s*$/g, ''); + } + //get Author from the article + var authorXPath = '//p[@class="source"]'; + var authorXPathObject = doc.evaluate(authorXPath, doc, nsResolver, XPathResult.ANY_TYPE, null).iterateNext(); + if (authorXPathObject){ + var authorXPathString = authorXPathObject.textContent.replace(/\W\bSource:\W+/g, ''); + newItem.creators.push(Zotero.Utilities.cleanAuthor(authorXPathString.replace(/\W+/g, '-'), "author")); + } + + //get Section of the article + var sectionXPath = '//li[@class="selectedLi"]/a/span'; + var sectionXPathObject = doc.evaluate(sectionXPath, doc, nsResolver, XPathResult.ANY_TYPE, null).iterateNext(); + if (sectionXPathObject){ + + var sectionXPathString = sectionXPathObject.textContent.replace(/^s/g, ''); + var sectionArray = new Array("Rugby", "All Blacks", "Cricket", "League", "Football", "Netball", "Basketball", "Tennis", "Motor", "Golf", "Other", "Tipping"); + + //loop through the Array and check for condition for section category + //var count =0; + for (var i=0; i <sectionArray.length; i++){ + //count = 1; + //if there is a match in the loop then replacing the 
section found with SPORT + if(sectionXPathString == sectionArray[i]){ + sectionXPathString = "Sport"; + newItem.section = sectionXPathString; + } + //if not found then take the value from XPath + newItem.section = sectionXPathString; + //count++; + + } + } + + //get Abstract + var a= "//meta[@name='description']"; + var abs= doc.evaluate(a, doc, nsResolver, XPathResult.ANY_TYPE, null).iterateNext(); + if (abs){ + var abstractString = abs.content; + newItem.abstractNote = abstractString; + } + + //closed up NewItem + newItem.complete(); + + } else if (detectWeb(doc, url) == "tvBroadcast"){ + var newItem = new Zotero.Item("tvBroadcast"); + newItem.url = doc.location.href; + + newItem.network = "TVNZ"; + newItem.language = "English"; + + /* get Title and Running time for video clip */ + //if meta title exist + + + //if the array is true then do this + + var dateXPath = '//p[@class="added"]'; + var dateXPathObject = doc.evaluate(dateXPath, doc, nsResolver, XPathResult.ANY_TYPE, null).iterateNext(); + + if (dateXPathObject){ + var dateString = dateXPathObject.textContent.replace(/\W\bAdded:\W\d{1,2}:\d{1,2}(AM|PM) (\w)+ /g, ''); + newItem.date = dateString.replace(/^\s*|\s*$/g, ''); + } else { + var dateXPath = '//p[@class="time"]'; + var dateXPathObject = doc.evaluate(dateXPath, doc, nsResolver, XPathResult.ANY_TYPE, null).iterateNext().textContent.replace(/\W\bPublished:\W\d{1,2}:\d{1,2}(AM|PM) (\w)+ /g, ''); + newItem.date = dateXPathObject.replace(/^\s*|\s*$/g, ''); + + } + + var myTitlePath ='//meta[@name="title"]'; + var myTitlePathObject= doc.evaluate(myTitlePath, doc, nsResolver, XPathResult.ANY_TYPE, null).iterateNext(); + if (myTitlePathObject){ + var titleString= myTitlePathObject.content.replace(/\b[)]+/g, ''); + var TitleResult= titleString.split(" ("); + newItem.title = TitleResult[0]; + var runTime = TitleResult[1]; + if(TitleResult[1] == undefined) { + newItem.runningTime =""; + } else { + newItem.runningTime = runTime; + } + }else{ + var myPath = 
'//head/title'; + var myPathObject = doc.evaluate(myPath, doc, nsResolver, XPathResult.ANY_TYPE, null).iterateNext().textContent.split(" | "); + newItem.title= myPathObject[0]; + } + + //get Author from the article + var authorXPath = '//p[@class="source"]'; + var authorXPathObject = doc.evaluate(authorXPath, doc, nsResolver, XPathResult.ANY_TYPE, null).iterateNext(); + if (authorXPathObject){ + var authorString = authorXPathObject.textContent.replace(/\W\bSource:\W+/g, ''); + newItem.creators.push(Zotero.Utilities.cleanAuthor(authorString.replace(/\W+/g, '-'), "author")); + + } else { + var keywordsPath = '//meta[@name="keywords"]'; + var keywordsObject = doc.evaluate(keywordsPath, doc, nsResolver, XPathResult.ANY_TYPE, null).iterateNext().content.replace(/\s+/g, '-').split(","); + newItem.creators.push(Zotero.Utilities.cleanAuthor(keywordsObject[0], "author")); + } + + //get Abstract + var a= "//meta[@name='description']"; + var abs= doc.evaluate(a, doc, nsResolver, XPathResult.ANY_TYPE, null).iterateNext().content; + newItem.abstractNote = abs; + + //get Section of the video, not sure if this meant for Archive location, if incorrect then leave it commented. + //var sectionPath = "//meta[@name='keywords']"; + //var sectionPathObject = doc.evaluate(sectionPath, doc, nsResolver, XPathResult.ANY_TYPE, null).iterateNext().content; + //var sectionResult = sectionMetaObject.split(","); + //newItem.archiveLocation = sectionPathObject; + + newItem.complete(); + } +} + +function doWeb(doc, url){ + var namespace = doc.documentElement.namespaceURI; + var nsResolver = namespace ? 
function(prefix){ + if (prefix =='x') + return namespace; else return null; + } :null; + + var articles = new Array(); + var items = new Object(); + var nextTitle; + + if (detectWeb(doc, url) == "multiple"){ + var titleXPath = '//div[@class="readItem"]/h4/a'; + var titles = doc.evaluate(titleXPath, doc, nsResolver, XPathResult.ANY_TYPE, null); + while (nextTitle = titles.iterateNext()){ + items[nextTitle.href] = nextTitle.textContent; + } + items= Zotero.selectItems(items); + for (var i in items){ + articles.push(i); + } + } else if (detectWeb(doc,url) =="webpage"){ + articles = [url]; + } + else if (detectWeb(doc,url) =="tvBroadcast"){ + articles = [url]; + } + + Zotero.debug(articles); + //Zotero.Util only works when scrape function is declared + Zotero.Utilities.processDocuments(articles, scrape, function(){Zotero.done();}); + + Zotero.wait(); +}