commit f672b05d89d69a67e7d00b5b7fe9fa4320ecc623
parent 9aa227db6b47d4be646ca00756aa07a2f3586ff5
Author: Avram Lyon <ajlyon@gmail.com>
Date: Mon, 9 Aug 2010 00:08:23 +0000
- Adding Sopheak's TVNZ translator.
- Adding reference book support to CrossRef
per http://forums.zotero.org/discussion/12942
- Changing authorship of NZ Herald to match usual standard.
- Adding support for patent filing dates (RIS tag Y2) in RIS import and export
discussed somewhere; the patch has been lying around for some time
Diffstat:
4 files changed, 296 insertions(+), 9 deletions(-)
diff --git a/translators/CrossRef.js b/translators/CrossRef.js
@@ -76,9 +76,38 @@ function processCrossRef(xmlOutput) {
var metadataXML = xml.doi_record[0].crossref.book.book_series_metadata;
item.publicationTitle = metadataXML.series_metadata.titles.title[0].toString();
- }
+ // Reference book entry
+ // Example: doi: 10.1002/14651858.CD002966.pub3
+ // http://www.crossref.org/openurl/?url_ver=Z39.88-2004&req_dat=usr:pwd&rft_id=info:doi/10.1002/14651858.CD002966.pub3&format=unixref&redirect=false
+ } else if(xml.doi_record[0].crossref.book.@book_type.length()
+ && xml.doi_record[0].crossref.book.@book_type == 'reference'
+ && xml.doi_record[0].crossref.book.content_item.@component_type == 'reference_entry') {
+ var item = new Zotero.Item("bookSection");
+ var refXML = xml.doi_record[0].crossref.book.content_item;
+ var metadataXML = xml.doi_record[0].crossref.book.book_metadata;
+ item.publicationTitle = metadataXML.titles.title[0].toString();
+
+ // Handle book authors
+ if (metadataXML.contributors.length()) {
+ for each (var creatorXML in metadataXML.contributors.children()) {
+ var creator = {creatorType:"bookAuthor"};
+ if(creatorXML.@contributor_role == "editor") {
+ creator.creatorType = "editor";
+ } else if(creatorXML.@contributor_role == "translator") {
+ creator.creatorType = "translator";
+ }
+ if(creatorXML.localName() == "organization") {
+ creator.fieldMode = 1;
+ creator.lastName = creatorXML.toString();
+ } else if(creatorXML.localName() == "person_name") {
+ creator.firstName = fixAuthorCapitalization(creatorXML.given_name.toString());
+ creator.lastName = fixAuthorCapitalization(creatorXML.surname.toString());
+ }
+ item.creators.push(creator);
+ }
+ }
// Book
- else {
+ } else {
var item = new Zotero.Item("book");
var refXML = xml.doi_record[0].crossref.book.book_metadata;
var metadataXML = refXML;
@@ -112,7 +141,7 @@ function processCrossRef(xmlOutput) {
}
item.seriesNumber = seriesXML.series_number.toString();
}
-
+
for each(var creatorXML in contributors) {
var creator = {creatorType:"author"};
if(creatorXML.@contributor_role == "editor") {
@@ -179,4 +208,4 @@ function doSearch(item) {
});
Zotero.wait();
-}
-\ No newline at end of file
+}
diff --git a/translators/New Zealand Herald.js b/translators/New Zealand Herald.js
@@ -1,7 +1,7 @@
{
"translatorID" : "c7830593-807e-48cb-99f2-c3bed2b148c2",
"label" : "New Zealand Herald",
- "creator" : "Sopheak Hean (University of Waikato, Faculty of Education, New Zealand)",
+ "creator" : "Sopheak Hean, Michael Berkowitz",
"target" : "^http://www\\.nzherald\\.co\\.nz",
"minVersion" : "1.0",
"maxVersion" : "",
diff --git a/translators/RIS.js b/translators/RIS.js
@@ -197,7 +197,6 @@ function processTag(item, tag, value) {
// the secondary date field can mean two things, a secondary date, or an
// invalid EndNote-style date. let's see which one this is.
// patent: application (filing) date -- do not append to date field
- // for now. Zotero needs a filing date field added to make use of this.
var dateParts = value.split("/");
if(dateParts.length != 4 && item.itemType != "patent") {
// an invalid date and not a patent.
@@ -207,6 +206,29 @@ function processTag(item, tag, value) {
value += " " + item.date;
}
item.date = value;
+ } else if (item.itemType == "patent") {
+ // Date-handling code copied from above
+ if(dateParts.length == 1) {
+ // technically, if there's only one date part, the file isn't valid
+ // RIS, but EndNote writes this, so we have to too
+ // Nick: RIS spec example records also only contain a single part
+ // even though it says the slashes are not optional (?)
+ item.filingDate = value;
+ } else {
+ // in the case that we have a year and other data, format that way
+
+ var month = parseInt(dateParts[1]);
+ if(month) {
+ month--;
+ } else {
+ month = undefined;
+ }
+
+ item.filingDate = Zotero.Utilities.formatDate({year:dateParts[0],
+ month:month,
+ day:dateParts[2],
+ part:dateParts[3]});
+ }
}
// ToDo: Handle correctly formatted Y2 fields (secondary date)
} else if(tag == "N1" || tag == "AB") {
@@ -243,6 +265,7 @@ function processTag(item, tag, value) {
}
} else if(tag == "SN") {
// ISSN/ISBN - just add both
+ // TODO We should be able to tell these apart
if(!item.ISBN) {
item.ISBN = value;
}
@@ -479,6 +502,28 @@ function doExport() {
}
addTag("PY", string);
}
+
+ // filingDate (patents)
+ if(item.filingDate) {
+ var date = Zotero.Utilities.strToDate(item.filingDate);
+ var string = date.year+"/";
+ if(date.month != undefined) {
+ // deal with javascript months
+ date.month++;
+ if(date.month < 10) string += "0";
+ string += date.month;
+ }
+ string += "/";
+ if(date.day != undefined) {
+ if(date.day < 10) string += "0";
+ string += date.day;
+ }
+ string += "/";
+ if(date.part != undefined) {
+ string += date.part;
+ }
+ addTag("Y2", string);
+ }
// notes
if(Zotero.getOption("exportNotes")) {
@@ -524,4 +569,4 @@ function doExport() {
Zotero.write("ER - \r\n\r\n");
}
-}
-\ No newline at end of file
+}
diff --git a/translators/TVNZ.js b/translators/TVNZ.js
@@ -0,0 +1,215 @@
+{
+ "translatorID" : "649c2836-a94d-4bbe-8e28-6771f283702f",
+ "label" : "TVNZ",
+ "creator" : "Sopheak Hean",
+ "target" : "^http://tvnz\\.co\\.nz",
+ "minVersion" : "1.0",
+ "maxVersion" : "",
+ "priority" : 100,
+ "inRepository" : true,
+ "translatorType" : 4,
+ "lastUpdated":"2010-08-03 10:30:20"
+}
+
+/**
+ * Classifies a TVNZ page from its URL.
+ *
+ * @param {Document} doc - page being inspected; only doc.location.href is read
+ * @param {String} url - not used by this function
+ * @return {String|undefined} "multiple" for search results, "tvBroadcast" for
+ *     video pages, "newspaperArticle" for text articles, undefined otherwise
+ */
+function detectWeb(doc, url) {
+    var href = doc.location.href;
+    var has = function(fragment) {
+        return href.indexOf(fragment) != -1;
+    };
+
+    if (has("/search/")) {
+        return "multiple";
+    }
+    // Video pages. Section-specific paths such as "politics-news/" or
+    // "world-news/" all contain "-news/", so one "-news/" test covers them.
+    if ((has("-news/") && (has("-video") || has("/video")))
+            || ((has("all-blacks/") || has("weather/") || has("on/")) && has("-video"))
+            || (has("up/") && has("/video"))) {
+        return "tvBroadcast";
+    }
+    if (has("news/") || has("all-blacks/") || has("up/")) {
+        return "newspaperArticle";
+    }
+}
+
+/**
+ * Builds and saves one Zotero item from a TVNZ article or video page.
+ *
+ * The item type comes from detectWeb(): "newspaperArticle" pages yield a
+ * newspaperArticle item, "tvBroadcast" pages a tvBroadcast item, and any
+ * other page is ignored. Every scraped field is optional: an element that is
+ * missing from the page leaves its field unset instead of raising a TypeError.
+ *
+ * @param {Document} doc - the page to scrape
+ * @param {String} url - passed through to detectWeb()
+ */
+function scrape(doc, url){
+    var namespace = doc.documentElement.namespaceURI;
+    var nsResolver = namespace ? function(prefix) {
+        if (prefix == "x") return namespace; else return null;
+    } : null;
+
+    // First node matching the XPath expression, or null when nothing matches.
+    var firstNode = function(xpath) {
+        return doc.evaluate(xpath, doc, nsResolver, XPathResult.ANY_TYPE, null).iterateNext();
+    };
+    // Strips leading and trailing whitespace.
+    var trim = function(text) {
+        return text.replace(/^\s*|\s*$/g, '');
+    };
+    // Date text of <p class="time"> without its "Published: ..." prefix, or null.
+    var publishedDate = function() {
+        var node = firstNode('//p[@class="time"]');
+        if (!node) {
+            return null;
+        }
+        return trim(node.textContent.replace(/\W\bPublished:\W\d{1,2}:\d{1,2}(AM|PM) (\w)+ /g, ''));
+    };
+    // Creator built from <p class="source"> without its "Source:" label, or null.
+    var sourceCreator = function() {
+        var node = firstNode('//p[@class="source"]');
+        if (!node) {
+            return null;
+        }
+        var name = node.textContent.replace(/\W\bSource:\W+/g, '');
+        return Zotero.Utilities.cleanAuthor(name.replace(/\W+/g, '-'), "author");
+    };
+
+    // Decide once which kind of item this page produces.
+    var pageType = detectWeb(doc, url);
+    if (pageType != "newspaperArticle" && pageType != "tvBroadcast") {
+        return;
+    }
+
+    // Fields shared by both item types.
+    var newItem = new Zotero.Item(pageType);
+    newItem.url = doc.location.href;
+    newItem.language = "English";
+    var description = firstNode("//meta[@name='description']");
+    if (description) {
+        newItem.abstractNote = description.content;
+    }
+
+    if (pageType == "newspaperArticle") {
+        newItem.publicationTitle = "TVNZ";
+
+        // Headline
+        var heading = firstNode('//h1');
+        if (heading) {
+            newItem.title = heading.textContent;
+        }
+
+        // Publication date
+        var published = publishedDate();
+        if (published !== null) {
+            newItem.date = published;
+        }
+
+        // Author
+        var writer = sourceCreator();
+        if (writer) {
+            newItem.creators.push(writer);
+        }
+
+        // Section, taken from the list item marked as selected
+        var sectionNode = firstNode('//li[@class="selectedLi"]/a/span');
+        if (sectionNode) {
+            // Drop leading whitespace so padding cannot defeat the comparison below.
+            var section = sectionNode.textContent.replace(/^\s+/, '');
+            var sportSections = ["Rugby", "All Blacks", "Cricket", "League", "Football", "Netball", "Basketball", "Tennis", "Motor", "Golf", "Other", "Tipping"];
+            // Every sports sub-section is filed under the general "Sport" section.
+            for (var i = 0; i < sportSections.length; i++) {
+                if (section == sportSections[i]) {
+                    section = "Sport";
+                    break;
+                }
+            }
+            newItem.section = section;
+        }
+    } else {
+        newItem.network = "TVNZ";
+
+        // Prefer the "Added:" stamp; fall back to the article-style date.
+        var added = firstNode('//p[@class="added"]');
+        if (added) {
+            newItem.date = trim(added.textContent.replace(/\W\bAdded:\W\d{1,2}:\d{1,2}(AM|PM) (\w)+ /g, ''));
+        } else {
+            var fallbackDate = publishedDate();
+            if (fallbackDate !== null) {
+                newItem.date = fallbackDate;
+            }
+        }
+
+        // <meta name="title"> is split at " (" into title and running time;
+        // without it, the part of <title> before " | " is used.
+        var metaTitle = firstNode('//meta[@name="title"]');
+        if (metaTitle) {
+            var titleParts = metaTitle.content.replace(/\b[)]+/g, '').split(" (");
+            newItem.title = titleParts[0];
+            newItem.runningTime = (titleParts[1] == undefined) ? "" : titleParts[1];
+        } else {
+            var headTitle = firstNode('//head/title');
+            if (headTitle) {
+                newItem.title = headTitle.textContent.split(" | ")[0];
+            }
+        }
+
+        // Creator: the "Source:" line when present, else the first keyword.
+        var creator = sourceCreator();
+        if (!creator) {
+            var keywords = firstNode('//meta[@name="keywords"]');
+            if (keywords) {
+                creator = Zotero.Utilities.cleanAuthor(keywords.content.replace(/\s+/g, '-').split(",")[0], "author");
+            }
+        }
+        if (creator) {
+            newItem.creators.push(creator);
+        }
+    }
+
+    // Finalize the item.
+    newItem.complete();
+}
+
+/**
+ * Translator entry point: gathers the page URLs to save and scrapes each one.
+ * Search pages let the user pick results; article and video pages save directly.
+ */
+function doWeb(doc, url){
+    var namespace = doc.documentElement.namespaceURI;
+    var nsResolver = namespace ? function(prefix){
+        if (prefix == 'x') return namespace; else return null;
+    } : null;
+
+    var articles = new Array();
+    var pageType = detectWeb(doc, url);
+    if (pageType == "multiple"){
+        var items = new Object();
+        var nextTitle;
+        var titles = doc.evaluate('//div[@class="readItem"]/h4/a', doc, nsResolver, XPathResult.ANY_TYPE, null);
+        while (nextTitle = titles.iterateNext()){
+            items[nextTitle.href] = nextTitle.textContent;
+        }
+        items = Zotero.selectItems(items);
+        // Nothing came back (presumably the dialog was cancelled): stop here.
+        if (!items) return true;
+        for (var i in items){
+            articles.push(i);
+        }
+    } else if (pageType == "newspaperArticle" || pageType == "tvBroadcast"){
+        // detectWeb() reports text articles as "newspaperArticle", never "webpage".
+        articles = [url];
+    }
+
+    Zotero.debug(articles);
+    Zotero.Utilities.processDocuments(articles, scrape, function(){Zotero.done();});
+    Zotero.wait();
+}