www

Unnamed repository; edit this file 'description' to name the repository.
Log | Files | Refs | Submodules | README | LICENSE

commit 0cd3021cf3f1cad63ca4479f794380410c599e16
parent c925c6bf0191b0c90523a94b2889e3c2ee030060
Author: Simon Kornblith <simon@simonster.com>
Date:   Wed, 30 Aug 2006 21:56:52 +0000

closes #241, improved date handling

- Scholar.strToDate() accepts a string date and returns an object containing year, month, day, and part
- capture access date whenever URL is captured
- updated Zotero.dot to use new namespaces


Diffstat:
M Zotero.dot.dmg | 0
M chrome/chromeFiles/content/scholar/xpcom/cite.js | 133 ++++++++++++++++++++++++++++++++++++++-----------------------------------------
M chrome/chromeFiles/content/scholar/xpcom/scholar.js | 87 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
M chrome/chromeFiles/content/scholar/xpcom/translate.js | 5 +++++
M scrapers.sql | 8 ++++----
5 files changed, 160 insertions(+), 73 deletions(-)

diff --git a/Zotero.dot.dmg b/Zotero.dot.dmg Binary files differ. diff --git a/chrome/chromeFiles/content/scholar/xpcom/cite.js b/chrome/chromeFiles/content/scholar/xpcom/cite.js @@ -89,13 +89,13 @@ Scholar.Cite = new function() { * want to use the Scholar data model, but does want to use CSL in JavaScript */ CSL = function(csl) { - this._csl = new XML(this._cleanXML(csl)); + this._csl = new XML(CSL._cleanXML(csl)); // initialize CSL - this._init(); + CSL.init(); // load localizations - this._terms = this._parseLocales(this._csl.terms); + this._terms = CSL._parseLocales(this._csl.terms); // load class defaults this.class = this._csl["@class"].toString(); @@ -272,12 +272,6 @@ CSL.prototype.createBibliography = function(format) { return output; } - -CSL._months = ["January", "February", "March", "April", "May", "June", "July", - "August", "September", "October", "November", "December"]; -CSL._monthsShort = ["Jan", "Feb", "Mar", "Apr", "May", "Jun", - "Jul", "Aug", "Sep", "Oct", "Nov", "Dec"]; - // for elements that inherit defaults from each other CSL._inherit = { author:"contributor", @@ -307,11 +301,10 @@ CSL._classDefaults["author-date"] = { CSL.ns = "http://purl.org/net/xbiblio/csl"; -CSL.prototype._cleanXML = function(xml) { - return xml.replace(/<\?[^>]*\?>/g, ""); -} - -CSL.prototype._init = function() { +/* + * initializes CSL interpreter + */ +CSL.init = function() { if(!CSL._xmlLang) { // get XML lang var localeService = Components.classes['@mozilla.org/intl/nslocaleservice;1']. 
@@ -326,12 +319,30 @@ CSL.prototype._init = function() { req.send(null); // get default terms - var locales = new XML(this._cleanXML(req.responseText)); - CSL._defaultTerms = this._parseLocales(locales); + var locales = new XML(CSL._cleanXML(req.responseText)); + CSL._defaultTerms = CSL._parseLocales(locales); } } -CSL.prototype._parseLocales = function(termXML) { +/* + * returns an array of short or long month strings + */ +CSL.getMonthStrings = function(form) { + CSL.init(); + return CSL._defaultTerms[form]["_months"]; +} + +/* + * removes parse instructions from XML + */ +CSL._cleanXML = function(xml) { + return xml.replace(/<\?[^>]*\?>/g, ""); +} + +/* + * parses locale strings into CSL._defaultTerms + */ +CSL._parseLocales = function(termXML) { // return defaults if there are no terms if(!termXML.length()) { return (CSL._defaultTerms ? CSL._defaultTerms : {}); @@ -403,7 +414,17 @@ CSL.prototype._parseLocales = function(termXML) { } } } else { - termArray[form][name] = term.text().toString(); + if(name.substr(0, 6) == "month-") { + // place months into separate array + if(!termArray[form]["_months"]) { + termArray[form]["_months"] = new Array(); + } + var monthIndex = parseInt(name.substr(6),10)-1; + var term = term.text().toString(); + termArray[form]["_months"][monthIndex] = term[0].toUpperCase()+term.substr(1).toLowerCase(); + } else { + termArray[form][name] = term.text().toString(); + } } } @@ -550,8 +571,8 @@ CSL.prototype._parseEtAl = function(etAl, bibCitElement) { for each(var etAlElement in etAl) { if(etAlElement.@position == "subsequent") { bibCitElement.subsequentEtAl = new Object(); - bibCitElement.subsequentEtAl.minCreators = parseInt(etAlElement['@min-authors']); - bibCitElement.subsequentEtAl.useFirst = parseInt(etAlElement['@use-first']); + bibCitElement.subsequentEtAl.minCreators = parseInt(etAlElement['@min-authors'], 10); + bibCitElement.subsequentEtAl.useFirst = parseInt(etAlElement['@use-first'], 10); } else { var parseElement = 
etAlElement; } @@ -560,8 +581,8 @@ CSL.prototype._parseEtAl = function(etAl, bibCitElement) { var parseElement = etAl; } - bibCitElement.etAl.minCreators = parseInt(parseElement['@min-authors']); - bibCitElement.etAl.useFirst = parseInt(parseElement['@use-first']); + bibCitElement.etAl.minCreators = parseInt(parseElement['@min-authors'], 10); + bibCitElement.etAl.useFirst = parseInt(parseElement['@use-first'], 10); } } @@ -765,46 +786,6 @@ CSL.prototype._getTerm = function(term, plural, form) { } /* - * process the date "string" into a useful object - */ -CSL.prototype._processDate = function(string) { - var date = new Object(); - - var dateRe = /^([0-9]{4})-([0-9]{2})-([0-9]{2})$/; - var m = dateRe.exec(string); - if(m) { // sql date - var jsDate = new Date(m[1], m[2]-1, m[3], false, false, false); - } else { // not an sql date - var yearRe = /^[0-9]+$/; - if(yearRe.test(string)) { // is a year - date.year = string; - return date; - } else { // who knows what this is - var jsDate = new Date(string) - } - } - - if(isNaN(jsDate.valueOf())) { // couldn't parse - // get year and say other parts are month - var yearRe = /^(.*)([0-9]{4})(.*)$/ - var m = yearRe.exec(string); - - if(m) { - date.year = m[2]; - date.month = m[1]; - if(m[2] && m[3]) date.month += " "; - date.month += m[3]; - } - } else { - date.year = jsDate.getFullYear(); - date.month = jsDate.getMonth(); - date.day = jsDate.getDay(); - } - - return date; -} - -/* * escapes a string for a given format */ CSL.prototype._escapeString = function(string, format) { @@ -972,15 +953,19 @@ CSL.prototype._formatDate = function(element, date, format) { string += date.disambiguation; } } - } else if(child.name == "month" && date.month) { - if(format == "compare") { - string = this._lpad(date.month+1, "0", 2); - } else { - if(element.form == "short") { - string = CSL._monthsShort[date.month]; + } else if(child.name == "month") { + if(date.month) { + if(format == "compare") { + string = this._lpad(date.month+1, "0", 2); 
} else { - string = CSL._months[date.month]; + if(element.form == "short") { + string = this._terms["short"]["_months"][date.month]; + } else { + string = this._terms["long"]["_months"][date.month]; + } } + } else if(date.part && format != "compare") { + string = date.part; } } else if(child.name == "day" && date.day) { if(format == "compare") { @@ -1518,6 +1503,9 @@ CSL.prototype._getTypeFromItem = function(item) { return [CSL._optionalTypeMappings[scholarType], CSL._fallbackTypeMappings[scholarType]]; } +/* + * separate creators object into authors, editors, and translators + */ CSL.prototype._separateItemCreators = function(item) { var authors = new Array(); var editors = new Array(); @@ -1543,6 +1531,13 @@ CSL.prototype._separateItemCreators = function(item) { return [authors, editors, translators]; } + +/* + * return an object containing year, month, and day + */ +CSL.prototype._processDate = function(string) { + return Scholar.strToDate(string); +} /* * END SCHOLAR-SPECIFIC CODE */ \ No newline at end of file diff --git a/chrome/chromeFiles/content/scholar/xpcom/scholar.js b/chrome/chromeFiles/content/scholar/xpcom/scholar.js @@ -35,10 +35,12 @@ var Scholar = new function(){ this.randomString = randomString; this.getRandomID = getRandomID; this.moveToUnique = moveToUnique; + this.strToDate = strToDate; // Public properties this.version; this.platform; + this.isMac; /* * Initialize the extension @@ -424,6 +426,91 @@ var Scholar = new function(){ file.moveTo(newFile.parent, newName); return file; } + + /* + * converts a string to an object containing: + * day: integer form of the day + * month: integer form of the month (indexed from 0, not 1) + * year: 4 digit year (or, year + BC/AD/etc.) + * part: anything that does not fall under any of the above categories + * (e.g., "Summer," etc.) 
+ */ + function strToDate(string) { + var date = new Object(); + + // skip empty things + if(!string) { + return date; + } + + // get short month strings from CSL interpreter + var months = CSL.getMonthStrings("short"); + + string = string.replace(/^\s+/, "").replace(/\s+$/, "").replace(/\s+/, " "); + + var dateRe = /^([0-9]{4})[\-\/]([0-9]{2})[\-\/]([0-9]{2})$/; + var m = dateRe.exec(string); + if(m) { // sql date + Scholar.debug("DATE: used form 1: SQL"); + var jsDate = new Date(m[1], m[2]-1, m[3], false, false, false); + } else { // not an sql date + var yearRe = /^((?:circa |around |about |c\.? ?)[0-9]{1,4}(?: ?B\.? ?C\.?(?: ?E\.?)?| ?C\.? ?E\.?| ?A\.? ?D\.?)|[0-9]{4})$/i; + if(yearRe.test(string)) { + // is just a year + Scholar.debug("DATE: used form 2: year-only"); + date.year = string; + return date; + } + + // who knows what this is, but try JavaScript's date handling first + var jsDate = new Date(string) + } + + if(!isNaN(jsDate.valueOf())) { + Scholar.debug("DATE: retrieved from JavaScript"); + // got a javascript date + date.year = jsDate.getFullYear(); + date.month = jsDate.getMonth(); + date.day = jsDate.getDate(); + return date; + } + + // no javascript date. time for cruder things. + + // first, see if we have anything resembling a year + var yearRe = /^(.*)\b((?:circa |around |about |c\.? ?)[0-9]{1,4}(?: ?B\.? ?C\.?(?: ?E\.?)?| ?C\.? ?E\.?| ?A\.? 
?D\.?)|[0-9]{4})\b(.*)$/i; + + var m = yearRe.exec(string); + if(m) { + date.year = m[2]; + date.part = m[1]+m[3]; + Scholar.debug("DATE: got year ("+date.year+", "+date.part+")"); + + // then, see if have anything resembling a month anywhere + var monthRe = new RegExp("^(.*)\\b("+months.join("|")+")[^ ]* (.*)$", "i"); + var m = monthRe.exec(date.part); + if(m) { + date.month = months.indexOf(m[2][0].toUpperCase()+m[2].substr(1).toLowerCase()); + date.part = m[1]+m[3]; + Scholar.debug("DATE: got month ("+date.month+", "+date.part+")"); + + // then, see if there's a day + var dayRe = /^(.*)\b([0-9]{1,2})\b(.*)$/i; + var m = dayRe.exec(date.part); + if(m) { + date.day = m[2]; + date.part = m[1]+m[3]; + Scholar.debug("DATE: got day ("+date.day+", "+date.part+")"); + } + } + } + + if(date.part) { + date.part = date.part.replace(/^[^A-Za-z0-9]+/, "").replace(/[^A-Za-z0-9]+$/, ""); + } + + return date; + } }; diff --git a/chrome/chromeFiles/content/scholar/xpcom/translate.js b/chrome/chromeFiles/content/scholar/xpcom/translate.js @@ -1035,6 +1035,11 @@ Scholar.Translate.prototype._itemDone = function(item) { // makes looping through easier item.itemType = item.complete = undefined; + // automatically set access date if URL is set + if(item.url && !item.accessDate) { + item.accessDate = (new Date()).toLocaleString(); + } + var fieldID, field; for(var i in item) { // loop through item fields diff --git a/scrapers.sql b/scrapers.sql @@ -1,4 +1,4 @@ --- 65 +-- 66 -- Set the following timestamp to the most recent scraper update date REPLACE INTO "version" VALUES ('repository', STRFTIME('%s', '2006-08-15 15:42:00')); @@ -6076,7 +6076,7 @@ REPLACE INTO "csl" VALUES('http://purl.org/net/xbiblio/csl/styles/apa.csl', '200 </bibliography> </style>'); -REPLACE INTO "csl" VALUES('http://purl.org/net/xbiblio/csl/styles/chicago-note.csl', '2006-08-29 23:05:00', 'Chicago Manual of Style (Note)', +REPLACE INTO "csl" VALUES('http://purl.org/net/xbiblio/csl/styles/chicago-note.csl', 
'2006-08-30 17:40:00', 'Chicago Manual of Style (Note)', '<?xml version="1.0" encoding="UTF-8"?> <?oxygen RNGSchema="../schema/trunk/csl.rnc" type="compact"?> <style xmlns="http://purl.org/net/xbiblio/csl" class="note" xml:lang="en"> @@ -6173,8 +6173,8 @@ REPLACE INTO "csl" VALUES('http://purl.org/net/xbiblio/csl/styles/chicago-note.c <type name="article"> <author suffix=", "/> <titles prefix="&#8220;" suffix=",&#8221; "/> - <titles relation="container" font-style="italic"/> - <date prefix=", "> + <titles relation="container" font-style="italic" suffix=", "/> + <date> <day suffix=" "/> <month suffix=" " text-transform="capitalize"/> <year/>