commit e4c5d86bc6a0c621a1cfe191f62ed593d729e7cf
parent c43d36900a3e9a574b9ec746302f8a7300c3b8b7
Author: Dan Stillman <dstillman@zotero.org>
Date: Thu, 24 Sep 2015 22:02:35 -0400
Update citeproc-js to 1.1.58
Closes #850
Diffstat:
1 file changed, 369 insertions(+), 343 deletions(-)
diff --git a/chrome/content/zotero/xpcom/citeproc.js b/chrome/content/zotero/xpcom/citeproc.js
@@ -80,7 +80,7 @@ if (!Array.indexOf) {
};
}
var CSL = {
- PROCESSOR_VERSION: "1.1.46",
+ PROCESSOR_VERSION: "1.1.58",
CONDITION_LEVEL_TOP: 1,
CONDITION_LEVEL_BOTTOM: 2,
PLAIN_HYPHEN_REGEX: /(?:[^\\]-|\u2013)/,
@@ -269,7 +269,7 @@ var CSL = {
DATE_PARTS: ["year", "month", "day"],
DATE_PARTS_ALL: ["year", "month", "day", "season"],
DATE_PARTS_INTERNAL: ["year", "month", "day", "year_end", "month_end", "day_end"],
- NAME_PARTS: ["family", "given", "dropping-particle", "non-dropping-particle", "suffix", "literal"],
+ NAME_PARTS: ["non-dropping-particle", "family", "given", "dropping-particle", "suffix", "literal"],
DECORABLE_NAME_PARTS: ["given", "family", "suffix"],
DISAMBIGUATE_OPTIONS: [
"disambiguate-add-names",
@@ -1365,9 +1365,6 @@ CSL.Engine = function (sys, style, lang, forceLang) {
if (CSL.getAbbreviation) {
this.sys.getAbbreviation = CSL.getAbbreviation;
}
- if (CSL.suppressJurisdictions) {
- this.sys.suppressJurisdictions = CSL.suppressJurisdictions;
- }
if (this.sys.stringCompare) {
CSL.stringCompare = this.sys.stringCompare;
}
@@ -1418,8 +1415,8 @@ CSL.Engine = function (sys, style, lang, forceLang) {
this.opt.xclass = sys.xml.getAttributeValue(this.cslXml, "class");
this.opt.class = this.opt.xclass;
this.opt.styleID = this.sys.xml.getStyleId(this.cslXml);
- if (CSL.setSuppressJurisdictions) {
- CSL.setSuppressJurisdictions(this.opt.styleID);
+ if (CSL.setSuppressedJurisdictions) {
+ CSL.setSuppressedJurisdictions(this.opt.styleID, this.opt.suppressedJurisdictions);
}
this.opt.styleName = this.sys.xml.getStyleId(this.cslXml, true);
if (this.opt.version.slice(0,4) === "1.1m") {
@@ -2577,7 +2574,7 @@ CSL.Output.Queue.prototype.append = function (str, tokname, notSerious, ignorePr
blob.blobs = blob.blobs.replace(/\.([^a-z]|$)/g, "$1");
}
for (var i = blob.decorations.length - 1; i > -1; i += -1) {
- if (blob.decorations[i][0] === "@quotes" && blob.decorations[i][1] === "true") {
+ if (blob.decorations[i][0] === "@quotes" && blob.decorations[i][1] !== "false") {
blob.punctuation_in_quote = this.state.getOpt("punctuation-in-quote");
}
if (!blob.blobs.match(CSL.ROMANESQUE_REGEXP)) {
@@ -2978,7 +2975,7 @@ CSL.Output.Queue.adjust = function (punctInQuote) {
function blobHasDescendantQuotes(blob) {
if (blob.decorations) {
for (var i=0,ilen=blob.decorations.length;i<ilen;i++) {
- if (blob.decorations[i][0] === '@quotes') {
+ if (blob.decorations[i][0] === '@quotes' && blob.decorations[i][1] !== "false") {
return true;
}
}
@@ -3191,9 +3188,12 @@ CSL.Output.Queue.adjust = function (punctInQuote) {
if (i === (parent.blobs.length - 1)) {
if (true || !someChildrenAreNumbers) {
var parentChar = parentStrings.suffix.slice(0, 1);
- var allowMigration = blobHasDescendantQuotes(child);
- if (!allowMigration && PUNCT[parentChar]) {
+ var allowMigration = false;
+ if (PUNCT[parentChar]) {
allowMigration = blobHasDescendantMergingPunctuation(parentChar,child);
+ if (!allowMigration && punctInQuote) {
+ allowMigration = blobHasDescendantQuotes(child);
+ }
}
if (allowMigration) {
if (PUNCT[parentChar]) {
@@ -3285,7 +3285,7 @@ CSL.Output.Queue.adjust = function (punctInQuote) {
var quoteSwap = false;
for (var j=0,jlen=child.decorations.length;j<jlen;j++) {
var decoration = child.decorations[j];
- if (decoration[0] === "@quotes") {
+ if (decoration[0] === "@quotes" && decoration[1] !== "false") {
quoteSwap = true;
}
}
@@ -3309,6 +3309,7 @@ CSL.Engine.Opt = function () {
this.mode = "html";
this.dates = {};
this.jurisdictions_seen = {};
+ this.suppressedJurisdictions = {};
this["locale-sort"] = [];
this["locale-translit"] = [];
this["locale-translat"] = [];
@@ -3957,13 +3958,13 @@ CSL.Engine.prototype.processCitationCluster = function (citation, citationsPre,
for (n = 0, nlen = CSL.POSITION_TEST_VARS.length; n < nlen; n += 1) {
var param = CSL.POSITION_TEST_VARS[n];
if (item[1][param] !== oldvalue[param]) {
- if (param === 'first-reference-note-number') {
- rerunAkeys[this.registry.registry[myid].ambig] = true;
+ if (this.registry.registry[myid]) {
+ if (param === 'first-reference-note-number') {
+ rerunAkeys[this.registry.registry[myid].ambig] = true;
+ this.tmp.taintedItemIDs[myid] = true;
+ }
}
this.tmp.taintedCitationIDs[onecitation.citationID] = true;
- if (param === 'first-reference-note-number') {
- this.tmp.taintedItemIDs[myid] = true;
- }
}
}
}
@@ -5653,7 +5654,9 @@ CSL.Node["date-part"] = {
if (state[state.tmp.area].opt.collapse === "year-suffix-ranged") {
number.range_prefix = state.getTerm("citation-range-delimiter");
}
- if (state[state.tmp.area].opt["year-suffix-delimiter"]) {
+ if (state[state.tmp.area].opt.cite_group_delimiter) {
+ number.successor_prefix = state[state.tmp.area].opt.cite_group_delimiter;
+ } else if (state[state.tmp.area].opt["year-suffix-delimiter"]) {
number.successor_prefix = state[state.tmp.area].opt["year-suffix-delimiter"];
} else {
number.successor_prefix = state[state.tmp.area].opt.layout_delimiter;
@@ -7796,7 +7799,7 @@ CSL.NameOutput.prototype._renderOnePersonalName = function (value, pos, i, j) {
suffix_sep = " ";
}
var romanesque = this._isRomanesque(name);
- var has_hyphenated_non_dropping_particle = (non_dropping_particle && ["\u2019", "\'", "-"].indexOf(non_dropping_particle.blobs.slice(-1)) > -1);
+ var has_hyphenated_non_dropping_particle = (non_dropping_particle && ["\u2019", "\'", "-", " "].indexOf(non_dropping_particle.blobs.slice(-1)) > -1);
var blob, merged, first, second;
if (romanesque === 0) {
blob = this._join([non_dropping_particle, family, given], "");
@@ -10942,34 +10945,44 @@ CSL.Transform = function (state) {
}
ret = {name:"", usedOrig:stopOrig,locale:getFieldLocale(Item,field)};
opts = state.opt[locale_type];
+ var hasVal = false;
+ var jurisdictionName = false;
if (locale_type === 'locale-orig') {
if (stopOrig) {
ret = {name:"", usedOrig:stopOrig};
} else {
ret = {name:Item[field], usedOrig:false, locale:getFieldLocale(Item,field)};
}
- return ret;
+ hasVal = true;
} else if (use_default && ("undefined" === typeof opts || opts.length === 0)) {
- return {name:Item[field], usedOrig:true, locale:getFieldLocale(Item,field)};
- }
- for (var i = 0, ilen = opts.length; i < ilen; i += 1) {
- opt = opts[i];
- o = opt.split(/[\-_]/)[0];
- if (opt && Item.multi && Item.multi._keys[field] && Item.multi._keys[field][opt]) {
- ret.name = Item.multi._keys[field][opt];
- ret.locale = o;
- break;
- } else if (o && Item.multi && Item.multi._keys[field] && Item.multi._keys[field][o]) {
- ret.name = Item.multi._keys[field][o];
- ret.locale = o;
- break;
+ var ret = {name:Item[field], usedOrig:true, locale:getFieldLocale(Item,field)};
+ hasVal = true;
+ }
+ if (!hasVal) {
+ for (var i = 0, ilen = opts.length; i < ilen; i += 1) {
+ opt = opts[i];
+ o = opt.split(/[\-_]/)[0];
+ if (opt && Item.multi && Item.multi._keys[field] && Item.multi._keys[field][opt]) {
+ ret.name = Item.multi._keys[field][opt];
+ ret.locale = o;
+ if (field === 'jurisdiction') jurisdictionName = ret.name;
+ break;
+ } else if (o && Item.multi && Item.multi._keys[field] && Item.multi._keys[field][o]) {
+ ret.name = Item.multi._keys[field][o];
+ ret.locale = o;
+ if (field === 'jurisdiction') jurisdictionName = ret.name;
+ break;
+ }
+ }
+ if (!ret.name && use_default) {
+ ret = {name:Item[field], usedOrig:true, locale:getFieldLocale(Item,field)};
}
}
- if (!ret.name && use_default) {
- ret = {name:Item[field], usedOrig:true, locale:getFieldLocale(Item,field)};
- }
- if (field === 'jurisdiction') {
- ret.name = state.sys.suppressJurisdictions(Item[field], ret.name);
+ if (field === 'jurisdiction' && CSL.getSuppressedJurisdictionName) {
+ if (ret.name && !jurisdictionName) {
+ jurisdictionName = state.sys.getHumanForm(Item[field]);
+ }
+ ret.name = CSL.getSuppressedJurisdictionName.call(state, Item[field], jurisdictionName);
}
return ret;
}
@@ -11032,14 +11045,6 @@ CSL.Transform = function (state) {
}
return false;
}
- var suppressJurisdictions;
- if (state.sys.suppressJurisdictions) {
- suppressJurisdictions = state.sys.suppressJurisdictions;
- } else {
- suppressJurisdictions = function(codeStr, humanStr) {
- return humanStr;
- }
- }
function getOutputFunction(variables, myabbrev_family, abbreviation_fallback, alternative_varname, transform_fallback) {
var localesets;
var langPrefs = CSL.LangPrefsMap[variables[0]];
@@ -14383,313 +14388,334 @@ CSL.Engine.prototype.retrieveAllStyleModules = function (jurisdictionList) {
}
return ret;
}
+CSL.ParticleList = function() {
+ var always_dropping_1 = [[[0,1], null]];
+ var always_dropping_2 = [[[0,2], null]];
+ var always_dropping_3 = [[[0,3], null]]
+ var always_non_dropping_1 = [[null, [0,1]]];
+ var always_non_dropping_2 = [[null, [0,2]]];
+ var always_non_dropping_3 = [[null, [0,3]]];
+ var either_1 = [[null, [0,1]],[[0,1],null]];
+ var either_2 = [[null, [0,2]],[[0,2],null]];
+ var either_1_dropping_best = [[[0,1],null],[null, [0,1]]];
+ var either_2_dropping_best = [[[0,2],null],[null, [0,2]]];
+ var either_3_dropping_best = [[[0,3],null],[null, [0,3]]];
+ var non_dropping_2_alt_dropping_1_non_dropping_1 = [[null, [0,2]], [[0,1], [1,2]]];
+ return PARTICLES = [
+ ["'s", always_non_dropping_1],
+ ["'s-", always_non_dropping_1],
+ ["'t", always_non_dropping_1],
+ ["a", always_non_dropping_1],
+ ["aan 't", always_non_dropping_2],
+ ["aan de", always_non_dropping_2],
+ ["aan den", always_non_dropping_2],
+ ["aan der", always_non_dropping_2],
+ ["aan het", always_non_dropping_2],
+ ["aan t", always_non_dropping_2],
+ ["aan", always_non_dropping_1],
+ ["ad-", either_1],
+ ["adh-", either_1],
+ ["af", either_1],
+ ["al", either_1],
+ ["al-", either_1],
+ ["am de", always_non_dropping_2],
+ ["am", always_non_dropping_1],
+ ["an-", either_1],
+ ["ar-", either_1],
+ ["as-", either_1],
+ ["ash-", either_1],
+ ["at-", either_1],
+ ["ath-", either_1],
+ ["auf dem", either_2_dropping_best],
+ ["auf den", either_2_dropping_best],
+ ["auf der", either_2_dropping_best],
+ ["auf ter", always_non_dropping_2],
+ ["auf", either_1_dropping_best],
+ ["aus 'm", either_2_dropping_best],
+ ["aus dem", either_2_dropping_best],
+ ["aus den", either_2_dropping_best],
+ ["aus der", either_2_dropping_best],
+ ["aus m", either_2_dropping_best],
+ ["aus", either_1_dropping_best],
+ ["aus'm", either_2_dropping_best],
+ ["az-", either_1],
+ ["aš-", either_1],
+ ["aḍ-", either_1],
+ ["aḏ-", either_1],
+ ["aṣ-", either_1],
+ ["aṭ-", either_1],
+ ["aṯ-", either_1],
+ ["aẓ-", either_1],
+ ["ben", always_non_dropping_1],
+ ["bij 't", always_non_dropping_2],
+ ["bij de", always_non_dropping_2],
+ ["bij den", always_non_dropping_2],
+ ["bij het", always_non_dropping_2],
+ ["bij t", always_non_dropping_2],
+ ["bij", always_non_dropping_1],
+ ["bin", always_non_dropping_1],
+ ["boven d", always_non_dropping_2],
+ ["boven d'", always_non_dropping_2],
+ ["d", always_non_dropping_1],
+ ["d'", either_1],
+ ["da", either_1],
+ ["dal", always_non_dropping_1],
+ ["dal'", always_non_dropping_1],
+ ["dall'", always_non_dropping_1],
+ ["dalla", always_non_dropping_1],
+ ["das", either_1],
+ ["de die le", always_non_dropping_3],
+ ["de die", always_non_dropping_2],
+ ["de l", always_non_dropping_2],
+ ["de l'", always_non_dropping_2],
+ ["de la", non_dropping_2_alt_dropping_1_non_dropping_1],
+ ["de las", non_dropping_2_alt_dropping_1_non_dropping_1],
+ ["de le", always_non_dropping_2],
+ ["de li", either_2],
+ ["de van der", always_non_dropping_3],
+ ["de", either_1],
+ ["de'", either_1],
+ ["deca", always_non_dropping_1],
+ ["degli", either_1],
+ ["dei", either_1],
+ ["del", either_1],
+ ["dela", always_dropping_1],
+ ["dell'", either_1],
+ ["della", either_1],
+ ["delle", either_1],
+ ["dello", either_1],
+ ["den", either_1],
+ ["der", either_1],
+ ["des", either_1],
+ ["di", either_1],
+ ["die le", always_non_dropping_2],
+ ["do", always_non_dropping_1],
+ ["don", always_non_dropping_1],
+ ["dos", either_1],
+ ["du", either_1],
+ ["ed-", either_1],
+ ["edh-", either_1],
+ ["el", either_1],
+ ["el-", either_1],
+ ["en-", either_1],
+ ["er-", either_1],
+ ["es-", either_1],
+ ["esh-", either_1],
+ ["et-", either_1],
+ ["eth-", either_1],
+ ["ez-", either_1],
+ ["eš-", either_1],
+ ["eḍ-", either_1],
+ ["eḏ-", either_1],
+ ["eṣ-", either_1],
+ ["eṭ-", either_1],
+ ["eṯ-", either_1],
+ ["eẓ-", either_1],
+ ["het", always_non_dropping_1],
+ ["i", always_non_dropping_1],
+ ["il", always_dropping_1],
+ ["im", always_non_dropping_1],
+ ["in 't", always_non_dropping_2],
+ ["in de", always_non_dropping_2],
+ ["in den", always_non_dropping_2],
+ ["in der", either_2],
+ ["in het", always_non_dropping_2],
+ ["in t", always_non_dropping_2],
+ ["in", always_non_dropping_1],
+ ["l", always_non_dropping_1],
+ ["l'", always_non_dropping_1],
+ ["la", always_non_dropping_1],
+ ["las", always_non_dropping_1],
+ ["le", always_non_dropping_1],
+ ["les", either_1],
+ ["lo", either_1],
+ ["los", always_non_dropping_1],
+ ["lou", always_non_dropping_1],
+ ["of", always_non_dropping_1],
+ ["onder 't", always_non_dropping_2],
+ ["onder de", always_non_dropping_2],
+ ["onder den", always_non_dropping_2],
+ ["onder het", always_non_dropping_2],
+ ["onder t", always_non_dropping_2],
+ ["onder", always_non_dropping_1],
+ ["op 't", always_non_dropping_2],
+ ["op de", either_2],
+ ["op den", always_non_dropping_2],
+ ["op der", always_non_dropping_2],
+ ["op gen", always_non_dropping_2],
+ ["op het", always_non_dropping_2],
+ ["op t", always_non_dropping_2],
+ ["op ten", always_non_dropping_2],
+ ["op", always_non_dropping_1],
+ ["over 't", always_non_dropping_2],
+ ["over de", always_non_dropping_2],
+ ["over den", always_non_dropping_2],
+ ["over het", always_non_dropping_2],
+ ["over t", always_non_dropping_2],
+ ["over", always_non_dropping_1],
+ ["s", always_non_dropping_1],
+ ["s'", always_non_dropping_1],
+ ["sen", always_dropping_1],
+ ["t", always_non_dropping_1],
+ ["te", always_non_dropping_1],
+ ["ten", always_non_dropping_1],
+ ["ter", always_non_dropping_1],
+ ["tho", always_non_dropping_1],
+ ["thoe", always_non_dropping_1],
+ ["thor", always_non_dropping_1],
+ ["to", always_non_dropping_1],
+ ["toe", always_non_dropping_1],
+ ["tot", always_non_dropping_1],
+ ["uijt 't", always_non_dropping_2],
+ ["uijt de", always_non_dropping_2],
+ ["uijt den", always_non_dropping_2],
+ ["uijt te de", always_non_dropping_3],
+ ["uijt ten", always_non_dropping_2],
+ ["uijt", always_non_dropping_1],
+ ["uit 't", always_non_dropping_2],
+ ["uit de", always_non_dropping_2],
+ ["uit den", always_non_dropping_2],
+ ["uit het", always_non_dropping_2],
+ ["uit t", always_non_dropping_2],
+ ["uit te de", always_non_dropping_3],
+ ["uit ten", always_non_dropping_2],
+ ["uit", always_non_dropping_1],
+ ["unter", always_non_dropping_1],
+ ["v", always_non_dropping_1],
+ ["v.", always_non_dropping_1],
+ ["v.d.", always_non_dropping_1],
+ ["van 't", always_non_dropping_2],
+ ["van de l", always_non_dropping_3],
+ ["van de l'", always_non_dropping_3],
+ ["van de", always_non_dropping_2],
+ ["van de", always_non_dropping_2],
+ ["van den", always_non_dropping_2],
+ ["van der", always_non_dropping_2],
+ ["van gen", always_non_dropping_2],
+ ["van het", always_non_dropping_2],
+ ["van la", always_non_dropping_2],
+ ["van t", always_non_dropping_2],
+ ["van ter", always_non_dropping_2],
+ ["van van de", always_non_dropping_3],
+ ["van", either_1],
+ ["vander", always_non_dropping_1],
+ ["vd", always_non_dropping_1],
+ ["ver", always_non_dropping_1],
+ ["vom und zum", always_dropping_3],
+ ["vom", either_1],
+ ["von 't", always_non_dropping_2],
+ ["von dem", either_2_dropping_best],
+ ["von den", either_2_dropping_best],
+ ["von der", either_2_dropping_best],
+ ["von t", always_non_dropping_2],
+ ["von und zu", either_3_dropping_best],
+ ["von zu", either_2_dropping_best],
+ ["von", either_1_dropping_best],
+ ["voor 't", always_non_dropping_2],
+ ["voor de", always_non_dropping_2],
+ ["voor den", always_non_dropping_2],
+ ["voor in 't", always_non_dropping_3],
+ ["voor in t", always_non_dropping_3],
+ ["voor", always_non_dropping_1],
+ ["vor der", either_2_dropping_best],
+ ["vor", either_1_dropping_best],
+ ["z", always_dropping_1],
+ ["ze", always_dropping_1],
+ ["zu", either_1_dropping_best],
+ ["zum", either_1],
+ ["zur", either_1]
+ ];
+}();
CSL.parseParticles = function(){
- var PARTICLES = [
- ["al-", [[[0,1], null],[null,[0,1]]]],
- ["at-", [[[0,1], null],[null,[0,1]]]],
- ["ath-", [[[0,1], null],[null,[0,1]]]],
- ["aṯ-", [[[0,1], null],[null,[0,1]]]],
- ["ad-", [[[0,1], null],[null,[0,1]]]],
- ["adh-", [[[0,1], null],[null,[0,1]]]],
- ["aḏ-", [[[0,1], null],[null,[0,1]]]],
- ["ar-", [[[0,1], null],[null,[0,1]]]],
- ["az-", [[[0,1], null],[null,[0,1]]]],
- ["as-", [[[0,1], null],[null,[0,1]]]],
- ["ash-", [[[0,1], null],[null,[0,1]]]],
- ["aš-", [[[0,1], null],[null,[0,1]]]],
- ["aṣ-", [[[0,1], null],[null,[0,1]]]],
- ["aḍ-", [[[0,1], null],[null,[0,1]]]],
- ["aṭ-", [[[0,1], null],[null,[0,1]]]],
- ["aẓ-", [[[0,1], null],[null,[0,1]]]],
- ["an-", [[[0,1], null],[null,[0,1]]]],
- ["et-", [[[0,1], null],[null,[0,1]]]],
- ["eth-", [[[0,1], null],[null,[0,1]]]],
- ["eṯ-", [[[0,1], null],[null,[0,1]]]],
- ["ed-", [[[0,1], null],[null,[0,1]]]],
- ["edh-", [[[0,1], null],[null,[0,1]]]],
- ["eḏ-", [[[0,1], null],[null,[0,1]]]],
- ["er-", [[[0,1], null],[null,[0,1]]]],
- ["ez-", [[[0,1], null],[null,[0,1]]]],
- ["es-", [[[0,1], null],[null,[0,1]]]],
- ["esh-", [[[0,1], null],[null,[0,1]]]],
- ["eš-", [[[0,1], null],[null,[0,1]]]],
- ["eṣ-", [[[0,1], null],[null,[0,1]]]],
- ["eḍ-", [[[0,1], null],[null,[0,1]]]],
- ["eṭ-", [[[0,1], null],[null,[0,1]]]],
- ["eẓ-", [[[0,1], null],[null,[0,1]]]],
- ["el-", [[[0,1], null],[null,[0,1]]]],
- ["en-", [[[0,1], null],[null,[0,1]]]],
- ["'s-", [[[0,1], null]]],
- ["'t", [[[0,1], null]]],
- ["af", [[[0,1], null]]],
- ["al", [[[0,1], null]]],
- ["auf den", [[[0,2], null]]],
- ["auf der", [[[0,2], null]]],
- ["aus der", [[[0,2], null]]],
- ["aus'm", [[null, [0,1]]]],
- ["ben", [[null, [0,1]]]],
- ["bin", [[null, [0,1]]]],
- ["d'", [[[0,1], null],[null,[0,1]]]],
- ["da", [[null, [0,1]]]],
- ["dall'", [[null, [0,1]]]],
- ["das", [[[0,1], null]]],
- ["de", [[null, [0,1]],[[0,1],null]]],
- ["de la", [[null, [0,2]], [[0,1], [1,2]]]],
- ["de las", [[null, [0,2]], [[0,1], [1,2]]]],
- ["de li", [[[0,2], null]]],
- ["de'", [[[0,1], null]]],
- ["degli", [[[0,1], null]]],
- ["dei", [[[0,1], null]]],
- ["del", [[null, [0,1]]]],
- ["dela", [[[0,1], null]]],
- ["della", [[[0,1], null]]],
- ["dello", [[[0,1], null]]],
- ["den", [[[0,1], null]]],
- ["der", [[[0,1], null]]],
- ["des", [[null, [0,1]],[[0,1], null]]],
- ["di", [[null, [0,1]]]],
- ["do", [[null, [0,1]]]],
- ["dos", [[[0,1], null]]],
- ["du", [[[0,1], null]]],
- ["el", [[[0,1], null]]],
- ["il", [[[0,1], null]]],
- ["in 't", [[[0,2], null]]],
- ["in de", [[[0,2], null]]],
- ["in der", [[[0,2], null]]],
- ["in het", [[[0,2], null]]],
- ["lo", [[[0,1], null]]],
- ["les", [[[0,1], null]]],
- ["l'", [[null, [0,1]]]],
- ["la", [[null, [0,1]]]],
- ["le", [[null, [0,1]]]],
- ["lou", [[null, [0,1]]]],
- ["mac", [[null, [0,1]]]],
- ["op de", [[[0,2], null]]],
- ["pietro", [[null, [0,1]]]],
- ["saint", [[null, [0,1]]]],
- ["sainte", [[null, [0,1]]]],
- ["sen", [[[0,1], null]]],
- ["st.", [[null, [0,1]]]],
- ["ste.", [[null, [0,1]]]],
- ["te", [[[0,1], null]]],
- ["ten", [[[0,1], null]]],
- ["ter", [[[0,1], null]]],
- ["uit de", [[[0,2], null]]],
- ["uit den", [[[0,2], null]]],
- ["v.d.", [[null, [0,1]]]],
- ["van", [[null, [0,1]]]],
- ["van de", [[null, [0,2]]]],
- ["van den", [[null, [0,2]]]],
- ["van der", [[null, [0,2]]]],
- ["van het", [[null, [0,2]]]],
- ["vander", [[null, [0,1]]]],
- ["vd", [[null, [0,1]]]],
- ["ver", [[null, [0,1]]]],
- ["von", [[[0,1], null],[null,[0,1]]]],
- ["von der", [[[0,2], null]]],
- ["von dem",[[[0,2], null]]],
- ["von und zu", [[[0,3], null]]],
- ["von zu", [[[0,2], null]]],
- ["v.", [[[0,1], null]]],
- ["v", [[[0,1], null]]],
- ["vom", [[[0,1], null]]],
- ["vom und zum", [[[0,3], null]]],
- ["z", [[[0,1], null]]],
- ["ze", [[[0,1], null]]],
- ["zum", [[[0,1], null]]],
- ["zur", [[[0,1], null]]]
- ]
- var CATEGORIZER = null;
- function createCategorizer () {
- CATEGORIZER = {};
- for (var i=0,ilen=PARTICLES.length;i<ilen;i++) {
- var tLst = PARTICLES[i][0].split(" ");
- var pInfo = [];
- for (var j=0,jlen=PARTICLES[i][1].length;j<jlen;j++) {
- var pParams = PARTICLES[i][1][j];
- var str1 = pParams[0] ? tLst.slice(pParams[0][0], pParams[0][1]).join(" ") : "";
- var str2 = pParams[1] ? tLst.slice(pParams[1][0], pParams[1][1]).join(" ") : "";
- pInfo.push({
- strings: [str1, str2],
- positions: [pParams[0], pParams[1]]
- });
- }
- CATEGORIZER[PARTICLES[i][0]] = pInfo;
- }
- }
- createCategorizer();
- var LIST = null;
- var REX = null;
- function assignToList (nospaceList, spaceList, particle) {
- if (["\'", "-"].indexOf(particle.slice(-1)) > -1) {
- nospaceList.push(particle);
- } else {
- spaceList.push(particle);
- }
- }
- function composeParticleLists () {
- LIST = {
- "family": {
- "space": [],
- "nospace": []
- },
- "given": {
- "partial": {},
- "full": []
- }
- }
- REX = {
- "family": null,
- "given": {
- "full_lower": null,
- "full_comma": null,
- "partial": {}
- }
- }
- var FAM_SP = LIST.family.space;
- var FAM_NSP = LIST.family.nospace;
- var GIV_PART = LIST.given.partial;
- var GIV_FULL = LIST.given.full;
- for (var i=0,ilen=PARTICLES.length;i<ilen;i++) {
- var info = PARTICLES[i];
- var particle = info[0].split(" ");
- if (particle.length === 1) {
- assignToList(FAM_NSP, FAM_SP, particle[0]);
- GIV_FULL.push(particle[0]);
- if (!GIV_PART[particle[0]]) {
- GIV_PART[particle[0]] = [];
- }
- GIV_PART[particle[0]].push("");
- } else if (particle.length === 2) {
- assignToList(FAM_NSP, FAM_SP, particle[1]);
- if (!GIV_PART[particle[1]]) {
- GIV_PART[particle[1]] = [];
- }
- GIV_PART[particle[1]].push(particle[0]);
- particle = particle.join(" ");
- assignToList(FAM_NSP, FAM_SP, particle);
- GIV_FULL.push(particle);
- }
- }
- FAM_SP.sort(byLength);
- FAM_NSP.sort(byLength);
- GIV_FULL.sort(byLength);
- for (var key in GIV_PART) {
- GIV_PART[key].sort(byLength);
- }
- }
- function byLength(a,b) {
- if (a.length<b.length) {
- return 1;
- } else if (a.length>b.length) {
- return -1;
- } else {
- return 0;
- }
- }
- function composeRegularExpressions () {
- composeParticleLists();
- REX.family = new RegExp("^((?:" + LIST.family.space.join("|") + ")(\\s+)|(?:" + LIST.family.nospace.join("|") + "([^\\s]))).*", "i");
- REX.given.full_comma = new RegExp(".*?(,[\\s]*)(" + LIST.given.full.join("|") + ")$", "i");
- REX.given.full_lower = new RegExp(".*?([ ]+)(" + LIST.given.full.join("|") + ")$");
- X = "Tom du".match(REX.given.full_lower)
- var allInTheFamily = LIST.family.space
- for (var key in LIST.given.partial) {
- REX.given.partial[key] = new RegExp(".*?(\\s+)(" + LIST.given.partial[key].join("|") + ")$", "i");
+ function splitParticles(nameValue, firstNameFlag, caseOverride) {
+ var origNameValue = nameValue;
+ nameValue = caseOverride ? nameValue.toLowerCase() : nameValue;
+ var particleList = [];
+ var apostrophe;
+ if (firstNameFlag) {
+ apostrophe ="\u02bb";
+ nameValue = nameValue.split("").reverse().join("");
+ } else {
+ apostrophe ="-\u2019";
+ }
+ var rex = new RegExp("^([^ ]+[" + apostrophe + " \'] *)(.+)$");
+ var m = nameValue.match(rex);
+ while (m) {
+ var m1 = firstNameFlag ? m[1].split("").reverse().join("") : m[1];
+ var firstChar = m ? m1 : false;
+ var firstChar = firstChar ? m1.replace(/^[-\'\u02bb\u2019\s]*(.).*$/, "$1") : false;
+ var hasParticle = firstChar ? firstChar.toUpperCase() !== firstChar : false;
+ if (!hasParticle) break;
+ if (firstNameFlag) {
+ particleList.push(origNameValue.slice(m1.length * -1));
+ origNameValue = origNameValue.slice(0,m1.length * -1);
+ } else {
+ particleList.push(origNameValue.slice(0,m1.length));
+ origNameValue = origNameValue.slice(m1.length);
+ }
+ nameValue = m[2];
+ m = nameValue.match(rex);
+ }
+ if (firstNameFlag) {
+ nameValue = nameValue.split("").reverse().join("");
+ particleList.reverse();
+ for (var i=1,ilen=particleList.length;i<ilen;i++) {
+ if (particleList[i].slice(0, 1) == " ") {
+ particleList[i-1] += " ";
+ }
+ }
+ for (var i=0,ilen=particleList.length;i<ilen;i++) {
+ if (particleList[i].slice(0, 1) == " ") {
+ particleList[i] = particleList[i].slice(1);
+ }
+ }
+ nameValue = origNameValue.slice(0, nameValue.length);
+ } else {
+ nameValue = origNameValue.slice(nameValue.length * -1);
+ }
+ return [hasParticle, nameValue, particleList];
+ }
+ function trimLast(str) {
+ var lastChar = str.slice(-1);
+ str = str.trim();
+ if (lastChar === " " && ["\'", "\u2019"].indexOf(str.slice(-1)) > -1) {
+ str += " ";
}
+ return str;
}
- composeRegularExpressions();
- function matchRegularExpressions (name) {
- var m = REX.family.exec(name.family);
- var result = {
- family: {match:null, str:null},
- given: {match:null, str:null}
- }
- if (m) {
- result.family.match = m[2] ? m[1] : m[3] ? m[1].slice(0,-m[3].length) : m[1];
- result.family.str = (m[2] ? m[1].slice(0,-m[2].length) : m[3] ? m[1].slice(0,-m[3].length) : m[1]);
- if (REX.given.partial[result.family.str.toLowerCase()]) {
- var m = REX.given.partial[result.family.str.toLowerCase()].exec(name.given);
- if (m) {
- result.given.match = m[2] ? m[1] + m[2] : m[2];
- result.given.str = m[2];
- }
- }
- } else {
- var m = REX.given.full_comma.exec(name.given);
- if (!m) m = REX.given.full_lower.exec(name.given);
- if (m) {
- result.given.match = m[1] ? m[1] + m[2] : m[2];
- result.given.str = m[2];
- }
- }
- return result;
- }
- function apostropheNormalizer(name, reverse) {
- var params = ["\u2019", "\'"]
- if (reverse) params.reverse();
- if (name.family) {
- name.family = name.family.replace(params[0], params[1])
- }
- if (name.given) {
- name.given = name.given.replace(params[0], params[1])
- }
- }
- return function (name, normalizeApostrophe) {
- if (normalizeApostrophe) {
- apostropheNormalizer(name);
- }
- var result = matchRegularExpressions(name);
- var particles = [];
- if (result.given.match) {
- name.given = name.given.slice(0,-result.given.match.length);
- particles.push(result.given.str);
- }
- if (result.family.match) {
- name.family = name.family.slice(result.family.match.length);
- particles.push(result.family.str);
- }
- particles = particles.join(" ").split(" ");
- if (particles.length) {
- var key = particles.join(" ");
- var pInfo = CATEGORIZER[key.toLowerCase()];
- if (pInfo) {
- for (var i=pInfo.length-1;i>-1;i--) {
- var pSet = pInfo[i];
- if (!result.family.str) result.family.str = "";
- if (!result.given.str) result.given.str = "";
- if (result.given.str.toLowerCase() === pSet.strings[0] && result.family.str.toLowerCase() === pSet.strings[1]) {
- break;
- }
- }
- if (pSet.positions[0] !== null) {
- name["dropping-particle"] = particles.slice(pSet.positions[0][0], pSet.positions[0][1]).join(" ");
- }
- if (pSet.positions[1] !== null) {
- name["non-dropping-particle"] = particles.slice(pSet.positions[1][0], pSet.positions[1][1]).join(" ");
- }
- }
- }
- if (!name.suffix && name.given) {
- m = name.given.match(/(\s*,!*\s*)/);
+ function parseSuffix(nameObj) {
+ if (!nameObj.suffix && nameObj.given) {
+ m = nameObj.given.match(/(\s*,!*\s*)/);
if (m) {
- idx = name.given.indexOf(m[1]);
- var possible_suffix = name.given.slice(idx + m[1].length);
- var possible_comma = name.given.slice(idx, idx + m[1].length).replace(/\s*/g, "");
+ idx = nameObj.given.indexOf(m[1]);
+ var possible_suffix = nameObj.given.slice(idx + m[1].length);
+ var possible_comma = nameObj.given.slice(idx, idx + m[1].length).replace(/\s*/g, "");
if (possible_suffix.length <= 3) {
if (possible_comma.length === 2) {
- name["comma-suffix"] = true;
+ nameObj["comma-suffix"] = true;
}
- name.suffix = possible_suffix;
- } else if (!name["dropping-particle"] && name.given) {
- name["dropping-particle"] = possible_suffix;
- name["comma-dropping-particle"] = ",";
+ nameObj.suffix = possible_suffix;
+ } else if (!nameObj["dropping-particle"] && nameObj.given) {
+ nameObj["dropping-particle"] = possible_suffix;
+ nameObj["comma-dropping-particle"] = ",";
}
- name.given = name.given.slice(0, idx);
+ nameObj.given = nameObj.given.slice(0, idx);
}
}
- if (normalizeApostrophe) {
- apostropheNormalizer(name, true);
+ }
+ return function(nameObj) {
+ [hasLastParticle, lastNameValue, lastParticleList] = splitParticles(nameObj.family);
+ nameObj.family = lastNameValue;
+ var nonDroppingParticle = trimLast(lastParticleList.join(""));
+ if (nonDroppingParticle) {
+ nameObj['non-dropping-particle'] = nonDroppingParticle;
+ }
+ [hasFirstParticle, firstNameValue, firstParticleList] = splitParticles(nameObj.given, true);
+ nameObj.given = firstNameValue;
+ var droppingParticle = firstParticleList.join("").trim();
+ if (droppingParticle) {
+ nameObj['dropping-particle'] = droppingParticle;
}
+ parseSuffix(nameObj);
}
}();