www

Unnamed repository; edit this file 'description' to name the repository.
Log | Files | Refs | Submodules | README | LICENSE

translators.js (16742B)


      1 /*
      2     ***** BEGIN LICENSE BLOCK *****
      3     
      4     Copyright © 2009 Center for History and New Media
      5                      George Mason University, Fairfax, Virginia, USA
      6                      http://zotero.org
      7     
      8     This file is part of Zotero.
      9     
     10     Zotero is free software: you can redistribute it and/or modify
     11     it under the terms of the GNU Affero General Public License as published by
     12     the Free Software Foundation, either version 3 of the License, or
     13     (at your option) any later version.
     14     
     15     Zotero is distributed in the hope that it will be useful,
     16     but WITHOUT ANY WARRANTY; without even the implied warranty of
     17     MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
     18     GNU Affero General Public License for more details.
     19     
     20     You should have received a copy of the GNU Affero General Public License
     21     along with Zotero.  If not, see <http://www.gnu.org/licenses/>.
     22     
     23     ***** END LICENSE BLOCK *****
     24 */
     25 
     26 "use strict";
     27 
     28 /**
     29  * Singleton to handle loading and caching of translators
     30  * @namespace
     31  */
     32 Zotero.Translators = new function() {
     33 	var _cache, _translators;
     34 	var _initialized = false;
     35 	var _initializationDeferred = false;
     36 	
     37 	/**
     38 	 * Initializes translator cache, loading all translator metadata into memory
     39 	 *
     40 	 * @param {Object} [options.metadataCache] - Translator metadata keyed by filename, if already
     41 	 *     available (e.g., in updateBundledFiles()), to avoid unnecesary file reads
     42 	 */
     43 	this.init = Zotero.Promise.coroutine(function* (options = {}) {
     44 		// Wait until bundled files have been updated, except when this is called by the schema update
     45 		// code itself
     46 		if (!options.fromSchemaUpdate) {
     47 			yield Zotero.Schema.schemaUpdatePromise;
     48 		}
     49 		
     50 		// If an initialization has already started, a regular init() call should return the promise
     51 		// for that (which may already be resolved). A reinit should yield on that but then continue
     52 		// with reinitialization.
     53 		if (_initializationDeferred) {
     54 			let promise = _initializationDeferred.promise;
     55 			if (options.reinit) {
     56 				yield promise;
     57 			}
     58 			else {
     59 				return promise;
     60 			}
     61 		}
     62 		
     63 		_initializationDeferred = Zotero.Promise.defer();
     64 		
     65 		Zotero.debug("Initializing translators");
     66 		var start = new Date;
     67 		
     68 		_cache = {"import":[], "export":[], "web":[], "webWithTargetAll":[], "search":[]};
     69 		_translators = {};
     70 		
     71 		var sql = "SELECT rowid, fileName, metadataJSON, lastModifiedTime FROM translatorCache";
     72 		var dbCacheResults = yield Zotero.DB.queryAsync(sql);
     73 		var dbCache = {};
     74 		for (let i = 0; i < dbCacheResults.length; i++) {
     75 			let entry = dbCacheResults[i];
     76 			dbCache[entry.fileName] = entry;
     77 		}
     78 		
     79 		var numCached = 0;
     80 		var filesInCache = {};
     81 		var translatorsDir = Zotero.getTranslatorsDirectory().path;
     82 		var iterator = new OS.File.DirectoryIterator(translatorsDir);
     83 		try {
     84 			while (true) {
     85 				let entries = yield iterator.nextBatch(5); // TODO: adjust as necessary
     86 				if (!entries.length) break;
     87 				for (let i = 0; i < entries.length; i++) {
     88 					let entry = entries[i];
     89 					let path = entry.path;
     90 					let fileName = entry.name;
     91 					
     92 					if (!(/^[^.].*\.js$/.test(fileName))) continue;
     93 					
     94 					let lastModifiedTime;
     95 					if ('winLastWriteDate' in entry) {
     96 						lastModifiedTime = entry.winLastWriteDate.getTime();
     97 					}
     98 					else {
     99 						lastModifiedTime = (yield OS.File.stat(path)).lastModificationDate.getTime();
    100 					}
    101 					
    102 					// Check passed cache for metadata
    103 					let memCacheJSON = false;
    104 					if (options.metadataCache && options.metadataCache[fileName]) {
    105 						memCacheJSON = options.metadataCache[fileName];
    106 					}
    107 					
    108 					// Check DB cache
    109 					let dbCacheEntry = false;
    110 					if (dbCache[fileName]) {
    111 						filesInCache[fileName] = true;
    112 						if (dbCache[fileName].lastModifiedTime == lastModifiedTime) {
    113 							dbCacheEntry = dbCache[fileName];
    114 						}
    115 					}
    116 					
    117 					// Get JSON from cache if possible
    118 					if (memCacheJSON || dbCacheEntry) {
    119 						try {
    120 							var translator = Zotero.Translators.load(
    121 								memCacheJSON || dbCacheEntry.metadataJSON, path
    122 							);
    123 						}
    124 						catch (e) {
    125 							Zotero.logError(e);
    126 							Zotero.debug(memCacheJSON || dbCacheEntry.metadataJSON, 1);
    127 							
    128 							// If JSON is invalid, clear from cache
    129 							yield Zotero.DB.queryAsync(
    130 								"DELETE FROM translatorCache WHERE fileName=?",
    131 								fileName
    132 							);
    133 							continue;
    134 						}
    135 					}
    136 					// Otherwise, load from file
    137 					else {
    138 						try {
    139 							var translator = yield Zotero.Translators.loadFromFile(path);
    140 						}
    141 						catch (e) {
    142 							Zotero.logError(e);
    143 							
    144 							// If translator file is invalid, delete it and clear the cache entry
    145 							// so that the translator is reinstalled the next time it's updated.
    146 							//
    147 							// TODO: Reinstall the correct translator immediately
    148 							yield OS.File.remove(path);
    149 							let sql = "DELETE FROM translatorCache WHERE fileName=?";
    150 							yield Zotero.DB.queryAsync(sql, fileName);
    151 							continue;
    152 						}
    153 					}
    154 					
    155 					// When can this happen?
    156 					if (!translator.translatorID) {
    157 						Zotero.debug("Translator ID for " + path + " not found");
    158 						continue;
    159 					}
    160 					
    161 					// Check if there's already a cached translator with the same id
    162 					if (_translators[translator.translatorID]) {
    163 						let existingTranslator = _translators[translator.translatorID];
    164 						// If cached translator is older, delete it
    165 						if (existingTranslator.lastUpdated < translator.lastUpdated) {
    166 							translator.logError("Deleting older translator "
    167 								+ existingTranslator.fileName + " with same ID as "
    168 								+ translator.fileName);
    169 							yield OS.File.remove(existingTranslator.path);
    170 							delete _translators[translator.translatorID];
    171 						}
    172 						// If cached translator is newer or the same, delete the current one
    173 						else {
    174 							translator.logError("Translator " + existingTranslator.fileName
    175 								+ " with same ID is already loaded -- deleting "
    176 								+ translator.fileName);
    177 							yield OS.File.remove(translator.path);
    178 							continue;
    179 						}
    180 					}
    181 					
    182 					// add to cache
    183 					_translators[translator.translatorID] = translator;
    184 					for (let type in TRANSLATOR_TYPES) {
    185 						if (translator.translatorType & TRANSLATOR_TYPES[type]) {
    186 							_cache[type].push(translator);
    187 							if ((translator.translatorType & TRANSLATOR_TYPES.web) && translator.targetAll) {
    188 								_cache.webWithTargetAll.push(translator);
    189 							}
    190 						}
    191 					}
    192 					
    193 					if (!dbCacheEntry) {
    194 						yield Zotero.Translators.cacheInDB(
    195 							fileName,
    196 							translator.serialize(TRANSLATOR_REQUIRED_PROPERTIES.
    197 												 concat(TRANSLATOR_OPTIONAL_PROPERTIES)),
    198 							lastModifiedTime
    199 						);
    200 					}
    201 					
    202 					numCached++;
    203 				}
    204 			}
    205 		}
    206 		finally {
    207 			iterator.close();
    208 		}
    209 		
    210 		// Remove translators from DB cache if no file
    211 		for (let fileName in dbCache) {
    212 			if (!filesInCache[fileName]) {
    213 				yield Zotero.DB.queryAsync(
    214 					"DELETE FROM translatorCache WHERE rowid=?",
    215 					dbCache[fileName].rowid
    216 				);
    217 			}
    218 		}
    219 		
    220 		// Sort by priority
    221 		var collation = Zotero.getLocaleCollation();
    222 		var cmp = function (a, b) {
    223 			if (a.priority > b.priority) {
    224 				return 1;
    225 			}
    226 			else if (a.priority < b.priority) {
    227 				return -1;
    228 			}
    229 			return collation.compareString(1, a.label, b.label);
    230 		}
    231 		for(var type in _cache) {
    232 			_cache[type].sort(cmp);
    233 		}
    234 		
    235 		_initializationDeferred.resolve();
    236 		_initialized = true;
    237 		
    238 		Zotero.debug("Cached " + numCached + " translators in " + ((new Date) - start) + " ms");
    239 	});
    240 	
    241 	
    242 	this.reinit = function (options = {}) {
    243 		return this.init(Object.assign({}, options, { reinit: true }));
    244 	};
    245 	
    246 	
    247 	/**
    248 	 * Loads a translator from JSON, with optional code
    249 	 *
    250 	 * @param {String|Object} json - Metadata JSON
    251 	 * @param {String} path
    252 	 * @param {String} [code]
    253 	 */
    254 	this.load = function (json, path, code) {
    255 		var info = typeof json == 'string' ? JSON.parse(json) : json;
    256 		info.path = path;
    257 		info.code = code;
    258 		return new Zotero.Translator(info);
    259 	}
    260 
    261 	/**
    262 	 * Loads a translator from the disk
    263 	 *
    264 	 * @param {String} file - Path to translator file
    265 	 */
    266 	this.loadFromFile = function(path) {
    267 		const infoRe = /^\s*{[\S\s]*?}\s*?[\r\n]/;
    268 		return Zotero.File.getContentsAsync(path)
    269 		.then(function(source) {
    270 			return Zotero.Translators.load(infoRe.exec(source)[0], path, source);
    271 		})
    272 		.catch(function() {
    273 			throw "Invalid or missing translator metadata JSON object in " + OS.Path.basename(path);
    274 		});
    275 	}
    276 	
    277 	/**
    278 	 * Gets the translator that corresponds to a given ID
    279 	 *
    280 	 * @param {String} id The ID of the translator
    281 	 */
    282 	this.get = function(id) {
    283 		if (!_initialized) {
    284 			throw new Zotero.Exception.UnloadedDataException("Translators not yet loaded", 'translators');
    285 		}
    286 		return  _translators[id] ? _translators[id] : false
    287 	}
    288 	
    289 	/**
    290 	 * Gets all translators for a specific type of translation
    291 	 *
    292 	 * @param {String} type The type of translators to get (import, export, web, or search)
    293 	 */
    294 	this.getAllForType = function(type) {
    295 		return this.init().then(function () {
    296 			return _cache[type].slice();
    297 		});
    298 	}
    299 	
    300 	/**
    301 	 * Gets all translators for a specific type of translation
    302 	 */
    303 	this.getAll = function() {
    304 		return this.init().then(function () {
    305 			return Object.keys(_translators).map(id => _translators[id]);
    306 		});
    307 	}
    308 	
    309 	/**
    310 	 * Gets web translators for a specific location
    311 	 * @param {String} uri The URI for which to look for translators
    312 	 * @param {String} rootUri The root URI of the page, different from `uri` if running in an iframe
    313 	 */
    314 	this.getWebTranslatorsForLocation = function(URI, rootURI) {
    315 		var isFrame = URI !== rootURI;
    316 		var type = isFrame ? "webWithTargetAll" : "web";
    317 		
    318 		return this.getAllForType(type).then(function(allTranslators) {
    319 			var potentialTranslators = [];
    320 			var proxies = [];
    321 			
    322 			var rootSearchURIs = Zotero.Proxies.getPotentialProxies(rootURI);
    323 			var frameSearchURIs = isFrame ? Zotero.Proxies.getPotentialProxies(URI) : rootSearchURIs;
    324 			
    325 			Zotero.debug("Translators: Looking for translators for "+Object.keys(frameSearchURIs).join(', '));
    326 			
    327 			for (let translator of allTranslators) {
    328 				rootURIsLoop:
    329 				for (let rootSearchURI in rootSearchURIs) {
    330 					let isGeneric = !translator.webRegexp.root;
    331 					
    332 					let rootURIMatches = isGeneric || rootSearchURI.length < 8192 && translator.webRegexp.root.test(rootSearchURI);
    333 					if (translator.webRegexp.all && rootURIMatches) {
    334 						for (let frameSearchURI in frameSearchURIs) {
    335 							let frameURIMatches = frameSearchURI.length < 8192 && translator.webRegexp.all.test(frameSearchURI);
    336 								
    337 							if (frameURIMatches) {
    338 								potentialTranslators.push(translator);
    339 								proxies.push(frameSearchURIs[frameSearchURI]);
    340 								// prevent adding the translator multiple times
    341 								break rootURIsLoop;
    342 							}
    343 						}
    344 					}
    345 					else if(!isFrame && (isGeneric || rootURIMatches)) {
    346 						potentialTranslators.push(translator);
    347 						proxies.push(rootSearchURIs[rootSearchURI]);
    348 						break;
    349 					}
    350 				}
    351 			}
    352 			
    353 			return [potentialTranslators, proxies];
    354 		}.bind(this));
    355 	},
    356 
    357 	/**
    358 	 * Get the array of searchURIs and related proxy converter functions
    359 	 * 
    360 	 * @param {String} URI to get searchURIs and converterFunctions for
    361 	 */
    362 	this.getSearchURIs = function(URI) {
    363 		var properURI = Zotero.Proxies.proxyToProper(URI);
    364 		if (properURI !== URI) {
    365 			// if we know this proxy, just use the proper URI for detection
    366 			let obj = {};
    367 			obj[properURI] = Zotero.Proxies.properToProxy;
    368 			return obj;
    369 		}
    370 			
    371 		var searchURIs = {};
    372 		searchURIs[URI] = null;
    373 		
    374 		// if there is a subdomain that is also a TLD, also test against URI with the domain
    375 		// dropped after the TLD
    376 		// (i.e., www.nature.com.mutex.gmu.edu => www.nature.com)
    377 		var m = /^(https?:\/\/)([^\/]+)/i.exec(URI);
    378 		if (m) {
    379 			// First, drop the 0- if it exists (this is an III invention)
    380 			var host = m[2];
    381 			if(host.substr(0, 2) === "0-") host = host.substr(2);
    382 			var hostnames = host.split(".");
    383 			for (var i=1; i<hostnames.length-2; i++) {
    384 				if (TLDS[hostnames[i].toLowerCase()]) {
    385 					var properHost = hostnames.slice(0, i+1).join(".");
    386 					searchURIs[m[1]+properHost+URI.substr(m[0].length)] = new function() {
    387 						var re = new RegExp('^https?://(?:[^/]+\\.)?'+Zotero.Utilities.quotemeta(properHost)+'(?=/)', "gi");
    388 						var proxyHost = hostnames.slice(i+1).join(".").replace(/\$/g, "$$$$");
    389 						return function(uri) { return uri.replace(re, "$&."+proxyHost) };
    390 					};
    391 				}
    392 			}
    393 		}
    394 		return searchURIs;
    395 	},
    396 	
    397 	/**
    398 	 * Gets import translators for a specific location
    399 	 * @param {String} location The location for which to look for translators
    400 	 * @param {Function} [callback] An optional callback to be executed when translators have been
    401 	 *                              retrieved
    402 	 * @return {Promise<Zotero.Translator[]|true>} - An array of translators if no callback is specified;
    403 	 *     otherwise true
    404 	 */
    405 	this.getImportTranslatorsForLocation = function(location, callback) {	
    406 		return Zotero.Translators.getAllForType("import").then(function(allTranslators) {
    407 			var tier1Translators = [];
    408 			var tier2Translators = [];
    409 			
    410 			for(var i=0; i<allTranslators.length; i++) {
    411 				if(allTranslators[i].importRegexp && allTranslators[i].importRegexp.test(location)) {
    412 					tier1Translators.push(allTranslators[i]);
    413 				} else {
    414 					tier2Translators.push(allTranslators[i]);
    415 				}
    416 			}
    417 			
    418 			var translators = tier1Translators.concat(tier2Translators);
    419 			if(callback) {
    420 				callback(translators);
    421 				return true;
    422 			}
    423 			return translators;
    424 		});
    425 	}
    426 	
    427 	/**
    428 	 * @param	{String}		label
    429 	 * @return	{String}
    430 	 */
    431 	this.getFileNameFromLabel = function(label, alternative) {
    432 		var fileName = Zotero.Utilities.removeDiacritics(
    433 			Zotero.File.getValidFileName(label)) + ".js";
    434 		// Use translatorID if name still isn't ASCII (e.g., Cyrillic)
    435 		if (alternative && !fileName.match(/^[\x00-\x7f]+$/)) {
    436 			fileName = alternative + ".js";
    437 		}
    438 		return fileName;
    439 	}
    440 	
    441 	/**
    442 	 * @param	{String}		metadata
    443 	 * @param	{String}		metadata.translatorID		Translator GUID
    444 	 * @param	{Integer}		metadata.translatorType		See TRANSLATOR_TYPES in translate.js
    445 	 * @param	{String}		metadata.label				Translator title
    446 	 * @param	{String}		metadata.creator			Translator author
    447 	 * @param	{String|Null}	metadata.target				Target regexp
    448 	 * @param	{String|Null}	metadata.minVersion
    449 	 * @param	{String}		metadata.maxVersion
    450 	 * @param	{String|undefined}	metadata.configOptions
    451 	 * @param	{String|undefined}	metadata.displayOptions
    452 	 * @param	{Integer}		metadata.priority
    453 	 * @param	{String}		metadata.browserSupport
    454 	 * @param	{Boolean}		metadata.inRepository
    455 	 * @param	{String}		metadata.lastUpdated		SQL date
    456 	 * @param	{String}		code
    457 	 * @return	{Promise<nsIFile>}
    458 	 */
    459 	this.save = Zotero.Promise.coroutine(function* (metadata, code) {
    460 		if (!metadata.translatorID) {
    461 			throw ("metadata.translatorID not provided in Zotero.Translators.save()");
    462 		}
    463 		
    464 		if (!metadata.translatorType) {
    465 			var found = false;
    466 			for (let type in TRANSLATOR_TYPES) {
    467 				if (metadata.translatorType & TRANSLATOR_TYPES[type]) {
    468 					found = true;
    469 					break;
    470 				}
    471 			}
    472 			if (!found) {
    473 				throw ("Invalid translatorType '" + metadata.translatorType + "' in Zotero.Translators.save()");
    474 			}
    475 		}
    476 		
    477 		if (!metadata.label) {
    478 			throw new Error("metadata.label not provided");
    479 		}
    480 		
    481 		if (!metadata.priority) {
    482 			throw new Error("metadata.priority not provided");
    483 		}
    484 		
    485 		if (!metadata.lastUpdated) {
    486 			throw new Error("metadata.lastUpdated not provided");
    487 		}
    488 		
    489 		if (!code) {
    490 			throw new Error("code not provided");
    491 		}
    492 		
    493 		var fileName = Zotero.Translators.getFileNameFromLabel(
    494 			metadata.label, metadata.translatorID
    495 		);
    496 		var destFile = OS.Path.join(Zotero.getTranslatorsDirectory().path, fileName);
    497 		
    498 		// JSON.stringify has the benefit of indenting JSON
    499 		var metadataJSON = JSON.stringify(metadata, null, "\t");
    500 		
    501 		var str = metadataJSON + "\n\n" + code;
    502 		
    503 		// Make sure file ends with newline
    504 		if (!str.endsWith('\n')) {
    505 			str += '\n';
    506 		}
    507 		
    508 		var translator = Zotero.Translators.get(metadata.translatorID);
    509 		var sameFile = translator && destFile == translator.path;
    510 		
    511 		var exists = yield OS.File.exists(destFile);
    512 		if (!sameFile && exists) {
    513 			var msg = "Overwriting translator with same filename '"
    514 				+ fileName + "'";
    515 			Zotero.debug(msg, 1);
    516 			Zotero.debug(metadata, 1);
    517 			Components.utils.reportError(msg);
    518 		}
    519 		
    520 		Zotero.debug("Saving translator '" + metadata.label + "'");
    521 		Zotero.debug(metadata);
    522 		return Zotero.File.putContentsAsync(destFile, str).return(destFile);
    523 	});
    524 	
    525 	this.cacheInDB = function(fileName, metadataJSON, lastModifiedTime) {
    526 		return Zotero.DB.queryAsync(
    527 			"REPLACE INTO translatorCache VALUES (?, ?, ?)",
    528 			[fileName, JSON.stringify(metadataJSON), lastModifiedTime]
    529 		);
    530 	}
    531 }