translators.js (16742B)
/*
    ***** BEGIN LICENSE BLOCK *****
    
    Copyright © 2009 Center for History and New Media
                     George Mason University, Fairfax, Virginia, USA
                     http://zotero.org
    
    This file is part of Zotero.
    
    Zotero is free software: you can redistribute it and/or modify
    it under the terms of the GNU Affero General Public License as published by
    the Free Software Foundation, either version 3 of the License, or
    (at your option) any later version.
    
    Zotero is distributed in the hope that it will be useful,
    but WITHOUT ANY WARRANTY; without even the implied warranty of
    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
    GNU Affero General Public License for more details.
    
    You should have received a copy of the GNU Affero General Public License
    along with Zotero.  If not, see <http://www.gnu.org/licenses/>.
    
    ***** END LICENSE BLOCK *****
*/

"use strict";

/**
 * Singleton to handle loading and caching of translators
 *
 * Keeps two in-memory structures: `_translators` (translatorID -> translator) and `_cache`
 * (translator type -> sorted array of translators).
 * @namespace
 */
Zotero.Translators = new function() {
	var _cache, _translators;
	var _initialized = false;
	var _initializationDeferred = false;
	
	/**
	 * Initializes translator cache, loading all translator metadata into memory
	 *
	 * Walks the translators directory, taking each translator's metadata from (in order of
	 * preference) the passed metadata cache, the `translatorCache` DB table (if the file's
	 * modification time matches) or the file itself, and then prunes DB cache rows that no
	 * longer have a file.
	 *
	 * @param {Object} [options]
	 * @param {Object} [options.metadataCache] - Translator metadata keyed by filename, if already
	 *     available (e.g., in updateBundledFiles()), to avoid unnecessary file reads
	 * @param {Boolean} [options.fromSchemaUpdate] - If true, don't wait for
	 *     Zotero.Schema.schemaUpdatePromise
	 * @param {Boolean} [options.reinit] - If true, wait for any initialization that has already
	 *     started and then initialize again
	 * @return {Promise}
	 */
	this.init = Zotero.Promise.coroutine(function* (options = {}) {
		// Wait until bundled files have been updated, except when this is called by the schema update
		// code itself
		if (!options.fromSchemaUpdate) {
			yield Zotero.Schema.schemaUpdatePromise;
		}
		
		// If an initialization has already started, a regular init() call should return the promise
		// for that (which may already be resolved). A reinit should yield on that but then continue
		// with reinitialization.
		if (_initializationDeferred) {
			let promise = _initializationDeferred.promise;
			if (options.reinit) {
				yield promise;
			}
			else {
				return promise;
			}
		}
		
		// NOTE(review): if anything below throws, this deferred is never settled, so later
		// non-reinit init() calls would wait on a promise that stays pending -- confirm whether
		// that is acceptable.
		_initializationDeferred = Zotero.Promise.defer();
		
		Zotero.debug("Initializing translators");
		var start = new Date;
		
		_cache = {"import":[], "export":[], "web":[], "webWithTargetAll":[], "search":[]};
		_translators = {};
		
		// Index the DB cache rows by filename
		var sql = "SELECT rowid, fileName, metadataJSON, lastModifiedTime FROM translatorCache";
		var dbCacheResults = yield Zotero.DB.queryAsync(sql);
		var dbCache = {};
		for (let i = 0; i < dbCacheResults.length; i++) {
			let entry = dbCacheResults[i];
			dbCache[entry.fileName] = entry;
		}
		
		var numCached = 0;
		var filesInCache = {};
		var translatorsDir = Zotero.getTranslatorsDirectory().path;
		var iterator = new OS.File.DirectoryIterator(translatorsDir);
		try {
			while (true) {
				let entries = yield iterator.nextBatch(5); // TODO: adjust as necessary
				if (!entries.length) break;
				for (let i = 0; i < entries.length; i++) {
					let entry = entries[i];
					let path = entry.path;
					let fileName = entry.name;
					
					// Only consider non-hidden .js files
					if (!(/^[^.].*\.js$/.test(fileName))) continue;
					
					// Use the modification time from the directory entry if it provides one;
					// otherwise stat the file
					let lastModifiedTime;
					if ('winLastWriteDate' in entry) {
						lastModifiedTime = entry.winLastWriteDate.getTime();
					}
					else {
						lastModifiedTime = (yield OS.File.stat(path)).lastModificationDate.getTime();
					}
					
					// Check passed cache for metadata
					let memCacheJSON = false;
					if (options.metadataCache && options.metadataCache[fileName]) {
						memCacheJSON = options.metadataCache[fileName];
					}
					
					// Check DB cache
					let dbCacheEntry = false;
					if (dbCache[fileName]) {
						filesInCache[fileName] = true;
						if (dbCache[fileName].lastModifiedTime == lastModifiedTime) {
							dbCacheEntry = dbCache[fileName];
						}
					}
					
					let translator;
					// Get JSON from cache if possible
					if (memCacheJSON || dbCacheEntry) {
						try {
							translator = Zotero.Translators.load(
								memCacheJSON || dbCacheEntry.metadataJSON, path
							);
						}
						catch (e) {
							Zotero.logError(e);
							Zotero.debug(memCacheJSON || dbCacheEntry.metadataJSON, 1);
							
							// If JSON is invalid, clear from cache
							yield Zotero.DB.queryAsync(
								"DELETE FROM translatorCache WHERE fileName=?",
								fileName
							);
							continue;
						}
					}
					// Otherwise, load from file
					else {
						try {
							translator = yield Zotero.Translators.loadFromFile(path);
						}
						catch (e) {
							Zotero.logError(e);
							
							// If translator file is invalid, delete it and clear the cache entry
							// so that the translator is reinstalled the next time it's updated.
							//
							// TODO: Reinstall the correct translator immediately
							yield OS.File.remove(path);
							yield Zotero.DB.queryAsync(
								"DELETE FROM translatorCache WHERE fileName=?",
								fileName
							);
							continue;
						}
					}
					
					// When can this happen?
					if (!translator.translatorID) {
						Zotero.debug("Translator ID for " + path + " not found");
						continue;
					}
					
					// Check if there's already a cached translator with the same id
					if (_translators[translator.translatorID]) {
						let existingTranslator = _translators[translator.translatorID];
						// If cached translator is older, delete it
						if (existingTranslator.lastUpdated < translator.lastUpdated) {
							translator.logError("Deleting older translator "
								+ existingTranslator.fileName + " with same ID as "
								+ translator.fileName);
							yield OS.File.remove(existingTranslator.path);
							delete _translators[translator.translatorID];
							// Also drop the deleted translator from the per-type lists so that
							// they don't keep returning a translator whose file is gone
							for (let type in _cache) {
								_cache[type] = _cache[type].filter(t => t !== existingTranslator);
							}
						}
						// If cached translator is newer or the same, delete the current one
						else {
							translator.logError("Translator " + existingTranslator.fileName
								+ " with same ID is already loaded -- deleting "
								+ translator.fileName);
							yield OS.File.remove(translator.path);
							continue;
						}
					}
					
					// add to cache
					_translators[translator.translatorID] = translator;
					for (let type in TRANSLATOR_TYPES) {
						if (translator.translatorType & TRANSLATOR_TYPES[type]) {
							_cache[type].push(translator);
						}
					}
					// Done once per translator, outside the type loop, so that a translator
					// with several type bits set isn't listed more than once
					if ((translator.translatorType & TRANSLATOR_TYPES.web) && translator.targetAll) {
						_cache.webWithTargetAll.push(translator);
					}
					
					// Store metadata in the DB cache unless an up-to-date row already exists
					if (!dbCacheEntry) {
						yield Zotero.Translators.cacheInDB(
							fileName,
							translator.serialize(TRANSLATOR_REQUIRED_PROPERTIES.
												 concat(TRANSLATOR_OPTIONAL_PROPERTIES)),
							lastModifiedTime
						);
					}
					
					numCached++;
				}
			}
		}
		finally {
			iterator.close();
		}
		
		// Remove translators from DB cache if no file
		for (let fileName in dbCache) {
			if (!filesInCache[fileName]) {
				yield Zotero.DB.queryAsync(
					"DELETE FROM translatorCache WHERE rowid=?",
					dbCache[fileName].rowid
				);
			}
		}
		
		// Sort by priority, then by label
		var collation = Zotero.getLocaleCollation();
		var cmp = function (a, b) {
			if (a.priority > b.priority) {
				return 1;
			}
			else if (a.priority < b.priority) {
				return -1;
			}
			return collation.compareString(1, a.label, b.label);
		};
		for (let type in _cache) {
			_cache[type].sort(cmp);
		}
		
		_initializationDeferred.resolve();
		_initialized = true;
		
		Zotero.debug("Cached " + numCached + " translators in " + ((new Date) - start) + " ms");
	});
	
	
	/**
	 * Reinitializes the translator cache, waiting for any initialization already in progress
	 *
	 * @param {Object} [options] - Same options as init(); `reinit` is forced to true
	 * @return {Promise}
	 */
	this.reinit = function (options = {}) {
		return this.init(Object.assign({}, options, { reinit: true }));
	};
	
	
	/**
	 * Loads a translator from JSON, with optional code
	 *
	 * If an object is passed, `path` and `code` are set directly on it.
	 *
	 * @param {String|Object} json - Metadata JSON
	 * @param {String} path
	 * @param {String} [code]
	 * @return {Zotero.Translator}
	 */
	this.load = function (json, path, code) {
		var info = typeof json == 'string' ? JSON.parse(json) : json;
		info.path = path;
		info.code = code;
		return new Zotero.Translator(info);
	};
	
	/**
	 * Loads a translator from the disk
	 *
	 * The metadata is taken from the leading `{...}` block of the file, and the whole file
	 * contents are passed along as the translator code.
	 *
	 * @param {String} path - Path to translator file
	 * @return {Promise<Zotero.Translator>} - Rejected with a string message if the file can't
	 *     be read or its metadata block is missing or can't be parsed
	 */
	this.loadFromFile = function(path) {
		const infoRe = /^\s*{[\S\s]*?}\s*?[\r\n]/;
		return Zotero.File.getContentsAsync(path)
		.then(function(source) {
			return Zotero.Translators.load(infoRe.exec(source)[0], path, source);
		})
		.catch(function() {
			throw "Invalid or missing translator metadata JSON object in " + OS.Path.basename(path);
		});
	};
	
	/**
	 * Gets the translator that corresponds to a given ID
	 *
	 * @param {String} id The ID of the translator
	 * @return {Zotero.Translator|false} - The translator, or false if none is loaded with that ID
	 * @throws {Zotero.Exception.UnloadedDataException} If initialization hasn't completed
	 */
	this.get = function(id) {
		if (!_initialized) {
			throw new Zotero.Exception.UnloadedDataException("Translators not yet loaded", 'translators');
		}
		return _translators[id] ? _translators[id] : false;
	};
	
	/**
	 * Gets all translators for a specific type of translation
	 *
	 * @param {String} type The type of translators to get (import, export, web, or search)
	 * @return {Promise<Zotero.Translator[]>} - A copy of the cached list for the type
	 */
	this.getAllForType = function(type) {
		return this.init().then(function () {
			return _cache[type].slice();
		});
	};
	
	/**
	 * Gets all loaded translators, regardless of type
	 *
	 * @return {Promise<Zotero.Translator[]>}
	 */
	this.getAll = function() {
		return this.init().then(function () {
			return Object.keys(_translators).map(id => _translators[id]);
		});
	};
	
	/**
	 * Gets web translators for a specific location
	 *
	 * @param {String} URI The URI for which to look for translators
	 * @param {String} rootURI The root URI of the page, different from `URI` if running in an iframe
	 * @return {Promise<Array>} - A two-element array: the matching translators, and a parallel
	 *     array holding, for each translator, the value Zotero.Proxies.getPotentialProxies()
	 *     associates with the search URI that matched
	 */
	this.getWebTranslatorsForLocation = function(URI, rootURI) {
		var isFrame = URI !== rootURI;
		// In frames, only translators that target all frames are considered
		var type = isFrame ? "webWithTargetAll" : "web";
		
		return this.getAllForType(type).then(function(allTranslators) {
			var potentialTranslators = [];
			var proxies = [];
			
			var rootSearchURIs = Zotero.Proxies.getPotentialProxies(rootURI);
			var frameSearchURIs = isFrame ? Zotero.Proxies.getPotentialProxies(URI) : rootSearchURIs;
			
			Zotero.debug("Translators: Looking for translators for "+Object.keys(frameSearchURIs).join(', '));
			
			for (let translator of allTranslators) {
				rootURIsLoop:
				for (let rootSearchURI in rootSearchURIs) {
					// A translator without a root regexp matches any root URI
					let isGeneric = !translator.webRegexp.root;
					
					// URIs of 8192 characters or more are never tested against the regexps
					let rootURIMatches = isGeneric
						|| (rootSearchURI.length < 8192 && translator.webRegexp.root.test(rootSearchURI));
					if (translator.webRegexp.all && rootURIMatches) {
						for (let frameSearchURI in frameSearchURIs) {
							let frameURIMatches = frameSearchURI.length < 8192
								&& translator.webRegexp.all.test(frameSearchURI);
							
							if (frameURIMatches) {
								potentialTranslators.push(translator);
								proxies.push(frameSearchURIs[frameSearchURI]);
								// prevent adding the translator multiple times
								break rootURIsLoop;
							}
						}
					}
					else if (!isFrame && (isGeneric || rootURIMatches)) {
						potentialTranslators.push(translator);
						proxies.push(rootSearchURIs[rootSearchURI]);
						break;
					}
				}
			}
			
			return [potentialTranslators, proxies];
		});
	};
	
	/**
	 * Get the search URIs and related proxy converter functions for a URI
	 *
	 * @param {String} URI to get searchURIs and converterFunctions for
	 * @return {Object} - Object keyed by search URI. If the URI belongs to a known proxy, the
	 *     only key is the proper URI, mapped to Zotero.Proxies.properToProxy. Otherwise the
	 *     original URI is mapped to null and each candidate un-proxied URI is mapped to a
	 *     function that converts a proper URI back to its proxied form.
	 */
	this.getSearchURIs = function(URI) {
		var properURI = Zotero.Proxies.proxyToProper(URI);
		if (properURI !== URI) {
			// if we know this proxy, just use the proper URI for detection
			let obj = {};
			obj[properURI] = Zotero.Proxies.properToProxy;
			return obj;
		}
		
		var searchURIs = {};
		searchURIs[URI] = null;
		
		// if there is a subdomain that is also a TLD, also test against URI with the domain
		// dropped after the TLD
		// (i.e., www.nature.com.mutex.gmu.edu => www.nature.com)
		var m = /^(https?:\/\/)([^\/]+)/i.exec(URI);
		if (m) {
			// First, drop the 0- if it exists (this is an III invention)
			let host = m[2];
			if (host.startsWith("0-")) host = host.slice(2);
			let hostnames = host.split(".");
			for (let i = 1; i < hostnames.length - 2; i++) {
				if (TLDS[hostnames[i].toLowerCase()]) {
					// Block-scoped bindings give each converter its own regexp and proxy host
					let properHost = hostnames.slice(0, i+1).join(".");
					let re = new RegExp('^https?://(?:[^/]+\\.)?'+Zotero.Utilities.quotemeta(properHost)+'(?=/)', "gi");
					// Double each "$" so that it's inserted literally by String.replace()
					let proxyHost = hostnames.slice(i+1).join(".").replace(/\$/g, "$$$$");
					searchURIs[m[1]+properHost+URI.slice(m[0].length)] = function(uri) {
						return uri.replace(re, "$&."+proxyHost);
					};
				}
			}
		}
		return searchURIs;
	};
	
	/**
	 * Gets import translators for a specific location
	 *
	 * Translators whose import regexp matches the location are listed first, followed by all
	 * other import translators.
	 *
	 * @param {String} location The location for which to look for translators
	 * @param {Function} [callback] An optional callback to be executed when translators have been
	 *                              retrieved
	 * @return {Promise<Zotero.Translator[]|true>} - An array of translators if no callback is specified;
	 *     otherwise true
	 */
	this.getImportTranslatorsForLocation = function(location, callback) {
		return Zotero.Translators.getAllForType("import").then(function(allTranslators) {
			var tier1Translators = [];
			var tier2Translators = [];
			
			for (let translator of allTranslators) {
				if (translator.importRegexp && translator.importRegexp.test(location)) {
					tier1Translators.push(translator);
				}
				else {
					tier2Translators.push(translator);
				}
			}
			
			var translators = tier1Translators.concat(tier2Translators);
			if (callback) {
				callback(translators);
				return true;
			}
			return translators;
		});
	};
	
	/**
	 * Builds a translator filename from a label
	 *
	 * @param {String} label
	 * @param {String} [alternative] - Base name to use instead (e.g., the translatorID) if the
	 *     name derived from the label still contains non-ASCII characters
	 * @return {String}
	 */
	this.getFileNameFromLabel = function(label, alternative) {
		var fileName = Zotero.Utilities.removeDiacritics(
			Zotero.File.getValidFileName(label)) + ".js";
		// Use translatorID if name still isn't ASCII (e.g., Cyrillic)
		if (alternative && !fileName.match(/^[\x00-\x7f]+$/)) {
			fileName = alternative + ".js";
		}
		return fileName;
	};
	
	/**
	 * Writes a translator (metadata JSON followed by code) to the translators directory
	 *
	 * @param	{Object}		metadata
	 * @param	{String}		metadata.translatorID		Translator GUID
	 * @param	{Integer}		metadata.translatorType		See TRANSLATOR_TYPES in translate.js
	 * @param	{String}		metadata.label				Translator title
	 * @param	{String}		metadata.creator			Translator author
	 * @param	{String|Null}	metadata.target				Target regexp
	 * @param	{String|Null}	metadata.minVersion
	 * @param	{String}		metadata.maxVersion
	 * @param	{String|undefined}	metadata.configOptions
	 * @param	{String|undefined}	metadata.displayOptions
	 * @param	{Integer}		metadata.priority
	 * @param	{String}		metadata.browserSupport
	 * @param	{Boolean}		metadata.inRepository
	 * @param	{String}		metadata.lastUpdated		SQL date
	 * @param	{String}		code
	 * @return	{Promise<String>} - Path of the file that was written
	 * @throws	{Error} If a required value is missing or translatorType has no known type bit
	 */
	this.save = Zotero.Promise.coroutine(function* (metadata, code) {
		if (!metadata.translatorID) {
			throw new Error("metadata.translatorID not provided in Zotero.Translators.save()");
		}
		
		// translatorType has to include at least one known type bit. This also rejects a
		// missing type, since `undefined & x` is 0.
		var hasKnownType = false;
		for (let type in TRANSLATOR_TYPES) {
			if (metadata.translatorType & TRANSLATOR_TYPES[type]) {
				hasKnownType = true;
				break;
			}
		}
		if (!hasKnownType) {
			throw new Error("Invalid translatorType '" + metadata.translatorType
				+ "' in Zotero.Translators.save()");
		}
		
		if (!metadata.label) {
			throw new Error("metadata.label not provided");
		}
		
		if (!metadata.priority) {
			throw new Error("metadata.priority not provided");
		}
		
		if (!metadata.lastUpdated) {
			throw new Error("metadata.lastUpdated not provided");
		}
		
		if (!code) {
			throw new Error("code not provided");
		}
		
		var fileName = Zotero.Translators.getFileNameFromLabel(
			metadata.label, metadata.translatorID
		);
		var destFile = OS.Path.join(Zotero.getTranslatorsDirectory().path, fileName);
		
		// JSON.stringify has the benefit of indenting JSON
		var metadataJSON = JSON.stringify(metadata, null, "\t");
		
		var str = metadataJSON + "\n\n" + code;
		
		// Make sure file ends with newline
		if (!str.endsWith('\n')) {
			str += '\n';
		}
		
		// Warn if this write replaces a file that doesn't belong to the loaded translator
		// with this ID
		var translator = Zotero.Translators.get(metadata.translatorID);
		var sameFile = translator && destFile == translator.path;
		
		var exists = yield OS.File.exists(destFile);
		if (!sameFile && exists) {
			var msg = "Overwriting translator with same filename '"
				+ fileName + "'";
			Zotero.debug(msg, 1);
			Zotero.debug(metadata, 1);
			Components.utils.reportError(msg);
		}
		
		Zotero.debug("Saving translator '" + metadata.label + "'");
		Zotero.debug(metadata);
		return Zotero.File.putContentsAsync(destFile, str).return(destFile);
	});
	
	/**
	 * Inserts or replaces a translator's row in the translatorCache DB table
	 *
	 * @param {String} fileName
	 * @param {Object} metadataJSON - Translator metadata; stringified before being stored
	 * @param {Number} lastModifiedTime - Modification time of the translator file
	 * @return {Promise}
	 */
	this.cacheInDB = function(fileName, metadataJSON, lastModifiedTime) {
		return Zotero.DB.queryAsync(
			"REPLACE INTO translatorCache VALUES (?, ?, ?)",
			[fileName, JSON.stringify(metadataJSON), lastModifiedTime]
		);
	};
};