translate_firefox.js (34059B)
/*
	***** BEGIN LICENSE BLOCK *****
	
	Copyright © 2012 Center for History and New Media
					George Mason University, Fairfax, Virginia, USA
					http://zotero.org
	
	This file is part of Zotero.
	
	Portions of this file are derived from Special Powers code,
	Copyright (C) 2010 Mozilla Corporation. All Rights Reserved.
	
	Zotero is free software: you can redistribute it and/or modify
	it under the terms of the GNU Affero General Public License as published by
	the Free Software Foundation, either version 3 of the License, or
	(at your option) any later version.
	
	Zotero is distributed in the hope that it will be useful,
	but WITHOUT ANY WARRANTY; without even the implied warranty of
	MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
	GNU Affero General Public License for more details.
	
	You should have received a copy of the GNU Affero General Public License
	along with Zotero.  If not, see <http://www.gnu.org/licenses/>.
	
	***** END LICENSE BLOCK *****
*/

// Byte-order marks, keyed by charset name, as binary strings (one character per byte).
// Zotero.Translate.IO.Read takes the FIRST entry whose bytes prefix the file, so the
// order of the entries is significant.
const BOMs = {
	"UTF-8":"\xEF\xBB\xBF",
	"UTF-16BE":"\xFE\xFF",
	"UTF-16LE":"\xFF\xFE",
	"UTF-32BE":"\x00\x00\xFE\xFF",
	"UTF-32LE":"\xFF\xFE\x00\x00"
}

Components.utils.import("resource://gre/modules/NetUtil.jsm");
Components.utils.import("resource://gre/modules/Services.jsm");

Zotero.Translate.DOMWrapper = new function() {
	var Cu = Components.utils;
	
	/*
	 * BEGIN SPECIAL POWERS WRAPPING CODE
	 * https://dxr.mozilla.org/mozilla-central/source/testing/specialpowers/content/specialpowersAPI.js
	 *
	 * Includes modifications by Zotero to support overrides
	 */
	
	/**
	 * @return {Boolean} Whether x is a non-null object or a function (i.e., not a primitive)
	 */
	function isWrappable(x) {
		if (typeof x === "object")
			return x !== null;
		return typeof x === "function";
	};
	
	/**
	 * @return {Boolean} Whether x exposes the SpecialPowers_wrappedObject marker property
	 */
	function isWrapper(x) {
		return isWrappable(x) && (typeof x.SpecialPowers_wrappedObject !== "undefined");
	};
	
	function unwrapIfWrapped(x) {
		return isWrapper(x) ? unwrapPrivileged(x) : x;
	};
	
	function wrapIfUnwrapped(x) {
		return isWrapper(x) ? x : wrapPrivileged(x);
	}
	
	/**
	 * @return {Boolean} Whether obj's class name is Object, Array, or one of the listed
	 *     typed-array classes
	 */
	function isObjectOrArray(obj) {
		if (Object(obj) !== obj)
			return false;
		let arrayClasses = ['Object', 'Array', 'Int8Array', 'Uint8Array',
		                    'Int16Array', 'Uint16Array', 'Int32Array',
		                    'Uint32Array', 'Float32Array', 'Float64Array',
		                    'Uint8ClampedArray'];
		let className = Cu.getClassName(obj, true);
		return arrayClasses.indexOf(className) != -1;
	}
	
	// In general, we want Xray wrappers for content DOM objects, because waiving
	// Xray gives us Xray waiver wrappers that clamp the principal when we cross
	// compartment boundaries. However, there are some exceptions where we want
	// to use a waiver:
	//
	// * Xray adds some gunk to toString(), which has the potential to confuse
	//   consumers that aren't expecting Xray wrappers. Since toString() is a
	//   non-privileged method that returns only strings, we can just waive Xray
	//   for that case.
	//
	// * We implement Xrays to pure JS [[Object]] and [[Array]] instances that
	//   filter out tricky things like callables. This is the right thing for
	//   security in general, but tends to break tests that try to pass object
	//   literals into SpecialPowers. So we waive [[Object]] and [[Array]]
	//   instances before inspecting properties.
	//
	// * When we don't have meaningful Xray semantics, we create an Opaque
	//   XrayWrapper for security reasons. For test code, we generally want to see
	//   through that sort of thing.
	function waiveXraysIfAppropriate(obj, propName) {
		if (propName == 'toString' || isObjectOrArray(obj) ||
		    /Opaque/.test(Object.prototype.toString.call(obj)))
		{
			return XPCNativeWrapper.unwrap(obj);
		}
		return obj;
	}
	
	// We can't call apply() directly on Xray-wrapped functions, so we have to be
	// clever.
	function doApply(fun, invocant, args) {
		// We implement Xrays to pure JS [[Object]] instances that filter out tricky
		// things like callables. This is the right thing for security in general,
		// but tends to break tests that try to pass object literals into
		// SpecialPowers. So we waive [[Object]] instances when they're passed to a
		// SpecialPowers-wrapped callable.
		//
		// Note that the transitive nature of Xray waivers means that any property
		// pulled off such an object will also be waived, and so we'll get principal
		// clamping for Xrayed DOM objects reached from literals, so passing things
		// like {l : xoWin.location} won't work. Hopefully the rabbit hole doesn't
		// go that deep.
		args = args.map(x => isObjectOrArray(x) ? Cu.waiveXrays(x) : x);
		return Reflect.apply(fun, invocant, args);
	}
	
	/**
	 * Wraps obj in a Proxy driven by a SpecialPowersHandler
	 * @param {*} obj Value to wrap; primitives are returned unchanged
	 * @param {Object} [overrides] Properties the wrapper reports instead of obj's own
	 * @throws {String} If obj is already a wrapper
	 */
	function wrapPrivileged(obj, overrides) {
		
		// Primitives pass straight through.
		if (!isWrappable(obj))
			return obj;
		
		// No double wrapping.
		if (isWrapper(obj))
			throw "Trying to double-wrap object!";
		
		// The proxy target is a throwaway; it only has to be callable when obj is
		let dummy;
		if (typeof obj === "function")
			dummy = function() {};
		else
			dummy = Object.create(null);
		
		return new Proxy(dummy, new SpecialPowersHandler(obj, overrides));
	};
	
	/**
	 * Returns the object behind a wrapper created by wrapPrivileged()
	 * @throws {String} If x is a non-primitive that is not a wrapper
	 */
	function unwrapPrivileged(x) {
		
		// We don't wrap primitives, so sometimes we have a primitive where we'd
		// expect to have a wrapper. The proxy pretends to be the type that it's
		// emulating, so we can just as easily check isWrappable() on a proxy as
		// we can on an unwrapped object.
		if (!isWrappable(x))
			return x;
		
		// If we have a wrappable type, make sure it's wrapped.
		if (!isWrapper(x))
			throw "Trying to unwrap a non-wrapped object!";
		
		var obj = x.SpecialPowers_wrappedObject;
		// unwrapped.
		return obj;
	};
	
	/*
	 * We want to waive the __exposedProps__ security check for SpecialPowers-wrapped
	 * objects. We do this by creating a proxy singleton that just always returns 'rw'
	 * for any property name.
	 */
	function ExposedPropsWaiverHandler() {
		// NB: XPConnect denies access if the relevant member of __exposedProps__ is not
		// enumerable.
		var _permit = { value: 'rw', writable: false, configurable: false, enumerable: true };
		return {
			getOwnPropertyDescriptor: function(name) { return _permit; },
			ownKeys: function() { throw Error("Can't enumerate ExposedPropsWaiver"); },
			enumerate: function() { throw Error("Can't enumerate ExposedPropsWaiver"); },
			defineProperty: function(name) { throw Error("Can't define props on ExposedPropsWaiver"); },
			deleteProperty: function(name) { throw Error("Can't delete props from ExposedPropsWaiver"); }
		};
	};
	// Declared locally: assigning without a declaration would create an implicit global
	var ExposedPropsWaiver = new Proxy({}, ExposedPropsWaiverHandler());
	
	/**
	 * Proxy handler that forwards operations to wrappedObject, unwrapping arguments on the
	 * way in and wrapping results on the way out
	 * @param {Object|Function} wrappedObject
	 * @param {Object} [overrides] Properties served from this object instead of wrappedObject
	 */
	function SpecialPowersHandler(wrappedObject, overrides) {
		this.wrappedObject = wrappedObject;
		this.overrides = overrides ? overrides : {};
	}
	
	SpecialPowersHandler.prototype = {
		construct(target, args) {
			// The arguments may or may not be wrappers. Unwrap them if necessary.
			var unwrappedArgs = Array.prototype.slice.call(args).map(unwrapIfWrapped);
			
			// We want to invoke "obj" as a constructor, but using unwrappedArgs as
			// the arguments.  Make sure to wrap and re-throw exceptions!
			try {
				return wrapIfUnwrapped(Reflect.construct(this.wrappedObject, unwrappedArgs));
			} catch (e) {
				throw wrapIfUnwrapped(e);
			}
		},
		
		apply(target, thisValue, args) {
			// The invocant and arguments may or may not be wrappers. Unwrap
			// them if necessary.
			var invocant = unwrapIfWrapped(thisValue);
			var unwrappedArgs = Array.prototype.slice.call(args).map(unwrapIfWrapped);
			
			try {
				return wrapIfUnwrapped(doApply(this.wrappedObject, invocant, unwrappedArgs));
			} catch (e) {
				// Wrap exceptions and re-throw them.
				throw wrapIfUnwrapped(e);
			}
		},
		
		has(target, prop) {
			if (prop === "SpecialPowers_wrappedObject")
				return true;
			
			if (this.overrides[prop] !== undefined) {
				return true;
			}
			
			return Reflect.has(this.wrappedObject, prop);
		},
		
		get(target, prop, receiver) {
			if (prop === "SpecialPowers_wrappedObject")
				return this.wrappedObject;
			
			if (prop == "SpecialPowers_wrapperOverrides") {
				return this.overrides;
			}
			
			if (prop in this.overrides) {
				return this.overrides[prop];
			}
			
			let obj = waiveXraysIfAppropriate(this.wrappedObject, prop);
			return wrapIfUnwrapped(Reflect.get(obj, prop));
		},
		
		set(target, prop, val, receiver) {
			if (prop === "SpecialPowers_wrappedObject")
				return false;
			
			let obj = waiveXraysIfAppropriate(this.wrappedObject, prop);
			return Reflect.set(obj, prop, unwrapIfWrapped(val));
		},
		
		// The Proxy trap for the `delete` operator is named deleteProperty; a handler
		// method named `delete` is never invoked.
		deleteProperty(target, prop) {
			if (prop === "SpecialPowers_wrappedObject")
				return false;
			
			return Reflect.deleteProperty(this.wrappedObject, prop);
		},
		
		defineProperty(target, prop, descriptor) {
			throw "Can't call defineProperty on SpecialPowers wrapped object";
		},
		
		getOwnPropertyDescriptor(target, prop) {
			// Handle our special API.
			if (prop === "SpecialPowers_wrappedObject") {
				return { value: this.wrappedObject, writable: true,
				         configurable: true, enumerable: false };
			}
			
			if (prop == "SpecialPowers_wrapperOverrides") {
				return { value: this.overrides, writable: false, configurable: false, enumerable: false };
			}
			if (prop == "__exposedProps__") {
				return { value: ExposedPropsWaiver, writable: false, configurable: false, enumerable: false };
			}
			if (prop in this.overrides) {
				return { value: this.overrides[prop], writable: false, configurable: true, enumerable: true };
			}
			
			let obj = waiveXraysIfAppropriate(this.wrappedObject, prop);
			let desc = Reflect.getOwnPropertyDescriptor(obj, prop);
			
			if (desc === undefined)
				return undefined;
			
			// Transitively maintain the wrapper membrane.
			function wrapIfExists(key) {
				if (key in desc)
					desc[key] = wrapIfUnwrapped(desc[key]);
			};
			
			wrapIfExists('value');
			wrapIfExists('get');
			wrapIfExists('set');
			
			// A trapping proxy's properties must always be configurable, but sometimes
			// we come across non-configurable properties. Tell a white lie.
			desc.configurable = true;
			
			return desc;
		},
		
		ownKeys(target) {
			// Insert our special API. It's not enumerable, but ownKeys()
			// includes non-enumerable properties.
			let props = ['SpecialPowers_wrappedObject'];
			
			// Do the normal thing.
			let flt = (a) => !props.includes(a);
			props = props.concat(Object.keys(this.overrides).filter(flt));
			props = props.concat(Reflect.ownKeys(this.wrappedObject).filter(flt));
			
			// If we've got an Xray wrapper, include the expandos as well.
			if ('wrappedJSObject' in this.wrappedObject) {
				props = props.concat(Reflect.ownKeys(this.wrappedObject.wrappedJSObject)
				                     .filter(flt));
			}
			
			return props;
		},
		
		preventExtensions(target) {
			throw "Can't call preventExtensions on SpecialPowers wrapped object";
		}
	};
	
	/*
	 * END SPECIAL POWERS WRAPPING CODE
	 */
	
	/**
	 * Abstracts DOM wrapper support for avoiding XOWs
	 * @param {XPCCrossOriginWrapper} obj
	 * @param {Object} [overrides] Properties the wrapper should report instead of obj's own
	 * @return {Object} An obj that is no longer Xrayed
	 */
	this.wrap = function(obj, overrides) {
		if(isWrapper(obj)) return obj;
		return wrapPrivileged(obj, overrides);
	};
	
	/**
	 * Unwraps an object; values that are not wrappers are returned unchanged
	 */
	this.unwrap = function(obj) {
		if(isWrapper(obj)) {
			return unwrapPrivileged(obj);
		} else {
			return obj;
		}
	}
	
	/**
	 * Wraps an object in the same sandbox as another object
	 */
	this.wrapIn = function(obj, insamebox) {
		if(insamebox.__wrappingManager) return insamebox.__wrappingManager.wrap(obj);
		return this.wrap(obj);
	}
	
	/**
	 * Checks whether an object is wrapped by a DOM wrapper
	 * @param {XPCCrossOriginWrapper} obj
	 * @return {Boolean} Whether or not the object is wrapped
	 */
	this.isWrapped = isWrapper;
}

/**
 * @class Manages the translator sandbox
 * @param {String|window} sandboxLocation
 */
Zotero.Translate.SandboxManager = function(sandboxLocation) {
	// sandboxLocation = Components.classes["@mozilla.org/systemprincipal;1"].createInstance(Components.interfaces.nsIPrincipal);
	var sandbox = this.sandbox = new Components.utils.Sandbox(
		sandboxLocation,
		{
			wantComponents: false,
			wantGlobalProperties: [
				'atob',
				'XMLHttpRequest'
			]
		}
	);
	this.sandbox.Zotero = {};
	
	// import functions missing from global scope into Fx sandbox
	this.sandbox.XPathResult = Components.interfaces.nsIDOMXPathResult;
	if(typeof sandboxLocation === "object" && "DOMParser" in sandboxLocation) {
		this.sandbox.DOMParser = sandboxLocation.DOMParser;
	} else {
		// No DOMParser to borrow: emulate parseFromString() with a synchronous XHR on a data: URI
		this.sandbox.DOMParser = function() {
			var obj = new sandbox.Object();
			var wrappedObj = obj.wrappedJSObject || obj;
			wrappedObj.__exposedProps__ = {"parseFromString":"r"};
			wrappedObj.parseFromString = function(str, contentType) {
				var xhr = new sandbox.XMLHttpRequest();
				xhr.open("GET", "data:"+contentType+";charset=utf-8,"+encodeURIComponent(str), false);
				xhr.send();
				if (!xhr.responseXML) throw new Error("error parsing XML");
				return xhr.responseXML;
			}
			return obj;
		};
	}
	this.sandbox.DOMParser.__exposedProps__ = {"prototype":"r"};
	this.sandbox.DOMParser.prototype = {};
	this.sandbox.XMLSerializer = function() {
		var s = Components.classes["@mozilla.org/xmlextras/xmlserializer;1"]
			.createInstance(Components.interfaces.nsIDOMSerializer);
		var obj = new sandbox.Object();
		var wrappedObj = obj.wrappedJSObject || obj;
		wrappedObj.serializeToString = function(doc) {
			return s.serializeToString(Zotero.Translate.DOMWrapper.unwrap(doc));
		};
		return obj;
	};
	this.sandbox.XMLSerializer.__exposedProps__ = {"prototype":"r"};
	this.sandbox.XMLSerializer.prototype = {"__exposedProps__":{"serializeToString":"r"}};
	
	// Builds, inside the sandbox, a function that stores its arguments on a fresh `this`
	// object (as `this.args`) and then invokes the supplied function with that `this`
	var expr = "(function(x) { return function() { this.args = arguments; return Function.prototype.apply.call(x, this); }.bind({}); })";
	this._makeContentForwarder = Components.utils.evalInSandbox(expr, sandbox);
	
	var _proxy = Components.utils.evalInSandbox('(function (target, x, overrides) {'+
	' return new Proxy(x, ProxyHandler(target, overrides));'+
	'})', sandbox);
	/**
	 * Wraps target in a sandbox-side Proxy; primitives are returned unchanged
	 * @param {*} target
	 * @param {Object|Function} [x] Proxy target object; defaults to a new sandbox Object
	 * @param {Object} [overrides] Properties to report instead of target's own
	 */
	var wrap = this.wrap = function(target, x, overrides) {
		if (target === null || (typeof target !== "object" && typeof target !== "function")) return target;
		if (!x) x = new sandbox.Object();
		return _proxy(target, x, overrides);
	};
	var me = this;
	sandbox.ProxyHandler = this._makeContentForwarder(function() {
		var target = (this.args.wrappedJSObject || this.args)[0];
		var overrides = (this.args.wrappedJSObject || this.args)[1] || {};
		if(target instanceof Components.interfaces.nsISupports) {
			target = new XPCNativeWrapper(target);
		}
		var ret = new sandbox.Object();
		var wrappedRet = ret.wrappedJSObject || ret;
		wrappedRet.has = function(x, prop) {
			return overrides.hasOwnProperty(prop) || prop in target;
		};
		wrappedRet.get = function(x, prop, receiver) {
			if (prop === "SpecialPowers_wrappedObject") return target;
			if (prop === "SpecialPowers_wrapperOverrides") return overrides;
			if (prop === "__wrappingManager") return me;
			var y = overrides.hasOwnProperty(prop) ? overrides[prop] : target[prop];
			if (y === null || (typeof y !== "object" && typeof y !== "function")) return y;
			return wrap(y, typeof y === "function" ? function() {
				var args = Array.prototype.slice.apply(arguments);
				for (var i = 0; i < args.length; i++) {
					if (typeof args[i] === "object" && args[i] !== null &&
							args[i].wrappedJSObject && args[i].wrappedJSObject.SpecialPowers_wrappedObject)
						args[i] = new XPCNativeWrapper(args[i].wrappedJSObject.SpecialPowers_wrappedObject);
				}
				return wrap(y.apply(target, args));
			} : new sandbox.Object());
		};
		wrappedRet.ownKeys = function(x) {
			return Components.utils.cloneInto(
				Object.getOwnPropertyNames(target)
					.concat(Object.getOwnPropertySymbols(target)),
				sandbox
			);
		};
		wrappedRet.enumerate = function(x) {
			var y = new sandbox.Array();
			// Same fallback as elsewhere in this constructor, in case y is not Xray-wrapped
			var wrappedY = y.wrappedJSObject || y;
			for (var i in target) wrappedY.push(i);
			return y;
		};
		return ret;
	});
}

Zotero.Translate.SandboxManager.prototype = {
	/**
	 * Evaluates code in the sandbox
	 *
	 * @param {String} code Code to evaluate
	 * @param {*} [exported] Not used by this implementation
	 * @param {String} [path] File name passed to evalInSandbox
	 */
	"eval":function(code, exported, path) {
		Components.utils.evalInSandbox(code, this.sandbox, "1.8", path, 1);
	},
	
	/**
	 * Imports an object into the sandbox
	 *
	 * @param {Object} object Object to be imported (under Zotero)
	 * @param {*} [passAsFirstArgument] An argument to pass
	 *     as the first argument to the function.
	 * @param {Object} [attachTo] The object to attach `object` to.
	 *     Defaults to this.sandbox.Zotero
	 */
	"importObject":function(object, passAsFirstArgument, attachTo) {
		if(!attachTo) attachTo = this.sandbox.Zotero;
		if(attachTo.wrappedJSObject) attachTo = attachTo.wrappedJSObject;
		var newExposedProps = false, sandbox = this.sandbox, me = this;
		if(!object.__exposedProps__) newExposedProps = {};
		for(var key in (newExposedProps ? object : object.__exposedProps__)) {
			// block-scoped copy, so each forwarder closure keeps its own key
			let localKey = key;
			if(newExposedProps) newExposedProps[localKey] = "r";
			
			var value = object[localKey];
			var isFunction = typeof value === "function";
			// typeof null is "object", but null has no members to import; copy it as a plain value
			var isObject = value !== null && typeof value === "object";
			if(isFunction || isObject) {
				if(isFunction) {
					attachTo[localKey] = this._makeContentForwarder(function() {
						var args = Array.prototype.slice.apply(this.args.wrappedJSObject || this.args);
						for(var i = 0; i<args.length; i++) {
							// Make sure we keep XPCNativeWrappers
							if(args[i] instanceof Components.interfaces.nsISupports) {
								args[i] = new XPCNativeWrapper(args[i]);
							}
						}
						if(passAsFirstArgument) args.unshift(passAsFirstArgument);
						return me.copyObject(object[localKey].apply(object, args));
					});
				} else {
					attachTo[localKey] = new sandbox.Object();
				}
				
				// attach members
				if(!(object instanceof Components.interfaces.nsISupports)) {
					this.importObject(object[localKey], passAsFirstArgument, attachTo[localKey]);
				}
			} else {
				attachTo[localKey] = object[localKey];
			}
		}
		
		if(newExposedProps) {
			attachTo.__exposedProps__ = newExposedProps;
		} else {
			attachTo.__exposedProps__ = object.__exposedProps__;
		}
	},
	
	/**
	 * Checks whether copyObject() should deep-copy a value
	 * @param {*} obj
	 * @return {Boolean} True only for non-wrapped objects without __exposedProps__ whose
	 *     constructor is named Object, Array, or Error
	 */
	"_canCopy":function(obj) {
		if(typeof obj !== "object" || obj === null) return false;

		if ((obj.wrappedJSObject && obj.wrappedJSObject.__wrappingManager)
				|| Zotero.Translate.DOMWrapper.isWrapped(obj)
				|| "__exposedProps__" in obj
				// e.g. Object.create(null) has no constructor to inspect
				|| !obj.constructor
				|| !["Object", "Array", "Error"].includes(obj.constructor.name)) {
			return false;
		}
		return true;
	},
	
	/**
	 * Copies a JavaScript object to this sandbox
	 * @param {Object} obj
	 * @param {WeakMap} [wm] Maps already-copied source objects to their copies; created on
	 *     the outermost call
	 * @return {Object} The copy, or obj itself if it cannot be copied
	 */
	"copyObject":function(obj, wm) {
		if(!this._canCopy(obj)) return obj;
		if(!wm) wm = new WeakMap();
		switch (obj.constructor.name) {
		case 'Array':
		case 'Error':
			var obj2 = this.sandbox[obj.constructor.name]();
			break;
		
		default:
			var obj2 = this.sandbox.Object();
			break;
		}
		var wobj2 = obj2.wrappedJSObject ? obj2.wrappedJSObject : obj2;
		// Register the copy before descending, so that references back to obj (cycles)
		// resolve to the copy instead of recursing without end
		wm.set(obj, obj2);
		for(var i in obj) {
			if(!obj.hasOwnProperty(i)) continue;
			
			var prop1 = obj[i];
			if(this._canCopy(prop1)) {
				var prop2 = wm.get(prop1);
				if(prop2 === undefined) {
					// copyObject() records prop1 -> copy in wm itself
					prop2 = this.copyObject(prop1, wm);
				}
				wobj2[i] = prop2;
			} else {
				wobj2[i] = prop1;
			}
		}
		return obj2;
	},

	"newChild":function() {
		return new Zotero.Translate.ChildSandboxManager(this);
	}
}

/**
 * @class Sandbox manager that evaluates code in the parent's sandbox, scoped (via `with`)
 *     to its own object created in that sandbox
 * @param {Zotero.Translate.SandboxManager} parent
 */
Zotero.Translate.ChildSandboxManager = function(parent) {
	this._wrappedSandbox = new parent.sandbox.Object();
	this._wrappedSandbox.Zotero = new parent.sandbox.Object();
	this.sandbox = this._wrappedSandbox.wrappedJSObject || this._wrappedSandbox;
	this._parent = parent;
}
Zotero.Translate.ChildSandboxManager.prototype = {
	/**
	 * Evaluates code in the parent sandbox with this child's object as `with` scope
	 * @param {String} code
	 * @param {String[]} [functions] Names to clear beforehand and re-read afterwards
	 * @param {String} [path] File name passed to evalInSandbox
	 */
	"eval":function(code, functions, path) {
		// eval in sandbox scope
		if(functions) {
			for(var i = 0; i < functions.length; i++) {
				delete this.sandbox[functions[i]];
			}
		}
		this._parent.sandbox._withSandbox = this._wrappedSandbox;
		try {
			Components.utils.evalInSandbox("with(_withSandbox){"+code+"};", this._parent.sandbox, "1.8", path, 1);
			if(functions) {
				for(var i = 0; i < functions.length; i++) {
					try {
						this._wrappedSandbox[functions[i]] = Components.utils.evalInSandbox(functions[i], this._parent.sandbox);
					} catch(e) {}
				}
			}
		} finally {
			// always detach, even when the evaluated code throws
			this._parent.sandbox._withSandbox = undefined;
		}
	},
	"importObject":function(object, passAsFirstArgument, attachTo) {
		if(!attachTo) attachTo = this.sandbox.Zotero;
		// Zotero.debug(object);
		// Zotero.debug(attachTo);
		this._parent.importObject(object, passAsFirstArgument, attachTo);
		// Zotero.debug(attachTo);
	},
	"copyObject":function(obj) {
		return this._parent.copyObject(obj);
	},
	"newChild":function() {
		return this._parent.newChild();
	},
	"_makeContentForwarder":function(f) {
		return this._parent._makeContentForwarder(f);
	},
	"wrap": function (target, x, overrides) {
		return this._parent.wrap(target, x, overrides);
	}
}

/**
 * This variable holds a reference to all open nsIInputStreams and nsIOutputStreams in the global
 * scope at all times. Otherwise, our streams might get garbage collected when we allow other code
 * to run during Zotero.wait().
 */
Zotero.Translate.IO.maintainedInstances = [];

/******* (Native) Read support *******/

/**
 * Opens a file for reading and determines its character set from, in order: a BOM, an
 * XML declaration, and the "import.charset" preference (with UTF-8 sniffing for "auto")
 * @param {nsIFile} file
 * @param {Zotero.Translate.SandboxManager} sandboxManager Used by getXML() to wrap documents
 */
Zotero.Translate.IO.Read = function(file, sandboxManager) {
	Zotero.Translate.IO.maintainedInstances.push(this);
	
	this.file = file;
	this._sandboxManager = sandboxManager;
	
	// open file
	this._openRawStream();
	
	// start detecting charset
	this._charset = null;
	this._bomLength = 0;
	
	// look for a BOM in the document
	var binStream = Components.classes["@mozilla.org/binaryinputstream;1"].
								createInstance(Components.interfaces.nsIBinaryInputStream);
	binStream.setInputStream(this._rawStream);
	// readBytes() throws when it cannot read the requested number of bytes, so never ask
	// for more than the file holds
	var bomBytesAvailable = this._rawStream.available();
	var first4 = bomBytesAvailable ? binStream.readBytes(Math.min(4, bomBytesAvailable)) : "";
	
	for(var possibleCharset in BOMs) {
		if(first4.substr(0, BOMs[possibleCharset].length) == BOMs[possibleCharset]) {
			this._charset = possibleCharset;
			break;
		}
	}
	
	if(this._charset) {
		Zotero.debug("Translate: Found BOM. Setting character encoding to " + this._charset);
		// BOM found; store its length so that later rewinds skip past it
		this._bomLength = BOMs[this._charset].length;
	} else {
		this._rewind();
		
		// look for an XML parse instruction
		var sStream = Components.classes["@mozilla.org/scriptableinputstream;1"]
					 .createInstance(Components.interfaces.nsIScriptableInputStream);
		sStream.init(this._rawStream);
		
		// read until we see if the file begins with a parse instruction
		// (no "g" flag: a global regexp keeps lastIndex between test() calls, which made
		// every second whitespace character fail the test)
		const whitespaceRe = /\s/;
		var read;
		do {
			read = sStream.read(1);
		} while(whitespaceRe.test(read))
		
		if(read == "<") {
			var firstPart = read + sStream.read(4);
			if(firstPart == "<?xml") {
				// got a parse instruction, read until it ends; read() yields an empty
				// string at end of file, which must stop the loop as well
				read = true;
				while(read && (read !== ">")) {
					read = sStream.read(1);
					firstPart += read;
				}
				
				const encodingRe = /encoding=['"]([^'"]+)['"]/;
				var m = encodingRe.exec(firstPart);
				if(m) {
					// Make sure encoding is valid
					try {
						var charconv = Components.classes["@mozilla.org/intl/scriptableunicodeconverter"]
											   .getService(Components.interfaces.nsIScriptableUnicodeConverter);
						charconv.charset = m[1];
						// the converter accepted the label, so use the declared encoding
						this._charset = m[1];
					} catch(e) {
						Zotero.debug("Translate: Ignoring unknown XML encoding "+m[1]);
					}
				}
				
				if(this._charset) {
					Zotero.debug("Translate: Found XML parse instruction. Setting character encoding to " + this._charset);
				} else {
					// if we know for certain document is XML, we also know for certain that the
					// default charset for XML is UTF-8
					this._charset = "UTF-8";
					Zotero.debug("Translate: XML parse instruction not found. Defaulting to UTF-8 for XML files");
				}
			}
		}
		
		// If we managed to get a charset here, then translators shouldn't be able to override it,
		// since it's almost certainly correct. Otherwise, we allow override.
		this._allowCharsetOverride = !this._charset;
		this._rewind();
		
		if(!this._charset) {
			// No XML parse instruction or BOM.
			
			// Check whether the user has specified a charset preference
			var charsetPref = Zotero.Prefs.get("import.charset");
			if(charsetPref == "auto") {
				Zotero.debug("Translate: Checking whether file is UTF-8");
				// For auto-detect, we are basically going to check if the file could be valid
				// UTF-8, and if this is true, we will treat it as UTF-8. Prior likelihood of
				// UTF-8 is very high, so this should be a reasonable strategy.
				
				// from http://codex.wordpress.org/User:Hakre/UTF8
				const UTF8Regex = new RegExp('^(?:' +
					  '[\x09\x0A\x0D\x20-\x7E]' +        // ASCII
					  '|[\xC2-\xDF][\x80-\xBF]' +        // non-overlong 2-byte
					  '|\xE0[\xA0-\xBF][\x80-\xBF]' +    // excluding overlongs
					  '|[\xE1-\xEC\xEE][\x80-\xBF]{2}' + // 3-byte, but exclude U-FFFE and U-FFFF
					  '|\xEF[\x80-\xBE][\x80-\xBF]' +
					  '|\xEF\xBF[\x80-\xBD]' +
					  '|\xED[\x80-\x9F][\x80-\xBF]' +    // excluding surrogates
					  '|\xF0[\x90-\xBF][\x80-\xBF]{2}' + // planes 1-3
					  '|[\xF1-\xF3][\x80-\xBF]{3}' +     // planes 4-15
					  '|\xF4[\x80-\x8F][\x80-\xBF]{2}' + // plane 16
					')*$');
				
				// Read all currently available bytes from file. This seems to be the entire file,
				// since the IO is blocking anyway.
				this._charset = "UTF-8";
				let bytesAvailable;
				while(bytesAvailable = this._rawStream.available()) {
					// read 131072 bytes
					let fileContents = binStream.readBytes(Math.min(131072, bytesAvailable));
					
					// on failure, try reading up to 3 more bytes and see if that makes this
					// valid (since we have chunked it)
					let isUTF8;
					for(let i=1; !(isUTF8 = UTF8Regex.test(fileContents)) && i <= 3; i++) {
						if(this._rawStream.available()) {
							fileContents += binStream.readBytes(1);
						}
					}
					
					// if the regexp continues to fail, this is not UTF-8
					if(!isUTF8) {
						// Can't be UTF-8; drop the provisional value so that the fallbacks
						// below apply when the preference lookups fail
						this._charset = null;
						
						// see if a default charset is defined
						var prefs = Components.classes["@mozilla.org/preferences-service;1"]
										  .getService(Components.interfaces.nsIPrefBranch);
						try {
							this._charset = prefs.getComplexValue("intl.charset.default",
								Components.interfaces.nsIPrefLocalizedString).toString();
						} catch(e) {}
						
						if(!this._charset) {
							try {
								this._charset = prefs.getCharPref("intl.charset.default");
							} catch(e) {}
							
							
							// ISO-8859-1 by default
							if(!this._charset) this._charset = "ISO-8859-1";
						}
						
						break;
					}
				}
				this._rewind();
			} else {
				// No need to auto-detect; user has specified a charset
				this._charset = charsetPref;
			}
		}
	}
	
	Zotero.debug("Translate: Detected file charset as "+this._charset);
}

Zotero.Translate.IO.Read.prototype = {
	"__exposedProps__":{
		"getXML":"r",
		"RDF":"r",
		"read":"r",
		"setCharacterSet":"r"
	},
	
	/**
	 * (Re)opens the raw file input stream, closing any stream that is already open
	 */
	"_openRawStream":function() {
		if(this._rawStream) this._rawStream.close();
		this._rawStream = Components.classes["@mozilla.org/network/file-input-stream;1"]
								  .createInstance(Components.interfaces.nsIFileInputStream);
		this._rawStream.init(this.file, 0x01, 0o664, 0);
	},
	
	/**
	 * Seeks the raw stream to just past the BOM (if any) and resets the read counters
	 */
	"_rewind":function() {
		this._linesExhausted = false;
		this._rawStream.QueryInterface(Components.interfaces.nsISeekableStream)
			.seek(Components.interfaces.nsISeekableStream.NS_SEEK_SET, this._bomLength);
		this._rawStream.QueryInterface(Components.interfaces.nsIFileInputStream);
		this.bytesRead = this._bomLength;
	},
	
	/**
	 * Reopens the file and attaches a converter input stream decoding it as `charset`
	 * @param {String} charset
	 */
	"_seekToStart":function(charset) {
		this._openRawStream();
		
		this._rewind();
		
		this.inputStream = Components.classes["@mozilla.org/intl/converter-input-stream;1"]
								   .createInstance(Components.interfaces.nsIConverterInputStream);
		this.inputStream.init(this._rawStream, charset, 32768,
			Components.interfaces.nsIConverterInputStream.DEFAULT_REPLACEMENT_CHARACTER);
	},
	
	/**
	 * Reads the remainder of the converter stream
	 * @return {String}
	 */
	"_readToString":function() {
		var str = {};
		var stringBits = [];
		this.inputStream.QueryInterface(Components.interfaces.nsIUnicharInputStream);
		while(1) {
			var read = this.inputStream.readString(32768, str);
			if(!read) break;
			stringBits.push(str.value);
		}
		return stringBits.join("");
	},
	
	/**
	 * Parses the file as RDF/XML into a new data store and exposes it as this.RDF
	 * @throws {Error} If the file cannot be parsed; the stream is closed first
	 */
	"_initRDF":function() {
		// get URI
		var IOService = Components.classes['@mozilla.org/network/io-service;1']
						.getService(Components.interfaces.nsIIOService);
		var fileHandler = IOService.getProtocolHandler("file")
						  .QueryInterface(Components.interfaces.nsIFileProtocolHandler);
		var baseURI = fileHandler.getURLSpecFromFile(this.file);
		
		Zotero.debug("Translate: Initializing RDF data store");
		this._dataStore = new Zotero.RDF.AJAW.IndexedFormula();
		var parser = new Zotero.RDF.AJAW.RDFParser(this._dataStore);
		try {
			var nodes = Zotero.Translate.IO.parseDOMXML(this._rawStream, this._charset, this.file.fileSize);
			parser.parse(nodes, baseURI);
			
			this.RDF = new Zotero.Translate.IO._RDFSandbox(this._dataStore);
		} catch(e) {
			this.close();
			throw new Error("Translate: No RDF found");
		}
	},
	
	/**
	 * Restarts reading from the beginning of the file using the given charset, unless a
	 * BOM or XML declaration already fixed the charset
	 * @param {String} charset
	 * @throws {String} If charset is not a string
	 */
	"setCharacterSet":function(charset) {
		if(typeof charset !== "string") {
			throw "Translate: setCharacterSet: charset must be a string";
		}
		
		// seek back to the beginning
		this._seekToStart(this._allowCharsetOverride ? charset : this._charset);
		
		if(!this._allowCharsetOverride) {
			Zotero.debug("Translate: setCharacterSet: translate charset override ignored due to BOM or XML parse instruction. Using " + this._charset);
		}
	},
	
	/**
	 * Reads from the file
	 * @param {Number} [bytes] Amount to request from readString(); if omitted, one line is read
	 * @return {String|Boolean} The text read, or false when nothing more can be read
	 */
	"read":function(bytes) {
		var str = {};
		
		if(bytes) {
			// read number of bytes requested
			this.inputStream.QueryInterface(Components.interfaces.nsIUnicharInputStream);
			var amountRead = this.inputStream.readString(bytes, str);
			if(!amountRead) return false;
			this.bytesRead += amountRead;
		} else {
			// bytes not specified; read a line
			this.inputStream.QueryInterface(Components.interfaces.nsIUnicharLineInputStream);
			if(this._linesExhausted) return false;
			this._linesExhausted = !this.inputStream.readLine(str);
			this.bytesRead += str.value.length+1; // only approximate
		}
		
		return str.value;
	},
	
	/**
	 * Parses the whole file as XML
	 * @return {Document} The parsed document (wrapped by the sandbox manager when Zotero.isFx)
	 */
	"getXML":function() {
		if(this.bytesRead !== 0) this._seekToStart(this._charset);
		try {
			var xml = Zotero.Translate.IO.parseDOMXML(this._rawStream, this._charset, this.file.fileSize);
		} catch(e) {
			this._xmlInvalid = true;
			throw e;
		}
		return (Zotero.isFx ? this._sandboxManager.wrap(xml) : xml);
	},
	
	/**
	 * Prepares the stream for reading in the given data mode
	 * @param {String} newMode
	 * @throws {Error} If newMode is "xml/e4x"
	 */
	init: function (newMode) {
		if(Zotero.Translate.IO.maintainedInstances.indexOf(this) === -1) {
			Zotero.Translate.IO.maintainedInstances.push(this);
		}
		this._seekToStart(this._charset);
		
		this._mode = newMode;
		if(newMode === "xml/e4x") {
			throw new Error("E4X is not supported");
		} else if(Zotero.Translate.IO.rdfDataModes.indexOf(this._mode) !== -1 && !this.RDF) {
			this._initRDF();
		}
	},
	
	/**
	 * Closes the raw stream and stops tracking this instance in maintainedInstances
	 */
	"close":function() {
		var myIndex = Zotero.Translate.IO.maintainedInstances.indexOf(this);
		if(myIndex !== -1) Zotero.Translate.IO.maintainedInstances.splice(myIndex, 1);
		
		if(this._rawStream) {
			this._rawStream.close();
			delete this._rawStream;
		}
	}
}
Zotero.Translate.IO.Read.prototype.__defineGetter__("contentLength",
function() {
	return this.file.fileSize;
});

/******* Write support *******/

/**
 * Opens a file for writing (write, create, truncate)
 * @param {nsIFile} file
 */
Zotero.Translate.IO.Write = function(file) {
	Zotero.Translate.IO.maintainedInstances.push(this);
	this._rawStream = Components.classes["@mozilla.org/network/file-output-stream;1"]
		.createInstance(Components.interfaces.nsIFileOutputStream);
	this._rawStream.init(file, 0x02 | 0x08 | 0x20, 0o664, 0); // write, create, truncate
	this._writtenToStream = false;
}

Zotero.Translate.IO.Write.prototype = {
	"__exposedProps__":{
		"RDF":"r",
		"write":"r",
		"setCharacterSet":"r"
	},
	
	/**
	 * Creates an empty RDF data store and exposes it as this.RDF
	 */
	"_initRDF":function() {
		Zotero.debug("Translate: Initializing RDF data store");
		this._dataStore = new Zotero.RDF.AJAW.IndexedFormula();
		this.RDF = new Zotero.Translate.IO._RDFSandbox(this._dataStore);
	},
	
	/**
	 * (Re)initializes the converter output stream for the given charset
	 * @param {String} charset "UTF-8xBOM" is treated as "UTF-8"
	 * @throws {String} If charset is not a string
	 */
	"setCharacterSet":function(charset) {
		if(typeof charset !== "string") {
			throw "Translate: setCharacterSet: charset must be a string";
		}
		
		if(!this.outputStream) {
			this.outputStream = Components.classes["@mozilla.org/intl/converter-output-stream;1"]
								   .createInstance(Components.interfaces.nsIConverterOutputStream);
		}
		
		if(charset == "UTF-8xBOM") charset = "UTF-8";
		this.outputStream.init(this._rawStream, charset, 1024, "?".charCodeAt(0));
		this._charset = charset;
	},
	
	/**
	 * Writes a string to the file, defaulting to UTF-8 if no charset has been set
	 * @param {String} data
	 */
	"write":function(data) {
		if(!this._charset) this.setCharacterSet("UTF-8");
		
		// NOTE: setCharacterSet() stores "UTF-8xBOM" as "UTF-8", so this branch is only
		// reached if a charset name ending in "xBOM" is stored some other way
		if(!this._writtenToStream && this._charset.substr(this._charset.length-4) == "xBOM") {
			// charset name with the "xBOM" suffix removed
			var streamCharset = this._charset.substr(0, this._charset.length-4).toUpperCase();
			if(BOMs[streamCharset]) {
				// If stream has not yet been written to, and a UTF type has been selected, write BOM
				this._rawStream.write(BOMs[streamCharset], BOMs[streamCharset].length);
			}
		}
		
		if(this._charset == "MACINTOSH") {
			// fix buggy Mozilla MacRoman
			var splitData = data.split(/([\r\n]+)/);
			for(var i=0; i<splitData.length; i+=2) {
				// write raw newlines straight to the string
				this.outputStream.writeString(splitData[i]);
				if(splitData[i+1]) {
					this._rawStream.write(splitData[i+1], splitData[i+1].length);
				}
			}
		} else {
			this.outputStream.writeString(data);
		}
		
		this._writtenToStream = true;
	},
	
	/**
	 * Prepares the stream for writing in the given data mode
	 * @param {String} newMode
	 * @param {String} [charset] Used for non-RDF modes; defaults to UTF-8. RDF modes
	 *     always use UTF-8.
	 */
	init: function (newMode, charset) {
		this._mode = newMode;
		// Only (re)set the charset while nothing has been written yet
		if(Zotero.Translate.IO.rdfDataModes.indexOf(this._mode) !== -1) {
			this._initRDF();
			if(!this._writtenToStream) this.setCharacterSet("UTF-8");
		} else if(!this._writtenToStream) {
			this.setCharacterSet(charset ? charset : "UTF-8");
		}
	},
	
	/**
	 * Serializes pending RDF (in RDF modes), stops tracking this instance in
	 * maintainedInstances, and closes the raw stream
	 */
	"close":function() {
		if(Zotero.Translate.IO.rdfDataModes.indexOf(this._mode) !== -1) {
			this.write(this.RDF.serialize());
		}
		
		var myIndex = Zotero.Translate.IO.maintainedInstances.indexOf(this);
		if(myIndex !== -1) Zotero.Translate.IO.maintainedInstances.splice(myIndex, 1);
		
		this._rawStream.close();
	}
}