mime.js (12726B)
/*
    ***** BEGIN LICENSE BLOCK *****
    
    Copyright © 2009 Center for History and New Media
                     George Mason University, Fairfax, Virginia, USA
                     http://zotero.org
    
    This file is part of Zotero.
    
    Zotero is free software: you can redistribute it and/or modify
    it under the terms of the GNU Affero General Public License as published by
    the Free Software Foundation, either version 3 of the License, or
    (at your option) any later version.
    
    Zotero is distributed in the hope that it will be useful,
    but WITHOUT ANY WARRANTY; without even the implied warranty of
    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
    GNU Affero General Public License for more details.
    
    You should have received a copy of the GNU Affero General Public License
    along with Zotero.  If not, see <http://www.gnu.org/licenses/>.
    
    ***** END LICENSE BLOCK *****
*/

/**
 * MIME type helpers: detection from magic numbers, file extensions, file
 * samples and HTTP HEAD responses, plus checks for whether a type can be
 * displayed without an external helper application.
 */
Zotero.MIME = new function(){
	this.isTextType = isTextType;
	this.getPrimaryExtension = getPrimaryExtension;
	this.sniffForBinary = sniffForBinary;
	this.hasNativeHandler = hasNativeHandler;
	this.hasInternalHandler = hasInternalHandler;
	
	// Magic numbers
	//
	// Each entry is [signature, MIME type, offset]. When an offset is given the
	// signature must appear exactly at that position in the sample; when it is
	// omitted the signature may appear anywhere in the sample. Entries are
	// tested in order and the first match wins.
	var _snifferEntries = [
		["%PDF-", "application/pdf"],
		["%!PS-Adobe-", 'application/postscript', 0],
		["%! PS-Adobe-", 'application/postscript', 0],
		["\uFFFD\uFFFD\x11\u0871\x1A\uFFFD\x00\x00", "application/msword", 0],
		["From", 'text/plain', 0],
		[">From", 'text/plain', 0],
		["#!", 'text/plain', 0],
		["<?xml", 'text/xml', 0],
		["<!DOCTYPE html", 'text/html', 0],
		["<html", 'text/html', 0],
		["\uFFFD\uFFFD\uFFFD\uFFFD", 'image/jpeg', 0],
		["GIF8", 'image/gif', 0],
		["\uFFFDPNG", 'image/png', 0],
		["JFIF", 'image/jpeg'],
		["FLV", "video/x-flv", 0],
		["\u0000\u0000\u0001\u0000", "image/vnd.microsoft.icon", 0],
		["\u0053\u0051\u004C\u0069\u0074\u0065\u0020\u0066"
			+ "\u006F\u0072\u006D\u0061\u0074\u0020\u0033\u0000", "application/x-sqlite3", 0]
	];
	
	// Extension -> MIME type overrides consulted before the platform MIME service
	var _extensions = {
		// MS Office
		'doc': 'application/msword',
		'dot': 'application/msword',
		'docx': 'application/vnd.openxmlformats-officedocument.wordprocessingml.document',
		'dotx': 'application/vnd.openxmlformats-officedocument.wordprocessingml.template',
		'docm': 'application/vnd.ms-word.document.macroEnabled.12',
		'dotm': 'application/vnd.ms-word.template.macroEnabled.12',
		'xls': 'application/vnd.ms-excel',
		'xlt': 'application/vnd.ms-excel',
		'xla': 'application/vnd.ms-excel',
		'xlsx': 'application/vnd.openxmlformats-officedocument.spreadsheetml.sheet',
		'xltx': 'application/vnd.openxmlformats-officedocument.spreadsheetml.template',
		'xlsm': 'application/vnd.ms-excel.sheet.macroEnabled.12',
		'xltm': 'application/vnd.ms-excel.template.macroEnabled.12',
		'xlam': 'application/vnd.ms-excel.addin.macroEnabled.12',
		'xlsb': 'application/vnd.ms-excel.sheet.binary.macroEnabled.12',
		'ppt': 'application/vnd.ms-powerpoint',
		'pot': 'application/vnd.ms-powerpoint',
		'pps': 'application/vnd.ms-powerpoint',
		'ppa': 'application/vnd.ms-powerpoint',
		'pptx': 'application/vnd.openxmlformats-officedocument.presentationml.presentation',
		'potx': 'application/vnd.openxmlformats-officedocument.presentationml.template',
		'ppsx': 'application/vnd.openxmlformats-officedocument.presentationml.slideshow',
		'ppam': 'application/vnd.ms-powerpoint.addin.macroEnabled.12',
		'pptm': 'application/vnd.ms-powerpoint.presentation.macroEnabled.12',
		'potm': 'application/vnd.ms-powerpoint.template.macroEnabled.12',
		'ppsm': 'application/vnd.ms-powerpoint.slideshow.macroEnabled.12',
		
		// OpenOffice/LibreOffice
		'odt': 'application/vnd.oasis.opendocument.text',
		
		'pdf': 'application/pdf'
	};
	
	// Non-"text/*" types that are nevertheless treated as text
	var _textTypes = {
		'application/xhtml+xml': true,
		'application/xml': true,
		'application/x-javascript': true
	};
	
	var _webPageTypes = [
		'text/html',
		'application/xhtml+xml'
	];
	
	// MIME types handled natively by Gecko
	// DEBUG: There's definitely a better way of getting these
	var _nativeMIMETypes = {
		'text/html': true,
		'text/css': true,
		'text/xml': true,
		'application/xhtml+xml': true,
		'application/xml': true,
		'text/plain': true,
		'application/x-javascript': true
	};
	
	// Extensions of text files (generally XML) to force to be external
	//
	// NOTE(review): nothing inside this object reads this table at present --
	// confirm whether hasNativeHandler() is still meant to consult it
	var _externalTextExtensions = {
		graffle: true,
		mm: true,
		opml: true,
		bib: true
	};
	
	
	/**
	 * Own-property test for the lookup tables above.
	 *
	 * A bare `table[key]` lookup also finds members inherited from
	 * Object.prototype ("constructor", "toString", ...), which would make
	 * such strings look like known extensions or MIME types.
	 *
	 * @param {Object} table
	 * @param {String} key
	 * @return {Boolean}
	 */
	function _hasOwn(table, key) {
		return Object.prototype.hasOwnProperty.call(table, key);
	}
	
	
	/**
	 * Determine whether a MIME type denotes textual content
	 *
	 * @param {String} mimeType  Must be a string; other values throw, as before
	 * @return {Boolean}  true for any "text/*" type and for the XML/JavaScript
	 *                    application types listed in _textTypes
	 */
	function isTextType(mimeType) {
		return mimeType.substr(0, 5) === 'text/' || _hasOwn(_textTypes, mimeType);
	}
	
	/**
	 * @param {String} mimeType
	 * @return {Boolean}  true if the type is one of the web page types
	 *                    (text/html or application/xhtml+xml)
	 */
	this.isWebPageType = function(mimeType) {
		return _webPageTypes.indexOf(mimeType) !== -1;
	};
	
	/**
	 * Our own wrapper around the MIME service's getPrimaryExtension() that
	 * works a little better
	 *
	 * @param {String} mimeType
	 * @param {String} [ext]  Extension hint, passed through to the MIME service
	 *                        and returned unchanged if the service lookup fails
	 * @return {String}  File extension without the leading dot, or '' if none
	 *                   could be determined
	 */
	function getPrimaryExtension(mimeType, ext) {
		// Enforce some extensions
		switch (mimeType) {
			case 'text/html':
			case 'application/xhtml+xml':
				return 'html';
			
			case 'application/pdf':
			case 'application/x-pdf':
			case 'application/acrobat':
			case 'application/vnd.pdf':
			// Misspelled alias kept so existing callers passing it still work
			case 'applications/vnd.pdf':
			case 'text/pdf':
			case 'text/x-pdf':
				return 'pdf';
			
			case 'image/jpg':
			case 'image/jpeg':
				return 'jpg';
			
			case 'image/gif':
				return 'gif';
			
			case 'application/msword':
			case 'application/doc':
			case 'application/vnd.msword':
			case 'application/vnd.ms-word':
			case 'application/winword':
			case 'application/word':
			case 'application/x-msw6':
			case 'application/x-msword':
				return 'doc';
			
			case 'application/vnd.oasis.opendocument.text':
			case 'application/x-vnd.oasis.opendocument.text':
				return 'odt';
			
			case 'video/flv':
			case 'video/x-flv':
				return 'flv';
			
			case 'image/tif':
			case 'image/tiff':
			case 'image/x-tif':
			case 'image/x-tiff':
			case 'application/tif':
			case 'application/x-tif':
			case 'application/tiff':
			case 'application/x-tiff':
				return 'tiff';
			
			case 'application/zip':
			case 'application/x-zip':
			case 'application/x-zip-compressed':
			case 'application/x-compress':
			case 'application/x-compressed':
			case 'multipart/x-zip':
				return 'zip';
			
			case 'video/quicktime':
			case 'video/x-quicktime':
				return 'mov';
			
			case 'video/avi':
			case 'video/msvideo':
			case 'video/x-msvideo':
				return 'avi';
			
			case 'audio/wav':
			case 'audio/x-wav':
			case 'audio/wave':
				return 'wav';
			
			case 'audio/aiff':
			case 'audio/x-aiff':
			case 'sound/aiff':
				return 'aiff';
		}
		
		try {
			ext = Components.classes["@mozilla.org/mime;1"]
				.getService(Components.interfaces.nsIMIMEService)
				.getPrimaryExtension(mimeType, ext);
		}
		// nsIMIMEService.getPrimaryExtension() doesn't work on Linux and
		// throws an error if it can't find an extension, so this lookup is
		// deliberately best-effort: on failure the caller's hint is kept
		catch (e) {}
		
		return ext ? ext : '';
	}
	
	
	/**
	 * Searches string for magic numbers
	 *
	 * @param {String} str  Sample of the data to inspect
	 * @return {String|false}  MIME type of the first matching entry in
	 *                         _snifferEntries, or false if nothing matched
	 */
	this.sniffForMIMEType = function (str) {
		// for...of rather than for...in: the latter walks string keys and any
		// enumerable properties added to Array.prototype
		for (let entry of _snifferEntries) {
			let signature = entry[0];
			let mimeType = entry[1];
			let offset = entry[2];
			let match;
			
			// If an offset is defined, match only from there
			if (offset !== undefined) {
				match = str.startsWith(signature, offset);
			}
			// Otherwise allow match anywhere in sample
			// (200 bytes from getSample() by default)
			else {
				match = str.indexOf(signature) !== -1;
			}
			
			if (match) {
				return mimeType;
			}
		}
		
		return false;
	};
	
	
	/**
	 * Searches string for characters that do not occur in text
	 * (see _isTextCharacter() for the exact rule)
	 *
	 * @param {String} str
	 * @return {String}  'application/octet-stream' if any non-text character
	 *                   is found, otherwise 'text/plain' (including for '')
	 */
	function sniffForBinary(str){
		for (let i = 0; i < str.length; i++) {
			if (!_isTextCharacter(str.charAt(i))) {
				return 'application/octet-stream';
			}
		}
		return 'text/plain';
	}
	
	
	/**
	 * Try to determine the MIME type of a string, using a few different
	 * techniques
	 *
	 * Order: magic numbers, then the extension hint, then a binary-vs-text
	 * scan of the data.
	 *
	 * @param {String} str  Data (sample) to inspect
	 * @param {String} [ext]  Optional file extension hint if data sniffing is
	 *                        unsuccessful
	 * @return {String}  MIME type
	 */
	this.getMIMETypeFromData = function (str, ext){
		var mimeType = this.sniffForMIMEType(str);
		if (mimeType) {
			Zotero.debug('Detected MIME type ' + mimeType);
			return mimeType;
		}
		
		if (ext) {
			mimeType = this.getMIMETypeFromExtension(ext);
			if (mimeType) {
				return mimeType;
			}
		}
		
		mimeType = sniffForBinary(str);
		Zotero.debug('Cannot determine MIME type from magic number or extension -- settling for ' + mimeType);
		return mimeType;
	};
	
	
	/**
	 * Look up a MIME type by file extension, first in the built-in
	 * _extensions table and then via the platform MIME service
	 *
	 * @param {String} ext  Extension without the leading dot
	 * @return {String|false}  MIME type, or false if the extension is not in
	 *                         the table and the service lookup threw
	 */
	this.getMIMETypeFromExtension = function (ext) {
		var type = false;
		
		// Own-property check so that names inherited from Object.prototype
		// (e.g. "constructor") are not mistaken for table entries
		if (_hasOwn(_extensions, ext)) {
			type = _extensions[ext];
		}
		else {
			try {
				type = Components.classes["@mozilla.org/uriloader/external-helper-app-service;1"]
					.getService(Components.interfaces.nsIMIMEService).getTypeFromExtension(ext);
			}
			// Best-effort: a failed service lookup leaves type as false
			catch (e) {}
		}
		
		Zotero.debug("Got MIME type " + type + " from extension '" + ext + "'");
		return type;
	};
	
	
	/**
	 * Try to determine the MIME type of the file, using a few different
	 * techniques
	 *
	 * Takes a sample via Zotero.File.getSample() and the extension via
	 * Zotero.File.getExtension(), then defers to getMIMETypeFromData().
	 *
	 * @param file  File accepted by Zotero.File.getSample()/getExtension()
	 * @return {Promise<String>}  Promise for the MIME type
	 */
	this.getMIMETypeFromFile = Zotero.Promise.coroutine(function* (file) {
		var str = yield Zotero.File.getSample(file);
		var ext = Zotero.File.getExtension(file);
		
		return this.getMIMETypeFromData(str, ext);
	});
	
	
	/**
	 * Determine the MIME type of a URL by issuing a HEAD request
	 *
	 * @param {String} url
	 * @param {Zotero.CookieSandbox} [cookieSandbox]
	 * @return {Promise}  Promise for a two-element array:
	 *                    [mimeType, hasNativeHandler]. mimeType is '' when the
	 *                    response status is neither 200 nor 204, unless the
	 *                    PDF override below applies.
	 */
	this.getMIMETypeFromURL = function (url, cookieSandbox) {
		return Zotero.HTTP.promise("HEAD", url, { cookieSandbox: cookieSandbox, successCodes: false })
		.then(function (xmlhttp) {
			var mimeType;
			if (xmlhttp.status !== 200 && xmlhttp.status !== 204) {
				Zotero.debug("Attachment HEAD request returned with status code "
					+ xmlhttp.status + " in Zotero.MIME.getMIMETypeFromURL()", 2);
				mimeType = '';
			}
			else {
				mimeType = xmlhttp.channel.contentType;
			}
			
			var parsedURL = Components.classes["@mozilla.org/network/standard-url;1"]
				.createInstance(Components.interfaces.nsIURL);
			parsedURL.spec = url;
			
			// Override MIME type to application/pdf if extension is .pdf --
			// workaround for sites that respond to the HEAD request with an
			// invalid MIME type (https://www.zotero.org/trac/ticket/460)
			//
			// Downloaded file is inspected in attachment code and deleted if actually HTML
			//
			// NOTE(review): the pattern has no leading dot, so any file name or
			// URL merely ending in "pdf" triggers the override -- confirm intended
			if (parsedURL.fileName.match(/pdf$/) || url.match(/pdf$/)) {
				mimeType = 'application/pdf';
			}
			
			var ext = parsedURL.fileExtension;
			var isNative = Zotero.MIME.hasNativeHandler(mimeType, ext);
			
			return [mimeType, isNative];
		});
	};
	
	
	/**
	 * Determine if a MIME type can be handled natively
	 * or if it needs to be passed off to a plugin or external helper app
	 *
	 * Note: it certainly seems there should be a more native way of doing this
	 * without replicating all the Mozilla functionality
	 *
	 * Note: nsIMIMEInfo provides a hasDefaultHandler() method, but it doesn't
	 * do what we need
	 *
	 * @param {String} mimeType
	 * @param {String} [ext]  Optional extension hint (only needed for text files
	 *                        that should be forced to open externally).
	 *                        Currently accepted but not consulted.
	 * @return {Boolean}  true if mimeType is listed in _nativeMIMETypes
	 */
	function hasNativeHandler(mimeType, ext) {
		// Own-property check so inherited names such as "toString" are not
		// reported as natively handled
		if (_hasOwn(_nativeMIMETypes, mimeType)) {
			Zotero.debug('MIME type ' + mimeType + ' can be handled natively');
			return true;
		}
		return false;
	}
	
	
	/**
	 * Determine if a MIME type can be handled internally
	 * or if it needs to be passed off to an external helper app
	 *
	 * Similar to hasNativeHandler() but also includes plugins
	 *
	 * @param {String} mimeType
	 * @param {String} [ext]  Extension hint, forwarded to hasNativeHandler()
	 * @return {Boolean}
	 */
	function hasInternalHandler(mimeType, ext) {
		if (hasNativeHandler(mimeType, ext)) {
			return true;
		}
		
		if (mimeType === "application/pdf"
				&& "@mozilla.org/streamconv;1?from=application/pdf&to=*/*" in Components.classes) {
			// PDF can be handled internally if pdf.js is installed
			return true;
		}
		
		// Is there a better way to get to navigator?
		var types = Components.classes["@mozilla.org/appshell/appShellService;1"]
			.getService(Components.interfaces.nsIAppShellService)
			.hiddenDOMWindow.navigator.mimeTypes;
		
		for (let type of types) {
			if (type.type && type.type === mimeType) {
				Zotero.debug('MIME type ' + mimeType + ' can be handled by plugins');
				return true;
			}
		}
		
		Zotero.debug('MIME type ' + mimeType + ' cannot be handled internally');
		return false;
	}
	
	
	/**
	 * Detect the MIME type of a file and report whether it can be handled
	 * internally (see hasInternalHandler())
	 *
	 * @param file  File accepted by getMIMETypeFromFile()
	 * @return {Promise<Boolean>}
	 */
	this.fileHasInternalHandler = Zotero.Promise.coroutine(function* (file){
		var mimeType = yield this.getMIMETypeFromFile(file);
		var ext = Zotero.File.getExtension(file);
		return hasInternalHandler(mimeType, ext);
	});
	
	
	/**
	 * Detect whether a character is text
	 *
	 * Based on RFC 2046 Section 4.1.2. Treat any char 0-31
	 * except the 9-13 range (\t, \n, \v, \f, \r) and char 27 (used by
	 * encodings like Shift_JIS) as non-text
	 *
	 * This is the logic used by the Mozilla sniffer.
	 *
	 * @param {String} chr  Single character; only its first code unit is examined
	 * @return {Boolean}
	 */
	function _isTextCharacter(chr){
		var code = chr.charCodeAt(0);
		return code > 31 || (9 <= code && code <= 13) || code === 27;
	}
};