www

Unnamed repository; edit this file 'description' to name the repository.
Log | Files | Refs | Submodules | README | LICENSE

mime.js (12726B)


      1 /*
      2     ***** BEGIN LICENSE BLOCK *****
      3     
      4     Copyright © 2009 Center for History and New Media
      5                      George Mason University, Fairfax, Virginia, USA
      6                      http://zotero.org
      7     
      8     This file is part of Zotero.
      9     
     10     Zotero is free software: you can redistribute it and/or modify
     11     it under the terms of the GNU Affero General Public License as published by
     12     the Free Software Foundation, either version 3 of the License, or
     13     (at your option) any later version.
     14     
     15     Zotero is distributed in the hope that it will be useful,
     16     but WITHOUT ANY WARRANTY; without even the implied warranty of
     17     MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
     18     GNU Affero General Public License for more details.
     19     
     20     You should have received a copy of the GNU Affero General Public License
     21     along with Zotero.  If not, see <http://www.gnu.org/licenses/>.
     22     
     23     ***** END LICENSE BLOCK *****
     24 */
     25 
     26 Zotero.MIME = new function(){
	// Expose the function declarations defined further down as public
	// methods of Zotero.MIME. Function declarations are hoisted, so they can
	// be referenced here before their definitions appear.
	this.isTextType = isTextType;
	this.getPrimaryExtension = getPrimaryExtension;
	this.sniffForBinary = sniffForBinary;
	this.hasNativeHandler = hasNativeHandler;
	this.hasInternalHandler = hasInternalHandler;
     32 	
	// Magic numbers
	//
	// Each entry is [magic string, MIME type, offset (optional)].
	// sniffForMIMEType() walks the entries in order and returns the MIME type
	// of the first one that matches. When an offset is given, the magic
	// string must start at exactly that position in the sample; when it is
	// omitted, the magic string may appear anywhere in the sample.
	var _snifferEntries = [
		["%PDF-", "application/pdf"],
		["%!PS-Adobe-", 'application/postscript', 0],
		["%! PS-Adobe-", 'application/postscript', 0],
		["\uFFFD\uFFFD\x11\u0871\x1A\uFFFD\x00\x00", "application/msword", 0],
		["From", 'text/plain', 0],
		[">From", 'text/plain', 0],
		["#!", 'text/plain', 0],
		["<?xml", 'text/xml', 0],
		["<!DOCTYPE html", 'text/html', 0],
		["<html", 'text/html', 0],
		["\uFFFD\uFFFD\uFFFD\uFFFD", 'image/jpeg', 0],
		["GIF8", 'image/gif', 0],
		["\uFFFDPNG", 'image/png', 0],
		["JFIF", 'image/jpeg'],
		["FLV", "video/x-flv", 0],
		["\u0000\u0000\u0001\u0000", "image/vnd.microsoft.icon", 0],
		// The escapes below spell "SQLite format 3" followed by a NUL character
		["\u0053\u0051\u004C\u0069\u0074\u0065\u0020\u0066"
			+ "\u006F\u0072\u006D\u0061\u0074\u0020\u0033\u0000", "application/x-sqlite3", 0]
	];
     54 	
	// File extension -> MIME type table. getMIMETypeFromExtension() looks the
	// extension up here first (exact key match) and only asks the MIME
	// service when there is no entry.
	var _extensions = {
		// MS Office
		'doc': 'application/msword',
		'dot': 'application/msword',
		'docx': 'application/vnd.openxmlformats-officedocument.wordprocessingml.document',
		'dotx': 'application/vnd.openxmlformats-officedocument.wordprocessingml.template',
		'docm': 'application/vnd.ms-word.document.macroEnabled.12',
		'dotm': 'application/vnd.ms-word.template.macroEnabled.12',
		'xls': 'application/vnd.ms-excel',
		'xlt': 'application/vnd.ms-excel',
		'xla': 'application/vnd.ms-excel',
		'xlsx': 'application/vnd.openxmlformats-officedocument.spreadsheetml.sheet',
		'xltx': 'application/vnd.openxmlformats-officedocument.spreadsheetml.template',
		'xlsm': 'application/vnd.ms-excel.sheet.macroEnabled.12',
		'xltm': 'application/vnd.ms-excel.template.macroEnabled.12',
		'xlam': 'application/vnd.ms-excel.addin.macroEnabled.12',
		'xlsb': 'application/vnd.ms-excel.sheet.binary.macroEnabled.12',
		'ppt': 'application/vnd.ms-powerpoint',
		'pot': 'application/vnd.ms-powerpoint',
		'pps': 'application/vnd.ms-powerpoint',
		'ppa': 'application/vnd.ms-powerpoint',
		'pptx': 'application/vnd.openxmlformats-officedocument.presentationml.presentation',
		'potx': 'application/vnd.openxmlformats-officedocument.presentationml.template',
		'ppsx': 'application/vnd.openxmlformats-officedocument.presentationml.slideshow',
		'ppam': 'application/vnd.ms-powerpoint.addin.macroEnabled.12',
		'pptm': 'application/vnd.ms-powerpoint.presentation.macroEnabled.12',
		'potm': 'application/vnd.ms-powerpoint.template.macroEnabled.12',
		'ppsm': 'application/vnd.ms-powerpoint.slideshow.macroEnabled.12',
		
		// OpenOffice/LibreOffice
		'odt': 'application/vnd.oasis.opendocument.text',
		
		'pdf': 'application/pdf'
	};
     89 	
	// MIME types outside the text/* family that isTextType() also treats as
	// text
	var _textTypes = {
		'application/xhtml+xml': true,
		'application/xml': true,
		'application/x-javascript': true
	};
     95 	
     96 	var _webPageTypes = [
     97 		'text/html',
     98 		'application/xhtml+xml'
     99 	]
    100 	
	// MIME types handled natively by Gecko
	// DEBUG: There's definitely a better way of getting these
	//
	// hasNativeHandler() returns true exactly for the types listed here.
	var _nativeMIMETypes = {
		'text/html': true,
		'text/css': true,
		'text/xml': true,
		'application/xhtml+xml': true,
		'application/xml': true,
		'text/plain': true,
		'application/x-javascript': true
	};
    112 	
	// Extensions of text files (generally XML) to force to be external
	//
	// NOTE(review): nothing in the visible part of this file reads this
	// table -- confirm whether it is still needed or whether
	// hasNativeHandler() was meant to consult it via its `ext` hint.
	var _externalTextExtensions = {
		graffle: true,
		mm: true,
		opml: true,
		bib: true
	};
    120 	
    121 	
    122 	
    123 	function isTextType(mimeType) {
    124 		return mimeType.substr(0, 5) == 'text/' || _textTypes[mimeType];
    125 	}
    126 	
    127 	this.isWebPageType = function(mimeType) {
    128 		return _webPageTypes.indexOf(mimeType) != -1;
    129 	}
    130 	
    131 	/*
    132 	 * Our own wrapper around the MIME service's getPrimaryExtension() that
    133 	 * works a little better
    134 	 */
    135 	function getPrimaryExtension(mimeType, ext) {
    136 		// Enforce some extensions
    137 		switch (mimeType) {
    138 			case 'text/html':
    139 			case 'application/xhtml+xml':
    140 				return 'html';
    141 			
    142 			case 'application/pdf':
    143 			case 'application/x-pdf':
    144 			case 'application/acrobat':
    145 			case 'applications/vnd.pdf':
    146 			case 'text/pdf':
    147 			case 'text/x-pdf':
    148 				return 'pdf';
    149 			
    150 			case 'image/jpg':
    151 			case 'image/jpeg':
    152 				return 'jpg';
    153 			
    154 			case 'image/gif':
    155 				return 'gif';
    156 			
    157 			case 'application/msword':
    158 			case 'application/doc':
    159 			case 'application/vnd.msword':
    160 			case 'application/vnd.ms-word':
    161 			case 'application/winword':
    162 			case 'application/word':
    163 			case 'application/x-msw6':
    164 			case 'application/x-msword':
    165 				return 'doc';
    166 			
    167 			case 'application/vnd.oasis.opendocument.text':
    168 			case 'application/x-vnd.oasis.opendocument.text':
    169 				return 'odt';
    170 			
    171 			case 'video/flv':
    172 			case 'video/x-flv':
    173 				return 'flv';
    174 			
    175 			case 'image/tif':
    176 			case 'image/tiff':
    177 			case 'image/tif':
    178 			case 'image/x-tif':
    179 			case 'image/tiff':
    180 			case 'image/x-tiff':
    181 			case 'application/tif':
    182 			case 'application/x-tif':
    183 			case 'application/tiff':
    184 			case 'application/x-tiff':
    185 				return 'tiff';
    186 			
    187 			case 'application/zip':
    188 			case 'application/x-zip':
    189 			case 'application/x-zip-compressed':
    190 			case 'application/x-compress':
    191 			case 'application/x-compressed':
    192 			case 'multipart/x-zip':
    193 				return 'zip';
    194 				
    195 			case 'video/quicktime':
    196 			case 'video/x-quicktime':
    197 				return 'mov';
    198 				
    199 			case 'video/avi':
    200 			case 'video/msvideo':
    201 			case 'video/x-msvideo':
    202 				return 'avi';
    203 				
    204 			case 'audio/wav':
    205 			case 'audio/x-wav':
    206 			case 'audio/wave':
    207 				return 'wav';
    208 				
    209 			case 'audio/aiff':
    210 			case 'audio/x-aiff':
    211 			case 'sound/aiff':
    212 				return 'aiff';
    213 		}
    214 		
    215 		try {
    216 			ext = Components.classes["@mozilla.org/mime;1"]
    217 				.getService(Components.interfaces.nsIMIMEService)
    218 				.getPrimaryExtension(mimeType, ext);
    219 		}
    220 		// nsIMIMEService.getPrimaryExtension() doesn't work on Linux and
    221 		// throws an error if it can't find an extension
    222 		catch (e) {}
    223 		
    224 		return ext ? ext : '';
    225 	}
    226 	
    227 	
    228 	/*
    229 	 * Searches string for magic numbers
    230 	 */
    231 	this.sniffForMIMEType = function (str) {
    232 		for (let i in _snifferEntries) {
    233 			let match = false;
    234 			// If an offset is defined, match only from there
    235 			if (_snifferEntries[i][2] != undefined) {
    236 				if (str.substr(_snifferEntries[i][2]).indexOf(_snifferEntries[i][0]) == 0) {
    237 					match = true;
    238 				}
    239 			}
    240 			// Otherwise allow match anywhere in sample
    241 			// (200 bytes from getSample() by default)
    242 			else if (str.indexOf(_snifferEntries[i][0]) != -1) {
    243 				match = true;
    244 			}
    245 			
    246 			if (match) {
    247 				return _snifferEntries[i][1];
    248 			}
    249 		}
    250 		
    251 		return false;
    252 	}
    253 	
    254 	
    255 	/*
    256 	 * Searches string for embedded nulls
    257 	 *
    258 	 * Returns 'application/octet-stream' or 'text/plain'
    259 	 */
    260 	function sniffForBinary(str){
    261 		for (var i=0; i<str.length; i++){
    262 			if (!_isTextCharacter(str.charAt(i))){
    263 				return 'application/octet-stream';
    264 			}
    265 		}
    266 		return 'text/plain';
    267 	}
    268 	
    269 	
    270 	/*
    271 	 * Try to determine the MIME type of a string, using a few different
    272 	 * techniques
    273 	 *
    274 	 * ext is an optional file extension hint if data sniffing is unsuccessful
    275 	 */
    276 	this.getMIMETypeFromData = function (str, ext){
    277 		var mimeType = this.sniffForMIMEType(str);
    278 		if (mimeType){
    279 			Zotero.debug('Detected MIME type ' + mimeType);
    280 			return mimeType;
    281 		}
    282 		
    283 		if (ext) {
    284 			mimeType = this.getMIMETypeFromExtension(ext);
    285 			if (mimeType) {
    286 				return mimeType;
    287 			}
    288 		}
    289 		
    290 		var mimeType = sniffForBinary(str);
    291 		Zotero.debug('Cannot determine MIME type from magic number or extension -- settling for ' + mimeType);
    292 		return mimeType;
    293 	}
    294 	
    295 	
    296 	this.getMIMETypeFromExtension = function (ext) {
    297 		var type = false;
    298 		
    299 		if (_extensions[ext]) {
    300 			var type = _extensions[ext];
    301 		}
    302 		else {
    303 			try {
    304 				var type = Components.classes["@mozilla.org/uriloader/external-helper-app-service;1"]
    305 					.getService(Components.interfaces.nsIMIMEService).getTypeFromExtension(ext);
    306 			}
    307 			catch (e) {}
    308 		}
    309 		
    310 		Zotero.debug("Got MIME type " + type + " from extension '" + ext + "'");
    311 		return type;
    312 	}
    313 	
    314 	
    315 	/*
    316 	 * Try to determine the MIME type of the file, using a few different
    317 	 * techniques
    318 	 */
    319 	this.getMIMETypeFromFile = Zotero.Promise.coroutine(function* (file) {
    320 		var str = yield Zotero.File.getSample(file);
    321 		var ext = Zotero.File.getExtension(file);
    322 		
    323 		return this.getMIMETypeFromData(str, ext);
    324 	});
    325 	
    326 	
    327 	/**
    328 	 * @param {String} url
    329 	 * @param {Zotero.CookieSandbox} [cookieSandbox]
    330 	 * @return {Promise}
    331 	 */
    332 	this.getMIMETypeFromURL = function (url, cookieSandbox) {
    333 		return Zotero.HTTP.promise("HEAD", url, { cookieSandbox: cookieSandbox, successCodes: false })
    334 		.then(function (xmlhttp) {
    335 			if (xmlhttp.status != 200 && xmlhttp.status != 204) {
    336 				Zotero.debug("Attachment HEAD request returned with status code "
    337 					+ xmlhttp.status + " in Zotero.MIME.getMIMETypeFromURL()", 2);
    338 				var mimeType = '';
    339 			}
    340 			else {
    341 				var mimeType = xmlhttp.channel.contentType;
    342 			}
    343 			
    344 			var nsIURL = Components.classes["@mozilla.org/network/standard-url;1"]
    345 				.createInstance(Components.interfaces.nsIURL);
    346 			nsIURL.spec = url;
    347 			
    348 			// Override MIME type to application/pdf if extension is .pdf --
    349 			// workaround for sites that respond to the HEAD request with an
    350 			// invalid MIME type (https://www.zotero.org/trac/ticket/460)
    351 			//
    352 			// Downloaded file is inspected in attachment code and deleted if actually HTML
    353 			if (nsIURL.fileName.match(/pdf$/) || url.match(/pdf$/)) {
    354 				mimeType = 'application/pdf';
    355 			}
    356 			
    357 			var ext = nsIURL.fileExtension;
    358 			var hasNativeHandler = Zotero.MIME.hasNativeHandler(mimeType, ext);
    359 			
    360 			return [mimeType, hasNativeHandler];
    361 		});
    362 	}
    363 	
    364 	
    365 	/*
    366 	 * Determine if a MIME type can be handled natively
    367 	 * or if it needs to be passed off to a plugin or external helper app
    368 	 *
    369 	 * ext is an optional extension hint (only needed for text files
    370 	 * that should be forced to open externally)
    371 	 *
    372 	 * Note: it certainly seems there should be a more native way of doing this
    373 	 * without replicating all the Mozilla functionality
    374 	 *
    375 	 * Note: nsIMIMEInfo provides a hasDefaultHandler() method, but it doesn't
    376 	 * do what we need
    377 	 */
    378 	function hasNativeHandler(mimeType, ext) {
    379 		if (_nativeMIMETypes[mimeType]){
    380 			Zotero.debug('MIME type ' + mimeType + ' can be handled natively');
    381 			return true;
    382 		}
    383 		return false;
    384 	}
    385 	
    386 	
    387 	/*
    388 	 * Determine if a MIME type can be handled internally
    389 	 * or if it needs to be passed off to an external helper app
    390 	 *
    391 	 * Similar to hasNativeHandler() but also includes plugins
    392 	 */
    393 	function hasInternalHandler(mimeType, ext) {
    394 		if (hasNativeHandler(mimeType, ext)) {
    395 			return true;
    396 		}
    397 		
    398 		if(mimeType === "application/pdf"
    399 				&& "@mozilla.org/streamconv;1?from=application/pdf&to=*/*" in Components.classes) {
    400 			// PDF can be handled internally if pdf.js is installed
    401 			return true;
    402 		}
    403 		
    404 		// Is there a better way to get to navigator?
    405 		var types = Components.classes["@mozilla.org/appshell/appShellService;1"]
    406 				.getService(Components.interfaces.nsIAppShellService)
    407 				.hiddenDOMWindow.navigator.mimeTypes;
    408 		
    409 		for (let type of types) {
    410 			if (type.type && type.type == mimeType) {
    411 				Zotero.debug('MIME type ' + mimeType + ' can be handled by plugins');
    412 				return true;
    413 			}
    414 		}
    415 		
    416 		Zotero.debug('MIME type ' + mimeType + ' cannot be handled internally');
    417 		return false;
    418 	}
    419 	
    420 	
    421 	this.fileHasInternalHandler = Zotero.Promise.coroutine(function* (file){
    422 		var mimeType = yield this.getMIMETypeFromFile(file);
    423 		var ext = Zotero.File.getExtension(file);
    424 		return hasInternalHandler(mimeType, ext);
    425 	});
    426 	
    427 	
    428 	/*
    429 	 * Detect whether a character is text
    430 	 * 
    431 	 * Based on RFC 2046 Section 4.1.2. Treat any char 0-31
    432 	 * except the 9-13 range (\t, \n, \v, \f, \r) and char 27 (used by
    433      * encodings like Shift_JIS) as non-text
    434 	 *
    435 	 * This is the logic used by the Mozilla sniffer.
    436 	 */
    437 	function _isTextCharacter(chr){
    438 		var chr = chr.charCodeAt(0);
    439 		return chr > 31 || (9 <= chr && chr <=13 ) || chr == 27;
    440 	}
    441 }