Scholar.File -- some methods to help with MIME type detection of local files (might be abstracted into MIME class later) - www - Unnamed repository; edit this file 'description' to name the repository.

commit d0d1ed8c1d0882e0cc9354bbef85774d263e6547
parent f07ff9ac2a6b1ced0d52139e0ef0751356e33fc4
Author: Dan Stillman <dstillman@zotero.org>
Date:   Sat, 12 Aug 2006 01:41:48 +0000

Scholar.File -- some methods to help with MIME type detection of local files (might be abstracted into MIME class later)

Methods:

getExtension(nsIFile)
isExternalTextExtension(ext)
getSample(nsIFile)
sniffForMIMEType(nsIFile)
sniffForBinary(nsIFile)
getMIMETypeFromFile(nsIFile)
hasInternalHandler(nsIFile)


Diffstat:
A chrome/chromeFiles/content/scholar/xpcom/file.js  | 176 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++

1 file changed, 176 insertions(+), 0 deletions(-)
diff --git a/chrome/chromeFiles/content/scholar/xpcom/file.js b/chrome/chromeFiles/content/scholar/xpcom/file.js
@@ -0,0 +1,176 @@
+Scholar.File = new function(){
+	this.getExtension = getExtension; // public methods: each property below exposes the same-named function declared later in this closure
+	this.isExternalTextExtension = isExternalTextExtension;
+	this.getSample = getSample;
+	this.sniffForMIMEType = sniffForMIMEType;
+	this.sniffForBinary = sniffForBinary;
+	this.getMIMETypeFromFile = getMIMETypeFromFile;
+	this.hasInternalHandler = hasInternalHandler;
+	
+	// Magic numbers: [prefix, MIME type] pairs; sniffForMIMEType() returns the type of the first prefix the file sample starts with
+	var _snifferEntries = [
+		["%PDF-", "application/pdf"],
+		["%!PS-Adobe-", 'application/postscript'],
+		["%! PS-Adobe-", 'application/postscript'],
+		["From", 'text/plain'],
+		[">From", 'text/plain'],
+		["#!", 'text/plain'],
+		["<?xml", 'text/xml']
+	];
+	
+	// MIME types handled natively by Gecko (hasInternalHandler() returns true for any type listed here)
+	// DEBUG: There's definitely a better way of getting these
+	var _nativeMIMETypes = {
+		'text/html': true,
+		'image/jpeg': true,
+		'image/gif': true,
+		'text/xml': true,
+		'text/plain': true,
+		'application/x-javascript': true
+	};
+	
+	// Extensions of text files (generally XML) to force to be external; keys are bare extensions without the dot
+	var _externalTextExtensions = {
+		'graffle': true
+	};
+	
+	
+	function getExtension(file){
+		var pos = file.leafName.lastIndexOf('.');
+		return pos==-1 ? '' : file.leafName.substr(pos+1);
+	}
+	
+	
+	/*
+	 * Check if file extension should be forced to open externally
+	 */
+	function isExternalTextExtension(ext){
+		return typeof _externalTextExtensions['ext'] != 'undefined';
+	}
+	
+	
+	/*
+	 * Get the first 128 bytes of the file as a string (multibyte-safe)
+	 */
+	function getSample(file){
+		var fis = Components.classes["@mozilla.org/network/file-input-stream;1"].
+			createInstance(Components.interfaces.nsIFileInputStream);
+		fis.init(file, false, false, false);
+		
+		const replacementChar
+			= Components.interfaces.nsIConverterInputStream.DEFAULT_REPLACEMENT_CHARACTER;
+		var is = Components.classes["@mozilla.org/intl/converter-input-stream;1"]
+			.createInstance(Components.interfaces.nsIConverterInputStream);
+		is.init(fis, "UTF-8", 128, replacementChar);
+		var str = {};
+		var numChars = is.readString(512, str);
+		is.close();
+		
+		return str.value;
+	}
+	
+	/*
+	 * Searches file for magic numbers
+	 */
+	function sniffForMIMEType(file){
+		var str = this.getSample(file);
+		
+		for (var i in _snifferEntries){
+			if (str.indexOf(_snifferEntries[i][0])==0){
+				return _snifferEntries[i][1];
+			}
+		}
+		
+		return false;
+	}
+	
+	
+	/*
+	 * Searches file for embedded nulls
+	 */
+	function sniffForBinary(file){
+		var str = this.getSample(file);
+		
+		for (var i=0; i<str.length; i++){
+			if (!_isTextCharacter(str.charAt(i))){
+				return 'application/octet-stream';
+			}
+		}
+		return 'text/plain';
+	}
+	
+	
+	/*
+	 * Try to determine the MIME type of the file, trying a few different
+	 * techniques
+	 */
+	function getMIMETypeFromFile(file){
+		var mimeType = this.sniffForMIMEType(file);
+		if (mimeType){
+			Scholar.debug('Detected MIME type ' + mimeType);
+			return mimeType;
+		}
+		
+		try {
+			var mimeType = Components.classes["@mozilla.org/uriloader/external-helper-app-service;1"]
+				.getService(Components.interfaces.nsIMIMEService).getTypeFromFile(file);
+			Scholar.debug('Got MIME type ' + mimeType + ' from extension');
+			return mimeType;
+		}
+		catch (e){
+			var mimeType = this.sniffForBinary(file);
+			Scholar.debug('Cannot determine MIME type -- settling for ' + mimeType);
+			return mimeType;
+		}
+	}
+	
+	
+	/*
+	 * Determine if file can be handled internally (natively or with plugins)
+	 * or if it needs to be passed off to an external helper app
+	 *
+	 * Note: it certainly seems there should be a more native way of doing this
+	 * without replicating all the Mozilla functionality
+	 */
+	function hasInternalHandler(file){
+		var mimeType = this.getMIMETypeFromFile(file);
+		
+		if (mimeType=='text/plain'){
+			if (this.isExternalTextExtension(this.getExtension(file))){
+				Scholar.debug('text/plain file has extension that should be handled externally');
+				return false;
+			}
+			return true;
+		}
+		
+		if (_nativeMIMETypes[mimeType]){
+			Scholar.debug('MIME type ' + mimeType + ' can be handled natively');
+			return true;
+		}
+		
+		for (var i in navigator.mimeTypes){
+			if (navigator.mimeTypes[i].type==mimeType){
+				Scholar.debug('MIME type ' + mimeType + ' can be handled by plugins');
+				return true;
+			}
+		}
+		
+		Scholar.debug('MIME type ' + mimeType + ' cannot be handled natively');
+		return false;
+	}
+	
+	
+	/*
+	 * Detect whether a character is text
+	 * 
+	 * Based on RFC 2046 Section 4.1.2. Treat any char 0-31
+	 * except the 9-13 range (\t, \n, \v, \f, \r) and char 27 (used by
+     * encodings like Shift_JIS) as non-text
+	 *
+	 * This is the logic used by the Mozilla sniffer.
+	 */
+	function _isTextCharacter(chr){
+		var chr = chr.charCodeAt(0);
+		return chr > 31 || (9 <= chr && chr <=13 ) || chr == 27;
+	}
+}

	www Unnamed repository; edit this file 'description' to name the repository.
	Log \| Files \| Refs \| Submodules \| README \| LICENSE