commit 56bb5b17ad386d67a1ef52987a4ead38a862f47f
parent 6520a717883cedd470cc3cb99ce358bba36d517f
Author: Dan Stillman <dstillman@zotero.org>
Date: Mon, 28 Jan 2013 22:44:02 -0500
Better MIME type detection of Office files
For at least one Windows user, a .docx file was being interpreted as
text/plain. Instead of relying entirely on the system, hard-code some
extensions we know. (More can be added.)
Also:
- Determine MIME type when opening files instead of using stored type,
since we might have gotten smarter
Diffstat:
2 files changed, 66 insertions(+), 22 deletions(-)
diff --git a/chrome/content/zotero/xpcom/mime.js b/chrome/content/zotero/xpcom/mime.js
@@ -29,11 +29,8 @@ Zotero.MIME = new function(){
this.getPrimaryExtension = getPrimaryExtension;
this.sniffForMIMEType = sniffForMIMEType;
this.sniffForBinary = sniffForBinary;
- this.getMIMETypeFromData = getMIMETypeFromData;
- this.getMIMETypeFromFile = getMIMETypeFromFile;
this.hasNativeHandler = hasNativeHandler;
this.hasInternalHandler = hasInternalHandler;
- this.fileHasInternalHandler = fileHasInternalHandler;
// Magic numbers
var _snifferEntries = [
@@ -55,6 +52,41 @@ Zotero.MIME = new function(){
];
+ var _extensions = { // Hard-coded map of lower-case file extension (no dot) -> MIME type; getMIMETypeFromExtension() consults it before asking the system MIME service
+ // MS Office (legacy binary formats, Office Open XML, and macro-enabled variants)
+ 'doc': 'application/msword',
+ 'dot': 'application/msword',
+ 'docx': 'application/vnd.openxmlformats-officedocument.wordprocessingml.document',
+ 'dotx': 'application/vnd.openxmlformats-officedocument.wordprocessingml.template',
+ 'docm': 'application/vnd.ms-word.document.macroEnabled.12',
+ 'dotm': 'application/vnd.ms-word.template.macroEnabled.12',
+ 'xls': 'application/vnd.ms-excel',
+ 'xlt': 'application/vnd.ms-excel',
+ 'xla': 'application/vnd.ms-excel',
+ 'xlsx': 'application/vnd.openxmlformats-officedocument.spreadsheetml.sheet',
+ 'xltx': 'application/vnd.openxmlformats-officedocument.spreadsheetml.template',
+ 'xlsm': 'application/vnd.ms-excel.sheet.macroEnabled.12',
+ 'xltm': 'application/vnd.ms-excel.template.macroEnabled.12',
+ 'xlam': 'application/vnd.ms-excel.addin.macroEnabled.12',
+ 'xlsb': 'application/vnd.ms-excel.sheet.binary.macroEnabled.12',
+ 'ppt': 'application/vnd.ms-powerpoint',
+ 'pot': 'application/vnd.ms-powerpoint',
+ 'pps': 'application/vnd.ms-powerpoint',
+ 'ppa': 'application/vnd.ms-powerpoint',
+ 'pptx': 'application/vnd.openxmlformats-officedocument.presentationml.presentation',
+ 'potx': 'application/vnd.openxmlformats-officedocument.presentationml.template',
+ 'ppsx': 'application/vnd.openxmlformats-officedocument.presentationml.slideshow',
+ 'ppam': 'application/vnd.ms-powerpoint.addin.macroEnabled.12',
+ 'pptm': 'application/vnd.ms-powerpoint.presentation.macroEnabled.12',
+ 'potm': 'application/vnd.ms-powerpoint.template.macroEnabled.12',
+ 'ppsm': 'application/vnd.ms-powerpoint.slideshow.macroEnabled.12',
+
+ // OpenOffice/LibreOffice
+ 'odt': 'application/vnd.oasis.opendocument.text',
+ // Other
+ 'pdf': 'application/pdf'
+ };
+
var _textTypes = {
'application/xhtml+xml': true,
'application/xml': true,
@@ -245,22 +277,19 @@ Zotero.MIME = new function(){
*
* ext is an optional file extension hint if data sniffing is unsuccessful
*/
- function getMIMETypeFromData(str, ext){
+ this.getMIMETypeFromData = function (str, ext){
var mimeType = sniffForMIMEType(str);
if (mimeType){
Zotero.debug('Detected MIME type ' + mimeType);
return mimeType;
}
- try {
- if (ext) {
- var mimeType = Components.classes["@mozilla.org/uriloader/external-helper-app-service;1"]
- .getService(Components.interfaces.nsIMIMEService).getTypeFromExtension(ext);
- Zotero.debug('Got MIME type ' + mimeType + ' from extension');
+ if (ext) {
+ mimeType = this.getMIMETypeFromExtension(ext);
+ if (mimeType) {
return mimeType;
}
}
- catch (e) {}
var mimeType = sniffForBinary(str);
Zotero.debug('Cannot determine MIME type from magic number or extension -- settling for ' + mimeType);
@@ -268,15 +297,34 @@ Zotero.MIME = new function(){
}
+ this.getMIMETypeFromExtension = function (ext) {
+ var type = false;
+
+ if (_extensions[ext]) {
+ var type = _extensions[ext];
+ }
+ else {
+ try {
+ var type = Components.classes["@mozilla.org/uriloader/external-helper-app-service;1"]
+ .getService(Components.interfaces.nsIMIMEService).getTypeFromExtension(ext);
+ }
+ catch (e) {}
+ }
+
+ Zotero.debug("Got MIME type " + type + " from extension '" + ext + "'");
+ return type;
+ }
+
+
/*
* Try to determine the MIME type of the file, using a few different
* techniques
*/
- function getMIMETypeFromFile(file){
+ this.getMIMETypeFromFile = function (file) { // now defined directly on the Zotero.MIME object rather than as a private function re-exported through this.*
var str = Zotero.File.getSample(file);
var ext = Zotero.File.getExtension(file);
- return getMIMETypeFromData(str, ext);
+ return this.getMIMETypeFromData(str, ext); // dispatches through this, so call as Zotero.MIME.getMIMETypeFromFile(file); an unbound reference to this function would fail here
}
@@ -378,8 +426,8 @@ Zotero.MIME = new function(){
}
- function fileHasInternalHandler(file){
- var mimeType = getMIMETypeFromFile(file);
+ this.fileHasInternalHandler = function (file){ // returns whatever hasInternalHandler() reports for the file's detected MIME type and extension
+ var mimeType = this.getMIMETypeFromFile(file); // dispatches through this, so call as Zotero.MIME.fileHasInternalHandler(file) rather than passing the function unbound
var ext = Zotero.File.getExtension(file);
return hasInternalHandler(mimeType, ext);
}
diff --git a/chrome/content/zotero/zoteroPane.js b/chrome/content/zotero/zoteroPane.js
@@ -3443,14 +3443,10 @@ var ZoteroPane = new function()
if(forceExternalViewer !== undefined) {
var externalViewer = forceExternalViewer;
} else {
- var mimeType = attachment.attachmentMIMEType;
- // If no MIME type specified, try to detect again (I guess in case
- // we've gotten smarter since the file was imported?)
- if (!mimeType) {
- mimeType = Zotero.MIME.getMIMETypeFromFile(file);
-
- // TODO: update DB with new info
- }
+ var mimeType = Zotero.MIME.getMIMETypeFromFile(file);
+
+ //var mimeType = attachment.attachmentMIMEType;
+ // TODO: update DB with new info if changed?
var ext = Zotero.File.getExtension(file);
var externalViewer = Zotero.isStandalone || (!Zotero.MIME.hasNativeHandler(mimeType, ext) &&