Merge pull request #194 from aurimasv/shortenFN - www - Unnamed repository; edit this file 'description' to name the repository.

commit 02aca0660a96c5e22b2825e016e48dcb6cc48c99
parent 9665e67042a796a7bb396e25d18b9507257f4878
Author: Dan Stillman <dstillman@gmail.com>
Date:   Thu, 22 Nov 2012 10:14:24 -0800

Merge pull request #194 from aurimasv/shortenFN

Shorten long file names. Closes #8
Diffstat:
M chrome/content/zotero/webpagedump/common.js  | 13 +++++--------
M chrome/content/zotero/webpagedump/domsaver.js  | 15 +++++++++------
M chrome/content/zotero/xpcom/attachments.js  | 19 +++++++++----------
M chrome/content/zotero/xpcom/file.js  | 38 +++++++++++++++++++++++++++++++++++---

4 files changed, 58 insertions(+), 27 deletions(-)
diff --git a/chrome/content/zotero/webpagedump/common.js b/chrome/content/zotero/webpagedump/common.js
@@ -54,6 +54,8 @@ var WPD_DEFAULTHEIGHT = 768;
 
 var WPD_MAXUIERRORCOUNT = 8;
 
+// maximum character length for a valid file name (excluding extension)
+var WPD_MAX_FILENAME_LENGTH = 100;
 
 /*function wpdGetTopBrowserWindow()
 {
@@ -353,15 +355,10 @@ var wpdCommon = {
 	},
 
 	// replace illegal characters
+	// and shorten long file names
 	getValidFileName: function (aFileName) {
-		aFileName = aFileName.replace(/[\"\?!~`]+/g, "");
-		aFileName = aFileName.replace(/[\*\&]+/g, "+");
-		aFileName = aFileName.replace(/[\\\/\|\:;]+/g, "-");
-		aFileName = aFileName.replace(/[\<]+/g, "(");
-		aFileName = aFileName.replace(/[\>]+/g, ")");
-		aFileName = aFileName.replace(/[\s]+/g, "_");
-		aFileName = aFileName.replace(/[%]+/g, "@");
-		return aFileName;
+		aFileName = Zotero.File.getValidFileName(aFileName).toLowerCase();
+		return Zotero.File.truncateFileName(aFileName, WPD_MAX_FILENAME_LENGTH);
 	},
 
 	getURL: function () {
diff --git a/chrome/content/zotero/webpagedump/domsaver.js b/chrome/content/zotero/webpagedump/domsaver.js
@@ -164,7 +164,8 @@ var wpdDOMSaver = {
 
 		// Split fileName in Path and Name
 
-		this.name = wpdCommon.getFileLeafName(fileName); // extract fileName from filePath
+		this.name = wpdCommon.getValidFileName(
+			wpdCommon.getFileLeafName(fileName)); // extract fileName from filePath
 		this.currentDir = wpdCommon.getFilePath(fileName); // only directory
 		this.name = wpdCommon.splitFileName(this.name)[0]; // no extension!
 
@@ -221,7 +222,7 @@ var wpdDOMSaver = {
 	// resolve the javascript links inside the attributes (e.g. onclick,...)
 	normalizeJavaScriptLink: function (aNode, aAttr) {
 		var val = aNode.getAttribute(aAttr); // get the attribute value and check for link stuff
-		if (!val.match(/\(\'([^\']+)\'/)) return aNode;
+		if (!val || !val.match(/\(\'([^\']+)\'/)) return aNode;
 		val = RegExp.$1;
 		if (val.indexOf("/") == -1 && val.indexOf(".") == -1) return aNode;
 		val = wpdCommon.resolveURL(this.currentURL, val); // it is a link -> resolve and set the URL to the local URL
@@ -409,9 +410,12 @@ var wpdDOMSaver = {
 				case "link":
 					// could containt urls (icon, stylesheet and fontdef)
 					// We have to remove nodes with the stylesheet attribute because they will be added later
-					if ((aNode.getAttribute("rel").toLowerCase() == "stylesheet") && (aNode.getAttribute("href").indexOf("chrome://") == -1)) {
+					if(!aNode.hasAttribute("rel")) return aNode;
+					if (aNode.getAttribute("rel").toLowerCase() == "stylesheet"
+							&& (aNode.hasAttribute("href") && aNode.getAttribute("href").indexOf("chrome://") == -1)) {
 						return wpdCommon.removeNodeFromParent(aNode);
-					} else if ((aNode.getAttribute("rel").toLowerCase() == "shortcut icon") || (aNode.getAttribute("rel").toLowerCase() == "icon")) {
+					} else if (aNode.getAttribute("rel").toLowerCase() == "shortcut icon"
+							|| aNode.getAttribute("rel").toLowerCase() == "icon") {
 						var aFileName = this.download(aNode.href, true);
 						// Changed by Dan S. for Zotero -- see this.repairRelativeLinks()
 						if (aFileName) aNode.setAttribute("href", this.relativeLinkFix(aFileName));
@@ -732,7 +736,6 @@ var wpdDOMSaver = {
 			// generate a filename
 			var newFileName = aURL.fileName.toLowerCase();
 			if (!newFileName) newFileName = "untitled";
-			newFileName = wpdCommon.getValidFileName(newFileName);
 			// same name but different location?
 			newFileName = this.getUniqueFileNameAndRegister(newFileName, aURLSpec);
 			// is the file already registered (processed) ?
@@ -1076,7 +1079,7 @@ var wpdDOMSaver = {
 	// (be sure to call the init function at the top of this file before)
 	saveHTMLDocument: function () {
 		try {
-			this.saveDocumentEx(this.document, this.name);
+			return this.saveDocumentEx(this.document, this.name);
 		} catch (ex) {
 			wpdCommon.addError("[wpdDOMSaver.saveHTMLDocument]\n -> " + ex);
 		}
diff --git a/chrome/content/zotero/xpcom/attachments.js b/chrome/content/zotero/xpcom/attachments.js
@@ -545,10 +545,12 @@ Zotero.Attachments = new function(){
 			var file = Components.classes["@mozilla.org/file/local;1"].
 					createInstance(Components.interfaces.nsILocalFile);
 			file.initWithFile(destDir);
-			
-			var fileName = _getFileNameFromURL(url, mimeType);
-			file.append(fileName);
-			
+
+			var fileName = Zotero.File.truncateFileName(
+												_getFileNameFromURL(url, mimeType).toLowerCase(),
+												100); //make sure this matches WPD settings in webpagedump/common.js
+			file.append(fileName)
+
 			if (mimeType == 'application/pdf') {
 				var f = function() {
 					Zotero.Fulltext.indexPDF(file, itemID);
@@ -576,10 +578,10 @@ Zotero.Attachments = new function(){
 				Components.classes["@mozilla.org/moz/jssubscript-loader;1"]
 					.getService(Components.interfaces.mozIJSSubScriptLoader)
 					.loadSubScript("chrome://zotero/content/webpagedump/domsaver.js", wpd);
-				
+
 				wpd.wpdDOMSaver.init(file.path, document);
 				wpd.wpdDOMSaver.saveHTMLDocument();
-				
+
 				attachmentItem.attachmentPath = this.getPath(
 					file, Zotero.Attachments.LINK_MODE_IMPORTED_URL
 				);
@@ -1172,10 +1174,7 @@ Zotero.Attachments = new function(){
 			nsIURL.fileBaseName = nsIURL.fileBaseName + '.' + tld;
 		}
 		
-		// Pass unencoded name to getValidFileName() so that '%20' isn't stripped to '20'
-		nsIURL.fileBaseName = Zotero.File.getValidFileName(decodeURIComponent(nsIURL.fileBaseName));
-		
-		return decodeURIComponent(nsIURL.fileName);
+		return Zotero.File.getValidFileName(decodeURIComponent(nsIURL.fileName));
 	}
 	
 	
diff --git a/chrome/content/zotero/xpcom/file.js b/chrome/content/zotero/xpcom/file.js
@@ -31,6 +31,7 @@ Zotero.File = new function(){
 	this.getContentsFromURL = getContentsFromURL;
 	this.putContents = putContents;
 	this.getValidFileName = getValidFileName;
+	this.truncateFileName = truncateFileName;
 	this.copyToUnique = this.copyToUnique;
 	this.getCharsetFromFile = getCharsetFromFile;
 	this.addCharsetListener = addCharsetListener;
@@ -226,7 +227,7 @@ Zotero.File = new function(){
 		// URL encode when saving attachments that trigger this
 		fileName = fileName.replace(/[\/\\\?%\*:|"<>]/g, '');
 		// Replace newlines and tabs (which shouldn't be in the string in the first place) with spaces
-		fileName = fileName.replace(/[\n\t]/g, ' ');
+		fileName = fileName.replace(/[\r\n\t]+/g, ' ');
 		// Replace various thin spaces
 		fileName = fileName.replace(/[\u2000-\u200A]/g, ' ');
 		// Replace zero-width spaces
@@ -235,13 +236,44 @@ Zotero.File = new function(){
 			// Strip characters not valid in XML, since they won't sync and they're probably unwanted
 			fileName = fileName.replace(/[\u0000-\u0008\u000b\u000c\u000e-\u001f\ud800-\udfff\ufffe\uffff]/g, '');
 		}
-		// Don't allow blank filename
-		if (!fileName) {
+		// Don't allow blank or illegal filenames
+		if (!fileName || fileName == '.' || fileName == '..') {
 			fileName = '_';
 		}
 		return fileName;
 	}
 	
+	/**
+	 * Truncate a filename (excluding the extension) to the given total length
+	 * If the "extension" is longer than 20 characters,
+	 * it is treated as part of the file name
+	 */
+	function truncateFileName(fileName, maxLength) {
+		if(!fileName || (fileName + '').length <= maxLength) return fileName;
+
+		var parts = (fileName + '').split(/\.(?=[^\.]+$)/);
+		var fn = parts[0];
+		var ext = parts[1];
+		//if the file starts with a period , use the whole file
+		//the whole file name might also just be a period
+		if(!fn) {
+			fn = '.' + (ext || '');
+		}
+
+		//treat long extensions as part of the file name
+		if(ext && ext.length > 20) {
+			fn += '.' + ext;
+			ext = undefined;
+		}
+
+		if(ext === undefined) {	//there was no period in the whole file name
+			ext = '';
+		} else {
+			ext = '.' + ext;
+		}
+
+		return fn.substr(0,maxLength-ext.length) + ext;
+	}
 	
 	/*
 	 * Not implemented, but it'd sure be great if it were

	www Unnamed repository; edit this file 'description' to name the repository.
	Log | Files | Refs | Submodules | README | LICENSE

M	chrome/content/zotero/webpagedump/common.js	|	13	+++++--------
M	chrome/content/zotero/webpagedump/domsaver.js	|	15	+++++++++------
M	chrome/content/zotero/xpcom/attachments.js	|	19	+++++++++----------
M	chrome/content/zotero/xpcom/file.js	|	38	+++++++++++++++++++++++++++++++++++---