commit 02aca0660a96c5e22b2825e016e48dcb6cc48c99
parent 9665e67042a796a7bb396e25d18b9507257f4878
Author: Dan Stillman <dstillman@gmail.com>
Date: Thu, 22 Nov 2012 10:14:24 -0800
Merge pull request #194 from aurimasv/shortenFN
Shorten long file names. Closes #8
Diffstat:
4 files changed, 58 insertions(+), 27 deletions(-)
diff --git a/chrome/content/zotero/webpagedump/common.js b/chrome/content/zotero/webpagedump/common.js
@@ -54,6 +54,8 @@ var WPD_DEFAULTHEIGHT = 768;
var WPD_MAXUIERRORCOUNT = 8;
+// maximum character length for a valid file name (including extension)
+var WPD_MAX_FILENAME_LENGTH = 100;
/*function wpdGetTopBrowserWindow()
{
@@ -353,15 +355,10 @@ var wpdCommon = {
},
// replace illegal characters
+ // and shorten long file names
getValidFileName: function (aFileName) {
- aFileName = aFileName.replace(/[\"\?!~`]+/g, "");
- aFileName = aFileName.replace(/[\*\&]+/g, "+");
- aFileName = aFileName.replace(/[\\\/\|\:;]+/g, "-");
- aFileName = aFileName.replace(/[\<]+/g, "(");
- aFileName = aFileName.replace(/[\>]+/g, ")");
- aFileName = aFileName.replace(/[\s]+/g, "_");
- aFileName = aFileName.replace(/[%]+/g, "@");
- return aFileName;
+ aFileName = Zotero.File.getValidFileName(aFileName).toLowerCase();
+ return Zotero.File.truncateFileName(aFileName, WPD_MAX_FILENAME_LENGTH);
},
getURL: function () {
diff --git a/chrome/content/zotero/webpagedump/domsaver.js b/chrome/content/zotero/webpagedump/domsaver.js
@@ -164,7 +164,8 @@ var wpdDOMSaver = {
// Split fileName in Path and Name
- this.name = wpdCommon.getFileLeafName(fileName); // extract fileName from filePath
+ this.name = wpdCommon.getValidFileName(
+ wpdCommon.getFileLeafName(fileName)); // extract fileName from filePath
this.currentDir = wpdCommon.getFilePath(fileName); // only directory
this.name = wpdCommon.splitFileName(this.name)[0]; // no extension!
@@ -221,7 +222,7 @@ var wpdDOMSaver = {
// resolve the javascript links inside the attributes (e.g. onclick,...)
normalizeJavaScriptLink: function (aNode, aAttr) {
var val = aNode.getAttribute(aAttr); // get the attribute value and check for link stuff
- if (!val.match(/\(\'([^\']+)\'/)) return aNode;
+ if (!val || !val.match(/\(\'([^\']+)\'/)) return aNode;
val = RegExp.$1;
if (val.indexOf("/") == -1 && val.indexOf(".") == -1) return aNode;
val = wpdCommon.resolveURL(this.currentURL, val); // it is a link -> resolve and set the URL to the local URL
@@ -409,9 +410,12 @@ var wpdDOMSaver = {
case "link":
// could containt urls (icon, stylesheet and fontdef)
// We have to remove nodes with the stylesheet attribute because they will be added later
- if ((aNode.getAttribute("rel").toLowerCase() == "stylesheet") && (aNode.getAttribute("href").indexOf("chrome://") == -1)) {
+ if(!aNode.hasAttribute("rel")) return aNode;
+ if (aNode.getAttribute("rel").toLowerCase() == "stylesheet"
+ && (aNode.hasAttribute("href") && aNode.getAttribute("href").indexOf("chrome://") == -1)) {
return wpdCommon.removeNodeFromParent(aNode);
- } else if ((aNode.getAttribute("rel").toLowerCase() == "shortcut icon") || (aNode.getAttribute("rel").toLowerCase() == "icon")) {
+ } else if (aNode.getAttribute("rel").toLowerCase() == "shortcut icon"
+ || aNode.getAttribute("rel").toLowerCase() == "icon") {
var aFileName = this.download(aNode.href, true);
// Changed by Dan S. for Zotero -- see this.repairRelativeLinks()
if (aFileName) aNode.setAttribute("href", this.relativeLinkFix(aFileName));
@@ -732,7 +736,6 @@ var wpdDOMSaver = {
// generate a filename
var newFileName = aURL.fileName.toLowerCase();
if (!newFileName) newFileName = "untitled";
- newFileName = wpdCommon.getValidFileName(newFileName);
// same name but different location?
newFileName = this.getUniqueFileNameAndRegister(newFileName, aURLSpec);
// is the file already registered (processed) ?
@@ -1076,7 +1079,7 @@ var wpdDOMSaver = {
// (be sure to call the init function at the top of this file before)
saveHTMLDocument: function () {
try {
- this.saveDocumentEx(this.document, this.name);
+ return this.saveDocumentEx(this.document, this.name);
} catch (ex) {
wpdCommon.addError("[wpdDOMSaver.saveHTMLDocument]\n -> " + ex);
}
diff --git a/chrome/content/zotero/xpcom/attachments.js b/chrome/content/zotero/xpcom/attachments.js
@@ -545,10 +545,12 @@ Zotero.Attachments = new function(){
var file = Components.classes["@mozilla.org/file/local;1"].
createInstance(Components.interfaces.nsILocalFile);
file.initWithFile(destDir);
-
- var fileName = _getFileNameFromURL(url, mimeType);
- file.append(fileName);
-
+
+ var fileName = Zotero.File.truncateFileName(
+ _getFileNameFromURL(url, mimeType).toLowerCase(),
+ 100); //make sure this matches WPD settings in webpagedump/common.js
+ file.append(fileName)
+
if (mimeType == 'application/pdf') {
var f = function() {
Zotero.Fulltext.indexPDF(file, itemID);
@@ -576,10 +578,10 @@ Zotero.Attachments = new function(){
Components.classes["@mozilla.org/moz/jssubscript-loader;1"]
.getService(Components.interfaces.mozIJSSubScriptLoader)
.loadSubScript("chrome://zotero/content/webpagedump/domsaver.js", wpd);
-
+
wpd.wpdDOMSaver.init(file.path, document);
wpd.wpdDOMSaver.saveHTMLDocument();
-
+
attachmentItem.attachmentPath = this.getPath(
file, Zotero.Attachments.LINK_MODE_IMPORTED_URL
);
@@ -1172,10 +1174,7 @@ Zotero.Attachments = new function(){
nsIURL.fileBaseName = nsIURL.fileBaseName + '.' + tld;
}
- // Pass unencoded name to getValidFileName() so that '%20' isn't stripped to '20'
- nsIURL.fileBaseName = Zotero.File.getValidFileName(decodeURIComponent(nsIURL.fileBaseName));
-
- return decodeURIComponent(nsIURL.fileName);
+ return Zotero.File.getValidFileName(decodeURIComponent(nsIURL.fileName));
}
diff --git a/chrome/content/zotero/xpcom/file.js b/chrome/content/zotero/xpcom/file.js
@@ -31,6 +31,7 @@ Zotero.File = new function(){
this.getContentsFromURL = getContentsFromURL;
this.putContents = putContents;
this.getValidFileName = getValidFileName;
+ this.truncateFileName = truncateFileName;
this.copyToUnique = this.copyToUnique;
this.getCharsetFromFile = getCharsetFromFile;
this.addCharsetListener = addCharsetListener;
@@ -226,7 +227,7 @@ Zotero.File = new function(){
// URL encode when saving attachments that trigger this
fileName = fileName.replace(/[\/\\\?%\*:|"<>]/g, '');
// Replace newlines and tabs (which shouldn't be in the string in the first place) with spaces
- fileName = fileName.replace(/[\n\t]/g, ' ');
+ fileName = fileName.replace(/[\r\n\t]+/g, ' ');
// Replace various thin spaces
fileName = fileName.replace(/[\u2000-\u200A]/g, ' ');
// Replace zero-width spaces
@@ -235,13 +236,44 @@ Zotero.File = new function(){
// Strip characters not valid in XML, since they won't sync and they're probably unwanted
fileName = fileName.replace(/[\u0000-\u0008\u000b\u000c\u000e-\u001f\ud800-\udfff\ufffe\uffff]/g, '');
}
- // Don't allow blank filename
- if (!fileName) {
+ // Don't allow blank or illegal filenames
+ if (!fileName || fileName == '.' || fileName == '..') {
fileName = '_';
}
return fileName;
}
/**
 * Shorten a file name so that its total length (base name plus extension)
 * does not exceed maxLength. Characters are dropped from the end of the
 * base name; the extension is kept intact whenever it fits.
 *
 * The extension is the text after the last period that is followed by at
 * least one character. It is treated as part of the base name (and may
 * therefore be cut) when
 *  - the name starts with a period and contains no other period
 *    (e.g. ".bashrc"),
 *  - it is longer than 20 characters, or
 *  - it does not fit within maxLength at all.
 *
 * @param {String} fileName   Name to shorten. Non-string values are converted
 *                            to a string before being measured.
 * @param {Number} maxLength  Maximum length of the result, measured with
 *                            String.length and including the extension.
 * @return {String} fileName unchanged when it is falsy or already short
 *                  enough, otherwise the shortened name.
 */
function truncateFileName(fileName, maxLength) {
	if (!fileName) return fileName;

	var name = fileName + '';
	if (name.length <= maxLength) return fileName;

	// Split at the last period that still has characters after it, so a
	// trailing period ("name.") never produces an empty extension
	var parts = name.split(/\.(?=[^\.]+$)/);
	var base = parts[0];
	var ext = parts[1];

	var suffix = '';
	if (base && ext !== undefined && ext.length <= 20 && ext.length + 1 <= maxLength) {
		suffix = '.' + ext;
	} else {
		// No usable extension (none present, dotfile, overlong extension, or
		// extension that cannot fit): truncate the whole name instead
		base = name;
	}

	// substring() clamps a negative or NaN end to 0, so the result can never
	// grow past maxLength
	return base.substring(0, maxLength - suffix.length) + suffix;
}
/*
* Not implemented, but it'd sure be great if it were