commit fcb6e0c068e090cdadbfd785d11fd59743013fb0
parent 26b161fce2aa4f8f89ac292be490cbdfb951d900
Author: Dan Stillman <dstillman@zotero.org>
Date: Tue, 31 May 2016 18:03:40 -0400
Save snapshots via nsIWebBrowserPersist instead of WebPageDump
The WebPageDump (WPD) code hasn't been updated in many years, and there
was an issue with document permissions in 5.0. We'll need to replace
nsIWebBrowserPersist (nsIWBP) in Electron, but this will do for now.
Attachments are now opened using file:// URIs instead of
zotero://attachment URIs; Standalone already opens them via file://
anyway. Ancient HTML annotations and highlights won't be displayed
anymore, but I'm not sure they worked anyway, and it hasn't been possible
to create them in years. We might be able to write out existing
annotations to notes.
iframes are skipped during saving, in an attempt to reduce the number of
junk ad files. JavaScript in a saved page can still cause problems when
viewing the snapshot, so we might still want to either disable scripts or
force the viewed page offline (if such a thing is possible).
There might be issues with the length of, or the characters in, auxiliary
filenames during cross-platform file syncing. (We had modified the WPD
code, removed here, to shorten/clean them.)
Diffstat:
9 files changed, 85 insertions(+), 1852 deletions(-)
diff --git a/chrome/content/zotero/about.xul b/chrome/content/zotero/about.xul
@@ -67,7 +67,6 @@
<label class="zotero-text-link" href="http://www.w3.org/2005/ajar/tab" value="Tabulator (RDF parser)"/>
<label class="zotero-text-link" href="http://tango.freedesktop.org/Tango_Desktop_Project" value="Tango Desktop Project (pref icons)"/>
<label class="zotero-text-link" href="http://tinymce.moxiecode.com/" value="TinyMCE (rich-text editing)"/>
- <label class="zotero-text-link" href="http://www.dbai.tuwien.ac.at/user/pollak/webpagedump/" value="WebPageDump (snapshot code)"/>
<label class="zotero-text-link" href="http://www.foolabs.com/xpdf/" value="Xpdf (pdftotext)"/>
</vbox>
</vbox>
diff --git a/chrome/content/zotero/webpagedump/common.js b/chrome/content/zotero/webpagedump/common.js
@@ -1,739 +0,0 @@
-/* ***** BEGIN LICENSE BLOCK *****
- * Version: MPL 1.1/GPL 2.0/LGPL 2.1
- *
- * The contents of this file are subject to the Mozilla Public License Version
- * 1.1 (the "License"); you may not use this file except in compliance with
- * the License. You may obtain a copy of the License at
- * http://www.mozilla.org/MPL/
- *
- * Software distributed under the License is distributed on an "AS IS" basis,
- * WITHOUT WARRANTY OF ANY KIND, either express or implied. See the License
- * for the specific language governing rights and limitations under the
- * License.
- *
- * The Original Code is ScrapBook.
- *
- * The Initial Developer of the Original Code is Gomita.
- * Portions created by the Initial Developer are Copyright (C) 2004
- * the Initial Developer. All Rights Reserved.
- *
- * Contributor(s):
- * Bernhard Pollak <pollak@dbai.tuwien.ac.at> (WebPageDump Fork)
- *
- * Alternatively, the contents of this file may be used under the terms of
- * either the GNU Affero General Public License Version 2 or later (the "GPL"), or
- * the GNU Lesser General Public License Version 2.1 or later (the "LGPL"),
- * in which case the provisions of the GPL or the LGPL are applicable instead
- * of those above. If you wish to allow use of your version of this file only
- * under the terms of either the GPL or the LGPL, and not to allow others to
- * use your version of this file under the terms of the MPL, indicate your
- * decision by deleting the provisions above and replace them with the notice
- * and other provisions required by the GPL or the LGPL. If you do not delete
- * the provisions above, a recipient may use your version of this file under
- * the terms of any one of the MPL, the GPL or the LGPL.
- *
- * ***** END LICENSE BLOCK ***** */
-// --------------------------------------------------------------------------------
-// "WebPageDump" Firefox Extension
-// --------------------------------------------------------------------------------
-// - File: "common.js" -
-// - Description:
-// provides common functions (file, preferences, windows, error,...)
-//
-// --------------------------------------------------------------------------------
-var gBrowserWindow = null;
-var gExceptLocation = "about:blank";
-var gCallback = "";
-var gTimeOutID = 0;
-var gTimedOut = false;
-var gWaitForPaint = false;
-
-var MODE_SIMULATE = false;
-var WPD_DEFAULTWIDTH = 1024;
-var WPD_DEFAULTHEIGHT = 768;
-
-var WPD_MAXUIERRORCOUNT = 8;
-
-// maximum character length for a valid file name (excluding extension)
-var WPD_MAX_FILENAME_LENGTH = 100;
-
-/*function wpdGetTopBrowserWindow()
-{
- var winMed = Components.classes["@mozilla.org/appshell/window-mediator;1"]
- .getService(Components.interfaces.nsIWindowMediator);
- var winList = winMed.getZOrderDOMWindowEnumerator("navigator:browser", true);
- if (!winList.hasMoreElements())
- return top.getBrowser().contentWindow; // fallback
-
- return winList.getNext().getBrowser().contentWindow;
-}*/
-
-
-
-/* [14:55:15] paolinho: var browserWin = windowMediator.getMostRecentWindow("navigator:browser");
- const mainTabBox = browserWin.getBrowser().mTabBox;
- const topWindow = browserWin.getBrowser().browsers[mainTabBox.selectedIndex].contentWindow;
-[14:55:50]
- var windowMediator = Components.classes["@mozilla.org/appshell/window-mediator;1"].getService(Components.interfaces.nsIWindowMediator);
-*/
-
-function wpdGetTopBrowserWindow() {
- var windowManager = Components.classes['@mozilla.org/appshell/window-mediator;1'].getService();
- var windowManagerInterface = windowManager.QueryInterface(Components.interfaces.nsIWindowMediator);
- var topWindowOfType = windowManagerInterface.getMostRecentWindow("navigator:browser");
-
- if (topWindowOfType) {
- return topWindowOfType;
- }
- return null;
-}
-
-
-function wpdWindowLoaded() {
- try {
- // this will be called multiple times if the page contains more than one document (frames, flash,...)
- //var browser=this.document.getElementById("content");
- Zotero.debug("[wpdWindowLoaded] ... ");
- var browser = this.top.getBrowser();
- // each time we have to check if the page is fully loaded...
- if (!(browser.webProgress.isLoadingDocument || browser.contentDocument.location == gExceptLocation)) {
- Zotero.debug("[wpdWindowLoaded] window finally loaded");
- gBrowserWindow.clearTimeout(gTimeOutID);
- gBrowserWindow.removeEventListener("load", wpdWindowLoaded, true);
- //dump("[wpdWindowLoaded] calling "+gCallback+"\n");
- if (gWaitForPaint) {
- wpdCommon.sizeWindow(WPD_DEFAULTWIDTH - 1, WPD_DEFAULTHEIGHT); // this is for the strange empty lines bug
- wpdCommon.sizeWindow(WPD_DEFAULTWIDTH, WPD_DEFAULTHEIGHT);
- }
- var w = 0;
- if (gWaitForPaint) w = 5000; // wait for painting
- gBrowserWindow.setTimeout(gCallback, w);
- }
- } catch (ex) {
- Zotero.debug("[wpdWindowLoaded] EXCEPTION: " + ex);
- }
-}
-
-function wpdTimeOut() {
- Zotero.debug("[wpdTimeOut] timeout triggered!");
- gTimedOut = true;
- gBrowserWindow.clearTimeout(gTimeOutID);
- gBrowserWindow.removeEventListener("load", wpdWindowLoaded, true);
- gBrowserWindow.setTimeout(gCallback, 0);
-}
-
-function wpdIsTimedOut() {
- return gTimedOut;
-}
-
-function wpdLoadURL(aURI, aCallback) {
- try {
- gTimedOut = false;
- Zotero.debug("[wpdLoadURL] aURI: " + aURI);
- if (aURI == "") return;
- gBrowserWindow = wpdGetTopBrowserWindow();
- gBrowserWindow.loadURI(aURI);
- gCallback = aCallback;
- // 30 seconds maximum for loading the page
- gTimeOutID = gBrowserWindow.setTimeout(wpdTimeOut, 60000);
- gBrowserWindow.addEventListener("load", wpdWindowLoaded, true);
- } catch (ex) {
- Zotero.debug("[wpdLoadURL] EXCEPTION: " + ex);
- }
-}
-
-var wpdCommon = {
-
- errList: "",
- errCount: 0,
- downloading: false,
- downloaded: false,
-
- allowed_entities:
- ""&'<> ¡¢£¤¥¦" +
- "§¨©ª«¬­®¯°±" +
- "²³´µ¶·¸¹º»" +
- "¼½¾¿ÀÁÂÃÄ" +
- "ÅÆÇÈÉÊËÌÍ" +
- "ÎÏÐÑÒÓÔÕÖ" +
- "רÙÚÛÜÝÞß" +
- "àáâãäåæçè" +
- "éêëìíîïðñò" +
- "óôõö÷øùúûü" +
- "ýþÿŒœŠšŸƒˆ" +
- "˜ΑΒΓΔΕΖΗΘΙΚ" +
- "ΛΜΝΞΟΠΡΣΤΥΦΧΨ" +
- "Ωαβγδεζηθικ" +
- "λμνξοπρςστυφ" +
- "χψωϑϒφϖ   ‌" +
- "‍‎‏–—‘’‚“”„" +
- "†‡•…‰′″‹›" +
- "‾⁄€ℑ℘ℜ™ℵ←↑" +
- "→↓↔↵⇐⇑⇒⇓⇔∀" +
- "∂∃∅∇∈∉∋∏∑−∗√" +
- "∝∞∠∨∩∪∫∴∼≅≈≠≡" +
- "≤≥⊂⊃⊄⊆⊇⊕⊗⊥⋅⌈" +
- "⌉⌊⌋⟨⟩◊♠♣♥♦",
-
-
-
- trim: function (aString) {
- try {
- return (aString.replace(/\s+$/, "").replace(/^\s+/, ""));
- } catch (ex) {
- return aString;
- }
- },
-
-
- // checks the CRLFs at the beginning - if there are CRLFs present
- // one additional CRLF will be added at the beginning
- checkCRLF: function (aNode) {
- try {
- var before = false;
- var after = false;
- if (aNode.parentNode.firstChild == aNode) before = true;
- if (!before && !after) {
- throw new Error("return");
- }
- // why <BR>? Because the <BR> Tag ist not present in text DOM nodes...
- var aString = aNode.nodeValue;
- if (aString.search(/\n/) == -1) throw new Error("return");
- aString = (aString.replace(/\r\n/g, "<br>").replace(/\n/g, "<br>"));
- var a = aString.split("<br>");
- var s = 0;
- var e = 0;
-
-
- if (before) {
- for (var i = 0; i < a.length; i++) {
- if (this.trim(a[i]) != "") {
- break;
- } else {
- s++;
- break; //we only need to now if there are any
- }
- }
- }
-
- aString = a.join("\r\n");
- if (s > 0) aString = "\r\n" + aString;
- return aString;
-
- } catch (ex) {
- return aNode.nodeValue;
- }
- },
-
- unicodeToEntity: function (text, charset) {
-
- function convertEntity(letter) {
- try {
- var l = gEntityConverter.ConvertToEntity(letter, entityVersion);
- // is the entity allowed?
- if (entities.indexOf(l) >= 0) {
- return l;
- } else if ((l != letter)) {
- return "&#" + letter.charCodeAt(0) + ";";
- }
- } catch (ex) {}
- // now we check if the letter is valid inside the destination charset
- // (if the result is a ? it is not valid - except letter=?)
- try {
- var s = gUnicodeConverter.ConvertFromUnicode(letter);
- if ((charset != "UTF-8") && (s == "?")) {
- return "&#" + letter.charCodeAt(0) + ";";
- }
- } catch (ex) {}
- return letter;
- }
-
- if (!gUnicodeConverter) {
- try {
- var gUnicodeConverter = Components.classes['@mozilla.org/intl/scriptableunicodeconverter'].getService(Components.interfaces.nsIScriptableUnicodeConverter);
- gUnicodeConverter.charset = charset;
- } catch (ex) {
- Zotero.debug("gUnicodeConverter EXCEPTION:" + ex);
- }
- }
-
- if (!gEntityConverter) {
- try {
- var gEntityConverter = Components.classes["@mozilla.org/intl/entityconverter;1"].createInstance(Components.interfaces.nsIEntityConverter);
- } catch (e) {
- Zotero.debug("gEntityConverter EXCEPTION:" + ex);
- }
- }
-
- // Firefox - Source Code Snippet:
- // const unsigned long entityNone = 0;
- // const unsigned long html40Latin1 = 1;
- // const unsigned long html40Symbols = 2;
- // const unsigned long html40Special = 4; // excludes ", &, <, >
- // const unsigned long transliterate = 8;
- // const unsigned long mathml20 = 16;
- // const unsigned long html32 = html40Latin1;
- // const unsigned long html40 = html40Latin1+html40Symbols+html40Special;
- // const unsigned long entityW3C = html40+mathml20;
- const entityVersion = Components.interfaces.nsIEntityConverter.html40;
- // convert to entities (
- // replace other chars > 0x7f via nsIEntityConverter/convertEntity
- var entities = this.allowed_entities;
- text = text.replace(/[^\0-\u007f]/g, convertEntity);
- return text;
- },
-
-
- playSound: function () {
- try {
- var sound = Components.classes["@mozilla.org/sound;1"].createInstance(Components.interfaces.nsISound);
- sound.playSystemSound("ringin.wav");
- } catch (ex) {}
- },
-
- // return the current focused window
- getFocusedWindow: function () {
- var win = document.commandDispatcher.focusedWindow;
- if (!win || win == window || win instanceof Components.interfaces.nsIDOMChromeWindow) win = window._content;
- return win;
- },
-
- sizeWindow: function (w, h) {
- try {
- var window = this.getFocusedWindow();
- window.moveTo(0, 0);
- if ((w == 0) || (w > screen.availWidth)) w = screen.availWidth;
- if ((h == 0) || (w > screen.availHeight)) h = screen.availHeight;
- window.resizeTo(w, h);
- window.focus();
- } catch (ex) {}
- },
-
- // add a line to the error list (displays a maximum of 15 errors)
- addError: function (errorMsg, errorObj) {
- if (errorMsg) Zotero.debug(errorMsg);
- if (errorObj) Zotero.debug(errorObj);
- /*
- if (this.errCount < WPD_MAXUIERRORCOUNT) {
- if (this.errList.indexOf(aError) > -1) return; // is the same
- this.errList = this.errList + aError + "\n";
- } else if (this.errCount == WPD_MAXUIERRORCOUNT) {
- this.errList = this.errList + '...';
- }
- this.errCount++;
- */
- },
-
- saveWebPage: function (aDestFile) {
- Zotero.debug("[saveWebPage] " + aDestFile);
- var nsIWBP = Components.classes["@mozilla.org/embedding/browser/nsWebBrowserPersist;1"].createInstance(Components.interfaces.nsIWebBrowserPersist);
- var doc = window.content.document;
- var file = Components.classes["@mozilla.org/file/local;1"].createInstance(Components.interfaces.nsILocalFile);
- file.initWithPath(aDestFile);
- var dataPath = Components.classes["@mozilla.org/file/local;1"].createInstance(Components.interfaces.nsILocalFile);
- dataPath.initWithPath(this.getFilePath(aDestFile));
- nsIWBP.saveDocument(doc, file, dataPath, null, 0, 0);
- },
-
- // returns num as string of length i filled up with 0s
- addLeftZeros: function (num, i) {
- var s = "" + num;
- var r = "";
- for (var f = 0; f < i - s.length; f++) r = r + "0";
- return r + s;
- },
-
- // split the filename in filename and extension
- splitFileName: function (aFileName) {
- var pos = aFileName.lastIndexOf(".");
- var ret = [];
- if (pos != -1) {
- ret[0] = aFileName.substring(0, pos);
- ret[1] = aFileName.substring(pos + 1, aFileName.length);
- } else {
- ret[0] = aFileName;
- ret[1] = "";
- }
- return ret;
- },
-
- // replace illegal characters
- // and shorten long file names
- getValidFileName: function (aFileName) {
- aFileName = Zotero.File.getValidFileName(aFileName);
- return Zotero.File.truncateFileName(aFileName, WPD_MAX_FILENAME_LENGTH);
- },
-
- getURL: function () {
- return top.window._content.document.location.href;
- },
-
- // remove get variables from an URL
- removeGETFromURL: function (aURL) {
- var pos;
- aURL = ((pos = aURL.indexOf("?")) != -1) ? aURL.substring(0, pos) : aURL;
- aURL = ((pos = aURL.indexOf("#")) != -1) ? aURL.substring(0, pos) : aURL;
- return aURL;
- },
-
- // extract filename from URL
- getFileName: function (aURL) {
- var pos;
- aURL = this.removeGETFromURL(aURL);
- aURL = ((pos = aURL.lastIndexOf("/")) != -1) ? aURL.substring(++pos) : aURL;
- return aURL;
- },
-
- filePathToURI: function (filePath) {
- var obj_File = Components.classes["@mozilla.org/file/local;1"].getService(Components.interfaces.nsILocalFile);
- obj_File.initWithPath(filePath);
- var obj_FPH = Components.classes["@mozilla.org/network/protocol;1?name=file"].getService(Components.interfaces.nsIFileProtocolHandler);
- return obj_FPH.getURLSpecFromFile(obj_File);
- },
-
- URLToFilePath: function (aURL) {
- var obj_FPH = Components.classes["@mozilla.org/network/protocol;1?name=file"].getService(Components.interfaces.nsIFileProtocolHandler);
- try {
- return obj_FPH.getFileFromURLSpec(aURL).path;
- } catch (ex) {
- return aURL;
- }
- },
-
- // right part of filepath/filename
- getFileLeafName: function (filePath) {
- var obj_File = Components.classes["@mozilla.org/file/local;1"].getService(Components.interfaces.nsILocalFile);
- obj_File.initWithPath(filePath);
- return obj_File.leafName;
- },
-
- getFilePath: function (filePath) {
- var obj_File = Components.classes["@mozilla.org/file/local;1"].getService(Components.interfaces.nsILocalFile);
- obj_File.initWithPath(filePath);
- var pos; // Added by Dan S. for Zotero
- return ((pos = filePath.lastIndexOf(obj_File.leafName)) != -1) ? filePath.substring(0, pos) : filePath;
- },
-
- appendFilePath: function (filePath, appendPath) {
- var obj_File = Components.classes["@mozilla.org/file/local;1"].getService(Components.interfaces.nsILocalFile);
- obj_File.initWithPath(filePath);
- obj_File.appendRelativePath(appendPath);
- return obj_File.path;
- },
-
- pathExists: function (filePath) {
- var obj_File = Components.classes["@mozilla.org/file/local;1"].getService(Components.interfaces.nsILocalFile);
- try {
- obj_File.initWithPath(filePath);
- return obj_File.exists();
- } catch (ex) {
- return false;
- }
- },
-
- // add the HTML Tag Stuff to aNode and embedd the aNode.innerHTML between the tags
- nodeToHTMLString: function (aNode) {
- if (aNode == null) return "";
- var tag = "<" + aNode.nodeName.toLowerCase();
- for (var i = 0; i < aNode.attributes.length; i++) {
- tag += ' ' + aNode.attributes[i].name + '="' + aNode.attributes[i].value + '"';
- }
- tag += ">\n";
- return tag + aNode.innerHTML + "</" + aNode.nodeName.toLowerCase() + ">\n";
- },
-
- ConvertFromUnicode16: function (aString, charset) {
- if (!aString) return "";
- try {
- var UNICODE = Components.classes['@mozilla.org/intl/scriptableunicodeconverter'].getService(Components.interfaces.nsIScriptableUnicodeConverter);
- UNICODE.charset = charset;
- aString = UNICODE.ConvertFromUnicode(aString);
- aString = aString + UNICODE.Finish();
- } catch (ex) {
- //this.addError("[wpdCommon.convertStringToCharset]:\n -> charset: "+charset+"\n -> "+ex);
- }
- return aString;
- },
-
- ConvertToUnicode16: function (aString, charset) {
- if (!aString) return "";
- try {
- var UNICODE = Components.classes['@mozilla.org/intl/scriptableunicodeconverter'].getService(Components.interfaces.nsIScriptableUnicodeConverter);
- UNICODE.charset = charset;
- aString = UNICODE.ConvertToUnicode(aString);
- } catch (ex) {
- //this.addError("[wpdCommon.convertStringToCharset]:\n -> charset: "+charset+"\n -> "+ex);
- }
- return aString;
- },
-
- // convert the doctype to an HTML doctype String
- doctypeToHTMLString: function (aDoctype) {
- if (!aDoctype) return "";
- var ret = "<!DOCTYPE " + aDoctype.name;
- if (aDoctype.publicId) ret += ' PUBLIC "' + aDoctype.publicId + '"';
- if (aDoctype.systemId) ret += ' "' + aDoctype.systemId + '"';
- ret += ">\n";
- return ret;
- },
-
- addCommentTag: function (targetNode, aComment) {
- targetNode.appendChild(document.createTextNode("\n"));
- targetNode.appendChild(document.createComment(aComment));
- targetNode.appendChild(document.createTextNode("\n"));
- },
-
-
- removeNodeFromParent: function (aNode) {
- // Added by Dan S. for Zotero
- var document = aNode.ownerDocument;
-
- var newNode = document.createTextNode("");
- aNode.parentNode.replaceChild(newNode, aNode);
- aNode = newNode;
- return aNode;
- },
-
- // convert URL String to Object
- // for easier URL handling
- convertURLToObject: function (aURLString) {
- var aURL = Components.classes['@mozilla.org/network/standard-url;1'].createInstance(Components.interfaces.nsIURL);
- aURL.spec = aURLString;
- return aURL;
- },
-
- // resolves the relative URL (aRelURL) with the base URL (aBaseURL)
- resolveURL: function (aBaseURL, aRelURL) {
- try {
- var aBaseURLObj = this.convertURLToObject(aBaseURL);
- return aBaseURLObj.resolve(aRelURL);
- } catch (ex) {
- this.addError("[wpdCommon.resolveURL]:\n -> aBaseURL: " + aBaseURL + "\n -> aRelURL: " + aRelURL, ex);
- }
- return "";
- },
-
- getHostName: function (aURL) {
- try {
- var aURLObj = Components.classes['@mozilla.org/network/standard-url;1'].createInstance(Components.interfaces.nsIURI);
- aURLObj.spec = aURL
- return aURLObj.asciiHost;
- } catch (ex) {
- this.addError("[wpdCommon.getHostName]:\n -> aURL: " + aURL, ex);
- }
- return "";
- },
-
- convertUrlToASCII: function (aURL) {
- try {
- var aURLObj = Components.classes['@mozilla.org/network/standard-url;1'].createInstance(Components.interfaces.nsIURI);
- aURLObj.spec = aURL
- return aURLObj.asciiSpec;
- } catch (ex) {
- this.addError("[wpdCommon.getHostName]:\n -> aURL: " + aURL, ex);
- }
- return "";
- },
-
- createDir: function (str_Dir) {
- var obj_File = Components.classes["@mozilla.org/file/local;1"].createInstance(Components.interfaces.nsILocalFile);
- obj_File.initWithPath(str_Dir);
- if (!obj_File.exists()) obj_File.create(obj_File.DIRECTORY_TYPE, 0700);
- },
-
- readDir: function (str_Dir) {
- var obj_File = Components.classes["@mozilla.org/file/local;1"].createInstance(Components.interfaces.nsILocalFile);
- obj_File.initWithPath(str_Dir);
- if (obj_File.exists()) return obj_File.directoryEntries;
- return [];
- },
-
- fileSize: function (str_Filename) {
- var obj_File = Components.classes["@mozilla.org/file/local;1"].createInstance(Components.interfaces.nsILocalFile);
- obj_File.initWithPath(str_Filename);
- return obj_File.fileSize;
- },
-
- // read the file (str_Filename) to a String Buffer (str_Buffer)
- readFile: function (str_Filename, removeComments, text) {
- try {
- var obj_File = Components.classes["@mozilla.org/file/local;1"].createInstance(Components.interfaces.nsILocalFile);
- obj_File.initWithPath(str_Filename);
- if (!obj_File.exists()) {
- this.addError("[wpdCommon.readFile]:\n -> str_Filename: " + str_Filename + "\n -> file not found!");
- return "";
- }
-
- var obj_Transport = Components.classes["@mozilla.org/network/file-input-stream;1"].createInstance(Components.interfaces.nsIFileInputStream);
-
- obj_Transport.init(obj_File, 0x01, 004, 0);
-
- var sis = Components.classes["@mozilla.org/scriptableinputstream;1"].createInstance(Components.interfaces.nsIScriptableInputStream);
- sis.init(obj_Transport);
- var output = sis.read(sis.available());
- if (text) output = output.replace(/\r/g, "");
- if (text && removeComments) {
- output = output.replace(/^\/\/.*/g, "");
- output = output.replace(/\n\/\/.*/g, "");
- output = output.replace(/\n\n+/g, "\n");
- }
- if (text) output = output.split(/\n/g);
- return output;
- } catch (ex) {
- this.addError("[wpdCommon.readFile]:\n -> str_Filename: " + str_Filename, ex);
- }
- return "";
- },
-
- // write the String Buffer (str_Buffer) to a file (str_Filename)
- writeFile: function (str_Buffer, str_Filename) {
- if (MODE_SIMULATE) return true;
- try {
- var obj_File = Components.classes["@mozilla.org/file/local;1"].createInstance(Components.interfaces.nsILocalFile);
- obj_File.initWithPath(str_Filename);
- if (!obj_File.exists()) obj_File.create(Components.interfaces.nsIFile.NORMAL_FILE_TYPE, 0666);
-
- var obj_Transport = Components.classes["@mozilla.org/network/file-output-stream;1"].createInstance(Components.interfaces.nsIFileOutputStream);
-
- /* Open flags
- #define PR_RDONLY 0x01 - Open for reading only.
- #define PR_WRONLY 0x02 - Open for writing only.
- #define PR_RDWR 0x04 - Open for reading and writing.
- #define PR_CREATE_FILE 0x08 - If the file does not exist, the file is created. If the file exists, this flag has no effect.
- #define PR_APPEND 0x10 - The file pointer is set to the end of the file prior to each write.
- #define PR_TRUNCATE 0x20 - If the file exists, its length is truncated to 0.
- #define PR_SYNC 0x40 - If set, each write will wait for both the file data and file status to be physically updated.
- #define PR_EXCL 0x80 - With PR_CREATE_FILE, if the file does not exist, the file is created. If the file already exists, no action and NULL is returned.
-
- File modes
- 'mode' is currently only applicable on UNIX platforms.
- The 'mode' argument may be ignored by PR_Open on other platforms.
- 00400 Read by owner.
- 00200 Write by owner.
- 00100 Execute (search if a directory) by owner.
- 00040 Read by group.
- 00020 Write by group.
- 00010 Execute by group.
- 00004 Read by others.
- 00002 Write by others
- 00001 Execute by others.
- */
- obj_Transport.init(obj_File, 0x20 | 0x04 | 0x08, 064, 0);
- obj_Transport.write(str_Buffer, str_Buffer.length);
- obj_Transport.flush();
- obj_Transport.close();
- return true;
- } catch (ex) {
- this.addError("[wpdCommon.writeFile]:\n -> str_Filename: " + str_Filename, ex);
- }
- return false;
- },
-
-
- copyFile: function (sourcefile, destfile) {
-
- var destdir = this.getFilePath(destfile);
- destfile = this.getFileLeafName(destfile);
- var aFile = Components.classes["@mozilla.org/file/local;1"].createInstance(Components.interfaces.nsILocalFile);
- if (!aFile) return false;
-
- var aDir = Components.classes["@mozilla.org/file/local;1"].createInstance(Components.interfaces.nsILocalFile);
- if (!aDir) return false;
-
- aFile.initWithPath(sourcefile);
-
- aDir.initWithPath(destdir);
-
- aFile.copyToFollowingLinks(aDir, destfile);
- return true; // Added by Dan S. for Zotero
- },
-
- // download aSourceURL to aTargetFilename
- // (works also on local files...)
- downloadFile: function (aSourceURL, aTargetFilename) {
- if (MODE_SIMULATE) return true;
- try {
- //new obj_URI object
- var obj_URI = Components.classes["@mozilla.org/network/io-service;1"]
- .getService(Components.interfaces.nsIIOService)
- .newURI(aSourceURL, null, null);
-
- //new file object
- var obj_TargetFile = Components.classes["@mozilla.org/file/local;1"]
- .createInstance(Components.interfaces.nsILocalFile);
- //set file with path
- // NOTE: This function has a known bug on the macintosh and other OSes
- // which do not represent file locations as paths. If you do use this
- // function, be very aware of this problem!
- obj_TargetFile.initWithPath(aTargetFilename);
-
- //new persistence object
- var obj_Persist = Components.classes["@mozilla.org/embedding/browser/nsWebBrowserPersist;1"]
- .createInstance(Components.interfaces.nsIWebBrowserPersist);
-
- // set flags
- const nsIWBP = Components.interfaces.nsIWebBrowserPersist;
- var flags = nsIWBP.PERSIST_FLAGS_REPLACE_EXISTING_FILES
- | nsIWBP.PERSIST_FLAGS_FROM_CACHE;
- //nsIWBP.PERSIST_FLAGS_BYPASS_CACHE;
- obj_Persist.persistFlags = flags;
-
- // has the url the same filetype like the file extension?
- //save file to target
- Zotero.Utilities.Internal.saveURI(obj_Persist, obj_URI, obj_TargetFile);
-
- return true;
-
- } catch (ex) {
- aSourceURL = this.removeGETFromURL(aSourceURL);
- this.addError("[wpdCommon.downloadFile]:\n -> aSourceURL: " + aSourceURL.substring(aSourceURL.length - 60) + "\n -> aTargetFilename: " + aTargetFilename, ex);
- }
- return false;
- },
-
- // get the integer preferences
- getIntPrefs: function (branch) {
- var mPrefSvc = Components.classes["@mozilla.org/preferences-service;1"].getService(Components.interfaces.nsIPrefService);
- return mPrefSvc.getIntPref(branch);
- },
-
- // set the integer preferences
- setIntPrefs: function (branch, value) {
- var mPrefSvc = Components.classes["@mozilla.org/preferences-service;1"].getService(Components.interfaces.nsIPrefService);
- return mPrefSvc.setIntPref(branch, value);
- },
-
- // get the integer preferences
- getStrPrefs: function (branch) {
- var mPrefSvc = Components.classes["@mozilla.org/preferences-service;1"].getService(Components.interfaces.nsIPrefService);
- return mPrefSvc.getCharPref(branch);
- },
-
- // set the string preferences
- setStrPrefs: function (branch, value) {
- var mPrefSvc = Components.classes["@mozilla.org/preferences-service;1"].getService(Components.interfaces.nsIPrefService);
- return mPrefSvc.setCharPref(branch, value);
- },
-
- // get the string preferences
- getStrPrefsEx: function (branch) {
- var mPrefSvc = Components.classes["@mozilla.org/preferences-service;1"].getService(Components.interfaces.nsIPrefBranch);
- return mPrefSvc.getComplexValue(branch, Components.interfaces.nsISupportsString).data;
- },
-
- // set the string preferences
- setStrPrefsEx: function (branch, value) {
- var str = Components.classes["@mozilla.org/supports-string;1"].createInstance(Components.interfaces.nsISupportsString);
- str.data = value;
- var mPrefSvc = Components.classes["@mozilla.org/preferences-service;1"].getService(Components.interfaces.nsIPrefBranch);
- return mPrefSvc.setComplexValue(branch, Components.interfaces.nsISupportsString, str);
- },
-
-
- // Get the preferences branch ("browser.download." for normal 'save' mode)...
- setBoolPrefs: function (branch, value) {
- var mPrefSvc = Components.classes["@mozilla.org/preferences-service;1"].getService(Components.interfaces.nsIPrefService);
- return mPrefSvc.setBoolPref(branch, value);
- }
-
-};
-\ No newline at end of file
diff --git a/chrome/content/zotero/webpagedump/domsaver.js b/chrome/content/zotero/webpagedump/domsaver.js
@@ -1,1091 +0,0 @@
-/* ***** BEGIN LICENSE BLOCK *****
- * Version: MPL 1.1/GPL 2.0/LGPL 2.1
- *
- * The contents of this file are subject to the Mozilla Public License Version
- * 1.1 (the "License"); you may not use this file except in compliance with
- * the License. You may obtain a copy of the License at
- * http://www.mozilla.org/MPL/
- *
- * Software distributed under the License is distributed on an "AS IS" basis,
- * WITHOUT WARRANTY OF ANY KIND, either express or implied. See the License
- * for the specific language governing rights and limitations under the
- * License.
- *
- * The Original Code is ScrapBook.
- *
- * The Initial Developer of the Original Code is Gomita.
- * Portions created by the Initial Developer are Copyright (C) 2004
- * the Initial Developer. All Rights Reserved.
- *
- * Contributor(s):
- * Bernhard Pollak <pollak@dbai.tuwien.ac.at> (WebPageDump Fork)
- *
- * Alternatively, the contents of this file may be used under the terms of
- * either the GNU Affero General Public License Version 2 or later (the "GPL"), or
- * the GNU Lesser General Public License Version 2.1 or later (the "LGPL"),
- * in which case the provisions of the GPL or the LGPL are applicable instead
- * of those above. If you wish to allow use of your version of this file only
- * under the terms of either the GPL or the LGPL, and not to allow others to
- * use your version of this file under the terms of the MPL, indicate your
- * decision by deleting the provisions above and replace them with the notice
- * and other provisions required by the GPL or the LGPL. If you do not delete
- * the provisions above, a recipient may use your version of this file under
- * the terms of any one of the MPL, the GPL or the LGPL.
- *
- * ***** END LICENSE BLOCK ***** */
-// --------------------------------------------------------------------------------
-// "WebPageDump" Firefox Extension
-// --------------------------------------------------------------------------------
-// - File: "domsaver.js" -
-// - Description:
-// Makes a (hopefully perfect) local copy of the currently open web page.
-// Current browsers sometimes make errors when saving a web page. The files
-// are saved in one flat directory (without subdirectories).
-// - Using:
-// 1. call "wpdDOMSaver.init(filePath)" and pass the full destination path
-// 2. afterwards call "wpdDOMSaver.saveHTMLDocument" for saving the (active) window
-// --------------------------------------------------------------------------------
-// Call Tree Overview - wpdDOMSaver
-//
-// saveHTMLDocument
-// saveDocumentEx (decide if we have a HTML or another file)
-// saveDocumentFile (we have a non HTML file (e.g for embedded objects - images, movies,...))
-// download (we download the file and ...)
-// writefile (... make a HTML wrapper file)
-// saveDocumentHTML (we have a HTML File)
-// processDOMRecursively (go through the DOM nodes)
-// processDOMNode (for each node we do extensive processing (links, javascript,...))
-// download (for image,flash,... references)
-// saveDocumentEx ... (starting again with "saveDocumentEx" for frame documents)
-// saveDocumentCSS (save CSS File)
-// processCSSRecursively (process the CSS text)
-// processCSSText (do some replacement stuff and link processing)
-// download (download CSS image references)
-// generateHTMLString (create the HTML string)
-//
-//
-// --------------------------------------------------------------------------------
-//
-//
-// TODO: use version information from rdf file...
-var WPD_VERSION = "0.2";
-
-
-// Bug variables: set to false if the bug is not present anymore
-
-// CRLFBUG: innerHTML trims the text inside a tag. This leads
-// to problems with the PRE tag, where sometimes one leading
-// carriage return is lost...
-var WPD_CRLFBUG = true;
-
-// ENTITYBUG: HTML entities are lost inside the DOM tree (they
-// are converted to the corresponding Unicode characters), which
-// results in problems when using a non-Unicode charset as output
-// target where these values/symbols do not exist. So we call
-// the ConvertToEntities XPCOM function for generating the usual
-// HTML entities...
-// (strictly speaking this is not a bug but a design limitation)
-var WPD_ENTITYBUG = false;
-
-// CSSSCROLLBUG: The css "scroll" property of "background" is
-// losing the zero vertical position, leading to a wrongly
-// positioned background (centered by default)...
-var WPD_CSSSCROLLBUG = true;
-// CSSBACKGROUNDPOSITIONBUG: "background-position 0 0" is
-// losing the zero vertical position
-var WPD_CSSBACKGROUNDPOSITIONBUG = true;
-
-// DOCTYPEBUG: If the doctype is inserted before
-// the <HTML> tag there would be rendering errors with the
-// right-to-left writing direction, because there are problems
-// with the DIR attribute (text direction (rtl,ltr,lro,rlo)).
-// Positioning the doctype below the <HTML> tag would fix the
-// problem.
-// But: inserting the doctype only below the <HTML> tag
-// results in small layout changes in some tables. So we
-// leave the doctype at the original position before the
-// <HTML> tag and insert the doctype entry a second
-// time below the <HTML> tag...
-var WPD_DOCTYPEBUG = false;
-
-// JAVASCRIPTSRCBUG: Deleting the "src" attribute together
-// with the whole <SCRIPT> tag may result in unexpected
-// layout changes (table width is changed). So we set the
-// "src" attribute of the <SCRIPT> tag to an empty string
-// and don't delete the whole tag...
-// Remark: it may be necessary to use an invalid IP address
-// (e.g. http://0.0.0.0) but this may lead to other strange
-// layout dependencies...
-var WPD_JAVASCRIPTSRCBUG = true;
-
-// CLONENODEBUG: cloneNode copies only the initial state of
-// the INPUT fields and ignores the actual values of the fields.
-// We introduced this.curBody and the getCurrentNodeValue function.
-var WPD_CLONENODEBUG = true;
-
-
-var wpdDOMSaver = {
-
- name: "", // base name of the target file, without extension (set in init)
- document: null, // the original document
- curDocument: null, // the current document
- curCharacterSet: "", // the current characterset
- curBody: null, // the current body node (inclusive child nodes)
- currentDir: "", // directory part of the target file path (set in init)
- baseURL: "", // the original base url
- currentURL: "", // the current url (necessary for frames)
- fileInfo: [], // for saving already processed files and double name checking
- // (cause we use one flat directory for all files)
- option: {}, // save options; the defaults are assigned in init
- frameList: [], // frames collected by setFrameList
- frameNumber: 0, // index into frameList of the next frame to save
- dateObj: null, // Date object created when init runs
-
- // initialize the properties (set document, URL, Directory, ...)
- init: function (fileName, document) {
- Zotero.debug("[wpdDOMSaver.init] ...");
-
- this.name = "";
- this.document = null;
- this.curDocument = null;
- this.curCharacterSet = "";
- this.curBody = null;
- this.currentDir = "";
- this.baseURL = "";
- this.currentURL = "";
- this.fileInfo = []; // clear registered downloaded files...
-
- this.option = {};
- this.frameList = []; // clear frame list
- this.frameNumber = 0;
-
- this.dateObj = new Date();
-
-
- // Split fileName in Path and Name
-
- this.name = wpdCommon.getValidFileName(
- wpdCommon.getFileLeafName(fileName)); // extract fileName from filePath
- this.currentDir = wpdCommon.getFilePath(fileName); // only directory
- this.name = wpdCommon.splitFileName(this.name)[0]; // no extension!
-
-
- // Added by Dan S. for Zotero, replacing three lines below
- this.document = document;
- this.setFrameList(document.defaultView);
- this.baseURL = document.location.href;
-
-
- // Set the document and frames
- //this.document = top.window._content.document;
-
- //this.setFrameList(top.window._content);
-
- // set the urls
- //this.baseURL = wpdCommon.getURL(); // initial base url
- this.currentURL = this.baseURL; // current base url - needed for frame processing
- // (without frames this property will always be like the baseURL)
-
- // default options - for the files which should be downloaded
- // (this is only for external link references not for the embedded files)
- this.option = {
- "image": false,
- "sound": false,
- "movie": false,
- "archive": false,
- "custom": "", // comma delimited custom extensions (e.g. doc,xls,...)
- "format": true, // when false we get only naked html without images
-
- // Changed by Dan for Zotero
- "script": true, // no scripts
-
- "encodeUTF8": true, // write the DOM Tree as UTF-8 and change the charset entry of the document
- "metainfo": true, // include meta tags with URL and date/time information
- "metacharset": false // if the meta charset is defined inside html override document charset
- //"xtagging" : true // include a x tag around each word
- };
-
-
- },
-
-
- // get all frames in the document (recursively) and save in this.frameList
- setFrameList: function (aDocument) {
- try {
- for (var f = 0; f < aDocument.frames.length; f++) {
- this.frameList.push(aDocument.frames[f]);
- this.setFrameList(aDocument.frames[f]);
- }
- } catch (ex) {}
- },
-
- // resolve the javascript links inside the attributes (e.g. onclick,...)
- normalizeJavaScriptLink: function (aNode, aAttr) {
- var val = aNode.getAttribute(aAttr); // get the attribute value and check for link stuff
- if (!val || !val.match(/\(\'([^\']+)\'/)) return aNode;
- val = RegExp.$1;
- if (val.indexOf("/") == -1 && val.indexOf(".") == -1) return aNode;
- val = wpdCommon.resolveURL(this.currentURL, val); // it is a link -> resolve and set the URL to the local URL
- if (aNode.nodeName.toLowerCase() == "img") {
- if (aNode.parentNode.nodeName.toLowerCase() == "a") {
- aNode.parentNode.setAttribute("href", val); // change the href of img to the onclick url
- aNode.removeAttribute("onclick");
- } else {
- val = "window.open('" + val + "');"; // if this is not a reference make a window open function for the img
- aNode.setAttribute(aAttr, val);
- }
- } else {
- if (aNode.hasAttribute("href") && aNode.getAttribute("href").indexOf("http://") != 0) {
- aNode.setAttribute("href", val);
- aNode.removeAttribute("onclick");
- }
- }
- return aNode;
- },
-
- // check if the file extension of the url is specified in the options array
- checkFileTypeOptions: function (aURL) {
- var ext = wpdCommon.splitFileName(wpdCommon.getFileName(aURL))[1].toLowerCase();
- var flag = false;
- switch (ext) {
- case "jpg":
- case "jpeg":
- case "png":
- case "gif":
- flag = this.option["image"];
- break;
- case "mp3":
- case "wav":
- case "ram":
- case "wma":
- flag = this.option["sound"];
- break;
- case "mpg":
- case "mpeg":
- case "avi":
- case "ram":
- case "rm":
- case "mov":
- case "wmv":
- flag = this.option["movie"];
- break;
- case "zip":
- case "lzh":
- case "rar":
- case "xpi":
- flag = this.option["archive"];
- break;
- default:
- if (ext && this.option["custom"]) {
- if ((", " + this.option["custom"] + ", ").indexOf(", " + ext + ", ") != -1) flag = true;
- }
- }
- if (aURL.indexOf("file://") == 0 && !aURL.match(/\.html$/)) flag = true;
- return flag;
- },
-
-
- // do the conversion from the DOM Text to the destination Charset
- convertEntity: function (aText) {
- if (this.option["encodeUTF8"]) {
- return wpdCommon.unicodeToEntity(aText, "UTF-8");
- } else {
- return wpdCommon.unicodeToEntity(aText, this.curCharacterSet);
- }
- },
-
- // we only can manage GIF animations - Flash does not work...
- disableAnimation: function (aNode) {
- // thanx to pageanimator extension...
- /* try {
- //dump("inspecting "+aNode.nodeName+"\n");
- //aNode.setAttribute("swLiveConnect", "true");
- aNode.StopPlay();
- dump ("prepare flash deanimation ... ");
- if ( aNode.hasAttribute("play") ) aNode.setAttribute("play", "false");
- dump ("flash deanimation ... ");
- aNode.Rewind(); // seems to be the key for some obnoxious instances
- aNode.StopPlay();
- dump ("ready! \n");
- } catch (e) {} */
- try {
- var container = aNode.QueryInterface(Components.interfaces.nsIImageLoadingContent)
- .getRequest(Components.interfaces.nsIImageLoadingContent.CURRENT_REQUEST)
- .image;
- container.animationMode = Components.interfaces.imgIContainer.kDontAnimMode;
- } catch (e) {}
- },
-
- // get the node value of aNode directly from the actual DOM tree (WPD_CLONENODEBUG)
- getCurrentNodeValue: function (aNode) {
- try {
- this.curDocument.body.cloneNode(false);
- var body = this.curDocument.body;
- } catch (ex) {
- var body = this.curDocument.getElementsByTagName("body")[0];
- }
- var refnodes = body.getElementsByTagName(aNode.nodeName);
- var nodes = this.curBody.getElementsByTagName(aNode.nodeName);
- if (refnodes.length != nodes.length) return aNode.value;
- for (var i = 0; i < refnodes.length; i++) {
- if ((nodes[i] == aNode) && (refnodes[i].name == aNode.name) && (refnodes[i].defaultValue == aNode.defaultValue)) {
- return refnodes[i].value;
- }
- }
- return aNode.value;
- },
-
- // process the DOM Node (update the links, remove attributes and process the options)
- processDOMNode: function (aNode) {
- this.disableAnimation(aNode);
- try {
- switch (aNode.nodeName.toLowerCase()) {
- case "img":
- case "embed":
- // "embed": embedding multimedia content
- if (this.option["format"]) {
- if (aNode.hasAttribute("onclick")) aNode = this.normalizeJavaScriptLink(aNode, "onclick");
- var aDownload = true;
- if (aNode.nodeName.toLowerCase() == "img") {
- try {
- aDownload = aNode.complete;
- } catch (ex) {}
- }
- var aFileName = this.download(aNode.src, aDownload);
- // Changed by Dan S. for Zotero -- see this.repairRelativeLinks()
- if (aFileName) aNode.setAttribute("src", this.relativeLinkFix(aFileName));
- } else {
- return wpdCommon.removeNodeFromParent(aNode);
- }
- break;
- case "object":
- // for embedding different data sources in the html page
- if (this.option["format"]) {
- var aFileName = this.download(aNode.data, true);
- // Changed by Dan S. for Zotero -- see this.repairRelativeLinks()
- if (aFileName) aNode.setAttribute("data", this.relativeLinkFix(aFileName));
- } else {
- return wpdCommon.removeNodeFromParent(aNode);
- }
- break;
- case "body":
- if (this.option["format"]) {
- var aFileName = this.download(aNode.background, true);
- // Changed by Dan S. for Zotero -- see this.repairRelativeLinks()
- if (aFileName) aNode.setAttribute("background", this.relativeLinkFix(aFileName));
- } else {
- aNode.removeAttribute("background");
- aNode.removeAttribute("bgcolor");
- aNode.removeAttribute("text");
- }
- break;
- case "table":
- case "tr":
- case "th":
- case "td":
- if (this.option["format"]) {
- var aFileName = this.download(aNode.getAttribute("background"), true);
- // Changed by Dan S. for Zotero -- see this.repairRelativeLinks()
- if (aFileName) aNode.setAttribute("background", this.relativeLinkFix(aFileName));
- } else {
- aNode.removeAttribute("background");
- aNode.removeAttribute("bgcolor");
- }
- break;
- case "input":
- if (aNode.type.toLowerCase() == "image") {
- if (this.option["format"]) {
- var aFileName = this.download(aNode.src, true);
- // Changed by Dan S. for Zotero -- see this.repairRelativeLinks()
- if (aFileName) aNode.setAttribute("src", this.relativeLinkFix(aFileName));
- } else {
- aNode.setAttribute("type", "button");
- aNode.removeAttribute("src");
- }
- } else if ((aNode.type.toLowerCase() != "hidden") && (aNode.hasAttribute("value"))) {
- if (WPD_CLONENODEBUG) aNode.setAttribute("value", this.getCurrentNodeValue(aNode));
- if (WPD_ENTITYBUG) aNode.setAttribute("value", this.convertEntity(aNode.getAttribute("value")));
- }
- break;
- case "link":
- // could containt urls (icon, stylesheet and fontdef)
- // We have to remove nodes with the stylesheet attribute because they will be added later
- if(!aNode.hasAttribute("rel")) return aNode;
- if (aNode.getAttribute("rel").toLowerCase() == "stylesheet"
- && (aNode.hasAttribute("href") && aNode.getAttribute("href").indexOf("chrome://") == -1)) {
- return wpdCommon.removeNodeFromParent(aNode);
- } else if (aNode.getAttribute("rel").toLowerCase() == "shortcut icon"
- || aNode.getAttribute("rel").toLowerCase() == "icon") {
- var aFileName = this.download(aNode.href, true);
- // Changed by Dan S. for Zotero -- see this.repairRelativeLinks()
- if (aFileName) aNode.setAttribute("href", this.relativeLinkFix(aFileName));
- } else if (aNode.getAttribute("rel").toLowerCase() == "fontdef") {
- var aFileName = this.download(aNode.src, true);
- // Changed by Dan S. for Zotero -- see this.repairRelativeLinks()
- if (aFileName) aNode.setAttribute("src", this.relativeLinkFix(aFileName));
- } else {
- aNode.setAttribute("href", aNode.href);
- }
- break;
- case "style":
- return wpdCommon.removeNodeFromParent(aNode);
- break;
- case "applet":
- if (aNode.hasAttribute("code")) aNode.setAttribute("code", "");
- if (aNode.hasAttribute("codebase")) aNode.setAttribute("codebase", "");
- if (aNode.hasAttribute("archive")) aNode.setAttribute("archive", "");
- break;
- case "script":
- if (this.option["script"]) {
- if (aNode.hasAttribute("src")) {
- var aFileName = this.download(aNode.src, true);
- // Changed by Dan S. for Zotero -- see this.repairRelativeLinks()
- if (aFileName) aNode.setAttribute("src", this.relativeLinkFix(aFileName));
- }
- } else {
- if (WPD_JAVASCRIPTSRCBUG && aNode.hasAttribute("src")) {
- //if ( aNode.getAttribute("src").indexOf("http://")!=-1 ) {
- // aNode.setAttribute("src", "http://0.0.0.0");
- //} else {
- aNode.setAttribute("src", "");
- //}
- } else {
- return wpdCommon.removeNodeFromParent(aNode);
- }
- }
- break;
- case "noscript":
- if (!WPD_JAVASCRIPTSRCBUG) return wpdCommon.removeNodeFromParent(aNode);
- break;
- case "a":
- case "area":
- if (aNode.hasAttribute("onclick")) aNode = this.normalizeJavaScriptLink(aNode, "onclick");
- if (!aNode.hasAttribute("href")) return aNode;
- if (aNode.target == "_blank") aNode.setAttribute("target", "_top");
- if (aNode.href.match(/^javascript:/i)) aNode = this.normalizeJavaScriptLink(aNode, "href");
- if (!this.selection && aNode.getAttribute("href").charAt(0) == "#") return aNode;
- // download file depending on option settings and file extension
- if (this.checkFileTypeOptions(aNode.href)) {
- var aFileName = this.download(aNode.href, true);
- if (aFileName) aNode.setAttribute("href", aFileName);
- } else {
- aNode.setAttribute("href", aNode.href);
- }
- break;
- case "form":
- aNode.setAttribute("action", wpdCommon.resolveURL(this.currentURL, aNode.action));
- break;
- case "meta":
- if ((aNode.hasAttribute("http-equiv") && aNode.hasAttribute("content")) && (aNode.getAttribute("http-equiv").toLowerCase() == "content-type") && (aNode.getAttribute("content").match(/charset\=/i))) {
- // we remove possible charset definitions because they will be added later
- return wpdCommon.removeNodeFromParent(aNode);
- }
- if ((aNode.hasAttribute("http-equiv") && aNode.hasAttribute("content")) && (aNode.getAttribute("http-equiv").toLowerCase() == "refresh") && (aNode.getAttribute("content").match(/URL\=/i))) {
- // there should be no refresh present - could be a noframe relict...
- // (is already processed or timer is longer...)
- return wpdCommon.removeNodeFromParent(aNode);
- }
- break;
- case "base":
- //<BASE HREF="http://www.amin.org/look/amin/">
- // we need to set the base url to currenturl
- if (aNode.hasAttribute("href") && (aNode.getAttribute("href") != "")) this.currentURL = aNode.getAttribute("href");
- return wpdCommon.removeNodeFromParent(aNode);
- break;
- case "frame":
- case "iframe":
- // normal and embedded frames (iframe) -> call "saveDocumentEx" for saving the frame document
- try {
- // we don't have to worry about the currentURL - saveDocumentEx will set the
- // currentURL to the URL of the frame document and afterwards back to the baseURL
- if (this.frameNumber < this.frameList.length) {
- var newFileName = this.saveDocumentEx(this.frameList[this.frameNumber++].document, this.name + "_" + this.frameNumber);
- aNode.setAttribute("src", this.relativeLinkFix(newFileName));
- }
- } catch (ex) {
- wpdCommon.addError("[wpdCommon.processDOMNode]:\n -> aNode.nodeName: " + aNode.nodeName, ex);
- }
- break;
- case "xmp":
- // TO DO
- var pre = aNode.ownerDocument.createElement("pre");
- pre.appendChild(aNode.firstChild);
- aNode.parentNode.replaceChild(pre, aNode);
- break;
- }
- if (!this.option["format"]) {
- aNode.removeAttribute("style");
- } else if (aNode.style && aNode.style.cssText) {
- var newCSStext = this.processCSSText(aNode.style.cssText, this.currentURL, true);
- if (newCSStext) aNode.setAttribute("style", newCSStext);
- }
- if (!this.option["script"]) {
- aNode.removeAttribute("onmouseover");
- aNode.removeAttribute("onmouseout");
- aNode.removeAttribute("onload");
- }
- } catch (ex) {
- wpdCommon.addError("[wpdDOMSaver.processDOMNode]:\n -> aNode.nodeName: " + aNode.nodeName, ex);
- }
- return aNode;
- },
-
-
- // get through the DOM tree (recursiv function)
- processDOMRecursively: function (rootNode) {
- if (rootNode == null) return;
- for (var curNode = rootNode.firstChild; curNode != null; curNode = curNode.nextSibling) {
- if (curNode.nodeName != "#text" && curNode.nodeName != "#comment") {
- curNode = this.processDOMNode(curNode);
- this.processDOMRecursively(curNode);
- } else if ((curNode.nodeName == "#text") && (wpdCommon.trim(curNode.nodeValue) != "")) {
- // we need to replace special chars with HTML Entities
- if (WPD_ENTITYBUG) curNode.nodeValue = this.convertEntity(curNode.nodeValue);
- // if we have CRLFs before or after the text "innerhtml" will remove them,
- // so we have to make sure that we preserve this CRLFs for the PRE Tag
- if (WPD_CRLFBUG) curNode.nodeValue = wpdCommon.checkCRLF(curNode);
- }
- }
- },
-
- // Do a correction directly inside the final HTML text.
- // This is necessary because setting the css text for the
- // style attribute does not work - innerHTML will finally
- // generate e.g "repeat scroll 0%;" regardless of the style setting
- // (e.g. "repeat;")
- repairInlineCSS: function (aHTMLText) {
- if ((WPD_CSSSCROLLBUG) && (aHTMLText.match(/background:/i))) {
- // Regex fixed by Dan for Zotero
- //var re = new RegExp(/style=\"(.*)background:(.*)(repeat scroll 0(?:pt|px|%);)/);
- var re = new RegExp(/style=\"([^\"]*)background:([^;\"]*)(repeat scroll 0(?:pt|px|%);?)/);
- while (re.exec(aHTMLText)) {
- var firstPart = RegExp.$1;
- var secondPart = RegExp.$2;
- // '?' added by Dan for Zotero
- //var thirdPart = RegExp.$3.replace(/scroll 0(pt|px|%);/g, ';');
- var thirdPart = RegExp.$3.replace(/scroll 0(pt|px|%);?/g, ';');
- aHTMLText = aHTMLText.replace(re, "style=\"" + firstPart + "background:" + secondPart + thirdPart);
- }
- }
- if ((WPD_CSSBACKGROUNDPOSITIONBUG) && (aHTMLText.match(/background-position: /i))) {
- // Regex fixed by Dan for Zotero
- //var re = new RegExp(/style=\"(.*)background-position: 0(?:pt|px|%);/);
- var re = new RegExp(/style=\"([^\"]*)background-position: 0(?:pt|px|%);/);
- while (re.exec(aHTMLText)) {
- aHTMLText = aHTMLText.replace(re, "style=\"" + RegExp.$1 + "background-position: ;");
- }
- }
- return aHTMLText;
- },
-
- // While we're replacing references with local file paths,
- // we don't want to have the browser try and fetch them
- // We prefix them with 'about:blank?' and remove later via repairRelativeLinks
- relativeLinkFix: function (aFileName) {
- return "about:blank?" + aFileName;
- },
-
- // Added by Dan S. for Zotero to restore relative links,
- // which are prepended with "about:blank?" to fix a bug in Scrapbook/WPD
- // that sending an invalid request to the server when the img src
- // is a relative link to a file in a different directory
- repairRelativeLinks: function (aHTMLText) {
- return aHTMLText.replace(/(src|background|data|href)="about:blank\?([^"]*)"/g, '$1="$2"');
- },
-
-
- // process the CSS text of one stylesheet element
- processCSSText: function (aCSStext, aCSShref, inline) {
- if (!aCSStext) return "";
-
- // search for "url" entries inside the css
- // Double-quotes in regexp added by Dan S. for Zotero
- var re = new RegExp(/ url\("?([^'")]+)"?\)/);
- var i = 0;
- while (aCSStext.match(re)) {
- if (++i > 20) break; // safer (we try it maximal 20 times for one stylesheet element)
- var imgFile = this.download(wpdCommon.resolveURL(aCSShref, RegExp.$1), true);
- aCSStext = aCSStext.replace(re, " url('" + imgFile + "')");
- }
-
- // search for "content" entries inside the css and clean "attr"
- re = new RegExp(/ content: \"(.*?)\"; /);
- if (aCSStext.match(re)) {
- var innerQuote = RegExp.$1;
- innerQuote = innerQuote.replace(/\"/g, '\\"');
- innerQuote = innerQuote.replace(/\\\" attr\(([^\)]+)\) \\\"/g, '" attr($1) "');
- aCSStext = aCSStext.replace(re, ' content: "' + innerQuote + '"; ');
- }
-
- //
- if ((WPD_CSSSCROLLBUG) && (aCSStext.match(/background: /i))) aCSStext = aCSStext.replace(/ scroll 0(pt|px|%);/g, ";");
- if ((WPD_CSSBACKGROUNDPOSITIONBUG) && (aCSStext.match(/background-position: /i))) aCSStext = aCSStext.replace(/ background-position: 0(pt|px|%);/g, ";");
- return aCSStext;
- },
-
- // process the CSS stylesheets (recursively)
- // CSS Types:
- // UNKNOWN_RULE = 0,
- // STYLE_RULE = 1,
- // CHARSET_RULE = 2,
- // IMPORT_RULE = 3,
- // MEDIA_RULE = 4,
- // FONT_FACE_RULE = 5,
- // PAGE_RULE = 6
- processCSSRecursively: function (aCSS) {
- if (!aCSS || aCSS.disabled) return "";
- var content = "";
- var medium = aCSS.media.mediaText;
- if (medium != "" && medium.indexOf("screen") < 0 && medium.indexOf("all") < 0) {
- return "";
- }
- // Disabled by Dan S. to fix CSS on snapshots of Reader View
- //if (aCSS.href != null && aCSS.href.indexOf("chrome") == 0) return "";
- var flag = "";
-
- // Added by Dan S. for Zotero
- //
- // Make sure cssRules is accessible -- it might not be if a <link>
- // element appears within <body> instead of <head>
- try {
- aCSS.cssRules
- } catch (e) {
- var msg = "Unable to access cssRules property of " + aCSS.href + " in wpdDOMSaver.processCSSRecursively()";
- Zotero.debug("WebPageDump: " + msg, 2);
- Components.utils.reportError(msg);
- return "";
- }
-
- for (var i = 0; i < aCSS.cssRules.length; i++) {
- if (aCSS.cssRules[i].type == 1 || aCSS.cssRules[i].type == 4) {
- if (flag == "") {
- content += "\n/* ::::: " + aCSS.href + " ::::: */\n\n"; // write original css filename
- flag = aCSS.href;
- }
- var ref = aCSS.href;
- if (flag == null || flag.indexOf(".css") == -1) ref = this.currentURL;
- content += this.processCSSText(aCSS.cssRules[i].cssText, ref, false) + "\n";
- } else if (aCSS.cssRules[i].type == 3) {
- content += this.processCSSRecursively(aCSS.cssRules[i].styleSheet);
- }
- }
- return content;
- },
-
- //given a file name and source URL (optional) with content (optional)
- //returns a unique file name and registers it
- getUniqueFileNameAndRegister: function(fileName, sourceURL, content) {
- fileName = this.checkForEqualFilenames(
- wpdCommon.getValidFileName(fileName),
- sourceURL);
- this.registerFile(fileName, sourceURL, content);
- return fileName;
- },
-
- //register filename, so we don't overwrite them later
- registerFile: function (newFileName, sourceURL, content) {
- this.fileInfo[newFileName.toLowerCase()] = {
- url: sourceURL,
- downloaded: content
- }
- },
-
- // is the file registered (e.g. downloaded)?
- isFileRegistered: function (newFileName) {
- if (this.fileInfo[newFileName.toLowerCase()] != undefined) return true;
- return false;
- },
-
- isDownloaded: function(fileName) {
- fileName = fileName.toLowerCase();
- if(!this.fileInfo[fileName]) return;
- return this.fileInfo[fileName].downloaded;
- },
-
- // check for equal Filenames with different locations
- // if this is the case, we generate a new name
- // if no aURLSpec is passed, this generates a unique file name
- checkForEqualFilenames: function (newFileName, aURLSpec) {
- if (this.isFileRegistered(newFileName)) {
- if (!aURLSpec || this.fileInfo[newFileName.toLowerCase()]["url"] != aURLSpec) {
- // the file is already registered but from a different location
- // => probably not the same file, so we have to find a different name it (e.g. filename_001.ext)
- var seq = 1;
- var fileLR = wpdCommon.splitFileName(newFileName);
- if (!fileLR[1]) fileLR[1] = "dat";
- newFileName = fileLR[0] + "_" + wpdCommon.addLeftZeros(seq++, 3) + "." + fileLR[1];
- while (this.fileInfo[newFileName.toLowerCase()] != undefined) {
- // is the file already registered with the new name?
- if (aURLSpec && this.fileInfo[newFileName.toLowerCase()]["url"] == aURLSpec) return newFileName; // Yes -> so it's already downloaded and we are finished
- newFileName = fileLR[0] + "_" + wpdCommon.addLeftZeros(seq++, 3) + "." + fileLR[1]; // No -> "increment" filename
- }
- }
- }
- return newFileName;
- },
-
- // Download the specified URL to "this.currentDir". Takes
- // care about equal filenames from different locations
- download: function (aURLSpec, aDownload) {
- if (!aURLSpec) return "";
-
- // is this a relative URL (no protocol present) which needs to be resolved?
- if (aURLSpec.indexOf("://") < 0) aURLSpec = wpdCommon.resolveURL(this.currentURL, aURLSpec);
-
- try {
- var aURL = wpdCommon.convertURLToObject(aURLSpec);
-
- // generate a filename
- var newFileName = aURL.fileName;
- if (!newFileName) newFileName = "untitled";
- // same name but different location?
- newFileName = this.getUniqueFileNameAndRegister(newFileName, aURLSpec);
- // is the file already registered (processed) ?
- if (!this.isDownloaded(newFileName)) {
- if (aDownload) {
- aDownload = wpdCommon.downloadFile(aURLSpec, this.currentDir + newFileName);
- } else {
- aDownload = true;
- }
- this.registerFile(newFileName, aURLSpec, aDownload);
- }
- return newFileName;
- } catch (ex) {
- wpdCommon.addError("[wpdDOMSaver.download]\n -> aURLSpec: " + aURLSpec, ex);
- return "";
- }
- },
-
- // Get a CSS filename node for inserting in the DOM Tree
- createCSSFileNode: function (aDocument, rootNode, aFileName) {
- var newLinkNode = aDocument.createElement("link");
-
- rootNode.firstChild.appendChild(aDocument.createTextNode("\n"));
-
- newLinkNode.setAttribute("media", "all");
- newLinkNode.setAttribute("href", aFileName);
- newLinkNode.setAttribute("type", "text/css");
- newLinkNode.setAttribute("rel", "stylesheet");
-
- rootNode.firstChild.appendChild(newLinkNode);
-
- rootNode.firstChild.appendChild(aDocument.createTextNode("\n"));
- //return newLinkNode;
- },
-
- // Creates a placeholder node for inserting the DOCTYPE after the html tag
- createPseudeDocTypeNode: function (aDocument, rootNode) {
- var aDoctype = aDocument.doctype;
- if (!aDoctype) return;
- try {
- rootNode.insertBefore(aDocument.createTextNode("\n"), rootNode.firstChild);
-
- var metaNode = aDocument.createElement("wpd_doctype");
- rootNode.insertBefore(metaNode, rootNode.firstChild);
-
- rootNode.insertBefore(aDocument.createTextNode("\n"), rootNode.firstChild);
- } catch (ex) {
- wpdCommon.addError("[wpdDOMSaver.createDocTypeNode]", ex);
- }
- },
-
- // replaces the placeholder node generated by createPseudeDocTypeNode with the DOCTYPE
- replaceDocType: function (aDocument, aHTMLText) {
- var aDoctype = aDocument.doctype;
- if (!aDoctype) return aHTMLText;
- try {
- return aHTMLText.replace("<wpd_doctype></wpd_doctype>", this.getDocType(aDocument));
- } catch (ex) {
- wpdCommon.addError("[wpdDOMSaver.replaceDocType]", ex);
- }
- return aHTMLText;
- },
-
- // Returns the HTML Text generated from rootNode and does
- // some processing (WPD_DOCTYPEBUG, WPD_ENTITYBUG, cleaning,...)
- generateHTMLString: function (aDocument, rootNode) {
- if (WPD_DOCTYPEBUG) this.createPseudeDocTypeNode(aDocument, rootNode);
- var HTMLText = wpdCommon.nodeToHTMLString(rootNode);
- if (WPD_DOCTYPEBUG) HTMLText = this.replaceDocType(aDocument, HTMLText);
- // adding the doctype entry at the top
- HTMLText = this.getDocType(aDocument) + HTMLText;
- HTMLText = HTMLText.replace(/\x00/g, " ");
- // replace the & added by the innerHTML method
- // because we have already generated all entities
- if (WPD_ENTITYBUG) HTMLText = HTMLText.replace(/&/g, "&");
-
- // Added by Dan S. for Zotero
- HTMLText = this.repairRelativeLinks(HTMLText);
-
- return this.repairInlineCSS(HTMLText);
- },
-
- // Builds a "<!DOCTYPE ...>" declaration followed by a newline from aDocument.doctype (name, then the optional public and system identifiers); returns "" when the document has no doctype.
- getDocType: function (aDocument) {
- var aDoctype = aDocument.doctype;
- if (!aDoctype) return "";
- var dt = "<!DOCTYPE " + aDoctype.name;
- if (aDoctype.publicId) dt += ' PUBLIC "' + aDoctype.publicId + '"';
- if (aDoctype.systemId) dt += ' "' + aDoctype.systemId + '"'; // appended as a bare quoted string: no SYSTEM keyword is written even when publicId is empty
- dt += ">\n";
- return dt;
- },
-
- // Returns the charset named by a <meta http-equiv="Content-Type"> element of aDocument, scanning the meta elements from last to first, or "" when no such element exists.
- getMetaCharset: function (aDocument) {
- var metas = aDocument.getElementsByTagName("meta");
- for (var i = metas.length; --i >= 0;) {
- var meta = metas[i];
- if (/content-type/i.test(meta.httpEquiv)) {
- r = /^text\/html; *charset=(.*)$/i.exec(meta.content); // NOTE(review): "r" is never declared, so this assigns an implicit global
- return r[1]; // NOTE(review): exec() yields null when the content is not "text/html; charset=...", which makes this line throw
- }
- }
- return "";
- },
-
-
- // Builds <meta http-equiv="Content-Type" content="aContentType; charset=aCharSet"> and inserts it, with a newline text node on each side, at the front of rootNode.firstChild. Nothing is returned; failures are reported through wpdCommon.addError.
- createMetaCharsetNode: function (aDocument, rootNode, aContentType, aCharSet) {
- try {
- var metaNode = aDocument.createElement("meta");
- rootNode.firstChild.insertBefore(aDocument.createTextNode("\n"), rootNode.firstChild.firstChild); // rootNode.firstChild is presumably the cloned <head> -- TODO confirm for documents without one
-
- metaNode.setAttribute("content", aContentType + "; charset=" + aCharSet);
- metaNode.setAttribute("http-equiv", "Content-Type");
-
- rootNode.firstChild.insertBefore(metaNode, rootNode.firstChild.firstChild); // the meta element lands in front of the newline inserted above
-
- rootNode.firstChild.insertBefore(aDocument.createTextNode("\n"), rootNode.firstChild.firstChild);
- } catch (ex) {
- wpdCommon.addError("[wpdDOMSaver.createMetaCharsetNode]", ex);
- }
- },
-
- // Builds <meta name="name" content="content"> and inserts it, followed by a newline text node, at the front of rootNode.firstChild. Nothing is returned; failures are reported through wpdCommon.addError.
- createMetaNameNode: function (aDocument, rootNode, name, content) {
- try {
- var metaNode = aDocument.createElement("meta");
-
- metaNode.setAttribute("content", content);
- metaNode.setAttribute("name", name);
-
- rootNode.firstChild.insertBefore(aDocument.createTextNode("\n"), rootNode.firstChild.firstChild); // newline first, so it ends up after the meta element
- rootNode.firstChild.insertBefore(metaNode, rootNode.firstChild.firstChild); // rootNode.firstChild is presumably the cloned <head> -- TODO confirm
- } catch (ex) {
- wpdCommon.addError("[wpdDOMSaver.createMetaNameNode]", ex);
- }
- },
-
- /*existMetaCharsetNode : function(aDocument);
- {
- var metaNodes = aDocument.getElementsByTagName("meta");
- for (var i=0; i<metaNodes.length; i++ ) {
- if ( (metaNodes[i].hasAttribute("http-equiv") && metaNodes[i].hasAttribute("content")) &&
- (metaNodes[i].getAttribute("http-equiv").toLowerCase() == "content-type") &&
- (metaNodes[i].getAttribute("content").match(/charset\=/i)) )
- return true;
- }
- return false;
- }*/
-
-
- // Reads aFile through wpdCommon.readFile() and returns the content attribute of its <meta name="wpd_baseurl"> tag, or "" when the path does not exist or the tag is not found.
- getMetaBaseURL: function (aFile) {
- if (wpdCommon.pathExists(aFile)) {
- str = new String(wpdCommon.readFile(aFile, false, true)); // NOTE(review): "str" is never declared, so this assigns an implicit global
- re = new RegExp(/<meta name=\"wpd_baseurl\" content=\"(.*?)\">/); // NOTE(review): "re" is likewise an implicit global
- if (str.match(re)) {
- return RegExp.$1; // first capture group of the match just performed (legacy static RegExp property)
- }
- }
- return "";
- },
-
- // Reads aFile through wpdCommon.readFile() and returns the content attribute of its <meta name="wpd_date"> tag, or "" when the path does not exist or the tag is not found.
- getMetaDate: function (aFile) {
- if (wpdCommon.pathExists(aFile)) {
- str = new String(wpdCommon.readFile(aFile, false, true)); // NOTE(review): "str" is never declared, so this assigns an implicit global
- re = new RegExp(/<meta name=\"wpd_date\" content=\"(.*?)\">/); // NOTE(review): "re" is likewise an implicit global
- if (str.match(re)) {
- return RegExp.$1; // first capture group of the match just performed (legacy static RegExp property)
- }
- }
- return "";
- },
-
- // Adds the wpd_date, wpd_url, wpd_baseurl and wpd_version meta tags to rootNode.firstChild via createMetaNameNode(), then puts a blank-line text node in front of them.
- createMetaInformation: function (aDocument, rootNode) {
- // insert url/date/time meta information
- // (each createMetaNameNode() call inserts at the front, so the tags end up in reverse call order: version, baseurl, url, date)
- var d = this.dateObj.getUTCFullYear() + "-" + wpdCommon.addLeftZeros(this.dateObj.getUTCMonth(), 2) + "-" + wpdCommon.addLeftZeros(this.dateObj.getUTCDate(), 2); // NOTE(review): getUTCMonth() is zero-based, so the month written is one lower than the calendar month unless addLeftZeros compensates -- confirm
- d = d + "T" + wpdCommon.addLeftZeros(this.dateObj.getUTCHours(), 2) + ":" + wpdCommon.addLeftZeros(this.dateObj.getUTCMinutes(), 2) + "Z"; // hours and minutes only; seconds are not written
- this.createMetaNameNode(aDocument, rootNode, "wpd_date", d);
- this.createMetaNameNode(aDocument, rootNode, "wpd_url", this.currentURL);
- this.createMetaNameNode(aDocument, rootNode, "wpd_baseurl", this.baseURL);
- this.createMetaNameNode(aDocument, rootNode, "wpd_version", WPD_VERSION);
- rootNode.firstChild.insertBefore(aDocument.createTextNode("\n\n"), rootNode.firstChild.firstChild);
- },
-
- // Saves a non-HTML document by handing this.currentURL to this.download() and returns the
- // file name that download() reports. The HTML wrapper file described in older versions of this
- // comment is no longer generated: that code is commented out below. Neither parameter is read by the live code.
- saveDocumentFile: function (aDocument, aFileName) {
- Zotero.debug("[wpdDOMSaver.saveDocumentFile]: Saving file from " + this.currentURL);
- aFileName = this.download(this.currentURL, true) // the caller's aFileName is discarded and replaced by download()'s result
- Zotero.debug("[wpdDOMSaver.saveDocumentFile]: Saved to " + aFileName);
-
- return aFileName;
- /* Wrapper file disabled by Dan S. for Zotero
- var aFileURL = aDocument.location.href;
-
- if ( !aFileName ) aFileName = "file" + Math.random().toString();
- // this.download will generate a unique filename
- var newFileName = this.download(this.currentURL,true);
-
- if ( aDocument.contentType.substring(0,5) == "image" ) {
- var HTMLText = '<html><body><img src="' + newFileName + '"></body></html>';
- } else {
- var HTMLText = '<html><head><meta http-equiv="refresh" content="0;URL=' + newFileName + '"></head><body></body></html>';
- }
-
- var HTMLFile = this.currentDir + aFileName + ".html";
-
- if (!wpdCommon.writeFile(HTMLText,HTMLFile))
- wpdCommon.addError("[wpdDOMSaver.saveDocumentFile]: could not write HTML wrapper for "+aFileName+"\n");
-
- return aFileName + ".html";
- */
- },
-
- // When this.option["format"] is set, concatenates the text produced by processCSSRecursively()
- // for every style sheet of aDocument, converts it to the output charset and writes it to
- // this.currentDir under a unique name derived from aFileName + ".css" (aFileName has no extension).
- // Returns the registered file name, or false when the option is off or no CSS text was produced.
- saveDocumentCSS: function (aDocument, aFileName) {
- var CSSText = ""; //"body {display: block;margin: 8px;}; ";
- if (this.option["format"]) {
- var myStyleSheets = aDocument.styleSheets;
- // get all style sheets to "CSSText"
- for (var i = 0; i < myStyleSheets.length; i++) {
- CSSText += this.processCSSRecursively(myStyleSheets[i]);
- }
- if (CSSText) {
- // don't forget to convert the CSS String to the document charset..
- // (necessary for e.g. font-family)
- if (this.option["encodeUTF8"]) {
- CSSText = wpdCommon.ConvertFromUnicode16(CSSText, "UTF-8");
- } else {
- CSSText = wpdCommon.ConvertFromUnicode16(CSSText, this.curCharacterSet); // curCharacterSet is assigned in saveDocumentHTML() before it calls this method
- }
- aFileName = this.getUniqueFileNameAndRegister(aFileName + ".css");
- Zotero.debug("[wpdDOMSaver.saveDocumentCSS]: " + this.currentDir + aFileName);
- // write css file
- var CSSFile = this.currentDir + aFileName;
- if (!wpdCommon.writeFile(CSSText, CSSFile)) wpdCommon.addError("[wpdDOMSaver.saveDocumentCSS]: could not write CSS File"); // a failed write is only reported; the name is still returned
- return aFileName;
- }
- }
- return false;
- },
-
- // Saves the HTML document aDocument into this.currentDir under a unique name derived from
- // aFileName + ".html" (aFileName has no extension) and returns that name. Works on clones of
- // <html>, <head> and <body>: processes them with processDOMRecursively(), writes the style sheets through
- // saveDocumentCSS(), adds the meta tags, serializes with generateHTMLString() and writes the converted text.
- saveDocumentHTML: function (aDocument, aFileName) {
- aFileName = this.getUniqueFileNameAndRegister(aFileName + ".html");
- var aFileNameNoExt = wpdCommon.splitFileName(aFileName)[0]; // reused below as the base name of the CSS file
-
- Zotero.debug("[wpdDOMSaver.saveDocumentHTML]: " + this.currentDir + aFileName);
-
- this.curDocument = aDocument;
- this.curCharacterSet = aDocument.characterSet;
- var charset = this.curCharacterSet;
- // we get the html node without childs and add the head and body trees
- // manually so we are sure that we have a correct html file
- var rootNode = aDocument.getElementsByTagName("html")[0].cloneNode(false);
-
- try {
- var headNode = aDocument.getElementsByTagName("head")[0].cloneNode(true);
- rootNode.appendChild(headNode);
- rootNode.appendChild(aDocument.createTextNode("\n"));
- } catch (ex) {} // a missing <head> is ignored silently; the helpers below that insert into rootNode.firstChild then act on whatever node comes first
- try {
- this.curBody = aDocument.body.cloneNode(true);
- } catch (ex) {
- this.curBody = aDocument.getElementsByTagName("body")[0].cloneNode(true); // fallback lookup when aDocument.body cannot be cloned
- }
- rootNode.appendChild(this.curBody);
- rootNode.appendChild(aDocument.createTextNode("\n"));
-
- // now the processing of the dom nodes (changing hrefs, downloading...)
- this.processDOMRecursively(rootNode);
-
- // write css file and add css node with the new css filename in the DOM Tree
- var cssFileName = this.saveDocumentCSS(aDocument, aFileNameNoExt);
- if (cssFileName) this.createCSSFileNode(aDocument, rootNode, cssFileName); // saveDocumentCSS() returns false when nothing was written
-
- // create meta information (version, base_url, url, date/time)
- if (this.option["metainfo"]) this.createMetaInformation(aDocument, rootNode);
-
- // add the charset defintions previously removed by processDOMNode
- if (this.option["encodeUTF8"]) {
- this.createMetaCharsetNode(aDocument, rootNode, aDocument.contentType, "UTF-8");
- } else {
- // charset probably sent by web server only -> add the charset meta header for local viewing
- this.createMetaCharsetNode(aDocument, rootNode, aDocument.contentType, charset);
- }
-
- // convert the nodes to a html string (including some processing)
-
- // "var " added by Dan S. for Zotero
- var HTMLText = this.generateHTMLString(aDocument, rootNode);
- // convert the DOM String to the desired Charset
- if (this.option["encodeUTF8"]) {
- HTMLText = wpdCommon.ConvertFromUnicode16(HTMLText, "UTF-8");
- } else {
- HTMLText = wpdCommon.ConvertFromUnicode16(HTMLText, charset);
- }
-
- this.curCharacterSet = charset; // presumably restores the value in case nested processing changed it -- TODO confirm
-
- // and write the file...
- var HTMLFile = this.currentDir + aFileName;
- if (!wpdCommon.writeFile(HTMLText, HTMLFile)) wpdCommon.addError("[wpdDOMSaver.saveDocumentHTML]: could not write HTML File"); // a failed write is only reported; the name is still returned
-
- return aFileName;
- },
-
- // Dispatches to saveDocumentFile() or saveDocumentHTML() depending on whether aDocument looks like an HTML/XML page, and returns the file name the chosen method reports. this.currentURL points at aDocument for the duration of the call and is reset to this.baseURL afterwards.
- saveDocumentEx: function (aDocument, aFileName) {
- // we have to set a new current url which is the
- // base reference url (necessary for frame processing)
- this.currentURL = aDocument.location.href;
-
- // distinguish between HTML Documents and other
- // embedded files like flash, video or images...
- if ((aDocument.getElementsByTagName("head").length == 0) || !aDocument.contentType.match(/htm|html|xml/i)) { // no <head>, or a content type without "htm"/"xml": treat as a plain file
- aFileName = this.saveDocumentFile(aDocument, aFileName);
- } else {
- aFileName = this.saveDocumentHTML(aDocument, aFileName)
- }
-
- // set the current URL back to the original base URL
- this.currentURL = this.baseURL; // not reached if the save call above throws
-
- return aFileName;
-
- },
-
- // Main entry point: saves this.document under the base name this.name through saveDocumentEx() and returns the resulting file name.
- // (be sure to call the init function at the top of this file before) Exceptions are reported via wpdCommon.addError, in which case undefined is returned.
- saveHTMLDocument: function () {
- try {
- return this.saveDocumentEx(this.document, this.name);
- } catch (ex) {
- wpdCommon.addError("[wpdDOMSaver.saveHTMLDocument]", ex); // swallowed after logging; the caller gets no return value
- }
- }
-
-};
-\ No newline at end of file
diff --git a/chrome/content/zotero/xpcom/attachments.js b/chrome/content/zotero/xpcom/attachments.js
@@ -540,7 +540,7 @@ Zotero.Attachments = new function(){
/**
- * Save a snapshot -- uses synchronous WebPageDump or asynchronous saveURI()
+ * Save a snapshot from a Document
*
* @param {Object} options - 'libraryID', 'document', 'parentItemID', 'forceTitle', 'collections'
* @return {Promise<Zotero.Item>} - A promise for the created attachment item
@@ -567,10 +567,7 @@ Zotero.Attachments = new function(){
var tmpDir = yield this.createTemporaryStorageDirectory();
var tmpFile = tmpDir.clone();
- var fileName = Zotero.File.truncateFileName(
- _getFileNameFromURL(url, contentType),
- 100 //make sure this matches WPD settings in webpagedump/common.js
- );
+ var fileName = Zotero.File.truncateFileName(_getFileNameFromURL(url, contentType), 100);
tmpFile.append(fileName);
// If we're using the title from the document, make some adjustments
@@ -587,20 +584,11 @@ Zotero.Attachments = new function(){
}
if (contentType === 'text/html' || contentType === 'application/xhtml+xml') {
- // Load WebPageDump code
- var wpd = {"Zotero":Zotero};
- Components.classes["@mozilla.org/moz/jssubscript-loader;1"]
- .getService(Components.interfaces.mozIJSSubScriptLoader)
- .loadSubScript("chrome://zotero/content/webpagedump/common.js", wpd);
- Components.classes["@mozilla.org/moz/jssubscript-loader;1"]
- .getService(Components.interfaces.mozIJSSubScriptLoader)
- .loadSubScript("chrome://zotero/content/webpagedump/domsaver.js", wpd);
-
- wpd.wpdDOMSaver.init(tmpFile.path, document);
- wpd.wpdDOMSaver.saveHTMLDocument();
+ Zotero.debug('Saving document with saveURI()');
+ yield Zotero.Utilities.Internal.saveDocument(document, tmpFile.path);
}
else {
- Zotero.debug('Saving with saveURI()');
+ Zotero.debug("Saving file with saveURI()");
const nsIWBP = Components.interfaces.nsIWebBrowserPersist;
var wbp = Components.classes["@mozilla.org/embedding/browser/nsWebBrowserPersist;1"]
.createInstance(nsIWBP);
diff --git a/chrome/content/zotero/xpcom/utilities_internal.js b/chrome/content/zotero/xpcom/utilities_internal.js
@@ -446,6 +446,51 @@ Zotero.Utilities.Internal = {
},
+ saveDocument: function (document, destFile) {
+ const nsIWBP = Components.interfaces.nsIWebBrowserPersist;
+ let wbp = Components.classes["@mozilla.org/embedding/browser/nsWebBrowserPersist;1"]
+ .createInstance(nsIWBP);
+ wbp.persistFlags = nsIWBP.PERSIST_FLAGS_REPLACE_EXISTING_FILES
+ | nsIWBP.PERSIST_FLAGS_FORCE_ALLOW_COOKIES
+ | nsIWBP.PERSIST_FLAGS_AUTODETECT_APPLY_CONVERSION
+ | nsIWBP.PERSIST_FLAGS_FROM_CACHE
+ // Mostly ads
+ | nsIWBP.PERSIST_FLAGS_IGNORE_IFRAMES
+ | nsIWBP.PERSIST_FLAGS_IGNORE_REDIRECTED_DATA;
+
+ let encodingFlags = 0;
+ let filesFolder = null;
+ if (document.contentType == "text/plain") {
+ encodingFlags |= nsIWBP.ENCODE_FLAGS_FORMATTED;
+ encodingFlags |= nsIWBP.ENCODE_FLAGS_ABSOLUTE_LINKS;
+ encodingFlags |= nsIWBP.ENCODE_FLAGS_NOFRAMES_CONTENT;
+ }
+ else {
+ encodingFlags |= nsIWBP.ENCODE_FLAGS_ENCODE_BASIC_ENTITIES;
+
+ // Save auxiliary files to the same folder
+ filesFolder = OS.Path.dirname(destFile);
+ }
+ const wrapColumn = 80;
+
+ var deferred = Zotero.Promise.defer();
+ wbp.progressListener = new Zotero.WebProgressFinishListener(function () {
+ deferred.resolve();
+ });
+
+ wbp.saveDocument(
+ document,
+ Zotero.File.pathToFile(destFile),
+ Zotero.File.pathToFile(filesFolder),
+ null,
+ encodingFlags,
+ wrapColumn
+ );
+
+ return deferred.promise;
+ },
+
+
/**
* Launch a process
* @param {nsIFile|String} cmd Path to command to launch
diff --git a/chrome/content/zotero/xpcom/zotero.js b/chrome/content/zotero/xpcom/zotero.js
@@ -2710,7 +2710,6 @@ Zotero.WebProgressFinishListener = function(onFinish) {
this.onStateChange = function(wp, req, stateFlags, status) {
//Zotero.debug('onStageChange: ' + stateFlags);
if (stateFlags & Components.interfaces.nsIWebProgressListener.STATE_STOP
- && stateFlags & Components.interfaces.nsIWebProgressListener.STATE_IS_REQUEST
&& stateFlags & Components.interfaces.nsIWebProgressListener.STATE_IS_NETWORK) {
onFinish();
}
diff --git a/chrome/content/zotero/zoteroPane.js b/chrome/content/zotero/zoteroPane.js
@@ -3927,7 +3927,7 @@ var ZoteroPane = new function()
}
if (!externalViewer) {
- var url = 'zotero://attachment/' + itemID + '/';
+ let url = Services.io.newFileURI(file).spec;
this.loadURI(url, event);
}
else {
diff --git a/test/tests/attachmentsTest.js b/test/tests/attachmentsTest.js
@@ -177,6 +177,39 @@ describe("Zotero.Attachments", function() {
})
})
+ describe("#importFromDocument()", function () {
+ it("should save a document with embedded files", function* () {
+ var item = yield createDataObject('item');
+
+ var uri = OS.Path.join(getTestDataDirectory().path, "snapshot", "index.html");
+ var deferred = Zotero.Promise.defer();
+ win.addEventListener('pageshow', () => deferred.resolve());
+ win.loadURI(uri);
+ yield deferred.promise;
+
+ var file = getTestDataDirectory();
+ file.append('test.png');
+ var attachment = yield Zotero.Attachments.importFromDocument({
+ document: win.content.document,
+ parentItemID: item.id
+ });
+
+ assert.equal(attachment.getField('url'), "file://" + uri);
+
+ // Check indexing
+ var matches = yield Zotero.Fulltext.findTextInItems([attachment.id], 'share your research');
+ assert.lengthOf(matches, 1);
+ assert.propertyVal(matches[0], 'id', attachment.id);
+
+ // Check for embedded files
+ var storageDir = Zotero.Attachments.getStorageDirectory(attachment).path;
+ var file = yield attachment.getFilePathAsync();
+ assert.equal(OS.Path.basename(file), 'index.html');
+ var filesFolder = OS.Path.join(storageDir, 'index_files');
+ assert.isTrue(yield OS.File.exists(filesFolder, 'img.gif'));
+ });
+ });
+
describe("#getBaseDirectoryRelativePath()", function () {
it("should convert backslashes to forward slashes", function () {
Zotero.Prefs.set('baseAttachmentPath', "C:\\foo\\bar");
diff --git a/test/tests/data/snapshot/index.html b/test/tests/data/snapshot/index.html
@@ -6,5 +6,6 @@
<body>
<h1>Test</h1>
<p>Zotero [zoh-TAIR-oh] is a free, easy-to-use tool to help you collect, organize, cite, and share your research sources.</p>
+ <img src="img.gif"/>
</body>
</html>