commit ccf2a05c49d5d20c22aa73775e7d5b98a057837f
parent 62840f3b9370941b7c7d04f3385898141cc89db5
Author: Dan Stillman <dstillman@zotero.org>
Date: Mon, 4 Nov 2013 01:41:04 -0500
Don't truncate HTML file content in full-text sync
Also write a cache file for HTML files at index time
Diffstat:
1 file changed, 33 insertions(+), 6 deletions(-)
diff --git a/chrome/content/zotero/xpcom/fulltext.js b/chrome/content/zotero/xpcom/fulltext.js
@@ -370,9 +370,22 @@ Zotero.Fulltext = new function(){
text = text.replace(/(>)/g, '$1 ');
text = this.HTMLToText(text);
this.indexString(text, document.characterSet, itemID);
-
var charsIndexed = Math.min(maxLength, text.length);
this.setChars(itemID, { indexed: charsIndexed, total: text.length });
+
+ // Write the converted text to a cache file
+ Q.fcall(function () {
+ let cacheFile = self.getItemCacheFile(itemID);
+ Zotero.debug("Writing converted full-text HTML content to " + cacheFile.path);
+ if (!cacheFile.parent.exists()) {
+ Zotero.Attachments.createDirectoryForItem(itemID);
+ }
+ return Zotero.File.putContentsAsync(cacheFile, text);
+ })
+ .catch(function (e) {
+ Zotero.debug(e, 1);
+ Components.utils.reportError(e);
+ })
}
@@ -597,6 +610,7 @@ Zotero.Fulltext = new function(){
* @return {Array<Object>}
*/
this.getUnsyncedContent = function (maxChars) {
+ var maxLength = Zotero.Prefs.get('fulltext.textMaxLength');
var first = true;
var chars = 0;
var contentItems = [];
@@ -632,15 +646,18 @@ Zotero.Fulltext = new function(){
}
Zotero.debug("Adding full-text content from file for item " + libraryKey);
- text = Zotero.File.getContents(
- file, item.attachmentCharset, row.indexedChars
- );
+ text = Zotero.File.getContents(file, item.attachmentCharset, maxLength);
- // Split elements to avoid word concatentation
+ // If HTML, convert to plain text first, and cache the result
if (item.attachmentMIMEType == 'text/html') {
+ // Split elements to avoid word concatenation
text = text.replace(/(>)/g, '$1 ');
+
text = this.HTMLToText(text);
+ // Include in the cache file only as many characters as we've indexed
+ text = text.substr(0, row.indexedChars);
+
// Write the converted text to a cache file
Zotero.debug("Writing converted full-text HTML content to "
+ cacheFile.path);
@@ -651,7 +668,11 @@ Zotero.Fulltext = new function(){
.catch(function (e) {
Zotero.debug(e, 1);
Components.utils.reportError(e);
- })
+ });
+ }
+ else {
+ // Include only as many characters as we've indexed
+ text = text.substr(0, row.indexedChars);
}
}
}
@@ -1067,6 +1088,12 @@ Zotero.Fulltext = new function(){
content = this.HTMLToText(content);
+ // Include in the cache file only as many characters as we've indexed
+ let chars = this.getChars(itemID);
+ if (chars && chars.indexedChars) {
+ content = content.substr(0, chars.indexedChars);
+ }
+
// Write the converted text to a cache file for future searches
Zotero.debug("Writing converted full-text content to " + cacheFile.path);
if (!cacheFile.parent.exists()) {