commit 62840f3b9370941b7c7d04f3385898141cc89db5
parent afd14ae4b9295735b0c979535998fcf0caa5a59f
Author: Dan Stillman <dstillman@zotero.org>
Date: Sun, 3 Nov 2013 19:14:10 -0500
Full-text content syncing
Full-text content is now synced to the server and other clients. Up to
~500K of content is transferred with each sync, so multiple syncs may be
necessary. Downloaded content is written to temporary files and
processed on idle (currently 5 seconds), so synced content may not be
immediately available.
Performance will improve with API syncing and async DB in 4.1.
This also includes some other full-text tweaks, such as storing
text converted from HTML in cache files, which may speed up phrase
searching.
Diffstat:
4 files changed, 573 insertions(+), 74 deletions(-)
diff --git a/chrome/content/zotero/xpcom/attachments.js b/chrome/content/zotero/xpcom/attachments.js
@@ -567,11 +567,9 @@ Zotero.Attachments = new function(){
var f = function() {
if (mimeType == 'application/pdf') {
Zotero.Fulltext.indexPDF(file, itemID);
- Zotero.Notifier.trigger('refresh', 'item', itemID);
}
- if (Zotero.MIME.isTextType(mimeType)) {
+ else if (Zotero.MIME.isTextType(mimeType)) {
Zotero.Fulltext.indexDocument(document, itemID);
- Zotero.Notifier.trigger('refresh', 'item', itemID);
}
if (callback) {
callback(attachmentItem);
@@ -981,7 +979,7 @@ Zotero.Attachments = new function(){
function getStorageDirectory(itemID) {
if (!itemID) {
- throw ("itemID not provided in Zotero.Attachments.getStorageDirectory()");
+ throw new Error("itemID not provided in Zotero.Attachments.getStorageDirectory()");
}
var item = Zotero.Items.get(itemID);
if (!item) {
diff --git a/chrome/content/zotero/xpcom/fulltext.js b/chrome/content/zotero/xpcom/fulltext.js
@@ -24,7 +24,6 @@
*/
Zotero.Fulltext = new function(){
- const FULLTEXT_VERSION = 1;
const CACHE_FILE = '.zotero-ft-cache';
this.init = init;
@@ -38,7 +37,6 @@ Zotero.Fulltext = new function(){
this.indexFile = indexFile;
this.indexPDF = indexPDF;
this.indexItems = indexItems;
- this.findTextInFile = findTextInFile;
this.findTextInItems = findTextInItems;
this.clearItemWords = clearItemWords;
this.getPages = getPages;
@@ -72,6 +70,7 @@ Zotero.Fulltext = new function(){
this.__defineGetter__("INDEX_STATE_PARTIAL", function () { return 2; });
this.__defineGetter__("INDEX_STATE_INDEXED", function () { return 3; });
+ const _processorCacheFile = '.zotero-ft-unprocessed';
var _pdfConverterVersion = null;
var _pdfConverterFileName = null;
@@ -80,6 +79,16 @@ Zotero.Fulltext = new function(){
var _pdfInfoFileName = null;
var _pdfInfo = null; // nsIFile to executable
+ var _idleObserverIsRegistered = false;
+ var _idleObserverDelay = 5;
+ var _processorTimer = null;
+ var _upgradeCheck = true;
+
+ const SYNC_STATE_UNSYNCED = 0;
+ const SYNC_STATE_IN_SYNC = 1;
+ const SYNC_STATE_TO_PROCESS = 2;
+ const SYNC_STATE_TO_DOWNLOAD = 3;
+
var self = this;
function init() {
@@ -98,6 +107,18 @@ Zotero.Fulltext = new function(){
this.registerPDFTool('converter');
this.registerPDFTool('info');
+
+ // TEMP: Remove after 4.1 DB schema change
+ var cols = Zotero.DB.getColumns('fulltextItems');
+ if (cols.indexOf("synced") == -1) {
+ Zotero.DB.beginTransaction();
+ Zotero.DB.query("ALTER TABLE fulltextItems ADD COLUMN synced INT DEFAULT 0");
+ Zotero.DB.query("REPLACE INTO settings (setting, key, value) VALUES ('fulltext', 'downloadAll', 1)");
+ Zotero.DB.commitTransaction();
+ }
+
+ this.startContentProcessor();
+ Zotero.addShutdownListener(this.stopContentProcessor);
}
@@ -192,7 +213,7 @@ Zotero.Fulltext = new function(){
/*
* Index multiple words at once
*/
- function indexWords(itemID, words){
+ function indexWords(itemID, words) {
if (!words || !words.length || !itemID){
return false;
}
@@ -231,9 +252,10 @@ Zotero.Fulltext = new function(){
}
while (done < numWords);
- Zotero.DB.query("REPLACE INTO fulltextItems (itemID, version) VALUES (?,?)",
- [itemID, FULLTEXT_VERSION]);
-
+ if (!Zotero.DB.valueQuery("SELECT COUNT(*) FROM fulltextItems WHERE itemID=?", itemID)) {
+ let sql = "INSERT INTO fulltextItems (itemID, version) VALUES (?,?)";
+ Zotero.DB.query(sql, [itemID, 0]);
+ }
// Handle bound parameters manually for optimal speed
var statement1 = Zotero.DB.getStatement("INSERT INTO fulltextWords (word) VALUES (?)");
@@ -266,7 +288,7 @@ Zotero.Fulltext = new function(){
}
- function indexString(text, charset, itemID){
+ function indexString(text, charset, itemID, stats, version, synced) {
try {
Zotero.UnresponsiveScriptIndicator.disable();
@@ -275,7 +297,23 @@ Zotero.Fulltext = new function(){
Zotero.DB.beginTransaction();
this.clearItemWords(itemID, true);
- this.indexWords(itemID, words);
+ this.indexWords(itemID, words, stats, version, synced);
+
+ var sql = "UPDATE fulltextItems SET synced=?";
+ var params = [synced ? parseInt(synced) : SYNC_STATE_UNSYNCED];
+ if (stats) {
+ for (let stat in stats) {
+ sql += ", " + stat + "=?";
+ params.push(stats[stat] ? parseInt(stats[stat]) : null);
+ }
+ }
+ if (version) {
+ sql += ", version=?";
+ params.push(parseInt(version));
+ }
+ sql += " WHERE itemID=?";
+ params.push(itemID);
+ Zotero.DB.query(sql, params);
/*
var sql = "REPLACE INTO fulltextContent (itemID, textContent) VALUES (?,?)";
@@ -283,6 +321,14 @@ Zotero.Fulltext = new function(){
*/
Zotero.DB.commitTransaction();
+
+ // If there's a processor cache file, delete it (whether or not we just used it)
+ var cacheFile = this.getItemProcessorCacheFile(itemID);
+ if (cacheFile.exists()) {
+ cacheFile.remove(false);
+ }
+
+ Zotero.Notifier.trigger('refresh', 'item', itemID);
}
finally {
Zotero.UnresponsiveScriptIndicator.enable();
@@ -499,7 +545,7 @@ Zotero.Fulltext = new function(){
function indexItems(items, complete, ignoreErrors) {
- if (items.constructor.name != 'Array') {
+ if (Array.isArray(items)) {
items = [items];
}
var items = Zotero.Items.get(items);
@@ -507,20 +553,22 @@ Zotero.Fulltext = new function(){
Zotero.DB.beginTransaction();
- for each(var i in items){
- if (!i.isAttachment()){
+ for each (let item in items) {
+ if (!item.isAttachment()) {
continue;
}
- var file = i.getFile();
+ let itemID = item.id;
+
+ var file = item.getFile();
if (!file){
- Zotero.debug("No file to index for item " + i.id + " in Fulltext.indexItems()");
+ Zotero.debug("No file to index for item " + itemID + " in Fulltext.indexItems()");
continue;
}
if (ignoreErrors) {
try {
- this.indexFile(file, i.attachmentMIMEType, i.attachmentCharset, i.id, !complete);
+ this.indexFile(file, item.attachmentMIMEType, item.attachmentCharset, itemID, !complete);
}
catch (e) {
Zotero.debug(e, 1);
@@ -529,7 +577,7 @@ Zotero.Fulltext = new function(){
}
}
else {
- this.indexFile(file, i.attachmentMIMEType, i.attachmentCharset, i.id, !complete);
+ this.indexFile(file, item.attachmentMIMEType, item.attachmentCharset, itemID, !complete);
}
}
@@ -537,8 +585,356 @@ Zotero.Fulltext = new function(){
}
+ //
+ // Full-text content syncing
+ //
+ /**
+ * Get content and stats that haven't yet been synced
+ *
+ * @param {Integer} maxChars Maximum total characters to include.
+ * The total can go over this if there's a
+ * single large item.
+ * @return {Array<Object>}
+ */
+ this.getUnsyncedContent = function (maxChars) {
+ var first = true;
+ var chars = 0;
+ var contentItems = [];
+ var sql = "SELECT itemID, indexedChars, totalChars, indexedPages, totalPages "
+ + "FROM fulltextItems WHERE synced=" + SYNC_STATE_UNSYNCED;
+ var rows = Zotero.DB.query(sql) || [];
+ for each (let row in rows) {
+ let text;
+ let itemID = row.itemID;
+ let item = Zotero.Items.get(itemID);
+ let libraryKey = item.libraryID + "/" + item.key;
+ let mimeType = item.attachmentMIMEType;
+ if (isCachedMIMEType(mimeType) || Zotero.MIME.isTextType(mimeType)) {
+ try {
+ let cacheFile = this.getItemCacheFile(itemID);
+ if (cacheFile.exists()) {
+ Zotero.debug("Adding full-text content from cache "
+ + "file for item " + libraryKey);
+ text = Zotero.File.getContents(cacheFile);
+ }
+ else {
+ if (!Zotero.MIME.isTextType(mimeType)) {
+ Zotero.debug("Full-text content cache file doesn't exist for item "
+ + libraryKey, 2);
+ continue;
+ }
+
+ let file = item.getFile();
+ if (!file) {
+ Zotero.debug("File doesn't exist getting full-text content for item "
+ + libraryKey, 2);
+ continue;
+ }
+
+ Zotero.debug("Adding full-text content from file for item " + libraryKey);
+ text = Zotero.File.getContents(
+ file, item.attachmentCharset, row.indexedChars
+ );
+
+ // Split elements to avoid word concatentation
+ if (item.attachmentMIMEType == 'text/html') {
+ text = text.replace(/(>)/g, '$1 ');
+ text = this.HTMLToText(text);
+
+ // Write the converted text to a cache file
+ Zotero.debug("Writing converted full-text HTML content to "
+ + cacheFile.path);
+ if (!cacheFile.parent.exists()) {
+ Zotero.Attachments.createDirectoryForItem(itemID);
+ }
+ Zotero.File.putContentsAsync(cacheFile, text)
+ .catch(function (e) {
+ Zotero.debug(e, 1);
+ Components.utils.reportError(e);
+ })
+ }
+ }
+ }
+ catch (e) {
+ Zotero.debug(e, 1);
+ Components.utils.reportError(e);
+ continue;
+ }
+ }
+ else {
+ Zotero.debug("Skipping non-text file getting full-text content for item "
+ + libraryKey, 2);
+
+ // Delete rows for items that weren't supposed to be indexed
+ this.clearItemWords(itemID);
+ continue;
+ }
+
+ // If this isn't the first item and it would put us over the limit,
+ // skip it
+ if (!first && maxChars && ((chars + text.length) > maxChars)) {
+ continue;
+ }
+ chars += text.length;
+ first = false;
+
+ contentItems.push({
+ libraryID: item.libraryID,
+ key: item.key,
+ text: text,
+ indexedChars: row.indexedChars ? row.indexedChars : 0,
+ totalChars: row.totalChars ? row.totalChars : 0,
+ indexedPages: row.indexedPages ? row.indexedPages : 0,
+ totalPages: row.totalPages ? row.totalPages : 0
+ });
+
+ if (maxChars && chars > maxChars) {
+ break;
+ }
+ }
+ return contentItems;
+ }
+
+
+ /**
+ * @return {String} PHP-formatted POST data for items not yet downloaded
+ */
+ this.getUndownloadedPostData = function () {
+ // On upgrade, get all content
+ var sql = "SELECT value FROM settings WHERE setting='fulltext' AND key='downloadAll'";
+ if (Zotero.DB.valueQuery(sql)) {
+ return "&ftkeys=all";
+ }
+
+ var sql = "SELECT itemID FROM fulltextItems WHERE synced="
+ + SYNC_STATE_TO_DOWNLOAD;
+ var itemIDs = Zotero.DB.columnQuery(sql);
+ if (!itemIDs) {
+ return "";
+ }
+ var undownloaded = {};
+ for each (let itemID in itemIDs) {
+ let item = Zotero.Items.get(itemID);
+ let libraryID = item.libraryID
+ libraryID = libraryID ? libraryID : Zotero.libraryID;
+ if (!undownloaded[libraryID]) {
+ undownloaded[libraryID] = [];
+ }
+ undownloaded[libraryID].push(item.key);
+ }
+ var data = "";
+ for (let libraryID in undownloaded) {
+ for (let i = 0; i < undownloaded[libraryID].length; i++) {
+ data += "&" + encodeURIComponent("ftkeys[" + libraryID + "][" + i + "]")
+ + "=" + undownloaded[libraryID][i];
+ }
+ }
+ return data;
+ }
+
+
+ /**
+ * Save full-text content and stats to a cache file
+ */
+ this.setItemContent = function (libraryID, key, text, stats, version) {
+ var item = Zotero.Items.getByLibraryAndKey(libraryID, key);
+ if (!item) {
+ let msg = "Item not found setting full-text content";
+ Zotero.debug(msg, 1);
+ Components.utils.reportError(msg);
+ return;
+ }
+ var itemID = item.id;
+
+ if (text !== '') {
+ var cacheFile = this.getItemProcessorCacheFile(itemID);
+
+ // If a storage directory doesn't exist, create it
+ if (!cacheFile.parent.exists()) {
+ Zotero.Attachments.createDirectoryForItem(itemID);
+ }
+
+ Zotero.debug("Writing full-text content and data to " + cacheFile.path);
+ Zotero.File.putContents(cacheFile, JSON.stringify({
+ indexedChars: stats.indexedChars,
+ totalChars: stats.totalChars,
+ indexedPages: stats.indexedPages,
+ totalPages: stats.totalPages,
+ version: version,
+ text: text
+ }));
+ var synced = SYNC_STATE_TO_PROCESS;
+ }
+ else {
+ Zotero.debug("Marking full-text content for download");
+ var synced = SYNC_STATE_TO_DOWNLOAD;
+ }
+
+ // Mark the item as unprocessed
+ if (Zotero.DB.valueQuery("SELECT COUNT(*) FROM fulltextItems WHERE itemID=?", itemID)) {
+ Zotero.DB.query("UPDATE fulltextItems SET synced=? WHERE itemID=?", [synced, itemID]);
+ }
+ // If not yet indexed, add an empty row
+ else {
+ Zotero.DB.query(
+ "REPLACE INTO fulltextItems (itemID, version, synced) VALUES (?, 0, ?)",
+ [itemID, synced]
+ );
+ }
+
+ if (_upgradeCheck) {
+ Zotero.DB.query("DELETE FROM settings WHERE setting='fulltext' AND key='downloadAll'");
+ _upgradeCheck = false;
+ }
+
+ this.startContentProcessor();
+ }
+
+
+ /**
+ * Start the idle observer for the background content processor
+ */
+ this.startContentProcessor = function () {
+ if (!_idleObserverIsRegistered) {
+ Zotero.debug("Initializing full-text content ingester idle observer");
+ var idleService = Components.classes["@mozilla.org/widget/idleservice;1"]
+ .getService(Components.interfaces.nsIIdleService);
+ idleService.addIdleObserver(this.idleObserver, _idleObserverDelay);
+ _idleObserverIsRegistered = true;
+ }
+ }
+
+ /**
+ * Stop the idle observer and a running timer, if there is one
+ */
+ this.stopContentProcessor = function () {
+ if (_idleObserverIsRegistered) {
+ var idleService = Components.classes["@mozilla.org/widget/idleservice;1"]
+ .getService(Components.interfaces.nsIIdleService);
+ idleService.removeIdleObserver(this.idleObserver, _idleObserverDelay);
+ _idleObserverIsRegistered = false;
+ }
+
+ if (_processorTimer) {
+ _processorTimer.cancel();
+ _processorTimer = null;
+ }
+ }
+
+ /**
+ *
+ * @param {Array<Integer>} itemIDs An array of itemIDs to process; if this
+ * is omitted, a database query is made
+ * to find unprocessed content
+ * @return {Boolean} TRUE if there's more content to process; FALSE otherwise
+ */
+ this.processUnprocessedContent = function (itemIDs) {
+ if (!itemIDs) {
+ Zotero.debug("Checking for unprocessed full-text content");
+ let sql = "SELECT itemID FROM fulltextItems WHERE synced="
+ + SYNC_STATE_TO_PROCESS;
+ itemIDs = Zotero.DB.columnQuery(sql) || [];
+ }
+ // If there's no more unprocessed content, stop the idle observer
+ if (!itemIDs.length) {
+ Zotero.debug("No unprocessed full-text content found");
+ this.stopContentProcessor();
+ return;
+ }
+
+ let itemID = itemIDs.shift();
+ let item = Zotero.Items.get(itemID);
+
+ Zotero.debug("Processing full-text content for item " + item.libraryKey);
+
+ Zotero.Fulltext.indexFromProcessorCache(itemID)
+ .then(function () {
+ if (itemIDs.length) {
+ if (!_processorTimer) {
+ _processorTimer = Components.classes["@mozilla.org/timer;1"]
+ .createInstance(Components.interfaces.nsITimer);
+ }
+ _processorTimer.initWithCallback(
+ function () {
+ Zotero.Fulltext.processUnprocessedContent(itemIDs);
+ },
+ 100,
+ Components.interfaces.nsITimer.TYPE_ONE_SHOT
+ );
+ }
+ })
+ .done();
+ }
+
+ this.idleObserver = {
+ observe: function (subject, topic, data) {
+ // On idle, start the background processor
+ if (topic == 'idle') {
+ Zotero.Fulltext.processUnprocessedContent();
+ }
+ // When back from idle, stop the processor (but keep the idle
+ // observer registered)
+ else if (topic == 'active') {
+ if (_processorTimer) {
+ Zotero.debug("Stopping full-text content processor");
+ _processorTimer.cancel();
+ }
+ }
+ }
+ };
+
+
+ this.indexFromProcessorCache = function (itemID) {
+ var self = this;
+ return Q.fcall(function () {
+ var cacheFile = self.getItemProcessorCacheFile(itemID);
+ if (!cacheFile.exists()) {
+ Zotero.debug("Full-text content processor cache file doesn't exist for item " + itemID);
+ return false;
+ }
+
+ let data;
+
+ return Zotero.File.getContentsAsync(cacheFile)
+ .then(function (json) {
+ data = JSON.parse(json);
+
+ // Write the text content to the regular cache file
+ cacheFile = self.getItemCacheFile(itemID);
+
+ Zotero.debug("Writing full-text content to " + cacheFile.path);
+ return Zotero.File.putContentsAsync(cacheFile, data.text, "UTF-8");
+ })
+ .then(function () {
+ Zotero.Fulltext.indexString(
+ data.text,
+ "UTF-8",
+ itemID,
+ {
+ indexedChars: data.indexedChars,
+ totalChars: data.totalChars,
+ indexedPages: data.indexedPages,
+ totalPages: data.totalPages
+ },
+ data.version,
+ 1
+ );
+ });
+ })
+ .catch(function (e) {
+ Components.utils.reportError(e);
+ Zotero.debug(e, 1);
+ return false;
+ });
+ }
+
+ //
+ // End full-text content syncing
+ //
+
+
/*
- * Scan a file for a text string
+ * Scan a string for another string
*
* _items_ -- one or more attachment items to search
* _searchText_ -- text pattern to search for
@@ -548,21 +944,7 @@ Zotero.Fulltext = new function(){
*
* - Slashes in regex are optional
*/
- function findTextInFile(file, charset, searchText, mode){
- Zotero.debug("Searching for text '" + searchText + "' in " + file.path);
-
- var maxLength = Zotero.Prefs.get('fulltext.textMaxLength');
- var str = Zotero.File.getContents(file, charset, maxLength);
-
- // If not binary mode, convert HTML to text
- if (!mode || mode.indexOf('Binary')==-1){
- // Split elements to avoid word concatentation
- str = str.replace(/(>)/g, '$1 ');
-
- // Parse to avoid searching on HTML
- str = this.HTMLToText(str);
- }
-
+ this.findTextInString = function (content, searchText, mode) {
switch (mode){
case 'regexp':
case 'regexpCS':
@@ -583,7 +965,7 @@ Zotero.Fulltext = new function(){
try {
var re = new RegExp(searchText, flags);
- var matches = re.exec(str);
+ var matches = re.exec(content);
}
catch (e) {
Zotero.debug(e, 1);
@@ -591,7 +973,7 @@ Zotero.Fulltext = new function(){
}
if (matches){
Zotero.debug("Text found");
- return str.substr(matches.index, 50);
+ return content.substr(matches.index, 50);
}
break;
@@ -599,12 +981,12 @@ Zotero.Fulltext = new function(){
default:
// Case-insensitive
searchText = searchText.toLowerCase();
- str = str.toLowerCase();
+ content = content.toLowerCase();
- var pos = str.indexOf(searchText);
+ var pos = content.indexOf(searchText);
if (pos!=-1){
Zotero.debug('Text found');
- return str.substr(pos, 50);
+ return content.substr(pos, 50);
}
}
@@ -633,42 +1015,78 @@ Zotero.Fulltext = new function(){
var items = Zotero.Items.get(items);
var found = [];
- for each(var i in items){
- if (!i.isAttachment()){
+ for each (let item in items) {
+ if (!item.isAttachment()) {
continue;
}
- var file = i.getFile();
- if (!file){
- continue;
- }
+ let itemID = item.id;
+ let content;
+ let mimeType = item.attachmentMIMEType;
+ let maxLength = Zotero.Prefs.get('fulltext.textMaxLength');
+ let binaryMode = mode && mode.indexOf('Binary') != -1;
- var mimeType = i.attachmentMIMEType;
if (isCachedMIMEType(mimeType)) {
- var file = this.getItemCacheFile(i.id);
+ let file = this.getItemCacheFile(itemID);
if (!file.exists()) {
continue;
}
- mimeType = 'text/plain';
- var charset = 'utf-8';
+ Zotero.debug("Searching for text '" + searchText + "' in " + file.path);
+ content = Zotero.File.getContents(file, 'utf-8', maxLength);
}
else {
// If not binary mode, only scan plaintext files
- if (!mode || mode.indexOf('Binary') == -1) {
+ if (!binaryMode) {
if (!Zotero.MIME.isTextType(mimeType)) {
Zotero.debug('Not scanning MIME type ' + mimeType, 4);
continue;
}
}
- var charset = i.attachmentCharset;
+ // Check for a cache file
+ let cacheFile = this.getItemCacheFile(itemID);
+ if (cacheFile.exists()) {
+ Zotero.debug("Searching for text '" + searchText + "' in " + cacheFile.path);
+ content = Zotero.File.getContents(cacheFile, 'utf-8', maxLength);
+ }
+ else {
+ // If that doesn't exist, check for the actual file
+ let file = item.getFile();
+ if (!file) {
+ continue;
+ }
+
+ Zotero.debug("Searching for text '" + searchText + "' in " + file.path);
+ content = Zotero.File.getContents(file, item.attachmentCharset, maxLength);
+
+ // If HTML and not binary mode, convert to text
+ if (mimeType == 'text/html' && !binaryMode) {
+ // Split elements to avoid word concatentation
+ content = content.replace(/(>)/g, '$1 ');
+
+ content = this.HTMLToText(content);
+
+ // Write the converted text to a cache file for future searches
+ Zotero.debug("Writing converted full-text content to " + cacheFile.path);
+ if (!cacheFile.parent.exists()) {
+ Zotero.Attachments.createDirectoryForItem(itemID);
+ }
+ Zotero.File.putContentsAsync(cacheFile, content)
+ .catch(function (e) {
+ Zotero.debug(e, 1);
+ Components.utils.reportError(e);
+ })
+ }
+ }
}
- var match = this.findTextInFile(file, charset, searchText, mode);
-
- if (match != -1){
- found.push({id:i.getID(), match:match});
+ let match = this.findTextInString(content, searchText, mode);
+ if (match != -1) {
+ found.push({
+ id: itemID,
+ match: match
+ });
}
}
@@ -681,8 +1099,8 @@ Zotero.Fulltext = new function(){
var sql = "SELECT rowid FROM fulltextItems WHERE itemID=? LIMIT 1";
var indexed = Zotero.DB.valueQuery(sql, itemID);
if (indexed) {
- Zotero.DB.query("DELETE FROM fulltextItems WHERE itemID=?", itemID);
Zotero.DB.query("DELETE FROM fulltextItemWords WHERE itemID=?", itemID);
+ Zotero.DB.query("DELETE FROM fulltextItems WHERE itemID=?", itemID);
}
Zotero.DB.commitTransaction();
@@ -760,15 +1178,27 @@ Zotero.Fulltext = new function(){
function setPages(itemID, obj) {
var sql = "UPDATE fulltextItems SET indexedPages=?, totalPages=? WHERE itemID=?";
- Zotero.DB.query(sql, [obj.indexed ? obj.indexed : null,
- obj.total ? obj.total : null, itemID]);
+ Zotero.DB.query(
+ sql,
+ [
+ obj.indexed ? parseInt(obj.indexed) : null,
+ obj.total ? parseInt(obj.total) : null,
+ itemID
+ ]
+ );
}
function setChars(itemID, obj) {
var sql = "UPDATE fulltextItems SET indexedChars=?, totalChars=? WHERE itemID=?";
- Zotero.DB.query(sql, [obj.indexed ? obj.indexed : null,
- obj.total ? obj.total : null, itemID]);
+ Zotero.DB.query(
+ sql,
+ [
+ obj.indexed ? parseInt(obj.indexed) : null,
+ obj.total ? parseInt(obj.total) : null,
+ itemID
+ ]
+ );
}
@@ -878,6 +1308,13 @@ Zotero.Fulltext = new function(){
}
+ this.getItemProcessorCacheFile = function (itemID) {
+ var cacheFile = Zotero.Attachments.getStorageDirectory(itemID);
+ cacheFile.append(_processorCacheFile);
+ return cacheFile;
+ }
+
+
/*
* Returns true if an item can be reindexed
*
@@ -971,18 +1408,14 @@ Zotero.Fulltext = new function(){
}
Zotero.debug('Clearing full-text cache file for item ' + itemID);
- switch (item.attachmentMIMEType) {
- case 'application/pdf':
- var cacheFile = this.getItemCacheFile(itemID);
- if (cacheFile.exists()) {
- try {
- cacheFile.remove(false);
- }
- catch (e) {
- Zotero.File.checkFileAccessError(e, cacheFile, 'delete');
- }
- }
- break;
+ var cacheFile = this.getItemCacheFile(itemID);
+ if (cacheFile.exists()) {
+ try {
+ cacheFile.remove(false);
+ }
+ catch (e) {
+ Zotero.File.checkFileAccessError(e, cacheFile, 'delete');
+ }
}
}
diff --git a/chrome/content/zotero/xpcom/sync.js b/chrome/content/zotero/xpcom/sync.js
@@ -55,6 +55,10 @@ Zotero.Sync = new function() {
setting: {
singular: 'Setting',
plural: 'Settings'
+ },
+ fulltext: {
+ singular: 'Fulltext',
+ plural: 'Fulltexts'
}
};
});
@@ -132,7 +136,7 @@ Zotero.Sync = new function() {
}
for (var type in this.syncObjects) {
- if (type == 'setting') {
+ if (type == 'setting' || type == 'fulltext') {
continue;
}
@@ -451,6 +455,9 @@ Zotero.Sync.EventListener = new function () {
if (type == 'setting') {
[libraryID, key] = ids[i].split("/");
}
+ else if (type == 'fulltext') {
+ continue;
+ }
else {
var oldItem = extraData[ids[i]].old;
libraryID = oldItem.primary.libraryID;
@@ -1462,6 +1469,8 @@ Zotero.Sync.Server = new function () {
body += '&upload=1';
}
+ body += Zotero.Fulltext.getUndownloadedPostData();
+
Zotero.Sync.Runner.setSyncStatus(Zotero.getString('sync.status.gettingUpdatedData'));
Zotero.HTTP.doPost(url, body, function (xmlhttp) {
@@ -1694,6 +1703,14 @@ Zotero.Sync.Server = new function () {
var sql = "UPDATE syncedSettings SET synced=1";
Zotero.DB.query(sql);
+ if (syncSession.fulltextItems && syncSession.fulltextItems.length) {
+ let sql = "UPDATE fulltextItems SET synced=1 WHERE itemID=?";
+ for each (let lk in syncSession.fulltextItems) {
+ let item = Zotero.Items.getByLibraryAndKey(lk.libraryID, lk.key);
+ Zotero.DB.query(sql, item.id);
+ }
+ }
+
//throw('break2');
Zotero.DB.commitTransaction();
@@ -2825,6 +2842,26 @@ Zotero.Sync.Server.Data = new function() {
Zotero.SyncedSettings.setSynchronous(libraryID, name, value, version, true);
continue;
}
+ else if (type == 'fulltext') {
+ if (!libraryID) {
+ libraryID = 0;
+ }
+ let key = objectNode.getAttribute('key');
+ Zotero.debug("Processing remote full-text content for item " + libraryID + "/" + key);
+ Zotero.Fulltext.setItemContent(
+ libraryID,
+ key,
+ objectNode.textContent,
+ {
+ indexedChars: parseInt(objectNode.getAttribute('indexedChars')),
+ totalChars: parseInt(objectNode.getAttribute('totalChars')),
+ indexedPages: parseInt(objectNode.getAttribute('indexedPages')),
+ totalPages: parseInt(objectNode.getAttribute('totalPages'))
+ },
+ parseInt(objectNode.getAttribute('version'))
+ );
+ continue;
+ }
var key = objectNode.getAttribute('key');
var objLibraryKeyHash = Zotero[Types].makeLibraryKeyHash(libraryID, key);
@@ -3537,6 +3574,36 @@ Zotero.Sync.Server.Data = new function() {
docElem.appendChild(settingsNode);
}
+ // Add up to 500K characters of full-text content
+ try {
+ var rows = Zotero.Fulltext.getUnsyncedContent(500000);
+ }
+ catch (e) {
+ Zotero.debug(e, 1);
+ Components.utils.reportError(e);
+ var rows = [];
+ }
+ if (rows.length) {
+ let fulltextsNode = doc.createElement('fulltexts');
+ syncSession.fulltextItems = [];
+ for (let i=0; i<rows.length; i++) {
+ syncSession.fulltextItems.push({
+ libraryID: rows[i].libraryID,
+ key: rows[i].key
+ })
+ let node = doc.createElement('fulltext');
+ node.setAttribute('libraryID', rows[i].libraryID ? rows[i].libraryID : Zotero.libraryID);
+ node.setAttribute('key', rows[i].key);
+ node.setAttribute('indexedChars', rows[i].indexedChars);
+ node.setAttribute('totalChars', rows[i].totalChars);
+ node.setAttribute('indexedPages', rows[i].indexedPages);
+ node.setAttribute('totalPages', rows[i].totalPages);
+ node.appendChild(doc.createTextNode(_xmlize(rows[i].text)));
+ fulltextsNode.appendChild(node);
+ }
+ docElem.appendChild(fulltextsNode);
+ }
+
// Deletions
var deletedNode = doc.createElement('deleted');
var inserted = false;
diff --git a/resource/schema/userdata.sql b/resource/schema/userdata.sql
@@ -271,6 +271,7 @@ CREATE TABLE fulltextItems (
totalPages INT,
indexedChars INT,
totalChars INT,
+ synced INT DEFAULT 0,
FOREIGN KEY (itemID) REFERENCES items(itemID)
);
CREATE INDEX fulltextItems_version ON fulltextItems(version);