commit 1cfc90186f44f9325f2cf6b15894083f814373bd
parent 26dcaad76aa83bed541658b7ecf44a17adff5447
Author: Dan Stillman <dstillman@zotero.org>
Date: Thu, 1 Mar 2018 01:07:14 -0500
Serialize attachment indexing
Add newly added attachments to a queue, start processing it after five
seconds have passed since the last attachment was added, and process
another every half second after that unless another is added.
This queue won't survive a restart, so the queue should really be in the
DB, but this should avoid problems when adding multiple attachments at
once.
Addresses #1284
Diffstat:
3 files changed, 43 insertions(+), 44 deletions(-)
diff --git a/chrome/content/zotero/xpcom/attachments.js b/chrome/content/zotero/xpcom/attachments.js
@@ -409,6 +409,8 @@ Zotero.Attachments = new function(){
attachmentItem.attachmentPath = 'storage:' + fileName;
var itemID = yield attachmentItem.save(saveOptions);
+ Zotero.Fulltext.queueItem(attachmentItem);
+
// DEBUG: Does this fail if 'storage' is symlinked to another drive?
destDir = this.getStorageDirectory(attachmentItem).path;
yield OS.File.move(tmpDir, destDir);
@@ -428,16 +430,6 @@ Zotero.Attachments = new function(){
throw e;
}
- // We don't have any way of knowing that the file is flushed to disk,
- // so we just wait a second before indexing and hope for the best.
- // We'll index it later if it fails. (This may not be necessary.)
- //
- // If this is removed, the afterEach() delay in the server_connector /connector/saveSnapshot
- // tests can also be removed.
- setTimeout(function () {
- Zotero.Fulltext.indexItems([attachmentItem.id]);
- }, 1000);
-
return attachmentItem;
}.bind(this));
@@ -675,6 +667,8 @@ Zotero.Attachments = new function(){
attachmentItem.attachmentPath = 'storage:' + fileName;
var itemID = yield attachmentItem.save();
+ Zotero.Fulltext.queueItem(attachmentItem);
+
// DEBUG: Does this fail if 'storage' is symlinked to another drive?
destDir = this.getStorageDirectory(attachmentItem).path;
yield OS.File.move(tmpDir, destDir);
@@ -699,21 +693,6 @@ Zotero.Attachments = new function(){
throw e;
}
- // We don't have any way of knowing that the file is flushed to disk,
- // so we just wait a second before indexing and hope for the best.
- // We'll index it later if it fails. (This may not be necessary.)
- if (contentType == 'application/pdf') {
- setTimeout(function () {
- Zotero.Fulltext.indexPDF(attachmentItem.getFilePath(), attachmentItem.id);
- }, 1000);
- }
- else if (Zotero.MIME.isTextType(contentType)) {
- // wbp.saveDocument consumes the document context (in Zotero.Utilities.Internal.saveDocument)
- // Seems like a mozilla bug, but nothing on bugtracker.
- // Either way, we don't rely on Zotero.Fulltext.indexDocument here anymore
- yield Zotero.Fulltext.indexItems(attachmentItem.id, true, true);
- }
-
return attachmentItem;
});
diff --git a/chrome/content/zotero/xpcom/fulltext.js b/chrome/content/zotero/xpcom/fulltext.js
@@ -539,6 +539,45 @@ Zotero.Fulltext = Zotero.FullText = new function(){
});
+ // TEMP: Temporary mechanism to serialize indexing of new attachments
+ //
+ // This should instead save the itemID to a table that's read by the content processor
+ var _queue = [];
+ var _indexing = false;
+ var _nextIndexTime;
+ var _indexDelay = 5000;
+ var _indexInterval = 500;
+ this.queueItem = function (item) {
+ // Don't index files in the background during tests
+ if (Zotero.test) return;
+
+ _queue.push(item.id);
+ _nextIndexTime = Date.now() + _indexDelay;
+ setTimeout(() => {
+ _processNextItem()
+ }, _indexDelay);
+ };
+
+ async function _processNextItem() {
+ if (!_queue.length) return;
+ // Another _processNextItem() was scheduled
+ if (Date.now() < _nextIndexTime) return;
+ // If indexing is already running, _processNextItem() will be called when it's done
+ if (_indexing) return;
+ _indexing = true;
+ var itemID = _queue.shift();
+ try {
+ await Zotero.Fulltext.indexItems([itemID], false, true);
+ }
+ finally {
+ _indexing = false;
+ }
+ setTimeout(() => {
+ _processNextItem();
+ }, _indexInterval);
+ };
+
+
//
// Full-text content syncing
//
diff --git a/test/tests/server_connectorTest.js b/test/tests/server_connectorTest.js
@@ -158,9 +158,6 @@ describe("Connector Server", function () {
item = Zotero.Items.get(ids[0]);
assert.isTrue(item.isImportedAttachment());
- // Wait until indexing is done
- yield waitForItemEvent('refresh');
-
var req = yield reqPromise;
assert.equal(req.status, 201);
});
@@ -257,12 +254,6 @@ describe("Connector Server", function () {
});
describe("/connector/saveSnapshot", function () {
- // TEMP: Wait for indexing to complete, which happens after a 1-second delay, after a 201 has
- // been returned to the connector. Would be better to make sure indexing has completed.
- afterEach(function* () {
- yield Zotero.Promise.delay(1050);
- });
-
it("should save a webpage item and snapshot to the current selected collection", function* () {
var collection = yield createDataObject('collection');
yield waitForItemsLoad(win);
@@ -402,12 +393,6 @@ describe("Connector Server", function () {
await waitForItemsLoad(win);
});
- // TEMP: Wait for indexing to complete, which happens after a 1-second delay, after a 201 has
- // been returned to the connector. Would be better to make sure indexing has completed.
- afterEach(function* () {
- yield Zotero.Promise.delay(1050);
- });
-
it("should return 500 if no translator available for page", function* () {
var xmlhttp = yield Zotero.HTTP.request(
'POST',
@@ -510,8 +495,6 @@ describe("Connector Server", function () {
var item = Zotero.Items.get(ids[0]);
assert.isTrue(collection2.hasItem(item.id));
await waitForItemEvent('add');
- // Wait until indexing is done
- await waitForItemEvent('refresh');
var req = await reqPromise;
assert.equal(req.status, 201);
@@ -569,8 +552,6 @@ describe("Connector Server", function () {
var ids = await promise;
var item = Zotero.Items.get(ids[0]);
assert.isTrue(collection2.hasItem(item.id));
- // Wait until indexing is done
- await waitForItemEvent('refresh');
var req = await reqPromise;
assert.equal(req.status, 201);