www

Unnamed repository; edit this file 'description' to name the repository.
Log | Files | Refs | Submodules | README | LICENSE

commit 8a2dc6e7f2f0854ab84b0325414dd394330eae6d
parent 2d46e3d59b5efa81ff582723a4530f5c556cb565
Author: Adomas Venčkauskas <adomas.ven@gmail.com>
Date:   Tue, 12 Jan 2016 13:28:15 +0000

Adds Zotero.FeedReader tests

Diffstat:
Mchrome/content/zotero/feedSettings.js | 63++++++++++++++++++++++++++++++++-------------------------------
Mchrome/content/zotero/xpcom/data/feed.js | 1+
Mchrome/content/zotero/xpcom/feedReader.js | 863++++++++++++++++++++++++++++++++++++++++---------------------------------------
Mtest/content/runtests.js | 3+++
Mtest/content/support.js | 4++++
Atest/tests/data/feed.rss | 43+++++++++++++++++++++++++++++++++++++++++++
Atest/tests/data/feedDetailed.rss | 90+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
Atest/tests/feedReaderTest.js | 168+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
8 files changed, 774 insertions(+), 461 deletions(-)

diff --git a/chrome/content/zotero/feedSettings.js b/chrome/content/zotero/feedSettings.js @@ -118,7 +118,7 @@ var Zotero_Feed_Settings = new function() { document.documentElement.getButton('accept').disabled = true; }; - this.validateUrl = function() { + this.validateUrl = Zotero.Promise.coroutine(function* () { if (feedReader) { feedReader.terminate(); feedReader = null; @@ -128,36 +128,37 @@ var Zotero_Feed_Settings = new function() { urlTainted = false; if (!url) return; - let fr = feedReader = new Zotero.FeedReader(url); - fr.feedProperties - .then( feed => { - if (feedReader !== fr || urlTainted) return; - - let title = document.getElementById('feed-title'); - if (!data.url && feed.title) { - title.value = feed.title; - } - - let ttl = document.getElementById('feed-ttl'); - if (!data.url && feed.ttl) { - ttl.value = Math.floor(feed.ttl / 60) || 1; - } - - document.getElementById('feed-url').value = url; - - urlIsValid = true; - title.disabled = false; - ttl.disabled = false; - document.getElementById('feed-cleanAfter').disabled = false; - document.documentElement.getButton('accept').disabled = false; - }) - .catch( e => { - Zotero.debug(e); - }) - .finally( () => { - if (feedReader === fr) feedReader = null; - }); - }; + try { + let fr = feedReader = new Zotero.FeedReader(url); + yield fr.process(); + let feed = fr.feedProperties; + if (feedReader !== fr || urlTainted) return; + + let title = document.getElementById('feed-title'); + if (!data.url && feed.title) { + title.value = feed.title; + } + + let ttl = document.getElementById('feed-ttl'); + if (!data.url && feed.ttl) { + ttl.value = Math.floor(feed.ttl / 60) || 1; + } + + document.getElementById('feed-url').value = url; + + urlIsValid = true; + title.disabled = false; + ttl.disabled = false; + document.getElementById('feed-cleanAfter').disabled = false; + document.documentElement.getButton('accept').disabled = false; + } + catch (e) { + Zotero.debug(e); + } + finally { + if (feedReader === fr) feedReader = null; + } + }); this.accept = function() { data.url = document.getElementById('feed-url').value; diff --git a/chrome/content/zotero/xpcom/data/feed.js b/chrome/content/zotero/xpcom/data/feed.js @@ -325,6 +325,7 @@ Zotero.Feed.prototype._updateFeed = Zotero.Promise.coroutine(function* () { yield this.clearExpiredItems(); try { let fr = new Zotero.FeedReader(this.url); + yield fr.process(); let itemIterator = new fr.ItemIterator(); let item, toAdd = [], processedGUIDs = []; while (item = yield itemIterator.next().value) { diff --git a/chrome/content/zotero/xpcom/feedReader.js b/chrome/content/zotero/xpcom/feedReader.js @@ -52,477 +52,481 @@ * @method {void} terminate Stops retrieving/parsing the feed. Data parsed up * to this point is still available. */ -Zotero.FeedReader = new function() { - let ios = Components.classes["@mozilla.org/network/io-service;1"] - .getService(Components.interfaces.nsIIOService); - - /***************************** - * Item processing functions * - *****************************/ - - /** - * Determine item type based on item data - */ - function guessItemType(item) { - // Default to journalArticle - item.itemType = 'journalArticle'; +Zotero.FeedReader = function(url) { + if (!url) throw new Error("Feed URL must be supplied"); + + + this._url = url; + this._feedItems = [Zotero.Promise.defer()]; + this._feedProcessed = Zotero.Promise.defer(); + + let feedFetched = Zotero.Promise.defer(); + feedFetched.promise.then(function(feed) { + let info = {}; - if (item.ISSN) { - return; // journalArticle - } + info.title = feed.title ? feed.title.plainText() : ''; + info.subtitle = feed.subtitle ? feed.subtitle.plainText() : ''; - if (item.ISBN) { - item.itemType = 'bookSection'; - return; - } + if (feed.updated) info.updated = new Date(feed.updated); - if (item.publicationType) { - let type = item.publicationType.toLowerCase(); - if (type.indexOf('conference') != -1) { - item.itemType = 'conferencePaper'; - return; - } - if (type.indexOf('journal') != -1) { - item.itemType = 'journalArticle'; - return; - } - if (type.indexOf('book') != -1) { - item.itemType = 'bookSection'; - return; - } - } - }; - - /* - * Fetch creators from given field of a feed entry - */ - function processCreators(feedEntry, field, role) { - let names = [], - nameStr; - try { - let personArr = feedEntry[field]; // Seems like this part can throw if there is no author data in the feed - for (let i=0; i<personArr.length; i++) { - let person = personArr.queryElementAt(i, Components.interfaces.nsIFeedPerson); - if (!person || !person.name) continue; + // categories: MDN says "not yet implemented" + + info.creators = Zotero.FeedReader._processCreators(feed, 'authors', 'author'); + + // TODO: image as icon + + let publicationTitle = Zotero.FeedReader._getFeedField(feed, 'publicationName', 'prism') + || Zotero.FeedReader._getFeedField(feed, 'pubTitle'); + if (publicationTitle) info.publicationTitle = publicationTitle; + + let publisher = Zotero.FeedReader._getFeedField(feed, 'publisher', 'dc'); + if (publisher) info.publisher = publisher; + + let rights = (feed.rights && feed.rights.plainText()) + || Zotero.FeedReader._getFeedField(feed, 'copyright', 'prism') + || Zotero.FeedReader._getFeedField(feed, 'rights', 'dc') + || Zotero.FeedReader._getFeedField(feed, 'copyright'); + if (rights) info.rights = rights; + + let issn = Zotero.FeedReader._getFeedField(feed, 'issn', 'prism'); + if (issn) info.ISSN = issn; + + let isbn = Zotero.FeedReader._getFeedField(feed, 'isbn', 'prism') + || Zotero.FeedReader._getFeedField(feed, 'isbn') + if (isbn) info.ISBN = isbn; + + let language = Zotero.FeedReader._getFeedField(feed, 'language', 'dc') + || Zotero.FeedReader._getFeedField(feed, 'language'); + if (language) info.language = language; + + let ttl = Zotero.FeedReader._getFeedField(feed, 'ttl'); + if (ttl) info.ttl = ttl; + + this._feedProperties = info; + this._feed = feed; + return info; + }.bind(this)).then(function(){ + let items = this._feed.items; + if (items && items.length) { + for (let i=0; i<items.length; i++) { + let item = items.queryElementAt(i, Components.interfaces.nsIFeedEntry); + if (!item) continue; - let name = Zotero.Utilities.trimInternal(person.name); - if (!name) continue; + let feedItem = Zotero.FeedReader._getFeedItem(item, this._feedProperties); + if (!feedItem) continue; - let commas = name.split(',').length - 1, - other = name.split(/\s(?:and|&)\s|;/).length - 1, - separators = commas + other; - if (personArr.length == 1 && - // Has typical name separators - (other || commas > 1 - // If only one comma and first part has more than one space, - // it's probably not lastName, firstName - || (commas == 1 && name.split(/\s*,/)[0].indexOf(' ') != -1) - ) - ) { - // Probably multiple authors listed in a single field - nameStr = name; - break; // For clarity. personArr.length == 1 anyway - } else { - names.push(name); - } + let lastItem = this._feedItems[this._feedItems.length - 1]; + this._feedItems.push(Zotero.Promise.defer()); // Push a new deferred promise so an iterator has something to return + lastItem.resolve(feedItem); } - } catch(e) { - if (e.result != Components.results.NS_ERROR_FAILURE) throw e - - if (field != 'authors') return []; - - // ieeexplore places these in "authors"... sigh - nameStr = getFeedField(feedEntry, null, 'authors'); - if (nameStr) nameStr = Zotero.Utilities.trimInternal(nameStr); - if (!nameStr) return []; - } - - if (nameStr) { - names = nameStr.split(/\s(?:and|&)\s|\s*[,;]\s*/); + + this._feedProcessed.resolve(); } - - let creators = []; - for (let i=0; i<names.length; i++) { - let creator = Zotero.Utilities.cleanAuthor( - names[i], - role, - names[i].split(',').length == 2 - ); - if (!creator.firstName) { - creator.fieldMode = 1; + }.bind(this)).finally(function() { + // Make sure the last promise gets resolved to null + let lastItem = this._feedItems[this._feedItems.length - 1]; + lastItem.resolve(null); + }.bind(this)); + + // Set up asynchronous feed processor + let feedProcessor = Components.classes["@mozilla.org/feed-processor;1"] + .createInstance(Components.interfaces.nsIFeedProcessor); + + let feedUrl = Services.io.newURI(url, null, null); + feedProcessor.parseAsync(null, feedUrl); + + feedProcessor.listener = { + /* + * MDN suggests that we could use nsIFeedProgressListener to handle the feed + * as it gets loaded, but this is actually not implemented (as of 32.0.3), + * so we have to load the whole feed and handle it in handleResult. + */ + handleResult: (result) => { + if (!result.doc) { + this.terminate("No Feed"); + return; } - creators.push(creator); + let newFeed = result.doc.QueryInterface(Components.interfaces.nsIFeed); + feedFetched.resolve(newFeed); } - return creators; - } + }; - /********************* - * Utility functions * - *********************/ - /* - * Convert HTML-formatted text to Zotero-compatible formatting - */ - let domDiv = Zotero.Utilities.Internal.getDOMDocument().createElement("div"); - function getRichText(feedText, field) { - let domFragment = feedText.createDocumentFragment(domDiv); - return Zotero.Utilities.dom2text(domFragment, field); + Zotero.debug("FeedReader: Fetching feed from " + feedUrl.spec); + + this._channel = Services.io.newChannelFromURI2(feedUrl, null, + Services.scriptSecurityManager.getSystemPrincipal(), null, + Ci.nsILoadInfo.SEC_NORMAL, Ci.nsIContentPolicy.TYPE_OTHER); + this._channel.asyncOpen(feedProcessor, null); // Sends an HTTP request +} + +/* + * The constructor initiates async feed processing, but _feedProcessed + * needs to be resolved before proceeding. + */ +Zotero.FeedReader.prototype.process = Zotero.Promise.coroutine(function* () { + return this._feedProcessed.promise; +}); + +/* + * Terminate feed processing at any given time + * @param {String} status Reason for terminating processing + */ +Zotero.FeedReader.prototype.terminate = function(status) { + Zotero.debug("FeedReader: Terminating feed reader (" + status + ")"); + + // Reject feed promise if not resolved yet + if (this._feedProcessed.promise.isPending()) { + this._feedProcessed.reject(status); } - /* - * Format JS date as SQL date - */ - function formatDate(date) { - return Zotero.Date.dateToSQL(date, true); + // Reject feed item promise if not resolved yet + let lastItem = this._feedItems[this._feedItems.length - 1]; + if (lastItem.promise.isPending()) { + lastItem.reject(status); } - /* - * Get field value from feed entry by namespace:fieldName - */ - // Properties are stored internally as ns+name, but only some namespaces are - // supported. Others are just "null" - let ns = { - 'prism': 'null', - 'dc': 'dc:' + // Close feed connection + if (this._channel.isPending) { + this._channel.cancel(Components.results.NS_BINDING_ABORTED); } - function getFeedField(feedEntry, namespace, field) { - let prefix = namespace ? ns[namespace] || 'null' : ''; - try { - return feedEntry.fields.getPropertyAsAUTF8String(prefix+field); - } catch(e) {} - - try { - if (namespace && !ns[namespace]) { - prefix = namespace + ':'; - return feedEntry.fields.getPropertyAsAUTF8String(prefix+field); +}; + +Zotero.defineProperty(Zotero.FeedReader.prototype, 'feedProperties', { + get: function(){ + if (!this._feedProperties) { + throw new Error("Feed has not been resolved yet. Try calling FeedReader#process first") + } + return this._feedProperties + } +}); + +/* + * Feed item iterator + * Each iteration returns a _promise_ for an item. The promise _MUST_ be + * resolved before requesting the next item. + * The last item will always be resolved to `null`, unless the feed processing + * is terminated ahead of time, in which case it will be rejected with the reason + * for termination. + */ +Zotero.defineProperty(Zotero.FeedReader.prototype, 'ItemIterator', { + get: function() { + let items = this._feedItems; + let feedReader = this; + + let iterator = function() { + if (!feedReader._feedProperties) { + throw new Error("Feed has not been resolved yet. Try calling FeedReader#process first") } - } catch(e) {} + this.index = 0; + }; + iterator.prototype.next = function() { + let item = items[this.index++]; + return { + value: item ? item.promise : null, + done: this.index >= items.length + }; + }; + + return iterator; + } +}, {lazy: true}); + + +/***************************** + * Item processing functions * + *****************************/ + +/** + * Determine item type based on item data + */ +Zotero.FeedReader._guessItemType = function(item) { + // Default to journalArticle + item.itemType = 'journalArticle'; + + if (item.ISSN) { + return; // journalArticle + } + + if (item.ISBN) { + item.itemType = 'bookSection'; return; } - /* - * Parse feed entry into a Zotero item - */ - function getFeedItem(feedEntry, feedInfo) { - // ID is not required, but most feeds have these and we have to rely on them - // to handle updating properly - if (!feedEntry.id) { - Zotero.debug("FeedReader: Feed item missing an ID"); + if (item.publicationType) { + let type = item.publicationType.toLowerCase(); + if (type.indexOf('conference') != -1) { + item.itemType = 'conferencePaper'; return; } - - let item = { - guid: feedEntry.id - }; - - if (feedEntry.title) item.title = getRichText(feedEntry.title, 'title'); - - if (feedEntry.summary) { - item.abstractNote = getRichText(feedEntry.summary, 'abstractNote'); - - if (!item.title) { - // We will probably have to trim this, so let's use plain text to - // avoid splitting inside some markup - let title = Zotero.Utilities.trimInternal(feedEntry.summary.plainText()); - let splitAt = title.lastIndexOf(' ', 50); - if (splitAt == -1) splitAt = 50; - - item.title = title.substr(0, splitAt); - if (splitAt <= title.length) item.title += '...'; - } + if (type.indexOf('journal') != -1) { + item.itemType = 'journalArticle'; + return; } - - if (feedEntry.link) item.url = feedEntry.link.spec; - - if (feedEntry.updated) item.dateModified = new Date(feedEntry.updated); - - if (feedEntry.published) { - let date = new Date(feedEntry.published); + if (type.indexOf('book') != -1) { + item.itemType = 'bookSection'; + return; + } + } +}; + +/* + * Fetch creators from given field of a feed entry + */ +Zotero.FeedReader._processCreators = function(feedEntry, field, role) { + let names = [], + nameStr; + try { + let personArr = feedEntry[field]; // Seems like this part can throw if there is no author data in the feed + for (let i=0; i<personArr.length; i++) { + let person = personArr.queryElementAt(i, Components.interfaces.nsIFeedPerson); + if (!person || !person.name) continue; - if (!date.getUTCSeconds() && !(date.getUTCHours() && date.getUTCMinutes())) { - // There was probably no time, but there may have been a a date range, - // so something could have ended up in the hour _or_ minute field - item.date = getFeedField(feedEntry, null, 'pubDate') - /* In case it was magically pulled from some other field */ - || ( date.getUTCFullYear() + '-' - + (date.getUTCMonth() + 1) + '-' - + date.getUTCDate() ); - } else { - item.date = formatDate(date); - // Add time zone - } + let name = Zotero.Utilities.trimInternal(person.name); + if (!name) continue; - if (!item.dateModified) { - items.dateModified = date; + let commas = name.split(',').length - 1, + other = name.split(/\s(?:and|&)\s|;/).length - 1, + separators = commas + other; + if (personArr.length == 1 && + // Has typical name separators + (other || commas > 1 + // If only one comma and first part has more than one space, + // it's probably not lastName, firstName + || (commas == 1 && name.split(/\s*,/)[0].indexOf(' ') != -1) + ) + ) { + // Probably multiple authors listed in a single field + nameStr = name; + break; // For clarity. personArr.length == 1 anyway + } else { + names.push(name); } } + } catch(e) { + if (e.result != Components.results.NS_ERROR_FAILURE) throw e; - if (!item.dateModified) { - // When there's no reliable modification date, we can assume that item doesn't get updated - Zotero.debug("FeedReader: Feed item missing a modification date (" + item.guid + ")"); - } - - if (!item.date && item.dateModified) { - // Use lastModified date - item.date = formatDate(item.dateModified); - } + if (field != 'authors') return []; - // Convert date modified to string, since those are directly comparable - if (item.dateModified) item.dateModified = Zotero.Date.dateToSQL(item.dateModified, true); - - if (feedEntry.rights) item.rights = getRichText(feedEntry.rights, 'rights'); - - item.creators = processCreators(feedEntry, 'authors', 'author'); - if (!item.creators.length) { - // Use feed authors as item author. Maybe not the best idea. - for (let i=0; i<feedInfo.creators.length; i++) { - if (feedInfo.creators[i].creatorType != 'author') continue; - item.creators.push(feedInfo.creators[i]); - } + // ieeexplore places these in "authors"... sigh + nameStr = Zotero.FeedReader._getFeedField(feedEntry, 'authors'); + if (nameStr) nameStr = Zotero.Utilities.trimInternal(nameStr); + if (!nameStr) return []; + } + + if (nameStr) { + names = nameStr.split(/\s(?:and|&)\s|\s*[,;]\s*/); + } + + let creators = []; + for (let i=0; i<names.length; i++) { + let creator = Zotero.Utilities.cleanAuthor( + names[i], + role, + names[i].split(',').length == 2 + ); + if (!creator.firstName) { + creator.fieldMode = 1; } - let contributors = processCreators(feedEntry, 'contributors', 'contributor'); - if (contributors.length) item.creators = item.creators.concat(contributors); - - /** Done with basic metadata, now look for better data **/ - - let date = getFeedField(feedEntry, 'prism', 'publicationDate') - || getFeedField(feedEntry, 'dc', 'date'); - if (date) item.date = date; - - let publicationTitle = getFeedField(feedEntry, 'prism', 'publicationName') - || getFeedField(feedEntry, 'dc', 'source') - || getFeedField(feedEntry, null, 'pubTitle'); - if (publicationTitle) item.publicationTitle = publicationTitle; - - let publicationType = getFeedField(feedEntry, null, 'pubType'); - if (publicationType) item.publicationType = publicationType; - - let startPage = getFeedField(feedEntry, null, 'startPage'); - let endPage = getFeedField(feedEntry, null, 'endPage'); - if (startPage || endPage) { - item.pages = ( startPage || '' ) - + ( endPage && startPage ? '–' : '' ) - + ( endPage || '' ); + creators.push(creator); + } + return creators; +} + +/* + * Parse feed entry into a Zotero item + */ +Zotero.FeedReader._getFeedItem = function(feedEntry, feedInfo) { + // ID is not required, but most feeds have these and we have to rely on them + // to handle updating properly + if (!feedEntry.id) { + Zotero.debug("FeedReader: Feed item missing an ID"); + return; + } + + let item = { + guid: feedEntry.id + }; + + if (feedEntry.title) item.title = Zotero.FeedReader._getRichText(feedEntry.title, 'title'); + + if (feedEntry.summary) { + item.abstractNote = Zotero.FeedReader._getRichText(feedEntry.summary, 'abstractNote'); + + if (!item.title) { + // We will probably have to trim this, so let's use plain text to + // avoid splitting inside some markup + let title = Zotero.Utilities.trimInternal(feedEntry.summary.plainText()); + let splitAt = title.lastIndexOf(' ', 50); + if (splitAt == -1) splitAt = 50; + + item.title = title.substr(0, splitAt); + if (splitAt <= title.length) item.title += '...'; } - - let issn = getFeedField(feedEntry, 'prism', 'issn'); - if (issn) item.ISSN = issn; - - let isbn = getFeedField(feedEntry, 'prism', 'isbn') - || getFeedField(feedEntry, null, 'isbn') - if (isbn) item.ISBN = isbn; - - let identifier = getFeedField(feedEntry, 'dc', 'identifier'); - if (identifier) { - let cleanId = Zotero.Utilities.cleanDOI(identifier); - if (cleanId) { - if (!item.DOI) item.DOI = cleanId; - } else if (cleanId = Zotero.Utilities.cleanISBN(identifier)) { - if (!item.ISBN) item.ISBN = cleanId; - } else if (cleanId = Zotero.Utilities.cleanISSN(identifier)) { - if (!item.ISSN) item.ISSN = cleanId; - } + } + + if (feedEntry.link) item.url = feedEntry.link.spec; + + if (feedEntry.updated) item.dateModified = new Date(feedEntry.updated); + + if (feedEntry.published) { + let date = new Date(feedEntry.published); + + if (!date.getUTCSeconds() && !(date.getUTCHours() && date.getUTCMinutes())) { + // There was probably no time, but there may have been a a date range, + // so something could have ended up in the hour _or_ minute field + item.date = getFeedField(feedEntry, null, 'pubDate') + /* In case it was magically pulled from some other field */ + || ( date.getUTCFullYear() + '-' + + (date.getUTCMonth() + 1) + '-' + + date.getUTCDate() ); + } else { + item.date = Zotero.FeedReader._formatDate(date); + // Add time zone } - let publisher = getFeedField(feedEntry, 'dc', 'publisher'); - if (publisher) item.publisher = publisher; - - let rights = getFeedField(feedEntry, 'prism', 'copyright') - || getFeedField(feedEntry, 'dc', 'rights') - || getFeedField(feedEntry, null, 'copyright'); - if (rights) item.rights = rights; - - let language = getFeedField(feedEntry, 'dc', 'language') - || getFeedField(feedEntry, null, 'language'); - if (language) item.language = language; - - /** Incorporate missing values from feed metadata **/ - - let supplementFields = ['publicationTitle', 'ISSN', 'publisher', 'rights', 'language']; - for (let i=0; i<supplementFields.length; i++) { - let field = supplementFields[i]; - if (!item[field] && feedInfo[field]) { - item[field] = feedInfo[field]; - } + if (!item.dateModified) { + items.dateModified = date; } - - guessItemType(item); - - return item; } - /********************* - * FeedReader object * - *********************/ - let FeedReader = function(url) { - if (!url) throw new Error("Feed URL must be supplied"); - - this._feed = Zotero.Promise.defer(); // Fetched asynchronously - - this._feedProperties = this._feed.promise - .then(function(feed) { - let info = {}; - - info.title = feed.title ? feed.title.plainText() : ''; - info.subtitle = feed.subtitle ? feed.subtitle.plainText() : ''; - - if (feed.updated) info.updated = new Date(feed.updated); - - // categories: MDN says "not yet implemented" - - info.creators = processCreators(feed, 'authors', 'author'); - - // TODO: image as icon - - let publicationTitle = getFeedField(feed, 'prism', 'publicationName') - || getFeedField(feed, null, 'pubTitle'); - if (publicationTitle) info.publicationTitle = publicationTitle; - - let publisher = getFeedField(feed, 'dc', 'publisher'); - if (publisher) info.publisher = publisher; - - let rights = (feed.rights && feed.rights.plainText()) - || getFeedField(feed, 'prism', 'copyright') - || getFeedField(feed, 'dc', 'rights') - || getFeedField(feed, null, 'copyright'); - if (rights) info.rights = rights; - - let issn = getFeedField(feed, 'prism', 'issn'); - if (issn) info.ISSN = issn; - - let isbn = getFeedField(feed, 'prism', 'isbn') - || getFeedField(feed, null, 'isbn') - if (isbn) info.ISBN = isbn; - - let language = getFeedField(feed, 'dc', 'language') - || getFeedField(feed, null, 'language'); - if (language) info.language = language; - - let ttl = getFeedField(feed, null, 'ttl'); - if (ttl) info.ttl = ttl; - - return info; - }); - - // Array of deferred item promises - this._feedItems = [Zotero.Promise.defer()]; - - // Process items once they're available and push them into the array - Zotero.Promise.join( - this._feed.promise, - this._feedProperties, - (feed, feedInfo) => { - let items = feed.items; - if (items && items.length) { - for (let i=0; i<items.length; i++) { - let item = items.queryElementAt(i, Components.interfaces.nsIFeedEntry); - if (!item) continue; - - let feedItem = getFeedItem(item, feedInfo); - if (!feedItem) continue; - - let lastItem = this._feedItems[this._feedItems.length - 1]; - this._feedItems.push(Zotero.Promise.defer()); // Push a new deferred promise so an iterator has something to return - lastItem.resolve(feedItem); - } - } - } - ) - .finally(() => { - // Make sure the last promise gets resolved to null - let lastItem = this._feedItems[this._feedItems.length - 1]; - lastItem.resolve(null); - }); - - // Set up asynchronous feed processor - let feedProcessor = Components.classes["@mozilla.org/feed-processor;1"] - .createInstance(Components.interfaces.nsIFeedProcessor); - - let feedUrl = ios.newURI(url, null, null); - feedProcessor.parseAsync(null, feedUrl); - - feedProcessor.listener = { - /* - * MDN suggests that we could use nsIFeedProgressListener to handle the feed - * as it gets loaded, but this is actually not implemented (as of 32.0.3), - * so we have to load the whole feed and handle it in handleResult. - */ - handleResult: (result) => { - if (!result.doc) { - this.terminate("No Feed"); - return; - } - - let newFeed = result.doc.QueryInterface(Components.interfaces.nsIFeed); - this._feed.resolve(newFeed); - } - }; - - Zotero.debug("FeedReader: Fetching feed from " + feedUrl.spec); - - this._channel = ios.newChannelFromURI2(feedUrl, null, - Services.scriptSecurityManager.getSystemPrincipal(), null, - Ci.nsILoadInfo.SEC_NORMAL, Ci.nsIContentPolicy.TYPE_OTHER); - this._channel.asyncOpen(feedProcessor, null); // Sends an HTTP request + if (!item.dateModified) { + // When there's no reliable modification date, we can assume that item doesn't get updated + Zotero.debug("FeedReader: Feed item missing a modification date (" + item.guid + ")"); } - Zotero.defineProperty(FeedReader.prototype, 'feedProperties', { - get: function() this._feedProperties - }); - - /* - * Feed item iterator - * Each iteration returns a _promise_ for an item. The promise _MUST_ be - * resolved before requesting the next item. - * The last item will always be resolved to `null`, unless the feed processing - * is terminated ahead of time, in which case it will be rejected with the reason - * for termination. - */ - Zotero.defineProperty(FeedReader.prototype, 'ItemIterator', { - get: function() { - let items = this._feedItems; - - let iterator = function() { - this.index = 0; - }; - - iterator.prototype.next = function() { - let item = items[this.index++]; - return { - value: item ? item.promise : null, - done: this.index >= items.length - }; - }; - - return iterator; + if (!item.date && item.dateModified) { + // Use lastModified date + item.date = Zotero.FeedReader._formatDate(item.dateModified); + } + + // Convert date modified to string, since those are directly comparable + if (item.dateModified) item.dateModified = Zotero.Date.dateToSQL(item.dateModified, true); + + if (feedEntry.rights) item.rights = Zotero.FeedReader._getRichText(feedEntry.rights, 'rights'); + + item.creators = Zotero.FeedReader._processCreators(feedEntry, 'authors', 'author'); + if (!item.creators.length) { + // Use feed authors as item author. Maybe not the best idea. + for (let i=0; i<feedInfo.creators.length; i++) { + if (feedInfo.creators[i].creatorType != 'author') continue; + item.creators.push(feedInfo.creators[i]); } - }, {lazy: true}); - - /* - * Terminate feed processing at any given time - * @param {String} status Reason for terminating processing - */ - FeedReader.prototype.terminate = function(status) { - Zotero.debug("FeedReader: Terminating feed reader (" + status + ")"); - - // Reject feed promise if not resolved yet - if (this._feed.promise.isPending()) { - this._feed.reject(status); + } + + let contributors = Zotero.FeedReader._processCreators(feedEntry, 'contributors', 'contributor'); + if (contributors.length) item.creators = item.creators.concat(contributors); + + /** Done with basic metadata, now look for better data **/ + + let date = Zotero.FeedReader._getFeedField(feedEntry, 'publicationDate', 'prism') + || Zotero.FeedReader._getFeedField(feedEntry, 'date', 'dc'); + if (date) item.date = date; + + let publicationTitle = Zotero.FeedReader._getFeedField(feedEntry, 'publicationName', 'prism') + || Zotero.FeedReader._getFeedField(feedEntry, 'source', 'dc') + || Zotero.FeedReader._getFeedField(feedEntry, 'pubTitle'); + if (publicationTitle) item.publicationTitle = publicationTitle; + + let publicationType = Zotero.FeedReader._getFeedField(feedEntry, 'pubType'); + if (publicationType) item.publicationType = publicationType; + + let startPage = Zotero.FeedReader._getFeedField(feedEntry, 'startPage'); + let endPage = Zotero.FeedReader._getFeedField(feedEntry, 'endPage'); + if (startPage || endPage) { + item.pages = ( startPage || '' ) + + ( endPage && startPage ? '–' : '' ) + + ( endPage || '' ); + } + + let issn = Zotero.FeedReader._getFeedField(feedEntry, 'issn', 'prism'); + if (issn) item.ISSN = issn; + + let isbn = Zotero.FeedReader._getFeedField(feedEntry, 'isbn', 'prism') + || Zotero.FeedReader._getFeedField(feedEntry, 'isbn') + if (isbn) item.ISBN = isbn; + + let identifier = Zotero.FeedReader._getFeedField(feedEntry, 'identifier', 'dc'); + if (identifier) { + let cleanId = Zotero.Utilities.cleanDOI(identifier); + if (cleanId) { + if (!item.DOI) item.DOI = cleanId; + } else if (cleanId = Zotero.Utilities.cleanISBN(identifier)) { + if (!item.ISBN) item.ISBN = cleanId; + } else if (cleanId = Zotero.Utilities.cleanISSN(identifier)) { + if (!item.ISSN) item.ISSN = cleanId; } - - // Reject feed item promise if not resolved yet - let lastItem = this._feedItems[this._feedItems.length - 1]; - if (lastItem.promise.isPending()) { - lastItem.reject(status); + } + + let publisher = Zotero.FeedReader._getFeedField(feedEntry, 'publisher', 'dc'); + if (publisher) item.publisher = publisher; + + let rights = Zotero.FeedReader._getFeedField(feedEntry, 'copyright', 'prism') + || Zotero.FeedReader._getFeedField(feedEntry, 'rights', 'dc') + || Zotero.FeedReader._getFeedField(feedEntry, 'copyright'); + if (rights) item.rights = rights; + + let language = Zotero.FeedReader._getFeedField(feedEntry, 'language', 'dc') + || Zotero.FeedReader._getFeedField(feedEntry, 'language'); + if (language) item.language = language; + + /** Incorporate missing values from feed metadata **/ + + let supplementFields = ['publicationTitle', 'ISSN', 'publisher', 'rights', 'language']; + for (let i=0; i<supplementFields.length; i++) { + let field = supplementFields[i]; + if (!item[field] && feedInfo[field]) { + item[field] = feedInfo[field]; } - - // Close feed connection - if (this._channel.isPending) { - this._channel.cancel(Components.results.NS_BINDING_ABORTED); + } + + Zotero.FeedReader._guessItemType(item); + + return item; +} + +/********************* + * Utility functions * + *********************/ +/* + * Convert HTML-formatted text to Zotero-compatible formatting + */ +Zotero.FeedReader._getRichText = function(feedText, field) { + let domDiv = Zotero.Utilities.Internal.getDOMDocument().createElement("div"); + let domFragment = feedText.createDocumentFragment(domDiv); + return Zotero.Utilities.dom2text(domFragment, field); +}; + +/* + * Format JS date as SQL date + */ +Zotero.FeedReader._formatDate = function(date) { + return Zotero.Date.dateToSQL(date, true); +} + +/* + * Get field value from feed entry by namespace:fieldName + */ +// Properties are stored internally as ns+name, but only some namespaces are +// supported. Others are just "null" +let ns = { + 'prism': 'null', + 'dc': 'dc:' +} +Zotero.FeedReader._getFeedField = function(feedEntry, field, namespace) { + let prefix = namespace ? ns[namespace] || 'null' : ''; + try { + return feedEntry.fields.getPropertyAsAUTF8String(prefix+field); + } catch(e) {} + + try { + if (namespace && !ns[namespace]) { + prefix = namespace + ':'; + return feedEntry.fields.getPropertyAsAUTF8String(prefix+field); } - }; + } catch(e) {} - return FeedReader; -}; -\ No newline at end of file + return; +} diff --git a/test/content/runtests.js b/test/content/runtests.js @@ -7,6 +7,9 @@ var ZoteroUnit = Components.classes["@mozilla.org/commandlinehandler/general-sta var dump = ZoteroUnit.dump; +// Mocha HTML reporter doesn't show deepEqual diffs, so we change this. +chai.config.truncateThreshold = 0 + function quit(failed) { // Quit with exit status if(!failed) { diff --git a/test/content/support.js b/test/content/support.js @@ -435,6 +435,10 @@ function getTestDataDirectory() { QueryInterface(Components.interfaces.nsIFileURL).file; } +function getTestDataItemUrl(path) { + return OS.Path.join("resource://zotero-unit-tests/data", path); +} + /** * Returns an absolute path to an empty temporary directory * (i.e., test/tests/data) diff --git a/test/tests/data/feed.rss b/test/tests/data/feed.rss @@ -0,0 +1,42 @@ +<?xml version="1.0"?> +<!-- Lifted from http://cyber.law.harvard.edu/rss/examples/rss2sample.xml --> +<rss version="2.0"> + <channel> + <title>Liftoff News</title> + <link>http://liftoff.msfc.nasa.gov/</link> + <description>Liftoff to Space Exploration.</description> + <language>en-us</language> + <pubDate>Tue, 10 Jun 2003 04:00:00 GMT</pubDate> + <lastBuildDate>Tue, 10 Jun 2003 09:41:01 GMT</lastBuildDate> + <docs>http://blogs.law.harvard.edu/tech/rss</docs> + <generator>Weblog Editor 2.0</generator> + <managingEditor>editor@example.com</managingEditor> + <webMaster>webmaster@example.com</webMaster> + <item> + <title>Star City</title> + <link>http://liftoff.msfc.nasa.gov/news/2003/news-starcity.asp</link> + <description>How do Americans get ready to work with Russians aboard the International Space Station? They take a crash course in culture, language and protocol at Russia's &lt;a href="http://howe.iki.rssi.ru/GCTC/gctc_e.htm"&gt;Star City&lt;/a&gt;.</description> + <pubDate>Tue, 03 Jun 2003 09:39:21 GMT</pubDate> + <guid>http://liftoff.msfc.nasa.gov/2003/06/03.html#item573</guid> + </item> + <item> + <description>Sky watchers in Europe, Asia, and parts of Alaska and Canada will experience a &lt;a href="http://science.nasa.gov/headlines/y2003/30may_solareclipse.htm"&gt;partial eclipse of the Sun&lt;/a&gt; on Saturday, May 31st.</description> + <pubDate>Fri, 30 May 2003 11:06:42 GMT</pubDate> + <guid>http://liftoff.msfc.nasa.gov/2003/05/30.html#item572</guid> + </item> + <item> + <title>The Engine That Does More</title> + <link>http://liftoff.msfc.nasa.gov/news/2003/news-VASIMR.asp</link> + <description>Before man travels to Mars, NASA hopes to design new engines that will let us fly through the Solar System more quickly. The proposed VASIMR engine would do that.</description> + <pubDate>Tue, 27 May 2003 08:37:32 GMT</pubDate> + <guid>http://liftoff.msfc.nasa.gov/2003/05/27.html#item571</guid> + </item> + <item> + <title>Astronauts' Dirty Laundry</title> + <link>http://liftoff.msfc.nasa.gov/news/2003/news-laundry.asp</link> + <description>Compared to earlier spacecraft, the International Space Station has many luxuries, but laundry facilities are not one of them. Instead, astronauts have other options.</description> + <pubDate>Tue, 20 May 2003 08:56:02 GMT</pubDate> + <guid>http://liftoff.msfc.nasa.gov/2003/05/20.html#item570</guid> + </item> + </channel> +</rss> +\ No newline at end of file diff --git a/test/tests/data/feedDetailed.rss b/test/tests/data/feedDetailed.rss @@ -0,0 +1,89 @@ +<?xml version="1.0" encoding="UTF-8"?> +<rdf:RDF xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#" + xmlns:dc="http://purl.org/dc/elements/1.1/" + xmlns="http://purl.org/rss/1.0/" + xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" + xmlns:prism="http://prismstandard.org/namespaces/1.2/basic/" + xmlns:dcterms="http://purl.org/dc/terms/" + xsi:schemaLocation="http://www.w3.org/1999/02/22-rdf-syntax-ns# uri:atypon.com:cms:schema:rdf.xsd"> + <channel rdf:about="http://www.example.com/feed.rss"> + <title>Feed</title> + <description>Feed Description</description> + <link>http://www.example.com/feed.rss</link> + <dc:publisher>Publisher</dc:publisher> + <dc:language>en</dc:language> + <dc:rights>©2016 Published by Publisher</dc:rights> + <dc:creator>Feed Author</dc:creator> + <prism:publicationName>Publication</prism:publicationName> + <prism:issn>0000-0000</prism:issn> + <prism:publicationDate>2016-01-07-08:00</prism:publicationDate> + <prism:copyright>©2016 Published by Publisher</prism:copyright> + <prism:rightsAgent>rights@example.com</prism:rightsAgent> + <ttl>60</ttl> + <items> + <rdf:Seq> + <rdf:li rdf:resource="http://www.example.com/item1"/> + <rdf:li rdf:resource="http://www.example.com/item2"/> + <rdf:li rdf:resource="http://www.example.com/item3"/> + </rdf:Seq> + </items> + </channel> + <item rdf:about="http://www.example.com/item1"> + <title>Title 1</title> + <link>http://www.example.com/item1</link> + <description>Description 1</description> + <language>en</language> + <startPage>10</startPage> + <endPage>20</endPage> + <dc:title>Title 1</dc:title> + <dc:creator>Author1 A.T. Rohtua, Author2 A. Auth, Author3 Autho</dc:creator> + <dc:contributor>Contributor1 A.T. Rotubirtnoc, Contributor2 C. Contrib, Contributor3 Contr</dc:contributor> + <dc:publisher>Publisher</dc:publisher> + <dc:source>Feed</dc:source> + <dc:date>2016-01-07</dc:date> + <dc:doi>10.1000/182</dc:doi> + <prism:issn>0000-0000</prism:issn> + <prism:publicationName>Publication</prism:publicationName> + <prism:publicationDate>2016-01-07</prism:publicationDate> + <prism:section>Article</prism:section> + </item> + <item rdf:about="http://www.example.com/item2"> + <title>Title 2</title> + <link>http://www.example.com/item2</link> + <description>Description 2</description> + <language>en</language> + <startPage>10</startPage> + <endPage>20</endPage> + <dc:title>Title 2</dc:title> + <dc:creator>Author1 A.T. Rohtua, Author2 A. Auth, Author3 Autho</dc:creator> + <dc:contributor>Contributor1 A.T. Rotubirtnoc, Contributor2 C. Contrib, Contributor3 Contr</dc:contributor> + <dc:publisher>Publisher</dc:publisher> + <dc:source>Feed</dc:source> + <dc:date>2016-01-07</dc:date> + <dc:doi>10.1000/182</dc:doi> + <prism:issn>0000-0000</prism:issn> + <prism:publicationName>Publication</prism:publicationName> + <prism:publicationDate>2016-01-07</prism:publicationDate> + <prism:section>Article</prism:section> + </item> + <item rdf:about="http://www.example.com/item3"> + <title>Title 3</title> + <link>http://www.example.com/item3</link> + <description>Description 3</description> + <language>en</language> + <pubType>Some Publication</pubType> + <startPage>10</startPage> + <endPage>20</endPage> + <dc:title>Title 3</dc:title> + <dc:creator>Author1 A.T. Rohtua, Author2 A. Auth, Author3 Autho</dc:creator> + <dc:contributor>Contributor1 A.T. Rotubirtnoc, Contributor2 C. Contrib, Contributor3 Contr</dc:contributor> + <dc:publisher>Publisher</dc:publisher> + <dc:source>Feed</dc:source> + <dc:date>2016-01-07</dc:date> + <dc:doi>10.1000/182</dc:doi> + <prism:issn>0000-0000</prism:issn> + <prism:publicationName>Publication</prism:publicationName> + <prism:publicationDate>2016-01-07</prism:publicationDate> + <prism:section>Article</prism:section> + </item> +</rdf:RDF> +\ No newline at end of file diff --git a/test/tests/feedReaderTest.js b/test/tests/feedReaderTest.js @@ -0,0 +1,167 @@ +"use strict"; + +describe("Zotero.FeedReader", function () { + + var htmlUrl = getTestDataItemUrl("test.html"); + + var feedUrl = getTestDataItemUrl("feed.rss"); + var feedInfo = { + title: 'Liftoff News', + subtitle: 'Liftoff to Space Exploration.', + updated: new Date("Tue, 10 Jun 2003 09:41:01 GMT"), + creators: [{ + firstName: '', + lastName: 'editor@example.com', + creatorType: 'author', + fieldMode: 1 + }], + language: 'en-us' + }; + + var detailedFeedUrl = getTestDataItemUrl("feedDetailed.rss"); + var detailedFeedInfo = { + title: 'Feed', + subtitle: 'Feed Description', + creators: [{firstName: 'Feed', lastName: 'Author', creatorType: 'author'}], + publicationTitle: 'Publication', + publisher: 'Publisher', + rights: '©2016 Published by Publisher', + ISSN: '0000-0000', + language: 'en' + }; + + describe('FeedReader()', function () { + it('should throw if url not provided', function() { + assert.throw(() => new Zotero.FeedReader()) + }); + + it('should throw if url invalid', function() { + assert.throw(() => new Zotero.FeedReader('invalid url')) + }); + }); + + describe('#process()', function() { + it('should reject if the provided url is not a valid feed', function* () { + let fr = new Zotero.FeedReader(htmlUrl); + let e = yield getPromiseError(fr.process()); + assert.ok(e); + e = yield getPromiseError(fr._feedItems[fr._feedItems.length-1].promise); + assert.ok(e); + }); + + it('should set #feedProperties on FeedReader object', function* () { + let fr = new Zotero.FeedReader(feedUrl); + assert.throw(() => fr.feedProperties); + yield fr.process(); + assert.ok(fr.feedProperties); + }); + }); + + describe('#terminate()', function() { + it('should reject last feed item and feed processing promise if feed not processed yet', function* () { + let fr = new Zotero.FeedReader(feedUrl); + fr.terminate("test"); + let e = yield getPromiseError(fr.process()); + assert.ok(e); + e = yield getPromiseError(fr._feedItems[fr._feedItems.length-1].promise); + assert.ok(e); + }); + + it('should reject last feed item if feed processed', function* () { + let fr = new Zotero.FeedReader(feedUrl); + yield fr.process(); + fr.terminate("test"); + let e = yield getPromiseError(fr._feedItems[fr._feedItems.length-1].promise); + assert.ok(e); + }); + }); + + describe('#feedProperties', function() { + it('should throw if accessed before feed is processed', function () { + let fr = new Zotero.FeedReader(feedUrl); + assert.throw(() => fr.feedProperties); + }); + + it('should have correct values for a sparse feed', function* () { + let fr = new Zotero.FeedReader(feedUrl); + yield fr.process(); + assert.deepEqual(fr.feedProperties, feedInfo); + }); + + it('should have correct values for a detailed feed', function* () { + let fr = new Zotero.FeedReader(detailedFeedUrl); + yield fr.process(); + assert.deepEqual(fr.feedProperties, detailedFeedInfo); + }); + }); + + describe('#ItemIterator()', function() { + it('should throw if called before feed is resolved', function() { + let fr = new Zotero.FeedReader(feedUrl); + assert.throw(() => new fr.ItemIterator); + }); + + it('should parse items correctly for a sparse feed', function* () { + let expected = { + guid: 'http://liftoff.msfc.nasa.gov/2003/06/03.html#item573', + title: 'Star City', + abstractNote: 'How do Americans get ready to work with Russians aboard the International Space Station? They take a crash course in culture, language and protocol at Russia\'s Star City.', + url: 'http://liftoff.msfc.nasa.gov/news/2003/news-starcity.asp', + dateModified: '2003-06-03 09:39:21', + date: '2003-06-03 09:39:21', + creators: [{ + firstName: '', + lastName: 'editor@example.com', + creatorType: 'author', + fieldMode: 1 + }], + language: 'en-us', + itemType: 'journalArticle' + }; + + let fr = new Zotero.FeedReader(feedUrl); + yield fr.process(); + let itemIterator = new fr.ItemIterator(); + let item = yield itemIterator.next().value; + assert.deepEqual(item, expected); + }); + + it('should parse items correctly for a detailed feed', function* () { + let expected = { guid: 'http://www.example.com/item1', + title: 'Title 1', + abstractNote: 'Description 1', + url: 'http://www.example.com/item1', + dateModified: '2016-01-07 00:00:00', + date: '2016-01-07', + creators: [ + { firstName: 'Author1 A. T.', lastName: 'Rohtua', creatorType: 'author' }, + { firstName: 'Author2 A.', lastName: 'Auth', creatorType: 'author' }, + { firstName: 'Author3', lastName: 'Autho', creatorType: 'author' }, + { firstName: 'Contributor1 A. T.', lastName: 'Rotubirtnoc', creatorType: 'contributor' }, + { firstName: 'Contributor2 C.', lastName: 'Contrib', creatorType: 'contributor' }, + { firstName: 'Contributor3', lastName: 'Contr', creatorType: 'contributor' } + ], + publicationTitle: 'Publication', + ISSN: '0000-0000', + publisher: 'Publisher', + rights: '©2016 Published by Publisher', + language: 'en', + itemType: 'journalArticle' + }; + + let fr = new Zotero.FeedReader(detailedFeedUrl); + yield fr.process(); + let itemIterator = new fr.ItemIterator(); + let item = yield itemIterator.next().value; + assert.deepEqual(item, expected); + }); + it('should resolve last item with null', function* () { + let fr = new Zotero.FeedReader(feedUrl); + yield fr.process(); + let itemIterator = new fr.ItemIterator(); + let item; + while(item = yield itemIterator.next().value); + assert.isNull(item); + }); + }); +}) +\ No newline at end of file