commit 40443c6b9128ceea8b9ca76ca73965112b4d80ab
parent e3e8881282bfa1c737bad64d7f2f746885abab46
Author: Simon Kornblith <simon@simonster.com>
Date: Mon, 13 Jul 2009 22:45:10 +0000
- add xml/e4x and xml/dom dataMode options
- parse XML encoding declarations in translate.js
- fix errors importing MODS from clipboard
Diffstat:
2 files changed, 188 insertions(+), 151 deletions(-)
diff --git a/chrome/content/zotero/xpcom/translate.js b/chrome/content/zotero/xpcom/translate.js
@@ -1820,55 +1820,23 @@ Zotero.Translate.prototype._importDoneSniffing = function(charset) {
/*
* set up import for IO
*/
-Zotero.Translate.prototype._importConfigureIO = function(charset) {
- if(this.configOptions.dataMode && (this.configOptions.dataMode == "rdf" || this.configOptions.dataMode == "rdf/n3")) {
- if(!this._rdf) {
- Zotero.debug("Translate: initializing data store");
- // initialize data store
- this._rdf = new Zotero.RDF.AJAW.RDFIndexedFormula();
-
- Zotero.debug("Translate: loading data");
- // load data into store
- var IOService = Components.classes['@mozilla.org/network/io-service;1']
- .getService(Components.interfaces.nsIIOService);
- if(this._storage) {
- // parse from string
- var baseURI = (this.location ? IOService.newURI(this.location, "utf-8", null) : null);
- var nodeTree = (new DOMParser()).parseFromString(this._storage, 'text/xml');
- } else {
- // get URI
- var fileHandler = IOService.getProtocolHandler("file")
- .QueryInterface(Components.interfaces.nsIFileProtocolHandler);
- var baseURI = fileHandler.getURLSpecFromFile(this.location);
-
- // load XML from file using xmlhttp for charset detection
- var xmlhttp = Components.classes["@mozilla.org/xmlextras/xmlhttprequest;1"].
- createInstance(Components.interfaces.nsIXMLHttpRequest);
- xmlhttp.overrideMimeType("text/xml");
- xmlhttp.open("GET", baseURI, false); // Synchronous
- xmlhttp.send("");
-
- var nodeTree = xmlhttp.responseXML;
- if(nodeTree.getElementsByTagName("parsererror").length) {
- this._rdf = false;
- throw("RDF/XML parse error; loading data into data store failed");
- }
- }
-
- var parser = new Zotero.RDF.AJAW.RDFParser(this._rdf);
- parser.parse(nodeTree, baseURI);
- }
-
- Zotero.debug("adding apis");
- // add RDF features to sandbox
- this._sandbox.Zotero.RDF = new Zotero.Translate.RDF(this._rdf);
- return;
- }
-
+Zotero.Translate.prototype._importConfigureIO = function(charset) {
if(this._storage) {
// import from string
- this._storageFunctions(true);
this._storagePointer = 0;
+
+ if(this.configOptions.dataMode && this.configOptions.dataMode == "xml/dom" || this.configOptions.dataMode == "rdf") {
+ // for DOM XML, handle with parseFromString
+ if(this.configOptions.dataMode == "xml/dom" || !this._rdf) {
+ var xmlNodes = Components.classes["@mozilla.org/xmlextras/domparser;1"]
+ .createInstance(Components.interfaces.nsIDOMParser)
+ .parseFromString(this._storage, "text/xml");
+ }
+ } else if(this.configOptions.dataMode && this.configOptions.dataMode == "xml/e4x") {
+ var xmlNodes = new XML(this._storage.replace(/<\?xml[^>]+\?>/, ""));
+ } else {
+ this._storageFunctions(true);
+ }
} else {
// import from file
@@ -1885,8 +1853,10 @@ Zotero.Translate.prototype._importConfigureIO = function(charset) {
this._streams.push(this._inputStream);
}
+ var sStream = null;
var bomLength = 0;
- if(charset === undefined || (charset && charset.length > 3 && charset.substr(0, 3) == "UTF")) {
+ if(!charset && this._charset) charset = this._charset;
+ if(!charset || (charset && charset.length > 3 && charset.substr(0, 3) == "UTF")) {
// seek past BOM
var bomCharset = this._importGetBOM();
var bomLength = (bomCharset ? BOMs[bomCharset].length : 0);
@@ -1895,80 +1865,185 @@ Zotero.Translate.prototype._importConfigureIO = function(charset) {
if(bomCharset) charset = this._charset = bomCharset;
}
- var intlStream = null;
- if(charset) {
- // if have detected charset
- Zotero.debug("Translate: Using detected character set "+charset, 3);
- // convert from detected charset
- intlStream = Components.classes["@mozilla.org/intl/converter-input-stream;1"]
- .createInstance(Components.interfaces.nsIConverterInputStream);
- intlStream.init(this._inputStream, charset, 65535,
- Components.interfaces.nsIConverterInputStream.DEFAULT_REPLACEMENT_CHARACTER);
- me._streams.push(intlStream);
- }
-
- // allow translator to set charset
- this._sandbox.Zotero.setCharacterSet = function(charset) {
- // seek back to the beginning
- me._inputStream.QueryInterface(Components.interfaces.nsISeekableStream)
- .seek(Components.interfaces.nsISeekableStream.NS_SEEK_SET, bomLength);
+ // look for/seek past XML charset declaration
+ if(this.configOptions.dataMode && (this.configOptions.dataMode == "xml/e4x"
+ || this.configOptions.dataMode == "xml/dom"
+ || this.configOptions.dataMode == "rdf")) {
+
+ sStream = Components.classes["@mozilla.org/scriptableinputstream;1"]
+ .createInstance(Components.interfaces.nsIScriptableInputStream);
+ sStream.init(this._inputStream);
+ this._streams.push(sStream);
- intlStream = Components.classes["@mozilla.org/intl/converter-input-stream;1"]
- .createInstance(Components.interfaces.nsIConverterInputStream);
- try {
- intlStream.init(me._inputStream, charset, 65535,
- Components.interfaces.nsIConverterInputStream.DEFAULT_REPLACEMENT_CHARACTER);
- } catch(e) {
- throw "Text encoding not supported";
+ // read until we see if the file begins with a parse instruction
+ const whitespaceRe = /\s/g;
+ var read;
+ do {
+ read = sStream.read(1);
+ } while(whitespaceRe.test(read))
+
+ if(read != "<") throw "XML load error: text does not start with <";
+
+ var firstPart = read + sStream.read(4);
+ if(firstPart == "<?xml") {
+ // got a parse instruction, read until it ends
+ read = true;
+ while((read !== false) && (read !== ">")) {
+ read = sStream.read(1);
+ firstPart += read;
+ }
+
+ var encodingRe = /encoding=['"]([^'"]+)['"]/;
+ var m = encodingRe.exec(firstPart);
+ if(m) {
+ try {
+ var charconv = Components.classes["@mozilla.org/charset-converter-manager;1"]
+ .getService(Components.interfaces.nsICharsetConverterManager)
+ .getCharsetTitle(m[1]);
+ if(charconv) charset = this._charset = m[1];
+ } catch(e) {}
+ }
+ } else {
+ this._inputStream.QueryInterface(Components.interfaces.nsISeekableStream)
+ .seek(Components.interfaces.nsISeekableStream.NS_SEEK_SET, bomLength);
}
- me._streams.push(intlStream);
+
+ if(!charset) charset = "UTF-8";
}
- var str = new Object();
- if(this.configOptions.dataMode && this.configOptions.dataMode == "line") { // line by line reading
- this._inputStream.QueryInterface(Components.interfaces.nsILineInputStream);
+ if(this.configOptions.dataMode && this.configOptions.dataMode == "xml/dom" || this.configOptions.dataMode == "rdf") {
+ // for DOM XML, pass charset to parseFromStream
+ if(this.configOptions.dataMode == "xml/dom" || !this._rdf) {
+ var xmlNodes = Components.classes["@mozilla.org/xmlextras/domparser;1"]
+ .createInstance(Components.interfaces.nsIDOMParser)
+ .parseFromStream(this._inputStream, charset, this.location.fileSize, "text/xml");
+ }
+ } else {
+ var intlStream = null;
+ if(charset) {
+ // if have detected charset
+ Zotero.debug("Translate: Using detected character set "+charset, 3);
+ // convert from detected charset
+ intlStream = Components.classes["@mozilla.org/intl/converter-input-stream;1"]
+ .createInstance(Components.interfaces.nsIConverterInputStream);
+ intlStream.init(this._inputStream, charset, 65535,
+ Components.interfaces.nsIConverterInputStream.DEFAULT_REPLACEMENT_CHARACTER);
+ me._streams.push(intlStream);
+ }
- this._sandbox.Zotero.read = function() {
- if(intlStream && intlStream instanceof Components.interfaces.nsIUnicharLineInputStream) {
- var amountRead = intlStream.readLine(str);
- } else {
- var amountRead = me._inputStream.readLine(str);
+ if(this.configOptions.dataMode && this.configOptions.dataMode == "xml/e4x") {
+ // read in 16384 byte increments
+ var xmlNodes = "";
+ var str = {};
+ while(intlStream.readString(16384, str)) {
+ xmlNodes += str.value;
}
- if(amountRead) {
- return str.value;
- } else {
- return false;
+
+ // create xml
+ xmlNodes = new XML(xmlNodes);
+ } else {
+ // standard text reading tools
+
+ // allow translator to set charset
+ this._sandbox.Zotero.setCharacterSet = function(charset) {
+ // seek back to the beginning
+ me._inputStream.QueryInterface(Components.interfaces.nsISeekableStream)
+ .seek(Components.interfaces.nsISeekableStream.NS_SEEK_SET, bomLength);
+
+ intlStream = Components.classes["@mozilla.org/intl/converter-input-stream;1"]
+ .createInstance(Components.interfaces.nsIConverterInputStream);
+ try {
+ intlStream.init(me._inputStream, charset, 65535,
+ Components.interfaces.nsIConverterInputStream.DEFAULT_REPLACEMENT_CHARACTER);
+ } catch(e) {
+ throw "Text encoding not supported";
+ }
+ me._streams.push(intlStream);
}
- }
- } else { // block reading
- var sStream;
-
- this._sandbox.Zotero.read = function(amount) {
- if(intlStream) {
- // read from international stream, if one is available
- var amountRead = intlStream.readString(amount, str);
+
+ var str = new Object();
+ if(this.configOptions.dataMode && this.configOptions.dataMode == "line") { // line by line reading
+ this._inputStream.QueryInterface(Components.interfaces.nsILineInputStream);
- if(amountRead) {
- return str.value;
- } else {
- return false;
+ this._sandbox.Zotero.read = function() {
+ if(intlStream && intlStream instanceof Components.interfaces.nsIUnicharLineInputStream) {
+ var amountRead = intlStream.readLine(str);
+ } else {
+ var amountRead = me._inputStream.readLine(str);
+ }
+ if(amountRead) {
+ return str.value;
+ } else {
+ return false;
+ }
}
- } else {
- // allocate sStream on the fly
- if(!sStream) {
- sStream = Components.classes["@mozilla.org/scriptableinputstream;1"]
- .createInstance(Components.interfaces.nsIScriptableInputStream);
- sStream.init(me._inputStream);
+ } else { // block reading
+ this._sandbox.Zotero.read = function(amount) {
+ if(intlStream) {
+ // read from international stream, if one is available
+ var amountRead = intlStream.readString(amount, str);
+
+ if(amountRead) {
+ return str.value;
+ } else {
+ return false;
+ }
+ } else {
+ // allocate sStream on the fly
+ if(!sStream) {
+ sStream = Components.classes["@mozilla.org/scriptableinputstream;1"]
+ .createInstance(Components.interfaces.nsIScriptableInputStream);
+ sStream.init(me._inputStream);
+ this._streams.push(sStream);
+ }
+
+ // read from the scriptable input stream
+ var string = sStream.read(amount);
+ return string;
+ }
}
-
- // read from the scriptable input stream
- var string = sStream.read(amount);
- return string;
}
}
+ }
+ }
+
+ if(this.configOptions.dataMode) {
+ // make sure DOM XML actually got parsed
+ if(xmlNodes && (this.configOptions.dataMode == "rdf" || this.configOptions.dataMode == "xml/dom")
+ && xmlNodes.getElementsByTagName("parsererror").length) {
+ this._rdf = undefined;
+ throw("XML parser error: loading data into data store failed");
+ }
+
+ if(this.configOptions.dataMode == "rdf") {
+ // set up RDF
+ if(!this._rdf) {
+ // get URI
+ var IOService = Components.classes['@mozilla.org/network/io-service;1']
+ .getService(Components.interfaces.nsIIOService);
+ if(this._storage) {
+ var baseURI = this.location ? this.location : null;
+ } else {
+ var fileHandler = IOService.getProtocolHandler("file")
+ .QueryInterface(Components.interfaces.nsIFileProtocolHandler);
+ var baseURI = fileHandler.getURLSpecFromFile(this.location);
+ }
+
+ Zotero.debug("Translate: initializing data store");
+ this._rdf = new Zotero.RDF.AJAW.RDFIndexedFormula();
+
+ Zotero.debug("Translate: loading data");
+ var parser = new Zotero.RDF.AJAW.RDFParser(this._rdf);
+ parser.parse(xmlNodes, "");
+ }
- // attach sStream to stack of streams to close
- this._streams.push(sStream);
+ // add RDF features to sandbox
+ this._sandbox.Zotero.RDF = new Zotero.Translate.RDF(this._rdf);
+ } else if(this.configOptions.dataMode == "xml/e4x" || this.configOptions.dataMode == "xml/dom") {
+ // add getXML function
+ this._sandbox.Zotero.getXML = function() {
+ return xmlNodes;
+ }
}
}
}
diff --git a/translators/MODS.js b/translators/MODS.js
@@ -4,7 +4,7 @@
"label":"MODS",
"creator":"Simon Kornblith",
"target":"xml",
- "minVersion":"1.0.8",
+ "minVersion":"2.0b6.3",
"maxVersion":"",
"priority":50,
"inRepository":true,
@@ -12,13 +12,11 @@
}
Zotero.addOption("exportNotes", true);
+Zotero.configure("dataMode", "xml/e4x");
function detectImport() {
- var read = Zotero.read(512);
- var modsTagRegexp = /<mods[^>]+>/
- if(modsTagRegexp.test(read)) {
- return true;
- }
+ var name = Zotero.getXML().name();
+ return name.uri == "http://www.loc.gov/mods/v3" && (name.localName == "modsCollection" || name.localName == "mods");
}
var partialItemTypes = ["bookSection", "journalArticle", "magazineArticle", "newspaperArticle"];
@@ -341,47 +339,11 @@ function doImport() {
"web site":"webpage"
};
-
- var read;
-
- // read until we see if the file begins with a parse instruction
- read = " ";
- while(read == " " || read == "\n" || read == "\r") {
- read = Zotero.read(1);
- }
-
- var firstPart = read + Zotero.read(4);
- if(firstPart == "<?xml") {
- // got a parse instruction, read until it ends
- read = true;
- while((read !== false) && (read !== ">")) {
- read = Zotero.read(1);
- firstPart += read;
- }
- var encodingRe = /encoding=['"]([^'"]+)['"]/;
- var m = encodingRe.exec(firstPart);
- // set character set
- try {
- Zotero.setCharacterSet(m[1]);
- } catch(e) {
- Zotero.setCharacterSet("utf-8");
- }
- } else {
- Zotero.setCharacterSet("utf-8");
- }
-
- // read in 16384 byte increments
- var text = "";
- while(read = Zotero.read(16384)) {
- text += read;
- }
- text = text.replace(/<\?xml[^>]+\?>/, "");
-
// parse with E4X
var m = new Namespace("http://www.loc.gov/mods/v3");
// why does this default namespace declaration not work!?
default xml namespace = m;
- var xml = new XML(text);
+ var xml = Zotero.getXML();
if(xml.m::mods.length()) {
var modsElements = xml.m::mods;