commit 635ec386fd732575900778e7b143f9595e6489e2
parent 0f83076252d1dd947359dc4920a4b1492b15a6f6
Author: Simon Kornblith <simon@simonster.com>
Date: Sat, 5 Jun 2010 18:36:07 +0000
commit Avram's XML OpenURL ContextObject/FreeCite translator
Diffstat:
| A | translators/CTX.js | | | 248 | +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ |
1 file changed, 248 insertions(+), 0 deletions(-)
diff --git a/translators/CTX.js b/translators/CTX.js
@@ -0,0 +1,248 @@
+{
+ "translatorID":"24d9f058-3eb3-4d70-b78f-1ba1aef2128d",
+ "translatorType":5,
+ "label":"CTX",
+ "creator":"Avram Lyon and Simon Kornblith",
+ "target":"^http://freecite.library.brown.edu",
+ "minVersion":"2.0",
+ "maxVersion":"",
+ "priority":100,
+ "inRepository":false,
+ "lastUpdated":"2010-06-05 18:35:14"
+}
+
+/*
+ * This translator imports OpenURL ContextObjects encapsulated in XML
+ * documents, as described at:
+ * http://alcme.oclc.org/openurl/servlet/OAIHandler?verb=GetRecord&metadataPrefix=oai_dc&identifier=info:ofi/fmt:xml:xsd:ctx
+ * The schema for such XML documents is at:
+ * http://www.openurl.info/registry/docs/xsd/info:ofi/fmt:xml:xsd:ctx
+ *
+ * This format is used in several places online, including Brown University's FreeCite
+ * Citation parser (http://freecite.library.brown.edu/welcome) and Oslo University's
+ * X-Port (http://www.ub.uio.no/portal/gs.htm or http://x-port.uio.no/).
+
+ * Our input looks like this:
+<ctx:context-objects xmlns:xsi='http://www.w3.org/2001/XMLSchema-instance' xsi:schemaLocation='info:ofi/fmt:xml:xsd:ctx http://www.openurl.info/registry/docs/info:ofi/fmt:xml:xsd:ctx' xmlns:ctx='info:ofi/fmt:xml:xsd:ctx'>
+<ctx:context-object timestamp='2010-01-02T16:55:48-05:00' encoding='info:ofi/enc:UTF-8' version='Z39.88-2004' identifier=''>
+ <ctx:referent>
+ <ctx:metadata-by-val>
+ <ctx:format>info:ofi/fmt:xml:xsd:journal</ctx:format>
+ <ctx:metadata>
+ <journal xmlns:rft='info:ofi/fmt:xml:xsd:journal' xsi:schemaLocation='info:ofi/fmt:xml:xsd:journal http://www.openurl.info/registry/docs/info:ofi/fmt:xml:xsd:journal'>
+ <rft:atitle>Acute Myocardial Infarction in the Medicare population: process of care and clinical outcomes</rft:atitle>
+ <rft:spage>2530</rft:spage>
+ <rft:date>1992</rft:date>
+ <rft:stitle>Journal of the American Medical Association</rft:stitle>
+ <rft:genre>article</rft:genre>
+ <rft:volume>18</rft:volume>
+ <rft:epage>2536</rft:epage>
+ <rft:au>I S Udvarhelyi</rft:au>
+ <rft:au>C A Gatsonis</rft:au>
+ <rft:au>A M Epstein</rft:au>
+ <rft:au>C L Pashos</rft:au>
+ <rft:au>J P Newhouse</rft:au>
+ <rft:au>B J McNeil</rft:au>
+ </journal>
+ </ctx:metadata>
+ </ctx:metadata-by-val>
+ </ctx:referent>
+</ctx:context-object>
+</ctx:context-objects>
+ *
+ * The approach we will take is to convert this into COinS, so that we can
+ * piggy-back off of the perhaps more robust support in the core Zotero code.
+ */
+
+Zotero.configure("dataMode", "line");
+
+function detectWeb(doc, url) {
+ var texts = [], text = "";
+ var codes = doc.getElementsByTagName("code");
+ for(var i = 0; i < codes.length; i++) {
+ text = codes[i].textContent;
+ text.replace(/</g,"<").replace(/>/g,">");
+ texts.push(text);
+ }
+ return detectInString(texts);
+};
+
+function doWeb(doc, url) {
+ var texts = [], text = "";
+ var codes = doc.getElementsByTagName("code");
+ for(var i = 0; i < codes.length; i++) {
+ text = codes[i].textContent;
+ text.replace(/</g,"<").replace(/>/g,">");
+ texts.push(text);
+ }
+ doImportFromText(texts, true);
+};
+
+function doImport() {
+ var text = "";
+ var line;
+ while(line = Zotero.read()) {
+ text += line;
+ }
+ return doImportFromText(text, false);
+}
+
+function detectImport() {
+ var text = "";
+ var line;
+ while(line = Zotero.read()) {
+ text += line;
+ }
+ return detectInString(text) != false;
+}
+
+function detectInString(text) {
+ var detectedType = false;
+
+ var spans = [];
+
+ // This is because we want to be able to read multiple such CTX elements in a single page
+ if (typeof text != "string" && text.length >= 1) {
+ spans = text.map(contextObjectXMLToCOinS).reduce(function(a,b){return a.concat(b);});
+ } else {
+ spans = contextObjectXMLToCOinS(text);
+ }
+
+ for (var i = 0 ; i < spans.length ; i++) {
+ var item = new Zotero.Item;
+ var success = Zotero.Utilities.parseContextObject(spans[i], item);
+ if(item.itemType) {
+ Zotero.debug("Found " + item.itemType);
+ if (detectedType) {
+ return "multiple";
+ }
+ detectedType = item.itemType;
+ } else {
+ Zotero.debug("Type not found");
+ }
+ }
+ return detectedType;
+};
+
+/* Takes the string of the ContextObject XML format
+ * and returns an array of COinS titles of the same, per the COinS
+ * specification.
+ */
+function contextObjectXMLToCOinS (text) {
+ var doc = new XML(text);
+
+ /* Here and elsewhere, we are using the E4X syntax for XML */
+ var objects = doc..*::["context-object"];
+
+ /* Bail out if no object */
+ if (objects.length() == 0) {
+ Zotero.debug("No context object");
+ return [];
+ }
+
+ var titles = [];
+
+ for (var i = 0; i < objects.length(); i++) {
+ Zotero.debug("Processing object: " + objects[i].text());
+ var pieces = [];
+
+
+ var version = objects[i].@version;
+ pieces.push("ctx_ver="+encodeURIComponent(version));
+
+ var format = objects[i]..*::format;
+ // Now convert this to the corresponding Key/Encoded-Value format; see note below.
+ // Check if this is unknown; if it is, skip
+ if (format.text() == "info:ofi/fmt:xml:xsd:unknown") {
+ Zotero.debug("Skipping object of type 'unknown'");
+ continue;
+ }
+ format = mapXMLtoKEV[format.text()];
+
+ pieces.push("rft_val_fmt=" + encodeURIComponent(format));
+
+ // Here we disregard the namespaces
+ var fields = objects[i]..*::metadata.children()[0].*::*;
+ var field;
+
+ for each (field in fields) {
+ var name = field.localName();
+ // We can hardcode the 'rft' namespace to keep COinS valid
+ name = "rft."+name;
+ var value = encodeURIComponent(field.text());
+ pieces.push(name + "=" + value);
+ }
+
+ var title = pieces.join("&");
+ var span = "<span title='" + title + "' class='Z3988'></span>\n";
+ Zotero.debug("Made span: " + span);
+ titles.push(title);
+ }
+ return titles;
+};
+
+function doImportFromText(text, showPrompt) {
+ var spans = [], items = [], zoteroItems = [];
+
+ // This is because we want to be able to read multiple such CTX elements in a single page
+ if (typeof text != "string" && text.length >= 1) {
+ spans = text.map(contextObjectXMLToCOinS).reduce(function(a,b){return a.concat(b);});
+ } else {
+ spans = contextObjectXMLToCOinS(text);
+ }
+
+ for (var i = 0 ; i < spans.length ; i++) {
+ Zotero.debug("Processing span: "+spans[i]);
+ var item = new Zotero.Item;
+ Zotero.Utilities.parseContextObject(spans[i], item);
+ if(item.itemType) {
+ Zotero.debug("Found " + item.itemType);
+ items.push(item.title);
+ zoteroItems.push(item);
+ // Set publicationTitle to the short title if only the latter is specified
+ if (item.journalAbbreviation && !item.publicationTitle) {
+ item.publicationTitle = item.journalAbbreviation;
+ }
+ // If we're in non-prompting mode, save right away
+ if (showPrompt === false) {
+ item.complete();
+ }
+ } else {
+ Zotero.debug("Type not found");
+ }
+ }
+ // Since we want to prompt, we have to parse twice.
+ if(showPrompt === true) {
+ if(items.length == 1) {
+ item.complete();
+ } else {
+ items = Zotero.selectItems(items);
+ if(!items) return true;
+ for(var i in items) {
+ zoteroItems[i].complete();
+ }
+ }
+ }
+};
+
+/* These two arrays are needed because COinS uses Key/Escaped-Value, which has a different
+ * set of format codes. Codes from "Registry for the OpenURL Framework - ANSI/NISO Z39.88-2004":
+ * http://alcme.oclc.org/openurl/servlet/OAIHandler?verb=ListRecords&metadataPrefix=oai_dc&set=Core:Metadata+Formats
+ */
+var mapKEVtoXML = {
+ 'info:ofi/fmt:kev:mtx:book' : 'info:ofi/fmt:xml:xsd:book', // Books
+ 'info:ofi/fmt:kev:mtx:dc' : 'info:ofi/fmt:xml:xsd:oai_dc', // Dublin Core
+ 'info:ofi/fmt:kev:mtx:dissertation' : 'info:ofi/fmt:xml:xsd:dissertation', // Dissertations
+ 'info:ofi/fmt:kev:mtx:journal' : 'info:ofi/fmt:xml:xsd:journal', // Journals
+ 'info:ofi/fmt:kev:mtx:patent' : 'info:ofi/fmt:xml:xsd:patent', // Patents
+ 'info:ofi/fmt:kev:mtx:sch_svc' : 'info:ofi/fmt:xml:xsd:sch_svc' // Scholarly ServiceTypes
+};
+
+var mapXMLtoKEV = {
+ 'info:ofi/fmt:xml:xsd:book' : 'info:ofi/fmt:kev:mtx:book', // Books
+ 'info:ofi/fmt:xml:xsd:oai_dc' : 'info:ofi/fmt:kev:mtx:dc', // Dublin Core
+ 'info:ofi/fmt:xml:xsd:dissertation' : 'info:ofi/fmt:kev:mtx:dissertation', // Dissertations
+ 'info:ofi/fmt:xml:xsd:journal' : 'info:ofi/fmt:kev:mtx:journal', // Journals
+ 'info:ofi/fmt:xml:xsd:patent' : 'info:ofi/fmt:kev:mtx:patent', // Patents
+ 'info:ofi/fmt:xml:xsd:sch_svc' : 'info:ofi/fmt:kev:mtx:sch_svc' // Scholarly ServiceTypes
+};