www

Unnamed repository; edit this file 'description' to name the repository.
Log | Files | Refs | Submodules | README | LICENSE

commit 635ec386fd732575900778e7b143f9595e6489e2
parent 0f83076252d1dd947359dc4920a4b1492b15a6f6
Author: Simon Kornblith <simon@simonster.com>
Date:   Sat,  5 Jun 2010 18:36:07 +0000

commit Avram's XML OpenURL ContextObject/FreeCite translator


Diffstat:
A translators/CTX.js | 248 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
1 file changed, 248 insertions(+), 0 deletions(-)

diff --git a/translators/CTX.js b/translators/CTX.js @@ -0,0 +1,248 @@ +{ + "translatorID":"24d9f058-3eb3-4d70-b78f-1ba1aef2128d", + "translatorType":5, + "label":"CTX", + "creator":"Avram Lyon and Simon Kornblith", + "target":"^http://freecite.library.brown.edu", + "minVersion":"2.0", + "maxVersion":"", + "priority":100, + "inRepository":false, + "lastUpdated":"2010-06-05 18:35:14" +} + +/* + * This translator imports OpenURL ContextObjects encapsulated in XML + * documents, as described at: + * http://alcme.oclc.org/openurl/servlet/OAIHandler?verb=GetRecord&metadataPrefix=oai_dc&identifier=info:ofi/fmt:xml:xsd:ctx + * The schema for such XML documents is at: + * http://www.openurl.info/registry/docs/xsd/info:ofi/fmt:xml:xsd:ctx + * + * This format is used in several places online, including Brown University's FreeCite + * Citation parser (http://freecite.library.brown.edu/welcome) and Oslo University's + * X-Port (http://www.ub.uio.no/portal/gs.htm or http://x-port.uio.no/). + + * Our input looks like this: +<ctx:context-objects xmlns:xsi='http://www.w3.org/2001/XMLSchema-instance' xsi:schemaLocation='info:ofi/fmt:xml:xsd:ctx http://www.openurl.info/registry/docs/info:ofi/fmt:xml:xsd:ctx' xmlns:ctx='info:ofi/fmt:xml:xsd:ctx'> +<ctx:context-object timestamp='2010-01-02T16:55:48-05:00' encoding='info:ofi/enc:UTF-8' version='Z39.88-2004' identifier=''> + <ctx:referent> + <ctx:metadata-by-val> + <ctx:format>info:ofi/fmt:xml:xsd:journal</ctx:format> + <ctx:metadata> + <journal xmlns:rft='info:ofi/fmt:xml:xsd:journal' xsi:schemaLocation='info:ofi/fmt:xml:xsd:journal http://www.openurl.info/registry/docs/info:ofi/fmt:xml:xsd:journal'> + <rft:atitle>Acute Myocardial Infarction in the Medicare population: process of care and clinical outcomes</rft:atitle> + <rft:spage>2530</rft:spage> + <rft:date>1992</rft:date> + <rft:stitle>Journal of the American Medical Association</rft:stitle> + <rft:genre>article</rft:genre> + <rft:volume>18</rft:volume> + 
<rft:epage>2536</rft:epage> + <rft:au>I S Udvarhelyi</rft:au> + <rft:au>C A Gatsonis</rft:au> + <rft:au>A M Epstein</rft:au> + <rft:au>C L Pashos</rft:au> + <rft:au>J P Newhouse</rft:au> + <rft:au>B J McNeil</rft:au> + </journal> + </ctx:metadata> + </ctx:metadata-by-val> + </ctx:referent> +</ctx:context-object> +</ctx:context-objects> + * + * The approach we will take is to convert this into COinS, so that we can + * piggy-back off of the perhaps more robust support in the core Zotero code. + */ + +Zotero.configure("dataMode", "line"); + +function detectWeb(doc, url) { + var texts = [], text = ""; + var codes = doc.getElementsByTagName("code"); + for(var i = 0; i < codes.length; i++) { + text = codes[i].textContent; + text.replace(/</g,"&lt;").replace(/>/g,"&gt;"); + texts.push(text); + } + return detectInString(texts); +}; + +function doWeb(doc, url) { + var texts = [], text = ""; + var codes = doc.getElementsByTagName("code"); + for(var i = 0; i < codes.length; i++) { + text = codes[i].textContent; + text.replace(/</g,"&lt;").replace(/>/g,"&gt;"); + texts.push(text); + } + doImportFromText(texts, true); +}; + +function doImport() { + var text = ""; + var line; + while(line = Zotero.read()) { + text += line; + } + return doImportFromText(text, false); +} + +function detectImport() { + var text = ""; + var line; + while(line = Zotero.read()) { + text += line; + } + return detectInString(text) != false; +} + +function detectInString(text) { + var detectedType = false; + + var spans = []; + + // This is because we want to be able to read multiple such CTX elements in a single page + if (typeof text != "string" && text.length >= 1) { + spans = text.map(contextObjectXMLToCOinS).reduce(function(a,b){return a.concat(b);}); + } else { + spans = contextObjectXMLToCOinS(text); + } + + for (var i = 0 ; i < spans.length ; i++) { + var item = new Zotero.Item; + var success = Zotero.Utilities.parseContextObject(spans[i], item); + if(item.itemType) { + Zotero.debug("Found " + 
item.itemType); + if (detectedType) { + return "multiple"; + } + detectedType = item.itemType; + } else { + Zotero.debug("Type not found"); + } + } + return detectedType; +}; + +/* Takes the string of the ContextObject XML format + * and returns an array of COinS titles of the same, per the COinS + * specification. + */ +function contextObjectXMLToCOinS (text) { + var doc = new XML(text); + + /* Here and elsewhere, we are using the E4X syntax for XML */ + var objects = doc..*::["context-object"]; + + /* Bail out if no object */ + if (objects.length() == 0) { + Zotero.debug("No context object"); + return []; + } + + var titles = []; + + for (var i = 0; i < objects.length(); i++) { + Zotero.debug("Processing object: " + objects[i].text()); + var pieces = []; + + + var version = objects[i].@version; + pieces.push("ctx_ver="+encodeURIComponent(version)); + + var format = objects[i]..*::format; + // Now convert this to the corresponding Key/Encoded-Value format; see note below. + // Check if this is unknown; if it is, skip + if (format.text() == "info:ofi/fmt:xml:xsd:unknown") { + Zotero.debug("Skipping object of type 'unknown'"); + continue; + } + format = mapXMLtoKEV[format.text()]; + + pieces.push("rft_val_fmt=" + encodeURIComponent(format)); + + // Here we disregard the namespaces + var fields = objects[i]..*::metadata.children()[0].*::*; + var field; + + for each (field in fields) { + var name = field.localName(); + // We can hardcode the 'rft' namespace to keep COinS valid + name = "rft."+name; + var value = encodeURIComponent(field.text()); + pieces.push(name + "=" + value); + } + + var title = pieces.join("&"); + var span = "<span title='" + title + "' class='Z3988'></span>\n"; + Zotero.debug("Made span: " + span); + titles.push(title); + } + return titles; +}; + +function doImportFromText(text, showPrompt) { + var spans = [], items = [], zoteroItems = []; + + // This is because we want to be able to read multiple such CTX elements in a single page + if (typeof 
text != "string" && text.length >= 1) { + spans = text.map(contextObjectXMLToCOinS).reduce(function(a,b){return a.concat(b);}); + } else { + spans = contextObjectXMLToCOinS(text); + } + + for (var i = 0 ; i < spans.length ; i++) { + Zotero.debug("Processing span: "+spans[i]); + var item = new Zotero.Item; + Zotero.Utilities.parseContextObject(spans[i], item); + if(item.itemType) { + Zotero.debug("Found " + item.itemType); + items.push(item.title); + zoteroItems.push(item); + // Set publicationTitle to the short title if only the latter is specified + if (item.journalAbbreviation && !item.publicationTitle) { + item.publicationTitle = item.journalAbbreviation; + } + // If we're in non-prompting mode, save right away + if (showPrompt === false) { + item.complete(); + } + } else { + Zotero.debug("Type not found"); + } + } + // Since we want to prompt, we have to parse twice. + if(showPrompt === true) { + if(items.length == 1) { + item.complete(); + } else { + items = Zotero.selectItems(items); + if(!items) return true; + for(var i in items) { + zoteroItems[i].complete(); + } + } + } +}; + +/* These two arrays are needed because COinS uses Key/Escaped-Value, which has a different + * set of format codes. 
Codes from "Registry for the OpenURL Framework - ANSI/NISO Z39.88-2004": + * http://alcme.oclc.org/openurl/servlet/OAIHandler?verb=ListRecords&metadataPrefix=oai_dc&set=Core:Metadata+Formats + */ +var mapKEVtoXML = { + 'info:ofi/fmt:kev:mtx:book' : 'info:ofi/fmt:xml:xsd:book', // Books + 'info:ofi/fmt:kev:mtx:dc' : 'info:ofi/fmt:xml:xsd:oai_dc', // Dublin Core + 'info:ofi/fmt:kev:mtx:dissertation' : 'info:ofi/fmt:xml:xsd:dissertation', // Dissertations + 'info:ofi/fmt:kev:mtx:journal' : 'info:ofi/fmt:xml:xsd:journal', // Journals + 'info:ofi/fmt:kev:mtx:patent' : 'info:ofi/fmt:xml:xsd:patent', // Patents + 'info:ofi/fmt:kev:mtx:sch_svc' : 'info:ofi/fmt:xml:xsd:sch_svc' // Scholarly ServiceTypes +}; + +var mapXMLtoKEV = { + 'info:ofi/fmt:xml:xsd:book' : 'info:ofi/fmt:kev:mtx:book', // Books + 'info:ofi/fmt:xml:xsd:oai_dc' : 'info:ofi/fmt:kev:mtx:dc', // Dublin Core + 'info:ofi/fmt:xml:xsd:dissertation' : 'info:ofi/fmt:kev:mtx:dissertation', // Dissertations + 'info:ofi/fmt:xml:xsd:journal' : 'info:ofi/fmt:kev:mtx:journal', // Journals + 'info:ofi/fmt:xml:xsd:patent' : 'info:ofi/fmt:kev:mtx:patent', // Patents + 'info:ofi/fmt:xml:xsd:sch_svc' : 'info:ofi/fmt:kev:mtx:sch_svc' // Scholarly ServiceTypes +};