commit 8b00d1198680edd493eb90a7db975d43af8d502f
parent bc53f325950c065022ef886933a01b9d4fcdb289
Author: Avram Lyon <ajlyon@gmail.com>
Date: Wed, 8 Sep 2010 09:41:18 +0000
Updated Sudoc translator by Sylvain.
Diffstat:
| M | translators/Sudoc.js | | | 114 | +++++++++++++++++++++++++++++++++++++------------------------------------------ |
1 file changed, 54 insertions(+), 60 deletions(-)
diff --git a/translators/Sudoc.js b/translators/Sudoc.js
@@ -2,13 +2,13 @@
"translatorID":"1b9ed730-69c7-40b0-8a06-517a89a3a278",
"translatorType":4,
"label":"Sudoc",
- "creator":"Sean Takats and Michael Berkowitz, updated by Sylvain Machefert",
+ "creator":"Sean Takats, Michael Berkowitz, Sylvain Machefert",
"target":"^http://(www|corail)\\.sudoc\\.abes\\.fr",
"minVersion":"1.0.0b3.r1",
"maxVersion":"",
"priority":100,
"inRepository":true,
- "lastUpdated":"2010-02-20 14:40:00"
+ "lastUpdated":"2010-09-03 14:40:00"
}
function detectWeb(doc, url) {
@@ -17,56 +17,53 @@ function detectWeb(doc, url) {
if (prefix == 'x') return namespace; else return null;
} : null;
- var multxpath = '/html/body/div[2]/div/span';
+ var multxpath = "//span[@class='tab1']";
if (elt = doc.evaluate(multxpath, doc, nsResolver, XPathResult.ANY_TYPE, null).iterateNext()) {
var content = elt.textContent;
- if ( (content == "Résultats") || (content == "Results") )
+ if ( (content == "Liste des résultats") || (content == "shortlist") )
{
return "multiple";
}
- else if ( (content == "Notice complète") || (content == "title data") )
+ else if ( (content == "Notice détaillée") || (content == "title data") )
{
- var xpathimage = '/html/body/div[2]/div[4]/span/img';
+ var xpathimage = "//span[@class='rec_mat_long']/img";
if (elt = doc.evaluate(xpathimage, doc, nsResolver, XPathResult.ANY_TYPE, null).iterateNext())
{
var type = elt.getAttribute('src');
- if (type.indexOf('article.gif') > 0)
+ if (type.indexOf('article.') > 0)
{
return "journalArticle";
}
- else if (type.indexOf('audiovisual.gif') > 0)
+ else if (type.indexOf('audiovisual.') > 0)
{
return "film";
}
- else if (type.indexOf('book.gif') > 0)
+ else if (type.indexOf('book.') > 0)
{
return "book";
}
- else if (type.indexOf('handwriting.gif') > 0)
+ else if (type.indexOf('handwriting.') > 0)
{
return "manuscript";
}
- else if (type.indexOf('sons.gif') > 0)
+ else if (type.indexOf('sons.') > 0)
{
return "audioRecording";
}
- else if (type.indexOf('sound.gif') > 0)
+ else if (type.indexOf('sound.') > 0)
{
return "audioRecording";
}
- else if (type.indexOf('thesis.gif') > 0)
+ else if (type.indexOf('thesis.') > 0)
{
return "thesis";
}
- else if (type.indexOf('map.gif') > 0)
+ else if (type.indexOf('map.') > 0)
{
return "map";
}
- else
- {
- return "book";
- }
}
+ return "book";
}
}
}
@@ -77,7 +74,7 @@ function scrape(doc, url) {
if (prefix == 'x') return namespace; else return null;
} : null;
- var zXpath = '/html/body/span[@class="Z3988"]';
+ var zXpath = '//span[@class="Z3988"]';
var eltCoins = doc.evaluate(zXpath, doc, nsResolver, XPathResult.ANY_TYPE, null);
if (eltCoins = doc.evaluate(zXpath, doc, nsResolver, XPathResult.ANY_TYPE, null).iterateNext())
{
@@ -237,10 +234,8 @@ function scrape(doc, url) {
}
}
- if ( (newItem.url == undefined) && (ppn != "") )
- {
- newItem.url = 'http://www.sudoc.abes.fr/DB=2.1/SRCH?IKT=12&TRM=' + ppn;
- }
+ // Attach a link back to the original Sudoc record, built from its PPN
+ newItem.attachments = [{url:'http://www.sudoc.abes.fr/DB=2.1/SRCH?IKT=12&TRM=' + ppn, title:"Notice sudoc", mimeType:"text/html", snapshot:false}];
newItem.complete();
}
}
@@ -253,42 +248,41 @@ function doWeb(doc, url) {
if (prefix == 'x') return namespace; else return null;
} : null;
- var multxpath = '/html/body/div[2]/div/span';
- if (elt = doc.evaluate(multxpath, doc, nsResolver, XPathResult.ANY_TYPE, null).iterateNext()) {
- var content = elt.textContent;
- if ( (content == "Résultats") || (content == "Results") )
- {
- var newUrl = doc.evaluate('//base/@href', doc, nsResolver, XPathResult.ANY_TYPE, null).iterateNext().nodeValue;
- var xpath = '/html/body/div[2]/table/tbody/tr/td[3]/div/a';
- var elmts = doc.evaluate(xpath, doc, nsResolver, XPathResult.ANY_TYPE, null);
- var elmt = elmts.iterateNext();
- var links = new Array();
- var availableItems = new Array();
- var i = 0;
- do {
- var link = doc.evaluate('./@href', elmt, nsResolver, XPathResult.ANY_TYPE, null).iterateNext().nodeValue;
- var searchTitle = elmt.textContent;
- availableItems[i] = searchTitle;
- links[i] = link;
- i++;
- } while (elmt = elmts.iterateNext());
- var items = Zotero.selectItems(availableItems);
-
- if(!items) {
- return true;
- }
-
- var uris = new Array();
- for(var i in items) {
- uris.push(newUrl + links[i]);
- }
- Zotero.Utilities.processDocuments(uris, function(doc) { scrape(doc) },
- function() { Zotero.done(); }, null);
- Zotero.wait();
- }
- else if ( (content == "Notice complète") || (content == 'title data') )
- {
- scrape(doc, url);
- }
+ var type = detectWeb(doc, url);
+ if (type == "multiple")
+ {
+ // List the result titles so the user can choose which ones to import
+ var newUrl = doc.evaluate('//base/@href', doc, nsResolver, XPathResult.ANY_TYPE, null).iterateNext().nodeValue;
+ var xpath = "//table[@summary='short title presentation']/tbody/tr//td[@class='rec_title']";
+ var elmts = doc.evaluate(xpath, doc, nsResolver, XPathResult.ANY_TYPE, null);
+ var elmt = elmts.iterateNext();
+ var links = new Array();
+ var availableItems = new Array();
+
+ var i = 0;
+ do
+ {
+ var link = doc.evaluate(".//a/@href", elmt, nsResolver, XPathResult.ANY_TYPE, null).iterateNext().nodeValue;
+ var searchTitle = doc.evaluate(".//a", elmt, nsResolver, XPathResult.ANY_TYPE, null).iterateNext().textContent;
+
+ availableItems[i] = searchTitle ;
+ links[i] = link;
+ i++;
+ } while (elmt = elmts.iterateNext());
+ var items = Zotero.selectItems(availableItems);
+ if(!items) {
+ return true;
+ }
+ var uris = new Array();
+ for(var i in items) {
+ uris.push(newUrl + links[i]);
+ }
+ Zotero.Utilities.processDocuments(uris, function(doc) { scrape(doc) },
+ function() { Zotero.done(); }, null);
+ Zotero.wait();
+ }
+ else if (type != "")
+ {
+ scrape(doc, url);
}
}