www

Unnamed repository; edit this file 'description' to name the repository.
Log | Files | Refs | Submodules | README | LICENSE

commit 78a88f749f0ae1854c13c6df884612a818218150
parent 9c31b10de7350de1eadbfb292722d5aac4464498
Author: Dan Stillman <dstillman@zotero.org>
Date:   Thu, 17 Dec 2009 09:25:43 +0000

Pushed Google Books


Diffstat:
M translators/Google Books.js | 64 ++++++++++++++++++++++++++++++++++++++++++++++------------------
1 file changed, 46 insertions(+), 18 deletions(-)

diff --git a/translators/Google Books.js b/translators/Google Books.js @@ -8,7 +8,7 @@ "maxVersion":"", "priority":100, "inRepository":true, - "lastUpdated":"2009-09-20 03:15:00" + "lastUpdated":"2009-12-17 08:15:00" } @@ -39,8 +39,9 @@ function doWeb(doc, url) { if(m) { newUris.push("http://books.google.com/books/feeds/volumes/"+m[2]); } else { - var items = getItemArrayGB(doc, doc, 'http://'+prefix+'\\.google\\.' + suffix + '/books\\?id=([^&]+)', '^(?:All matching pages|About this Book|Table of Contents|Index)'); + var items = getItemArrayGB(doc, doc, 'google\\.' + suffix + '/books\\?id=([^&]+)', '^(?:All matching pages|About this Book|Table of Contents|Index)'); // Drop " - Page" thing + //Zotero.debug(items); for(var i in items) { items[i] = items[i].replace(/- Page [0-9]+\s*$/, ""); } @@ -112,7 +113,7 @@ function doWeb(doc, url) { } /** - * Grabs items based on URLs + * Grabs items based on URLs, modified for Google Books * * @param {Document} doc DOM document object * @param {Element|Element[]} inHere DOM element(s) to process @@ -122,6 +123,11 @@ function doWeb(doc, url) { * Zotero.selectItems from within a translator */ function getItemArrayGB (doc, inHere, urlRe, rejectRe) { + var namespace = doc.documentElement.namespaceURI; + var nsResolver = namespace ? 
function(prefix) { + if (prefix == 'x') return namespace; else return null; + } : null; + var availableItems = new Object(); // Technically, associative arrays are objects // Require link to match this @@ -148,23 +154,45 @@ function getItemArrayGB (doc, inHere, urlRe, rejectRe) { } for(var j=0; j<inHere.length; j++) { - var links = inHere[j].getElementsByTagName("a"); - for(var i=0; i<links.length; i++) { - if(!urlRe || urlRegexp.test(links[i].href)) { - var text = links[i].textContent; - //Rintze Zelle: the three lines below are for compatibility with Google Books cover view - if(!text) { - var text = links[i].firstChild.alt; + var coverView = doc.evaluate('//div[@class="thumbotron"]', doc, nsResolver, XPathResult.ANY_TYPE, null).iterateNext();//Detect Cover view + if(coverView){ + var links = inHere[j].getElementsByTagName("a"); + for(var i=0; i<links.length; i++) { + if(!urlRe || urlRegexp.test(links[i].href)) { + var text = links[i].textContent; + if(!text) { + var text = links[i].firstChild.alt; + } + if(text) { + text = Zotero.Utilities.trimInternal(text); + if(!rejectRe || !rejectRegexp.test(text)) { + if(availableItems[links[i].href]) { + if(text != availableItems[links[i].href]) { + availableItems[links[i].href] += " "+text; + } + } else { + availableItems[links[i].href] = text; + } + } + } } - if(text) { - text = Zotero.Utilities.trimInternal(text); - if(!rejectRe || !rejectRegexp.test(text)) { - if(availableItems[links[i].href]) { - if(text != availableItems[links[i].href]) { - availableItems[links[i].href] += " "+text; + } + } + else { + var links = inHere[j].getElementsByTagName("img");//search for <img>-elements, scrape title from alt-attribute, href-link from parent <a>-element + for(var i=0; i<links.length; i++) { + if(!urlRe || urlRegexp.test(links[i].parentNode.href)) { + var text = links[i].alt; + if(text) { + text = Zotero.Utilities.trimInternal(text); + if(!rejectRe || !rejectRegexp.test(text)) { + if(availableItems[links[i].href]) { + if(text 
!= availableItems[links[i].href]) { + availableItems[links[i].href] += " "+text; + } + } else { + availableItems[links[i].parentNode.href] = text; } - } else { - availableItems[links[i].href] = text; } } }