Adding Rintze's changes. Fixes #1299, #818, #963 - www - Unnamed repository; edit this file 'description' to name the repository.

commit 0f2ad9bed91487ada2db3c679c39759dbb5ec95e
parent 6006b597b16bebbcfda6f5d5d6fc5eb18fc5e431
Author: Matt Burton <mcburton@gmail.com>
Date:   Tue, 19 May 2009 01:49:58 +0000

Adding Rintze's changes. Fixes #1299, #818, #963

Diffstat:
M translators/Google Books.js  | 173 +++++++++++++++++++++++++++++++++++++++++++++++++++++--------------------------

1 file changed, 117 insertions(+), 56 deletions(-)
diff --git a/translators/Google Books.js b/translators/Google Books.js
@@ -2,15 +2,16 @@
 	"translatorID":"3e684d82-73a3-9a34-095f-19b112d88bbf",
 	"translatorType":4,
 	"label":"Google Books",
-	"creator":"Simon Kornblith and Michael Berkowitz",
-	"target":"^http://(books|www)\\.google\\.[a-z]+(\\.[a-z]+)?/books\\?(.*id=.*|.*q=.*)",
+	"creator":"Simon Kornblith, Michael Berkowitz and Rintze Zelle",
+	"target":"^http://(books|www)\.google\.[a-z]+(\.[a-z]+)?/books\?(.*id=.*|.*q=.*)",
 	"minVersion":"1.0.0b3.r1",
 	"maxVersion":"",
 	"priority":100,
 	"inRepository":true,
-	"lastUpdated":"2009-02-03 05:45:00"
+	"lastUpdated":"2009-05-14 05:45:00"
 }
 
+
 function detectWeb(doc, url) {
 	var re = new RegExp('^http://(books|www)\\.google\\.[a-z]+(\.[a-z]+)?/books\\?id=([^&]+)', 'i');
 	if(re.test(doc.location.href)) {
@@ -19,8 +20,12 @@ function detectWeb(doc, url) {
 		return "multiple";
 	}
 }
-
 function doWeb(doc, url) {
+	var namespace = doc.documentElement.namespaceURI;
+	var nsResolver = namespace ? function(prefix) {
+		if (prefix == 'x') return namespace; else return null;
+		} : null;
+	
 	// get local domain suffix
 	var psRe = new RegExp("https?://(books|www)\.google\.([^/]+)/");
 	var psMatch = psRe.exec(url);
@@ -32,9 +37,9 @@ function doWeb(doc, url) {
 	var re = new RegExp('^http://(?:books|www)\\.google\\.[a-z]+(\.[a-z]+)?/books\\?id=([^&]+)', 'i');
 	var m = re.exec(uri);
 	if(m) {
-		newUris.push('http://'+prefix+'.google.'+suffix+'/books?id='+m[2]);
+		newUris.push("http://books.google.com/books/feeds/volumes/"+m[2]);
 	} else {
-		var items = Zotero.Utilities.getItemArray(doc, doc, 'http://'+prefix+'\\.google\\.' + suffix + '/books\\?id=([^&]+)', '^(?:All matching pages|About this Book|Table of Contents|Index)');
+		var items = getItemArrayGB(doc, doc, 'http://'+prefix+'\\.google\\.' + suffix + '/books\\?id=([^&]+)', '^(?:All matching pages|About this Book|Table of Contents|Index)');
 		// Drop " - Page" thing
 		for(var i in items) {
 			items[i] = items[i].replace(/- Page [0-9]+\s*$/, "");
@@ -47,66 +52,122 @@ function doWeb(doc, url) {
 		
 		for(var i in items) {
 			var m = re.exec(i);
-			newUris.push('http://'+prefix+'.google.'+suffix+'/books?id='+m[2]);
+			newUris.push("http://books.google.com/books/feeds/volumes/"+m[2]);
 		}
 	}
-	Zotero.debug(newUris);
-	Zotero.Utilities.processDocuments(newUris, function(newDoc) {
+	
+	var itemUrlBase = "http://"+prefix+".google."+suffix+"/books?id=";
+	
+	Zotero.Utilities.HTTP.doGet(newUris, function(text) {
+		// Remove xml parse instruction and doctype
+		text = text.replace(/<!DOCTYPE[^>]*>/, "").replace(/<\?xml[^>]*\?>/, "");
+
+		var xml = new XML(text);
+		
+		default xml namespace = "http://purl.org/dc/terms"; with ({});
+		
 		var newItem = new Zotero.Item("book");
-		newItem.extra = "";
 		
-		var namespace = newDoc.documentElement.namespaceURI;
-		var nsResolver = namespace ? function(prefix) {
-		  if (prefix == 'x') return namespace; else return null;
-		} : null;
-
-		var xpath = '//h2[@class="title"]'
-		var elmt;	
-		if (elmt = newDoc.evaluate(xpath, newDoc, nsResolver,
-		                            XPathResult.ANY_TYPE, null).iterateNext()){
-			var title = Zotero.Utilities.superCleanString(elmt.textContent);
-			newItem.title = title;
-			Zotero.debug("title: " + title);
+		var authors = xml.creator;
+		for (var i in authors) {
+			newItem.creators.push(Zotero.Utilities.cleanAuthor(authors[i].toString(), "author"));
 		}
-		xpath = '//div[@class="titlewrap"]/span[@class="addmd"]'
-		if (elmt = newDoc.evaluate(xpath, newDoc, nsResolver,
-		                            XPathResult.ANY_TYPE, null).iterateNext()){
-			var authors = Zotero.Utilities.superCleanString(elmt.textContent);
-			if (authors.substring(0, 3) == "By "){
-				authors = authors.substring(3);
-			}
-			authors = authors.split(", ");
-			for(j in authors) {
-				newItem.creators.push(Zotero.Utilities.cleanAuthor(authors[j], "author"));
+		
+		newItem.date = xml.date.toString();
+		
+		var pages = xml.format.toString();
+		var pagesRe = new RegExp(/(\d+)( pages)/);
+		var pagesMatch = pagesRe.exec(pages);
+		if (pagesMatch!=null) {
+			newItem.pages = pagesMatch[1];
+		} else {
+			newItem.pages = pages;
+		}
+		
+		var ISBN;
+		var identifiers = xml.identifier;
+		var identifiersRe = new RegExp(/(ISBN:)(\w+)/);
+		for (var i in identifiers) {
+			var identifierMatch = identifiersRe.exec(identifiers[i].toString());
+			if (identifierMatch!=null && !ISBN) {
+				ISBN = identifierMatch[2];
+			} else if (identifierMatch!=null){
+				ISBN = ISBN + ", " + identifierMatch[2];
 			}
 		}
+		newItem.ISBN = ISBN;
+		
+		newItem.publisher = xml.publisher[0].toString();
+		
+		newItem.title = xml.title[0].toString();
+		
+		newItem.url = itemUrlBase + xml.identifier[0];
 		
-		xpath = '//td[2][@id="bookinfo"]/div[@class="bookinfo_sectionwrap"]/div';
-		var elmts = newDoc.evaluate(xpath, newDoc, nsResolver,
-		                            XPathResult.ANY_TYPE, null);
-		while(elmt = elmts.iterateNext()) {
-			var fieldelmt = newDoc.evaluate('.//text()', elmt, nsResolver, XPathResult.ANY_TYPE, null).iterateNext();
-			if(fieldelmt) {
-				field = Zotero.Utilities.superCleanString(fieldelmt.nodeValue);
-				Zotero.debug("output: " + field);
-				if(field.substring(0,10) == "Published ") {
-					newItem.date = field.substring(field.length-4);
-					var publisher = newDoc.evaluate('..//a', fieldelmt, nsResolver, XPathResult.ANY_TYPE, null).iterateNext();
-					if (publisher){
-						publisher =  Zotero.Utilities.superCleanString(publisher.textContent);
-						newItem.publisher = publisher;
+		newItem.complete();
+	}, function() { Zotero.done(); }, null);
+	Zotero.wait();
+}
+
+/**
+ * Grabs items based on URLs
+ *
+ * @param {Document} doc DOM document object
+ * @param {Element|Element[]} inHere DOM element(s) to process
+ * @param {RegExp|String} [urlRe] Regexp (or pattern string) that a link's URL must match to be added to the list
+ * @param {RegExp|String} [rejectRe] Regexp (or pattern string) of link text to reject
+ * @return {Object} Associative array of link => textContent pairs, suitable for passing to
+ *	Zotero.selectItems from within a translator
+ */
+function getItemArrayGB (doc, inHere, urlRe, rejectRe) {
+	var availableItems = new Object();	// Technically, associative arrays are objects
+	
+	// Require link to match this
+	if(urlRe) {
+		if(urlRe.exec) {
+			var urlRegexp = urlRe;
+		} else {
+			var urlRegexp = new RegExp();
+			urlRegexp.compile(urlRe, "i");
+		}
+	}
+	// Do not allow text to match this
+	if(rejectRe) {
+		if(rejectRe.exec) {
+			var rejectRegexp = rejectRe;
+		} else {
+			var rejectRegexp = new RegExp();
+			rejectRegexp.compile(rejectRe, "i");
+		}
+	}
+	
+	if(!inHere.length) {
+		inHere = new Array(inHere);
+	}
+	
+	for(var j=0; j<inHere.length; j++) {
+		var links = inHere[j].getElementsByTagName("a");
+		for(var i=0; i<links.length; i++) {
+			if(!urlRe || urlRegexp.test(links[i].href)) {
+				var text = links[i].textContent;
+				//Rintze Zelle: the three lines below are for compatibility with Google Books cover view
+				if(!text) {
+					var text = links[i].firstChild.alt;
+				}
+				if(text) {
+					text = Zotero.Utilities.trimInternal(text);
+					if(!rejectRe || !rejectRegexp.test(text)) {
+						if(availableItems[links[i].href]) {
+							if(text != availableItems[links[i].href]) {
+								availableItems[links[i].href] += " "+text;
+							}
+						} else {
+							availableItems[links[i].href] = text;
+						}
 					}
-				} else if(field.substring(0,5) == "ISBN ") {
-					newItem.ISBN = field.substring(5);
-				} else if(field.substring(field.length-6) == " pages") {
-					newItem.pages = field.substring(0, field.length-6);
-				} else if(field.substring(0,12) == "Contributor ") {
-					newItem.creators.push(Zotero.Utilities.cleanAuthor(field.substring(12), "contributor"));
 				}
 			}
-		}		
-		newItem.complete();
-	}, function() { Zotero.done(); }, null);
+		}
+	}
 	
-	Zotero.wait();
+	return availableItems;
 }
 \ No newline at end of file

	www Unnamed repository; edit this file 'description' to name the repository.
	Log \| Files \| Refs \| Submodules \| README \| LICENSE