Trans: Changes to NYT: Use standard date when available, grab single page snapshot - www - Unnamed repository; edit this file 'description' to name the repository.

commit 68c2a0039ab8fda226160702d22aff32906061f5
parent 9d78bd70241e3b62a7dee52ad28654b64913b544
Author: Avram Lyon <ajlyon@gmail.com>
Date:   Mon, 21 Mar 2011 13:03:37 +0000

Trans: Changes to NYT: Use standard date when available, grab single page snapshot


Diffstat:
M translators/NYTimes.com.js  | 30 +++++++++++++++++++++++++-----

1 file changed, 25 insertions(+), 5 deletions(-)
diff --git a/translators/NYTimes.com.js b/translators/NYTimes.com.js
@@ -8,7 +8,7 @@
 	"maxVersion":"",
 	"priority":100,
 	"inRepository":true,
-	"lastUpdated":"2011-01-11 04:31:00"
+	"lastUpdated":"2011-03-21 04:31:00"
 }
 
 function detectWeb(doc, url) {
@@ -38,6 +38,11 @@ function associateMeta(newItem, metaTags, field, zoteroField) {
 }
 
 function scrape(doc, url) {
+	var namespace = null;
+	var nsResolver = namespace ? function(prefix) {
+			if (prefix == 'x') return namespace; else return null;
+	} : null;
+	
 	var newItem = new Zotero.Item("newspaperArticle");
 	newItem.publicationTitle = "The New York Times";
 	newItem.ISSN = "0362-4331";
@@ -65,8 +70,8 @@ function scrape(doc, url) {
 		if(!metaTags["hdl"]) {
 			return;
 		}
-		
-		newItem.attachments.push({url:url, title:"New York Times Snapshot",
+		// We want to get everything on one page
+		newItem.attachments.push({url:url.replace(/\.html\??([^/]*)(pagewanted=[^&]*)?([^/]*)$/,".html?pagewanted=all&$1$2"), title:"New York Times Snapshot",
 	 	                          mimeType:"text/html"});
 	} else {
 		newItem.url = doc.location.href;
@@ -78,8 +83,16 @@ function scrape(doc, url) {
 				metaTags[key] = value;
 			}
 		}
-	
-		newItem.attachments.push({document:doc, title:"New York Times Snapshot"});
+		// Get everything on one page if possible
+		var singlePage = false;
+		if (!newItem.url.match(/\?pagewanted=all/)
+				&& (singlePage = doc.evaluate('//ul[@id="toolsList"]/li[@class="singlePage"]/a', doc, nsResolver,
+		             XPathResult.ANY_TYPE, null).iterateNext())) {
+			newItem.attachments.push({url:singlePage.href, title:"New York Times Snapshot",
+	 		                          mimeType:"text/html"});
+		} else {
+			newItem.attachments.push({document:doc, title:"New York Times Snapshot"});
+		}
 	}
 	
 	associateMeta(newItem, metaTags, "dat", "date");
@@ -87,6 +100,10 @@ function scrape(doc, url) {
 	associateMeta(newItem, metaTags, "dsk", "section");
 	associateMeta(newItem, metaTags, "articleid", "accessionNumber");
 	
+	if (metaTags["pdate"]) {
+		newItem.date = metaTags["pdate"].replace(/(\d{4})(\d{2})(\d{2})/,"$1-$2-$3");
+	}
+	
 	if(metaTags["byl"]) {
 		var author = Zotero.Utilities.trimInternal(metaTags["byl"]);
 		if(author.substr(0, 3).toLowerCase() == "by ") {
@@ -118,6 +135,9 @@ function scrape(doc, url) {
 		}
 	}
 	
+	// Remove pagewanted from URL in item (keeping other pieces, in case they might matter)
+	newItem.url = newItem.url.replace(/\?([^/]*)pagewanted=[^&]*/,'');
+	
 	newItem.complete();
 }

	www Unnamed repository; edit this file 'description' to name the repository.
	Log \| Files \| Refs \| Submodules \| README \| LICENSE