www

Unnamed repository; edit this file 'description' to name the repository.
Log | Files | Refs | Submodules | README | LICENSE

commit 59a1628e5b4830e099012837c3ab38865e70af3c
parent c7eb9d13595b7b45ca3598320b421dc5b4df7070
Author: Simon Kornblith <simon@simonster.com>
Date:   Fri,  1 Sep 2006 02:45:31 +0000

fixes #254, NY Times scraper fails (thanks Sean)


Diffstat:
M scrapers.sql | 8 ++++----
1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/scrapers.sql b/scrapers.sql
@@ -1,7 +1,7 @@
--- 70
+-- 71
 -- Set the following timestamp to the most recent scraper update date
-REPLACE INTO "version" VALUES ('repository', STRFTIME('%s', '2006-08-15 15:42:00'));
+REPLACE INTO "version" VALUES ('repository', STRFTIME('%s', '2006-08-31 22:44:00'));
 REPLACE INTO "translators" VALUES ('96b9f483-c44d-5784-cdad-ce21b984fe01', '2006-08-11 11:18:00', 4, 'Amazon.com', 'Simon Kornblith', '^http://www\.amazon\.com/',
 'function detectWeb(doc, url) {
@@ -3324,7 +3324,7 @@ function doWeb(doc, url) {
 Scholar.wait();
 }');
-REPLACE INTO "translators" VALUES ('ce7a3727-d184-407f-ac12-52837f3361ff', '2006-08-26 14:21:00', 4, 'New York Times', 'Simon Kornblith', '^http://(?:query\.nytimes\.com/search/query|www\.nytimes\.com/.+)',
+REPLACE INTO "translators" VALUES ('ce7a3727-d184-407f-ac12-52837f3361ff', '2006-08-31 22:44:00', 4, 'New York Times', 'Simon Kornblith', '^http://(?:query\.nytimes\.com/search/query|www\.nytimes\.com/.+)',
 'function detectWeb(doc, url) {
 if(doc.title.substr(0, 30) == "The New York Times: Search for") {
 var namespace = doc.documentElement.namespaceURI;
@@ -3417,7 +3417,7 @@ function scrape(doc, url) {
 associateMeta(newItem, metaTags, "articleid", "accessionNumber");
 if(metaTags["byl"]) {
- var author = metaTags["byl"];
+ var author = Scholar.Utilities.cleanString(metaTags["byl"]);
 if(author.substr(0, 3).toLowerCase() == "by ") {
 author = author.substr(3);
 }