Permalink
Join GitHub today
GitHub is home to over 36 million developers working together to host and review code, manage projects, and build software together.
Sign up
Fetching contributors…
Cannot retrieve contributors at this time
{ | |
"translatorID": "4da40f07-904b-4472-93b6-9bea1fe7d4df", | |
"label": "Canada.com", | |
"creator": "Adam Crymble", | |
"target": "^https?://www\\.canada\\.com", | |
"minVersion": "1.0.0b4.r5", | |
"maxVersion": "", | |
"priority": 100, | |
"inRepository": true, | |
"translatorType": 4, | |
"browserSupport": "gcsibv", | |
"lastUpdated": "2012-01-30 22:52:08" | |
} | |
function detectWeb(doc, url) { | |
if (doc.location.href.match("story")) { | |
return "newspaperArticle"; | |
} else if (doc.location.href.match("search")) { | |
return "multiple"; | |
} | |
} | |
function scrape(doc, url) { | |
var namespace = doc.documentElement.namespaceURI; | |
var nsResolver = namespace ? function(prefix) { | |
if (prefix == 'x') return namespace; else return null; | |
} : null; | |
var dataTags = new Object(); | |
var tagsContent = new Array(); | |
var fieldTitle; | |
var newItem = new Zotero.Item("newspaperArticle"); | |
newItem.title = doc.title; | |
if (doc.evaluate('//div[@class="storyheader"]/h4', doc, nsResolver, XPathResult.ANY_TYPE, null).iterateNext()) { | |
newItem.abstractNote = doc.evaluate('//div[@class="storyheader"]/h4', doc, nsResolver, XPathResult.ANY_TYPE, null).iterateNext().textContent; | |
} else if (doc.evaluate('//div[@class="storyheader"]/h2', doc, nsResolver, XPathResult.ANY_TYPE, null).iterateNext()) { | |
newItem.abstracteNote = doc.evaluate('//div[@class="storyheader"]/h2', doc, nsResolver, XPathResult.ANY_TYPE, null).iterateNext().textContent; | |
} | |
if (doc.evaluate('//meta[@name="Author"]/@content', doc, nsResolver, XPathResult.ANY_TYPE, null).iterateNext()) { | |
var author = doc.evaluate('//meta[@name="Author"]/@content', doc, nsResolver, XPathResult.ANY_TYPE, null).iterateNext().textContent; | |
if (author.match(/\n/)) { | |
author1 = author.split(/\n/); | |
if (author1[0].match(/ and /)) { | |
author2 = author1[0].split(/ and /); | |
for (var i in author2) { | |
newItem.creators.push(Zotero.Utilities.cleanAuthor(author2[i], "author")); | |
} | |
} else { | |
newItem.creators.push(Zotero.Utilities.cleanAuthor(author1[0], "author")); | |
} | |
} else { | |
newItem.creators.push(Zotero.Utilities.cleanAuthor(author, "author")); | |
} | |
} | |
if (doc.evaluate('//meta[@name="PubDate"]/@content', doc, nsResolver, XPathResult.ANY_TYPE, null).iterateNext()) { | |
var date1 = doc.evaluate('//meta[@name="PubDate"]/@content', doc, nsResolver, XPathResult.ANY_TYPE, null).iterateNext().textContent.replace(/^\s*|\s*$/g, ''); | |
if (date1) { | |
newItem.date = date1; | |
} | |
} | |
if (doc.evaluate('//ul[@class="home"]/li/a/span', doc, nsResolver, XPathResult.ANY_TYPE, null).iterateNext()) { | |
var pubTitle = doc.evaluate('//ul[@class="home"]/li/a/span', doc, nsResolver, XPathResult.ANY_TYPE, null).iterateNext().textContent; | |
if (pubTitle.match("Home")) { | |
newItem.publicationTitle = pubTitle.substr(0, pubTitle.length-5); | |
} else { | |
newItem.publicationTitle = pubTitle; | |
} | |
} else { | |
newItem.publicationTitle = "Canada.com"; | |
} | |
newItem.url = doc.location.href; | |
newItem.complete(); | |
} | |
function doWeb(doc, url) { | |
var namespace = doc.documentElement.namespaceURI; | |
var nsResolver = namespace ? function(prefix) { | |
if (prefix == 'x') return namespace; else return null; | |
} : null; | |
var articles = new Array(); | |
if (detectWeb(doc, url) == "multiple") { | |
var items = new Object(); | |
var next_title; | |
if (doc.evaluate('//div[@class="even"]/p/a', doc, nsResolver, XPathResult.ANY_TYPE, null).iterateNext()) { | |
Zotero.debug("AAAAAA"); | |
var titles0 = doc.evaluate('//div[@class="even"]/p/a', doc, nsResolver, XPathResult.ANY_TYPE, null); | |
while (next_title = titles0.iterateNext()) { | |
if (next_title.href.match("story") && next_title.href.match("canada.com")) { | |
items[next_title.href] = next_title.textContent; | |
} | |
} | |
} | |
if (doc.evaluate('//h1/a', doc, nsResolver, XPathResult.ANY_TYPE, null).iterateNext()) { | |
Zotero.debug("new site?"); | |
var titles0 = doc.evaluate('//h1/a', doc, nsResolver, XPathResult.ANY_TYPE, null); | |
while (next_title = titles0.iterateNext()) { | |
if (next_title.href.match("story") && next_title.href.match("canada.com")) { | |
items[next_title.href] = next_title.textContent; | |
} | |
} | |
} | |
if (doc.evaluate('//div[@class="odd"]/p/a', doc, nsResolver, XPathResult.ANY_TYPE, null).iterateNext()) { | |
Zotero.debug("BBBBB"); | |
var titles1 = doc.evaluate('//div[@class="odd"]/p/a', doc, nsResolver, XPathResult.ANY_TYPE, null); | |
while (next_title = titles1.iterateNext()) { | |
if (next_title.href.match("story") && next_title.href.match("canada.com")) { | |
items[next_title.href] = next_title.textContent; | |
} | |
} | |
} | |
if (doc.evaluate('//p/b/a', doc, nsResolver, XPathResult.ANY_TYPE, null).iterateNext()) { | |
Zotero.debug("CCCCC"); | |
var titles2 = doc.evaluate('//p/b/a', doc, nsResolver, XPathResult.ANY_TYPE, null); | |
while (next_title = titles2.iterateNext()) { | |
if (next_title.href.match("story") && next_title.href.match("canada.com")) { | |
items[next_title.href] = next_title.textContent; | |
} | |
} | |
} | |
if (doc.evaluate('//div[@class="name"]/a', doc, nsResolver, XPathResult.ANY_TYPE, null).iterateNext()) { | |
Zotero.debug("DDDD"); | |
var titles3 = doc.evaluate('//div[@class="name"]/a', doc, nsResolver, XPathResult.ANY_TYPE, null); | |
while (next_title = titles3.iterateNext()) { | |
if (next_title.href.match("story") && next_title.href.match("canada.com")) { | |
items[next_title.href] = next_title.textContent; | |
} | |
} | |
} | |
items = Zotero.selectItems(items); | |
for (var i in items) { | |
articles.push(i); | |
} | |
} else { | |
articles = [url]; | |
} | |
Zotero.Utilities.processDocuments(articles, scrape, function() {Zotero.done();}); | |
Zotero.wait(); | |
}/** BEGIN TEST CASES **/ | |
var testCases = [ | |
{ | |
"type": "web", | |
"url": "http://www.canada.com/search/search.html?stype=si&q=argentina&x=0&y=0&radio_btns=canada.com", | |
"items": "multiple" | |
} | |
] | |
/** END TEST CASES **/ |