Permalink
Join GitHub today
GitHub is home to over 31 million developers working together to host and review code, manage projects, and build software together.
Sign up
translators/Canada.com.js
Find file
Copy path
Fetching contributors…
Cannot retrieve contributors at this time
{
	"translatorID": "4da40f07-904b-4472-93b6-9bea1fe7d4df",
	"label": "Canada.com",
	"creator": "Adam Crymble",
	"target": "^https?://www\\.canada\\.com",
	"minVersion": "1.0.0b4.r5",
	"maxVersion": "",
	"priority": 100,
	"inRepository": true,
	"translatorType": 4,
	"browserSupport": "gcsibv",
	"lastUpdated": "2012-01-30 22:52:08"
}
/**
 * Classify the current page for Zotero.
 *
 * The decision is based solely on the document's own address
 * (doc.location.href); the url argument is accepted but not consulted.
 *
 * @param {Document} doc - Page being inspected.
 * @param {string} url - Address supplied by the caller (unused here).
 * @returns {string|undefined} "newspaperArticle" when the address contains
 *   "story", "multiple" when it contains "search", otherwise undefined.
 */
function detectWeb(doc, url) {
	var href = doc.location.href;
	// "story" is tested first, so it wins when an address contains both words.
	if (href.indexOf("story") !== -1) {
		return "newspaperArticle";
	}
	if (href.indexOf("search") !== -1) {
		return "multiple";
	}
	// Falls through without a return value for any other page.
}
/**
 * Build one "newspaperArticle" item from a story page and hand it to Zotero
 * via newItem.complete().
 *
 * Fields filled in:
 *  - title            from doc.title
 *  - abstractNote     from the story header's <h4>, else its <h2>
 *  - creators         from the first line of the "Author" meta tag, split on " and "
 *  - date             from the "PubDate" meta tag, trimmed, when non-empty
 *  - publicationTitle from the "home" navigation label, else "Canada.com"
 *  - url              from doc.location.href
 *
 * @param {Document} doc - The story page.
 * @param {string} url - Address supplied by the caller (unused; the document's
 *   own address is recorded instead).
 */
function scrape(doc, url) {
	var namespace = doc.documentElement.namespaceURI;
	var nsResolver = namespace ? function (prefix) {
		return prefix == 'x' ? namespace : null;
	} : null;

	// Text of the first node matched by xpath, or null when nothing matches.
	function firstText(xpath) {
		var node = doc.evaluate(xpath, doc, nsResolver, XPathResult.ANY_TYPE, null).iterateNext();
		return node ? node.textContent : null;
	}

	var newItem = new Zotero.Item("newspaperArticle");
	newItem.title = doc.title;

	// Prefer the <h4>; fall back to the <h2>. The fallback previously wrote to a
	// misspelt property and was therefore never saved as the abstract.
	var summary = firstText('//div[@class="storyheader"]/h4');
	if (summary === null) {
		summary = firstText('//div[@class="storyheader"]/h2');
	}
	if (summary !== null) {
		newItem.abstractNote = summary;
	}

	var author = firstText('//meta[@name="Author"]/@content');
	if (author !== null) {
		// Only the first line of the meta value is used; later lines are ignored.
		var byline = author.split(/\n/)[0];
		var names = byline.split(/ and /);
		for (var i = 0; i < names.length; i++) {
			// Skip blank fragments so no empty creator is recorded.
			if (/\S/.test(names[i])) {
				newItem.creators.push(Zotero.Utilities.cleanAuthor(names[i], "author"));
			}
		}
	}

	var pubDate = firstText('//meta[@name="PubDate"]/@content');
	if (pubDate !== null) {
		pubDate = pubDate.replace(/^\s*|\s*$/g, '');
		if (pubDate) {
			newItem.date = pubDate;
		}
	}

	var pubTitle = firstText('//ul[@class="home"]/li/a/span');
	if (pubTitle === null) {
		newItem.publicationTitle = "Canada.com";
	} else if (pubTitle.indexOf("Home") !== -1) {
		// Drops the last five characters — presumably a trailing " Home";
		// TODO confirm against the live markup.
		newItem.publicationTitle = pubTitle.slice(0, -5);
	} else {
		newItem.publicationTitle = pubTitle;
	}

	newItem.url = doc.location.href;
	newItem.complete();
}
/**
 * Translator entry point: work out which page(s) to scrape and queue them.
 *
 * When detectWeb reports "multiple", story links are gathered from several
 * result-list layouts, offered to the user through Zotero.selectItems, and
 * the chosen addresses are scraped. Otherwise the given url itself is scraped.
 *
 * @param {Document} doc - The current page.
 * @param {string} url - Address of the current page.
 */
function doWeb(doc, url) {
	var namespace = doc.documentElement.namespaceURI;
	var nsResolver = namespace ? function (prefix) {
		return prefix == 'x' ? namespace : null;
	} : null;

	var articles = [];
	if (detectWeb(doc, url) == "multiple") {
		// Link locations to try, in the order their results are added to the list.
		var linkPaths = [
			'//div[@class="even"]/p/a',
			'//h1/a',
			'//div[@class="odd"]/p/a',
			'//p/b/a',
			'//div[@class="name"]/a'
		];
		var items = {};
		for (var p = 0; p < linkPaths.length; p++) {
			var links = doc.evaluate(linkPaths[p], doc, nsResolver, XPathResult.ANY_TYPE, null);
			var link;
			while ((link = links.iterateNext())) {
				var href = link.href;
				// Keep only links whose address contains both "story" and the
				// literal text "canada.com".
				if (href.indexOf("story") !== -1 && href.indexOf("canada.com") !== -1) {
					items[href] = link.textContent;
				}
			}
		}
		items = Zotero.selectItems(items);
		// If selectItems yields nothing, this loop adds no addresses and an
		// empty list is passed on below.
		for (var chosen in items) {
			articles.push(chosen);
		}
	} else {
		articles = [url];
	}

	Zotero.Utilities.processDocuments(articles, scrape, function () {
		Zotero.done();
	});
	Zotero.wait();
}
/** BEGIN TEST CASES **/
var testCases = [
	{
		"type": "web",
		"url": "http://www.canada.com/search/search.html?stype=si&q=argentina&x=0&y=0&radio_btns=canada.com",
		"items": "multiple"
	}
]
/** END TEST CASES **/