Permalink
Join GitHub today
GitHub is home to over 31 million developers working together to host and review code, manage projects, and build software together.
Sign up
translators/The Hindu (old).js
Find file
Copy path
Fetching contributors…
Cannot retrieve contributors at this time
{
	"translatorID": "9499c586-d672-42d6-9ec4-ee9594dcc571",
	"label": "The Hindu (old)",
	"creator": "Prashant Iyengar and Michael Berkowitz",
	"target": "^https?://(www\\.)?thehindu\\.com/lr/",
	"minVersion": "1.0.0b4.r5",
	"maxVersion": "",
	"priority": 100,
	"inRepository": true,
	"translatorType": 4,
	"browserSupport": "gcsibv",
	"lastUpdated": "2017-07-10 15:25:23"
}
/**
 * Classify the current page for Zotero.
 *
 * @param {Document} doc Page to inspect; queried with an XPath expression.
 * @param {string} url Address of the page (not used by the check).
 * @returns {string} "multiple" when the page holds at least one
 *   <a class="l"> directly inside an <h2 class="r">, otherwise
 *   "newspaperArticle".
 */
function detectWeb(doc, url) {
	var resultLinks = doc.evaluate('//h2[@class="r"]/a[@class="l"]', doc, null, XPathResult.ANY_TYPE, null);
	// A single hit is enough; the remaining nodes are never read here.
	return resultLinks.iterateNext() ? "multiple" : "newspaperArticle";
}
/**
 * Copy the value of one recognised META tag onto an item.
 *
 * The tag text must contain NAME="…" followed by CONTENT="…". Only the first
 * whitespace-delimited token of CONTENT is stored. Recognised names and the
 * item field each one fills:
 *   PAGEHEAD -> section, ZONE -> place, EXPORTTIME -> date, PAGENUMBER -> pages.
 * Any other name, or text that does not match the pattern, leaves the item
 * untouched.
 *
 * @param {string} str Raw text of a single META tag.
 * @param {Object} item Item to update in place.
 */
function regexMeta(str, item) {
	var parts = str.match(/NAME\=\"([\w\W]*?)\"\s+CONTENT\=\"([\w\W]*?)\"/);
	if (!parts) {
		return;
	}
	var field;
	switch (parts[1]) {
		case "PAGEHEAD":
			field = "section";
			break;
		case "ZONE":
			field = "place";
			break;
		case "EXPORTTIME":
			field = "date";
			break;
		case "PAGENUMBER":
			field = "pages";
			break;
		default:
			return;
	}
	item[field] = parts[2].split(/\s+/)[0];
}
/**
 * Save the article at `url`, or, on a page detectWeb reports as "multiple",
 * every linked article the user picks, as "newspaperArticle" items.
 *
 * Each article page is fetched with Zotero.Utilities.HTTP.doGet and scraped
 * with regular expressions: the title from <TITLE>, one author from the
 * "storyhead" block, and further fields from <META NAME=…> tags via regexMeta.
 * Publication title, ISSN and a snapshot attachment are always set.
 *
 * @param {Document} doc Page being translated; only queried in the "multiple" case.
 * @param {string} url Address of that page; used as the article URL otherwise.
 */
function doWeb(doc, url) {
	var arts = [];
	if (detectWeb(doc, url) === "multiple") {
		var links = doc.evaluate('//h2[@class="r"]/a[@class="l"]', doc, null, XPathResult.ANY_TYPE, null);
		var items = {};
		var link;
		while ((link = links.iterateNext())) {
			items[link.href] = link.textContent;
		}
		items = Zotero.selectItems(items);
		for (var href in items) {
			arts.push(href);
		}
	} else {
		arts = [url];
	}

	// forEach gives each request its own `art` binding. A shared loop variable
	// would already hold the last URL by the time the callbacks run.
	arts.forEach(function (art) {
		Zotero.debug(art);
		Zotero.Utilities.HTTP.doGet(art, function (text) {
			var newItem = new Zotero.Item("newspaperArticle");
			newItem.url = art;

			// Title: the text between the last ":" and the closing </TITLE.
			var titleMatch = text.match(/\<TITLE\>[\w\W]*\:([\w\W]*?)<\/TITLE/);
			if (titleMatch) {
				newItem.title = Zotero.Utilities.unescapeHTML(Zotero.Utilities.capitalizeTitle(titleMatch[1]));
			} else {
				// Keep the item rather than aborting the whole callback.
				Zotero.debug("No title found in " + art);
			}

			// Author: the text following the "storyhead" block, up to the next <p>.
			var authorMatch = text.match(/\<font class\=storyhead[\w\W]*?justify\>([\w\W]*?)\<p\>/);
			if (authorMatch) {
				var cleanAuth = Zotero.Utilities.cleanTags(authorMatch[1]);
				newItem.creators.push(Zotero.Utilities.cleanAuthor(cleanAuth, "author"));
			}

			newItem.publicationTitle = "The Hindu";
			newItem.ISSN = "0971-751X";
			newItem.attachments = [{"title":"The Hindu Snapshot", mimeType:"text/html", url:art}];

			// match() returns null when there are no META tags at all.
			var metaTags = text.match(/<META NAME[\w\W]*?\>/g) || [];
			for (var j = 0; j < metaTags.length; j++) {
				regexMeta(metaTags[j], newItem);
			}
			newItem.complete();
		});
	});
}/** BEGIN TEST CASES **/
var testCases = [
	{
		"type": "web",
		"url": "http://www.thehindu.com/lr/2004/01/04/stories/2004010400030100.htm",
		"items": [
			{
				"itemType": "newspaperArticle",
				"title": "Falling at the speed of light",
				"creators": [
					{
						"firstName": "To be torn between two languages, discovers H. MASUD TAJ, is to drown soul-deep in the",
						"lastName": "present",
						"creatorType": "author"
					}
				],
				"date": "01-01-2004",
				"ISSN": "0971-751X",
				"libraryCatalog": "The Hindu (old)",
				"pages": "01",
				"place": "CHEN",
				"publicationTitle": "The Hindu",
				"section": "LITERARY",
				"url": "http://www.thehindu.com/lr/2004/01/04/stories/2004010400030100.htm",
				"attachments": [
					{
						"title": "The Hindu Snapshot",
						"mimeType": "text/html"
					}
				],
				"tags": [],
				"notes": [],
				"seeAlso": []
			}
		]
	}
]
/** END TEST CASES **/