Permalink
Join GitHub today
GitHub is home to over 31 million developers working together to host and review code, manage projects, and build software together.
Sign up
translators/The Chronicle of Higher Education.js
Find file
Copy path
Fetching contributors…
Cannot retrieve contributors at this time
{ | |
"translatorID": "1e6d1529-246f-4429-84e2-1f1b180b250d", | |
"label": "The Chronicle of Higher Education", | |
"creator": "Simon Kornblith, Avram Lyon", | |
"target": "^https?://(www\\.)?chronicle\\.com/", | |
"minVersion": "2.1", | |
"maxVersion": "", | |
"priority": 100, | |
"inRepository": true, | |
"translatorType": 4, | |
"browserSupport": "gcsbv", | |
"lastUpdated": "2017-06-30 05:37:23" | |
} | |
/* | |
This translator works on articles posted in The Chronicle of Higher Education. | |
It is based on the earlier translator by Simon Kornblith, but the Chronicle has | |
significantly restructured the site since 2006, breaking the old translator. | |
*/ | |
function detectWeb(doc, url) { | |
/* The /daily/ and /weekly/ sections are leftover from the previous version | |
of the translator; they don't appear to still be on the Chronicle site, but | |
they might persist in older URLs. */ | |
var articleRegexp = /\/(daily|weekly|article|blogPost|blogs\/\w+)\/[^/]+\// ; | |
if (articleRegexp.test(url) && ZU.xpathText(doc, '//h1')) { | |
var section = url.match(articleRegexp); | |
switch (section[1]) { | |
case "weekly": | |
case "daily": | |
case "article": | |
return "magazineArticle"; | |
case "blogPost": | |
return "blogPost"; | |
default: | |
if (section[1].indexOf("blogs") !== -1) | |
return "blogPost"; | |
return false; | |
} | |
} else if (getSearchResults(doc, true)) { | |
return "multiple"; | |
} | |
} | |
function getSearchResults(doc, checkOnly) { | |
var items = {}; | |
var found = false; | |
//search results | |
var rows = ZU.xpath(doc, '//span[@class="content-card__heading"]/h4/a'); | |
if (rows.length<1){ | |
//overview pages | |
rows = ZU.xpath(doc, '//div[contains(@class, "row")]//h1/a|//div[contains(@class, "row")]//h2[contains(@class, "title")]/a'); | |
} | |
for (var i=0; i<rows.length; i++) { | |
var href = rows[i].href; | |
var title = ZU.trimInternal(rows[i].textContent); | |
if (!href || !title) continue; | |
if (checkOnly) return true; | |
found = true; | |
items[href] = title; | |
} | |
return found ? items : false; | |
} | |
function doWeb(doc, url) { | |
if (detectWeb(doc, url) == "multiple") { | |
Zotero.selectItems(getSearchResults(doc, false), function (items) { | |
if (!items) { | |
return true; | |
} | |
var articles = []; | |
for (var i in items) { | |
articles.push(i); | |
} | |
ZU.processDocuments(articles, scrape); | |
}); | |
} else { | |
scrape(doc, url); | |
} | |
} | |
function scrape (doc, url){ | |
var type = detectWeb(doc, url); | |
var item = new Zotero.Item(type); | |
item.url = ZU.xpathText(doc, '//link[@rel="canonical"]/@href'); | |
item.publicationTitle = "The Chronicle of Higher Education"; | |
// Does the ISSN apply to online-only blog posts? | |
item.ISSN = "0009-5982"; | |
item.language = "en-US"; | |
var byline = doc.evaluate('//header/div/span[@class="content-item__byline"]', doc, null, XPathResult.ANY_TYPE, null).iterateNext(); | |
if (!byline) byline = doc.evaluate('//div[@class="blog__author"]/a', doc, null, XPathResult.ANY_TYPE, null).iterateNext(); | |
if (byline !== null) { | |
var authors = parseAuthors(byline.textContent.trim()); | |
for (var i = 0; i < authors.length; i++) { | |
item.creators.push(Zotero.Utilities.cleanAuthor(authors[i], "author")); | |
} | |
} | |
// Behavior for some items is different: | |
if (type === "blogPost") { | |
item.date = ZU.xpathText(doc, '//div[@class="blog__author"]/time'); | |
item.title = ZU.xpathText(doc, '//h2[@class="blog__title"]'); | |
//legacy blogs | |
if (!item.title) item.title = ZU.xpathText(doc, '//h1[@class="title"]') | |
if (!item.date) item.date= ZU.xpathText(doc, '//p[@class="time"]'); | |
var blogname = ZU.xpathText(doc, '//div[@class="blog__mast"]//h2[contains(@class, "blog__name")]'); | |
if (blogname) item.publicationTitle = item.publicationTitle + " Blogs: " + blogname; | |
} else { | |
item.date = ZU.xpathText(doc, '//header/div/span[@class="content-item__date"]') | |
item.title = ZU.xpathText(doc, '//header/h1'); | |
var section = ZU.xpathText(doc, '//div[@class="header-breadcrumb-wrap"]/h1'); | |
if (section) item.section = ZU.trimInternal(section) | |
// Some items have publication details at the end of the article; one | |
// example is: http://chronicle.com/article/Grinnells-Green-Secrets/2653/ | |
var articleParagraphs = doc.evaluate('//div[@class="article-body"]/p', doc, null, XPathResult.ANY_TYPE, null); | |
var par; | |
while ((par = articleParagraphs.iterateNext()) !== null) { | |
var data = par.textContent.match(/Section: ([a-zA-Z -&]+)[\n\t ]+Volume ([0-9]+), Issue ([0-9]+), Page ([0-9A-Za-z]+)/); | |
if (data !== null && data.length > 0) { | |
item.pages = data[4]; | |
// If the section here and in the page proper are different, concatenate | |
if (item.section !== data[1]) | |
item.section = item.section + " : " + Zotero.Utilities.trimInternal(data[1]); | |
// Since newspaperArticle doesn't have Volume / Issue, put as Edition | |
item.edition = "Volume " + data[2] + ", Issue " + data[3]; | |
} | |
} | |
} | |
item.attachments.push({url:doc.location.href, title: ("Chronicle of Higher Education Snapshot"), mimeType:"text/html"}); | |
item.complete(); | |
} | |
function parseAuthors(author) { | |
// Sometimes we have "By Author and Author" | |
if (author.substr(0, 3).toLowerCase() == "by ") { | |
author = author.substr(3); | |
} | |
// Sometimes the author is in all caps | |
var pieces = author.split(" "); | |
for (var i = 0; i < pieces.length; i++) { | |
// TODO Make the all-caps character class more inclusive | |
if (pieces[i].match(/[A-Z-]+/) !== null) | |
pieces[i] = Zotero.Utilities.capitalizeTitle(pieces[i].toLowerCase(), true); | |
} | |
author = pieces.join(" "); | |
// Somtimes we have multiple authors | |
var authors = author.split(" and "); | |
return authors; | |
} | |
/** BEGIN TEST CASES **/ | |
var testCases = [ | |
{ | |
"type": "web", | |
"url": "http://www.chronicle.com/blogs/profhacker/the-second-day-of-thatcamp/23068", | |
"items": [ | |
{ | |
"itemType": "blogPost", | |
"title": "The Second Day of THATCamp", | |
"creators": [ | |
{ | |
"firstName": "Amy", | |
"lastName": "Cavender", | |
"creatorType": "author" | |
} | |
], | |
"date": "March 26, 2010", | |
"blogTitle": "The Chronicle of Higher Education Blogs: ProfHacker", | |
"language": "en-US", | |
"url": "http://www.chronicle.com/blogs/profhacker/the-second-day-of-thatcamp/23068", | |
"attachments": [ | |
{ | |
"title": "Chronicle of Higher Education Snapshot", | |
"mimeType": "text/html" | |
} | |
], | |
"tags": [], | |
"notes": [], | |
"seeAlso": [] | |
} | |
] | |
}, | |
{ | |
"type": "web", | |
"url": "http://www.chronicle.com/article/A-Little-Advice-From-32000/46210/", | |
"items": [ | |
{ | |
"itemType": "magazineArticle", | |
"title": "A Little Advice From 32,000 Graduate Students", | |
"creators": [ | |
{ | |
"firstName": "Adam", | |
"lastName": "Fagen", | |
"creatorType": "author" | |
}, | |
{ | |
"firstName": "Kimberly Suedkamp", | |
"lastName": "Wells", | |
"creatorType": "author" | |
} | |
], | |
"date": "January 14, 2002", | |
"ISSN": "0009-5982", | |
"language": "en-US", | |
"libraryCatalog": "The Chronicle of Higher Education", | |
"publicationTitle": "The Chronicle of Higher Education", | |
"url": "http://www.chronicle.com/article/A-Little-Advice-From-32000/46210", | |
"attachments": [ | |
{ | |
"title": "Chronicle of Higher Education Snapshot", | |
"mimeType": "text/html" | |
} | |
], | |
"tags": [], | |
"notes": [], | |
"seeAlso": [] | |
} | |
] | |
}, | |
{ | |
"type": "web", | |
"url": "http://www.chronicle.com/article/Grinnells-Green-Secrets/2653/", | |
"items": [ | |
{ | |
"itemType": "magazineArticle", | |
"title": "Grinnell's Green Secrets", | |
"creators": [ | |
{ | |
"firstName": "Xiao-Bo", | |
"lastName": "Yuan", | |
"creatorType": "author" | |
} | |
], | |
"date": "June 16, 2006", | |
"ISSN": "0009-5982", | |
"language": "en-US", | |
"libraryCatalog": "The Chronicle of Higher Education", | |
"publicationTitle": "The Chronicle of Higher Education", | |
"url": "http://www.chronicle.com/article/Grinnells-Green-Secrets/2653", | |
"attachments": [ | |
{ | |
"title": "Chronicle of Higher Education Snapshot", | |
"mimeType": "text/html" | |
} | |
], | |
"tags": [], | |
"notes": [], | |
"seeAlso": [] | |
} | |
] | |
}, | |
{ | |
"type": "web", | |
"url": "http://www.chronicle.com/blogs/brainstorm/humanities-cyberinfrastructure-project-bamboo/6138", | |
"items": [ | |
{ | |
"itemType": "blogPost", | |
"title": "Humanities Cyberinfrastructure: Project Bamboo", | |
"creators": [ | |
{ | |
"firstName": "Stan", | |
"lastName": "Katz", | |
"creatorType": "author" | |
} | |
], | |
"date": "July 17, 2008", | |
"blogTitle": "The Chronicle of Higher Education Blogs: Brainstorm", | |
"language": "en-US", | |
"shortTitle": "Humanities Cyberinfrastructure", | |
"url": "http://www.chronicle.com/blogs/brainstorm/humanities-cyberinfrastructure-project-bamboo/6138", | |
"attachments": [ | |
{ | |
"title": "Chronicle of Higher Education Snapshot", | |
"mimeType": "text/html" | |
} | |
], | |
"tags": [], | |
"notes": [], | |
"seeAlso": [] | |
} | |
] | |
}, | |
{ | |
"type": "web", | |
"url": "http://www.chronicle.com/section/Opinion-Ideas/40/?eio=58977", | |
"items": "multiple" | |
}, | |
{ | |
"type": "web", | |
"url": "http://www.chronicle.com/search/?search_siteId=5&contextId=&action=rem&searchQueryString=adjunct", | |
"items": "multiple" | |
} | |
] | |
/** END TEST CASES **/ |