Permalink
Join GitHub today
GitHub is home to over 31 million developers working together to host and review code, manage projects, and build software together.
Sign up
translators/Canadiana.ca.js
Find file
Copy path
Fetching contributors…
Cannot retrieve contributors at this time
{ | |
"translatorID": "2d174277-7651-458f-86dd-20e168d2f1f3", | |
"label": "Canadiana.ca", | |
"creator": "Adam Crymble, Sebastian Karcher", | |
"target": "^https?://eco\\.canadiana\\.ca", | |
"minVersion": "1.0.0b4.r5", | |
"maxVersion": "", | |
"priority": 100, | |
"inRepository": true, | |
"translatorType": 4, | |
"browserSupport": "gcsibv", | |
"lastUpdated": "2012-07-03 16:44:04" | |
} | |
function detectWeb(doc, url) { | |
if (url.match(/\/view\//)) { | |
return "book"; | |
} else if (url.match(/\/search\?/)) { | |
return "multiple"; | |
} | |
} | |
//Canadiana Translator Coding by Adam Crymble updated and cleaned by Sebastian Karcher | |
//because the site uses so many random formats for the "Published" field, it's not always perfect. But it works for MOST entries | |
function associateData(newItem, dataTags, field, zoteroField) { | |
if (dataTags[field]) { | |
newItem[zoteroField] = dataTags[field]; | |
} | |
} | |
function scrape(doc, url) { | |
//declaring variables to be used later. | |
var newItem = new Zotero.Item("book"); | |
var dataTags = new Object(); | |
var fieldTitle; | |
var tagsContent = new Array(); | |
//these variables tell the program where to find the data we want in the HTML file we're looking at. | |
//in this case, the data is found in a table. | |
var xPath1 = '//div[@id="documentRecord"]//table/tbody/tr/th'; | |
var xPath2 = '//div[@id="documentRecord"]//table/tbody/tr/td'; | |
//at this point, all the data we want has been saved into the following 2 Objects: one for the headings, one for the content. | |
// The 3rd object tells us how many items we've found. | |
if (doc.evaluate('//div[@id="documentRecord"]//table/tbody/tr/th', doc, null, XPathResult.ANY_TYPE, null)) { | |
var xPath1Results = doc.evaluate(xPath1, doc, null, XPathResult.ANY_TYPE, null); | |
var xPath2Results = doc.evaluate(xPath2, doc, null, XPathResult.ANY_TYPE, null); | |
var xPathCount = doc.evaluate('count (//div[@id="documentRecord"]//table/tbody/tr/th)', doc, null, XPathResult.ANY_TYPE, null); | |
} | |
//At this point we have two lists (xPath1Results and xPath2Results). this loop matches the first item in the first list | |
//with the first item in the second list, and on until the end. | |
//If we then ask for the "Principal Author" the program returns "J.K. Rowling" instead of "Principal Author" | |
if (doc.evaluate('//div[@id="documentRecord"]//table/tbody/tr/th', doc, null, XPathResult.ANY_TYPE, null)) { | |
for (i = 0; i < xPathCount.numberValue; i++) { | |
fieldTitle = xPath1Results.iterateNext().textContent.replace(/\s+/g, ''); | |
//gets the author's name without cleaning it away using cleanTags. | |
if (fieldTitle == "Creator" || fieldTitle == "Créateur") { | |
fieldTitle = "PrincipalAuthor"; | |
dataTags[fieldTitle] = (xPath2Results.iterateNext().textContent); | |
var authorName = dataTags["PrincipalAuthor"]; | |
newItem.creators.push(Zotero.Utilities.cleanAuthor(dataTags["PrincipalAuthor"], "author")); | |
//Splits Adressebibliographique or Imprint into 3 fields and cleans away any extra whitespace or unwanted characters. | |
} else if (fieldTitle == "Adressebibliographique" || fieldTitle == "Published") { | |
fieldTitle = "Imprint"; | |
dataTags[fieldTitle] = Zotero.Utilities.cleanTags(xPath2Results.iterateNext().textContent); | |
var justDate = dataTags["Imprint"].match(/\d+[-\?\s\d]*/)[0]; | |
if (justDate) dataTags["Date"] = justDate; | |
var place = dataTags["Imprint"].match(/.+?:/)[0]; | |
if (place) dataTags["Place"] = place.trim().replace(/[\[\]\:]*/g, "") | |
var publisher = dataTags["Imprint"].match(/\:[^,\d]+/)[0]; | |
if (publisher) dataTags["Publisher"] = publisher.replace(/[\[\]:\?]/g, "").trim(); | |
// determines how many tags there will be, pushes them into an array and clears away whitespace. | |
} else if (fieldTitle == "Subject" || fieldTitle == "Sujet") { | |
tagsContent = Zotero.Utilities.cleanTags(xPath2Results.iterateNext().textContent.trim()); | |
tagsContent = tagsContent.replace(/\s*\n+\s*/g, "||").split(/\|\|/); | |
Z.debug(tagsContent) | |
} | |
//Adds a string to CIHM no: and ICMH no: so that the resulting number makes sense to the reader. | |
else if (fieldTitle == "Identifier" || fieldTitle == "Identificateur") { | |
fieldTitle = "CIHMno."; | |
dataTags[fieldTitle] = xPath2Results.iterateNext().textContent; | |
dataTags["CIHMno."] = "CIHM Number: " + dataTags["CIHMno."].trim(); | |
} else { | |
dataTags[fieldTitle] = Zotero.Utilities.cleanTags(xPath2Results.iterateNext().textContent.replace(/^\s*|\s*$/g, '')); | |
} | |
} | |
} | |
//makes tags of the items in the "tagsContent" array. | |
for (var i = 0; i < tagsContent.length; i++) { | |
newItem.tags[i] = tagsContent[i]; | |
} | |
//calls the associateData function to put the data in the correct Zotero field. | |
//English | |
associateData(newItem, dataTags, "Title", "title"); | |
associateData(newItem, dataTags, "Place", "place"); | |
associateData(newItem, dataTags, "Publisher", "publisher"); | |
associateData(newItem, dataTags, "Date", "date"); | |
associateData(newItem, dataTags, "Language", "language"); | |
associateData(newItem, dataTags, "Pages", "pages"); | |
associateData(newItem, dataTags, "CIHMno.", "extra"); | |
associateData(newItem, dataTags, "DocumentSource", "rights"); | |
associateData(newItem, dataTags, "PermanentLink", "URL"); | |
//French | |
associateData(newItem, dataTags, "Titre", "title"); | |
associateData(newItem, dataTags, "Langue", "language"); | |
associateData(newItem, dataTags, "Nombredepages", "pages"); | |
associateData(newItem, dataTags, "ICMHno", "extra"); | |
associateData(newItem, dataTags, "Documentoriginal", "rights"); | |
associateData(newItem, dataTags, "Lienpermanent", "URL"); | |
//make sure that English language date is marked as en-US so Zotero doesn't get confused | |
//about title casing. | |
newItem.title = ZU.trimInternal(newItem.title) | |
if (newItem.language) { | |
if (newItem.language.match(/English|Anglais/)) newItem.language = "en-CA"; | |
} | |
//Saves everything to Zotero. | |
newItem.complete(); | |
} | |
function doWeb(doc, url) { | |
var articles = new Array(); | |
if (detectWeb(doc, url) == "multiple") { | |
var items = new Object(); | |
var titles = doc.evaluate('//h2/a[contains(@href, "/view")]', doc, null, XPathResult.ANY_TYPE, null); | |
var next_title; | |
while (next_title = titles.iterateNext()) { | |
items[next_title.href] = next_title.textContent; | |
} | |
Zotero.selectItems(items, function (items) { | |
if (!items) { | |
return true; | |
} | |
for (var i in items) { | |
articles.push(i); | |
} | |
Zotero.Utilities.processDocuments(articles, scrape, function () { | |
Zotero.done(); | |
}); | |
}); | |
} else { | |
scrape(doc, url); | |
} | |
} | |
/** BEGIN TEST CASES **/ | |
var testCases = [ | |
{ | |
"type": "web", | |
"url": "http://eco.canadiana.ca/view/oocihm.44987/2?r=0&s=1", | |
"items": [ | |
{ | |
"itemType": "book", | |
"title": "Toronto Lying-In Hospital. Report of the Toronto Lying-In Hospital : for the year 1857.", | |
"creators": [], | |
"date": "1857?", | |
"extra": "CIHM Number: 44987", | |
"language": "eng", | |
"libraryCatalog": "Canadiana.ca", | |
"place": "Toronto?", | |
"publisher": "s.n.", | |
"shortTitle": "Toronto Lying-In Hospital. Report of the Toronto Lying-In Hospital", | |
"attachments": [], | |
"tags": [ | |
"Hospitals -- Ontario -- Toronto.", | |
"Hôpitaux -- Ontario -- Toronto.", | |
"Toronto Lying-In Hospital." | |
], | |
"notes": [], | |
"seeAlso": [] | |
} | |
] | |
}, | |
{ | |
"type": "web", | |
"url": "http://eco.canadiana.ca/search?q=Toronto&field=", | |
"items": "multiple" | |
} | |
] | |
/** END TEST CASES **/ |