Skip to content
Permalink
Branch: master
Find file Copy path
Find file Copy path
Fetching contributors…
Cannot retrieve contributors at this time
196 lines (172 sloc) 7.2 KB
{
"translatorID": "2d174277-7651-458f-86dd-20e168d2f1f3",
"label": "Canadiana.ca",
"creator": "Adam Crymble, Sebastian Karcher",
"target": "^https?://eco\\.canadiana\\.ca",
"minVersion": "1.0.0b4.r5",
"maxVersion": "",
"priority": 100,
"inRepository": true,
"translatorType": 4,
"browserSupport": "gcsibv",
"lastUpdated": "2012-07-03 16:44:04"
}
/**
 * Tells Zotero what kind of page is being viewed, based on the URL alone.
 *
 * @param {Document} doc - The page document (not inspected here).
 * @param {string} url - The page URL.
 * @returns {string|undefined} "book" for a /view/ URL, "multiple" for a
 *     /search? URL, and undefined for anything else.
 */
function detectWeb(doc, url) {
	// A single record page is checked first, so it wins if both patterns occur.
	var isRecordPage = url.search(/\/view\//) !== -1;
	if (isRecordPage) {
		return "book";
	}
	var isSearchPage = url.search(/\/search\?/) !== -1;
	if (isSearchPage) {
		return "multiple";
	}
	return undefined;
}
//Canadiana translator originally written by Adam Crymble; updated and cleaned up by Sebastian Karcher.
//Because the site uses many inconsistent formats for the "Published" field, parsing is not always perfect, but it works for most entries.
/**
 * Copies one scraped value onto the item under a Zotero field name.
 * Nothing is written when the scraped value is missing or otherwise falsy,
 * so a value that is already on the item is left alone in that case.
 *
 * @param {Object} newItem - The item being populated.
 * @param {Object} dataTags - Scraped values keyed by field label.
 * @param {string} field - Key to look up in dataTags.
 * @param {string} zoteroField - Property name to set on newItem.
 */
function associateData(newItem, dataTags, field, zoteroField) {
	var scrapedValue = dataTags[field];
	if (!scrapedValue) {
		return;
	}
	newItem[zoteroField] = scrapedValue;
}
/**
 * Splits an imprint string such as "[Toronto? : s.n., 1857?]" into its parts.
 * Every part is optional: a property is only set when its pattern is found,
 * so an imprint without digits or without a colon yields fewer properties
 * rather than an error.
 *
 * @param {string} imprint - The imprint text.
 * @returns {{date: (string|undefined), place: (string|undefined), publisher: (string|undefined)}}
 */
function parseImprint(imprint) {
	var parts = {};
	// Date: first run of digits, optionally followed by more digits, "-", "?" or spaces.
	var dateMatch = imprint.match(/\d+[-\?\s\d]*/);
	if (dateMatch) {
		parts.date = dateMatch[0].trim();
	}
	// Place: everything up to the first colon; brackets and the colon are stripped
	// before trimming so no trailing space is left behind.
	var placeMatch = imprint.match(/.+?:/);
	if (placeMatch) {
		parts.place = placeMatch[0].replace(/[\[\]\:]*/g, "").trim();
	}
	// Publisher: text after the first colon, up to the next comma or digit.
	var publisherMatch = imprint.match(/\:[^,\d]+/);
	if (publisherMatch) {
		parts.publisher = publisherMatch[0].replace(/[\[\]:\?]/g, "").trim();
	}
	return parts;
}

/**
 * Scrapes one record page into a Zotero "book" item and saves it.
 *
 * The record is read from the table inside div#documentRecord: each <th> is a
 * field label and the <td> at the same position is its value. Labels are
 * matched with all whitespace removed, in both English and French.
 *
 * @param {Document} doc - The record page document.
 * @param {string} url - The page URL (unused; kept for the callback signature).
 */
function scrape(doc, url) {
	var newItem = new Zotero.Item("book");
	var dataTags = {};
	var tagsContent = [];

	var headings = doc.evaluate('//div[@id="documentRecord"]//table/tbody/tr/th', doc, null, XPathResult.ANY_TYPE, null);
	var contents = doc.evaluate('//div[@id="documentRecord"]//table/tbody/tr/td', doc, null, XPathResult.ANY_TYPE, null);

	// Walk both node lists in step. Stopping as soon as either runs out avoids
	// dereferencing null when the table has fewer cells than headings.
	var heading;
	var content;
	while ((heading = headings.iterateNext()) && (content = contents.iterateNext())) {
		var fieldTitle = heading.textContent.replace(/\s+/g, '');
		var rawValue = content.textContent;

		if (fieldTitle === "Creator" || fieldTitle === "Créateur") {
			// The author name is passed on uncleaned; cleanAuthor does the parsing.
			dataTags["PrincipalAuthor"] = rawValue;
			newItem.creators.push(Zotero.Utilities.cleanAuthor(rawValue, "author"));
		} else if (fieldTitle === "Adressebibliographique" || fieldTitle === "Published") {
			// The imprint is split into date, place and publisher where present.
			var imprint = Zotero.Utilities.cleanTags(rawValue);
			dataTags["Imprint"] = imprint;
			var imprintParts = parseImprint(imprint);
			if (imprintParts.date) dataTags["Date"] = imprintParts.date;
			if (imprintParts.place) dataTags["Place"] = imprintParts.place;
			if (imprintParts.publisher) dataTags["Publisher"] = imprintParts.publisher;
		} else if (fieldTitle === "Subject" || fieldTitle === "Sujet") {
			// One subject per line; blank entries are dropped so they do not become empty tags.
			tagsContent = Zotero.Utilities.cleanTags(rawValue.trim())
				.split(/\s*\n+\s*/)
				.filter(function (tag) {
					return tag;
				});
		} else if (fieldTitle === "Identifier" || fieldTitle === "Identificateur") {
			// Prefix the bare number so it makes sense in the "extra" field.
			dataTags["CIHMno."] = "CIHM Number: " + rawValue.trim();
		} else {
			dataTags[fieldTitle] = Zotero.Utilities.cleanTags(rawValue.trim());
		}
	}

	// Turn the collected subjects into item tags.
	for (var t = 0; t < tagsContent.length; t++) {
		newItem.tags.push(tagsContent[t]);
	}

	// Map the scraped labels onto Zotero fields.
	// English
	associateData(newItem, dataTags, "Title", "title");
	associateData(newItem, dataTags, "Place", "place");
	associateData(newItem, dataTags, "Publisher", "publisher");
	associateData(newItem, dataTags, "Date", "date");
	associateData(newItem, dataTags, "Language", "language");
	associateData(newItem, dataTags, "Pages", "pages");
	associateData(newItem, dataTags, "CIHMno.", "extra");
	associateData(newItem, dataTags, "DocumentSource", "rights");
	associateData(newItem, dataTags, "PermanentLink", "URL");
	// French
	associateData(newItem, dataTags, "Titre", "title");
	associateData(newItem, dataTags, "Langue", "language");
	associateData(newItem, dataTags, "Nombredepages", "pages");
	associateData(newItem, dataTags, "ICMHno", "extra");
	associateData(newItem, dataTags, "Documentoriginal", "rights");
	associateData(newItem, dataTags, "Lienpermanent", "URL");

	// Only normalise the title when one was found; trimInternal expects a string.
	if (newItem.title) {
		newItem.title = ZU.trimInternal(newItem.title);
	}

	// Mark English-language records as en-CA so Zotero doesn't get confused
	// about title casing.
	if (newItem.language && newItem.language.match(/English|Anglais/)) {
		newItem.language = "en-CA";
	}

	// Save the item to Zotero.
	newItem.complete();
}
/**
 * Translator entry point. On a search-results page it collects the result
 * links, lets the user choose among them, and scrapes each chosen record;
 * on any other page it scrapes the current document directly.
 *
 * @param {Document} doc - The page document.
 * @param {string} url - The page URL.
 */
function doWeb(doc, url) {
	// Single-record case: nothing to select, scrape right away.
	if (detectWeb(doc, url) != "multiple") {
		scrape(doc, url);
		return;
	}

	var chosenUrls = [];
	var candidates = {};

	// Every result heading link that points at a /view page is a candidate,
	// keyed by its href and labelled with its link text.
	var resultLinks = doc.evaluate('//h2/a[contains(@href, "/view")]', doc, null, XPathResult.ANY_TYPE, null);
	for (var link = resultLinks.iterateNext(); link; link = resultLinks.iterateNext()) {
		candidates[link.href] = link.textContent;
	}

	Zotero.selectItems(candidates, function (selected) {
		// A falsy selection means nothing is to be processed.
		if (!selected) {
			return true;
		}
		for (var recordUrl in selected) {
			chosenUrls.push(recordUrl);
		}
		Zotero.Utilities.processDocuments(chosenUrls, scrape, function () {
			Zotero.done();
		});
	});
}
/** BEGIN TEST CASES **/
var testCases = [
{
"type": "web",
"url": "http://eco.canadiana.ca/view/oocihm.44987/2?r=0&s=1",
"items": [
{
"itemType": "book",
"title": "Toronto Lying-In Hospital. Report of the Toronto Lying-In Hospital : for the year 1857.",
"creators": [],
"date": "1857?",
"extra": "CIHM Number: 44987",
"language": "eng",
"libraryCatalog": "Canadiana.ca",
"place": "Toronto?",
"publisher": "s.n.",
"shortTitle": "Toronto Lying-In Hospital. Report of the Toronto Lying-In Hospital",
"attachments": [],
"tags": [
"Hospitals -- Ontario -- Toronto.",
"Hôpitaux -- Ontario -- Toronto.",
"Toronto Lying-In Hospital."
],
"notes": [],
"seeAlso": []
}
]
},
{
"type": "web",
"url": "http://eco.canadiana.ca/search?q=Toronto&field=",
"items": "multiple"
}
]
/** END TEST CASES **/
You can’t perform that action at this time.