Permalink
Join GitHub today
GitHub is home to over 31 million developers working together to host and review code, manage projects, and build software together.
Sign up
Fetching contributors…
Cannot retrieve contributors at this time
{ | |
"translatorID": "d9a16cf3-8b86-4cab-8610-dbd913ad1a44", | |
"label": "Archives Canada-France", | |
"creator": "Adam Crymble", | |
"target": "^https?://bd\\.archivescanadafrance\\.org", | |
"minVersion": "1.0.0b4.r5", | |
"maxVersion": "", | |
"priority": 100, | |
"browserSupport": "gcsibv", | |
"inRepository": true, | |
"translatorType": 4, | |
"lastUpdated": "2012-01-01 01:42:16" | |
} | |
function detectWeb(doc, url) { | |
if (doc.location.href.match("doc.xsp?")) { | |
return "book"; | |
} else if (doc.evaluate('//li/a', doc, null, XPathResult.ANY_TYPE, null).iterateNext()) { | |
return "multiple"; | |
} else if (doc.evaluate('//td[1][@class="icones"]/a', doc, null, XPathResult.ANY_TYPE, null).iterateNext()) { | |
return "multiple"; | |
} | |
} | |
function associateData (newItem, dataTags, field, zoteroField) { | |
if (dataTags[field]) { | |
newItem[zoteroField] = dataTags[field]; | |
} | |
} | |
function scrape(doc, url) { | |
var namespace = doc.documentElement.namespaceURI; | |
var nsResolver = namespace ? function(prefix) { | |
if (prefix == 'x') return namespace; else return null; | |
} : null; | |
var dataTags = new Object(); | |
var tagsContent = new Array(); | |
var fieldTitle; | |
var newItem = new Zotero.Item("book"); | |
var xPathHeaders = '//td[2]/div[@class="ead-c"]/div[@class="ead-did"]/table[@class="ead-did"]/tbody/tr/td[1]'; | |
if (doc.evaluate(xPathHeaders, doc, nsResolver, XPathResult.ANY_TYPE, null).iterateNext()) { | |
var headers = doc.evaluate(xPathHeaders, doc, nsResolver, XPathResult.ANY_TYPE, null); | |
var contents = doc.evaluate('//td[2][@class="did-content"]', doc, nsResolver, XPathResult.ANY_TYPE, null); | |
while (fieldTitle = headers.iterateNext()) { | |
fieldTitle = fieldTitle.textContent.replace(/\s+/g, ''); | |
if (fieldTitle == "Origination" || fieldTitle == "Origine") { | |
fieldTitle = "Origination"; | |
} | |
dataTags[fieldTitle] = Zotero.Utilities.cleanTags(contents.iterateNext().textContent.replace(/^\s*|\s*$/g, '')); | |
} | |
if (dataTags["Origination"]) { | |
var author = dataTags["Origination"]; | |
if (!author.match(", ")) { | |
newItem.creators.push({lastName: author, creatorType: "author"}); | |
} else { | |
var authors = author.split(", "); | |
author = authors[1] + " " + authors[0]; | |
newItem.creators.push(Zotero.Utilities.cleanAuthor(author, "author")); | |
} | |
} | |
} | |
if (doc.evaluate('//h1[@class="doc-title"]', doc, nsResolver, XPathResult.ANY_TYPE, null).iterateNext()) { | |
newItem.title = doc.evaluate('//h1[@class="doc-title"]', doc, nsResolver, XPathResult.ANY_TYPE, null).iterateNext().textContent; | |
} else if (doc.evaluate('//td[2]/div[@class="notice"]/p', doc, nsResolver, XPathResult.ANY_TYPE, null).iterateNext()) { | |
newItem.title = doc.evaluate('//td[2]/div[@class="notice"]/p', doc, nsResolver, XPathResult.ANY_TYPE, null).iterateNext().textContent; | |
} else { | |
newItem.title = doc.title + " Title Not Found"; | |
} | |
associateData (newItem, dataTags, "PhysicalDescription", "pages"); | |
associateData (newItem, dataTags, "Descriptionmatérielle", "pages"); | |
associateData (newItem, dataTags, "Repository", "repository"); | |
associateData (newItem, dataTags, "Lieudeconservation", "repository"); | |
associateData (newItem, dataTags, "LanguageoftheMaterial", "language"); | |
associateData (newItem, dataTags, "Langue", "language"); | |
associateData (newItem, dataTags, "Identifier", "callNumber"); | |
associateData (newItem, dataTags, "Cote", "callNumber"); | |
associateData (newItem, dataTags, "Datesextrêmes", "date"); | |
associateData (newItem, dataTags, "Dates", "date"); | |
newItem.url = doc.location.href; | |
newItem.complete(); | |
} | |
function doWeb(doc, url) { | |
var namespace = doc.documentElement.namespaceURI; | |
var nsResolver = namespace ? function(prefix) { | |
if (prefix == 'x') return namespace; else return null; | |
} : null; | |
var articles = new Array(); | |
if (detectWeb(doc, url) == "multiple") { | |
var items = new Object(); | |
if (doc.evaluate('//td[1][@class="icones"]/a', doc, nsResolver, XPathResult.ANY_TYPE, null).iterateNext()) { | |
var titles = doc.evaluate('//td[2][@class="ressource"]', doc, nsResolver, XPathResult.ANY_TYPE, null); | |
var titlesCount = doc.evaluate('count (//td[2][@class="ressource"])', doc, nsResolver, XPathResult.ANY_TYPE, null); | |
var links = doc.evaluate('//td[1][@class="icones"]/a', doc, nsResolver, XPathResult.ANY_TYPE, null); | |
var next_link; | |
for (var i = 0; i < titlesCount.numberValue; i++) { | |
next_link = links.iterateNext().href; | |
if (!next_link.match("doc.xsp")) { | |
next_link = links.iterateNext().href; | |
} | |
items[next_link] = titles.iterateNext().textContent; | |
} | |
} | |
if (doc.evaluate('//li/a', doc, nsResolver, XPathResult.ANY_TYPE, null).iterateNext()) { | |
var titles = doc.evaluate('//li/a', doc, nsResolver, XPathResult.ANY_TYPE, null); | |
var next_title; | |
while (next_title = titles.iterateNext()) { | |
items[next_title.href] = next_title.textContent; | |
} | |
} | |
items = Zotero.selectItems(items); | |
for (var i in items) { | |
articles.push(i); | |
} | |
} else if (doc.evaluate('//div[@class="ancestor"]/a', doc, nsResolver, XPathResult.ANY_TYPE, null).iterateNext()) { | |
var link = doc.evaluate('//div[@class="ancestor"]/a', doc, nsResolver, XPathResult.ANY_TYPE, null).iterateNext().href; | |
articles = [link]; | |
} else { | |
articles = [url] | |
} | |
Zotero.Utilities.processDocuments(articles, scrape, function() {Zotero.done();}); | |
Zotero.wait(); | |
}/** BEGIN TEST CASES **/ | |
var testCases = [ | |
{ | |
"type": "web", | |
"url": "http://bd.archivescanadafrance.org/sdx-222-acf-pleade-2/acf/search-acf.xsp?sq1=montreal&fi1=fulltext&sq2=&fi2=fulltext&sq3=&fi3=fulltext&ed=&dbeg=&dend=&frepo=&forig=&x=0&y=0#resume", | |
"items": "multiple" | |
}, | |
{ | |
"type": "web", | |
"url": "http://bd.archivescanadafrance.org/sdx-222-acf-pleade-2/acf/doc.xsp?id=CAANQ_PISTARD_03Q808_392_CAANQ_PISTARD_03Q808_392&qid=sdx_q0&fmt=tab&base=fa&n=5&root=CAANQ_PISTARD_03Q808_392&ss=true&as=&ai=&from=", | |
"items": [ | |
{ | |
"itemType": "book", | |
"creators": [], | |
"notes": [], | |
"tags": [], | |
"seeAlso": [], | |
"attachments": [], | |
"title": "Fonds Intendants", | |
"url": "http://bd.archivescanadafrance.org/sdx-222-acf-pleade-2/acf/doc.xsp?id=ANQ_00001_intendants&fmt=tab&base=fa&root=CAANQ_PISTARD_03Q808_392&n=5&qid=sdx_q0&ss=true&as=&ai=", | |
"libraryCatalog": "Archives Canada-France", | |
"accessDate": "CURRENT_TIMESTAMP" | |
} | |
] | |
} | |
] | |
/** END TEST CASES **/ |