Permalink
Join GitHub today
GitHub is home to over 31 million developers working together to host and review code, manage projects, and build software together.
Sign up
translators/Trove.js
Find file
Copy path
Fetching contributors…
Cannot retrieve contributors at this time
{ | |
"translatorID": "8efcb7cb-4180-4555-969a-08e8b34066c4", | |
"label": "Trove", | |
"creator": "Tim Sherratt", | |
"target": "^https?://trove\\.nla\\.gov\\.au/(?:newspaper|gazette|work|book|article|picture|music|map|collection)/", | |
"minVersion": "3.0", | |
"maxVersion": "", | |
"priority": 100, | |
"inRepository": true, | |
"translatorType": 4, | |
"browserSupport": "gcsibv", | |
"lastUpdated": "2018-09-03 19:41:04" | |
} | |
/* | |
Trove Translator | |
Copyright (C) 2016 Tim Sherratt (tim@discontents.com.au, @wragge) | |
This program is free software: you can redistribute it and/or modify | |
it under the terms of the GNU Affero General Public License as published by | |
the Free Software Foundation, either version 3 of the License, or | |
(at your option) any later version. | |
This program is distributed in the hope that it will be useful, | |
but WITHOUT ANY WARRANTY; without even the implied warranty of | |
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | |
GNU Affero General Public License for more details. | |
You should have received a copy of the GNU Affero General Public License | |
along with this program. If not, see <http://www.gnu.org/licenses/>. | |
*/ | |
/**
 * Zotero entry point: report what kind of item(s) the current page offers.
 *
 * @param {Document} doc - the loaded page
 * @param {string} url - the page URL
 * @returns {string|boolean|undefined} 'multiple' for result lists and
 *   newspaper pages that contain at least one usable link (false if they
 *   contain none), "newspaperArticle" for newspaper articles, "book" for
 *   work pages, and undefined for anything else.
 */
function detectWeb(doc, url) {
	var isListing = url.indexOf('/result?') > -1 || url.indexOf('/newspaper/page') > -1;
	if (isListing) {
		// Only claim 'multiple' when the listing really has entries.
		if (getSearchResults(doc, url, true)) {
			return 'multiple';
		}
		return false;
	}
	if (url.indexOf('/newspaper/article') > -1) {
		return "newspaperArticle";
	}
	if (url.indexOf('/work/') > -1) {
		return "book";
	}
	// Any other URL falls through and yields undefined.
}
/**
 * Collect the item links shown on a listing page.
 *
 * Search result pages (URL contains '/result?') and every other listing
 * (in this file: newspaper page views) use different markup, so a
 * different XPath is applied to each.
 *
 * @param {Document} doc - the listing page
 * @param {string} url - the listing URL, used to pick the XPath
 * @param {boolean} [checkOnly] - when truthy, return true as soon as one
 *   usable link is found instead of building the full map
 * @returns {Object|boolean} map of link href -> trimmed title, true in
 *   checkOnly mode, or false when no usable link exists
 */
function getSearchResults(doc, url, checkOnly) {
	var linkPath = url.indexOf('/result?') > -1
		? "//div[@id='mainresults']//li/dl/dt/a"
		: "//ol[@class='list-unstyled articles']/li/h4/a";
	var anchors = ZU.xpath(doc, linkPath);
	var items = {};
	var usable = 0;
	for (var n = 0; n < anchors.length; n++) {
		var href = anchors[n].href;
		var label = ZU.trimInternal(anchors[n].textContent);
		// Entries missing either a target or a title are ignored.
		if (href && label) {
			if (checkOnly) {
				return true;
			}
			items[href] = label;
			usable++;
		}
	}
	return usable > 0 ? items : false;
}
/**
 * Zotero entry point: save the item(s) offered by the current page.
 *
 * Single pages are scraped directly. On listing pages the user picks
 * entries via Zotero.selectItems and each chosen URL is scraped without a
 * document (scrape receives null for doc).
 *
 * @param {Document} doc - the loaded page
 * @param {string} url - the page URL
 */
function doWeb(doc, url) {
	if (detectWeb(doc, url) != "multiple") {
		scrape(doc, url);
		return;
	}
	var candidates = getSearchResults(doc, url);
	Zotero.selectItems(candidates, function (chosen) {
		// Nothing selected (dialog cancelled): stop here.
		if (!chosen) {
			return true;
		}
		Object.keys(chosen).forEach(function (itemURL) {
			scrape(null, itemURL);
		});
	});
}
/**
 * Route a URL to the matching scraper: newspaper articles go to
 * scrapeNewspaper, everything else is treated as a work.
 *
 * @param {Document|null} doc - the page, or null when called for an entry
 *   selected from a listing
 * @param {string} url - URL of the item to scrape
 */
function scrape(doc, url) {
	var isNewspaperArticle = url.indexOf('/newspaper/article/') > -1;
	if (!isNewspaperArticle) {
		scrapeWork(doc, url);
		return;
	}
	scrapeNewspaper(doc, url);
}
/**
 * Save a Trove newspaper article.
 *
 * Fetches the article's BibTeX citation, runs it through the import
 * translator, then enriches the resulting item with the page abstract and
 * tags (when a document is available), a PDF attachment and the OCR text
 * as a note. The three HTTP requests are chained, and item.complete() is
 * called from the last callback.
 *
 * @param {Document|null} doc - the article page, or null when the article
 *   was selected from a listing
 * @param {string} url - article URL containing "newspaper/article/<digits>"
 * @throws {Error} if no numeric article ID can be found in url
 */
function scrapeNewspaper(doc, url) {
	var idMatch = url.match(/newspaper\/article\/(\d+)/);
	if (!idMatch) {
		// Fail with a readable message instead of a TypeError on null[1].
		throw new Error("Trove: no newspaper article ID found in URL " + url);
	}
	var articleID = idMatch[1];
	var bibtexURL = "http://trove.nla.gov.au/newspaper/citations/bibtex-article-" + articleID + ".bibtex";
	ZU.HTTP.doGet(bibtexURL, function (bibtex) {
		var translator = Zotero.loadTranslator("import");
		translator.setTranslator("9cb70025-a888-4a29-a210-93ec52da40d4");
		translator.setString(bibtex);
		// Clean up the BibTeX results and add some extra stuff.
		translator.setHandler("itemDone", function (obj, item) {
			item.itemType = 'newspaperArticle';
			// The import puts the page reference in numPages; move it to pages.
			item.pages = item.numPages;
			delete item.numPages;
			delete item.type;
			delete item.itemID;
			// doc is null when called for an entry picked from a listing.
			if (doc) {
				var description = ZU.xpathText(doc, "//meta[@property='og:description']/@content");
				if (description) {
					item.abstractNote = description;
				}
				// Add tags; list entries whose link is filtered out yield no
				// text and are skipped rather than stored as empty tags.
				var tagNodes = ZU.xpath(doc, "//ul[contains(@class,'nlaTagContainer')]/li");
				for (var i = 0; i < tagNodes.length; i++) {
					var tagText = ZU.xpathText(tagNodes[i], "a[not(contains(@class,'anno-remove'))]");
					if (tagText) {
						item.tags.push(tagText);
					}
				}
			}
			// A proxy server set up by the translator author generates the PDF
			// and returns its URL without locking up the browser.
			var proxyURL = "http://trove-proxy.herokuapp.com/pdf/" + articleID;
			ZU.HTTP.doGet(proxyURL, function (pdfURL) {
				if (pdfURL) {
					item.attachments.push({
						url: pdfURL,
						title: 'Trove newspaper PDF',
						mimeType: 'application/pdf'
					});
				}
				// Get the OCRd text and save it in a note.
				var textURL = "http://trove.nla.gov.au/newspaper/rendition/nla.news-article" + articleID + ".txt";
				ZU.HTTP.doGet(textURL, function (text) {
					if (text) {
						item.notes.push({
							note: text.trim()
						});
					}
					item.complete();
				});
			});
		});
		translator.translate();
	});
}
// Lookup table from Trove type labels to Zotero item types.
// Keys are matched by checkType(), which joins the "; "-separated parts of
// a record's type with single spaces before looking them up here.
var troveTypes = {
	// Books and book-like material
	"Book": "book",
	"Audio book": "book",
	"Conference Proceedings": "book",
	"Printed music": "book",

	// Articles and their subtypes
	"Article": "journalArticle",
	"Article Article/Journal or magazine article": "journalArticle",
	"Article Article/Book chapter": "bookSection",
	"Article Article/Conference paper": "conferencePaper",
	"Article Article/Report": "report",

	// Pictures and objects
	"Photograph": "artwork",
	"Poster, chart, other": "artwork",
	"Art work": "artwork",
	"Object": "artwork",
	"Microform Photograph": "artwork",
	"Microform Object": "artwork",

	// Audio-visual material
	"Sound": "audioRecording",
	"Video": "videoRecording",

	// Everything else
	"Thesis": "thesis",
	"Archived website": "webpage",
	"Map": "map",
	"Unpublished": "manuscript",
	"Published": "document"
};
/**
 * Translate a Trove type string into a Zotero item type.
 *
 * The type is a "; "-separated list (e.g. "Article; Article/Report; ...").
 * The parts are joined with spaces and looked up in troveTypes; if there is
 * no match the last part is dropped and the lookup is retried, so the
 * longest known prefix wins.
 *
 * @param {string} string - Trove type value from the BibTeX record; may be
 *   missing
 * @returns {string} the matching Zotero item type, or "book" when the type
 *   is missing, empty or unknown
 */
function checkType(string) {
	// Records without a type would otherwise crash on .split().
	if (!string) {
		return "book";
	}
	var types = String(string).split("; ");
	while (types.length > 0) {
		var candidate = types.join(" ");
		if (Object.prototype.hasOwnProperty.call(troveTypes, candidate)) {
			return troveTypes[candidate];
		}
		types.pop();
	}
	return "book";
}
/**
 * Tidy the first names produced by the BibTeX import.
 *
 * Trove author strings can carry life dates and an expanded form of the
 * name in parentheses, e.g.
 *   author = { Bayley, William A. (William Alan), 1910-1981 }
 * which is imported as
 *   firstName: "1910-1981, William A. (William Alan)", lastName: "Bayley"
 * The dates are removed and, when the parenthesised text contains the part
 * before it (ignoring the first full stop), the parenthesised form is used;
 * otherwise the part before the parenthesis is kept.
 *
 * Creators without a string firstName (e.g. single-field names) are left
 * untouched.
 *
 * @param {Array<Object>} creators - creator objects; modified in place
 * @returns {Array<Object>} the same array
 */
function cleanCreators(creators) {
	for (var i = 0; i < creators.length; i++) {
		var name = creators[i].firstName;
		if (typeof name !== "string") {
			continue;
		}
		// Drop the life dates ("1910-1981", "(1910-)", ...) ...
		name = name.replace(/\(?\d{4}-\d{0,4}\)?,?/, "");
		// ... and any comma or whitespace left dangling at either end.
		name = name.replace(/^[\s,]+|[\s,]+$/g, "");
		var openPos = name.indexOf("(");
		if (openPos > -1) {
			// Use the matching ")" rather than assuming it is the last character.
			var closePos = name.indexOf(")", openPos);
			if (closePos === -1) {
				closePos = name.length;
			}
			var shortForm = name.substring(0, openPos).trim();
			var longForm = name.substring(openPos + 1, closePos).trim();
			if (longForm.indexOf(shortForm.replace('.', '').trim()) > -1) {
				name = longForm;
			} else {
				name = shortForm;
			}
		}
		creators[i].firstName = name.trim();
	}
	return creators;
}
/**
 * Save a Trove work (or a specific version of a work).
 *
 * Fetches the BibTeX citation for the work URL, runs it through the import
 * translator, maps the Trove type to a Zotero item type, cleans the creator
 * names, and adds a link attachment to the contributing repository. When a
 * document is available the version-aware URL, abstract, tags and a
 * thumbnail are taken from the page as well.
 *
 * @param {Document|null} doc - the work page, or null when the work was
 *   selected from a listing
 * @param {string} url - work URL; query string and fragment are ignored
 */
function scrapeWork(doc, url) {
	// Stays null when there is no page (or no image) to take it from.
	var thumbnailURL = null;
	// Remove all params from url
	var workURL = url.replace(/[?#].*/, '');
	var bibtexURL = workURL + '?citationFormat=BibTeX';
	if (doc) {
		// Need to get the version identifier for the BibTeX url
		var versionID = doc.body.innerHTML.match(/displayCiteDialog\(\'(.+?)\'/);
		if (versionID !== null) {
			bibtexURL += '&selectedversion=' + versionID[1];
			thumbnailURL = ZU.xpathText(doc, "//a/img[@class='mosaic ui-shdw']/@src");
		} else {
			// It's a work -- so thumbnails are different
			thumbnailURL = ZU.xpathText(doc, "//li[@class='imgfirst']//img/@src");
		}
	}
	// Get the BibTeX and feed it to the translator.
	ZU.HTTP.doGet(bibtexURL, function (bibtex) {
		var translator = Zotero.loadTranslator("import");
		translator.setTranslator("9cb70025-a888-4a29-a210-93ec52da40d4");
		translator.setString(bibtex);
		translator.setHandler("itemDone", function (obj, item) {
			item.itemType = checkType(item.type);
			item.creators = cleanCreators(item.creators);
			// Attach a link to the contributing repository if available.
			// This must happen before item.url is replaced below.
			if (item.url) {
				item.attachments.push({
					title: "Record from contributing repository",
					url: item.url,
					mimeType: 'text/html',
					snapshot: false
				});
			}
			if (doc) {
				// This gives a better version-aware url; keep the imported
				// values when the page lacks the meta tags.
				var pageURL = ZU.xpathText(doc, "//meta[@property='og:url']/@content");
				if (pageURL) {
					item.url = pageURL;
				}
				var description = ZU.xpathText(doc, "//meta[@property='og:description']/@content");
				if (description) {
					item.abstractNote = description;
				}
				// Add tags, skipping list entries without link text.
				var tagNodes = ZU.xpath(doc, "//div[@id='tagswork']/ul/li");
				for (var i = 0; i < tagNodes.length; i++) {
					var tagText = ZU.xpathText(tagNodes[i], "a");
					if (tagText) {
						item.tags.push(tagText);
					}
				}
			}
			// Only attach a thumbnail when one was actually found; without a
			// document there is none.
			if (thumbnailURL) {
				item.attachments.push({
					url: thumbnailURL,
					title: 'Trove thumbnail image',
					mimeType: 'image/jpeg'
				});
			}
			item.complete();
		});
		translator.translate();
	});
}
/** BEGIN TEST CASES **/ | |
var testCases = [ | |
{ | |
"type": "web", | |
"url": "http://trove.nla.gov.au/work/9958833?q&versionId=11567057", | |
"items": [ | |
{ | |
"itemType": "book", | |
"title": "Experiences of a meteorologist in South Australia", | |
"creators": [ | |
{ | |
"firstName": "Clement Lindley", | |
"lastName": "Wragge", | |
"creatorType": "author" | |
} | |
], | |
"date": "1980", | |
"ISBN": "9780908065073", | |
"abstractNote": "In 14 libraries. 24 p. : ill. ; 22 cm. Wragge, Clement L. (Clement Lindley), 1852-1922. South Australia. Climate, 1883-1884. Meteorologists -- South Australia -- Biography. South Australia -- Climate -- History.", | |
"itemID": "trove.nla.gov.au/work/9958833", | |
"language": "English", | |
"libraryCatalog": "Trove", | |
"publisher": "Warradale, S.Aust. : Pioneer Books", | |
"url": "http://trove.nla.gov.au/version/11567057", | |
"attachments": [], | |
"tags": [], | |
"notes": [ | |
{ | |
"note": "<p> Reprinted from Good words for 1887/ edited by Donald Macleod, published: London: Isbister and Co </p>" | |
} | |
], | |
"seeAlso": [] | |
} | |
] | |
}, | |
{ | |
"type": "web", | |
"url": "http://trove.nla.gov.au/newspaper/article/70068753", | |
"items": [ | |
{ | |
"itemType": "newspaperArticle", | |
"title": "'WRAGGE.'", | |
"creators": [], | |
"date": "7 Feb 1903", | |
"abstractNote": "We have received a copy of the above which is a journal devoted chiefly to the science of meteorology. It is owned and conducted by Mr. Clement ...", | |
"libraryCatalog": "Trove", | |
"pages": "4", | |
"place": "Vic.", | |
"publicationTitle": "Sunbury News (Vic. : 1900 - 1910)", | |
"url": "http://nla.gov.au/nla.news-article70068753", | |
"attachments": [ | |
{ | |
"title": "Trove newspaper PDF", | |
"mimeType": "application/pdf" | |
} | |
], | |
"tags": [ | |
"Meteorology Journal - Clement Wragge" | |
], | |
"notes": [ | |
{ | |
"note": "<html>\n <head>\n <title>07 Feb 1903 - 'WRAGGE.'</title>\n </head>\n <body>\n <p>Sunbury News (Vic. : 1900 - 1910), Saturday 7 February 1903, page 4</p>\n <hr/>\n <div class='zone'><p>'WRAGGE' - we have received a copy of the above, which is a journal devoted chiefly to the science of meteorology. It is owned and conducted by Mr. Clement Wragge. </p></div>\n </body>\n</html>" | |
} | |
], | |
"seeAlso": [] | |
} | |
] | |
}, | |
{ | |
"type": "web", | |
"url": "http://trove.nla.gov.au/newspaper/result?l-australian=y&q=wragge", | |
"items": "multiple" | |
}, | |
{ | |
"type": "web", | |
"url": "http://trove.nla.gov.au/book/result?l-australian=y&q=wragge", | |
"items": "multiple" | |
}, | |
{ | |
"type": "web", | |
"url": "http://trove.nla.gov.au/newspaper/page/7013947", | |
"items": "multiple" | |
}, | |
{ | |
"type": "web", | |
"url": "http://trove.nla.gov.au/work/9531118?q&sort=holdings+desc&_=1483112824975&versionId=14744047", | |
"items": [ | |
{ | |
"itemType": "book", | |
"title": "Lithgow zig zag railway, Blue Mountains, New South Wales", | |
"creators": [ | |
{ | |
"firstName": "William Alan", | |
"lastName": "Bayley", | |
"creatorType": "author" | |
} | |
], | |
"date": "1969", | |
"abstractNote": "In 19 libraries. 40 p. : ill., map ; 22 cm. Great Zig Zag Railway (Lithgow, N.S.W.) Railroads -- Blue Mountains (N.S.W. : Mountains) Zig Zag Railway -- Lithgow, Australia. Railroads -- New South Wales -- Blue Mountains. Blue Mountains (N.S.W.)", | |
"itemID": "trove.nla.gov.au/work/9531118", | |
"language": "English", | |
"libraryCatalog": "Trove", | |
"publisher": "[Bulli, N.S.W. : Zig Zag Press", | |
"url": "http://trove.nla.gov.au/version/14744047", | |
"attachments": [], | |
"tags": [], | |
"notes": [ | |
{ | |
"note": "<p> Cover title </p>" | |
} | |
], | |
"seeAlso": [] | |
} | |
] | |
} | |
] | |
/** END TEST CASES **/ |