Permalink
Join GitHub today
GitHub is home to over 31 million developers working together to host and review code, manage projects, and build software together.
Sign up
translators/Library Catalog (SIRSI eLibrary).js
Find file
Copy path
Fetching contributors…
Cannot retrieve contributors at this time
{
	"translatorID": "23ba3be6-412d-4dde-9cc1-c4df0cc09378",
	"label": "Library Catalog (SIRSI eLibrary)",
	"creator": "Mang Sun",
	"target": "/uhtbin/(cgisirsi|quick_keyword)",
	"minVersion": "3.0",
	"maxVersion": "",
	"priority": 250,
	"inRepository": true,
	"translatorType": 4,
	"browserSupport": "gcsibv",
	"lastUpdated": "2014-08-26 04:08:13"
}
/* Based on the SIRSI translator by Simon Kornblith and Michael Berkowitz,
   and the modifications for Rutgers (IRIS) by Chad Mills.
   Includes code for Spanish versions, e.g. PUCP: http://biblioteca.pucp.edu.pe/ (no permalink)
   and UChile: www.catalogo.uchile.cl
*/
/**
 * Decide whether the current page is something this translator can handle.
 *
 * @param {Document} doc - The page being inspected.
 * @param {string} url - The page URL (not used for detection).
 * @returns {string|undefined} "multiple" when the page contains the
 *   `form#hitlist` search-result form, "book" when it contains an item-details
 *   heading (English or Spanish wording), otherwise undefined.
 */
function detectWeb(doc, url) {
	// Search-result list: the hit list form sits inside the left column.
	var hitlistXPath = '//div[@class="columns_container"]/div[contains(@class, "left_column")]/div[@class="content_container"]/div[@class="content"]/form[@id="hitlist"]';
	// Single record: the details heading, in any of the known wordings.
	var detailsXPath = '//div[@class="columns_container"]/div[contains(@class, "left_column")]/form[@name="item_view"]/div[@class="content_container item_details"]/div[@class="content"]/h3[.="Item Details" or .="Detalles del ítem" or .="Detalle"] ';

	if (doc.evaluate(hitlistXPath, doc, null, XPathResult.ANY_TYPE, null).iterateNext()) {
		return "multiple";
	}
	if (doc.evaluate(detailsXPath, doc, null, XPathResult.ANY_TYPE, null).iterateNext()) {
		return "book";
	}
}
/**
 * Scrape a single record from a page that shows the labelled record list
 * (`dt.viewmarctags` labels, each followed by a `dd` holding the value).
 *
 * Each label is lower-cased and mapped onto a Zotero field; labels that are
 * not recognised are appended to `extra` as "Label: value" lines. A field
 * that cannot be parsed is logged and skipped, so one malformed value does
 * not discard the rest of the record.
 *
 * @param {Document} doc - The record page.
 * @param {string} [url] - Unused; kept for interface compatibility.
 * @returns {boolean} false when the page has no record labels (nothing is
 *   saved), true after the item has been completed.
 */
function scrape(doc, url) {
	// First node matched by `xpath` relative to `context`, or null.
	function firstNode(xpath, context) {
		return doc.evaluate(xpath, context, null, XPathResult.ANY_TYPE, null).iterateNext();
	}

	var labelXPath = '//ul[contains(@class, "detail_page")]/li[@id="detail_marc_record"]/dl/dt[@class="viewmarctags"]';
	var elmts = doc.evaluate(labelXPath, doc, null, XPathResult.ANY_TYPE, null);
	var elmt = elmts.iterateNext();
	if (!elmt) {
		return false;
	}

	var newItem = new Zotero.Item("book");
	newItem.extra = "";
	newItem.series = "";
	var seriesItemCount = 0;

	for (; elmt; elmt = elmts.iterateNext()) {
		try {
			// By Rice. Select the first non-blank text node of the label.
			var labelNode = firstNode('./text()[normalize-space()]', elmt);
			if (!labelNode) {
				continue;
			}

			// By Rice. Some values live in the text of an anchor inside the DD,
			// the others directly in the DD's own text.
			var valueXPath = firstNode('following-sibling::dd[position()=1]/a/text()', elmt)
				? 'following-sibling::dd[position()=1]/a/text()[normalize-space()]'
				: 'following-sibling::dd[position()=1]/text()[normalize-space()]';
			var valueNode = firstNode(valueXPath, elmt);
			if (!valueNode) {
				continue;
			}
			var value = Zotero.Utilities.superCleanString(valueNode.nodeValue);

			// Label with surrounding punctuation/whitespace removed; the
			// original casing is kept for the "extra" field.
			var casedField = Zotero.Utilities.superCleanString(labelNode.nodeValue);
			var field = casedField.toLowerCase();
			var m;

			if (field == "publisher") {
				newItem.publisher = value;
			} else if (field == "pub date") {
				m = /[0-9]+/.exec(value);
				if (m) {
					newItem.date = m[0];
				}
			} else if (field == "isbn") {
				m = /^[0-9](?:[0-9X]+)/.exec(value);
				if (m) {
					newItem.ISBN = m[0];
				}
			} else if (field == "title" || field == "titulo" || field == "título") {
				// Everything after " / " is the statement of responsibility.
				var titleParts = value.split(" / ");
				// A bracketed medium designator in the title selects the item type.
				var medium = /\[(.+)\]/i.exec(titleParts[0]);
				if (medium) {
					var itype = medium[1].toLowerCase();
					if (itype == "phonodisc" || itype == "sound recording") {
						newItem.itemType = "audioRecording";
					} else if (itype == "videorecording") {
						newItem.itemType = "videoRecording";
					} else if (itype == "electronic resource") {
						// Rice treats electronic resources as books.
						newItem.itemType = "book";
					}
				}
				newItem.title = Zotero.Utilities.capitalizeTitle(titleParts[0]);
			} else if (field == "series" || field == "serie" || field == "series title") {
				// Several series statements are joined with "; ".
				if (seriesItemCount != 0) {
					newItem.series += "; " + value;
				} else {
					newItem.series = value;
				}
				seriesItemCount++;
			} else if (field == "dissertation note" || field == "nota de tesis") {
				newItem.itemType = "thesis";
				// Expected shape: "<degree>--<university>, <date>".
				var thesisParts = value.split("--");
				if (thesisParts.length > 1) {
					var uniDate = thesisParts[1].split(", ");
					newItem.university = uniDate[0];
					// Do not clobber a date found elsewhere when the note has none.
					if (uniDate[1]) {
						newItem.date = uniDate[1];
					}
				}
			} else if (field == "edition" || field == "edicion" || field == "edición") {
				newItem.edition = value;
			} else if (field == "physical description" || field == "descripcion" || field == "descripción física") {
				// Keep what precedes " p", then take its last space-separated token.
				var physPages = value.split(/ p.*/);
				var pageParts = physPages[0].split(" ");
				newItem.numPages = pageParts[pageParts.length - 1];
			} else if (field == "publication info" || field == "pie de imprenta" || field == "datos publicación") {
				// Expected shape: "<place> : <publisher>, <date>".
				var pubParts = value.split(" : ");
				newItem.place = pubParts[0];
				if (pubParts.length > 1) {
					var publisherParts = pubParts.slice(1).join(" : ").split(",");
					newItem.publisher = publisherParts[0];
					var rawDate = publisherParts[1];
					if (rawDate !== undefined) {
						// rawDate still carries the character that followed the
						// comma; drop it together with a copyright "c" if present.
						newItem.date = rawDate.substring(1, 2) == "c" ? rawDate.substring(2) : rawDate;
					}
				}
			} else if (field == "personal author" || field == "autor personal") {
				newItem.creators.push(Zotero.Utilities.cleanAuthor(value, "author", true));
			} else if (field == "performer") {
				newItem.creators.push(Zotero.Utilities.cleanAuthor(value, "performer", true));
			} else if (field == "author") {
				newItem.creators.push(Zotero.Utilities.cleanAuthor(value, "author", true));
			} else if (field == "added author" || field == "otros autores") {
				newItem.creators.push(Zotero.Utilities.cleanAuthor(value, "contributor", true));
			} else if (field == "conference author" || field == "corporate author") {
				// Included by Rice to handle corporate or conference authors.
				newItem.creators.push(Zotero.Utilities.cleanAuthor(value, "author", true));
			} else if (field == "subject" || field == "corporate subject" || field == "geographic term" || field == "tema" || field == "materia") {
				newItem.tags = newItem.tags.concat(value.split("--"));
			} else if (field == "personal subject") {
				// Keep the first two comma-separated parts and drop the rest.
				// push() returns the new length, so its result must not be
				// assigned back to tags.
				newItem.tags.push(value.split(", ").slice(0, 2).join(", "));
			} else if (value && field != "http") {
				newItem.extra += casedField + ": " + value + "\n";
			}
		} catch (e) {
			// Best effort: a field that cannot be parsed is skipped, but logged.
			Zotero.debug("SIRSI scrape: skipping field: " + e);
		}
	}

	// Drop the trailing newline left by the last "extra" line.
	if (newItem.extra) {
		newItem.extra = newItem.extra.substr(0, newItem.extra.length - 1);
	}

	var callNumber = doc.evaluate('//tr/td[1][@class="holdingslist"]/strong/text()|//tr/td[1][@class="holdingslist"]/b/text()', doc, null, XPathResult.ANY_TYPE, null).iterateNext();
	if (callNumber && callNumber.nodeValue) {
		newItem.callNumber = callNumber.nodeValue;
	}

	newItem.libraryCatalog = "Library Catalog";
	newItem.complete();
	return true;
}
/**
 * Translator entry point.
 *
 * Works out which generation of the SIRSI interface produced the page and
 * then either
 *  - (2003+) scrapes the record on the page or, when there is none, lets the
 *    user pick from the hit list and scrapes each chosen detail page; or
 *  - (-2003) downloads the chosen records in "FLAT ASCII" format and feeds
 *    them to the MARC import translator.
 *
 * @param {Document} doc - The page to translate.
 * @param {string} url - The page URL (unused; `doc.location.href` is read).
 * @returns {boolean|undefined} true when the user cancels the selection
 *   dialog, otherwise undefined.
 */
function doWeb(doc, url) {
	var sirsiNew = true; // toggle between SIRSI -2003 and SIRSI 2003+
	var elmts, elmt, m, items, i;

	// Adapted to catch the hitlist page of the Rice catalog
	var hitlistXPath = '/html/body/div[@class="columns_container"]/div[contains(@class, "left_column")]/div[@class="content_container"]/div[@class="content"]/form[@id="hitlist"]/ul[@class="hit_list"]/li/ul[starts-with(@class, "hit_list_row")]/li[@class="hit_list_item_info"]/dl';
	if (doc.evaluate(hitlistXPath, doc, null, XPathResult.ANY_TYPE, null).iterateNext()) {
		Zotero.debug("SIRSI doWeb: searchsum");
		sirsiNew = true;
	} else if (doc.evaluate('//form[@name="hitlist"]/table/tbody/tr', doc, null, XPathResult.ANY_TYPE, null).iterateNext()) {
		Zotero.debug("SIRSI doWeb: hitlist");
		sirsiNew = false;
	} else if (doc.evaluate('//tr[th[@class="viewmarctags"]][td[@class="viewmarctags"]]', doc, null, XPathResult.ANY_TYPE, null).iterateNext()) {
		Zotero.debug("SIRSI doWeb: viewmarctags");
		sirsiNew = true;
	} else if (doc.evaluate('//input[@name="VOPTIONS"]', doc, null, XPathResult.ANY_TYPE, null).iterateNext()) {
		Zotero.debug("SIRSI doWeb: VOPTIONS");
		sirsiNew = false;
	} else {
		// Last resort: a "Viewing record" text node inside a body-level form
		// marks the old interface.
		elmts = doc.evaluate('/html/body/form//text()', doc, null, XPathResult.ANY_TYPE, null);
		while ((elmt = elmts.iterateNext())) {
			if (Zotero.Utilities.superCleanString(elmt.nodeValue) == "Viewing record") {
				Zotero.debug("SIRSI doWeb: Viewing record");
				sirsiNew = false;
			}
		}
	}

	if (sirsiNew) { // SIRSI 2003+ scraper code
		if (scrape(doc)) {
			return;
		}

		// No record on this page, so treat it as a hit list. Keys are the
		// names of the "Details" buttons, values the titles shown to the user.
		var availableItems = {};
		// The predicate is relative (".//input") so that only rows that
		// actually contain a Details button are selected.
		var rowXPath = '//ul[@class="hit_list"]/li/ul[contains(@class, "hit_list_row")][.//input[@value="Details" or @value="Detalles"]]';
		var tableRows = doc.evaluate(rowXPath, doc, null, XPathResult.ANY_TYPE, null);
		var tableRow;
		while ((tableRow = tableRows.iterateNext())) {
			var input = doc.evaluate('.//input[@value="Details" or @value="Detalles"]', tableRow, null, XPathResult.ANY_TYPE, null).iterateNext();
			var titleLink = doc.evaluate('.//dd[@class="title"]/a', tableRow, null, XPathResult.ANY_TYPE, null).iterateNext();
			if (!input || !titleLink) {
				continue;
			}
			var text = titleLink.textContent;
			if (text) {
				availableItems[input.name] = text.trim();
			}
		}

		items = Zotero.selectItems(availableItems);
		if (!items) {
			return true;
		}

		var hostRe = new RegExp("^http(?:s)?://[^/]+");
		m = hostRe.exec(doc.location.href);
		Zotero.debug("href: " + doc.location.href);
		var hitlist = doc.forms.namedItem("hitlist");
		var baseUrl = m[0] + hitlist.getAttribute("action") + "?first_hit=" + hitlist.elements.namedItem("first_hit").value + "&last_hit=" + hitlist.elements.namedItem("last_hit").value;
		// One URL per selected hit, built from the hit-list form's action and
		// hit range plus the chosen Details button name.
		var alexandria = [];
		for (i in items) {
			alexandria.push(baseUrl + "&" + i + "=Details");
		}
		Zotero.Utilities.processDocuments(alexandria, function (doc) {
			scrape(doc);
		}, function () {
			Zotero.done();
		}, null);
		Zotero.wait();
		return;
	}

	// SIRSI -2003 translator code
	Zotero.debug("Running SIRSI -2003 code");
	var uri = doc.location.href;
	var recNumbers = [];
	var newUri;
	elmts = doc.evaluate('//form[@name="hitlist"]/table/tbody/tr', doc, null, XPathResult.ANY_TYPE, null);
	elmt = elmts.iterateNext();
	if (elmt) { // Search results page
		m = /^https?:\/\/[^\/]+/.exec(uri);
		var postAction = doc.forms.namedItem("hitlist").getAttribute("action");
		// Replace the last character of the form action with "40".
		newUri = m[0] + postAction.substr(0, postAction.length - 1) + "40";
		items = {};
		do {
			var checkbox = doc.evaluate('.//input[@type="checkbox"]', elmt, null, XPathResult.ANY_TYPE, null).iterateNext();
			// Collect title
			var titleCell = doc.evaluate("./td[2]", elmt, null, XPathResult.ANY_TYPE, null).iterateNext();
			var title = titleCell && titleCell.textContent;
			if (checkbox && title) {
				items[checkbox.name] = Zotero.Utilities.trimInternal(title);
			}
		} while ((elmt = elmts.iterateNext()));
		items = Zotero.selectItems(items);
		if (!items) {
			return true;
		}
		for (i in items) {
			recNumbers.push(i);
		}
	} else { // Normal page
		// this regex will fail about 1/100,000,000 tries
		m = /^((.*?)\/([0-9]+?))\//.exec(uri);
		newUri = m[1] + "/40";
		elmts = doc.evaluate('/html/body/form', doc, null, XPathResult.ANY_TYPE, null);
		while ((elmt = elmts.iterateNext())) {
			var initialText = doc.evaluate('.//text()[1]', elmt, null, XPathResult.ANY_TYPE, null).iterateNext();
			if (initialText && initialText.nodeValue && Zotero.Utilities.superCleanString(initialText.nodeValue) == "Viewing record") {
				// The record number is the first bold text of that form.
				recNumbers.push(doc.evaluate('./b[1]/text()[1]', elmt, null, XPathResult.ANY_TYPE, null).iterateNext().nodeValue);
				break;
			}
		}
	}

	var downloadUrl = newUri + '?marks=' + recNumbers.join(",") + '&shadow=NO&format=FLAT+ASCII&sort=TITLE&vopt_elst=ALL&library=ALL&display_rule=ASCENDING&duedate_code=l&holdcount_code=t&DOWNLOAD_x=22&DOWNLOAD_y=12&address=&form_type=';
	var translator = Zotero.loadTranslator("import");
	translator.setTranslator("a6ee60df-1ddc-4aae-bb25-45e0537be973");
	translator.getTranslatorObject(function (marc) {
		Zotero.Utilities.loadDocument(downloadUrl, function (doc) {
			var pre = doc.getElementsByTagName("pre");
			var text = pre[0].textContent;
			// Everything before the first boundary marker is preamble.
			var documents = text.split("*** DOCUMENT BOUNDARY ***");
			for (var j = 1; j < documents.length; j++) {
				var record = parseFlatAsciiRecord(documents[j], marc);
				var newItem = new Zotero.Item();
				record.translate(newItem);
				newItem.libraryCatalog = "Library Catalog";
				newItem.complete();
			}
		});
	});
}

/**
 * Build a MARC record from one "FLAT ASCII" document.
 *
 * A field starts on a line shaped like ".TAG. " followed by the content; any
 * other non-blank line continues the current field. Fields whose tag starts
 * with "00" have no indicators; tag "000" is the leader. A field is handed
 * to the record when the next field starts and, for the last one, when the
 * input ends. Parser state is local, so nothing carries over between
 * records.
 *
 * @param {string} text - The text between two document boundary markers.
 * @param {Object} marc - The MARC translator object (provides `record` and
 *   `subfieldDelimiter`).
 * @returns {Object} The populated `marc.record` instance.
 */
function parseFlatAsciiRecord(text, marc) {
	var record = new marc.record();
	var lines = text.split("\n");
	var tag;
	var content;
	var ind = "";

	// Hand the field collected so far (if any) to the record.
	function flushField() {
		if (tag) {
			record.addField(tag, ind, content.replace(/\|([a-z])/g, marc.subfieldDelimiter + "$1"));
		}
	}

	for (var i = 0; i < lines.length; i++) {
		var line = lines[i];
		var startsField = line[0] == "." && line.substr(4, 2) == ". ";
		if (!startsField) {
			// Continuation of the current field; blank lines carry no data.
			if (tag && /\S/.test(line)) {
				content += " " + line.substr(6);
			}
			continue;
		}

		flushField();
		tag = line.substr(1, 3);
		if (tag[0] != "0" || tag[1] != "0") {
			ind = line.substr(6, 2);
			content = line.substr(8);
		} else {
			content = line.substr(7);
			if (tag == "000") {
				// The leader is stored on the record, not added as a field.
				tag = undefined;
				record.leader = "00000" + content;
				Zotero.debug("the leader is: " + record.leader);
			}
		}
	}
	flushField();
	return record;
}
/** BEGIN TEST CASES **/
var testCases = [
	{
		"type": "web",
		"url": "https://library.usc.edu/uhtbin/cgisirsi/x/0/0/5?searchdata1=2420992{CKEY}",
		"items": [
			{
				"itemType": "book",
				"creators": [
					{
						"firstName": "Pierre",
						"lastName": "Bourdieu",
						"creatorType": "author"
					}
				],
				"notes": [],
				"tags": [
					"Real estate business",
					"Social aspects",
					"France",
					"Val-d'Oise",
					"Housing policy",
					"France",
					"Economics",
					"Sociological aspects"
				],
				"seeAlso": [],
				"attachments": [],
				"extra": "Uniform title: Structures sociales de l'économie. English\nGeneral note: Translated from the French\nLanguage: Translated from the French\nBibliography note: Includes bibliographical references (p. [233]-251) and index\nLCCN: 2005620708\nControl Number: ocm61244051",
				"title": "The social structures of the economy",
				"place": "Cambridge, UK ; Malden, MA",
				"publisher": "Polity",
				"date": "2005",
				"numPages": "263",
				"ISBN": "0745625401",
				"libraryCatalog": "Library Catalog"
			}
		]
	}
]
/** END TEST CASES **/