Permalink
Join GitHub today
GitHub is home to over 31 million developers working together to host and review code, manage projects, and build software together.
Sign up
Fetching contributors…
Cannot retrieve contributors at this time
{ | |
"translatorID": "cf87eca8-041d-b954-795a-2d86348999d5", | |
"label": "Library Catalog (Aleph)", | |
"creator": "Simon Kornblith, Michael Berkowitz, Ming Yeung Cheung", | |
"target": "^https?://[^/]+/F(/?[A-Z0-9\\-]*(\\?.*)?$|\\?func=find|\\?func=scan|\\?func=short|\\?local_base=)", | |
"minVersion": "1.0.0b3.r1", | |
"maxVersion": "", | |
"priority": 250, | |
"inRepository": true, | |
"translatorType": 4, | |
"browserSupport": "gcsb", | |
"lastUpdated": "2016-12-17 23:52:01" | |
} | |
/* | |
Aleph OPAC Translator | |
Example installations (mainly French): | |
http://naude.bibliotheque-mazarine.fr/ | |
http://bibli.polytechnique.fr/ | |
http://sifrix2.sdv.fr/ | |
http://aleph.insa-rouen.fr | |
http://brenet.ens-lyon.fr | |
http://bu-pau.univ-pau.fr/ | |
http://babel.bu.univ-paris5.fr | |
http://inti.univ-paris4.fr/ | |
http://servaleph.univ-catholyon.fr/ | |
http://armada.scd.univ-paris12.fr/ | |
http://catalogue.univ-angers.fr/ | |
http://biblio.ville-lehavre.fr/ | |
http://opac.nebis.ch/ | |
http://scd2.univ-lille1.fr/ | |
http://catalogue.univ-paris1.fr/ | |
http://source.ulg.ac.be/ | |
http://med.cite-sciences.fr/ | |
http://biblio.mulhouse.fr/ | |
http://mediatheque.sigdci76.fr/ | |
http://opac.biu-montpellier.fr/ | |
Japanese Diet Library: | |
https://ndlopac.ndl.go.jp | |
Germany: | |
http://aleph-www.ub.fu-berlin.de | |
http://opac.hu-berlin.de | |
http://alephdai.ub.hu-berlin.de | |
https://aleph.mpg.de | |
Mexico: | |
iibiblio.unam.mx | |
Poland: | |
https://aleph.bg.pwr.wroc.pl/F | |
*/ | |
function detectWeb(doc, url) { | |
var singleRe = new RegExp("^https?://[^/]+/F/?[A-Z0-9\-]*\?.*(?:func=full-set-set|func=direct|func=myshelf-full.*)"); | |
if (singleRe.test(doc.location.href)) { | |
return "book"; | |
} else { | |
var tags = doc.getElementsByTagName("a"); | |
for (var i=0; i<tags.length; i++) { | |
if (singleRe.test(tags[i].href)) { | |
return "multiple"; | |
} | |
} | |
} | |
} | |
function doWeb(doc, url) { | |
var detailRe = new RegExp("^https?://[^/]+/F/?[A-Z0-9\-]*\?.*(?:func=full-set-set|func=direct|func=myshelf-full|func=myself_full.*)"); | |
var mab2Opac = new RegExp("^https?://(?!alephdai)[^/]+berlin|193\.30\.112\.134|duisburg-essen/F/[A-Z0-9\-]+\?.*|^https?://katalog\.ub\.uni-duesseldorf\.de/F/|^https?://aleph\.mpg\.de/F/"); | |
var uri = doc.location.href; | |
var newUris = new Array(); | |
if (detailRe.test(uri)) { | |
// find the 'add to basket' link where it will have the document number, replace the function with 'direct' | |
if (doc.evaluate('//*[contains(@href, "myshelf-add-ful-1")]', doc, null, XPathResult.ANY_TYPE, null).iterateNext()) { | |
var elmts_add = doc.evaluate('//*[contains(@href, "myshelf-add-ful-1")]', doc, null, XPathResult.ANY_TYPE, null); | |
var adduri = elmts_add.iterateNext().attributes.getNamedItem("href").value; | |
adduri = adduri.replace("myshelf-add-ful-1", "direct"); | |
//adduri = adduri.replace("myshelf-add-ful-1", "myshelf-full"); | |
var baseuri = uri.match(".*\\?"); | |
var funcuri = adduri.match("\\?.*"); | |
newuri = baseuri + funcuri[0].match("[^\\?].*"); | |
newuri += "&format=001"; | |
//Zotero.debug('baseuri = ' + baseuri); | |
//Zotero.debug('funcuri = ' + funcuri); | |
Zotero.debug('directuri = ' + newuri); | |
} else { | |
var newuri = uri.replace(/\&format=[0-9]{3}/, "&format=001"); | |
if (newuri == uri) newuri += "&format=001"; | |
} | |
var translator = Zotero.loadTranslator("import"); | |
if (mab2Opac.test(uri)) { | |
translator.setTranslator("91acf493-0de7-4473-8b62-89fd141e6c74"); | |
} else { | |
translator.setTranslator("a6ee60df-1ddc-4aae-bb25-45e0537be973"); | |
} | |
translator.getTranslatorObject(function (marc) { | |
Zotero.Utilities.processDocuments([newuri], function(newDoc) { | |
scrape(newDoc, marc, url); | |
}); | |
}); | |
} else { | |
var itemRegexp = '^https?://[^/]+/F/?[A-Z0-9\-]*\?.*(?:func=full-set-set.*\&format=999|func=direct|func=myshelf-full.*)' | |
var items = Zotero.Utilities.getItemArray(doc, doc, itemRegexp, '^[0-9]+$'); | |
// ugly hack to see if we have any items | |
var haveItems = false; | |
for (var i in items) { | |
haveItems = true; | |
break; | |
} | |
// If we don't have any items otherwise, let us use the numbers | |
if (!haveItems) { | |
var items = Zotero.Utilities.getItemArray(doc, doc, itemRegexp); | |
// We try to get more text by grabbing the whole table row | |
var newItems = {}; | |
for (var link in items) { | |
//Z.debug(link.match(/[A-Z0-9]{20}[A-Z0-9]*-[0-9]+\?func.*$/)[0]); | |
var text = ZU.xpathText(doc, '//a[contains(@href,"'+link.match(/[A-Z0-9]{20}[A-Z0-9]*-[0-9]+\?func.*$/)[0]+'")]/ancestor::tr[1]'); | |
if (text) { | |
newItems[link]=text; | |
haveItems = true; | |
} | |
} | |
if (haveItems) items = newItems; | |
} | |
Zotero.selectItems(items, function (items) { | |
if (!items) { | |
return true; | |
} | |
for (var i in items) { | |
var newUri = i.replace("&format=999", "&format=001"); | |
if (newUri == i) { | |
newUri += "&format=001"; | |
} | |
newUris.push(newUri); | |
} | |
var translator = Zotero.loadTranslator("import"); | |
if (mab2Opac.test(uri)) { | |
translator.setTranslator("91acf493-0de7-4473-8b62-89fd141e6c74"); | |
} else { | |
translator.setTranslator("a6ee60df-1ddc-4aae-bb25-45e0537be973"); | |
} | |
translator.getTranslatorObject(function (marc) { | |
Zotero.Utilities.processDocuments(newUris, function(newDoc) { | |
scrape(newDoc, marc, url); | |
}); | |
}); | |
}); | |
} | |
} | |
function scrape(newDoc, marc, url) { | |
var uri = newDoc.location.href; | |
var nonstandard = false; | |
var th = false; | |
var ndl = false; | |
var xpath; | |
//Z.debug(uri) | |
if (newDoc.evaluate('//*[tr[td/text()="LDR"]]/tr[td[2]]', newDoc, null, XPathResult.ANY_TYPE, null).iterateNext()) { | |
xpath = '//*[tr[td/text()="LDR"]]/tr[td[2]]'; | |
} else if (newDoc.evaluate('//tbody[tr/td[@scope="row"]/strong[contains(text(), "LDR")]]', newDoc, null, XPathResult.ANY_TYPE, null).iterateNext()) { | |
//UCSB Pegasus | |
xpath = '//tbody[tr/td[@scope="row"]/strong[contains(text(), "LDR")]]/tr'; | |
} else if (newDoc.evaluate('//*[tr[th[normalize-space(text())="LDR"]]]/tr[td[1]]', newDoc, null, XPathResult.ANY_TYPE, null).iterateNext()) { | |
xpath = '//*[tr[th[normalize-space(text())="LDR"]]]/tr[td[1]]'; | |
th = true; | |
} else if (newDoc.evaluate('//tr[2]//table[2]//tr', newDoc, null, XPathResult.ANY_TYPE, null).iterateNext()) { | |
xpath = '//tr[2]//table[2]//tr[td[2]]'; | |
nonstandard = true; | |
} else if (newDoc.evaluate('//table//tr[td[2][@class="td1"]]', newDoc, null, XPathResult.ANY_TYPE, null).iterateNext()) { | |
xpath = '//table//tr[td[2][@class="td1"]]'; | |
nonstandard = true | |
} else if (newDoc.evaluate('//table/tbody/tr[td/span/b]', newDoc, null, XPathResult.ANY_TYPE, null).iterateNext()) { | |
//for NDL library | |
xpath = '//table/tbody/tr[td/span/b]' | |
ndl = true; | |
} else if (newDoc.evaluate('//tr/td[2]/table/tbody[tr/td[contains(text(), "LDR")]]', newDoc, null, XPathResult.ANY_TYPE, null).iterateNext()) { | |
xpath = '//tr/td[2]/table/tbody[tr/td[contains(text(), "LDR")]]/tr'; | |
nonstandard = true; | |
} | |
//Z.debug(xpath) | |
var elmts = newDoc.evaluate(xpath, newDoc, null, XPathResult.ANY_TYPE, null); | |
var elmt; | |
var record = new marc.record(); | |
while (elmt = elmts.iterateNext()) { | |
if (th) { | |
var field = Zotero.Utilities.superCleanString(newDoc.evaluate('./th', elmt, null, XPathResult.ANY_TYPE, null).iterateNext().textContent); | |
} else { | |
var field = Zotero.Utilities.superCleanString(newDoc.evaluate('./td[1]', elmt, null, XPathResult.ANY_TYPE, null).iterateNext().textContent); | |
} | |
// if (nonstandard) { | |
// var field = Zotero.Utilities.superCleanString(newDoc.evaluate('./td[1]', elmt, null, XPathResult.ANY_TYPE, null).iterateNext().textContent); | |
// } else { | |
// var field = Zotero.Utilities.superCleanString(newDoc.evaluate('./TD[1]/text()[1]', elmt, null, XPathResult.ANY_TYPE, null).iterateNext().nodeValue); | |
// } | |
// var field = Zotero.Utilities.superCleanString(newDoc.evaluate('./td[1]', elmt, null, XPathResult.ANY_TYPE, null).iterateNext().textContent); | |
if (field) { | |
Z.debug(field) | |
var value; | |
if (th) { | |
value = newDoc.evaluate('./TD[1]', elmt, null, XPathResult.ANY_TYPE, null).iterateNext().textContent; //.split(/\n/)[1]; | |
} else if (ndl){ | |
value = newDoc.evaluate('./TD[3]', elmt, null, XPathResult.ANY_TYPE, null).iterateNext().textContent; | |
} else { | |
value = newDoc.evaluate('./TD[2]', elmt, null, XPathResult.ANY_TYPE, null).iterateNext().textContent; //.split(/\n/)[1]; | |
} | |
if (value.split(/\n/)[1]) value = Zotero.Utilities.trimInternal(value.split(/\n/)[1]); | |
Zotero.debug(field + " : " + value); | |
if (field == "LDR") { | |
record.leader = value; | |
} else if (field != "FMT") { | |
value = value.replace(/\|([a-z]) /g, marc.subfieldDelimiter+"$1"); | |
var code = field.substring(0, 3); | |
var ind = ""; | |
if (field.length > 3) { | |
ind = field[3]; | |
if (field.length > 4) { | |
ind += field[4]; | |
} | |
} | |
record.addField(code, ind, value); | |
} | |
} | |
} | |
var newItem = new Zotero.Item(); | |
record.translate(newItem); | |
var domain = url.match(/https?:\/\/([^\/]+)/); | |
newItem.repository = domain[1]+" Library Catalog"; | |
for (var i in newItem.creators) { | |
if (!newItem.creators[i]['firstName']) { | |
var name = newItem.creators[i]['lastName'].split(/([^\s]+)\s+(.*)$/); | |
newItem.creators[i] = {lastName:name[1], firstName:name[2], creatorType:'author'}; | |
} | |
} | |
var oldCreators = newItem.creators; | |
newItem.creators = new Array(); | |
var transient = new Array(); | |
for (var i=0; i<oldCreators.length; i++) { | |
var a = oldCreators[i]; | |
if (a.lastName) { | |
if (!a.lastName.match(/\d+/)) transient.push(a); | |
} | |
} | |
for (var i=0; i<transient.length; i++) { | |
var a = transient[i]; | |
if (a.firstName) { | |
if (a.firstName.match(/\|/)) a.firstName = a.firstName.match(/([^|]+)\s+|/)[1]; | |
} | |
} | |
newItem.creators = transient; | |
newItem.title = newItem.title.replace(/(<<|>>)/g, ''); | |
newItem.complete(); | |
} | |
/** BEGIN TEST CASES **/ | |
var testCases = [] | |
/** END TEST CASES **/ |