Permalink
Join GitHub today
GitHub is home to over 31 million developers working together to host and review code, manage projects, and build software together.
Sign up
Fetching contributors…
Cannot retrieve contributors at this time
{
    "translatorID": "91acf493-0de7-4473-8b62-89fd141e6c74",
    "label": "MAB2",
    "creator": "Simon Kornblith. Adaptions for MAB2: Leon Krauthausen (FUB)",
    "target": "mab2",
    "minVersion": "1.0.0b3.r1",
    "maxVersion": "",
    "priority": 100,
    "inRepository": true,
    "translatorType": 1,
    "browserSupport": "gcs",
    "lastUpdated": "2014-05-20 17:57:47"
}
/**
 * Import detection hook: decides whether the input looks like MAB2.
 *
 * Reads the first 8 characters of the input and checks that they consist of
 * three digits followed by five characters that are each a lowercase letter
 * or a space.
 *
 * @returns {boolean} true when the leading characters match, false otherwise
 */
function detectImport() {
    var mab2RecordRegexp = /^[0-9]{3}[a-z ]{2}[a-z ]{3}$/;
    var read = Zotero.read(8);
    return mab2RecordRegexp.test(read);
}
// Control characters that structure a record in this translator.
// Appended to every field; also marks the end of the leader + directory.
var fieldTerminator = "\x1E";
// Separates one record from the next in the input stream.
var recordTerminator = "\x1D";
// Introduces each subfield inside a field value.
var subfieldDelimiter = "\x1F";
/*
 * CLEANING FUNCTIONS
 */
/**
 * General purpose cleaning of a raw field value.
 *
 * Strips leading and trailing whitespace and punctuation (. , / : ;),
 * removes runs of "<<" and ">>", collapses runs of spaces into one space,
 * and finally drops one pair of enclosing [] or () if the value is wrapped
 * in them.
 *
 * @param {string} value raw field value
 * @returns {string} the cleaned value
 */
function clean(value) {
    value = value.replace(/^[\s\.\,\/\:;]+/, '');
    value = value.replace(/[\s\.\,\/\:;]+$/, '');
    value = value.replace(/<<+/g, '');
    value = value.replace(/>>+/g, '');
    value = value.replace(/ +/g, ' ');

    var first = value.charAt(0);
    var last = value.charAt(value.length - 1);
    var bracketed = first === "[" && last === "]";
    var parenthesised = first === "(" && last === ")";
    if (bracketed || parenthesised) {
        // chop off the enclosing pair
        return value.slice(1, -1);
    }
    return value;
}
/**
 * Chops an authority ID off a subject heading.
 *
 * Everything from the first "|" onwards is removed. A value without "|"
 * is returned unchanged (slicing with the -1 from indexOf would otherwise
 * drop its last character).
 *
 * @param {string} value subject heading, optionally followed by "|" and an ID
 * @returns {string} the heading without the ID part
 */
function cleanTag(value) {
    var pipeIndex = value.indexOf('|');
    if (pipeIndex === -1) {
        return value;
    }
    return value.slice(0, pipeIndex);
}
/**
 * Number extraction: returns the first run of decimal digits in the text.
 *
 * @param {string} text text to search
 * @returns {string|undefined} the digits as a string, or undefined when the
 *     text contains no digit
 */
function pullNumber(text) {
    var pullRe = /[0-9]+/;
    var m = pullRe.exec(text);
    if (m) {
        return m[0];
    }
}
/**
 * ISBN extraction: returns the first run of digits, "X" and hyphens.
 *
 * @param {string} text text to search
 * @returns {string|undefined} the matched run, or undefined when the text
 *     contains none of those characters
 */
function pullISBN(text) {
    var pullRe = /[0-9X\-]+/;
    var m = pullRe.exec(text);
    if (m) {
        return m[0];
    }
}
/**
 * Corporate author extraction: wraps a name in a single-field creator object.
 *
 * @param {string} author name of the corporate body
 * @returns {{lastName: string, fieldMode: boolean}} creator object with the
 *     whole name in lastName and fieldMode set to true
 */
function corpAuthor(author) {
    return {lastName: author, fieldMode: true};
}
/**
 * Regular author extraction: forwards to Zotero.Utilities.cleanAuthor.
 *
 * @param {string} author raw author name
 * @param {string} type creator type passed through to cleanAuthor
 * @param {boolean} useComma passed through to cleanAuthor
 * @returns {*} whatever Zotero.Utilities.cleanAuthor returns
 */
function author(author, type, useComma) {
    return Zotero.Utilities.cleanAuthor(author, type, useComma);
}
/**
 * MAB2 author extraction.
 *
 * Translates the role marker passed as authType (the caller in this file
 * takes it from subfield b) into a creator type, then forwards to
 * Zotero.Utilities.cleanAuthor. The markers "[Hrsg.]", "[Mitarb.]" and
 * "[Übers.]" are replaced by "editor", "contributor" and "translator";
 * any other text in authType is passed on as it is.
 *
 * @param {string} author raw author name
 * @param {string} [authType] role marker or creator type; defaults to "author"
 * @param {boolean} useComma passed through to cleanAuthor
 * @returns {*} whatever Zotero.Utilities.cleanAuthor returns
 */
function authorMab(author, authType, useComma) {
    if (!authType) {
        authType = 'author';
    }
    authType = authType.replace('[Hrsg.]', 'editor');
    authType = authType.replace('[Mitarb.]', 'contributor');
    authType = authType.replace('[Übers.]', 'translator');
    return Zotero.Utilities.cleanAuthor(author, authType, useComma);
}
/*
 * END CLEANING FUNCTIONS
 */
/**
 * One MAB2 record: a leader, a directory mapping numeric tags to
 * [position, length] pairs, and the concatenated field content those pairs
 * point into.
 */
var record = function() {
    this.directory = {};
    this.leader = "";
    this.content = "";
    // defaults, used until importBinary() reads other values from a leader
    this.indicatorLength = 2;
    this.subfieldCodeLength = 2;
};

/**
 * Imports a binary MAB2 record into this record.
 *
 * The text before the first field terminator is split into a 24-character
 * leader and a directory of 12-character entries (3-digit tag, 4-digit
 * length, 5-digit position). Indicator length, subfield code length and the
 * base address of the data are read from leader positions 10, 11 and 12-16.
 *
 * @param {string} record the raw record, without its record terminator
 */
record.prototype.importBinary = function(record) {
    // get directory and leader
    var header = record.substr(0, record.indexOf(fieldTerminator));
    this.leader = header.substr(0, 24);
    var directory = header.substr(24);

    // get various data; keep the current values when the leader has no number there
    var indicatorLength = parseInt(this.leader[10], 10);
    if (!isNaN(indicatorLength)) {
        this.indicatorLength = indicatorLength;
    }
    var subfieldCodeLength = parseInt(this.leader[11], 10);
    if (!isNaN(subfieldCodeLength)) {
        this.subfieldCodeLength = subfieldCodeLength;
    }
    var baseAddress = parseInt(this.leader.substr(12, 5), 10);

    // get record data
    var contentTmp = record.substr(baseAddress);

    // The directory counts one-byte characters, so pad every character with
    // null characters up to the length of its UTF-8 sequence. The nulls are
    // stripped again in getField().
    var padded = "";
    for (var k = 0; k < contentTmp.length; k++) {
        var code = contentTmp.charCodeAt(k);
        padded += contentTmp.charAt(k);

        if (code >= 0xD800 && code <= 0xDBFF && k + 1 < contentTmp.length) {
            var next = contentTmp.charCodeAt(k + 1);
            if (next >= 0xDC00 && next <= 0xDFFF) {
                // a surrogate pair is ONE 4-byte UTF-8 sequence:
                // two code units plus two nulls
                padded += contentTmp.charAt(k + 1) + "\x00\x00";
                k++;
                continue;
            }
        }

        if (code > 0x0007FF) {
            padded += "\x00\x00";
        } else if (code > 0x00007F) {
            padded += "\x00";
        }
    }
    this.content = padded;

    // read directory
    for (var i = 0; i < directory.length; i += 12) {
        var tag = parseInt(directory.substr(i, 3), 10);
        var fieldLength = parseInt(directory.substr(i + 3, 4), 10);
        var fieldPosition = parseInt(directory.substr(i + 7, 5), 10);

        // ignore truncated or non-numeric entries
        if (isNaN(tag) || isNaN(fieldLength) || isNaN(fieldPosition)) {
            continue;
        }

        if (!this.directory[tag]) {
            this.directory[tag] = [];
        }
        this.directory[tag].push([fieldPosition, fieldLength]);
    }
};

/**
 * Adds a field to this record.
 *
 * @param {string|number} field field number
 * @param {string} indicator indicator; cut or left-padded with spaces to
 *     this.indicatorLength
 * @param {string} value field value (including any subfield delimiters)
 */
record.prototype.addField = function(field, indicator, value) {
    field = parseInt(field, 10);

    // make sure indicator is the right length
    if (indicator.length > this.indicatorLength) {
        indicator = indicator.substr(0, this.indicatorLength);
    } else if (indicator.length !== this.indicatorLength) {
        indicator = Zotero.Utilities.lpad(indicator, " ", this.indicatorLength);
    }

    // add terminator
    value = indicator + value + fieldTerminator;

    // add field to directory
    if (!this.directory[field]) {
        this.directory[field] = [];
    }
    this.directory[field].push([this.content.length, value.length]);

    // add field to record
    this.content += value;
};

/**
 * Gets all fields with a certain field number.
 *
 * @param {string|number} field field number; only its leading digits are
 *     used (parseInt), so "037b" addresses field 37
 * @returns {Array} one [indicator, value] pair per occurrence; empty when
 *     the field is absent
 */
record.prototype.getField = function(field) {
    field = parseInt(field, 10);
    var fields = [];

    // make sure fields exist
    var locations = this.directory[field];
    if (!locations) {
        return fields;
    }

    // get fields
    for (var i = 0; i < locations.length; i++) {
        var position = locations[i][0];
        var length = locations[i][1];
        var indicator = this.content.substr(position, this.indicatorLength);
        // value = everything after the indicator, minus the field terminator;
        // the padding nulls are removed here
        var value = this.content.substr(position + this.indicatorLength,
            length - this.indicatorLength - 1).replace(/\x00/g, "");
        fields.push([indicator, value]);
    }

    return fields;
};

/**
 * Gets the subfields of every occurrence of a field.
 *
 * @param {string|number} tag field number
 * @returns {Array<Object>} one object per occurrence, mapping subfield code
 *     to value (first value wins when a code repeats). An occurrence without
 *     any subfield delimiter is returned as {"?": wholeValue}.
 */
record.prototype.getFieldSubfields = function(tag) {
    var fields = this.getField(tag);
    var returnFields = [];
    var codeLength = this.subfieldCodeLength - 1;

    for (var i = 0; i < fields.length; i++) {
        var parsed = {};
        var subfields = fields[i][1].split(subfieldDelimiter);

        if (subfields.length == 1) {
            parsed["?"] = fields[i][1];
        } else {
            for (var j = 0; j < subfields.length; j++) {
                if (subfields[j]) {
                    var subfieldIndex = subfields[j].substr(0, codeLength);
                    if (!parsed[subfieldIndex]) {
                        parsed[subfieldIndex] = subfields[j].substr(codeLength);
                    }
                }
            }
        }

        returnFields[i] = parsed;
    }

    return returnFields;
};

/**
 * Copies a field of this record onto an item.
 *
 * For every occurrence of fieldNo the subfields named in part are joined
 * with spaces, cleaned with clean() and optionally passed through execMe.
 * With fieldName "creator" every non-empty result is pushed onto
 * item.creators; for any other fieldName the first non-empty result is
 * assigned to item[fieldName] and the method returns.
 *
 * @param {Object} item target item
 * @param {string|number} fieldNo field number
 * @param {string} part subfield codes to use, one character each
 * @param {string} fieldName item property to fill, or "creator"
 * @param {Function} [execMe] post-processor called as execMe(value, arg1, arg2)
 * @param {*} [arg1] second argument for execMe
 * @param {*} [arg2] third argument for execMe
 */
record.prototype._associateDBField = function(item, fieldNo, part, fieldName, execMe, arg1, arg2) {
    var field = this.getFieldSubfields(fieldNo);
    Zotero.debug('MAB2: found '+field.length+' matches for '+fieldNo+part);

    for (var i = 0; i < field.length; i++) {
        var value = false;
        for (var j = 0; j < part.length; j++) {
            var myPart = part[j];
            if (field[i][myPart]) {
                if (value) {
                    value += " " + field[i][myPart];
                } else {
                    value = field[i][myPart];
                }
            }
        }
        if (!value) {
            continue;
        }

        value = clean(value);
        if (!value) {
            // nothing but punctuation/whitespace: try the next occurrence
            continue;
        }
        if (execMe) {
            value = execMe(value, arg1, arg2);
        }
        if (value === undefined || value === null || value === "") {
            // the extractor found nothing usable: try the next occurrence
            continue;
        }

        if (fieldName == "creator") {
            item.creators.push(value);
        } else {
            item[fieldName] = value;
            return;
        }
    }
};

/**
 * Adds the given subfields of every occurrence of a field to item.tags,
 * after passing each through cleanTag().
 *
 * @param {Object} item target item
 * @param {string|number} fieldNo field number
 * @param {string} part subfield codes to use, one character each
 */
record.prototype._associateTags = function(item, fieldNo, part) {
    var field = this.getFieldSubfields(fieldNo);
    for (var i = 0; i < field.length; i++) {
        for (var j = 0; j < part.length; j++) {
            var myPart = part[j];
            if (field[i][myPart]) {
                var tag = cleanTag(field[i][myPart]);
                if (tag) {
                    item.tags.push(tag);
                }
            }
        }
    }
};

/**
 * Loads this MAB2 record into an item.
 *
 * Sets item.itemType from leader position 6, then fills creators, language,
 * title, extra, edition, place, publisher, date, pages, series, ISBN, url
 * and tags from the field numbers listed below. Fields already set on the
 * item are not overwritten where the code checks for them first.
 *
 * @param {Object} item item to fill; must have creators and tags arrays
 */
record.prototype.translate = function(item) {
    // get item type
    item.itemType = "book";
    if (this.leader) {
        var marcType = this.leader[6];
        if (marcType == "g") {
            item.itemType = "film";
        } else if (marcType == "k" || marcType == "e" || marcType == "f") {
            item.itemType = "artwork";
        } else if (marcType == "t") {
            item.itemType = "manuscript";
        }
    }

    // Extract MAB2 fields
    // FUB Added language, edition, pages, url, edition, series, ISBN, url
    for (var i = 100; i <= 196; i++) {
        var firstOccurrence = this.getFieldSubfields(i)[0];
        if (!firstOccurrence) {
            continue;
        }
        // sometimes (or always?) the author is in p not a
        var authorfield = firstOccurrence['a'] ? "a" : "p";
        var authType = firstOccurrence['b'];
        this._associateDBField(item, i, authorfield, "creator", authorMab, authType, true);
    }

    // NOTE(review): trailing letters in field numbers such as "037b" are
    // dropped by parseInt in getField(); presumably they name the indicator.
    if (!item.language) this._associateDBField(item, "037b", "a", "language");
    this._associateDBField(item, "200", "a", "creator", corpAuthor);
    if (!item.title) this._associateDBField(item, "331", "a", "title");

    // append the subtitle, but only the parts that actually exist
    var subtitleField = this.getFieldSubfields("335")[0];
    if (subtitleField && subtitleField['a']) {
        if (item.title) {
            item.title = item.title + ": " + subtitleField['a'];
        } else {
            item.title = subtitleField['a'];
        }
    }

    if (!item.edition) this._associateDBField(item, "403", "a", "edition");
    if (!item.place) this._associateDBField(item, "410", "a", "place");
    if (!item.publisher) this._associateDBField(item, "412", "a", "publisher");
    if (!item.title) this._associateDBField(item, "1300", "a", "title");
    if (!item.date) this._associateDBField(item, "425", "a", "date", pullNumber);
    if (!item.pages) this._associateDBField(item, "433", "a", "pages", pullNumber);
    if (!item.series) this._associateDBField(item, "451", "a", "series");
    if (!item.edition) this._associateDBField(item, "523", "a", "edition");
    if (!item.ISBN) this._associateDBField(item, "540", "a", "ISBN", pullISBN);
    if (!item.date) this._associateDBField(item, "595", "a", "date", pullNumber);
    if (!item.url) this._associateDBField(item, "655e", "u", "url");

    // Several fields feed "extra"; collect them all instead of letting each
    // one overwrite the previous.
    var extraFields = ["304", "501", "519"];
    var extras = [];
    for (var e = 0; e < extraFields.length; e++) {
        var holder = {};
        this._associateDBField(holder, extraFields[e], "a", "extra");
        if (holder.extra) {
            extras.push(holder.extra);
        }
    }
    if (extras.length) {
        item.extra = extras.join("\n");
    }

    // Extract German subject headings (RSWK) as tags
    var subjectFields = ["902", "907", "912", "917", "922", "927", "932", "937", "942"];
    for (var s = 0; s < subjectFields.length; s++) {
        this._associateTags(item, subjectFields[s], "acfgpkstz");
    }
};
/**
 * Import entry point: reads the input in 4096-unit chunks, splits it on the
 * record terminator and turns every complete record into a Zotero item.
 *
 * Text after the last terminator of a chunk is held over and prepended to
 * the next chunk. Records that contain nothing but whitespace are skipped.
 * Whatever remains after the final terminator of the input is not imported.
 */
function doImport() {
    var text;
    var holdOver = ""; // part of the text held over from the last loop

    Zotero.setCharacterSet("utf-8");

    while ((text = Zotero.read(4096))) { // read in 4096 byte increments
        var records = text.split(recordTerminator);

        if (records.length > 1) {
            records[0] = holdOver + records[0];
            holdOver = records.pop(); // skip last record, since it's not done

            for (var i = 0; i < records.length; i++) {
                if (!/\S/.test(records[i])) {
                    // empty or whitespace-only: would only yield an empty item
                    continue;
                }

                var newItem = new Zotero.Item();

                // create new record
                var rec = new record();
                rec.importBinary(records[i]);
                rec.translate(newItem);

                newItem.complete();
            }
        } else {
            holdOver += text;
        }
    }
}
// Names made available under `exports`: the record constructor and the
// three control characters.
var exports = {
    "record": record,
    "fieldTerminator": fieldTerminator,
    "recordTerminator": recordTerminator,
    "subfieldDelimiter": subfieldDelimiter
};