Permalink
Join GitHub today
GitHub is home to over 31 million developers working together to host and review code, manage projects, and build software together.
Sign up
translators/BOCC.js
Find file
Copy path
Fetching contributors…
Cannot retrieve contributors at this time
{
	"translatorID": "ecd1b7c6-8d31-4056-8c15-1807b2489254",
	"label": "BOCC",
	"creator": "José Antonio Meira da Rocha",
	"target": "^https?://[^/]*bocc[^/]*/(_listas|_esp)",
	"minVersion": "1.0",
	"maxVersion": "",
	"priority": 100,
	"inRepository": true,
	"translatorType": 4,
	"browserSupport": "gcsbv",
	"lastUpdated": "2014-04-04 10:08:43"
}
/* Translator for Biblioteca Online de Ciências da Comunicação (BOCC,
Communication Science Online Library, http://www.bocc.ubi.pt/) */
/*
BOCC Translator - Parses BOCC indexes and creates Zotero-based metadata.
Copyright (C) 2010 José Antonio Meira da Rocha
This program is free software: you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation, either version 3 of the License, or
(at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
// Standard Zotero function | |
/**
 * Standard Zotero detection hook.
 *
 * Reports "multiple" when the page contains the listing cell
 * (first row of the table with class "ag", cell with class "agenda").
 *
 * @param {Document} doc - page document; must support `evaluate`.
 * @param {string} url - page URL (not used by the check itself).
 * @returns {string|boolean} "multiple" when the listing cell exists, otherwise false.
 */
function detectWeb(doc, url) {
	var listingCell = doc.evaluate("//table[@class='ag']/tbody/tr[1]/td[@class='agenda']", doc, null, XPathResult.ANY_TYPE, null).iterateNext();
	if (listingCell) {
		Zotero.debug("multiple");
		return "multiple";
	}
	// Explicit falsy result instead of falling off the end of the function.
	return false;
}
/////////////////////////////////////// | |
/**
 * Append the authors stored under the "AUTOR" key to an item's creators.
 *
 * @param {Object} newItem - item whose `creators` array receives the authors.
 * @param {Object} itemsAutors - object whose "AUTOR" property, when present,
 *   is a string holding one author or several separated by ";".
 */
function getAuthors(newItem, itemsAutors) {
	// The guard used to read `items`, a name that does not exist in this
	// scope, so every call threw a ReferenceError. The data lives in the
	// `itemsAutors` parameter.
	var author = itemsAutors ? itemsAutors["AUTOR"] : null;
	if (!author) {
		return;
	}
	// split() yields a one-element array when there is no ";", so the
	// single-author and multi-author cases share one code path.
	var authors = author.split(";");
	for (var i = 0; i < authors.length; i++) {
		// Skip blank segments such as the one produced by a trailing ";".
		if (authors[i].trim()) {
			newItem.creators.push(Zotero.Utilities.cleanAuthor(authors[i], "author"));
		}
	}
}
// Standard Zotero translator entry point | |
/**
 * Standard Zotero translator entry point.
 *
 * Reads the listing cell of a BOCC index page, builds a map of document
 * link -> title (plus authors and year per link), lets the user pick entries
 * through Zotero.selectItems, and saves each picked entry as a
 * "journalArticle" item with one attachment pointing at the document.
 * On pages whose <title> contains "Temática", the current tematica name is
 * turned into tags that are added to every saved item.
 *
 * @param {Document} doc - page document; must support `evaluate`.
 * @param {string} url - page URL; the site root is derived from it.
 */
function doWeb(doc, url) {
	var listingXPath = "//table[@class='ag']/tbody/tr[1]/td[@class='agenda']";

	// Return the first node selected by an XPath expression, or null.
	function firstNode(xpath) {
		return doc.evaluate(xpath, doc, null, XPathResult.ANY_TYPE, null).iterateNext();
	}

	// Build the tag list for a "Temática" page from its <title> text.
	// Returns [] whenever the number or the matching name cannot be found.
	function readTematicaTags(pageTitle) {
		// The title carries the current tematica number as ": <digits> -".
		var current = pageTitle.match(/:\s(\d*)\s-/);
		if (!current) {
			return [];
		}
		// Map tematica number (trailing "=<digits>" of the link) -> link text.
		// A single pass over the anchors keeps number and name paired even
		// when an anchor has no usable href.
		var namesByNumber = {};
		var anchors = doc.evaluate('//a[@class="tematica"]', doc, null, XPathResult.ANY_TYPE, null);
		var anchor;
		while ((anchor = anchors.iterateNext())) {
			var href = anchor.getAttribute("href");
			var number = href ? href.match(/=(\d+)$/) : null;
			if (number) {
				namesByNumber[number[1]] = anchor.textContent;
			}
		}
		var tematicaname = namesByNumber[current[1]];
		if (!tematicaname) {
			return [];
		}
		if (!tematicaname.match(' e ')) {
			return [tematicaname];
		}
		// "A, B e C" -> A, B, C: the part before the first " e " may itself be
		// a comma-separated list; keep every piece and drop padding.
		var parts = tematicaname.split(' e ');
		var tags = [];
		var firstPieces = parts[0].split(',');
		for (var p = 0; p < firstPieces.length; p++) {
			var piece = firstPieces[p].trim();
			if (piece) {
				tags.push(piece);
			}
		}
		for (var r = 1; r < parts.length; r++) {
			tags.push(parts[r]);
		}
		return tags;
	}

	// Site root = matched URL prefix without the "/_esp" or "/_listas" segment.
	var siteMatch = /^https?:\/\/[^\/]*bocc[^\/]*\/(?:_listas|_esp)/.exec(url);
	if (!siteMatch) {
		Zotero.debug('Site===>no match for '+url+'<===');
		return;
	}
	var site = siteMatch[0].replace("/_esp", "").replace("/_listas", "");
	Zotero.debug('Site===>'+site+'<===');

	if (detectWeb(doc, url) !== "multiple") {
		return;
	}

	// All articles are in the same <td>; entries are separated by a blank
	// line in the markup ("<br><br>").
	var listingCell = firstNode(listingXPath);
	if (!listingCell) {
		return;
	}
	var lines = listingCell.innerHTML.split('<br><br>');

	// Tags are only available on "Temática" pages.
	var tagsContent = [];
	var titleNode = firstNode("//title");
	var pageTitle = titleNode ? titleNode.textContent : '';
	if (pageTitle.match('Temática')) {
		tagsContent = readTematicaTags(pageTitle);
	}

	var items = {};
	var itemsAutors = {};
	var itemDate = {};
	var reurl = /href="([^"]+)/;
	var reautor = /autor.php[^>]+"agenda">([^<]+)/g;
	var redate = /\d\d\d\d$/;
	for (var n = 0; n < lines.length; n++) {
		var line = lines[n];
		var link = reurl.exec(line);
		// Skip entries without a link, or whose first link is an author page.
		if (!link || link[1].match('autor')) {
			continue;
		}
		var docurl = link[1];
		// The title is the text before the first <br> of the entry.
		var title = Zotero.Utilities.cleanTags(line.split('<br>')[0]);
		title = Zotero.Utilities.trimInternal(Zotero.Utilities.trim(title));
		items[docurl] = Zotero.Utilities.unescapeHTML(title);
		// Each match ends in `">Name`; keep what follows the first ">".
		var autores = line.match(reautor) || [];
		for (var a = 0; a < autores.length; a++) {
			autores[a] = autores[a].split('>')[1];
		}
		itemsAutors[docurl] = autores;
		// The year is the four digits that end the entry; entries without
		// one simply get no date instead of aborting the whole page.
		var year = line.match(redate);
		if (year) {
			itemDate[docurl] = year[0];
		}
	}

	/* Zotero.selectItems()
	 * Presents items to select in the select box.
	 * Takes an object with URLs as keys and descriptions as values.
	 */
	Zotero.selectItems(items, function (selected) {
		if (!selected) {
			return true;
		}
		for (var item in selected) {
			var newItem = new Zotero.Item("journalArticle");
			newItem.title = selected[item];
			newItem.date = itemDate[item];
			newItem.publicationTitle = "Biblioteca Online de Ciências da Comunicação";
			newItem.ISSN = '1646-3137';
			newItem.journalAbbreviation = 'BOCC';
			// Links are relative ("../pag/..."); drop the ".." and prefix the site root.
			var fileurl = site + item.replace("..", "");
			newItem.url = fileurl;
			Zotero.debug('Doc ===>'+fileurl+'<===');
			// Built fresh for every item so an unrecognised extension does not
			// inherit the title/MIME type of the previous item.
			var attachment = { url: fileurl };
			if (/\.html?$/i.test(fileurl)) {
				attachment.title = 'Anexo HTML';
				attachment.mimeType = 'text/html';
			} else if (/\.pdf$/i.test(fileurl)) {
				attachment.title = 'Anexo PDF';
				attachment.mimeType = 'application/pdf';
			}
			Zotero.debug('File title ===>'+attachment.title+'<===');
			Zotero.debug('File mime ===>'+attachment.mimeType+'<===');
			newItem.attachments.push(attachment);
			var names = itemsAutors[item] || [];
			for (var c = 0; c < names.length; c++) {
				newItem.creators.push(Zotero.Utilities.cleanAuthor(names[c], "author"));
			}
			for (var t = 0; t < tagsContent.length; t++) {
				newItem.tags.push(tagsContent[t]);
			}
			newItem.complete();
		}
	});
}
/** BEGIN TEST CASES **/
var testCases = [
	{
		"type": "web",
		"url": "http://www.bocc.ubi.pt/_listas/titulos_letra.php?letra=B",
		"items": "multiple"
	}
]
/** END TEST CASES **/