Permalink
Join GitHub today
GitHub is home to over 31 million developers working together to host and review code, manage projects, and build software together.
Sign up
Fetching contributors…
Cannot retrieve contributors at this time
{
	"translatorID": "ecd1b7c6-8d31-4056-8c15-1807b2489254",
	"label": "BOCC",
	"creator": "José Antonio Meira da Rocha",
	"target": "^https?://[^/]*bocc[^/]*/(_listas|_esp)",
	"minVersion": "1.0",
	"maxVersion": "",
	"priority": 100,
	"inRepository": true,
	"translatorType": 4,
	"browserSupport": "gcsbv",
	"lastUpdated": "2014-04-04 10:08:43"
}
/* Translator for Biblioteca Online de Ciências da Comunicação (BOCC,
   Communication Science Online Library, http://www.bocc.ubi.pt/) */
/*
   BOCC Translator - Parses BOCC indexes and creates Zotero-based metadata.
   Copyright (C) 2010 José Antonio Meira da Rocha
   This program is free software: you can redistribute it and/or modify
   it under the terms of the GNU General Public License as published by
   the Free Software Foundation, either version 3 of the License, or
   (at your option) any later version.
   This program is distributed in the hope that it will be useful,
   but WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
   GNU General Public License for more details.
   You should have received a copy of the GNU General Public License
   along with this program.  If not, see <http://www.gnu.org/licenses/>.
*/
// Standard Zotero function | |
/*
 * Reports whether the page is a BOCC listing that this translator can parse.
 *
 * A page is treated as a listing when the first row of the table with class
 * "ag" contains a cell with class "agenda".
 *
 * doc - DOM document of the page being inspected
 * url - address of the page (not used by the check itself)
 *
 * Returns "multiple" for a listing page and false otherwise.
 */
function detectWeb(doc, url) {
	if (!doc) {
		return false;
	}
	var listingCell = doc.evaluate(
		"//table[@class='ag']/tbody/tr[1]/td[@class='agenda']",
		doc, null, XPathResult.ANY_TYPE, null
	).iterateNext();
	if (listingCell) {
		Zotero.debug("multiple");
		return "multiple";
	}
	// Explicit falsy result instead of falling off the end of the function
	return false;
}
/////////////////////////////////////// | |
/*
 * Formats the "AUTOR" field and appends the resulting creators to an item.
 *
 * newItem     - item whose "creators" array receives the authors
 * itemsAutors - object whose "AUTOR" property holds one author name, or
 *               several names separated by ";"
 *
 * Nothing is added when the "AUTOR" property is missing or empty.
 */
function getAuthors(newItem, itemsAutors) {
	// The field lives on the itemsAutors parameter; the earlier check read an
	// undefined "items" variable and threw before any author was added.
	var author = itemsAutors && itemsAutors["AUTOR"];
	if (!author) {
		return;
	}
	// split() returns the whole string as a single entry when there is no ";"
	var names = author.split(";");
	for (var i = 0; i < names.length; i++) {
		// Skip empty pieces produced by a trailing or doubled ";"
		if (/\S/.test(names[i])) {
			newItem.creators.push(Zotero.Utilities.cleanAuthor(names[i], "author"));
		}
	}
}
// Standard Zotero translator entry point | |
/*
 * Splits a BOCC theme name into individual tags.
 *
 * Names containing " e " are split on it, and the part before the first
 * " e " is further split on commas, so "A, B, C e D" yields A, B, C and D.
 * A name without " e " is kept as a single tag. Tags are trimmed and empty
 * pieces are dropped. Returns an empty array when no name is given.
 */
function boccBuildTags(themeName) {
	var tags = [];
	if (!themeName) {
		return tags;
	}
	var parts = themeName.split(' e ');
	var pieces = [themeName];
	if (parts.length > 1) {
		pieces = parts[0].split(',').concat(parts.slice(1));
	}
	for (var i = 0; i < pieces.length; i++) {
		var tag = pieces[i].replace(/^\s+|\s+$/g, '');
		if (tag) {
			tags.push(tag);
		}
	}
	return tags;
}

/*
 * Builds the tag list for a thematic listing page.
 *
 * The page <title> must contain "Temática" and a ": <number> -" marker; the
 * number is looked up among the a.tematica links, whose href ends in
 * "=<number>". Returns an empty array when any of these pieces is missing.
 */
function boccCollectTags(doc) {
	var titleNode = doc.evaluate("//title", doc, null, XPathResult.ANY_TYPE, null).iterateNext();
	var pageTitle = titleNode ? titleNode.textContent : '';
	if (!pageTitle.match('Temática')) {
		return [];
	}
	// Map theme number -> theme name, read from one pass over the links so a
	// link without an href cannot shift numbers and names out of step
	var themeNames = {};
	var anchors = doc.evaluate('//a[@class="tematica"]', doc, null, XPathResult.ANY_TYPE, null);
	var anchor;
	while ((anchor = anchors.iterateNext())) {
		var idMatch = /=(\d+)$/.exec(anchor.getAttribute('href') || '');
		if (idMatch) {
			themeNames[idMatch[1]] = anchor.textContent;
		}
	}
	var current = /:\s(\d*)\s-/.exec(pageTitle);
	if (!current) {
		return [];
	}
	return boccBuildTags(themeNames[current[1]]);
}

/*
 * Parses the listing HTML into per-document lookup tables keyed by href.
 *
 * Entries are separated by "<br><br>". For each entry the first href is the
 * document link (entries whose first link points to an author page are
 * skipped), the text before the first "<br>" is the title, author names are
 * taken from the autor.php links, and a four-digit year at the end of the
 * entry is the date.
 *
 * Returns {titles, authors, dates}; dates[href] is undefined when the entry
 * does not end in a year.
 */
function boccParseEntries(html) {
	var titles = {};
	var authors = {};
	var dates = {};
	var reUrl = /href="([^"]+)/;
	var reAuthor = /autor.php[^>]+"agenda">([^<]+)/g;
	var reYear = /(\d\d\d\d)\s*$/;
	var chunks = html.split('<br><br>');
	for (var n = 0; n < chunks.length; n++) {
		var chunk = chunks[n];
		var link = reUrl.exec(chunk);
		if (!link || link[1].match('autor')) {
			continue;
		}
		var href = link[1];
		var title = Zotero.Utilities.cleanTags(chunk.split('<br>')[0]);
		title = Zotero.Utilities.trimInternal(Zotero.Utilities.trim(title));
		titles[href] = Zotero.Utilities.unescapeHTML(title);

		// Each match looks like: autor.php?...="agenda">Name
		var found = chunk.match(reAuthor) || [];
		var names = [];
		for (var i = 0; i < found.length; i++) {
			names.push(found[i].split('>')[1]);
		}
		authors[href] = names;

		var year = reYear.exec(chunk);
		dates[href] = year ? year[1] : undefined;
	}
	return {titles: titles, authors: authors, dates: dates};
}

/*
 * Turns a listing href into a full URL. Absolute http(s) links are returned
 * unchanged; otherwise the first ".." is removed and the site root prepended.
 */
function boccResolveUrl(site, href) {
	if (/^https?:\/\//i.test(href)) {
		return href;
	}
	return site + href.replace("..", "");
}

/*
 * Describes the attachment for a document URL. The title and MIME type are
 * filled in only for .htm/.html and .pdf files (case-insensitive) and stay
 * undefined for any other extension.
 */
function boccAttachmentFor(fileurl) {
	var attachment = {url: fileurl, title: undefined, mimeType: undefined};
	if (/\.html?$/i.test(fileurl)) {
		attachment.title = 'Anexo HTML';
		attachment.mimeType = 'text/html';
	} else if (/\.pdf$/i.test(fileurl)) {
		attachment.title = 'Anexo PDF';
		attachment.mimeType = 'application/pdf';
	}
	return attachment;
}

/*
 * Standard Zotero translator entry point.
 *
 * Reads the listing on a BOCC index page, lets the user choose documents via
 * Zotero.selectItems, and saves each chosen document as a "journalArticle"
 * with title, year, authors, attachment and (on thematic pages) tags.
 *
 * doc - DOM document of the listing page
 * url - address of the listing page; must contain "/_listas" or "/_esp"
 *
 * Does nothing when the URL does not match or the page is not a listing.
 */
function doWeb(doc, url) {
	// Site root is everything before the "/_listas" or "/_esp" path segment
	var siteMatch = /^(https?:\/\/[^\/]*bocc[^\/]*)\/(?:_listas|_esp)/.exec(url);
	if (!siteMatch) {
		return;
	}
	var site = siteMatch[1];
	Zotero.debug('Site===>' + site + '<===');

	if (detectWeb(doc, url) !== "multiple") {
		return;
	}

	// All articles are in the same <td>; take the first one
	var listing = doc.evaluate("//table[@class='ag']/tbody/tr[1]/td[@class='agenda']", doc, null, XPathResult.ANY_TYPE, null).iterateNext();
	if (!listing) {
		return;
	}
	var tags = boccCollectTags(doc);
	var entries = boccParseEntries(listing.innerHTML);

	// Zotero.selectItems() takes an object with URLs as keys and
	// descriptions as values and passes the chosen subset to the callback
	Zotero.selectItems(entries.titles, function (selected) {
		if (!selected) {
			return true;
		}
		for (var href in selected) {
			if (!Object.prototype.hasOwnProperty.call(selected, href)) {
				continue;
			}
			var newItem = new Zotero.Item("journalArticle");
			newItem.title = selected[href];
			if (entries.dates[href]) {
				newItem.date = entries.dates[href];
			}
			newItem.publicationTitle = "Biblioteca Online de Ciências da Comunicação";
			newItem.ISSN = '1646-3137';
			newItem.journalAbbreviation = 'BOCC';

			var fileurl = boccResolveUrl(site, href);
			newItem.url = fileurl;
			Zotero.debug('Doc ===>' + fileurl + '<===');

			// Computed per document so one file's type never carries over
			// to the next
			var attachment = boccAttachmentFor(fileurl);
			Zotero.debug('File title ===>' + attachment.title + '<===');
			Zotero.debug('File mime ===>' + attachment.mimeType + '<===');
			newItem.attachments.push(attachment);

			var names = entries.authors[href] || [];
			for (var i = 0; i < names.length; i++) {
				newItem.creators.push(Zotero.Utilities.cleanAuthor(names[i], "author"));
			}
			for (var t = 0; t < tags.length; t++) {
				newItem.tags.push(tags[t]);
			}
			newItem.complete();
		}
	});
}
/** BEGIN TEST CASES **/
var testCases = [
	{
		"type": "web",
		"url": "http://www.bocc.ubi.pt/_listas/titulos_letra.php?letra=B",
		"items": "multiple"
	}
]
/** END TEST CASES **/