Skip to content
Permalink
Branch: master
Find file Copy path
Find file Copy path
Fetching contributors…
Cannot retrieve contributors at this time
205 lines (181 sloc) 6.25 KB
{
"translatorID": "2e4ebd19-83ab-4a56-8fa6-bcd52b576470",
"label": "Sueddeutsche.de",
"creator": "Martin Meyerhoff",
"target": "^https?://www\\.sueddeutsche\\.de",
"minVersion": "3.0",
"maxVersion": "",
"priority": 100,
"inRepository": true,
"translatorType": 4,
"browserSupport": "gcsibv",
"lastUpdated": "2017-06-24 21:03:57"
}
/*
Sueddeutsche.de Translator
Copyright (C) 2011 Martin Meyerhoff
This program is free software: you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation, either version 3 of the License, or
(at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
/*
This one has the search function on a different host, so I cannot scan the search results. A multiple option, though, is given for the page itself.
Test here:
http://www.sueddeutsche.de/politik
http://www.sueddeutsche.de/thema/Krieg_in_Libyen
http://www.sueddeutsche.de/muenchen
Reference article: http://www.sueddeutsche.de/wissen/embryonale-stammzellen-wo-sind-die-naiven-1.1143034
*/
function detectWeb(doc, url) {
if (ZU.xpathText(doc, '//h2/strong')) {
return "newspaperArticle";
} else if (ZU.xpath(doc, '//div[@id="topthemen" or @class="panoramateaser" \
or contains(@class,"maincolumn") or contains(@class, "teaser")]\
//a[starts-with(@class,"entry-title") \
and starts-with(@href,"http://www.sueddeutsche.de") \
and not(contains(@href,"/app/"))]').length){
return "multiple";
}
}
function scrape(doc, url) {
//don't parse things like image galleries
//e.g. http://www.sueddeutsche.de/kultur/thomas-manns-villa-in-los-angeles-weimar-am-pazifik-1.1301388
if (!ZU.xpathText(doc, '//h2/strong')) return;
var newItem = new Zotero.Item("newspaperArticle");
newItem.url = url;
var title = ZU.xpathText(doc, '//meta[contains(@property, "og:title")]/@content');
newItem.title = Zotero.Utilities.trim(title.replace(/\s?\s?/, ": "));
// Author. This is tricky, the SZ uses the author field for whatever they like.
// Sometimes, there is no author.
var author = ZU.xpathText(doc, '//section[contains(@class, "authors")]//span[contains(@class, "moreInfo")]/strong')
// One case i've seen: A full sentence as the "author", with no author in it.
if (author && author.trim().charAt(author.length - 1) != '.') {
author = author.replace(/^\s*Von\s|Ein Kommentar von/i, '')
// For multiple Authors, the SZ uses comma, und and u
.split(/\s+(?:und|u|,)\s+/);
for (var i in author) {
if (author[i].match(/\s/)) { // only names that contain a space!
newItem.creators.push(ZU.cleanAuthor(author[i], "author"));
}
}
}
// summary
newItem.abstractNote = ZU.xpathText(doc, '//meta[contains(@property, "og:description")]/@content');
// Date
newItem.date = ZU.xpathText(doc, "//time[@class='timeformat']");
if (newItem.date) {
newItem.date = ZU.strToISO(newItem.date);
}
// Section
var section = url.match(/sueddeutsche\.de\/([^\/]+)/);
newItem.section = ZU.capitalizeTitle(section[1]);
// Tags
var tags = ZU.xpathText(doc, '//meta[@name="keywords"]/@content');
if (tags) {
tags = tags.split(/\s*,\s+/);
for (var i=0, n=tags.length; i<n; i++) {
newItem.tags.push(ZU.trimInternal(tags[i]));
}
}
// Publication
newItem.publicationTitle = "sueddeutsche.de"
newItem.ISSN = "0174-4917";
newItem.language = "de";
// Attachment. inserting /2.220/ gives us a printable version
var printurl = url.replace(/(.*\/)(.*$)/, '$12.220/$2');
newItem.attachments.push({
url: printurl,
title: "Snapshot",
mimeType: "text/html",
snapshot: true
});
newItem.complete()
}
function doWeb(doc, url) {
if (detectWeb(doc, url) == "multiple") {
var links = ZU.xpath(doc,
'//div[@id="topthemen" or @class="panoramateaser" \
or contains(@class,"maincolumn") or contains(@class, "teaser")]\
//a[starts-with(@class,"entry-title") \
and starts-with(@href,"http://www.sueddeutsche.de") \
and not(contains(@href,"/app/"))]');
var items = new Object();
var title;
for (var i=0, n=links.length; i<n; i++) {
title = ZU.xpathText(links[i], './node()[not(self::div)]', null, '');
items[links[i].href] = ZU.trimInternal(title);
}
Zotero.selectItems(items, function(items) {
if (!items) return true;
var articles = new Array();
for (var i in items) {
articles.push(i);
}
ZU.processDocuments(articles, scrape);
});
} else {
scrape(doc, url);
}
}/** BEGIN TEST CASES **/
var testCases = [
{
"type": "web",
"url": "http://www.sueddeutsche.de/politik/verdacht-gegen-hessischen-verfassungsschuetzer-spitzname-kleiner-adolf-1.1190178",
"items": [
{
"itemType": "newspaperArticle",
"title": "Spitzname \"Kleiner Adolf\"",
"creators": [
{
"firstName": "Peter",
"lastName": "Blechschmidt",
"creatorType": "author"
},
{
"firstName": "Marc",
"lastName": "Widmann",
"creatorType": "author"
}
],
"date": "2011-11-16",
"ISSN": "0174-4917",
"abstractNote": "Als die Zwickauer Zelle in einem Kasseler Internet-Café Halit Y. hinrichtet, surft ein hessischer Verfassungsschützer dort im Netz. In seiner Wohnung findet die Polizei später Hinweise auf eine rechtsradikale Gesinnung - doch die Ermittlungen gegen den Mann werden eingestellt. Dabei bleiben viele Fragen offen.",
"language": "de",
"libraryCatalog": "Sueddeutsche.de",
"publicationTitle": "sueddeutsche.de",
"section": "politik",
"url": "http://www.sueddeutsche.de/politik/verdacht-gegen-hessischen-verfassungsschuetzer-spitzname-kleiner-adolf-1.1190178",
"attachments": [
{
"title": "Snapshot",
"mimeType": "text/html",
"snapshot": true
}
],
"tags": [
"Internet",
"Politik",
"Polizei",
"SZ",
"Süddeutsche Zeitung",
"rechter Terror"
],
"notes": [],
"seeAlso": []
}
]
},
{
"type": "web",
"url": "http://www.sueddeutsche.de/politik",
"items": "multiple"
}
]
/** END TEST CASES **/
You can’t perform that action at this time.