Skip to content
Permalink
Browse files

Fixes and Updates - L (#1178)

* Fix authors in La Croix.js
* Update framework and fix LA Times.js
* Update Lagen.nu.js
* Update tests in Le Figaro.js
* Complete Rewrite of Le Devoir.js
* Update tests in Library Catalog (Amicus).js
* Update framework in LiveJournal.js
* Update framework in London Review of Books.js
* Update Lulu.js
* Rewrite and automatic rename Le monde diplomatique.js
  • Loading branch information...
zuphilip authored and adam3smith committed Nov 7, 2016
1 parent dc2354e commit 3a62b64c9369a2b55fd6936250f5060b68fadc5a
Showing with 561 additions and 487 deletions.
  1. +86 −47 LA Times.js
  2. +2 −2 La Croix.js
  3. +61 −70 Lagen.nu.js
  4. +144 −119 Le Devoir.js
  5. +1 −1 Le Figaro.js
  6. +0 −169 Le Monde Diplomatique_de.js
  7. +200 −0 Le monde diplomatique.js
  8. +13 −13 Library Catalog (Amicus).js
  9. +32 −41 LiveJournal.js
  10. +13 −16 London Review of Books.js
  11. +9 −9 Lulu.js

Large diffs are not rendered by default.

Oops, something went wrong.
@@ -9,7 +9,7 @@
"inRepository": true,
"translatorType": 4,
"browserSupport": "gcsibv",
"lastUpdated": "2016-06-09 05:53:08"
"lastUpdated": "2016-11-03 20:18:42"
}

/*
@@ -85,7 +85,7 @@ function scrape(doc, url) {
translator.setDocument(doc);

translator.setHandler('itemDone', function (obj, item) {
var bylineNodes = ZU.xpath(doc, '//div[contains(@class, "visible-xs")]//div[contains(@class, "meta-author")]');
var bylineNodes = ZU.xpath(doc, '//div[contains(@class, "visible-xs")]//div[contains(@class, "meta-author")]/a');
item.creators = [];
if (bylineNodes.length>0) {
var authorPart = bylineNodes[0].textContent.split(",")[0];

Large diffs are not rendered by default.

Oops, something went wrong.
@@ -1,142 +1,167 @@
{
"translatorID":"d1605270-d7dc-459f-9875-74ad8dde1f7d",
"translatorType":4,
"label":"Le Devoir",
"creator":"Adam Crymble",
"target":"http://www.ledevoir.com",
"minVersion":"1.0.0b4.r5",
"maxVersion":"",
"priority":100,
"inRepository":true,
"lastUpdated":"2008-08-21 15:45:00"
"translatorID": "d1605270-d7dc-459f-9875-74ad8dde1f7d",
"label": "Le Devoir",
"creator": "Philipp Zumstein",
"target": "https?://www\\.ledevoir\\.com",
"minVersion": "3.0",
"maxVersion": "",
"priority": 100,
"inRepository": true,
"translatorType": 4,
"browserSupport": "gcsibv",
"lastUpdated": "2016-11-04 07:35:05"
}

/*
***** BEGIN LICENSE BLOCK *****
Copyright © 2016 Philipp Zumstein
This file is part of Zotero.
Zotero is free software: you can redistribute it and/or modify
it under the terms of the GNU Affero General Public License as published by
the Free Software Foundation, either version 3 of the License, or
(at your option) any later version.
Zotero is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU Affero General Public License for more details.
You should have received a copy of the GNU Affero General Public License
along with Zotero. If not, see <http://www.gnu.org/licenses/>.
***** END LICENSE BLOCK *****
*/


function detectWeb(doc, url) {
if (doc.location.href.match("Recherche")) {
if (url.indexOf("/recherche")>-1) {
return "multiple";
} else if (doc.evaluate('//div[@id="autresArticles"]/p', doc, null, XPathResult.ANY_TYPE, null).iterateNext()) {
} else if (ZU.xpathText(doc, '//article[@id="article"]')) {
return "newspaperArticle";
}
}

//Le Devoir Translator. Code by Adam Crymble

function scrape(doc, url) {
/**
 * Gathers the links matched by '//article/h2/a' on a page.
 *
 * Each link's text is passed through ZU.trimInternal; links with an empty
 * href or an empty resulting title are ignored.
 *
 * @param {Document} doc - page to query
 * @param {boolean} checkOnly - when truthy, return true as soon as one
 *     usable link is seen instead of building the full map
 * @returns {Object|boolean} map of href -> title, true (checkOnly with at
 *     least one usable link), or false when no usable link exists
 */
function getSearchResults(doc, checkOnly) {
	var links = ZU.xpath(doc, '//article/h2/a');
	var results = {};
	var count = 0;
	for (var idx = 0; idx < links.length; idx++) {
		var link = links[idx];
		var target = link.href;
		var label = ZU.trimInternal(link.textContent);
		if (target && label) {
			// The caller only wants to know whether anything is there.
			if (checkOnly) {
				return true;
			}
			results[target] = label;
			count++;
		}
	}
	if (count > 0) {
		return results;
	}
	return false;
}


/**
 * Translator entry point.
 *
 * When detectWeb reports "multiple", the search results are offered through
 * Zotero.selectItems and the chosen URLs are handed to ZU.processDocuments
 * with scrape as the per-document handler. Otherwise the current document is
 * scraped directly.
 *
 * @param {Document} doc - the loaded page
 * @param {string} url - the page's URL
 */
function doWeb(doc, url) {
	if (detectWeb(doc, url) != "multiple") {
		scrape(doc, url);
		return;
	}

	var candidates = getSearchResults(doc, false);
	Zotero.selectItems(candidates, function (selected) {
		// Nothing was chosen: stop here.
		if (!selected) return true;

		var urls = [];
		for (var href in selected) {
			urls.push(href);
		}
		ZU.processDocuments(urls, scrape);
	});
}

var namespace = doc.documentElement.namespaceURI;
var nsResolver = namespace ? function(prefix) {
if (prefix == 'x') return namespace; else return null;
} : null;

var tagsContent = new Array();
function scrape(doc, url) {

var newItem = new Zotero.Item("newspaperArticle");
newItem.title = ZU.xpathText(doc, '//h1');

var contents = doc.evaluate('//div[@id="autresArticles"]/p', doc, nsResolver, XPathResult.ANY_TYPE, null);

var j = 0;
var n = 0;
var contentsArray = new Array();
var contents1;
var specs = ZU.xpath(doc, '//span[@class="specs_content"]');
var date = ZU.xpathText(specs, './text()[1]');
newItem.date = ZU.strToISO(date);

var authors = ZU.xpath(specs, './a[contains(@href, "/auteur")]');
for (var i=0; i<authors.length; i++) {
newItem.creators.push(ZU.cleanAuthor(authors[i].textContent, "author"));
}

newItem.section = ZU.xpathText(specs, './a[@class="section"]');

newItem.abstractNote = ZU.xpathText(doc, '//meta[@property="og:description"]/@content');

while (contents1 = contents.iterateNext()) {
contentsArray.push(contents1.textContent);
j++;
}

var author;
var author1;
var author2;

if (j > 1) {
for (var i in contentsArray) {
if (contentsArray[i].match("Édition du ")) {
var date1 = contentsArray[i].split("Édition du ");

newItem.date = date1[1];

if (date1[0].match(/\w/)) {

author = date1[0];
if (author.match(/\n/)) {
author1 = author.split(/\n/);

for (var k = 0; k < author1.length; k++) {
if (author1[k].match(/\w/) && author1[k].match(", ")) {
author2 = author1[k].split(", ");
if (author2[0].match(/\w/)) {
newItem.creators.push(Zotero.Utilities.cleanAuthor(author2[0], "author"));
} else {
newItem.creators.push(Zotero.Utilities.cleanAuthor(author2[1], "author"));
}
} else if (author1[k].match(/\w/) && !author1[k].match(", ")) {
newItem.creators.push(Zotero.Utilities.cleanAuthor(author1[k], "author"));
}
}
} else if (author.match(" et ")) {
author1 = author.split(" et ");
for (var k in author1) {
newItem.creators.push(Zotero.Utilities.cleanAuthor(author1[k], "author"));
}
} else if (author.match(", ")) {
author1 = author.split(", ");
for (var k in author1) {
newItem.creators.push(Zotero.Utilities.cleanAuthor(author1[k], "author"));
}
} else {
newItem.creators.push(Zotero.Utilities.cleanAuthor(date1[0], "author"));
}
}
} else if (contentsArray[i].match("Mots clés")) {
contentsArray[i] = contentsArray[i].substr(11);
if (contentsArray[i].match(", ")) {
tagsContent = contentsArray[i].split(", ");
} else {
newItem.tags = ontentsArray[i];
n = 1;
}
}
}
}

if (n == 0 && tagsContent.length>1) {
for (var i = 0; i < tagsContent.length; i++) {
newItem.tags[i] = tagsContent[i];
}
}
newItem.language = ZU.xpathText(doc, '//meta[@name="language"]/@content');

newItem.title = doc.title;
newItem.url = doc.location.href;
newItem.url = url;
newItem.publicationTitle = "Le Devoir";
newItem.ISSN = "0319-0722";

newItem.complete();
}

function doWeb(doc, url) {
var namespace = doc.documentElement.namespaceURI;
var nsResolver = namespace ? function(prefix) {
if (prefix == 'x') return namespace; else return null;
} : null;

var articles = new Array();

if (detectWeb(doc, url) == "multiple") {
var items = new Object();

var titles = doc.evaluate('//td[2]/a', doc, nsResolver, XPathResult.ANY_TYPE, null);

var next_title;
while (next_title = titles.iterateNext()) {
items[next_title.href] = next_title.textContent;
}
items = Zotero.selectItems(items);
for (var i in items) {
articles.push(i);
}
} else {
articles = [url];
/** BEGIN TEST CASES **/
var testCases = [
{
"type": "web",
"url": "http://www.ledevoir.com/politique/quebec/483793/journalistes-surveilles-le-comite-d-experts-sur-travail-des-policiers-aura-les-pouvoirs-d-une-commission-d-enquete",
"items": [
{
"itemType": "newspaperArticle",
"title": "Des dérives policières alarmantes",
"creators": [
{
"firstName": "Marco",
"lastName": "Bélair-Cirino",
"creatorType": "author"
}
],
"date": "2016-11-04",
"ISSN": "0319-0722",
"abstractNote": "Après avoir appris la mise sur pied d’une commission d’enquête sur la liberté de la presse, la salle des nouvelles de Radio-Canada a été frappée de consternation. La Sûreté du Québec a réussi à mettre la main sur des liasses de relevés téléphoniques, s’échelonnant de 2008 à 2013, des journalistes d’enquête Alain Gravel, Marie-Maude Denis et Isabelle Richer.",
"language": "fr",
"libraryCatalog": "Le Devoir",
"publicationTitle": "Le Devoir",
"section": "Québec",
"url": "http://www.ledevoir.com/politique/quebec/483793/journalistes-surveilles-le-comite-d-experts-sur-travail-des-policiers-aura-les-pouvoirs-d-une-commission-d-enquete",
"attachments": [],
"tags": [],
"notes": [],
"seeAlso": []
}
]
},
{
"type": "web",
"url": "http://www.ledevoir.com/societe/medias/110295/medias-l-ipi-appelle-a-la-liberte-de-presse",
"items": [
{
"itemType": "newspaperArticle",
"title": "Médias - L'IPI appelle à la liberté de presse",
"creators": [],
"date": "2006",
"ISSN": "0319-0722",
"abstractNote": "Édimbourg — Le directeur de l'Institut international de la presse (IPI), Johann Fritz, a appelé hier à Édimbourg les gouvernements du monde entier à défendre et renforcer la liberté de la presse menacée de toutes parts.",
"language": "fr",
"libraryCatalog": "Le Devoir",
"publicationTitle": "Le Devoir",
"url": "http://www.ledevoir.com/societe/medias/110295/medias-l-ipi-appelle-a-la-liberte-de-presse",
"attachments": [],
"tags": [],
"notes": [],
"seeAlso": []
}
]
},
{
"type": "web",
"url": "http://www.ledevoir.com/recherche?expression=libert%C3%A9",
"items": "multiple"
}
Zotero.Utilities.processDocuments(articles, scrape, function() {Zotero.done();});
Zotero.wait();
}
]
/** END TEST CASES **/
@@ -130,9 +130,9 @@ var testCases = [
}
],
"tags": [
"A la Une",
"Celtes",
"Culture",
"Diaporama",
"Pays de Galles"
],
"notes": [],
Oops, something went wrong.

1 comment on commit 3a62b64

@adam3smith

This comment has been minimized.

Copy link
Collaborator

adam3smith commented on 3a62b64 Nov 7, 2016

and, of course, thanks @zuphilip for all of these

Please sign in to comment.
You can’t perform that action at this time.