Skip to content
Permalink
Browse files

Add languages to some translators, #1303 #wikicite17 (#1304)

  • Loading branch information...
zuphilip authored and adam3smith committed May 23, 2017
1 parent bb1cb17 commit 427690f75646c0f00a730b74b4f7c18d30b26c23
Showing with 92 additions and 70 deletions.
  1. +5 −1 Die Zeit.js
  2. +23 −21 Foreign Affairs.js
  3. +7 −3 NYTimes.com.js
  4. +12 −10 National Post.js
  5. +3 −2 Paris Review.js
  6. +16 −16 The Chronicle of Higher Education.js
  7. +7 −5 Time.com.js
  8. +2 −1 Toronto Star.js
  9. +17 −11 Wall Street Journal.js
@@ -9,7 +9,7 @@
"inRepository": true,
"translatorType": 4,
"browserSupport": "gcsibv",
"lastUpdated": "2016-09-12 05:37:03"
"lastUpdated": "2017-05-23 21:14:22"
}

/*
@@ -126,6 +126,7 @@ function scrape(doc, url){

newItem.publicationTitle = "Die Zeit";
newItem.ISSN = "0044-2070";
newItem.language = "de-DE";
newItem.place = "Hamburg";

var keywordsString = ZU.xpathText(doc, '//meta[@name="keywords"]/@content');
@@ -170,6 +171,7 @@ var testCases = [
"date": "2011-09-04",
"ISSN": "0044-2070",
"abstractNote": "Die von Gadhafi-Anhängern geführte Stadt ist von Rebellentruppen eingekreist. Gespräche über eine friedliche Übergabe sind gescheitert, ein Angriff steht offenbar bevor.",
"language": "de-DE",
"libraryCatalog": "Die Zeit",
"place": "Hamburg",
"publicationTitle": "Die Zeit",
@@ -209,6 +211,7 @@ var testCases = [
"date": "2011-09-01",
"ISSN": "0044-2070",
"abstractNote": "Tschechow und Robben, Drama im Flutlicht und Wahrhaftigkeit bei der Arbeit. Der Fußballprofi und Autor Philipp Lahm im Gespräch mit dem Schriftsteller und Fußballer Moritz Rinke",
"language": "de-DE",
"libraryCatalog": "Die Zeit",
"place": "Hamburg",
"publicationTitle": "Die Zeit",
@@ -275,6 +278,7 @@ var testCases = [
"date": "2009-03-05",
"ISSN": "0044-2070",
"abstractNote": "Ein Iraner in Wien. Der Fotograf Daniel Shaked, 31, gibt Österreichs einziges Hip-Hop-Magazin heraus",
"language": "de-DE",
"libraryCatalog": "Die Zeit",
"place": "Hamburg",
"publicationTitle": "Die Zeit",
@@ -9,7 +9,7 @@
"inRepository": true,
"translatorType": 4,
"browserSupport": "gcsibv",
"lastUpdated": "2016-09-20 08:59:00"
"lastUpdated": "2017-05-23 21:21:00"
}

/*
@@ -78,27 +78,28 @@ function doWeb(doc, url) {


function scrape(doc, url) {
var translator = Zotero.loadTranslator('web');
// Embedded Metadata
translator.setTranslator('951c027d-74ac-47d4-a107-9c3069ab7b48');
var translator = Zotero.loadTranslator('web');
// Embedded Metadata
translator.setTranslator('951c027d-74ac-47d4-a107-9c3069ab7b48');

translator.setHandler('itemDone', function (obj, item) {
var creators = ZU.xpath(doc, '//div[@class="article-header__byline-container"]/span/a');
for (var i=0; i<creators.length; i++) {
item.creators.push(ZU.cleanAuthor(creators[i].textContent, "author"));
}
var issue = ZU.xpathText(doc, '//span[@class="article-header__metadata-date"]/a');
if (!item.issue && issue) {
item.issue = issue.replace('Issue', '');
}
item.ISSN = "0015-7120";
item.complete();
});
translator.setHandler('itemDone', function (obj, item) {
var creators = ZU.xpath(doc, '//div[@class="article-header__byline-container"]/span/a');
for (var i=0; i<creators.length; i++) {
item.creators.push(ZU.cleanAuthor(creators[i].textContent, "author"));
}
var issue = ZU.xpathText(doc, '//span[@class="article-header__metadata-date"]/a');
if (!item.issue && issue) {
item.issue = issue.replace('Issue', '');
}
item.ISSN = "0015-7120";
item.language = "en-US";
item.complete();
});

translator.getTranslatorObject(function(trans) {
trans.itemType = "magazineArticle";
trans.doWeb(doc, url);
});
translator.getTranslatorObject(function(trans) {
trans.itemType = "magazineArticle";
trans.doWeb(doc, url);
});
}
/** BEGIN TEST CASES **/
var testCases = [
@@ -125,6 +126,7 @@ var testCases = [
"ISSN": "0015-7120",
"abstractNote": "A fascinating and well-translated account of Argentina's misadventures over the last century by one of that country's brightest historians. Absorbing vast amounts of British capital and tens of thousands of European immigrants, Argentina began the century with great promise. In 1914, with half of its population still foreign, a dynamic society had emerged that was both open and mobile.",
"issue": "May/June 2003",
"language": "en-US",
"libraryCatalog": "www.foreignaffairs.com",
"publicationTitle": "Foreign Affairs",
"url": "https://www.foreignaffairs.com/reviews/capsule-review/2003-05-01/history-argentina-twentieth-century",
@@ -266,4 +268,4 @@ var testCases = [
]
}
]
/** END TEST CASES **/
/** END TEST CASES **/
@@ -9,7 +9,7 @@
"inRepository": true,
"translatorType": 4,
"browserSupport": "gcsv",
"lastUpdated": "2015-06-02 20:52:10"
"lastUpdated": "2017-05-23 21:27:51"
}

function detectWeb(doc, url) {
@@ -49,6 +49,7 @@ function scrape(doc, url) {
var newItem = new Zotero.Item("newspaperArticle");
newItem.publicationTitle = "The New York Times";
newItem.ISSN = "0362-4331";
newItem.language = "en-US";

This comment has been minimized.

Copy link
@avram

avram May 24, 2017

Contributor

So NYT makes a regular practice of publishing non-English versions of stories of particular interest to speakers of a given language. I've seen it with Russian, Spanish, and Chinese, and I'm sure other languages have appeared as well.

This comment has been minimized.

Copy link
@dstillman

This comment has been minimized.

Copy link
@zuphilip

zuphilip May 24, 2017

Author Collaborator

Thank you for the info. I will switch to the metadata field as Dan suggested.

var metaTags = new Object();
var metaTagsProperty = new Object();
if (url != undefined) {
@@ -240,6 +241,7 @@ var testCases = [
"date": "1912-03-05",
"ISSN": "0362-4331",
"abstractNote": "WASHINGTON, March 4. -- The Money Trust inquiry and consideration of the proposed Aldrich monetary legislation will probably be handled side by side by the House Banking and Currency Committee. The present tentative plan is to divide the committee into two parts, one of which, acting as a sub-committee, will investigate as far as it can those allegations of the Henry Money Trust resolution which fall within the jurisdiction of the Banking and Currency Committee.",
"language": "en-US",
"libraryCatalog": "NYTimes.com",
"publicationTitle": "The New York Times",
"url": "http://query.nytimes.com/gst/abstract.html?res=9C07E4DC143CE633A25756C0A9659C946396D6CF&legacy=true",
@@ -275,6 +277,7 @@ var testCases = [
"date": "2010-08-20",
"ISSN": "0362-4331",
"abstractNote": "The university has found Marc Hauser “solely responsible” for eight instances of scientific misconduct.",
"language": "en-US",
"libraryCatalog": "NYTimes.com",
"publicationTitle": "The New York Times",
"url": "http://www.nytimes.com/2010/08/21/education/21harvard.html",
@@ -303,7 +306,7 @@ var testCases = [
},
{
"type": "web",
"url": "http://opinionator.blogs.nytimes.com/2013/06/19/our-broken-social-contract/",
"url": "https://opinionator.blogs.nytimes.com/2013/06/19/our-broken-social-contract/",
"items": [
{
"itemType": "blogPost",
@@ -318,7 +321,8 @@ var testCases = [
"date": "2013-06-19",
"abstractNote": "At their core, are America’s problems primarily economic or moral?",
"blogTitle": "The New York Times",
"url": "http://opinionator.blogs.nytimes.com/2013/06/19/our-broken-social-contract/",
"language": "en-US",
"url": "https://opinionator.blogs.nytimes.com/2013/06/19/our-broken-social-contract/",
"attachments": [
{
"title": "New York Times Snapshot"
@@ -1,14 +1,15 @@
{
"translatorID":"1c5b122c-7e58-4cd5-932b-93f5ca0b7e1a",
"translatorType":4,
"label":"National Post",
"creator":"Adam Crymble",
"target":"http://www.(national|financial)post.com/",
"minVersion":"1.0.0b4.r5",
"maxVersion":"",
"priority":100,
"inRepository":true,
"lastUpdated":"2008-08-11 20:40:00"
"translatorID": "1c5b122c-7e58-4cd5-932b-93f5ca0b7e1a",
"label": "National Post",
"creator": "Adam Crymble",
"target": "http://www.(national|financial)post.com/",
"minVersion": "1.0.0b4.r5",
"maxVersion": "",
"priority": 100,
"inRepository": true,
"translatorType": 4,
"browserSupport": "g",
"lastUpdated": "2017-05-23 21:23:02"
}

function detectWeb(doc, url) {
@@ -84,6 +85,7 @@ function scrape(doc) {
newItem.title = title1;
newItem.publication = "The National Post";
newItem.ISSN = "1486-8008";
newItem.language = "en-CA";

newItem.complete();
}
@@ -9,7 +9,7 @@
"inRepository": true,
"translatorType": 4,
"browserSupport": "gcsibv",
"lastUpdated": "2014-04-03 18:50:53"
"lastUpdated": "2017-05-23 21:29:39"
}

/*
@@ -93,7 +93,8 @@ function magazineArticle(doc,url) {
item.issue = doc.evaluate('//div[@class="moreonissue-right"]/h3/text()[2]', doc, null, XPathResult.ANY_TYPE, null).iterateNext().textContent.match(/[0-9]+/)[0];
item.publicationTitle = "Paris Review";
item.url = url;
item.ISSN="0031-2037";
item.ISSN = "0031-2037";
item.language = "en-US";
item.attachments.push({url:url})
item.complete();
}
@@ -9,7 +9,7 @@
"inRepository": true,
"translatorType": 4,
"browserSupport": "gcsbv",
"lastUpdated": "2017-04-09 14:53:05"
"lastUpdated": "2017-05-23 21:34:32"
}

/*
@@ -86,6 +86,7 @@ function scrape (doc, url){
item.publicationTitle = "The Chronicle of Higher Education";
// Does the ISSN apply to online-only blog posts?
item.ISSN = "0009-5982";
item.language = "en-US";

var byline = doc.evaluate('//header/div/span[@class="content-item__byline"]', doc, null, XPathResult.ANY_TYPE, null).iterateNext();
if (!byline) byline = doc.evaluate('//div[@class="blog__author"]/a', doc, null, XPathResult.ANY_TYPE, null).iterateNext();
@@ -159,7 +160,7 @@ function parseAuthors(author) {
var testCases = [
{
"type": "web",
"url": "http://chronicle.com/blogs/profhacker/the-second-day-of-thatcamp/23068",
"url": "http://www.chronicle.com/blogs/profhacker/the-second-day-of-thatcamp/23068",
"items": [
{
"itemType": "blogPost",
@@ -172,11 +173,9 @@ var testCases = [
}
],
"date": "March 26, 2010",
"ISSN": "0009-5982",
"accessDate": "CURRENT_TIMESTAMP",
"libraryCatalog": "The Chronicle of Higher Education",
"publicationTitle": "The Chronicle of Higher Education Blogs: ProfHacker",
"url": "http://chronicle.com/blogs/profhacker/the-second-day-of-thatcamp/23068",
"blogTitle": "The Chronicle of Higher Education Blogs: ProfHacker",
"language": "en-US",
"url": "http://www.chronicle.com/blogs/profhacker/the-second-day-of-thatcamp/23068",
"attachments": [
{
"title": "Chronicle of Higher Education Snapshot",
@@ -191,7 +190,7 @@ var testCases = [
},
{
"type": "web",
"url": "http://chronicle.com/article/A-Little-Advice-From-32000/46210/",
"url": "http://www.chronicle.com/article/A-Little-Advice-From-32000/46210/",
"items": [
{
"itemType": "magazineArticle",
@@ -210,11 +209,10 @@ var testCases = [
],
"date": "January 14, 2002",
"ISSN": "0009-5982",
"accessDate": "CURRENT_TIMESTAMP",
"language": "en-US",
"libraryCatalog": "The Chronicle of Higher Education",
"publicationTitle": "The Chronicle of Higher Education",
"section": "Advice",
"url": "http://chronicle.com/article/A-Little-Advice-From-32000/46210/",
"url": "http://www.chronicle.com/article/A-Little-Advice-From-32000/46210/",
"attachments": [
{
"title": "Chronicle of Higher Education Snapshot",
@@ -229,7 +227,7 @@ var testCases = [
},
{
"type": "web",
"url": "http://chronicle.com/article/Grinnells-Green-Secrets/2653/",
"url": "http://www.chronicle.com/article/Grinnells-Green-Secrets/2653/",
"items": [
{
"itemType": "magazineArticle",
@@ -243,9 +241,10 @@ var testCases = [
],
"date": "June 16, 2006",
"ISSN": "0009-5982",
"language": "en-US",
"libraryCatalog": "The Chronicle of Higher Education",
"publicationTitle": "The Chronicle of Higher Education",
"url": "http://chronicle.com/article/Grinnells-Green-Secrets/2653/",
"url": "http://www.chronicle.com/article/Grinnells-Green-Secrets/2653/",
"attachments": [
{
"title": "Chronicle of Higher Education Snapshot",
@@ -260,7 +259,7 @@ var testCases = [
},
{
"type": "web",
"url": "http://chronicle.com/blogs/brainstorm/humanities-cyberinfrastructure-project-bamboo/6138",
"url": "http://www.chronicle.com/blogs/brainstorm/humanities-cyberinfrastructure-project-bamboo/6138",
"items": [
{
"itemType": "blogPost",
@@ -274,8 +273,9 @@ var testCases = [
],
"date": "July 17, 2008",
"blogTitle": "The Chronicle of Higher Education Blogs: Brainstorm",
"language": "en-US",
"shortTitle": "Humanities Cyberinfrastructure",
"url": "http://chronicle.com/blogs/brainstorm/humanities-cyberinfrastructure-project-bamboo/6138",
"url": "http://www.chronicle.com/blogs/brainstorm/humanities-cyberinfrastructure-project-bamboo/6138",
"attachments": [
{
"title": "Chronicle of Higher Education Snapshot",
@@ -299,4 +299,4 @@ var testCases = [
"items": "multiple"
}
]
/** END TEST CASES **/
/** END TEST CASES **/
@@ -9,7 +9,7 @@
"inRepository": true,
"translatorType": 4,
"browserSupport": "gcsv",
"lastUpdated": "2016-06-14 03:54:31"
"lastUpdated": "2017-05-23 21:37:25"
}

function detectWeb(doc, url) {
@@ -64,6 +64,7 @@ function scrape(doc, url) {
item.publicationTitle = "Time";
item.url = url;
item.ISSN = "0040-781X";
item.language = "en-US";

var authors = article.getElementsByClassName('byline');
if (authors.length) {
@@ -90,6 +91,7 @@ function scrape(doc, url) {
item.publicationTitle = "Time";
item.url = url;
item.ISSN = "0040-781X";
item.language = "en-US";

var authors = ZU.xpathText(doc, '//meta[@name="byline"]/@content')
|| ZU.xpathText(doc, '//span[@class="author vcard"]/a', null, ' and ')
@@ -233,7 +235,7 @@ var testCases = [
"date": "Thursday, Nov. 17, 2011",
"ISSN": "0040-781X",
"abstractNote": "Battling debilitating congressional mandates and competition online, the USPS is closing thousands of post offices and struggling to find a place in the modern world. But there are people behind the scenes trying to save this American institution",
"accessDate": "CURRENT_TIMESTAMP",
"language": "en-US",
"libraryCatalog": "content.time.com",
"publicationTitle": "Time",
"url": "http://content.time.com/time/nation/article/0,8599,2099187,00.html",
@@ -270,7 +272,7 @@ var testCases = [
"date": "Sunday, Mar. 04, 2012",
"ISSN": "0040-781X",
"abstractNote": "The month of March isn't really the heart of the tornado season but they have come fast and with awesome destruction.",
"accessDate": "CURRENT_TIMESTAMP",
"language": "en-US",
"libraryCatalog": "content.time.com",
"publicationTitle": "Time",
"shortTitle": "On Scene in Indiana and Kentucky",
@@ -310,7 +312,7 @@ var testCases = [
],
"ISSN": "0040-781X",
"abstractNote": "Obama rejected any notion that his administration has not been in Israel's corner. “Over the last three years, as President of the United States, I have kept my commitments to the state of Israel.\" The President then ticked off the number of ways he has supported Israel in the last year.",
"accessDate": "CURRENT_TIMESTAMP",
"language": "en-US",
"libraryCatalog": "swampland.time.com",
"publicationTitle": "Time",
"url": "http://swampland.time.com/2012/03/04/obama-courts-aipac-before-netanyahu-meeting/?iid=sl-main-lede",
@@ -352,7 +354,7 @@ var testCases = [
],
"ISSN": "0040-781X",
"abstractNote": "Despite signs that some housing markets are improving, the overall trend is for home prices (and values) to keep dropping—and dropping. As values shrink, more and more homeowners find themselves underwater, the unfortunate scenario in which one owes more on the mortgage than the home is worth.",
"accessDate": "CURRENT_TIMESTAMP",
"language": "en-US",
"libraryCatalog": "business.time.com",
"publicationTitle": "Time",
"shortTitle": "Struggling to Stay Afloat",
Oops, something went wrong.

0 comments on commit 427690f

Please sign in to comment.
You can’t perform that action at this time.