Permalink
Please
sign in to comment.
Browse files
Fixes T (zotero#1351)
* Update Tagesspiegel.js * Update Talis Aspire.js * Delete Tatar zamanı.js The website for this newspaper no longer exists and the domain www.tatartime.com/ just shows some information about hard drive destruction. * Rewrite taz.de.js * Update Taylor and Francis+NEJM.js * Update tests in The Atlantic.js * Update The Boston Globe.js * Update The Chronicle of Higher Education.js * Rewrite The Daily Beast.js I used newspaperArticle for the type rather than generic website. Moreover, it seems that the blogs are no longer hosted under this domain. The test case for this, for example, moved to this new URL http://pulitzercenter.org/reporting/egyptian-theater-troupe-brings-domestic-violence-light The cheatsheet seems to be a page with multiple smaller news articles and is currently skipped completely. * Update The Economist.js The translator should work now when the page is fully loaded. However, the website makes extensive use of delayed content loading, so the tests sometimes fail. * Update The Free Dictionary.js * Update tests in The Globe and Mail.js * Rewrite The Guardian.js Closes zotero#1347 * Rewrite The Hamilton Spectator.js * Rewrite The Hindu.js * Update The Hindu (old).js * Rewrite The Met.js * Rewrite The Microfinance Gateway.js * Rewrite The Nation.js * Update The New Republic.js * Rewrite The New York Review of Books.js * Rewrite The New Yorker.js * Update tests in The Telegraph.js * Rename and rewrite The Times and Sunday Times.js The name now corresponds to the label. There seems to be no search on the website --> no multiples * Update TheMarker.js * Update tests in Theory of Computing.js There is a 404 on the Indian mirror, so the test URL was switched to the Swedish mirror. * Rewrite Toronto Star.js * Update Treesearch.js * Rewrite TVNZ.js * Update Twitter.js Note that multiples work when the site is loaded but not as automatic test cases. * Fix date in The Hindu.js
- Loading branch information...
Showing
with
2,469 additions
and 2,241 deletions.
- +114 −160 TVNZ.js
- +135 −59 Tagesspiegel.js
- +58 −120 Talis Aspire.js
- +0 −111 Tatar zamani.js
- +96 −91 Taylor and Francis+NEJM.js
- +8 −8 The Atlantic.js
- +23 −25 The Boston Globe.js
- +36 −30 The Chronicle of Higher Education.js
- +99 −175 The Daily Beast.js
- +48 −40 The Economist.js
- +18 −29 The Free Dictionary.js
- +4 −4 The Globe and Mail.js
- +168 −65 The Guardian.js
- +85 −79 The Hamilton Spectator.js
- +16 −17 The Hindu (old).js
- +207 −192 The Hindu.js
- +92 −68 The Met.js
- +129 −65 The Microfinance Gateway.js
- +126 −118 The Nation.js
- +15 −17 The New Republic.js
- +123 −88 The New York Review of Books.js
- +113 −53 The New Yorker.js
- +16 −22 The Telegraph.js
- +0 −105 The Times UK.js
- +178 −0 The Times and Sunday Times.js
- +7 −4 TheMarker.js
- +111 −114 Theory of Computing.js
- +137 −106 Toronto Star.js
- +93 −99 Treesearch.js
- +82 −37 Twitter.js
- +4 −2 deleted.txt
- +128 −138 taz.de.js
@@ -1,215 +1,169 @@ | |||
{ | |||
"translatorID": "649c2836-a94d-4bbe-8e28-6771f283702f", | |||
"label": "TVNZ", | |||
"creator": "Sopheak Hean", | |||
"target": "^https?://tvnz\\.co\\.nz", | |||
"minVersion": "1.0", | |||
"creator": "Philipp Zumstein", | |||
"target": "^https?://(www\\.)?tvnz\\.co\\.nz/one-news/", | |||
"minVersion": "3.0", | |||
"maxVersion": "", | |||
"priority": 100, | |||
"inRepository": true, | |||
"translatorType": 4, | |||
"browserSupport": "gcsibv", | |||
"lastUpdated": "2014-04-04 10:01:57" | |||
"lastUpdated": "2017-07-05 06:37:09" | |||
} | |||
|
|||
/* | |||
***** BEGIN LICENSE BLOCK ***** | |||
Copyright © 2017 Philipp Zumstein | |||
This file is part of Zotero. | |||
Zotero is free software: you can redistribute it and/or modify | |||
it under the terms of the GNU Affero General Public License as published by | |||
the Free Software Foundation, either version 3 of the License, or | |||
(at your option) any later version. | |||
Zotero is distributed in the hope that it will be useful, | |||
but WITHOUT ANY WARRANTY; without even the implied warranty of | |||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | |||
GNU Affero General Public License for more details. | |||
You should have received a copy of the GNU Affero General Public License | |||
along with Zotero. If not, see <http://www.gnu.org/licenses/>. | |||
***** END LICENSE BLOCK ***** | |||
*/ | |||
|
|||
|
|||
function detectWeb(doc, url) { | |||
if (url.indexOf("/search/") !=-1){ | |||
if (url.indexOf("/search?") !=-1 && getSearchResults(doc, true)){ | |||
return "multiple"; | |||
} | |||
else if ((url.indexOf("politics-news/") !=-1) && (url.indexOf("-video") !=-1) | |||
|| (url.indexOf("politics-news/") !=-1) && (url.indexOf("/video") !=-1) | |||
|| (url.indexOf("business-news/") !=-1) && (url.indexOf("-video") !=-1) | |||
|| (url.indexOf("national-news/") !=-1) && (url.indexOf("-video") !=-1) | |||
|| (url.indexOf("breakfast-news/") !=-1) && (url.indexOf("-video") !=-1) | |||
|| (url.indexOf("breakfast-news/") !=-1) && (url.indexOf("/video") !=-1) | |||
|| (url.indexOf("world-news/") !=-1) && (url.indexOf("-video") !=-1) | |||
|| (url.indexOf("all-blacks/") !=-1) && (url.indexOf("-video") !=-1) | |||
|| (url.indexOf("weather/") !=-1) && (url.indexOf("-video") !=-1) | |||
|| (url.indexOf("-news/") !=-1) && (url.indexOf("-video") !=-1) | |||
|| (url.indexOf("-news/") !=-1) && (url.indexOf("/video") !=-1) | |||
|| (url.indexOf("on/") !=-1) && (url.indexOf("-video") !=-1) | |||
|| (url.indexOf("up/") !=-1) && (url.indexOf("/video") !=-1)){ | |||
return "tvBroadcast"; | |||
} | |||
else if ((url.indexOf("news/") !=-1) || (url.indexOf("all-blacks/") !=-1) || (url.indexOf("up/")!=-1)){ | |||
if (ZU.xpathText(doc, '//meta[@property="og:type"]/@content')) { | |||
return "newspaperArticle"; | |||
} | |||
} | |||
|
|||
function scrape(doc, url){ | |||
if (detectWeb(doc, url) == "newspaperArticle") { | |||
var newItem = new Zotero.Item('newspaperArticle'); | |||
newItem.url = url; | |||
newItem.publicationTitle = "TVNZ"; | |||
newItem.language = "en"; | |||
|
|||
newItem.title = ZU.xpathText(doc, '//h1'); | |||
|
|||
var date = ZU.xpathText(doc, '//p[@class="time"]'); | |||
if(date){ | |||
newItem.date = ZU.trimInternal(date.replace(/\W\bPublished:\W\d{1,2}:\d{1,2}(AM|PM) (\w)+ /g, '')); | |||
} | |||
|
|||
//get Author from the article | |||
var author = ZU.xpathText(doc, '//p[@class="source"]'); | |||
if (author){ | |||
newItem.creators.push(ZU.cleanAuthor(author.replace(/\W\bSource:\W+/g, '').replace(/\W+/g, '-'), "author")); | |||
} | |||
|
|||
//get Section of the article | |||
var section = ZU.xpathText(doc, '//li[@class="selectedLi"]/a/span'); | |||
if (section){ | |||
section = section.replace(/^s/g, ''); | |||
var sectionArray = new Array("Rugby", "All Blacks", "Cricket", "League", "Football", "Netball", "Basketball", "Tennis", "Motor", "Golf", "Other", "Tipping"); | |||
|
|||
//loop through the Array and check for condition for section category | |||
//var count =0; | |||
for (var i=0; i <sectionArray.length; i++){ | |||
//count = 1; | |||
//if there is a match in the loop then replacing the section found with SPORT | |||
if(section == sectionArray[i]){ | |||
newItem.section = "Sport"; | |||
break; | |||
} | |||
} | |||
//if not found then take the value from XPath | |||
if(i == sectionArray.length) { | |||
newItem.section = section; | |||
} | |||
function scrape(doc, url){ | |||
var item = new Zotero.Item("newspaperArticle"); | |||
item.title = ZU.xpathText(doc, '//meta[@property="og:title"]/@content'); | |||
item.date = ZU.xpathText(doc, '(//div[contains(@class, "storyPage") and h1]//time)[1]'); | |||
if (item.date) { | |||
if (item.date.match(/\d\d?:\d\d[pa]m/)) { | |||
item.date = "Today" | |||
} else if (!item.date.match(/\d\d\d\d/)) { | |||
item.date += " 2017"; | |||
} | |||
|
|||
//get Abstract | |||
newItem.abstractNote = ZU.xpathText(doc, "//meta[@name='description']"); | |||
|
|||
//closed up NewItem | |||
newItem.complete(); | |||
} else if (detectWeb(doc, url) == "tvBroadcast"){ | |||
var newItem = new Zotero.Item("tvBroadcast"); | |||
newItem.url = url; | |||
|
|||
newItem.network = "TVNZ"; | |||
newItem.language = "en"; | |||
item.date = ZU.strToISO(item.date); | |||
} | |||
item.abstractNote = ZU.xpathText(doc, '//meta[@property="og:description"]/@content'); | |||
var tagString = ZU.xpathText(doc, '//meta[@name="news_keywords"]/@content'); | |||
if (tagString) { | |||
item.tags = tagString.split(', '); | |||
} | |||
item.section = ZU.xpathText(doc, '//div[@class="colStorySectionHeader"]/div[@class="tagItem"]/h2'); | |||
item.url = ZU.xpathText(doc, '//link[@rel="canonical"]/@href') || url; | |||
item.publicationTitle = "TVNZ"; | |||
item.language = "en-NZ"; | |||
item.complete(); | |||
} | |||
|
|||
/* get Title and Running time for video clip */ | |||
//if meta title exist | |||
|
|||
//if the array is true then do this | |||
var date = ZU.xpathText(doc, '//p[@class="added"]'); | |||
|
|||
if (date){ | |||
newItem.date = ZU.trim(date.replace(/\W\bAdded:\W\d{1,2}:\d{1,2}(AM|PM) (\w)+ /g, '')); | |||
} else { | |||
newItem.date = ZU.trim(ZU.xpathText(doc, '//p[@class="time"]') | |||
.replace(/\W\bPublished:\W\d{1,2}:\d{1,2}(AM|PM) (\w)+ /g, '')); | |||
} | |||
|
|||
var myTitle= ZU.xpathText(doc, '//meta[@name="title" or @name="og:title"]/@content'); | |||
if (myTitle){ | |||
myTitle = myTitle.replace(/\b[)]+/g, ''); | |||
var TitleResult= myTitle.split(" ("); | |||
newItem.title = TitleResult[0]; | |||
if(TitleResult[1] == undefined) { | |||
newItem.runningTime =""; | |||
} else { | |||
newItem.runningTime = TitleResult[1]; | |||
} | |||
}else{ | |||
newItem.title= ZU.xpathText(doc, '//head/title').split(" | ")[0]; | |||
} | |||
|
|||
//get Author from the article | |||
var author = ZU.xpathText(doc, '//p[@class="source"]'); | |||
if (author){ | |||
author = author.replace(/\W\bSource:\W+/g, ''); | |||
newItem.creators.push(ZU.cleanAuthor(author.replace(/\W+/g, '-'), "author")); | |||
|
|||
} else { | |||
var keywordsObject = ZU.xpathText(doc, '//meta[@name="keywords"]').replace(/\s+/g, '-').split(","); | |||
newItem.creators.push(ZU.cleanAuthor(keywordsObject[0], "author")); | |||
} | |||
|
|||
//get Abstract | |||
newItem.abstractNote = ZU.xpathText(doc, "//meta[@name='description']"); | |||
|
|||
//get Section of the video, not sure if this meant for Archive location, if incorrect then leave it commented. | |||
//var sectionPath = "//meta[@name='keywords']"; | |||
//var sectionPathObject = doc.evaluate(sectionPath, doc, nsResolver, XPathResult.ANY_TYPE, null).iterateNext().content; | |||
//var sectionResult = sectionMetaObject.split(","); | |||
//newItem.archiveLocation = sectionPathObject; | |||
|
|||
newItem.complete(); | |||
/**
 * Collects the article links found on a search results page.
 *
 * @param {Document} doc - Page in which to look for result tiles.
 * @param {Boolean} checkOnly - When truthy, stop at the first usable result
 *     and return true instead of building the whole map.
 * @return {Object|Boolean} Map of href -> title; true when checkOnly is set
 *     and at least one usable result exists; false when nothing usable
 *     was found.
 */
function getSearchResults(doc, checkOnly) {
	var items = {};
	var found = false;
	// A result is an <a> that wraps an <h3> directly inside a "tileContent" div.
	var rows = ZU.xpath(doc, '//div[@class="tileContent"]/a[h3]');
	for (var i = 0; i < rows.length; i++) {
		var href = rows[i].href;
		var title = ZU.trimInternal(rows[i].textContent);
		// Skip tiles that lack either a link target or visible text.
		if (!href || !title) continue;
		if (checkOnly) return true;
		found = true;
		items[href] = title;
	}
	return found ? items : false;
}
|
|||
function doWeb(doc, url){ | |||
if ( detectWeb(doc, url) == "multiple"){ | |||
var titles = ZU.xpath(doc, '//div[@class="readItem"]/h4'); | |||
Zotero.selectItems(ZU.getItemArray(doc, titles), function(selectedItems) { | |||
if(!selectedItems) return true; | |||
|
|||
var articles = new Array(); | |||
for (var i in selectedItems){ | |||
function doWeb(doc, url) { | |||
if (detectWeb(doc, url) == "multiple") { | |||
Zotero.selectItems(getSearchResults(doc, false), function (items) { | |||
if (!items) { | |||
return true; | |||
} | |||
var articles = []; | |||
for (var i in items) { | |||
articles.push(i); | |||
} | |||
ZU.processDocuments(articles, function(doc) { scrape(doc, doc.location.href); }); | |||
ZU.processDocuments(articles, scrape); | |||
}); | |||
} else { | |||
scrape(doc, url); | |||
} | |||
} | |||
|
|||
/** BEGIN TEST CASES **/ | |||
var testCases = [ | |||
{ | |||
"type": "web", | |||
"url": "http://tvnz.co.nz/politics-news/jon-johansson-s-all-2014-4523189", | |||
"url": "https://www.tvnz.co.nz/one-news/new-zealand/below-average-temperatures-forecast-across-nz-first-half-july", | |||
"items": [ | |||
{ | |||
"itemType": "newspaperArticle", | |||
"title": "Below average temperatures forecast across NZ for first half of July", | |||
"creators": [], | |||
"notes": [], | |||
"tags": [], | |||
"seeAlso": [], | |||
"attachments": [], | |||
"url": "http://tvnz.co.nz/politics-news/jon-johansson-s-all-2014-4523189", | |||
"publicationTitle": "TVNZ", | |||
"language": "en", | |||
"title": "Jon Johansson: It's all about 2014", | |||
"date": "2017-07-05", | |||
"abstractNote": "MetService's long-range forecast for this month has colder temperatures than usual.", | |||
"language": "en-NZ", | |||
"libraryCatalog": "TVNZ", | |||
"accessDate": "CURRENT_TIMESTAMP", | |||
"shortTitle": "Jon Johansson" | |||
"publicationTitle": "TVNZ", | |||
"section": "New Zealand", | |||
"url": "https://www.tvnz.co.nz/one-news/new-zealand/below-average-temperatures-forecast-across-nz-first-half-july", | |||
"attachments": [], | |||
"tags": [ | |||
"new-zealand", | |||
"nzn", | |||
"one-news", | |||
"weather-news" | |||
], | |||
"notes": [], | |||
"seeAlso": [] | |||
} | |||
] | |||
}, | |||
{ | |||
"type": "web", | |||
"url": "http://tvnz.co.nz/search/ta_ent_search_news_skin.xhtml?q=storm&sort=date%3AD%3AS%3Ad1", | |||
"items": "multiple" | |||
}, | |||
{ | |||
"type": "web", | |||
"url": "http://tvnz.co.nz/national-news/patea-devastated-storm-video-4752377", | |||
"url": "https://www.tvnz.co.nz/one-news/new-zealand/watch-stunning-aurora-australis-storm-lights-up-southern-skies", | |||
"items": [ | |||
{ | |||
"itemType": "tvBroadcast", | |||
"creators": [ | |||
{ | |||
"firstName": "", | |||
"lastName": "Breakfast-", | |||
"creatorType": "author" | |||
} | |||
"itemType": "newspaperArticle", | |||
"title": "Watch: Stunning Aurora Australis storm lights up southern skies", | |||
"creators": [], | |||
"date": "2017-05-28", | |||
"abstractNote": "A geomagnetic storm made for some incredible photos in the South Island overnight.", | |||
"language": "en-NZ", | |||
"libraryCatalog": "TVNZ", | |||
"publicationTitle": "TVNZ", | |||
"section": "New Zealand", | |||
"shortTitle": "Watch", | |||
"url": "https://www.tvnz.co.nz/one-news/new-zealand/watch-stunning-aurora-australis-storm-lights-up-southern-skies", | |||
"attachments": [], | |||
"tags": [ | |||
"breakfast", | |||
"new-zealand", | |||
"space" | |||
], | |||
"notes": [], | |||
"tags": [], | |||
"seeAlso": [], | |||
"attachments": [], | |||
"url": "http://tvnz.co.nz/national-news/patea-devastated-storm-video-4752377", | |||
"network": "TVNZ", | |||
"language": "en", | |||
"date": "March 03, 2012", | |||
"title": "Patea devastated by storm", | |||
"libraryCatalog": "TVNZ" | |||
"seeAlso": [] | |||
} | |||
] | |||
}, | |||
{ | |||
"type": "web", | |||
"url": "https://www.tvnz.co.nz/one-news/search?q=storm", | |||
"items": "multiple" | |||
} | |||
] | |||
/** END TEST CASES **/ |
Oops, something went wrong.
0 comments on commit
1797efb