Skip to content
Browse files

Eastview: Fix after site redesign (#1732)

fix Eastview after redesign; improve attachments
  • Loading branch information...
adam3smith committed Sep 4, 2018
1 parent 803f04c commit ec55cd8030bba6c93e59d75b0099d05294c32b84
Showing with 66 additions and 79 deletions.
  1. +66 −79 Eastview.js
@@ -9,7 +9,7 @@
"inRepository": true,
"translatorType": 4,
"browserSupport": "gcsibv",
"lastUpdated": "2018-07-15 20:34:03"
"lastUpdated": "2018-09-02 23:10:00"

@@ -36,13 +36,31 @@
/**
 * Decide what kind of item(s) the current page offers.
 *
 * Simple and advanced article-search URLs are treated as result lists; every
 * other URL is treated as a single newspaper article.
 *
 * @param {Document} doc - The page being inspected.
 * @param {string} url - Address of the page.
 * @returns {string|boolean} "multiple" when a search page has usable result
 *   links, "newspaperArticle" for non-search pages, false for a search page
 *   with no usable links.
 */
function detectWeb(doc, url) {
	var isSearchPage = url.includes("/search/simple/articles?") || url.includes("/search/advanced/articles");
	if (!isSearchPage) {
		return "newspaperArticle";
	}

	// Watch the results container for child-list changes (presumably so that
	// detection re-runs once results are injected -- confirm against the
	// Zotero.monitorDOMChanges contract). Skip when the container is absent
	// rather than handing a null node to the observer.
	var container = doc.getElementById("articleSearchContainer");
	if (container) {
		Z.monitorDOMChanges(container, {
			childList: true
		});
	}

	if (getSearchResults(doc, true)) return "multiple";
	return false;
}

/**
 * Collect article links from the search-results container.
 *
 * Only anchors with class "Link" whose href contains "doc?" inside
 * #articleSearchContainer are considered.
 *
 * @param {Document} doc - The page to inspect.
 * @param {boolean} checkOnly - When truthy, return true as soon as one usable
 *   link is seen instead of building the full map.
 * @returns {Object<string, string>|boolean} Map of link href to
 *   whitespace-normalized title; true in checkOnly mode when a usable link
 *   exists; false when no usable link is found.
 */
function getSearchResults(doc, checkOnly) {
	var items = {};
	var found = false;
	var rows = ZU.xpath(doc, '//div[@id="articleSearchContainer"]//a[@class="Link" and contains(@href, "doc?")]');

	for (var i = 0; i < rows.length; i++) {
		var href = rows[i].href;
		var title = ZU.trimInternal(rows[i].textContent);
		// Ignore anchors that lack either a target or any visible text.
		if (!href || !title) continue;
		if (checkOnly) return true;
		found = true;
		items[href] = title;
	}

	return found ? items : false;
}

var typeMap = {
"Argumenty i fakty": "magazineArticle",
"Argumenty nedeli": "magazineArticle",
@@ -79,13 +97,18 @@ var typeMap = {

/**
 * Build the site-relative permalink path for an article URL.
 *
 * @param {string} URL - Article address, expected to carry an "id=<digits>" parameter.
 * @returns {string} "/browse/doc/<digits>" when an id is present; otherwise
 *   the input URL unchanged.
 */
function permaLink(URL) {
	var id = URL.match(/id=(\d+)/);
	if (id) return "/browse/doc/" + id[1];
	return URL;
}

/**
 * Build the site-relative PDF download path for an article URL.
 *
 * @param {string} URL - Article address, expected to carry an "id=<digits>" parameter.
 * @returns {string} "/browse/pdf-download?articleid=<digits>" when an id is
 *   present; otherwise the input URL unchanged.
 */
function pdfLink(URL) {
	var id = URL.match(/id=(\d+)/);
	if (id) return "/browse/pdf-download?articleid=" + id[1];
	return URL;
}

function scrape(doc, url) {
var item = new Zotero.Item("newspaperArticle");
var publication = ZU.xpathText(doc, '//a[@class="path" and contains(@href, "browse/publication")]');
item.publicationTitle = publication;
@@ -98,11 +121,11 @@ function scrape(doc, url) {
var database = ZU.xpathText(doc, '//a[@class="path" and contains(@href, "browse/udb")]');
if (database) item.libraryCatalog = database.replace(/\(.+\)/, "") + "(Eastview)";
if (doc.getElementById('metatable')) {
if (ZU.xpathText(doc, '//table[@class="table table-condensed Table Table-noTopBorder"]//td[contains(text(), "Article")]')) {
//we have the metadata in a table
var metatable = doc.getElementById('metatable');
var title = ZU.xpathText(metatable, './/td[@class="hdr" and contains(text(), "Article")]/following-sibling::td[@class="val"]');
var source = ZU.xpathText(metatable, './/td[@class="hdr" and contains(text(), "Source")]/following-sibling::td[@class="val"]');
var metatable = ZU.xpath(doc, '//table[tbody/tr/td[contains(text(), "Article")]]');
var title = ZU.xpathText(metatable, './/td[contains(text(), "Article")]/following-sibling::td');
var source = ZU.xpathText(metatable, './/td[contains(text(), "Source")]/following-sibling::td');
if (source) {
var date = source.match(/(January|February|March|April|May|Juni|July|August|September|October|November|December)\s+(\d{1,2},\s+)?\d{4}/);
if (date) = ZU.trimInternal(date[0]);
@@ -114,28 +137,28 @@ function scrape(doc, url) {
if (!item.publicationTitle) {
item.publicationTitle = ZU.xpathText(metatable, './/td[@class="hdr" and text()="Title"]/following-sibling::td[@class="val"]');
item.publicationTitle = ZU.xpathText(metatable, './/td[text()="Title"]/following-sibling::td');

if (!item.pages) {
var pagesOnly = ZU.xpathText(metatable, './/td[@class="hdr" and contains(text(), "Page(s)")]/following-sibling::td[@class="val"]');
var pagesOnly = ZU.xpathText(metatable, './/td[contains(text(), "Page(s)")]/following-sibling::td');
item.pages = pagesOnly;
var author = ZU.xpathText(metatable, './/td[@class="hdr" and contains(text(), "Author(s)")]/following-sibling::td[@class="val"]');
var author = ZU.xpathText(metatable, './/td[contains(text(), "Author(s)")]/following-sibling::td');
if (author) {
authors = author.trim().split(/\s*,\s*/);
for (var i=0; i<authors.length; i++) {
for (var i = 0; i < authors.length; i++) {
item.creators.push(ZU.cleanAuthor(authors[i], "author"));
var place = ZU.xpathText(doc, '//table[@id="metatable"]//td[@class="hdr" and contains(text(), "Place of Publication")]/following-sibling::td');
var place = ZU.xpathText(metatable, './/td[contains(text(), "Place of Publication")]/following-sibling::td');
if (place) = ZU.trimInternal(place);
} else {
var title = ZU.xpathText(doc, '//div[@class="ArticleTitle"]');
var title = ZU.xpathText(doc, '//div[@class="table-responsive"]/div[@class="change_font"]');
//the "old" page format. We have very little structure here, doing the best we can.
var header = ZU.xpathText(doc, '//div[@class="Article"]/ul');
var header = ZU.xpathText(doc, '//div[@class="table-responsive"]/ul[1]');
var date = header.match(/Date:\s*(\d{2}-\d{2}-\d{2,4})/);
if (date) = date[1];
if (!item.publicationTitle) {
@@ -148,15 +171,25 @@ function scrape(doc, url) {

//see if we have a match for item type; default to newspaper otherwise.
var itemType = typeMap[item.publicationTitle];
if (itemType) item.itemType = itemType;
url: url,
title: "Eastview Fulltext Snapshot",
mimeType: "text/html"
//Attach real PDF for PDFs:
if (doc.querySelectorAll('#pdfjsContainer').length) {
url: pdfLink(url),
title: "Eastview Fulltext PDF",
mimeType: "application/pdf"
else {
document: doc,
title: "Eastview Fulltext Snapshot",
mimeType: "text/html"

if (title && title == title.toUpperCase()) {
title = ZU.capitalizeTitle(title, true);
@@ -165,74 +198,28 @@ function scrape(doc, url) {
//sometimes items actually don't have a title: use the publication title instead.
if (!item.title) item.title = item.publicationTitle;


* function to scrape directly from the search table. Not used at this point, but leaving in case we'll want to implement it
function scrapeSearch(doc, url) {
//Z.debug(ZU.xpathText(doc, './td'))
var dataTags = new Object();
var newItem = new Zotero.Item("journalArticle");
var title = ZU.xpathText(doc, './td[contains(@class, "title-cell")]/a');
if (title==title.toUpperCase()){
title = ZU.capitalizeTitle(title.toLowerCase(), true);
newItem.title= title;
var author = ZU.xpathText(doc, './td[contains(@class, "title-cell")]/following-sibling::td[1]');
if (author){
authors = author.replace(/—/, "").trim().split(/\s*,\s/);
for (var i in authors){
if (authors[i]) newItem.creators.push(ZU.cleanAuthor(authors[i], "author"))
newItem.publication = ZU.xpathText(doc, './td[contains(@class, "source-cell")]'); = ZU.xpathText(doc, './td[contains(@class, "source-cell")]/following-sibling::td[1]');
var attachmentLink = ZU.xpathText(doc, './td[contains(@class, "title-cell")]/a/@href');
if (attachmentLink){
newItem.attachments.push({url:attachmentLink, title:title, mimeType:"text/html"})
} */

function doWeb(doc, url) {
var articles = new Array();
var articles = [];
var items = {};
if (detectWeb(doc, url) == "multiple") {
var titles = ZU.xpath(doc, '//td[contains(@class, "title-cell")]/a');
//var number = ZU.xpath(doc, '//td[contains(@class, "check-cell")]/following-sibling::td[1]');
for (var i = 0; i < titles.length; i++) {
items[titles[i].href] = titles[i].textContent.trim();
Zotero.selectItems(items, function(items) {
Zotero.selectItems(getSearchResults(doc, false), function(items) {
if (!items) {
return true;
var articles = [];
for (var i in items) {
/* For scraping search table
var xpath = '//tr[td[text()="' + i + '"]]'
var node = ZU.xpath(doc, xpath);
scrapeSearch(node, url); */
ZU.processDocuments(articles, scrape)
ZU.processDocuments(articles, scrape);
} else {
if (\/\d+/) != -1) {
scrape(doc, url);
//always scrape from the permalink page, which has extra publication info at the top
else {
ZU.processDocuments(permaLink(url), scrape);
scrape(doc, url);

var testCases = [
"type": "web",

0 comments on commit ec55cd8

Please sign in to comment.
You can’t perform that action at this time.