Skip to content
Permalink
Browse files

Bryn mawr (#1115)

This is the continuation of #1045 :

    adapted the detectWeb
    restructure the doWeb
    use ZU.xpath and ZU.xpathText instead of doc.evaluate
    try to use a more stable method for reviewAuthors
    use url directly for extracting date
    save the BMCR ID in extra
  • Loading branch information...
zuphilip authored and adam3smith committed Aug 23, 2016
1 parent 6bb5857 commit 1f0634ab9b3caca2782b297a1adae568f77cb804
Showing with 215 additions and 75 deletions.
  1. +215 −75 Bryn Mawr Classical Review.js
@@ -3,93 +3,146 @@
"label": "Bryn Mawr Classical Review",
"creator": "Michael Berkowitz",
"target": "^https?://bmcr\\.brynmawr\\.edu/",
"minVersion": "1.0.0b4.r5",
"minVersion": "3.0",
"maxVersion": "",
"priority": 100,
"inRepository": true,
"translatorType": 4,
"browserSupport": "gcsibv",
"lastUpdated": "2015-06-02 10:52:41"
"lastUpdated": "2016-08-23 05:51:18"
}

/*
***** BEGIN LICENSE BLOCK *****
Copyright © 2016 Michael Berkowitz and John Muccigrosso
This file is part of Zotero.
Zotero is free software: you can redistribute it and/or modify
it under the terms of the GNU Affero General Public License as published by
the Free Software Foundation, either version 3 of the License, or
(at your option) any later version.
Zotero is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU Affero General Public License for more details.
You should have received a copy of the GNU Affero General Public License
along with Zotero. If not, see <http://www.gnu.org/licenses/>.
***** END LICENSE BLOCK *****
*/

function detectWeb(doc, url) {
if (url.match(/by_reviewer/) || url.match(/by_author/) || url.match(/recent.html/) || url.match(/\/\d{4}\/$/)) {
if (url.search(/by_reviewer|by_author|recent\.html|\/\d{4}\/(indexb?\.html)?$/) != -1) {
return "multiple";
} else if (url.match(/[\d\-]+\.html$/)) {
} else if (url.search(/\d\.html$/)>-1 && ZU.xpathText(doc, '//h3/i')) {
return "journalArticle";
}
}


function getSearchResults(doc, checkOnly) {
var items = {};
var found = false;
var rows = ZU.xpath(doc, '//*[@id="indexcontent" or @id="twocol-mainContent"]//li//a');
for (var i=0; i<rows.length; i++) {
var href = rows[i].href;
var title = ZU.trimInternal(rows[i].textContent);
var title = ZU.xpathText(rows[i], '..');
if (!href || !title) continue;
if (checkOnly) return true;
found = true;
items[href] = title;
}
return found ? items : false;
}


function doWeb(doc, url) {
var ns = doc.documentElement.namespaceURI;
var nsResolver = ns ? function(prefix) {
if (prefix == 'x') return ns; else return null;
} : null;
var arts = new Array();
if (detectWeb(doc, url) == "multiple") {
var items = new Object();
if (doc.evaluate('//table/tbody/tr/td/ul/li/i', doc, nsResolver, XPathResult.ANY_TYPE, null).iterateNext()) {
var boxes = doc.evaluate('//table/tbody/tr/td/ul/li', doc, nsResolver, XPathResult.ANY_TYPE, null);
var box;
while (box = boxes.iterateNext()) {
var link = doc.evaluate('./a', box, nsResolver, XPathResult.ANY_TYPE, null).iterateNext().href;
var title = doc.evaluate('./i', box, nsResolver, XPathResult.ANY_TYPE, null).iterateNext().textContent;
items[link] = title;
Zotero.selectItems(getSearchResults(doc, false), function (items) {
if (!items) {
return true;
}
} else if (doc.evaluate('//table/tbody/tr/td/ul/li', doc, nsResolver, XPathResult.ANY_TYPE, null).iterateNext()) {
var title = doc.evaluate('//table/tbody/tr/td/ul/li', doc, nsResolver, XPathResult.ANY_TYPE, null);
var next;
while (next = title.iterateNext()) {
items[next.href] = Zotero.Utilities.trimInternal(next.textContent);
}
} else if (url.match(/google\.com/)) {
var titles = doc.evaluate('//h2[@class="r"]/a[@class="l"]', doc, nsResolver, XPathResult.ANY_TYPE, null);
var title;
while (title = titles.iterateNext()) {
items[title.href] = title.textContent;
var articles = [];
for (var i in items) {
articles.push(i);
}
ZU.processDocuments(articles, scrape);
});
} else {
scrape(doc, url);
}
}

function scrape(doc, url) {
var item = new Zotero.Item("journalArticle");

var title = ZU.xpathText(doc, '//h3/i');
item.title = "Review of: " + Zotero.Utilities.trimInternal(title);

var author = ZU.xpathText(doc, '//b[contains(text(), "Reviewed by")]');
if (author) {
author = author.match(/Reviewed by\s+([^,\(]+)/);
if (author) {
item.creators.push(ZU.cleanAuthor(author[1], "author"));
}
items = Zotero.selectItems(items);
for (var i in items) {
arts.push(i);
}

//The authors of the reviewed book are also child nodes of h3
//and before the book title which is set in italics.
var dataChildrens = ZU.xpath(doc, '//h3[i]')[0].childNodes;
var authorString = "";
for (var i=0; i<dataChildrens.length; i++) {
if (dataChildrens[i].tagName == "I") {
break;
}
authorString += dataChildrens[i].textContent;
}
var authors = authorString.replace(/\([^)]+\)/, "").split(/(,|and)\s+/);
//Zotero.debug(authors);
for (var i=0; i<authors.length; i++) {
var aut = authors[i];
if (aut.match(/\w/) && (aut !== "and")) {
item.creators.push(ZU.cleanAuthor(aut, "reviewedAuthor"));
}
} else {
arts = [url];
}
Zotero.Utilities.processDocuments(arts, function(doc) {
var item = new Zotero.Item("journalArticle");
var title = doc.evaluate('//h3/i', doc, nsResolver, XPathResult.ANY_TYPE, null).iterateNext().textContent;
item.title = "Review of: " + Zotero.Utilities.trimInternal(title);
var data = doc.evaluate('//h3[i]', doc, nsResolver, XPathResult.ANY_TYPE, null).iterateNext().textContent;
var title = title.replace("(", "\\(").replace(")", "\\)");
var author = doc.evaluate('//b[contains(text(), "Reviewed")]', doc, nsResolver, XPathResult.ANY_TYPE, null).iterateNext().textContent.match(/Reviewed by\s+([^,]+),/)[1];
item.creators.push(Zotero.Utilities.cleanAuthor(author, "author"));
var splitRe = new RegExp(title);
var authors = data.split(splitRe)[0].replace(/\([^)]+\)/, "").split(/(,|and)\s+/);
Zotero.debug(authors);
for (var i=0; i<authors.length; i++) {
var aut = authors[i];
if (aut.match(/\w/) && (aut != "and")) {
item.creators.push(Zotero.Utilities.cleanAuthor(aut, "reviewedAuthor"));

//The BMCR ID for 1998ff contains the 4-digit year, 2-digit month and an increasing number.
//The BMCR ID for 1994-1998 contains the 2-digit year, 1- or 2-digit month and an increasing number.
//The BMCR ID for 1990-1993 is different.
var m = url.match(/(\d{4})\/(\d{2,4})[\-\.](\d{1,2})[\-\.](\d{2})/);
if (m) {
item.extra = "BMCR ID: " + m[2] + "." + m[3] + "." + m[4];
if (m[1]>=1994) {
if (m[2].length==2) {
m[2] = "19" + m[2];
}
if (m[3].length==1) {
m[3] = "0" + m[3];
}
item.date = m[2] + "-" + m[3];
}
item.url = doc.location.href;
item.attachments = [{url:item.url, title:item.title, mimeType:"text/html"}];
if (doc.evaluate('/html/body/center/table/tbody/tr/td/center/table/tbody/tr/td//font', doc, nsResolver, XPathResult.ANY_TYPE, null).iterateNext()) {
item.date = Zotero.Utilities.trimInternal(doc.evaluate('/html/body/center/table/tbody/tr/td/center/table/tbody/tr/td//font', doc, nsResolver, XPathResult.ANY_TYPE, null).iterateNext().textContent.replace("Bryn Mawr Classical Review ", "").replace(/\./g, "/"));
} else {
item.date = Zotero.Utilities.trimInternal(doc.evaluate('/html/body/h3', doc, nsResolver, XPathResult.ANY_TYPE, null).iterateNext().textContent.replace("Bryn Mawr Classical Review ", "").replace(/\./g, "/"))
if (m[1]<=1993) {
item.date = m[1];
}
item.complete();
});
}/** BEGIN TEST CASES **/
}

item.publicationTitle = "Bryn Mawr Classical Review";
item.journalAbbreviation = "BMCR";
item.ISSN = "1055-7660";
item.url = url;
item.attachments.push({url:url, title:item.title, mimeType:"text/html"});

item.complete();
}

/** BEGIN TEST CASES **/
var testCases = [
{
"type": "web",
"url": "http://bmcr.brynmawr.edu/2010/2010-01-02.html",
"items": [
{
"itemType": "journalArticle",
"title": "Review of: Sallust: The War Against Jugurtha. Aris and Phillips Classical Texts",
"creators": [
{
"firstName": "Christina S.",
@@ -107,22 +160,23 @@ var testCases = [
"creatorType": "reviewedAuthor"
}
],
"notes": [],
"tags": [],
"seeAlso": [],
"date": "2010-01",
"ISSN": "1055-7660",
"extra": "BMCR ID: 2010.01.02",
"journalAbbreviation": "BMCR",
"libraryCatalog": "Bryn Mawr Classical Review",
"publicationTitle": "Bryn Mawr Classical Review",
"shortTitle": "Review of",
"url": "http://bmcr.brynmawr.edu/2010/2010-01-02.html",
"attachments": [
{
"url": "http://bmcr.brynmawr.edu/2010/2010-01-02.html",
"title": "Review of: Sallust: The War Against Jugurtha. Aris and Phillips Classical Texts",
"mimeType": "text/html"
}
],
"title": "Review of: Sallust: The War Against Jugurtha. Aris and Phillips Classical Texts",
"url": "http://bmcr.brynmawr.edu/2010/2010-01-02.html",
"date": "2010/01/02",
"libraryCatalog": "Bryn Mawr Classical Review",
"accessDate": "CURRENT_TIMESTAMP",
"shortTitle": "Review of"
"tags": [],
"notes": [],
"seeAlso": []
}
]
},
@@ -132,6 +186,7 @@ var testCases = [
"items": [
{
"itemType": "journalArticle",
"title": "Review of: The Classical Tradition",
"creators": [
{
"firstName": "Christina S.",
@@ -154,21 +209,106 @@ var testCases = [
"creatorType": "reviewedAuthor"
}
],
"notes": [],
"tags": [],
"seeAlso": [],
"date": "2013-01",
"ISSN": "1055-7660",
"extra": "BMCR ID: 2013.01.44",
"journalAbbreviation": "BMCR",
"libraryCatalog": "Bryn Mawr Classical Review",
"publicationTitle": "Bryn Mawr Classical Review",
"shortTitle": "Review of",
"url": "http://bmcr.brynmawr.edu/2013/2013-01-44.html",
"attachments": [
{
"title": "Review of: The Classical Tradition",
"mimeType": "text/html"
}
],
"title": "Review of: The Classical Tradition",
"url": "http://bmcr.brynmawr.edu/2013/2013-01-44.html",
"date": "2013/01/44",
"tags": [],
"notes": [],
"seeAlso": []
}
]
},
{
"type": "web",
"url": "http://bmcr.brynmawr.edu/recent.html",
"items": "multiple"
},
{
"type": "web",
"url": "http://bmcr.brynmawr.edu/1999/1999-11-02.html",
"items": [
{
"itemType": "journalArticle",
"title": "Review of: Epic Traditions in the Contemporary World. The Poetics of Community",
"creators": [
{
"firstName": "Margaret",
"lastName": "Beissinger",
"creatorType": "reviewedAuthor"
},
{
"firstName": "Jane",
"lastName": "Tylus",
"creatorType": "reviewedAuthor"
},
{
"firstName": "Susanne",
"lastName": "Wofford",
"creatorType": "reviewedAuthor"
}
],
"date": "1999-11",
"ISSN": "1055-7660",
"extra": "BMCR ID: 1999.11.02",
"journalAbbreviation": "BMCR",
"libraryCatalog": "Bryn Mawr Classical Review",
"accessDate": "CURRENT_TIMESTAMP",
"shortTitle": "Review of"
"publicationTitle": "Bryn Mawr Classical Review",
"shortTitle": "Review of",
"url": "http://bmcr.brynmawr.edu/1999/1999-11-02.html",
"attachments": [
{
"title": "Review of: Epic Traditions in the Contemporary World. The Poetics of Community",
"mimeType": "text/html"
}
],
"tags": [],
"notes": [],
"seeAlso": []
}
]
},
{
"type": "web",
"url": "http://bmcr.brynmawr.edu/1998/98.1.04.html",
"items": [
{
"itemType": "journalArticle",
"title": "Review of: Athens and Persians in the Fifth Century BC: A Study in Cultural Receptivity.",
"creators": [
{
"firstName": "Margaret C.",
"lastName": "Miller",
"creatorType": "reviewedAuthor"
}
],
"date": "1998-01",
"ISSN": "1055-7660",
"extra": "BMCR ID: 98.1.04",
"journalAbbreviation": "BMCR",
"libraryCatalog": "Bryn Mawr Classical Review",
"publicationTitle": "Bryn Mawr Classical Review",
"shortTitle": "Review of",
"url": "http://bmcr.brynmawr.edu/1998/98.1.04.html",
"attachments": [
{
"title": "Review of: Athens and Persians in the Fifth Century BC: A Study in Cultural Receptivity.",
"mimeType": "text/html"
}
],
"tags": [],
"notes": [],
"seeAlso": []
}
]
}

0 comments on commit 1f0634a

Please sign in to comment.
You can’t perform that action at this time.