Skip to content
Permalink
Browse files

basic fix and some clean-up for PEP.

This is all regex magic and I don't think it's super well written, but I'm not interested enough to spend more time on this than for a basic fix
  • Loading branch information...
adam3smith committed Mar 20, 2015
1 parent 458005a commit 98d2e739cb93af72f5a97499d78550897fa187f3
Showing with 73 additions and 61 deletions.
  1. +73 −61 PEP Web.js
@@ -9,46 +9,28 @@
"inRepository": true,
"translatorType": 4,
"browserSupport": "gcsibv",
"lastUpdated": "2012-03-12 01:22:21"
"lastUpdated": "2015-03-20 06:07:35"
}

//Only works for journal articles, and only for single entries.
//Author names sometimes omit periods after the first initials.

/**
 * Decide whether the current page is one this translator can handle.
 *
 * Only single-article pages are supported; those are recognised purely by
 * their URL containing "/document.php".
 *
 * @param {Document} doc - The page document (not inspected; detection is URL-based).
 * @param {string} url - The URL of the page.
 * @returns {string|boolean} "journalArticle" for an article page, false otherwise.
 */
function detectWeb(doc, url) {
	// The specific "/document.php" test already implies the looser /document/
	// match, so a single check is sufficient.
	if (url.indexOf("/document.php") !== -1) {
		return "journalArticle";
	}
	return false;
}


function scrape(doc, url) {
var namespace = doc.documentElement.namespaceURI;
var nsResolver = namespace ? function(prefix) {
if (prefix == 'x') return namespace; else return null;
} : null;

var newItem = new Zotero.Item("journalArticle");
newItem.url = doc.location.href;

var xPathString = "//span[@id='maincite']";

var xPathString2 = '//p[@class="title"]/a';

var myXPathObject = doc.evaluate(xPathString, doc, nsResolver, XPathResult.ANY_TYPE, null);
var myXPathObject2 = doc.evaluate(xPathString2, doc, nsResolver, XPathResult.ANY_TYPE, null);

var citeString = myXPathObject.iterateNext().textContent;
var titleString = myXPathObject2.iterateNext().textContent;

var citeString = ZU.xpathText(doc, '//span[@id="maincite"]');
var titleString = ZU.xpathText(doc, '//p[@class="title"]/a/text()');
if (!titleString) titleString = ZU.xpathText(doc, '//p[@class="title"]/text()[1]');
//authors
var authors = citeString.match(/(^.*)\(/)[1].toString();
var authors = citeString.match(/(^.*?)\(/)[1].toString();
if (authors == ' ') {
authors = "Unknown";
}
@@ -78,41 +60,41 @@ function scrape(doc, url) {
newItem.date = year;

//publication name
var pubname = citeString.match(/(\)\.\s)(.*)(\,)/)[0].toString();
var pubminus = citeString.match(/(\)\.\s)(.*?)(\.)/)[0].toString();
pubname = pubname.replace(pubminus, '');
pubname = pubname.replace(/\,/, '');
pubname = pubname.replace(/\.*/, '');
pubname = pubname.replace(/^\s*/, '');
pubname = pubname.replace(/\s*$/, '')
newItem.publicationTitle = pubname;

var pubname = citeString.match(/(\)\.\s)(.*)(\,)/)
if (pubname) pubname = pubname[0];
var pubminus = citeString.match(/(\)\.\s)(.*?)(\.)/);
if (pubminus) pubminus = pubminus[0];
else pubminus = "";
if (pubname){
pubname = pubname.replace(pubminus, '');
pubname = pubname.replace(/\,/, '');
pubname = pubname.replace(/\.*/, '');
pubname = pubname.replace(/^\s*/, '');
pubname = pubname.replace(/\s*$/, '')
newItem.publicationTitle = pubname;
}

//volume
var volumeandpages = citeString.match(/[0-9]*\:([0-9]*(\-?)[0-9]*)\.$/)[0].toString();
var volume = volumeandpages.match(/[0-9]*\:/)[0].toString();
volume = volume.replace(":", '');
newItem.volume = volume;

//pages
var pages = volumeandpages.match(/\:([0-9]*)(\-?)([0-9]*)/)[0].toString();
pages = pages.replace(":", '');
pages = pages.replace(".", '');
newItem.pages = pages;

var volumeandpages = citeString.match(/[0-9]*\:([0-9]*(\-?)[0-9]*)\.$/);
if (volumeandpages){
volumeandpages = volumeandpages[0];
var volume = volumeandpages.match(/([0-9]*)\:/);
if (volume) newItem.volume = volume[1];

//pages
var pages = volumeandpages.match(/\:([0-9]*)(\-?)([0-9]*)/)[0].toString();
pages = pages.replace(":", '');
pages = pages.replace(".", '');
newItem.pages = pages;
}
newItem.attachments.push({url:doc.location.href, title:"PEP Web Snapshot", mimeType:"text/html"});

newItem.complete();
}


/**
 * Translator entry point: save the article shown on the current page.
 *
 * The page passed in is scraped directly, exactly once. Re-queuing the same
 * URL through Zotero.Utilities.processDocuments in addition to the direct
 * call would run scrape a second time for the same page, and the
 * Zotero.wait()/Zotero.done() pair is not needed for a direct call to scrape.
 *
 * @param {Document} doc - The document of the article page.
 * @param {string} url - The URL of the article page.
 */
function doWeb(doc, url) {
	scrape(doc, url);
}
/** BEGIN TEST CASES **/
var testCases = [
@@ -122,30 +104,60 @@ var testCases = [
"items": [
{
"itemType": "journalArticle",
"title": "Childhood and Trauma",
"creators": [
{
"firstName": "P. J.",
"lastName": "Boschan",
"creatorType": "author"
}
],
"notes": [],
"tags": [],
"seeAlso": [],
"date": "2008",
"accessDate": "CURRENT_TIMESTAMP",
"libraryCatalog": "PEP Web",
"pages": "24-32",
"publicationTitle": "Am. J. Psychoanal.",
"url": "http://www.pep-web.org/document.php?id=ajp.068.0024a",
"volume": "68",
"attachments": [
{
"title": "PEP Web Snapshot",
"mimeType": "text/html"
}
],
"url": "http://www.pep-web.org/document.php?id=ajp.068.0024a",
"title": "Childhood and Trauma",
"date": "2008",
"publicationTitle": "Am. J. Psychoanal.",
"volume": "68",
"pages": "24-32",
"tags": [],
"notes": [],
"seeAlso": []
}
]
},
{
"type": "web",
"url": "http://www.pep-web.org/document.php?id=se.014.0237a&type=hitlist&num=0&query=zone1%2Cparagraphs|zone2%2Cparagraphs|author%2Cfreud|title%2Cmourning+and+melancholia|viewperiod%2Cweek|sort%2Cyear%2Ca#hit1",
"items": [
{
"itemType": "journalArticle",
"title": "Mourning and Melancholia",
"creators": [
{
"firstName": "S.",
"lastName": "Freud",
"creatorType": "author"
}
],
"date": "1917",
"libraryCatalog": "PEP Web",
"accessDate": "CURRENT_TIMESTAMP"
"publicationTitle": "The Standard Edition of the Complete Psychological Works of Sigmund Freud Volume XIV (1914-1916): On the History of the Psycho-Analytic Movement, Papers on Metapsychology and Other Works,",
"url": "http://www.pep-web.org/document.php?id=se.014.0237a&type=hitlist&num=0&query=zone1%2Cparagraphs|zone2%2Cparagraphs|author%2Cfreud|title%2Cmourning+and+melancholia|viewperiod%2Cweek|sort%2Cyear%2Ca#hit1",
"attachments": [
{
"title": "PEP Web Snapshot",
"mimeType": "text/html"
}
],
"tags": [],
"notes": [],
"seeAlso": []
}
]
}

0 comments on commit 98d2e73

Please sign in to comment.
You can’t perform that action at this time.