Skip to content
Permalink
Browse files

Blogpost: Small fixes & code update

  • Loading branch information...
adam3smith committed Sep 5, 2016
1 parent 963a8aa commit f2c210c10854da89c99766c4a9eb90921ee9a015
Showing with 88 additions and 76 deletions.
  1. +88 −76 Blogger.js
@@ -9,7 +9,7 @@
"inRepository": true,
"translatorType": 4,
"browserSupport": "gcsibv",
"lastUpdated": "2014-02-14 00:20:16"
"lastUpdated": "2016-09-05 23:14:05"
}

function detectWeb(doc, url) {
@@ -24,23 +24,37 @@ function detectWeb(doc, url) {
}
}


/**
 * Gather the post links found on a listing page.
 *
 * Candidate anchors are selected with a single XPath union: the title link
 * inside each post heading, plus the links in the expanded archive list.
 *
 * @param {Document} doc - page to search
 * @param {boolean} checkOnly - when truthy, stop at the first usable link and
 *   just report that one exists
 * @returns {Object|boolean} `true` as soon as a usable link is seen in
 *   checkOnly mode; otherwise an object keyed by link href whose values are
 *   the link texts after ZU.trimInternal; `false` when no usable link exists
 */
function getSearchResults(doc, checkOnly) {
	var postLinks = ZU.xpath(doc, '//h3[@class="post-title entry-title"]/a|//li[@class="archivedate expanded"]/ul[@class="posts"]/li/a');
	var results = {};
	var hasResults = false;
	for (var idx = 0; idx < postLinks.length; idx++) {
		var link = postLinks[idx];
		var postUrl = link.href;
		var postTitle = ZU.trimInternal(link.textContent);
		// Only anchors with both a target and a non-empty label are usable.
		if (postUrl && postTitle) {
			if (checkOnly) {
				return true;
			}
			results[postUrl] = postTitle;
			hasResults = true;
		}
	}
	if (!hasResults) {
		return false;
	}
	return results;
}
//Blogger translator. Code by Adam Crymble

function scrape(doc, url) {
var tagsContent = new Array();
var newItem = new Zotero.Item("blogPost");

//title
if (doc.evaluate('//h3[@class="post-title entry-title"]/a', doc, null, XPathResult.ANY_TYPE, null).iterateNext()) {

newItem.title = doc.evaluate('//h3[@class="post-title entry-title"]/a', doc, null, XPathResult.ANY_TYPE, null).iterateNext().textContent;
if (ZU.xpathText(doc, '//h3[@class="post-title entry-title"]/a')) {
newItem.title = ZU.xpathText(doc, '//h3[@class="post-title entry-title"]/a');
} else {
newItem.title = doc.title;
}

//author, if available
if (doc.evaluate('//span[@class="post-author vcard"]', doc, null, XPathResult.ANY_TYPE, null).iterateNext()) {
var author = doc.evaluate('//span[@class="post-author vcard"]//span[@class="fn"]', doc, null, XPathResult.ANY_TYPE, null).iterateNext().textContent.replace(/^\s*|\s*$/g, '');
if (ZU.xpathText(doc, '//span[@class="post-author vcard"]//span[@class="fn"]')) {
var author = ZU.xpathText(doc, '//span[@class="post-author vcard"]//span[@class="fn"]').trim();
var author = author.toLowerCase();
if (author.match(/\sby\s/)) {
var shortenAuthor = author.indexOf(" by");
@@ -55,23 +69,13 @@ function scrape(doc, url) {
}

//date, if available
if (doc.evaluate('//h2[@class="date-header"]', doc, null, XPathResult.ANY_TYPE, null).iterateNext()) {
newItem.date = doc.evaluate('//h2[@class="date-header"]', doc, null, XPathResult.ANY_TYPE, null).iterateNext().textContent;
newItem.date = ZU.xpathText(doc, '//h2[@class="date-header"]');

}


//tags, if available
if (doc.evaluate('//span[@class="post-labels"]/a', doc, null, XPathResult.ANY_TYPE, null).iterateNext()) {
var tags = doc.evaluate('//span[@class="post-labels"]/a', doc, null, XPathResult.ANY_TYPE, null);

var tags1;
while (tags1 = tags.iterateNext()) {
tagsContent.push(tags1.textContent);
}

for (var i = 0; i < tagsContent.length; i++) {
newItem.tags[i] = tagsContent[i];
}
var tags = ZU.xpath(doc, '//span[@class="post-labels"]/a');
for (var i = 0; i < tags.length; i++) {
newItem.tags.push(tags[i].textContent);
}

var blogTitle1 = doc.title.split(":");
@@ -88,27 +92,15 @@ function doWeb(doc, url) {
var articles = new Array();

if (detectWeb(doc, url) == "multiple") {
var items = new Object();

var titles = doc.evaluate('//h3[@class="post-title entry-title"]/a', doc, null, XPathResult.ANY_TYPE, null);
var titles1 = doc.evaluate('//li[@class="archivedate expanded"]/ul[@class="posts"]/li/a', doc, null, XPathResult.ANY_TYPE, null);

var next_title;
while (next_title = titles.iterateNext()) {
items[next_title.href] = next_title.textContent;
}

while (next_title = titles1.iterateNext()) {
items[next_title.href] = next_title.textContent;
}
Zotero.selectItems(items, function (items) {
Zotero.selectItems(getSearchResults(doc, false), function(items) {
if (!items) {
return true;
}
var articles = [];
for (var i in items) {
articles.push(i);
}
Zotero.Utilities.processDocuments(articles, scrape, function () {});
ZU.processDocuments(articles, scrape);
});
} else {
scrape(doc, url);
@@ -121,31 +113,31 @@ var testCases = [
"items": [
{
"itemType": "blogPost",
"title": "A tweet from Matt Yglesias",
"creators": [
{
"firstName": "",
"lastName": "Joseph",
"creatorType": "author"
}
],
"notes": [],
"tags": [
"Mark",
"Matthew Yglesias"
],
"seeAlso": [],
"date": "Monday, October 24, 2011",
"accessDate": "CURRENT_TIMESTAMP",
"blogTitle": "West Coast Stat Views (on Observational Epidemiology and more)",
"libraryCatalog": "Blogger",
"url": "http://observationalepidemiology.blogspot.com/2011/10/tweet-from-matt-yglesias.html",
"attachments": [
{
"title": "Blogspot Snapshot",
"mimeType": "text/html"
}
],
"title": "A tweet from Matt Yglesias",
"date": "Monday, October 24, 2011",
"blogTitle": "West Coast Stat Views (on Observational Epidemiology and more)",
"url": "http://observationalepidemiology.blogspot.com/2011/10/tweet-from-matt-yglesias.html",
"libraryCatalog": "Blogger",
"accessDate": "CURRENT_TIMESTAMP"
"tags": [
"Mark",
"Matthew Yglesias"
],
"notes": [],
"seeAlso": []
}
]
},
@@ -160,30 +152,30 @@ var testCases = [
"items": [
{
"itemType": "blogPost",
"title": "Politica Argentina - Blog de Psicología Política de Federico González: Perciben una caída en la imagen de la Presidenta",
"creators": [
{
"firstName": "Federico",
"lastName": "Gonzalez",
"creatorType": "author"
}
],
"notes": [],
"tags": [
"Cristina Kirchner",
"imagen"
],
"seeAlso": [],
"date": "domingo, 11 de marzo de 2012",
"blogTitle": "Politica Argentina - Blog de Psicología Política de Federico González",
"shortTitle": "Politica Argentina - Blog de Psicología Política de Federico González",
"url": "http://argentina-politica.blogspot.com/2012/03/perciben-una-caida-en-la-imagen-de-la.html",
"attachments": [
{
"title": "Blogspot Snapshot",
"mimeType": "text/html"
}
],
"title": "Politica Argentina - Blog de Psicología Política de Federico González: Perciben una caída en la imagen de la Presidenta",
"date": "domingo, 11 de marzo de 2012",
"blogTitle": "Politica Argentina - Blog de Psicología Política de Federico González",
"url": "http://argentina-politica.blogspot.com/2012/03/perciben-una-caida-en-la-imagen-de-la.html",
"shortTitle": "Politica Argentina - Blog de Psicología Política de Federico González"
"tags": [
"Cristina Kirchner",
"imagen"
],
"notes": [],
"seeAlso": []
}
]
},
@@ -193,38 +185,58 @@ var testCases = [
"items": [
{
"itemType": "blogPost",
"title": "National Humanities Report Reinforces Stereotypes about the Humanities ~ Remaking the University",
"creators": [
{
"firstName": "Michael",
"lastName": "Meranze",
"creatorType": "author"
}
],
"notes": [],
"date": "Monday, November 25, 2013",
"blogTitle": "National Humanities Report Reinforces Stereotypes about the Humanities ~ Remaking the University",
"url": "http://utotherescue.blogspot.com/2013/11/the-heart-of-matter-humanities-do-more.html",
"attachments": [
{
"title": "Blogspot Snapshot",
"mimeType": "text/html"
}
],
"tags": [
"academic development",
"arts and sciences",
"cutting arts and humanities",
"guest post",
"Humanities and Social Science Knowledge",
"humanities disciplines",
"public purpose",
"thought",
"writing"
"Cuts",
"Development",
"Humanities",
"Liberal Arts",
"guest post"
],
"seeAlso": [],
"notes": [],
"seeAlso": []
}
]
},
{
"type": "web",
"url": "https://jamsubuntu.blogspot.com/2009/01/unmount-command-not-found.html",
"items": [
{
"itemType": "blogPost",
"title": "Jam's Ubuntu Linux Blog: unmount: command not found",
"creators": [],
"date": "Wednesday, 7 January 2009",
"blogTitle": "Jam's Ubuntu Linux Blog",
"shortTitle": "Jam's Ubuntu Linux Blog",
"url": "https://jamsubuntu.blogspot.com/2009/01/unmount-command-not-found.html",
"attachments": [
{
"title": "Blogspot Snapshot",
"mimeType": "text/html"
}
],
"title": "National Humanities Report Reinforces Stereotypes about the Humanities ~ Remaking the University",
"date": "Monday, November 25, 2013",
"blogTitle": "National Humanities Report Reinforces Stereotypes about the Humanities ~ Remaking the University",
"url": "http://utotherescue.blogspot.com/2013/11/the-heart-of-matter-humanities-do-more.html",
"libraryCatalog": "Blogger",
"accessDate": "CURRENT_TIMESTAMP"
"tags": [
"Command Line"
],
"notes": [],
"seeAlso": []
}
]
}

0 comments on commit f2c210c

Please sign in to comment.
You can’t perform that action at this time.