Permalink
Browse files

Update for PennState WARCshop

* Update to Ubuntu 16.04
* Update Spark and Hadoop
* Update Spark Notebook
* Update warcbase
* Update documentation and lesson plan
* Update README
  • Loading branch information...
ruebot committed Apr 27, 2017
1 parent 00e53dd commit 12318223614d7140ca2dcee44fb39bfbc07cc82c
Showing with 279 additions and 104 deletions.
  1. +1 −0 .gitignore
  2. +121 −45 README.md
  3. +1 −1 Vagrantfile
  4. +1 −1 coursework/README.md
  5. +134 −41 coursework/lessonplan.md
  6. +6 −1 scripts/bootstrap.sh
  7. +15 −15 scripts/warcbase.sh
View
@@ -2,3 +2,4 @@
downloads
scripts/custom.sh
package.box
ubuntu-xenial-16.04-cloudimg-console.log
View
166 README.md

Large diffs are not rendered by default.

Oops, something went wrong.
View
@@ -14,7 +14,7 @@ Vagrant.configure(VAGRANTFILE_API_VERSION) do |config|
config.vm.hostname = "warcbase"
# Every Vagrant virtual environment requires a box to build off of.
config.vm.box = "ubuntu/trusty64"
config.vm.box = "ubuntu/xenial64"
config.vm.network :forwarded_port, guest: 9000, host: 9000 # Spark Notebook
View
@@ -1,3 +1,3 @@
# IIPC General Assembly 2016, Workshop
Ian Milligan (Waterloo) and Nick Ruest (York) will be leading this workshop through warcbase at IIPC GA 2016. Our lesson can be [found here](https://github.com/web-archive-group/warcbase_workshop_vagrant/blob/master/coursework/lessonplan.md).
Ian Milligan (Waterloo) and Nick Ruest (York) will be leading this workshop through warcbase at IIPC GA 2016. Our lesson can be [found here](https://github.com/web-archive-group/warcbase_workshop_vagrant/blob/master/coursework/lessonplan.md).
View

Large diffs are not rendered by default.

Oops, something went wrong.
View
@@ -8,6 +8,11 @@ sudo echo "LANGUAGE=en_US.UTF-8" >> /etc/environment
sudo echo "LC_ALL=en_US.UTF-8" >> /etc/environment
sudo echo "LC_CTYPE=en_US.UTF-8" >> /etc/environment
#######################################################################
# Workaround for https://bugs.launchpad.net/cloud-images/+bug/1569237
echo "ubuntu:ubuntu" | chpasswd
#######################################################################
# Update
apt-get -y update && apt-get -y upgrade
@@ -49,4 +54,4 @@ MAN_FILES=$(wget -qO- "http://sourceforge.net/projects/zsh/files/zsh/5.0.2/zsh-5
for MAN_FILE in $MAN_FILES; do gzip /usr/share/man/man1/"${MAN_FILE##*/}"; done
# More helpful packages
apt-get -y install htop tree zsh
apt-get -y install htop tree zsh unzip
View
@@ -1,20 +1,20 @@
#/bin/bash
# warcbase
cd /home/vagrant
cd /home/ubuntu
mkdir project
# Apache Spark
cd /home/vagrant/project
wget http://d3kbcqa49mib13.cloudfront.net/spark-1.5.1-bin-hadoop2.6.tgz
tar -xvf spark-1.5.1-bin-hadoop2.6.tgz
rm spark-1.5.1-bin-hadoop2.6.tgz
cd /home/ubuntu/project
wget "http://d3kbcqa49mib13.cloudfront.net/spark-1.6.1-bin-hadoop2.6.tgz"
tar -xvf spark-1.6.1-bin-hadoop2.6.tgz
rm spark-1.6.1-bin-hadoop2.6.tgz
# Spark Notebook
cd /home/vagrant/project
wget https://s3.eu-central-1.amazonaws.com/spark-notebook/tgz/spark-notebook-master-scala-2.10.4-spark-1.5.1-hadoop-2.6.0-cdh5.4.2.tgz
tar -xvf spark-notebook-master-scala-2.10.4-spark-1.5.1-hadoop-2.6.0-cdh5.4.2.tgz
rm spark-notebook-master-scala-2.10.4-spark-1.5.1-hadoop-2.6.0-cdh5.4.2.tgz
cd /home/ubuntu/project
wget "https://s3.eu-central-1.amazonaws.com/spark-notebook/zip/spark-notebook-0.6.3-scala-2.10.5-spark-1.6.1-hadoop-2.6.0.zip"
unzip spark-notebook-0.6.3-scala-2.10.5-spark-1.6.1-hadoop-2.6.0.zip
rm spark-notebook-0.6.3-scala-2.10.5-spark-1.6.1-hadoop-2.6.0.zip
# warcbase dependencies (Vagrant isn't playing nice with Maven, or I don't have the paths set up right)
cd /tmp
@@ -34,9 +34,9 @@ wget http://central.maven.org/maven2/org/apache/commons/commons-compress/1.9/com
wget http://central.maven.org/maven2/org/apache/commons/commons-compress/1.9/commons-compress-1.9.pom
# warcbase
cd /home/vagrant/project
cd /home/ubuntu/project
git clone http://github.com/lintool/warcbase.git
cd /home/vagrant/project/warcbase
cd /home/ubuntu/project/warcbase
mvn install:install-file -Dfile=/usr/share/java/bsh-2.0b4.jar -DpomFile=/usr/share/maven-repo/org/beanshell/bsh/2.0b4/bsh-2.0b4.pom
mvn install:install-file -Dfile=/usr/share/java/commons-cli-1.2.jar -DpomFile=/usr/share/maven-repo/commons-cli/commons-cli/1.2/commons-cli-1.2.pom
mvn install:install-file -Dfile=/tmp/commons-logging-api-1.1.jar -DpomFile=/tmp/commons-logging-api-1.1.pom
@@ -46,12 +46,12 @@ mvn install:install-file -Dfile=/tmp/commons-lang-2.6.jar -DpomFile=/tmp/commons
mvn install:install-file -Dfile=/tmp/commons-collections-3.2.1.jar -DpomFile=/tmp/commons-collections-3.2.1.pom
mvn install:install-file -Dfile=/tmp/hamcrest-core-1.3.jar -DpomFile=/tmp/hamcrest-core-1.3.pom
mvn install:install-file -Dfile=/tmp/commons-compress-1.9.jar -DpomFile=/tmp/commons-compress-1.9.pom
mvn clean package appassembler:assemble -DskipTests
mvn clean package -pl warcbase-core -DskipTests
# sample files
cd /home/vagrant/project
cd /home/ubuntu/project
git clone https://github.com/lintool/warcbase-resources.git
# make sure permissions are fine
cd /home/vagrant
chown -hR vagrant:vagrant *
cd /home/ubuntu
chown -hR ubuntu:ubuntu *

0 comments on commit 1231822

Please sign in to comment.