Permalink
Browse files
Updates for 0.50.0 release
- Loading branch information
Showing 2 changed files with 5 additions and 6 deletions.
- Dockerfile (+2 −2)
- README.md (+3 −4)
|
@@ -8,7 +8,7 @@ LABEL website="http://archivesunleashed.org/" |
|
|
|
|
|
## Build variables |
|
|
####################### |
|
|
- ARG SPARK_VERSION=2.4.3 |
|
|
+ ARG SPARK_VERSION=2.4.4 |
|
|
|
|
|
# Git and Wget |
|
|
RUN apk add --update \ |
|
@@ -31,4 +31,4 @@ RUN mkdir /spark \ |
|
|
&& tar -xf "/tmp/spark-$SPARK_VERSION-bin-hadoop2.7.tgz" -C /spark --strip-components=1 \ |
|
|
&& rm "/tmp/spark-$SPARK_VERSION-bin-hadoop2.7.tgz" |
|
|
|
|
|
- CMD /spark/bin/spark-shell --packages "io.archivesunleashed:aut:0.18.1-SNAPSHOT" |
|
|
+ CMD /spark/bin/spark-shell --packages "io.archivesunleashed:aut:0.50.1-SNAPSHOT" |
|
@@ -62,7 +62,7 @@ Welcome to |
|
|
____ __ |
|
|
/ __/__ ___ _____/ /__ |
|
|
_\ \/ _ \/ _ `/ __/ '_/ |
|
|
- /___/ .__/\_,_/_/ /_/\_\ version 2.4.3 |
|
|
+ /___/ .__/\_,_/_/ /_/\_\ version 2.4.4 |
|
|
/_/ |
|
|
|
|
|
Using Scala version 2.11.12 (OpenJDK 64-Bit Server VM, Java 1.8.0_212) |
|
@@ -85,14 +85,13 @@ Type |
|
|
|
|
|
And then paste the following script in: |
|
|
|
|
|
- ``` |
|
|
|
|
|
+ ```scala |
|
|
import io.archivesunleashed._ |
|
|
import io.archivesunleashed.matchbox._ |
|
|
|
|
|
val r = RecordLoader.loadArchives("/aut-resources/Sample-Data/*.gz", sc) |
|
|
.keepValidPages() |
|
|
- .map(r => ExtractDomain(r.getUrl)) |
|
|
+ .map(r => ExtractDomainRDD(r.getUrl)) |
|
|
.countItems() |
|
|
.take(10) |
|
|
``` |
|
|
0 comments on commit d6071cb