Permalink
Please sign in to comment.
Browse files
Setup mycila plugin, and normalize all license headers; Resolves #4. (#20)
Travis-CI had one check fail during GitHub outage, others cleared. Overriding defaults.
- Loading branch information...
Showing
with
426 additions
and 132 deletions.
- +14 −0 LICENSE_HEADER.txt
- +38 −0 pom.xml
- +2 −2 src/main/java/io/archivesunleashed/data/ArcRecordUtils.java
- +2 −1 src/main/java/io/archivesunleashed/data/WarcRecordUtils.java
- +2 −1 src/main/java/io/archivesunleashed/data/package-info.java
- +2 −2 src/main/java/io/archivesunleashed/io/ArcRecordWritable.java
- +2 −2 src/main/java/io/archivesunleashed/io/GenericArchiveRecordWritable.java
- +2 −2 src/main/java/io/archivesunleashed/io/WarcRecordWritable.java
- +2 −1 src/main/java/io/archivesunleashed/io/package-info.java
- +2 −2 src/main/java/io/archivesunleashed/mapreduce/WacArcInputFormat.java
- +2 −2 src/main/java/io/archivesunleashed/mapreduce/WacGenericInputFormat.java
- +2 −2 src/main/java/io/archivesunleashed/mapreduce/WacWarcInputFormat.java
- +2 −1 src/main/java/io/archivesunleashed/mapreduce/package-info.java
- +16 −0 src/main/scala/io/archivesunleashed/spark/archive/io/ArcRecord.scala
- +16 −0 src/main/scala/io/archivesunleashed/spark/archive/io/ArchiveRecord.scala
- +16 −0 src/main/scala/io/archivesunleashed/spark/archive/io/GenericArchiveRecord.scala
- +16 −0 src/main/scala/io/archivesunleashed/spark/archive/io/WarcRecord.scala
- +16 −0 src/main/scala/io/archivesunleashed/spark/matchbox/ComputeImageSize.scala
- +16 −0 src/main/scala/io/archivesunleashed/spark/matchbox/ComputeMD5.scala
- +2 −2 src/main/scala/io/archivesunleashed/spark/matchbox/DetectLanguage.scala
- +2 −2 src/main/scala/io/archivesunleashed/spark/matchbox/DetectMimeTypeTika.scala
- +2 −2 src/main/scala/io/archivesunleashed/spark/matchbox/ExtractAtMentions.scala
- +2 −2 src/main/scala/io/archivesunleashed/spark/matchbox/ExtractBoilerpipeText.scala
- +16 −0 src/main/scala/io/archivesunleashed/spark/matchbox/ExtractDate.scala
- +2 −2 src/main/scala/io/archivesunleashed/spark/matchbox/ExtractDomain.scala
- +2 −2 src/main/scala/io/archivesunleashed/spark/matchbox/ExtractEntities.scala
- +2 −2 src/main/scala/io/archivesunleashed/spark/matchbox/ExtractGraph.scala
- +2 −2 src/main/scala/io/archivesunleashed/spark/matchbox/ExtractHashtags.scala
- +2 −2 src/main/scala/io/archivesunleashed/spark/matchbox/ExtractImageLinks.scala
- +2 −2 src/main/scala/io/archivesunleashed/spark/matchbox/ExtractLinks.scala
- +16 −0 src/main/scala/io/archivesunleashed/spark/matchbox/ExtractPopularImages.scala
- +2 −24 src/main/scala/io/archivesunleashed/spark/matchbox/ExtractTextFromPDFs.scala
- +2 −2 src/main/scala/io/archivesunleashed/spark/matchbox/ExtractUrls.scala
- +2 −2 src/main/scala/io/archivesunleashed/spark/matchbox/NER3Classifier.scala
- +23 −7 src/main/scala/io/archivesunleashed/spark/matchbox/NERCombinedJson.scala
- +2 −2 src/main/scala/io/archivesunleashed/spark/matchbox/RecordLoader.scala
- +2 −2 src/main/scala/io/archivesunleashed/spark/matchbox/RemoveHTML.scala
- +16 −0 src/main/scala/io/archivesunleashed/spark/matchbox/RemoveHttpHeader.scala
- +2 −2 src/main/scala/io/archivesunleashed/spark/matchbox/StringUtils.scala
- +16 −0 src/main/scala/io/archivesunleashed/spark/matchbox/TupleFormatter.scala
- +16 −0 src/main/scala/io/archivesunleashed/spark/matchbox/TweetUtils.scala
- +3 −3 src/main/scala/io/archivesunleashed/spark/matchbox/WriteGDF.scala
- +2 −2 src/main/scala/io/archivesunleashed/spark/pythonconverters/ArcRecordConverter.scala
- +2 −2 src/main/scala/io/archivesunleashed/spark/rdd/RecordRDD.scala
- +16 −0 src/main/scala/io/archivesunleashed/spark/scripts/CrawlStatistics.scala
- +2 −2 src/main/scala/io/archivesunleashed/spark/scripts/Filter.scala
- +2 −2 src/main/scala/io/archivesunleashed/spark/scripts/SocialMediaLinks.scala
- +16 −0 src/main/scala/io/archivesunleashed/spark/utils/JsonUtil.scala
- +2 −2 src/test/java/io/archivesunleashed/ingest/WacArcLoaderTest.java
- +2 −1 src/test/java/io/archivesunleashed/ingest/WacWarcLoaderTest.java
- +2 −1 src/test/java/io/archivesunleashed/ingest/package-info.java
- +2 −2 src/test/java/io/archivesunleashed/io/ArcRecordWritableTest.java
- +2 −2 src/test/java/io/archivesunleashed/io/GenericArchiveRecordWritableTest.java
- +2 −2 src/test/java/io/archivesunleashed/io/WarcRecordWritableTest.java
- +2 −1 src/test/java/io/archivesunleashed/io/package-info.java
- +2 −2 src/test/java/io/archivesunleashed/mapreduce/WacArcInputFormatTest.java
- +2 −2 src/test/java/io/archivesunleashed/mapreduce/WacGenericInputFormatTest.java
- +2 −2 src/test/java/io/archivesunleashed/mapreduce/WacWarcInputFormatTest.java
- +2 −1 src/test/java/io/archivesunleashed/mapreduce/package-info.java
- +2 −2 src/test/scala/io/archivesunleashed/spark/ArcTest.scala
- +2 −2 src/test/scala/io/archivesunleashed/spark/GenericArchiveRecordTest.scala
- +2 −2 src/test/scala/io/archivesunleashed/spark/WarcTest.scala
- +2 −2 src/test/scala/io/archivesunleashed/spark/matchbox/ExtractAtMentionsTest.scala
- +16 −0 src/test/scala/io/archivesunleashed/spark/matchbox/ExtractDateTest.scala
- +2 −2 src/test/scala/io/archivesunleashed/spark/matchbox/ExtractDomainTest.scala
- +2 −2 src/test/scala/io/archivesunleashed/spark/matchbox/ExtractEntitiesTest.scala
- +2 −2 src/test/scala/io/archivesunleashed/spark/matchbox/ExtractHashtagsTest.scala
- +2 −2 src/test/scala/io/archivesunleashed/spark/matchbox/ExtractImageLinksTest.scala
- +2 −2 src/test/scala/io/archivesunleashed/spark/matchbox/ExtractLinksTest.scala
- +2 −2 src/test/scala/io/archivesunleashed/spark/matchbox/ExtractUrlsTest.scala
- +2 −2 src/test/scala/io/archivesunleashed/spark/matchbox/StringUtilsTest.scala
- +16 −0 src/test/scala/io/archivesunleashed/spark/matchbox/TupleFormatterTest.scala
- +2 −2 src/test/scala/io/archivesunleashed/spark/rdd/CountableRDDTest.scala
@@ -0,0 +1,14 @@
Archives Unleashed Toolkit (AUT):
An open-source platform for analyzing web archives.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
Oops, something went wrong.
0 comments on commit
eb0e8a9