Skip to content
Please note that GitHub no longer supports your web browser.

We recommend upgrading to the latest Google Chrome or Firefox.

Learn more
Permalink
Browse files

Clean up test descriptions, addresses #372. (#416)

- Clean up test descriptions
- Rename misspelled test file (ExtarctImageDetailsDFTest.scala → ExtractImageDetailsDFTest.scala)
  • Loading branch information
ruebot authored and ianmilligan1 committed Jan 21, 2020
1 parent 71b459c commit ffef735721ab2448f0e01d4b0c4cc7530a488335
Showing with 130 additions and 130 deletions.
  1. +6 −6 src/test/scala/io/archivesunleashed/ArcTest.scala
  2. +5 −5 src/test/scala/io/archivesunleashed/ArchiveRecordTest.scala
  3. +1 −1 src/test/scala/io/archivesunleashed/CountableRDDTest.scala
  4. +19 −19 src/test/scala/io/archivesunleashed/RecordDFTest.scala
  5. +1 −1 src/test/scala/io/archivesunleashed/RecordLoaderTest.scala
  6. +20 −20 src/test/scala/io/archivesunleashed/RecordRDDTest.scala
  7. +3 −3 src/test/scala/io/archivesunleashed/WarcTest.scala
  8. +1 −1 src/test/scala/io/archivesunleashed/app/CommandLineAppTest.scala
  9. +1 −1 src/test/scala/io/archivesunleashed/app/DomainFrequencyExtractorTest.scala
  10. +1 −1 src/test/scala/io/archivesunleashed/app/DomainGraphExtractorDfTest.scala
  11. +1 −1 src/test/scala/io/archivesunleashed/app/DomainGraphExtractorTest.scala
  12. +1 −1 src/test/scala/io/archivesunleashed/app/ExtractEntitiesTest.scala
  13. +3 −3 src/test/scala/io/archivesunleashed/app/ExtractGraphXTest.scala
  14. +1 −1 ...cala/io/archivesunleashed/app/{ExtarctImageDetailsDFTest.scala → ExtractImageDetailsDFTest.scala}
  15. +1 −1 src/test/scala/io/archivesunleashed/app/ExtractPopularImagesDFTest.scala
  16. +1 −1 src/test/scala/io/archivesunleashed/app/ExtractPopularImagesRDDTest.scala
  17. +1 −1 src/test/scala/io/archivesunleashed/app/PlainTextExtractorTest.scala
  18. +3 −3 src/test/scala/io/archivesunleashed/app/WriteGEXFTest.scala
  19. +2 −2 src/test/scala/io/archivesunleashed/app/WriteGraphMLTest.scala
  20. +9 −9 src/test/scala/io/archivesunleashed/app/WriteGraphTest.scala
  21. +2 −2 src/test/scala/io/archivesunleashed/app/WriteGraphXMLTest.scala
  22. +1 −1 src/test/scala/io/archivesunleashed/df/DataFrameLoaderTest.scala
  23. +1 −1 src/test/scala/io/archivesunleashed/df/ExtractAudioDetailsTest.scala
  24. +5 −5 src/test/scala/io/archivesunleashed/df/ExtractDateDFTest.scala
  25. +1 −1 src/test/scala/io/archivesunleashed/df/ExtractHyperlinksTest.scala
  26. +1 −1 src/test/scala/io/archivesunleashed/df/ExtractImageDetailsTest.scala
  27. +1 −1 src/test/scala/io/archivesunleashed/df/ExtractImageLinksTest.scala
  28. +1 −1 src/test/scala/io/archivesunleashed/df/ExtractPDFDetailsTest.scala
  29. +1 −1 src/test/scala/io/archivesunleashed/df/ExtractPresentationProgramDetailsTest.scala
  30. +1 −1 src/test/scala/io/archivesunleashed/df/ExtractSpreadsheetDetailsTest.scala
  31. +3 −3 src/test/scala/io/archivesunleashed/df/ExtractTextFilesDetailsTest.scala
  32. +1 −1 src/test/scala/io/archivesunleashed/df/ExtractVideoDetailsTest.scala
  33. +1 −1 src/test/scala/io/archivesunleashed/df/ExtractWordProcessorDetailsTest.scala
  34. +2 −2 src/test/scala/io/archivesunleashed/df/SaveMediaBytesTest.scala
  35. +1 −1 src/test/scala/io/archivesunleashed/df/SimpleDfTest.scala
  36. +1 −1 src/test/scala/io/archivesunleashed/matchbox/ComputeImageSizeTest.scala
  37. +2 −2 src/test/scala/io/archivesunleashed/matchbox/ExtractBoilerPipeTextTest.scala
  38. +1 −1 src/test/scala/io/archivesunleashed/matchbox/ExtractDateRDDTest.scala
  39. +4 −4 src/test/scala/io/archivesunleashed/matchbox/ExtractDomainTest.scala
  40. +3 −3 src/test/scala/io/archivesunleashed/matchbox/ExtractImageLinksTest.scala
  41. +3 −3 src/test/scala/io/archivesunleashed/matchbox/ExtractLinksTest.scala
  42. +1 −1 src/test/scala/io/archivesunleashed/matchbox/ExtractTextFromPDFsTest.scala
  43. +1 −1 src/test/scala/io/archivesunleashed/matchbox/RemoveHTMLTest.scala
  44. +1 −1 src/test/scala/io/archivesunleashed/matchbox/RemoveHTTPHeaderTest.scala
  45. +4 −4 src/test/scala/io/archivesunleashed/matchbox/StringUtilsTest.scala
  46. +2 −2 src/test/scala/io/archivesunleashed/matchbox/TupleFormatterTest.scala
  47. +3 −3 src/test/scala/io/archivesunleashed/util/JsonUtilsTest.scala
@@ -41,11 +41,11 @@ class ArcTest extends FunSuite with BeforeAndAfter {

val dayMonthTestA = "200805"

test("count records") {
test("Count records") {
assert(RecordLoader.loadArchives(arcPath, sc).count == 300L)
}

test("filter date") {
test("Filter date RDD") {
val startSS = 0
val monthSS = 6
val four = RecordLoader.loadArchives(arcPath, sc)
@@ -62,7 +62,7 @@ class ArcTest extends FunSuite with BeforeAndAfter {
five.foreach(date => assert(date.substring(startSS, monthSS) == dayMonthTestA))
}

test("filter url pattern") {
test("Filter URL pattern RDD") {
val keepMatches = RecordLoader.loadArchives(arcPath, sc)
.keepUrlPatterns(Set("http://www.archive.org/about/.*".r))
val discardMatches = RecordLoader.loadArchives(arcPath, sc)
@@ -71,14 +71,14 @@ class ArcTest extends FunSuite with BeforeAndAfter {
assert(discardMatches.count == 284L)
}

test("count links") {
test("Count links RDD") {
val links = RecordLoader.loadArchives(arcPath, sc)
.map(r => ExtractLinksRDD(r.getUrl, r.getContentString))
.reduce((a, b) => a ++ b)
assert(links.size == 664)
}

test("detect language") {
test("Detect language RDD") {
val languageCounts = RecordLoader.loadArchives(arcPath, sc)
.keepMimeTypes(Set("text/html"))
.map(r => RemoveHTMLRDD(r.getContentString))
@@ -99,7 +99,7 @@ class ArcTest extends FunSuite with BeforeAndAfter {
}
}

test("detect mime type tika") {
test("Detect MIMEtype Tika RDD") {
val mimeTypeCounts = RecordLoader.loadArchives(arcPath, sc)
.map(r => RemoveHTTPHeaderRDD(r.getContentString))
.groupBy(content => DetectMimeTypeTika(content.getBytes))
@@ -46,12 +46,12 @@ class ArchiveRecordTest extends FunSuite with BeforeAndAfter {
sc = new SparkContext(conf)
}

test("count records") {
test("Count records") {
assert(RecordLoader.loadArchives(arcPath, sc).count == 300L)
assert(RecordLoader.loadArchives(warcPath, sc).count == 299L)
}

test("Resource name produces expected result.") {
test("Resource name produces expected result") {
val textSampleArc = RecordLoader.loadArchives(arcPath, sc)
.map(x => FilenameUtils.getName(x.getArchiveFilename))
.take(3)
@@ -81,7 +81,7 @@ class ArchiveRecordTest extends FunSuite with BeforeAndAfter {
assert(textSampleWarc.deep == Array("", exampleUrl, exampleUrl).deep)
}

test("Urls") {
test("URLs") {
val textSampleArc = RecordLoader.loadArchives(arcPath, sc)
.map(x => x.getUrl).take(3)
val textSampleWarc = RecordLoader.loadArchives(warcPath, sc)
@@ -92,7 +92,7 @@ class ArchiveRecordTest extends FunSuite with BeforeAndAfter {
"http://www.archive.org/robots.txt", "http://www.archive.org/").deep)
}

test("Mime-Type") {
test("MIMEtype") {
val textSampleArc = RecordLoader.loadArchives(arcPath, sc)
.map(x => x.getMimeType).take(3)
val textSampleWarc = RecordLoader.loadArchives(warcPath, sc)
@@ -103,7 +103,7 @@ class ArchiveRecordTest extends FunSuite with BeforeAndAfter {
"text/html").deep)
}

test("Get Http Status") {
test("Get HTTP status") {
val textSampleArc = RecordLoader.loadArchives(arcPath, sc)
.map(x => x.getHttpStatus).take(3)
val textSampleWarc = RecordLoader.loadArchives(warcPath, sc)
@@ -38,7 +38,7 @@ class CountableRDDTest extends FunSuite with BeforeAndAfter {
sc = new SparkContext(conf)
}

test("count records") {
test("Count records; Extract Domain RDD ") {
val base = RecordLoader.loadArchives(arcPath, sc)
.keepValidPages()
.map(r => ExtractDomainRDD(r.getUrl))
@@ -39,7 +39,7 @@ class RecordDFTest extends FunSuite with BeforeAndAfter {
sc = new SparkContext(conf)
}

test("keep Valid Pages") {
test("Keep valid pages DF") {
val expected = "http://www.archive.org/"
val base = RecordLoader.loadArchives(arcPath, sc)
.all()
@@ -48,7 +48,7 @@ class RecordDFTest extends FunSuite with BeforeAndAfter {
assert (base.toString == expected)
}

test("Discard MimeTypes") {
test("Discard MIMEtypes DF") {
val expected = "filedesc://IAH-20080430204825-00000-blackbook.arc"
val mimeTypes = Set("text/html")
val base = RecordLoader.loadArchives(arcPath, sc)
@@ -59,7 +59,7 @@ class RecordDFTest extends FunSuite with BeforeAndAfter {
assert (base.toString == expected)
}

test("Discard Date") {
test("Discard date DF") {
val expected = "20080430"
val date = "20080429"
val base = RecordLoader.loadArchives(arcPath, sc)
@@ -70,7 +70,7 @@ class RecordDFTest extends FunSuite with BeforeAndAfter {
assert (base.toString == expected)
}

test("Discard Urls") {
test("Discard URLs DF") {
val expected = "http://www.archive.org/index.php"
val url = Set("http://www.archive.org/")
val base = RecordLoader.loadArchives(arcPath, sc)
@@ -81,7 +81,7 @@ class RecordDFTest extends FunSuite with BeforeAndAfter {
assert (base.toString == expected)
}

test("Discard Domains") {
test("Discard domains DF") {
val expected = "http://www.hideout.com.br/"
val domain = Set("www.archive.org")
val base = RecordLoader.loadArchives(arcPath, sc)
@@ -92,7 +92,7 @@ class RecordDFTest extends FunSuite with BeforeAndAfter {
assert (base.toString == expected)
}

test("Discard HttpStatus") {
test("Discard HTTP status DF") {
val expected = "200"
val statusCode = Set("000")
val base = RecordLoader.loadArchives(arcPath, sc)
@@ -103,7 +103,7 @@ class RecordDFTest extends FunSuite with BeforeAndAfter {
assert (base.toString == expected)
}

test("Discard Content") {
test("Discard content DF") {
val expected = "dns:www.archive.org"
val contentRegex = Set("Content-Length: [0-9]{4}".r)
val base = RecordLoader.loadArchives(arcPath, sc)
@@ -115,7 +115,7 @@ class RecordDFTest extends FunSuite with BeforeAndAfter {
assert (base.toString == expected)
}

test("Discard UrlPatterns") {
test("Discard URL patterns DF") {
val expected = "dns:www.archive.org"
val urlRegex = Set(".*images.*".r)
val base = RecordLoader.loadArchives(arcPath, sc)
@@ -127,7 +127,7 @@ class RecordDFTest extends FunSuite with BeforeAndAfter {
assert (base.toString == expected)
}

test("Discard Languages") {
test("Discard languages DF") {
val expected = "dns:www.archive.org"
val languages = Set("th","de","ht")
val base = RecordLoader.loadArchives(arcPath, sc)
@@ -139,7 +139,7 @@ class RecordDFTest extends FunSuite with BeforeAndAfter {
assert (base.toString == expected)
}

test("Keep HttpStatus") {
test("Keep HTTP status DF") {
val expected = "http://www.archive.org/robots.txt"
val statusCode = Set("200")
val base = RecordLoader.loadArchives(arcPath, sc)
@@ -150,7 +150,7 @@ class RecordDFTest extends FunSuite with BeforeAndAfter {
assert (base.toString == expected)
}

test("Keep Date") {
test("Keep date DF") {
val expected = "http://www.archive.org/"
val month = List("04")
val base = RecordLoader.loadArchives(arcPath, sc)
@@ -161,7 +161,7 @@ class RecordDFTest extends FunSuite with BeforeAndAfter {
assert (base.toString == expected)
}

test("Keep Urls") {
test("Keep URLs DF") {
val expected = "http://www.archive.org/"
val url = Set("http://www.archive.org/")
val base = RecordLoader.loadArchives(arcPath, sc)
@@ -172,7 +172,7 @@ class RecordDFTest extends FunSuite with BeforeAndAfter {
assert (base.toString == expected)
}

test("Keep Domains") {
test("Keep domains DF") {
val expected = "http://www.archive.org/robots.txt"
val domain = Set("www.archive.org")
val base = RecordLoader.loadArchives(arcPath, sc)
@@ -183,7 +183,7 @@ class RecordDFTest extends FunSuite with BeforeAndAfter {
assert (base.toString == expected)
}

test("Keep MimeTypesTika") {
test("Keep MIMEtypes Tika DF") {
val expected = "image/jpeg"
val mimeType = Set("image/jpeg")
val base = RecordLoader.loadArchives(arcPath, sc)
@@ -194,7 +194,7 @@ class RecordDFTest extends FunSuite with BeforeAndAfter {
assert (base.toString == expected)
}

test("Keep MimeTypes") {
test("Keep MIMEtypes DF") {
val expected = "text/html"
val mimeType = Set("text/html")
val base = RecordLoader.loadArchives(arcPath, sc)
@@ -205,7 +205,7 @@ class RecordDFTest extends FunSuite with BeforeAndAfter {
assert (base.toString == expected)
}

test("Keep content") {
test("Keep content DF") {
val expected = "http://www.archive.org/images/logoc.jpg"
val contentRegex = Set("Content-Length: [0-9]{4}".r)
val base = RecordLoader.loadArchives(arcPath, sc)
@@ -217,7 +217,7 @@ class RecordDFTest extends FunSuite with BeforeAndAfter {
assert (base.toString == expected)
}

test("Keep UrlPatterns") {
test("Keep URL patterns DF") {
val expected = "http://www.archive.org/images/go-button-gateway.gif"
val urlRegex = Set(".*images.*".r)
val base = RecordLoader.loadArchives(arcPath, sc)
@@ -229,7 +229,7 @@ class RecordDFTest extends FunSuite with BeforeAndAfter {
assert (base.toString == expected)
}

test("Keep Languages") {
test("Keep languages DF") {
val expected = "http://www.archive.org/images/logoc.jpg"
val languages = Set("th","de","ht")
val base = RecordLoader.loadArchives(arcPath, sc)
@@ -241,7 +241,7 @@ class RecordDFTest extends FunSuite with BeforeAndAfter {
assert (base.toString == expected)
}

test("Keep keepMimeTypes") {
test("Keep images DF") {
val expected = "image/jpeg"
val base = RecordLoader.loadArchives(arcPath, sc)
.all()
@@ -37,7 +37,7 @@ class RecordLoaderTest extends FunSuite with BeforeAndAfter {
sc = new SparkContext(conf)
}

test("loads Warc") {
test("Load WARC") {
val base = RecordLoader.loadArchives(warcPath, sc)
.keepValidPages()
.map(x => x.getUrl)

0 comments on commit ffef735

Please sign in to comment.
You can’t perform that action at this time.