Skip to content
Permalink
Browse files

Clean-up underscore import, and scalastyle warnings. (#386)

  • Loading branch information
ruebot authored and ianmilligan1 committed Nov 28, 2019
1 parent 4042180 commit 560ed2b0fd6f982b2dfba334c721d139bf60b49d
Showing with 39 additions and 90 deletions.
  1. +1 −1 config/checkstyle/scalastyle_config.xml
  2. +2 −2 src/main/scala/io/archivesunleashed/app/ExtractPopularImagesDF.scala
  3. +1 −1 src/main/scala/io/archivesunleashed/app/WriteGraphXML.scala
  4. +1 −1 src/main/scala/io/archivesunleashed/df/package.scala
  5. +2 −0 src/main/scala/io/archivesunleashed/matchbox/DetectMimeTypeTika.scala
  6. +5 −3 src/main/scala/io/archivesunleashed/package.scala
  7. +1 −3 src/test/scala/io/archivesunleashed/ArcTest.scala
  8. +1 −3 src/test/scala/io/archivesunleashed/CountableRDDTest.scala
  9. +1 −3 src/test/scala/io/archivesunleashed/RecordRDDTest.scala
  10. +3 −7 src/test/scala/io/archivesunleashed/app/ExtractGraphXTest.scala
  11. +1 −1 src/test/scala/io/archivesunleashed/app/ExtractPopularImagesDFTest.scala
  12. +0 −3 src/test/scala/io/archivesunleashed/app/WriteGraphXMLTest.scala
  13. +0 −3 src/test/scala/io/archivesunleashed/df/DataFrameLoaderTest.scala
  14. +3 −5 src/test/scala/io/archivesunleashed/df/ExtarctHyperlinksTest.scala
  15. +1 −4 src/test/scala/io/archivesunleashed/df/ExtractAudioDetailsTest.scala
  16. +1 −4 src/test/scala/io/archivesunleashed/df/ExtractImageDetailsTest.scala
  17. +1 −4 src/test/scala/io/archivesunleashed/df/ExtractImageLinksTest.scala
  18. +1 −4 src/test/scala/io/archivesunleashed/df/ExtractPDFDetailsTest.scala
  19. +1 −4 src/test/scala/io/archivesunleashed/df/ExtractPresentationProgramDetailsTest.scala
  20. +1 −4 src/test/scala/io/archivesunleashed/df/ExtractSpreadsheetDetailsTest.scala
  21. +1 −4 src/test/scala/io/archivesunleashed/df/ExtractTextFilesDetailsTest.scala
  22. +1 −4 src/test/scala/io/archivesunleashed/df/ExtractVideoDetailsTest.scala
  23. +1 −4 src/test/scala/io/archivesunleashed/df/ExtractWordProcessorDetailsTest.scala
  24. +3 −5 src/test/scala/io/archivesunleashed/df/SaveMediaBytesTest.scala
  25. +2 −4 src/test/scala/io/archivesunleashed/df/SimpleDfTest.scala
  26. +1 −3 src/test/scala/io/archivesunleashed/matchbox/ExtractDateTest.scala
  27. +0 −2 src/test/scala/io/archivesunleashed/matchbox/ExtractTextFromPDFsTest.scala
  28. +2 −4 src/test/scala/io/archivesunleashed/matchbox/GetExtensionMimeTest.scala
@@ -149,7 +149,7 @@
<check class="org.scalastyle.scalariform.EmptyInterpolatedStringChecker" level="warning" enabled="true"></check>
<check class="org.scalastyle.scalariform.MultipleStringLiteralsChecker" level="warning" enabled="true">
<parameters>
<parameter name="allowed"><![CDATA[2]]></parameter>
<parameter name="allowed"><![CDATA[10]]></parameter>
<parameter name="ignoreRegex"><![CDATA[^""$]]></parameter>
</parameters>
</check>
@@ -47,8 +47,8 @@ object ExtractPopularImagesDF {

df.join(count,"md5")
.groupBy("md5")
.agg(first("url").as("url"), first("count").as("count"))
.select("url","count")
.agg(first("url").as("url"), first("count").as("count"))
.select("url","count")
.orderBy(desc("count"))
.limit(limit)
}
@@ -17,7 +17,7 @@ package io.archivesunleashed.app
import org.apache.spark.graphx.Graph
import java.nio.charset.StandardCharsets
import java.nio.file.{Files, Paths}
import io.archivesunleashed.app.ExtractGraphX.{VertexData,EdgeData,VertexDataPR}
import io.archivesunleashed.app.ExtractGraphX.{EdgeData, VertexData, VertexDataPR}

import org.apache.spark.rdd.RDD

@@ -16,7 +16,7 @@
package io.archivesunleashed

import org.apache.commons.io.IOUtils
import io.archivesunleashed.matchbox.{ComputeMD5RDD}
import io.archivesunleashed.matchbox.ComputeMD5RDD
import org.apache.spark.sql.functions.udf
import org.apache.spark.sql.DataFrame
import java.io.ByteArrayInputStream
@@ -15,7 +15,9 @@
*/
package io.archivesunleashed.matchbox

// scalastyle:off underscore.import
import scala.collection.JavaConverters._
// scalastyle:on underscore.import
import org.apache.tika.Tika
import org.apache.tika.detect.DefaultDetector
import org.apache.tika.io.TikaInputStream
@@ -21,7 +21,9 @@ import java.util.Base64

import io.archivesunleashed.data.{ArchiveRecordInputFormat, ArchiveRecordWritable}
import ArchiveRecordWritable.ArchiveFormat
import io.archivesunleashed.matchbox.{DetectLanguage, DetectMimeTypeTika, ExtractDate, ExtractDomainRDD, ExtractImageDetails, ExtractImageLinksRDD, ExtractLinksRDD, GetExtensionMimeRDD, RemoveHTMLRDD}
import io.archivesunleashed.matchbox.{DetectLanguage, DetectMimeTypeTika, ExtractDate,
ExtractDomainRDD, ExtractImageDetails, ExtractImageLinksRDD,
ExtractLinksRDD, GetExtensionMimeRDD, RemoveHTMLRDD}
import io.archivesunleashed.matchbox.ExtractDate.DateComponent
import org.apache.commons.codec.binary.Hex
import org.apache.commons.io.FilenameUtils
@@ -88,8 +90,8 @@ package object archivesunleashed {
*/
implicit class WARecordRDD(rdd: RDD[ArchiveRecord]) extends java.io.Serializable {

/*Creates a column for Bytes as well in Dataframe.
Call KeepImages OR KeepValidPages on RDD depending upon the requirement before calling this method */
/* Builds a DataFrame that also includes a column for the record bytes.
Call KeepImages OR KeepValidPages on the RDD, depending upon the requirement, before calling this method. */
def all(): DataFrame = {
val records = rdd.map(r => Row(r.getCrawlDate, r.getUrl, r.getMimeType,
DetectMimeTypeTika(r.getBinaryBytes), r.getContentString, r.getBinaryBytes))
@@ -17,10 +17,8 @@
package io.archivesunleashed

import com.google.common.io.Resources
import io.archivesunleashed.matchbox.{DetectLanguage, DetectMimeTypeTika, ExtractLinksRDD, RemoveHTMLRDD, RemoveHTTPHeaderRDD}
import io.archivesunleashed.matchbox.ExtractDate.DateComponent
// scalastyle:off underscore.import
import io.archivesunleashed.matchbox._
// scalastyle:on underscore.import
import org.apache.spark.{SparkConf, SparkContext}
import org.junit.runner.RunWith
import org.scalatest.junit.JUnitRunner
@@ -17,9 +17,7 @@
package io.archivesunleashed

import com.google.common.io.Resources
// scalastyle:off underscore.import
import io.archivesunleashed.matchbox._
// scalastyle:on underscore.import
import io.archivesunleashed.matchbox.ExtractDomainRDD
import org.apache.spark.{SparkConf, SparkContext}
import org.junit.runner.RunWith
import org.scalatest.junit.JUnitRunner
@@ -17,10 +17,8 @@
package io.archivesunleashed

import com.google.common.io.Resources
import io.archivesunleashed.matchbox.ExtractDate
import io.archivesunleashed.matchbox.ExtractDate.DateComponent
// scalastyle:off underscore.import
import io.archivesunleashed.matchbox._
// scalastyle:on underscore.import
import org.apache.spark.{SparkConf, SparkContext}
import org.junit.runner.RunWith
import org.scalatest.junit.JUnitRunner
@@ -20,13 +20,9 @@ import java.io.File
import java.nio.file.{Files, Paths}

import com.google.common.io.Resources
// scalastyle:off underscore.import
import io.archivesunleashed._
import io.archivesunleashed.matchbox._
import io.archivesunleashed.app._
import io.archivesunleashed.util._
import org.apache.spark.graphx._
// scalastyle:on underscore.import
import io.archivesunleashed.{ArchiveRecord, RecordLoader}
import io.archivesunleashed.app.ExtractGraphX.{EdgeData, VertexData, VertexDataPR}
import io.archivesunleashed.matchbox.{ExtractDomainRDD, ExtractLinksRDD, WWWLink}
import org.apache.commons.io.FileUtils
import org.apache.spark.{SparkConf, SparkContext}
import org.junit.runner.RunWith
@@ -53,4 +53,4 @@ class ExtractPopularImagesDFTest extends FunSuite with BeforeAndAfter {
sc.stop()
}
}
}
}
@@ -22,9 +22,6 @@ import org.apache.spark.{SparkConf, SparkContext}
import org.junit.runner.RunWith
import org.scalatest.junit.JUnitRunner
import org.scalatest.{BeforeAndAfter, FunSuite}
// scalastyle:off underscore.import
import org.apache.spark.graphx._
// scalastyle:on underscore.import
import scala.io.Source

@RunWith(classOf[JUnitRunner])
@@ -17,9 +17,6 @@ package io.archivesunleashed.df

import io.archivesunleashed.DataFrameLoader
import com.google.common.io.Resources
// scalastyle:off underscore.import
import org.apache.spark.sql.functions._
// scalastyle:on underscore.import
import org.apache.spark.sql.SparkSession
import org.apache.spark.{SparkConf, SparkContext}
import org.junit.runner.RunWith
@@ -17,10 +17,8 @@
package io.archivesunleashed

import com.google.common.io.Resources
// scalastyle:off underscore.import
import io.archivesunleashed.df._
import org.apache.spark.sql.functions._
// scalastyle:on underscore.import
import io.archivesunleashed.df.{ExtractDomainDF, ExtractLinksDF, RemovePrefixWWWDF}
import org.apache.spark.sql.functions.{array, explode_outer, lower, udf}
import org.apache.spark.sql.SparkSession
import org.apache.spark.{SparkConf, SparkContext}
import org.junit.runner.RunWith
@@ -58,7 +56,7 @@ class ExtractHyperlinksTest extends FunSuite with BeforeAndAfter {
$"crawl_date",
explode_outer(ExtractLinksDF($"url",$"content")).as("link")
)
.filter(lower($"content").contains("keynote")) //filtered on keyword internet
.filter(lower($"content").contains("keynote")) // filtered on keyword "keynote"

val results = interResults.select($"url",$"Domain",$"crawl_date",dest(array($"link")).as("destination_page")).head(3)

@@ -18,10 +18,7 @@ package io.archivesunleashed

import com.google.common.io.Resources
import org.apache.spark.sql.SparkSession
// scalastyle:off underscore.import
import io.archivesunleashed.df._
import org.apache.spark.sql.functions._
// scalastyle:on underscore.import
import org.apache.spark.sql.functions.desc
import org.apache.spark.{SparkConf, SparkContext}
import org.junit.runner.RunWith
import org.scalatest.junit.JUnitRunner
@@ -18,10 +18,7 @@ package io.archivesunleashed

import com.google.common.io.Resources
import org.apache.spark.sql.SparkSession
// scalastyle:off underscore.import
import io.archivesunleashed.df._
import org.apache.spark.sql.functions._
// scalastyle:on underscore.import
import org.apache.spark.sql.functions.desc
import org.apache.spark.{SparkConf, SparkContext}
import org.junit.runner.RunWith
import org.scalatest.junit.JUnitRunner
@@ -17,10 +17,7 @@
package io.archivesunleashed

import com.google.common.io.Resources
// scalastyle:off underscore.import
import io.archivesunleashed.df._
import org.apache.spark.sql.functions._
// scalastyle:on underscore.import
import org.apache.spark.sql.functions.desc
import org.apache.spark.sql.SparkSession
import org.apache.spark.{SparkConf, SparkContext}
import org.junit.runner.RunWith
@@ -18,10 +18,7 @@ package io.archivesunleashed

import com.google.common.io.Resources
import org.apache.spark.sql.SparkSession
// scalastyle:off underscore.import
import io.archivesunleashed.df._
import org.apache.spark.sql.functions._
// scalastyle:on underscore.import
import org.apache.spark.sql.functions.desc
import org.apache.spark.{SparkConf, SparkContext}
import org.junit.runner.RunWith
import org.scalatest.junit.JUnitRunner
@@ -18,10 +18,7 @@ package io.archivesunleashed

import com.google.common.io.Resources
import org.apache.spark.sql.SparkSession
// scalastyle:off underscore.import
import io.archivesunleashed.df._
import org.apache.spark.sql.functions._
// scalastyle:on underscore.import
import org.apache.spark.sql.functions.desc
import org.apache.spark.{SparkConf, SparkContext}
import org.junit.runner.RunWith
import org.scalatest.junit.JUnitRunner
@@ -18,10 +18,7 @@ package io.archivesunleashed

import com.google.common.io.Resources
import org.apache.spark.sql.SparkSession
// scalastyle:off underscore.import
import io.archivesunleashed.df._
import org.apache.spark.sql.functions._
// scalastyle:on underscore.import
import org.apache.spark.sql.functions.desc
import org.apache.spark.{SparkConf, SparkContext}
import org.junit.runner.RunWith
import org.scalatest.junit.JUnitRunner
@@ -18,10 +18,7 @@ package io.archivesunleashed

import com.google.common.io.Resources
import org.apache.spark.sql.SparkSession
// scalastyle:off underscore.import
import io.archivesunleashed.df._
import org.apache.spark.sql.functions._
// scalastyle:on underscore.import
import org.apache.spark.sql.functions.desc
import org.apache.spark.{SparkConf, SparkContext}
import org.junit.runner.RunWith
import org.scalatest.junit.JUnitRunner
@@ -18,10 +18,7 @@ package io.archivesunleashed

import com.google.common.io.Resources
import org.apache.spark.sql.SparkSession
// scalastyle:off underscore.import
import io.archivesunleashed.df._
import org.apache.spark.sql.functions._
// scalastyle:on underscore.import
import org.apache.spark.sql.functions.desc
import org.apache.spark.{SparkConf, SparkContext}
import org.junit.runner.RunWith
import org.scalatest.junit.JUnitRunner
@@ -18,10 +18,7 @@ package io.archivesunleashed

import com.google.common.io.Resources
import org.apache.spark.sql.SparkSession
// scalastyle:off underscore.import
import io.archivesunleashed.df._
import org.apache.spark.sql.functions._
// scalastyle:on underscore.import
import org.apache.spark.sql.functions.desc
import org.apache.spark.{SparkConf, SparkContext}
import org.junit.runner.RunWith
import org.scalatest.junit.JUnitRunner
@@ -17,12 +17,10 @@
package io.archivesunleashed

import com.google.common.io.Resources
// scalastyle:off underscore.import
import io.archivesunleashed.df._
import org.apache.spark.sql.functions._
import io.archivesunleashed.matchbox._
// scalastyle:on underscore.import
import io.archivesunleashed.df.SaveBytes
import io.archivesunleashed.matchbox.ComputeMD5RDD
import org.apache.spark.sql.SparkSession
import org.apache.spark.sql.functions.desc
import org.apache.spark.{SparkConf, SparkContext}
import org.junit.runner.RunWith
import org.scalatest.junit.JUnitRunner
@@ -17,11 +17,9 @@
package io.archivesunleashed

import com.google.common.io.Resources
// scalastyle:off underscore.import
import io.archivesunleashed.df._
import org.apache.spark.sql.functions._
// scalastyle:on underscore.import
import io.archivesunleashed.df.ExtractDomainDF
import org.apache.spark.sql.SparkSession
import org.apache.spark.sql.functions.desc
import org.apache.spark.{SparkConf, SparkContext}
import org.junit.runner.RunWith
import org.scalatest.junit.JUnitRunner
@@ -16,9 +16,7 @@

package io.archivesunleashed.matchbox

// scalastyle:off underscore.import
import io.archivesunleashed.matchbox.ExtractDate.DateComponent._
// scalastyle:on underscore.import
import io.archivesunleashed.matchbox.ExtractDate.DateComponent.{DD, MM, YYYY, YYYYMM, YYYYMMDD}
import org.junit.runner.RunWith
import org.scalatest.FunSuite
import org.scalatest.junit.JUnitRunner
@@ -19,9 +19,7 @@ package io.archivesunleashed.matchbox
import org.apache.tika.parser.pdf.PDFParser
import org.junit.runner.RunWith
import org.scalatest.FunSuite
// scalastyle:off underscore.import
import org.scalatest.Matchers
// scalastyle:on underscore.import
import org.scalatest.junit.JUnitRunner

@RunWith(classOf[JUnitRunner])
@@ -18,10 +18,8 @@ package io.archivesunleashed.matchbox

import com.google.common.io.Resources
import org.apache.spark.sql.{DataFrame, Row}
// scalastyle:off underscore.import
import io.archivesunleashed._
import org.apache.spark.sql.functions._
// scalastyle:on underscore.import
import io.archivesunleashed.{ArchiveRecord, RecordLoader}
import org.apache.spark.sql.functions.desc
import org.apache.spark.{SparkConf, SparkContext}
import org.junit.runner.RunWith
import org.scalatest.junit.JUnitRunner

0 comments on commit 560ed2b

Please sign in to comment.
You can’t perform that action at this time.