Permalink
Browse files
Fixed the scala style issues (some errors are now warnings)
- Loading branch information...
|
@@ -68,12 +68,12 @@ |
|
|
<check level="error" class="org.scalastyle.scalariform.NoWhitespaceBeforeLeftBracketChecker" enabled="true"></check> |
|
|
<check level="error" class="org.scalastyle.scalariform.NoWhitespaceAfterLeftBracketChecker" enabled="true"></check> |
|
|
<check level="error" class="org.scalastyle.scalariform.ReturnChecker" enabled="true"></check> |
|
|
<check level="error" class="org.scalastyle.scalariform.NullChecker" enabled="false"></check> |
|
|
<check level="warn" class="org.scalastyle.scalariform.NullChecker" enabled="true"></check> |
|
|
<check level="error" class="org.scalastyle.scalariform.NoCloneChecker" enabled="true"></check> |
|
|
<check level="error" class="org.scalastyle.scalariform.NoFinalizeChecker" enabled="true"></check> |
|
|
<check level="error" class="org.scalastyle.scalariform.CovariantEqualsChecker" enabled="true"></check> |
|
|
<check level="error" class="org.scalastyle.scalariform.StructuralTypeChecker" enabled="true"></check> |
|
|
<check level="error" class="org.scalastyle.file.RegexChecker" enabled="true"> |
|
|
<check level="warn" class="org.scalastyle.file.RegexChecker" enabled="true"> |
|
|
<parameters> |
|
|
<parameter name="regex"><![CDATA[println]]></parameter> |
|
|
</parameters> |
|
|
|
@@ -25,8 +25,8 @@ import org.apache.nutch.protocol.Content |
|
|
*/ |
|
|
trait SparklerSink { |
|
|
|
|
|
def configure() |
|
|
def configure(): Unit |
|
|
|
|
|
def consume(jobId: String, iterationId: String, taskId: String, iterator: Iterator[Content]) |
|
|
def consume(jobId: String, iterationId: String, taskId: String, iterator: Iterator[Content]): Unit |
|
|
|
|
|
} |
|
@@ -49,11 +49,10 @@ class SparklerJob extends Serializable { |
|
|
} |
|
|
|
|
|
def newCrawlDbSolrClient(): SolrProxy = { |
|
|
if (crawlDbUri.startsWith("http://")) { |
|
|
return new SolrProxy(new HttpSolrClient(crawlDbUri)) |
|
|
if (!crawlDbUri.startsWith("http://")) { |
|
|
throw new RuntimeException(s"$crawlDbUri not supported") |
|
|
} |
|
|
|
|
|
throw new RuntimeException(s"$crawlDbUri not supported") |
|
|
new SolrProxy(new HttpSolrClient(crawlDbUri)) |
|
|
} |
|
|
|
|
|
} |
|
|
|
@@ -97,30 +97,4 @@ class SolrResultIterator[T] extends Iterator[T] { |
|
|
|
|
|
object SolrResultIterator { |
|
|
val LOG = org.slf4j.LoggerFactory.getLogger(SolrResultIterator.getClass) |
|
|
|
|
|
def main(args: Array[String]) { |
|
|
val solrq = new SolrQuery("*:*") |
|
|
solrq.setRows(100) |
|
|
val crawlDbUrl = "http://localhost:8983/solr/crawldb" |
|
|
val topN = 5 |
|
|
/* |
|
|
val sc: SparkContext = new SparkContext(new SparkConf().setMaster("local").setAppName("test")) |
|
|
val rdd = new CrawlDbRDD(sc, |
|
|
"http://localhost:8983/solr/crawldb", maxGroups = 1, topN = 1) |
|
|
|
|
|
println(rdd.count()) |
|
|
sc.stop()*/ |
|
|
|
|
|
val batchSize = 100 |
|
|
val query = new SolrQuery("status:NEW") |
|
|
query.setFilterQueries(s"${Resource.GROUP}:twitter.com") |
|
|
//query.set("sort", "order") |
|
|
query.setRows(batchSize) |
|
|
|
|
|
val itt = new SolrResultIterator[Resource](new HttpSolrClient(crawlDbUrl), query, batchSize, |
|
|
classOf[Resource], limit = topN, closeClient = true) |
|
|
println(itt.toList.size) |
|
|
|
|
|
|
|
|
} |
|
|
} |
0 comments on commit
364e955