Permalink
Browse files
Add additional Spark conf options:
- spark.rdd.compress
- spark.shuffle.compress
- spark.serializer
Loading branch information...
Showing 2 changed files with 7 additions and 1 deletion.
-
+4
−1
app/jobs/spark_job.rb
-
+3
−0
config/application.yml.example
|
@@ -38,6 +38,9 @@ def perform(user_id, collection_id) |
|
|
|
spark_threads = ENV['SPARK_THREADS'] |
|
|
|
spark_heartbeat_interval = ENV['SPARK_HEARTBEAT_INTERVAL'] |
|
|
|
spark_driver_max_result_size = ENV['SPARK_DRIVER_MAXRESULTSIZE'] |
|
|
|
spark_rdd_compress = ENV['SPARK_RDD_COMPRESS'] |
|
|
|
spark_serializer = ENV['SPARK_SERIALIZER'] |
|
|
|
spark_shuffle_compress = ENV['SPARK_SHUFFLE_COMPRESS'] |
|
|
|
spark_job = %( |
|
|
|
import io.archivesunleashed._ |
|
|
|
import io.archivesunleashed.app._ |
|
@@ -70,7 +73,7 @@ def perform(user_id, collection_id) |
|
|
|
sys.exit |
|
|
|
) |
|
|
|
File.open(collection_spark_job_file, 'w') { |file| file.write(spark_job) } |
|
|
|
spark_job_cmd = spark_shell + ' --master local[' + spark_threads + '] --driver-memory ' + spark_memory_driver + ' --conf spark.network.timeout=' + spark_network_timeout + ' --conf spark.executor.heartbeatInterval=' + spark_heartbeat_interval + ' --conf spark.driver.maxResultSize=' + spark_driver_max_result_size + ' --packages "io.archivesunleashed:aut:' + aut_version + '" -i ' + collection_spark_job_file + ' | tee ' + collection_spark_job_file + '.log' |
|
|
|
spark_job_cmd = spark_shell + ' --master local[' + spark_threads + '] --driver-memory ' + spark_memory_driver + ' --conf spark.network.timeout=' + spark_network_timeout + ' --conf spark.executor.heartbeatInterval=' + spark_heartbeat_interval + ' --conf spark.driver.maxResultSize=' + spark_driver_max_result_size + ' --conf spark.rdd.compress=' + spark_rdd_compress + ' --conf spark.serializer=' + spark_serializer + ' --conf spark.shuffle.compress=' + spark_shuffle_compress + ' --packages "io.archivesunleashed:aut:' + aut_version + '" -i ' + collection_spark_job_file + ' | tee ' + collection_spark_job_file + '.log' |
|
|
|
logger.info 'Executing: ' + spark_job_cmd |
|
|
|
system(spark_job_cmd) |
|
|
|
domain_success = collection_derivatives + '/all-domains/output/_SUCCESS' |
|
|
|
@@ -21,6 +21,9 @@ SPARK_MEMORY_DRIVER: "5G" |
|
|
|
SPARK_NETWORK_TIMEOUT: "10000000" |
|
|
|
SPARK_HEARTBEAT_INTERVAL: "600s" |
|
|
|
SPARK_THREADS: "*" |
|
|
|
SPARK_RDD_COMPRESS: "true" |
|
|
|
SPARK_SHUFFLE_COMPRESS: "true" |
|
|
|
SPARK_SERIALIZER: "org.apache.spark.serializer.KryoSerializer" |
|
|
|
AUT_VERSION: "0.17.0" |
|
|
|
EMAIL_SERVER_NAME: "smtp.something.ca" |
|
|
|
EMAIL_DOMAIN: "something.ca" |
|
|
0 comments on commit 55b8aed