summary refs log tree commit diff
path: root/scripts/05-dataproc-submit.sh
diff options
context:
space:
mode:
Diffstat (limited to 'scripts/05-dataproc-submit.sh')
-rwxr-xr-x scripts/05-dataproc-submit.sh 8
1 files changed, 7 insertions, 1 deletions
diff --git a/scripts/05-dataproc-submit.sh b/scripts/05-dataproc-submit.sh
index b70e138..b2c9e42 100755
--- a/scripts/05-dataproc-submit.sh
+++ b/scripts/05-dataproc-submit.sh
@@ -2,6 +2,12 @@
set -e
+if [ "$#" -ne 1 ]; then
+ echo "Usage: 'sh ${PWD}/$0 <num-partitions>'"
+ exit 1
+fi
+
+NUM_PARTITIONS="$1"
INPUT_PATH="gs://${BUCKET_NAME}/input/"
OUTPUT_PATH="gs://${BUCKET_NAME}/output"
@@ -35,4 +41,4 @@ gcloud dataproc jobs submit spark \
--jar="gs://${BUCKET_NAME}/scala/co-purchase-analysis_2.12-1.0.jar" \
--region="${REGION}" \
--properties="spark.hadoop.fs.gs.impl=com.google.cloud.hadoop.fs.gcs.GoogleHadoopFileSystem" \
- -- "${INPUT_PATH}" "${OUTPUT_PATH}"
+ -- "${INPUT_PATH}" "${OUTPUT_PATH}" "${NUM_PARTITIONS}"