diff options
Diffstat (limited to 'scripts/05-dataproc-submit.sh')
-rwxr-xr-x | scripts/05-dataproc-submit.sh | 8 |
1 file changed, 7 insertions, 1 deletion
diff --git a/scripts/05-dataproc-submit.sh b/scripts/05-dataproc-submit.sh
index b70e138..b2c9e42 100755
--- a/scripts/05-dataproc-submit.sh
+++ b/scripts/05-dataproc-submit.sh
@@ -2,6 +2,12 @@
 
 set -e
 
+if [ "$#" -ne 1 ]; then
+    echo "Usage: 'sh ${PWD}/$0 <num-partitions>'"
+    exit 1
+fi
+
+NUM_PARTITIONS="$1"
 INPUT_PATH="gs://${BUCKET_NAME}/input/"
 OUTPUT_PATH="gs://${BUCKET_NAME}/output"
 
@@ -35,4 +41,4 @@ gcloud dataproc jobs submit spark \
     --jar="gs://${BUCKET_NAME}/scala/co-purchase-analysis_2.12-1.0.jar" \
     --region="${REGION}" \
     --properties="spark.hadoop.fs.gs.impl=com.google.cloud.hadoop.fs.gcs.GoogleHadoopFileSystem" \
-    -- "${INPUT_PATH}" "${OUTPUT_PATH}"
+    -- "${INPUT_PATH}" "${OUTPUT_PATH}" "${NUM_PARTITIONS}"