From 80930bb9d945b2ffee0fdda78ebd8cbe1caa4dc2 Mon Sep 17 00:00:00 2001
From: Santo Cariotti <santo@dcariotti.me>
Date: Mon, 13 Jan 2025 19:08:36 +0100
Subject: Partitions number as argument

---
 scripts/05-dataproc-submit.sh | 8 +++++++-
 1 file changed, 7 insertions(+), 1 deletion(-)

(limited to 'scripts/05-dataproc-submit.sh')

diff --git a/scripts/05-dataproc-submit.sh b/scripts/05-dataproc-submit.sh
index b70e138..b2c9e42 100755
--- a/scripts/05-dataproc-submit.sh
+++ b/scripts/05-dataproc-submit.sh
@@ -2,6 +2,12 @@
 
 set -e
 
+if [ "$#" -ne 1 ]; then
+    echo "Usage: 'sh ${PWD}/$0 <num-partitions>'"
+    exit 1
+fi
+
+NUM_PARTITIONS="$1"
 INPUT_PATH="gs://${BUCKET_NAME}/input/"
 OUTPUT_PATH="gs://${BUCKET_NAME}/output"
 
@@ -35,4 +41,4 @@ gcloud dataproc jobs submit spark \
     --jar="gs://${BUCKET_NAME}/scala/co-purchase-analysis_2.12-1.0.jar" \
     --region="${REGION}" \
     --properties="spark.hadoop.fs.gs.impl=com.google.cloud.hadoop.fs.gcs.GoogleHadoopFileSystem" \
-    -- "${INPUT_PATH}" "${OUTPUT_PATH}"
+    -- "${INPUT_PATH}" "${OUTPUT_PATH}" "${NUM_PARTITIONS}"
-- 
cgit v1.2.3-18-g5258