From 80930bb9d945b2ffee0fdda78ebd8cbe1caa4dc2 Mon Sep 17 00:00:00 2001
From: Santo Cariotti <santo@dcariotti.me>
Date: Mon, 13 Jan 2025 19:08:36 +0100
Subject: Partitions number as argument

---
 scripts/05-dataproc-submit.sh | 8 +++++++-
 1 file changed, 7 insertions(+), 1 deletion(-)

(limited to 'scripts/05-dataproc-submit.sh')

diff --git a/scripts/05-dataproc-submit.sh b/scripts/05-dataproc-submit.sh
index b70e138..b2c9e42 100755
--- a/scripts/05-dataproc-submit.sh
+++ b/scripts/05-dataproc-submit.sh
@@ -2,6 +2,12 @@
 
 set -e
 
+if [ "$#" -ne 1 ]; then
+    echo "Usage: 'sh ${PWD}/$0 <num-partitions>'"
+    exit 1
+fi
+
+NUM_PARTITIONS="$1"
 INPUT_PATH="gs://${BUCKET_NAME}/input/"
 OUTPUT_PATH="gs://${BUCKET_NAME}/output"
 
@@ -35,4 +41,4 @@ gcloud dataproc jobs submit spark \
     --jar="gs://${BUCKET_NAME}/scala/co-purchase-analysis_2.12-1.0.jar" \
     --region="${REGION}" \
     --properties="spark.hadoop.fs.gs.impl=com.google.cloud.hadoop.fs.gcs.GoogleHadoopFileSystem" \
-    -- "${INPUT_PATH}" "${OUTPUT_PATH}"
+    -- "${INPUT_PATH}" "${OUTPUT_PATH}" "${NUM_PARTITIONS}"
-- 
cgit v1.2.3-18-g5258