summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorSanto Cariotti <santo@dcariotti.me>2025-01-13 15:13:49 +0100
committerSanto Cariotti <santo@dcariotti.me>2025-01-13 15:13:49 +0100
commite6d14f7b388f4f866234d444668d8801cbf9661c (patch)
treec0ab81ff9a7c931dd3420f104d026b635d1506e0
parente2ada26b97e97f1e182ce78e13171281bfbe6979 (diff)
Use for instead of combinations + partition after flatmap
-rw-r--r--co-purchase-analysis/src/main/scala/Main.scala11
1 files changed, 7 insertions, 4 deletions
diff --git a/co-purchase-analysis/src/main/scala/Main.scala b/co-purchase-analysis/src/main/scala/Main.scala
index d575ba6..8fd82cb 100644
--- a/co-purchase-analysis/src/main/scala/Main.scala
+++ b/co-purchase-analysis/src/main/scala/Main.scala
@@ -109,12 +109,15 @@ object CoPurchaseAnalysis {
val pairs = data
.map(order => (order.orderId, order.productId))
.groupByKey()
- .partitionBy(new HashPartitioner(50))
.flatMap { case (_, productIds) =>
- productIds.toSeq.sorted.combinations(2).map { case List(p1, p2) =>
- ProductPair(Math.min(p1, p2), Math.max(p1, p2)) -> 1
- }
+ val products = productIds.toSeq
+ for {
+ x <- products
+ y <- products if x < y
+ } yield (ProductPair(x, y), 1)
}
+ .partitionBy(new HashPartitioner(50))
+
val coProducts = pairs.reduceByKey(_ + _)
coProducts.map { case (ProductPair(product1, product2), count) =>