diff options
author | Santo Cariotti <santo@dcariotti.me> | 2025-01-13 15:13:49 +0100 |
---|---|---|
committer | Santo Cariotti <santo@dcariotti.me> | 2025-01-13 15:13:49 +0100 |
commit | e6d14f7b388f4f866234d444668d8801cbf9661c (patch) | |
tree | c0ab81ff9a7c931dd3420f104d026b635d1506e0 | |
parent | e2ada26b97e97f1e182ce78e13171281bfbe6979 (diff) |
Use for instead of combinations + partition after flatmap
-rw-r--r-- | co-purchase-analysis/src/main/scala/Main.scala | 11 |
1 files changed, 7 insertions, 4 deletions
diff --git a/co-purchase-analysis/src/main/scala/Main.scala b/co-purchase-analysis/src/main/scala/Main.scala index d575ba6..8fd82cb 100644 --- a/co-purchase-analysis/src/main/scala/Main.scala +++ b/co-purchase-analysis/src/main/scala/Main.scala @@ -109,12 +109,15 @@ object CoPurchaseAnalysis { val pairs = data .map(order => (order.orderId, order.productId)) .groupByKey() - .partitionBy(new HashPartitioner(50)) .flatMap { case (_, productIds) => - productIds.toSeq.sorted.combinations(2).map { case List(p1, p2) => - ProductPair(Math.min(p1, p2), Math.max(p1, p2)) -> 1 - } + val products = productIds.toSeq + for { + x <- products + y <- products if x < y + } yield (ProductPair(x, y), 1) } + .partitionBy(new HashPartitioner(50)) + val coProducts = pairs.reduceByKey(_ + _) coProducts.map { case (ProductPair(product1, product2), count) => |