diff options
Diffstat (limited to 'co-purchase-analysis/src')
| -rw-r--r-- | co-purchase-analysis/src/main/scala/Main.scala | 11 | 
1 files changed, 7 insertions, 4 deletions
diff --git a/co-purchase-analysis/src/main/scala/Main.scala b/co-purchase-analysis/src/main/scala/Main.scala index d575ba6..8fd82cb 100644 --- a/co-purchase-analysis/src/main/scala/Main.scala +++ b/co-purchase-analysis/src/main/scala/Main.scala @@ -109,12 +109,15 @@ object CoPurchaseAnalysis {      val pairs = data        .map(order => (order.orderId, order.productId))        .groupByKey() -      .partitionBy(new HashPartitioner(50))        .flatMap { case (_, productIds) => -        productIds.toSeq.sorted.combinations(2).map { case List(p1, p2) => -          ProductPair(Math.min(p1, p2), Math.max(p1, p2)) -> 1 -        } +        val products = productIds.toSeq +        for { +          x <- products +          y <- products if x < y +        } yield (ProductPair(x, y), 1)        } +      .partitionBy(new HashPartitioner(50)) +      val coProducts = pairs.reduceByKey(_ + _)      coProducts.map { case (ProductPair(product1, product2), count) =>  |