from pyspark.sql import functions as F
# NOTE: dropped `sum`/`mean` from this import -- they shadow the Python
# builtins and were unused; `col`/`when`/`ltrim` are kept for file callers.
from pyspark.sql.functions import col, when, ltrim, row_number
from pyspark.sql.window import Window

# Sample data: one row per (contract number, first name) pair.
df = spark.createDataFrame(
    [("232340024", "Philippe"),
     ("232340024", "Jean"),
     ("304435353", "Louis"),
     ("304435353", "Marie"),
     ("304435353", "Ernesto"),
     ("304435353", "Adel"),
     ("723320533", "Remy"),
     ("723320533", "Franck")],
    ("CONTRAT", "PRENOM"),
)

# Rank the names alphabetically within each contract; the rank becomes the
# pivot column label, so every contract's names line up as 1, 2, 3, ...
df = df.withColumn(
    "row_num",
    row_number().over(Window.partitionBy("CONTRAT").orderBy("PRENOM")),
)

# One output row per contract, one column per rank, holding the name.
# Use F.first, not F.sum: summing a string column yields NULL in every cell.
# (Each (CONTRAT, row_num) group has exactly one row, so `first` is exact.)
# .display() is Databricks-specific; use .show() outside Databricks.
df.groupBy("CONTRAT").pivot("row_num").agg(F.first("PRENOM")).display()