# PySpark example: group contract/first-name pairs, count, sort, filter.
from pyspark.sql import functions as F
from pyspark.sql.functions import col, when, ltrim, sum, mean  # NOTE(review): shadows builtins sum/mean
from pyspark.sql.functions import *  # NOTE(review): wildcard import also shadows builtins; prefer the F. prefix
# Sample (contract number, first name) rows used by the aggregation below.
df = spark.createDataFrame(
    [
        ("232340024", "Philippe"),
        ("232340024", "Jean"),
        ("304435353", "Louis"),
        ("304435353", "Marie"),
        ("304435353", "Ernesto"),
        ("304435353", "Adel"),
        ("723320533", "Remy"),
        ("723320533", "Franck"),
    ],
    ("CONTRAT", "PRENOM"),
)

# Count occurrences of each (CONTRAT, PRENOM) pair, sort by CONTRAT in
# descending order, then keep only contract 304435353.
#
# FIX: DataFrame.sort has no `descending` keyword — PySpark only reads
# `ascending` from kwargs, so the original `descending=True` was silently
# ignored and the sort stayed ascending. `ascending=False` gives the
# intended order.
#
# NOTE(review): grouping by BOTH CONTRAT and PRENOM yields count=1 for every
# row of this sample data — confirm whether groupBy("CONTRAT") alone was
# intended to count names per contract.
(
    df.groupBy(F.col("CONTRAT"), F.col("PRENOM"))
    .agg(F.count(F.col("PRENOM")))
    .sort(["CONTRAT"], ascending=False)
    .filter(F.col("CONTRAT") == "304435353")
    .display()  # NOTE(review): .display() is Databricks-only; use .show() elsewhere
)
# (removed "Partager" — a share-button artifact from the scraped web page)