# Simulate length measurements for coloured parts whose colour labels are
# entered inconsistently, clean the labels, then report per-colour part
# counts, the share of Blue/Green parts inside tolerance, and the
# interquartile range of the Green and Orange lengths.
import numpy as np
import pandas as pd
from scipy.stats import iqr

seeding = 0

# (mean, standard deviation) of the simulated length for each true colour.
R_mu, R_sig = (16.0, 0.5)
B_mu, B_sig = (16.0, 0.2)
G_mu, G_sig = (16.4, 0.4)
P_mu, P_sig = (16.0, 0.3)
O_mu, O_sig = (16.8, 0.8)

# One row per raw colour spelling: how many parts to simulate and the
# parameters of the normal distribution their lengths are drawn from.
df_config = pd.DataFrame({
    "Colour_raw": ['RED', 'R', 'Red', 'red',
                   'BLUE', 'Bleu', 'Blue', 'B',
                   'G', 'Green', 'GREEN', 'green',
                   'O', 'Orang', 'Orange',
                   'PURP', 'Purpal', 'Purple'],
    "Colour_True": ['Red', 'Red', 'Red', 'Red',
                    'Blue', 'Blue', 'Blue', 'Blue',
                    'Green', 'Green', 'Green', 'Green',
                    'Orange', 'Orange', 'Orange',
                    'Purple', 'Purple', 'Purple'],
    "N_parts": [2301, 5325, 6003, 8002,
                1200, 490, 4450, 1151,
                1990, 9012, 342, 153,
                5515, 23, 6351,
                741, 698, 4900],
    "Mu": [R_mu, R_mu, R_mu, R_mu,
           B_mu, B_mu, B_mu, B_mu,
           G_mu, G_mu, G_mu, G_mu,
           O_mu, O_mu, O_mu,
           P_mu, P_mu, P_mu],
    "Sig": [R_sig, R_sig, R_sig, R_sig,
            B_sig, B_sig, B_sig, B_sig,
            G_sig, G_sig, G_sig, G_sig,
            O_sig, O_sig, O_sig,
            P_sig, P_sig, P_sig],
})

# Draw the lengths for every config row.
# NOTE: the generator is re-seeded before each row, so every row is drawn
# from the same underlying random sequence. This is kept on purpose so the
# generated data set stays identical to the one the script always produced.
_frames = []
for n1 in range(len(df_config)):
    np.random.seed(seeding)
    df_local = pd.DataFrame({
        'Colour': df_config.loc[n1, 'Colour_raw'],
        'Length': np.round(
            np.random.normal(df_config.loc[n1, 'Mu'],
                             df_config.loc[n1, 'Sig'],
                             df_config.loc[n1, 'N_parts']),
            3),
    })
    _frames.append(df_local)
# Concatenate once instead of growing the frame inside the loop.
df_mast = pd.concat(_frames)

# Shuffle the rows so the colours are interleaved.
Measurements = df_mast.sample(frac=1).reset_index(drop=True)

Tolerences = pd.DataFrame({
    'Colour': ['Red', 'Blue', 'Green', 'Orange', 'Purple'],
    'Lower Tolerence': [14.50, 15.625, 15.60, 14.15, 15.25],
    'Upper Tolerence': [19.150, 17.755, 18.25, 17.95, 17.25],
})

# Map every known misspelling/abbreviation to its canonical colour name.
# Spellings that are already canonical are simply left untouched.
COLOUR_ALIASES = {
    'RED': 'Red', 'R': 'Red', 'red': 'Red',
    'BLUE': 'Blue', 'Bleu': 'Blue', 'B': 'Blue',
    'G': 'Green', 'GREEN': 'Green', 'green': 'Green',
    'O': 'Orange', 'Orang': 'Orange',
    'PURP': 'Purple', 'Purpal': 'Purple',
}

# Normalise the colour labels in both tables. The result is assigned back to
# the column rather than written through `df[col].loc[mask] = value`: that
# chained assignment is not guaranteed to reach the original frame (it does
# nothing when pandas Copy-on-Write is active).
df_config['Colour_raw'] = df_config['Colour_raw'].replace(COLOUR_ALIASES)
Measurements['Colour'] = Measurements['Colour'].replace(COLOUR_ALIASES)

# Config rows belonging to each colour.
Red = df_config.loc[df_config['Colour_raw'] == 'Red']
Blue = df_config.loc[df_config['Colour_raw'] == 'Blue']
Green = df_config.loc[df_config['Colour_raw'] == 'Green']
Purple = df_config.loc[df_config['Colour_raw'] == 'Purple']

# Total number of parts per colour.
Numberparts_Red = Red['N_parts'].sum()
print("Numberparts_Red=", Numberparts_Red)
Numberparts_Blue = Blue['N_parts'].sum()
print("Numberparts_Blue=", Numberparts_Blue)
Numberparts_Green = Green['N_parts'].sum()
print("Numberparts_Green=", Numberparts_Green)
Numberparts_Purple = Purple['N_parts'].sum()
print("Numberparts_Purple=", Numberparts_Purple)

# Measurement rows belonging to each colour.
Purple_M = Measurements.loc[Measurements['Colour'] == 'Purple']
Orange_M = Measurements.loc[Measurements['Colour'] == 'Orange']
Green_M = Measurements.loc[Measurements['Colour'] == 'Green']
Blue_M = Measurements.loc[Measurements['Colour'] == 'Blue']

# Tolerance limits are read from the Tolerences table so they are defined in
# exactly one place.
_limits = Tolerences.set_index('Colour')

# Blue parts whose length lies inside [lower, upper] (both bounds inclusive).
cond1_B = Blue_M['Length'] >= _limits.loc['Blue', 'Lower Tolerence']
sub1_B = Blue_M[cond1_B]
cond2_B = sub1_B['Length'] <= _limits.loc['Blue', 'Upper Tolerence']
sub2_B = sub1_B[cond2_B]

N_Blue_in_tolerence = len(sub2_B)
print("N_Blue_in_tolerence=", N_Blue_in_tolerence)

# Percentage of Blue parts inside tolerance.
percent_part_Blue = N_Blue_in_tolerence*100/Numberparts_Blue
print("%_part_Blue=", percent_part_Blue)

# Green parts whose length lies inside [lower, upper] (both bounds inclusive).
cond1 = Green_M['Length'] >= _limits.loc['Green', 'Lower Tolerence']
sub1_G = Green_M[cond1]
cond2_G = sub1_G['Length'] <= _limits.loc['Green', 'Upper Tolerence']
sub2_G = sub1_G[cond2_G]

N_Green_in_tolerence = len(sub2_G)
print("N_Green_in_tolerence=", N_Green_in_tolerence)

# Percentage of Green parts inside tolerance.
percent_part_Green = N_Green_in_tolerence*100/Numberparts_Green
print("%_part_Green=", percent_part_Green)

# Interquartile range of the Green part lengths.
Data_length_G = Green_M[('Length')]
print(Data_length_G)
Interquartile_G = iqr(Data_length_G)
print(Interquartile_G)

# Interquartile range of the Orange part lengths.
Data_length_O = Orange_M[('Length')]
Interquartile_O = iqr(Data_length_O)
print(Interquartile_O)