Bonjour,
je veux appliquer deux méthodes, K-means et la classification ascendante hiérarchique, sur une matrice de similarité afin d'identifier les différents clusters.
J'ai écrit un code Python, mais j'obtiens une erreur à l'exécution :

ValueError: could not convert string to float: '0,366666667'
voici mon code:

Code Python :
import pandas as pd
from pandas import read_csv
import numpy as np
from matplotlib import pyplot as plt
from pylab import rcParams
from scipy.cluster.hierarchy import dendrogram
from sklearn.cluster import AgglomerativeClustering, KMeans
 
 
# default figure size as (width, height), in inches per matplotlib's convention
rcParams["figure.figsize"] = (70, 40)
 
 
def plot_dendrogram(model, labels, **kwargs):
    """Draw the dendrogram of a fitted hierarchical clustering model.

    A linkage matrix is assembled from ``model.children_``,
    ``model.distances_`` and the number of samples under each merge, then
    handed to ``dendrogram`` together with ``labels`` and any extra keyword
    arguments.
    """
    merges = model.children_
    n_leaves = len(model.labels_)

    # sizes[k] is the number of original samples gathered by the k-th merge
    sizes = np.zeros(merges.shape[0])
    for row, pair in enumerate(merges):
        # an index below n_leaves is a single sample; anything above refers
        # to the earlier merge stored at (index - n_leaves)
        sizes[row] = sum(
            1 if node < n_leaves else sizes[node - n_leaves]
            for node in pair
        )

    columns = [merges, model.distances_, sizes]
    linkage_matrix = np.column_stack(columns).astype(float)

    # draw the tree described by the linkage matrix
    dendrogram(linkage_matrix, labels=labels, **kwargs)
 
 
def main(sim_mat, names, n_clusters=40):
    """Cluster the items of a precomputed matrix with two methods.

    Args:
        sim_mat: Path to a ";"-separated CSV file without header holding the
            matrix. Numbers may be written with a decimal comma
            (e.g. ``0,366666667``) or with a decimal point.
        names: Path to a latin-1 encoded CSV file without header; its first
            column provides the item names.
        n_clusters: Number of clusters requested from KMeans.

    Returns:
        Tuple ``(model_1, model_2, labels)``: the fitted
        AgglomerativeClustering model, the fitted KMeans model and the list
        of names read from ``names``.

    Raises:
        ValueError: If a cell of the matrix cannot be converted to a number.
    """
    # Read every cell as text, then normalise the decimal comma. Leaving the
    # cells as strings such as "0,366666667" is what made fitting fail with
    # "could not convert string to float". The separator is ";", so a comma
    # inside a cell can only be a decimal mark.
    raw = pd.read_csv(sim_mat, delimiter=";", header=None, dtype=str)
    matrix = raw.replace(",", ".", regex=True).astype(float).to_numpy()

    # NOTE(review): with a precomputed matrix scikit-learn interprets the
    # values as distances. If the file really holds similarities, convert
    # them first (e.g. 1 - similarity for values in [0, 1]) -- to confirm.
    hierarchical_options = {
        "n_clusters": None,
        "linkage": "average",
        "distance_threshold": 0,  # build the full tree and keep distances_
    }
    try:
        model_1 = AgglomerativeClustering(metric="precomputed",
                                          **hierarchical_options)
    except TypeError:
        # older scikit-learn releases only know this option as `affinity`
        model_1 = AgglomerativeClustering(affinity="precomputed",
                                          **hierarchical_options)

    # KMeans treats each row of the matrix as the feature vector of one item
    model_2 = KMeans(n_clusters=n_clusters)

    # get the names
    de = pd.read_csv(names, encoding="latin-1", header=None)
    labels = de[0].to_list()

    model_1 = model_1.fit(matrix)
    model_2 = model_2.fit(matrix)
    return model_1, model_2, labels
 
 
if __name__ == "__main__":

    # input files: the matrix to cluster and the matching item names
    matrix_path = 'C:/Users/click/Desktop/doctorat/rfiddd/fichier trouvé resultats/2020-07-22/FDA_matrice_bisim.csv'
    names_path = 'C:/Users/click/Desktop/doctorat/rfiddd/fichier trouvé resultats/2020-07-22/FDA_med-unique.csv'
    model_1, model_2, labels = main(sim_mat=matrix_path, names=names_path)

    # list the KMeans cluster assigned to every name
    print("KMeans results")
    assignments = zip(labels, model_2.labels_)
    for name, cluster in assignments:
        print(f"- {name:<25}: {cluster}")

    # draw the dendrogram truncated by level; no depth `p` is passed, so the
    # default depth of `dendrogram` applies
    print("\nCreating the dendrogram...")
    plot_dendrogram(model_1, labels=labels, truncate_mode='level')
    plt.savefig("dendrogram.png")
    print("Done!")
Merci d'avance pour votre aide.