# NOTE: this script was pasted from a web page; the line-number gutter (1-75) and the table delimiter that preceded the first import were extraction artifacts.
import pandas as pd
from pandas import read_csv
import numpy as np
from matplotlib import pyplot as plt
from pylab import rcParams
from scipy.cluster.hierarchy import dendrogram
from sklearn.cluster import AgglomerativeClustering, KMeans
# Default size for every figure created after this point, given as
# (width, height); matplotlib reads "figure.figsize" in inches.
rcParams["figure.figsize"] = (70, 40)
def plot_dendrogram(model, labels, **kwargs):
    """Build a SciPy linkage matrix from a fitted model and draw its dendrogram.

    Args:
        model: Fitted hierarchical-clustering estimator exposing ``children_``
            (one row of two child indices per merge), ``distances_`` (one
            value per merge) and ``labels_`` (one entry per sample).
        labels: Leaf labels, forwarded to
            ``scipy.cluster.hierarchy.dendrogram`` as its ``labels`` argument.
        **kwargs: Any further keyword arguments, forwarded unchanged to
            ``dendrogram`` (e.g. ``truncate_mode``, ``p``, ``no_plot``).

    Returns:
        The dictionary returned by ``dendrogram``. Earlier versions of this
        function discarded it; callers that ignore the return value are
        unaffected.
    """
    children = np.asarray(model.children_)
    n_samples = len(model.labels_)

    # counts[i] = number of original samples underneath the i-th merge.
    counts = np.zeros(children.shape[0])
    for i, merge in enumerate(children):
        current_count = 0
        for child_idx in merge:
            if child_idx < n_samples:
                current_count += 1  # leaf node
            else:
                # Indices >= n_samples refer to the cluster created by an
                # earlier merge, whose size has already been computed.
                current_count += counts[child_idx - n_samples]
        counts[i] = current_count

    # Column layout: child a, child b, merge distance, sample count.
    linkage_matrix = np.column_stack(
        [children, model.distances_, counts]
    ).astype(float)

    # Plot the corresponding dendrogram
    return dendrogram(linkage_matrix, labels=labels, **kwargs)
def main(sim_mat, names, n_clusters=40):
    """Fit an agglomerative model and a KMeans model on a matrix read from CSV.

    Args:
        sim_mat: Location of a header-less, ``;``-delimited CSV holding the
            matrix; passed straight to ``pandas.read_csv``.
        names: Location of a header-less CSV (read as latin-1) whose first
            column supplies the labels; passed straight to
            ``pandas.read_csv``.
        n_clusters: Number of clusters for KMeans. Defaults to 40, the value
            that used to be hard-coded here.

    Returns:
        A tuple ``(model_1, model_2, labels)``: the fitted
        ``AgglomerativeClustering`` model, the fitted ``KMeans`` model and
        the list of labels taken from column 0 of ``names``.

    Raises:
        ValueError: If the number of labels differs from the number of
            matrix rows.
    """
    # load the similarity matrix
    matrix = pd.read_csv(sim_mat, delimiter=";", header=None).to_numpy()

    # get the names
    labels = pd.read_csv(names, encoding="latin-1", header=None)[0].to_list()

    # Fail early and explicitly: a mismatch would otherwise be silently
    # truncated by zip() when results are printed next to their labels.
    if len(labels) != matrix.shape[0]:
        raise ValueError(
            f"label count ({len(labels)}) does not match the number of "
            f"matrix rows ({matrix.shape[0]})"
        )

    # perform the clustering
    # distance_threshold=0 with n_clusters=None builds the full merge tree.
    # NOTE(review): "precomputed" makes fit() treat `matrix` as pairwise
    # distances, while the parameter is named `sim_mat`; confirm the file
    # holds distances (e.g. 1 - similarity) rather than similarities.
    agglo_params = dict(n_clusters=None, linkage="average", distance_threshold=0)
    try:
        model_1 = AgglomerativeClustering(metric="precomputed", **agglo_params)
    except TypeError:
        # scikit-learn < 1.2 only knows the older `affinity` keyword
        # (deprecated in 1.2 and removed in 1.4 in favour of `metric`).
        model_1 = AgglomerativeClustering(affinity="precomputed", **agglo_params)

    # NOTE the number of clusters is here!!
    model_2 = KMeans(n_clusters=n_clusters)

    model_1 = model_1.fit(matrix)
    model_2 = model_2.fit(matrix)
    return model_1, model_2, labels
if __name__ == "__main__":
    # Input files for this run (machine-specific absolute paths).
    SIM_MAT_PATH = 'C:/Users/click/Desktop/doctorat/rfiddd/fichier trouvé resultats/2020-07-22/FDA_matrice_bisim.csv'
    NAMES_PATH = 'C:/Users/click/Desktop/doctorat/rfiddd/fichier trouvé resultats/2020-07-22/FDA_med-unique.csv'

    model_1, model_2, labels = main(sim_mat=SIM_MAT_PATH, names=NAMES_PATH)

    # print the KMeans results
    print("KMeans results")
    for name, cluster in zip(labels, model_2.labels_):
        print(f"- {name:<25}: {cluster}")

    # Plot the dendrogram. truncate_mode='level' is used without an explicit
    # `p`, so SciPy's default (p=30) applies: up to 30 levels are drawn, not
    # three. Pass p=3 here if only the top three levels are wanted.
    print("\nCreating the dendrogram...")
    plot_dendrogram(model_1, labels=labels, truncate_mode='level')
    plt.savefig("dendrogram.png")
    print("Done!")
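# NOTE: "Partager" ("Share") was a web-page button label captured with the paste; it is not part of the script.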