# (Extraction residue removed: line-number gutter 1-76 from the original web listing.)
# PCA followed by a linear SVM on the data stored in 'Entropie.xlsx'.
#
# Steps: load the sheet, drop the rows labelled 1 (per the original author's
# note, this leaves a two-class problem), standardise the features, project
# them on the first two principal components, fit a linear SVC in that plane,
# then plot the decision boundary, its margins and the support vectors.
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import seaborn as sns
from sklearn import decomposition
from sklearn import preprocessing
from sklearn import svm

# In[1]: Load the data
data = pd.read_excel('Entropie.xlsx')
# Remove one label so the SVM is trained on two classes.
# .copy() makes `temp` an independent frame: the PC1/PC2 columns are added to
# it further down, and assigning into a filtered slice of `data` would raise
# pandas' SettingWithCopyWarning.
temp = data[data.label != 1].copy()
df = temp.drop(['label'], axis=1)
X = df.values
categ = temp['label']

# In[2]: Standardise the data
std_scale = preprocessing.StandardScaler().fit(X)
X_scaled = std_scale.transform(X)

# In[3]: Compute the principal components
pca = decomposition.PCA(n_components=2)  # number of principal components kept
pca.fit(X_scaled)

# In[4]: Project X on the principal components
X_projected = pca.transform(X_scaled)

# In[5]: Plot every observation in the PC1/PC2 plane, coloured by label
plt.figure()
plt.scatter(X_projected[:, 0], X_projected[:, 1], s=10,
            c=categ, cmap='viridis')
plt.xlabel('PC 1')
plt.ylabel('PC 2')
plt.xlim([-3, 6])
plt.ylim([-3, 4])
plt.colorbar()
plt.title('Classification selon composante principal')

# Add the two principal components to the "temp" dataset (used by seaborn).
temp['PC1'] = X_projected[:, 0]
temp['PC2'] = X_projected[:, 1]

# In[6]: Fit the SVM model with a low C (soft margin)
model = svm.SVC(kernel='linear', C=2**-5)
model.fit(X_projected, categ)

# In[7]: Separating hyperplane and margins
# The boundary is w[0]*x + w[1]*y + intercept = 0, i.e. y = a*x - intercept/w[1].
# NOTE: a boundary that is exactly vertical (w[1] == 0) is not handled here.
w = model.coef_[0]
a = -w[0] / w[1]
xx = np.linspace(-3, 5)
yy = a * xx - (model.intercept_[0]) / w[1]

# The margins are the lines where the decision function equals -1 and +1,
# i.e. the boundary shifted vertically by 1/|w[1]|.  With a soft margin,
# support vectors may lie inside the margin, so drawing parallels through an
# arbitrary support vector does not give the margin lines.
margin_offset = 1.0 / abs(w[1])
yy_down = yy - margin_offset
yy_up = yy + margin_offset

# In[8]: Plot the hyperplane
# x/y are passed by keyword: recent seaborn releases reject positional x/y.
sns.lmplot(x='PC1', y='PC2', data=temp, hue='label', palette='Set1',
           fit_reg=False, scatter_kws={"s": 20})
plt.plot(xx, yy, linewidth=2, color='black')
plt.ylim([-5, 5])

# In[9]: Look at the margins and support vectors
sns.lmplot(x='PC1', y='PC2', data=temp, hue='label', palette='Set1',
           fit_reg=False, scatter_kws={"s": 20})
plt.plot(xx, yy, linewidth=2, color='black')
plt.plot(xx, yy_down, 'k--')
plt.plot(xx, yy_up, 'k--')
# An explicit edge colour is required: with facecolors='none' and the default
# edge colour ('face'), the hollow markers would not be drawn at all.
plt.scatter(model.support_vectors_[:, 0], model.support_vectors_[:, 1],
            s=80, facecolors='none', edgecolors='k')
plt.ylim([-5, 5])
# (Extraction residue removed: "Partager" / "Share" button label from the source web page.)