"""Logistic regression on the clients dataset.

Loads ``clients.csv``, plots the purchase rate per gender, then fits a
logistic regression on the first two remaining columns and prints the
accuracy obtained on the held-out test set.
"""

# Library imports
# %matplotlib  -- IPython magic, only meaningful inside a notebook; left disabled
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import seaborn as sns
import statsmodels.api as sm
from matplotlib.colors import ListedColormap
from mpl_toolkits.mplot3d import Axes3D
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import classification_report, confusion_matrix
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler

# Load the dataset
dataset = pd.read_csv('clients.csv')

# Quick look at the data
print(dataset.head())

# Drop the User ID column
dataset = dataset.drop(columns=["User ID"])

# Encode the Gender variable as integers
dataset["Gender"] = dataset["Gender"].map({"Male": 1, "Female": 2})

# Influence of gender on the purchase decision: each row of the crosstab is
# divided by its own total so the stacked bars show proportions per gender.
table = pd.crosstab(dataset.Gender, dataset.Purchased)
table.div(table.sum(axis=1).astype(float), axis=0).plot(kind='bar', stacked=True)

# Gender is only used for the plot above; it is not a model feature.
dataset = dataset.drop(columns=["Gender"])
print(dataset.head())

# NOTE(review): these two crosstabs are computed but never plotted or
# printed, and the second overwrites the first -- confirm whether plots
# were intended here.
table = pd.crosstab(dataset.Age, dataset.Purchased)
table = pd.crosstab(dataset.EstimatedSalary, dataset.Purchased)

# Define the dependent variable y and the independent variables X.
# Selection is positional: presumably columns 0 and 1 are Age and
# EstimatedSalary and the last column is Purchased -- verify against the
# CSV column order.
X = dataset.iloc[:, [0, 1]].values
y = dataset.iloc[:, -1].values

# Split the dataset into a training set and a test set (25% held out)
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.25, random_state=0
)

# Feature scaling: the scaler is fitted on the training set only and then
# applied unchanged to the test set; the scaled test set is printed.
sc = StandardScaler()
X_train = sc.fit_transform(X_train)
X_test = sc.transform(X_test)
print(X_test)

# Build and fit the model
classifier = LogisticRegression(random_state=0, solver='liblinear')
classifier.fit(X_train, y_train)

# Predict on the test set and print the model's score on it
# (the predictions themselves are kept in y_pred but not printed).
y_pred = classifier.predict(X_test)
print(classifier.score(X_test, y_test))