# -*- coding: utf-8 -*-
"""
Fit a logistic regression on the client dataset with scikit-learn and statsmodels.

Created on Tue Aug 3 11:39:09 2021

@author: Charles
"""

import pandas as pd
import numpy as np
import statsmodels.api as sm
from sklearn.linear_model import LogisticRegression
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import confusion_matrix
from sklearn.metrics import classification_report
from matplotlib.colors import ListedColormap
import seaborn as sns

# Load the dataset.
data = pd.read_csv("clients.csv")

# NOTE: a categorical column (e.g. blood group, gender) could be encoded with
# data["Gender"].astype('category') and then used through .cat.codes; here the
# column is simply dropped instead.

# Drop the columns that are not used by the model.
data.drop(["User ID", "Gender"], axis='columns', inplace=True)
print(data.head())

# Define x and y: the first two remaining columns are the features and the
# last column is the target (presumably "Purchased", per the earlier draft of
# this script -- confirm against clients.csv).
# Alternative kept for reference: x = data.select_dtypes(np.number) would
# select every numeric column instead of a positional slice.
features = data.iloc[:, [0, 1]]
# Capture the column names *before* converting to a NumPy array: an ndarray
# has no ``.columns`` attribute, which is needed to label the coefficients.
feature_names = list(features.columns)
x = features.values
y = data.iloc[:, -1].values

# Fit the scikit-learn model.
modele_logit = LogisticRegression(random_state=0, solver='liblinear')
modele_logit.fit(x, y)

# Gather the intercept and the coefficients in a single labelled table
# (one row per term after the transpose) and display it.
coefficients = pd.DataFrame(
    np.concatenate(
        [modele_logit.intercept_.reshape(-1, 1), modele_logit.coef_],
        axis=1,
    ),
    index=["coef"],
    columns=["constante"] + feature_names,
).T
print(coefficients)

# Add a column for the constant term (statsmodels does not add one itself).
x_stat = sm.add_constant(x)

# Fit the statsmodels logit model and print its summary.
model = sm.Logit(y, x_stat)
result = model.fit()
print(result.summary())