# Repeated train/test evaluation of best-subset regression models (leaps).
# Simulate a toy data set of 100 observations: a response y, plus two
# predictors that are noisy linear functions of y (so subset selection
# has real signal to find).
df <- data.frame(y = rnorm(100, mean = 5, sd = 0.02))
df$x1 <- df$y * 3 - 2 + rnorm(100, mean = 0, sd = 1.5)
df$x2 <- df$y / 2 + rnorm(100, mean = 7, sd = 1)
df
N <- 100  # number of repetitions of the train/test split

# Pre-allocate N x 2 result matrices: one row per repetition, one column
# per candidate model size (nvmax = 2 in regsubsets below).
# Naming convention: trailing "t" = training-set statistic,
# no suffix = test-set statistic.
# NA_real_ is used as filler because matrix(numeric(), N, 2) passes
# zero-length data to matrix(), which modern R rejects with
# "data is too short".
val.errorst <- matrix(NA_real_, N, 2)  # training MSE
stor.r2t    <- matrix(NA_real_, N, 2)  # training R-squared
stor.rmset  <- matrix(NA_real_, N, 2)  # training RMSE
stor.pet    <- matrix(NA_real_, N, 2)  # training percentage error
stor.biait  <- matrix(NA_real_, N, 2)  # training percentage bias
val.errors  <- matrix(NA_real_, N, 2)  # test MSE
stor.r2     <- matrix(NA_real_, N, 2)  # test R-squared
stor.rmse   <- matrix(NA_real_, N, 2)  # test RMSE
stor.pe     <- matrix(NA_real_, N, 2)  # test percentage error
stor.biai   <- matrix(NA_real_, N, 2)  # test percentage bias
library(leaps)
# Repeat N times: draw a random 70/30 train/test split, fit subset-
# selection models of size 1 and 2 on the training rows, then score each
# model on both the training rows and the held-out rows, filling row j
# of the pre-allocated result matrices.
for (j in seq_len(N)) {
  # 70 training row indices sampled without replacement from the 100 rows.
  train <- sample(seq_len(nrow(df)), 70, replace = FALSE)

  # Best-subset selection (regsubsets defaults to exhaustive search
  # despite the "fwd" in the variable name) with at most 2 predictors.
  regfit.bestfwd <- regsubsets(y ~ ., data = df[train, ], nvmax = 2)
  # Kept for interactive inspection; a bare name inside a loop is not
  # auto-printed, so the original stand-alone line was a no-op.
  summar.regfitfwd <- summary(regfit.bestfwd)

  # Design matrices for the training set and the test set.
  app.matt <- model.matrix(y ~ ., data = df[train, ])
  tst.mat  <- model.matrix(y ~ ., data = df[-train, ])

  for (i in 1:2) {
    # Coefficients of the best model with i predictors.
    coefi <- coef(regfit.bestfwd, id = i)

    # ---- training-set statistics ------------------------------------
    predapp <- app.matt[, names(coefi)] %*% coefi
    val.errorst[j, i] <- mean((df$y[train] - predapp)^2)       # MSE
    stor.r2t[j, i]    <- cor(df$y[train], predapp)^2           # R-squared
    stor.rmset[j, i]  <- sqrt(val.errorst[j, i])               # RMSE
    stor.pet[j, i]    <- stor.rmset[j, i] / mean(df$y[train])  # % error
    # Percentage bias: signed total prediction error relative to the
    # total observed response.
    stor.biait[j, i]  <- sum(predapp - df$y[train]) / sum(df$y[train]) * 100

    # ---- test-set statistics ----------------------------------------
    predtst <- tst.mat[, names(coefi)] %*% coefi
    val.errors[j, i] <- mean((df$y[-train] - predtst)^2)
    stor.r2[j, i]    <- cor(df$y[-train], predtst)^2
    stor.rmse[j, i]  <- sqrt(val.errors[j, i])
    stor.pe[j, i]    <- stor.rmse[j, i] / mean(df$y[-train])
    stor.biai[j, i]  <- sum(predtst - df$y[-train]) / sum(df$y[-train]) * 100
  }
}
# (end of script)