# Forward stepwise variable selection with repeated train/test validation.
# Simulate a data set with a uniform response and 5 uniform predictors,
# then repeat N times: split 70/30 into train/test, run forward stepwise
# selection on the training set, and record MSE, R^2, RMSE and percent
# error for each model size (1..5 predictors) on both splits.
library(leaps)  # provides regsubsets()

n <- 100
d <- data.frame(
  y  = runif(n),
  x1 = runif(n) * 0.5,
  x2 = runif(n) * 0.25,
  x3 = runif(n) * 3.2,
  x4 = runif(n) * 4.35,
  x5 = runif(n) * 2.2
)

N <- 999        # number of repetitions
n_pred <- 5     # number of candidate predictors / max model size
n_train <- 70   # training-set size (test set: n - n_train observations)

# Pre-allocated result matrices: one row per repetition, one column per
# model size. The "t" suffix = training-set statistic; no suffix = test set.
val.errorst <- matrix(numeric(), N, n_pred)  # training MSE
stor.r2t    <- matrix(numeric(), N, n_pred)  # training R^2
stor.rmset  <- matrix(numeric(), N, n_pred)  # training RMSE
stor.pet    <- matrix(numeric(), N, n_pred)  # training percent error
val.errors  <- matrix(numeric(), N, n_pred)  # test MSE
stor.r2     <- matrix(numeric(), N, n_pred)  # test R^2
stor.rmse   <- matrix(numeric(), N, n_pred)  # test RMSE
stor.pe     <- matrix(numeric(), N, n_pred)  # test percent error

for (j in seq_len(N)) {
  # Random 70/30 split; was sample(seq(100), ...) with the data size
  # hard-coded — now driven by n and n_train.
  train <- sample(seq_len(n), n_train, replace = FALSE)

  # Forward stepwise selection fitted on the training set only.
  regfit.bestfwd <- regsubsets(y ~ ., data = d[train, ],
                               nvmax = n_pred, method = "forward")

  # Design matrices used to predict on the training and test sets.
  app.matt <- model.matrix(y ~ ., data = d[train, ])   # training
  app.mat  <- model.matrix(y ~ ., data = d[-train, ])  # test

  for (i in seq_len(n_pred)) {
    # Coefficients of the best i-variable model; subset the design
    # matrix to the selected columns before multiplying.
    coefi <- coef(regfit.bestfwd, id = i)

    # --- training-set statistics ---
    pred <- app.matt[, names(coefi)] %*% coefi
    res  <- d$y[train] - pred
    val.errorst[j, i] <- mean(res^2)
    stor.r2t[j, i] <- 1 - sum(res^2) / sum((d$y[train] - mean(d$y[train]))^2)
    # BUG FIX: RMSE = sqrt(mean squared error). The original divided the
    # sum of squares by a hard-coded 20, which matches neither the
    # training size (70) nor the test size (30).
    stor.rmset[j, i] <- sqrt(mean(res^2))
    stor.pet[j, i] <- stor.rmset[j, i] / mean(d$y[train])

    # --- test-set statistics ---
    pred <- app.mat[, names(coefi)] %*% coefi
    res  <- d$y[-train] - pred
    val.errors[j, i] <- mean(res^2)
    stor.r2[j, i] <- 1 - sum(res^2) / sum((d$y[-train] - mean(d$y[-train]))^2)
    stor.rmse[j, i] <- sqrt(mean(res^2))  # same fix as above
    stor.pe[j, i] <- stor.rmse[j, i] / mean(d$y[-train])
  }
}
# (end of script)