| 12
 3
 4
 5
 6
 7
 8
 9
 10
 11
 12
 13
 14
 15
 16
 17
 18
 19
 20
 21
 22
 23
 24
 25
 26
 27
 28
 29
 30
 31
 32
 33
 34
 35
 36
 37
 38
 39
 40
 41
 42
 43
 44
 45
 46
 47
 
 |  
# Simulated example: forward stepwise subset selection on random data.
library(leaps)  # provides regsubsets()

# Fix the RNG seed so the simulated data, the train/test split and every
# statistic reported further down are reproducible from run to run.
set.seed(1)

n <- 100

# The response and the five predictors are independent uniform draws; each
# predictor is rescaled to a different range.
d <- data.frame(
  y = runif(n),
  x1 = runif(n) * 0.5,
  x2 = runif(n) * 0.25,
  x3 = runif(n) * 3.2,
  x4 = runif(n) * 4.35,
  x5 = runif(n) * 2.2
)

# 70 % of the rows form the training set. The split is derived from n so
# that changing n above keeps the indices valid.
n.train <- round(0.7 * n)
train <- sample(seq_len(n), n.train, replace = FALSE)

# Forward stepwise selection on the training rows only, allowing models
# with up to all available predictors (every column of d except y).
regfit.bestfwd <- regsubsets(
  y ~ .,
  data = d[train, ],
  nvmax = ncol(d) - 1,
  method = "forward"
)

summar.regfitfwd <- summary(regfit.bestfwd)
 
# Model statistics computed on the training set.
# For each model size i selected by the forward search, the fitted values
# are rebuilt from the design matrix and the stored coefficients, then
# MSE, R^2, RMSE and the relative error (RMSE / mean response) are recorded.
app.mat <- model.matrix(y ~ ., data = d[train, ])
y.train <- d$y[train]

# One entry per model size reported by the regsubsets summary.
n.models <- length(summar.regfitfwd$rsq)
val.errors <- rep(NA_real_, n.models)
stor.r2 <- rep(NA_real_, n.models)
stor.rmse <- rep(NA_real_, n.models)
stor.pe <- rep(NA_real_, n.models)

for (i in seq_len(n.models)) {
  coefi <- coef(regfit.bestfwd, id = i)
  # Keep only the design-matrix columns used by model i (intercept included).
  pred <- app.mat[, names(coefi), drop = FALSE] %*% coefi
  sq.err <- (y.train - pred)^2

  val.errors[i] <- mean(sq.err)
  stor.r2[i] <- 1 - sum(sq.err) / sum((y.train - mean(y.train))^2)
  # RMSE: root mean squared error. The mean is taken over the actual number
  # of training observations (the previous hard-coded divisor of 20 did not
  # match the 70-row training set).
  stor.rmse[i] <- sqrt(val.errors[i])
  # pe: error relative to the mean response.
  stor.pe[i] <- stor.rmse[i] / mean(y.train)
}

stor.r2
best.train <- which.max(summar.regfitfwd$rsq)
best.train
coef(regfit.bestfwd, best.train)
 
# Model statistics computed on the test (hold-out) set.
# The models were fitted on d[train, ]; here they are evaluated on the rows
# that were left out. Note that this overwrites val.errors, stor.r2,
# stor.rmse and stor.pe with their hold-out values.
app.mat <- model.matrix(y ~ ., data = d[-train, ])
y.test <- d$y[-train]

# One entry per model size reported by the regsubsets summary.
n.models <- length(summar.regfitfwd$rsq)
val.errors <- rep(NA_real_, n.models)
stor.r2 <- rep(NA_real_, n.models)
stor.rmse <- rep(NA_real_, n.models)
stor.pe <- rep(NA_real_, n.models)

for (i in seq_len(n.models)) {
  coefi <- coef(regfit.bestfwd, id = i)
  # Keep only the design-matrix columns used by model i (intercept included).
  pred <- app.mat[, names(coefi), drop = FALSE] %*% coefi
  sq.err <- (y.test - pred)^2

  val.errors[i] <- mean(sq.err)
  stor.r2[i] <- 1 - sum(sq.err) / sum((y.test - mean(y.test))^2)
  # RMSE: root mean squared error. The mean is taken over the actual number
  # of hold-out observations (the previous hard-coded divisor of 20 did not
  # match the 30-row test set).
  stor.rmse[i] <- sqrt(val.errors[i])
  # pe: error relative to the mean response.
  stor.pe[i] <- stor.rmse[i] / mean(y.test)
}

stor.r2
# Choose the model size from the hold-out results. The training R^2 never
# decreases as predictors are added, so it would always pick the largest
# model and ignore what the test set shows.
best.test <- which.max(stor.r2)
best.test
coef(regfit.bestfwd, best.test)
Partager