# Neural network vs. linear regression on the ManlyMix `finance` data set
# Reproducibility for the random train/test split below
set.seed(500)

library(MASS)
# library(), not require(): require() returns FALSE on failure instead of
# erroring, which silently breaks everything downstream
library(Rmixmod)
library(ManlyMix)

# Company financial-ratio data; `Health` is the response modeled below
data(finance)
data <- finance
# Encode the Health factor as numeric codes so glm() and neuralnet()
# can treat it as a regression target
data$Health <- as.numeric(as.factor(data$Health))
#data$Year <- as.numeric(as.factor(data$Year))
data$Year <- NULL  # drop Year: not used as a predictor
#apply(data,2,function(x) sum(is.na(x)))

# 75/25 train/test split by row index
index <- sample(seq_len(nrow(data)), round(0.75 * nrow(data)))
train <- data[index, ]
test <- data[-index, ]
# Baseline model: ordinary least squares via glm() (default gaussian family)
lm.fit <- glm(Health ~ ., data = train)
summary(lm.fit)  # model summary table

# Hold-out predictions and their mean squared error
pr.lm <- predict(lm.fit, test)
MSE.lm <- mean((pr.lm - test$Health)^2)
# ---- Data preparation for the neural network ----
# Min-max scale every column to [0, 1]; neuralnet converges poorly on
# unscaled inputs. vapply() iterates over data-frame columns directly
# (apply() would first coerce the whole frame to a matrix).
maxs <- vapply(data, max, numeric(1))
mins <- vapply(data, min, numeric(1))
scaled <- as.data.frame(scale(data, center = mins, scale = maxs - mins))

# Reuse the same split indices as the linear model so both models are
# scored on exactly the same hold-out rows
train_ <- scaled[index, ]
test_ <- scaled[-index, ]

library(neuralnet)
# neuralnet() does not understand "Health ~ .", so build the formula
# explicitly from every non-response column name
n <- names(train_)
f <- reformulate(setdiff(n, "Health"), response = "Health")
# ---- Fit and evaluate the neural network ----
# Two hidden layers (3 then 1 neurons); linear.output = TRUE (never the
# reassignable shorthand T) selects regression rather than classification.
nn <- neuralnet(f, data = train_, hidden = c(3, 1), linear.output = TRUE)
plot(nn)

# Predict on the held-out rows. Select the predictors by name instead of
# the hard-coded columns 2:5, so this stays consistent with formula `f`
# even if the column order ever changes.
predictors <- setdiff(names(test_), "Health")
pr.nn <- compute(nn, test_[, predictors])

# Undo the min-max scaling so errors are on the original Health scale
health_range <- max(data$Health) - min(data$Health)
pr.nn_ <- pr.nn$net.result * health_range + min(data$Health)
test.r <- test_$Health * health_range + min(data$Health)
MSE.nn <- sum((test.r - pr.nn_)^2) / nrow(test_)

# Hold-out MSE comparison: linear model vs neural network
print(paste(MSE.lm, MSE.nn))
# ---- Visual comparison of predictions ----
# Side by side: NN predictions (left) and linear-model predictions (right)
par(mfrow = c(1, 2))
plot(test$Health, pr.nn_, col = 'red', main = 'Real vs predicted NN', pch = 18, cex = 0.7)
abline(0, 1, lwd = 2)  # perfect-prediction reference line
legend('bottomright', legend = 'NN', pch = 18, col = 'red', bty = 'n')
plot(test$Health, pr.lm, col = 'blue', main = 'Real vs predicted lm', pch = 18, cex = 0.7)
abline(0, 1, lwd = 2)
legend('bottomright', legend = 'LM', pch = 18, col = 'blue', bty = 'n', cex = .95)

# Reset to a single panel before the combined overlay plot; without this
# the third plot would land in the left cell of a fresh 1x2 page.
par(mfrow = c(1, 1))
plot(test$Health, pr.nn_, col = 'red', main = 'Real vs predicted NN', pch = 18, cex = 0.7)
points(test$Health, pr.lm, col = 'blue', pch = 18, cex = 0.7)
abline(0, 1, lwd = 2)
legend('bottomright', legend = c('NN', 'LM'), pch = 18, col = c('red', 'blue'))
# ---- Cross-validated error of the linear model ----
library(boot)
set.seed(200)

# Refit on the full data set, then estimate prediction error with
# 20-fold cross-validation; delta[1] is the raw CV estimate of MSE
lm.fit <- glm(Health ~ ., data = data)
cv_lm <- cv.glm(data = data, glmfit = lm.fit, K = 20)
cv_lm$delta[1]
# ---- Cross-validated error of the neural network ----
set.seed(450)
k <- 20
cv.error <- numeric(k)  # preallocate instead of growing a NULL vector

library(plyr)
pbar <- create_progress_bar('text')
pbar$init(k)

# Hoist loop invariants: response range for unscaling, predictor names
health_min <- min(data$Health)
health_range <- max(data$Health) - health_min
predictors <- setdiff(names(scaled), "Health")

for (i in seq_len(k)) {
  # Fresh random 90/10 split each iteration (Monte Carlo CV,
  # not disjoint folds)
  index <- sample(seq_len(nrow(data)), round(0.9 * nrow(data)))
  train.cv <- scaled[index, ]
  test.cv <- scaled[-index, ]

  nn <- neuralnet(f, data = train.cv, hidden = c(3, 1), linear.output = TRUE)

  # Predict by named predictor columns (not hard-coded 2:5), unscale back
  # to the original Health range, and record this fold's MSE
  pr.nn <- compute(nn, test.cv[, predictors])
  pr.nn <- pr.nn$net.result * health_range + health_min
  test.cv.r <- test.cv$Health * health_range + health_min
  cv.error[i] <- sum((test.cv.r - pr.nn)^2) / nrow(test.cv)

  pbar$step()
}
# ---- CV results ----
mean(cv.error)  # average MSE across the k folds
cv.error        # per-fold values

# Distribution of the per-fold MSE values.
# (A stray trailing "|" from the web extraction was removed here; it made
# the expression consume the following junk line and error at runtime.)
boxplot(cv.error, xlab = 'MSE CV', col = 'cyan',
        border = 'blue', names = 'CV error (MSE)',
        main = 'CV error (MSE) for NN', horizontal = TRUE)
# end of script