Réchantillonnage (bootstrap) suite à une ACP

**Tisoon** · 16/06/2014, 19h52

Bonjour,

J'utilise ce script que j'ai un peu modifié selon mes besoins.

Code :

Sélectionner tout - Visualiser dans une fenêtre à part

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
## Sample bootstrapped PCA
 
library(lattice)                                  # Lattice graphics package
 
rm(list=ls())                                     # Clear all variables
graphics.off()                                    # Close any open graphics windows
 
## Read data into data frame
 
sample.data = read.table('C:\\S4.csv', header=TRUE, sep=",")
sample.data
 
attach(sample.data)                               # Make contents available
 
X = cbind(NEA,NEB,NEC,NED,NEE,NEF,NEG,NEH,NEI,NEJ,NEK,NEL,NEM,NEN,NEO,NEP,NOA,NOB,NOC,NOD,NOE,NOF,NOG,NOH,NOI,NOJ,NOK,NOL,NOM,NON,NOO,NOP,NOQ,SOA,SOB,SOC,SOD,SOE,SOF,SOG,SOH,SOI,SOJ,SOK,SOL,SOM,SON,EAA,EAB,EAC,EAD,EAE,EAF,EAG,WEA,WEB,WEC,WED,WEE,WEF,CEA,CEB,CEC,CED,CEE,CEF,BKA,BKB,BKC,BKD,BKE,BKF)                         # Concatenate variables into data matrix
X
 
n.iterations = 1000                               # Number of bootstrap iterations
ci.level = 0.95                                   # Confidence level, as probability
useCorr = TRUE;                                   # Set to use correlation or covariance matrix
 
# --------------------------------------------------------------------------------------------
 
nObs = nrow(X)                                    # Size of matrix X
nVars = ncol(X)
 
## PCA results for original data
 
if (useCorr)
  {
  X.C = cor(X)
  X.pca = prcomp(X, scale=TRUE)
  } else {
  X.C = cov(X)
  X.pca = prcomp(X, scale=FALSE)
  }
 
pca.eigenvalues = X.pca$sdev^2                  # Isolate eigenvalues, loadings, scores
pca.percentVar = 100*pca.eigenvalues / sum(pca.eigenvalues)
pca.cumVar = cumsum(pca.percentVar)
pca.loadings = X.pca$rotation
pca.scores = predict(X.pca)
 
## Bootstrapped PCA results
 
distrib.corrs = matrix(0, nrow=n.iterations, ncol=nVars*nVars)  # Allocate vectors for sampling distributions
distrib.eigenvalues = matrix(0, nrow=n.iterations, ncol=nVars) 
distrib.percentVar = matrix(0, nrow=n.iterations, ncol=nVars)
distrib.loadings = matrix(0, nrow=n.iterations, ncol=nVars*nVars)
 
for (iter in 1:n.iterations)                    # Bootstrap iterations
  {
  i = sample(1:nObs, replace=TRUE)                # Get bootstrapped subscripts
  X.boot = X[i,]                                  # Bootstrapped sample of rows of X
 
  if (useCorr)
    {
    X.boot.C = cor(X.boot)
    X.boot.pca = prcomp(X.boot, scale=TRUE)
    } else {
    X.boot.C = cov(X.boot)
    X.boot.pca = prcomp(X.boot, scale=FALSE)
    }
 
  boot.C = X.boot.C                               # Isolate corrs, eigenvalues, loadings
  boot.eigenvalues = X.boot.pca$sdev^2
  boot.percentVar = 100*boot.eigenvalues / sum(boot.eigenvalues)
  boot.loadings = X.boot.pca$rotation
  boot.scores = predict(X.boot.pca)
 
  for (v in 1:nVars)                              # Reverse signs of loadings if negatively correlated
    {                                             #   with original loadings
    c = cor(cbind(pca.loadings[,v],boot.loadings[,v]))
    if (c[2,1]<0) boot.loadings[,v] = -boot.loadings[,v]
    }
 
  distrib.corrs[iter,] = c(boot.C)                # Stash results into bootstrap distributions
  distrib.eigenvalues[iter,] = c(boot.eigenvalues)
  distrib.percentVar[iter,] = c(boot.percentVar)
  distrib.loadings[iter,] = c(boot.loadings)
  }
 
prob.low = (1-ci.level)/2                       # Low probability bound for CI
prob.high = 1-prob.low                          # High probability bound for CI
 
ci.low.eigenvalues =  matrix(0, nrow=1, ncol=nVars)    # Allocate low & high confidence bounds
ci.high.eigenvalues = matrix(0, nrow=1, ncol=nVars)
ci.low.percentVar =   matrix(0, nrow=1, ncol=nVars)
ci.high.percentVar =  matrix(0, nrow=1, ncol=nVars)
 
ci.low.corrs =        matrix(0, nrow=nVars, ncol=nVars)
ci.high.corrs =       matrix(0, nrow=nVars, ncol=nVars)
ci.low.loadings =     matrix(0, nrow=nVars, ncol=nVars)
ci.high.loadings =    matrix(0, nrow=nVars, ncol=nVars)
 
v = 0;
for (vc in 1:nVars)                             # Find and stash confidence bounds
  {
  ci.low.eigenvalues[,vc] = quantile(distrib.eigenvalues[,vc], probs=prob.low)
  ci.high.eigenvalues[,vc] = quantile(distrib.eigenvalues[,vc], probs=prob.high)
  ci.low.percentVar[,vc] = quantile(distrib.percentVar[,vc], probs=prob.low)
  ci.high.percentVar[,vc] = quantile(distrib.percentVar[,vc], probs=prob.high)
  for (vr in 1:nVars)
    {
    v = v+1
    ci.low.corrs[vr,vc] = quantile(distrib.corrs[,v], probs=prob.low)
    ci.high.corrs[vr,vc] = quantile(distrib.corrs[,v], probs=prob.high)
    ci.low.loadings[vr,vc] = quantile(distrib.loadings[,v], probs=prob.low)
    ci.high.loadings[vr,vc] = quantile(distrib.loadings[,v], probs=prob.high)
    }
  }
 
 
## Display original values with low & high confidence bounds
 
pca.loadings
ci.low.loadings
ci.high.loadings

Seulement voilà, j'ai une erreur à la ligne 82 quand j'exécute :

Erreur dans pca.loadings[, v] : indice hors limites

Je n'ai pas cette erreur lorsque je le fais avec 7 variables. Je soupçonne un trop grand nombre de variables comme étant un problème ici, mais je n'ai aucune idée de comment corriger ça.

J'ai pensé à estimer d'abord mon ACP, puis à soumettre les vecteurs au bootstrap. Mais je ne suis pas plus doué de ce côté-ci. J'ai testé un tas de choses, mais je m'y perds au final ou obtiens un résultat bien différent de ce que je trouvais avec le code au-dessus (appliqué à 7 régions/variables).
Je trouve un résultat comme celui-ci avec 7 régions et le code au-dessus.
Application.pdf
Je n'ai rien de tel avec tout ce que j'ai testé (module boot de R).

Votre aide serait grandement appréciée, merci.

Réchantillonnage (bootstrap) suite à une ACP

R

Discussions similaires

Partager

Partager