1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30
|
# Count how many samples carry each group label in `sml`.
occ <- table(sml)

# The result layout below is written for exactly three groups: one average
# and one normalised column per group, plus the overall average and SD.
# Fail early with a clear message instead of an opaque dimnames error.
if (length(occ) != 3L) {
  stop(
    "Expected exactly 3 sample groups in 'sml', found ", length(occ),
    call. = FALSE
  )
}

# One row per row of `Expression`, one column per statistic, initialised to 0.
# The column count is taken from the names so the two can never disagree.
stat_names <- c(
  "NN AVG", "PN AVG", "PP AVG", "AVG", "SD",
  "NN NORM", "PN NORM", "PP NORM"
)
mat <- matrix(
  0,
  nrow = nrow(Expression),
  ncol = length(stat_names),
  dimnames = list(rownames(Expression), stat_names)
)
# Normalisation step
# Fills `mat` column by column (all rows at once) instead of row by row:
#   columns 1-3: mean of each sample group; a group is a consecutive range of
#                columns of `Expression` whose width is occ[1], occ[2], occ[3]
#   column 4:    mean over all columns
#   column 5:    standard deviation over all columns
#   columns 6-8: (group mean - overall mean) / SD
# NOTE(review): this assumes the columns of `Expression` are already ordered
# group by group, in the same order as `occ` -- confirm against the code that
# builds `Expression` and `sml`.

# as.matrix() so that row slices are plain numeric vectors.
expr_values <- as.matrix(Expression)
group_sizes <- as.vector(occ)
# Number of columns that precede each group.
group_offsets <- cumsum(group_sizes) - group_sizes

for (g in 1:3) {
  # seq_len() keeps the range empty (not reversed) for an empty group.
  group_cols <- group_offsets[g] + seq_len(group_sizes[g])
  mat[, g] <- rowMeans(expr_values[, group_cols, drop = FALSE])
}

mat[, 4] <- rowMeans(expr_values)
mat[, 5] <- vapply(
  seq_len(nrow(expr_values)),
  function(i) sd(expr_values[i, ]),
  numeric(1)
)

# The length-n vectors mat[, 4] and mat[, 5] are recycled down each of the
# three columns, so every row is centred and scaled by its own mean and SD.
# A row with SD == 0 yields NaN here.
mat[, 6:8] <- (mat[, 1:3] - mat[, 4]) / mat[, 5]
# Save the statistics matrix as tab-separated text, with row and column
# names and without quoting. An existing file of that name is overwritten.
write.table(
  mat,
  file = "profiling_norm.txt",
  sep = "\t",
  dec = ".",
  quote = FALSE,
  qmethod = "escape",
  row.names = TRUE,
  col.names = TRUE,
  append = FALSE
)
# The part of the matrix which is interesting for clustering
dim(mat[, 6:8])

# Clustering
# Clustering the rows needs the full row-by-row correlation matrix: for n rows
# cor() allocates n * n doubles (8 bytes each). The reported failure
# "cannot allocate vector of size 22.3 Gb" corresponds to about 54,700 rows.
# The row dendrogram is therefore built on at most `max_genes` rows, keeping
# the ones with the highest SD (column 5 of `mat`). Raise or lower the limit
# to fit the available memory.
max_genes <- 5000L

profiles <- mat[, 6:8, drop = FALSE]
row_low <- pmin(profiles[, 1], profiles[, 2], profiles[, 3])
row_high <- pmax(profiles[, 1], profiles[, 2], profiles[, 3])

# Rows containing NaN/NA/Inf (e.g. SD == 0) would put NA into the distance
# matrices, which hclust() rejects.
finite_rows <- is.finite(row_low) & is.finite(row_high)

# Pearson correlation is undefined for a row whose three values are identical,
# so those rows are left out of the row clustering as well.
gene_rows <- which(finite_rows & row_high > row_low)

if (length(gene_rows) > max_genes) {
  message(
    "Clustering the ", max_genes, " rows with the highest SD out of ",
    length(gene_rows), " usable rows"
  )
  by_sd <- order(mat[gene_rows, 5], decreasing = TRUE)
  # sort() restores the original row order among the selected rows.
  gene_rows <- sort(gene_rows[by_sd[seq_len(max_genes)]])
}

if (length(gene_rows) < 2L) {
  stop("Need at least 2 usable rows to build a dendrogram", call. = FALSE)
}

# Row dendrogram: Pearson correlation distance between row profiles.
hr <- hclust(
  as.dist(1 - cor(t(profiles[gene_rows, , drop = FALSE]), method = "pearson")),
  method = "complete"
)

# Column dendrogram: Spearman correlation distance between the three
# normalised columns, computed over every finite row (only a 3 x 3 matrix).
hc <- hclust(
  as.dist(1 - cor(profiles[finite_rows, , drop = FALSE], method = "spearman")),
  method = "complete"
)

# NOTE(review): `hc` clusters the three columns, yet the title says "Gene
# clustering" -- if the gene dendrogram is what should be shown, plot `hr`.
plot(
  hc,
  main = paste("Gene clustering ;", "Spearman distance"),
  xlab = NULL,
  cex = 0.1,
  cex.main = 1.5
)
Partager