# Batch word-frequency extraction: for each row of boucle.xlsx, clean the
# matching fichier_<i>.txt, count word frequencies, drop stop words, and
# write sortie_<i>.csv.
# Load every required package once, up front. The original script called
# library() on each loop iteration (and loaded "tm" twice); library() is
# idempotent, so loading here is both correct and faster.
library("readxl")        # read_excel(): driver workbook
library("stringr")
library("NLP")
library("tm")            # Corpus, tm_map, TermDocumentMatrix
library("SnowballC")
library("wordcloud")
library("RColorBrewer")

# Read the driver workbook: one row per input file to process
donnees <- read_excel("boucle.xlsx")

# Number of input files (one per workbook row)
n <- nrow(donnees)
# Process each input file "fichier_<i>.txt": normalise the text, count
# word frequencies, remove the words listed in "listedemots.txt", and
# write the surviving (word, freq) table to "sortie_<i>.csv".
for (i in seq_len(n)) {
  # --- Read the i-th input file into a tm corpus -------------------------
  fichier <- paste0("fichier_", i, ".txt")
  text <- readLines(fichier)
  docs <- Corpus(VectorSource(text))

  # Lower-case first so later pattern matching only deals with one case.
  # (The original ran tolower twice; once is enough.)
  docs <- tm_map(docs, content_transformer(tolower))

  # Replace separator characters with spaces so adjacent words don't fuse
  toSpace <- content_transformer(function(x, pattern) gsub(pattern, " ", x))
  for (sep in c("/", "@", "-", ",", ";", ":", "'")) {
    docs <- tm_map(docs, toSpace, sep)
  }

  # Strip French accents: é/è/ê -> e, ô -> o, â/à -> a, ç -> c, î -> i.
  # BUG FIX: the original applied the "a" transformer to "ç" and "î"
  # (accent2 instead of accent3), silently turning them into "a".
  deaccent <- content_transformer(function(x, pattern, repl) {
    gsub(pattern, repl, x)
  })
  docs <- tm_map(docs, deaccent, "[\u00e9\u00e8\u00ea]", "e")  # é è ê
  docs <- tm_map(docs, deaccent, "\u00f4", "o")                # ô
  docs <- tm_map(docs, deaccent, "[\u00e2\u00e0]", "a")        # â à
  docs <- tm_map(docs, deaccent, "\u00e7", "c")                # ç
  docs <- tm_map(docs, deaccent, "\u00ee", "i")                # î

  # Remove digits and any remaining punctuation
  docs <- tm_map(docs, removeNumbers)
  docs <- tm_map(docs, removePunctuation)

  # --- Word-frequency table, most frequent first -------------------------
  dtm <- TermDocumentMatrix(docs)
  freqs <- sort(rowSums(as.matrix(dtm)), decreasing = TRUE)
  d <- data.frame(word = names(freqs), freq = freqs, row.names = NULL)

  # --- Remove stop words listed in listedemots.txt -----------------------
  # Run the stop-word list through the same tokenizer so its terms match
  # the dtm terms, then anti-join. (The original used merge() with a
  # temp flag, which leaked junk NA columns into the output CSV.)
  motsasupr <- readLines("listedemots.txt")
  stop_dtm <- TermDocumentMatrix(Corpus(VectorSource(motsasupr)))
  stop_words <- rownames(as.matrix(stop_dtm))
  nuage <- d[!(d$word %in% stop_words), ]

  # --- Write the result --------------------------------------------------
  sortie <- paste0("sortie_", i, ".csv")
  write.table(nuage, file = sortie, quote = TRUE, sep = " ",
              row.names = TRUE, col.names = TRUE)

  # Optional word cloud (disabled, as in the original):
  # set.seed(1234)
  # wordcloud(words = nuage$word, freq = nuage$freq, min.freq = 1,
  #           max.words = 50, random.order = FALSE, rot.per = 0.10,
  #           colors = brewer.pal(10, "RdBu"))
}
# end of script