1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84
| rassembler <- function(entrees, nomVar1, nomVar2, nomVar3, operation = sum,
initTexte = "æ", initNombre = -921473) {
stopifnot(all(sapply(c("foreach", "magrittr"), require, character.only = TRUE)))
appariement <- sapply(entrees[[nomVar1]], grep, entrees[[nomVar2]])
cibles <- which(and(
entrees[, nomVar1] %in% entrees[, nomVar2],
entrees[, nomVar2] %in% entrees[, nomVar1]))
tailleInitiale <- nrow(entrees)
finis <- rep(initNombre, tailleInitiale)
confinement <- new.env(parent = emptyenv())
confinement$sorties <- list(rep(initTexte, tailleInitiale),
rep(initTexte, tailleInitiale),
rep(initNombre, tailleInitiale))
names(confinement$sorties) <- c(nomVar1, nomVar2, nomVar3)
confinement$cptElements <- cptFlux <- 0L
ajouterSortie <- function(val1, val2, val3) {
confinement$cptElements <- confinement$cptElements + 1L
confinement$sorties[[nomVar1]][confinement$cptElements] <- val1
confinement$sorties[[nomVar2]][confinement$cptElements] <- val2
confinement$sorties[[nomVar3]][confinement$cptElements] <- val3
}
foreach(i = seq_along(entrees[[nomVar1]]), j = appariement) %do% {
if (all(length(j) > 0L, not(i %in% finis))) {
tmp <- entrees[j, ]
cible <- which(tmp[, nomVar1] == entrees[i, nomVar2])
if (length(cible) > 0L) {
cptFlux <- cptFlux + 1L
finis[cptFlux] <- i
cptFlux <- cptFlux + 1L
finis[cptFlux] <- cibles[entrees[cibles, nomVar1] == entrees[i, nomVar2]]
ajouterSortie(entrees[i, nomVar1],
entrees[i, nomVar2],
operation(entrees[i, nomVar3], tmp[cible, nomVar3]))
} else {
ajouterSortie(entrees[i, nomVar1], entrees[i, nomVar2], entrees[i, nomVar3])
}
} else if (not(i %in% finis)) {
ajouterSortie(entrees[i, nomVar1], entrees[i, nomVar2], entrees[i, nomVar3])
}
}
utilFiltre <- function(conteneur, valeurExclue) {
Filter(function(z) z != valeurExclue, conteneur)
}
finis <- utilFiltre(finis, initNombre)
confinement$sorties[c(1L, 2L)] <- lapply(confinement$sorties[c(1L, 2L)], utilFiltre, initTexte)
confinement$sorties[[3L]] <- utilFiltre(confinement$sorties[[3L]], initNombre)
message(paste0("Lignes originales à flux : ",
paste(finis, collapse = ", "),
"\nNombre de flux : ", length(finis) / 2L, "\n"))
return(as.data.frame(confinement$sorties, stringsAsFactors = FALSE))
}
donnees <- data.frame(
x = c("A", "C", "F", "B", "Z", "D", "M", "Q"),
y = c("B", "D", "G", "A", "V", "C", "X", "H"),
N = sample.int(8L),
stringsAsFactors = FALSE)
donneesAvecRepetitions <- data.frame(
x = c("A", "C", "F", "B", "Z", "D", "B", "M", "Q", "E", "O"),
y = c("B", "M", "G", "W", "B", "C", "A", "A", "H", "O", "E"),
N = sample.int(11L), stringsAsFactors = FALSE)
print(donnees)
suppressMessages(rassembler(donnees, "x", "y", "N")) |
Partager