1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28
#' Extract the trailing pattern of each string using an ordered rule list
#'
#' Every rule is a regular expression anchored at the end of the string
#' (`$`). For each rule, in order, the first match position in each element
#' of `txt` is located and the substring from that position to the end of
#' the element is recorded. Rules are applied sequentially, so when several
#' rules match the same element the LAST matching rule wins.
#'
#' The patterns only list lower-case letters, so callers are expected to
#' lower-case the input beforehand.
#'
#' @param txt A character vector (anything else is coerced with
#'   `as.character()`).
#' @return A character vector of the same length as `txt`. Elements for
#'   which no rule matched (including `NA` inputs) are `NA_character_`, so
#'   the result stays aligned with the input. Empty input yields
#'   `character(0)`.
findSyllabus <- function(txt) {
  txt <- as.character(txt)

  # NOTE(review): `[tr]`, `[vr]` and `[pr]` below are character classes
  # (one or two letters taken from the set), not the literal clusters
  # "tr" / "vr" / "pr" -- confirm this is what was intended.
  # NOTE(review): the last four rules use the unaccented vowel class only.
  regles <- c(
    "[^aeiouyîïéèm]?[aeiouyîïéè]+[^aeiouyîïéè]*$",
    "[m]+[aeiouyîïéè]+[^aeiouyîïéè]*$",
    "[^aeiouyîïéèm]?[aeiouyîïéè]+[l]{1,2}[aeiouyîïéè][s]?$",
    "[^aeiouyîïéèm]?[aeiouyîïéè]+[n]{1,2}[aeiouyîïéè][s]?$",
    "[^aeiouyîïéèm]?[aeiouyîïéè]+[m]{1,2}(e)[s]?$",
    "[^aeiouyîïéèm]?[aeiouyîïéè]+[x]{1,2}(e)[s]?$",
    "[^aeiouyîïéèm]?[aeiouyîïéè]+[s]{1,2}(e)[s]?$",
    "[^aeiouyîïéèm]?[aeiouyîïéè]+[z]{1,2}(e)[s]?$",
    "[^aeiouyîïéèm]?[aeiouyîïéè]+[tr]{1,2}(e)[s]?$",
    "[^aeiouyîïéèm]?[aeiouyîïéè]+[vr]{1,2}(e)[s]?$",
    "[^aeiouyîïéèm]?[aeiouyîïéè]+[pr]{1,2}(e)[s]?$",
    "[^aeiouyîïéèm]?[aeiouyîïéè]+[g]{1,2}(e)[s]?$",
    "(gn)[aeiouy]+[^aeiouy]?$",
    "(pl)[aeiouy]+[^aeiouy]?$",
    "(br)[aeiouy]+[^aeiouy]?$",
    "(bl)[aeiouy]+[^aeiouy]?$"
  )

  # Pre-fill with NA so unmatched elements keep their slot; collapsing a
  # list with unlist() would drop them and shift every later result.
  out <- rep(NA_character_, length(txt))

  for (regle in regles) {
    p <- regexpr(regle, txt)
    # which() discards both "no match" (-1) and NA positions.
    ids <- which(p > 0)
    # substr() is vectorized: take each hit from its match start to its end.
    out[ids] <- substr(txt[ids], p[ids], nchar(txt[ids]))
  }

  out
}
# Apply the rule set to the lower-cased NOM_COMM column of `f`
# (`f` is defined outside this excerpt; the patterns are lower-case only).
sylls <- findSyllabus(tolower(f$NOM_COMM))
Partager