1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37
# Accented lowercase letters, grouped by the plain capital each group folds to.
_SOUNDEX_ACCENTS = (
    (u"âäà", u"A"),
    (u"éèêë", u"E"),
    (u"îï", u"I"),
    (u"ôö", u"O"),
    (u"ûùü", u"U"),
    (u"ç", u"C"),
)

# Consonant groups and the digit that encodes every letter of the group.
_SOUNDEX_DIGITS = (
    (u"BP", u"1"),
    (u"CKQ", u"2"),
    (u"DT", u"3"),
    (u"L", u"4"),
    (u"MN", u"5"),
    (u"R", u"6"),
    (u"GJ", u"7"),
    (u"XZS", u"8"),
    (u"FV", u"9"),
)

# Letters that produce no code at all (vowels, W and H).
_SOUNDEX_SILENT = u"AEIOUYWH"

# Punctuation and whitespace that are dropped wherever they appear.
_SOUNDEX_SEPARATORS = u",.;: \t\n\r"

# One entry per character: accented letter (either case) -> plain capital.
_SOUNDEX_FOLD = {
    variant: plain
    for group, plain in _SOUNDEX_ACCENTS
    for letter in group
    for variant in (letter, letter.upper())
}

# One entry per character: capital letter or separator -> digit or "".
_SOUNDEX_CODE = {
    letter: digit
    for group, digit in _SOUNDEX_DIGITS
    for letter in group
}
_SOUNDEX_CODE.update(
    (char, u"") for char in _SOUNDEX_SILENT + _SOUNDEX_SEPARATORS
)


def soundex(chaine, verbose=False):
    """Return the four-character phonetic code of ``chaine``.

    The code is built as follows:

    1. Accented letters listed in ``_SOUNDEX_ACCENTS`` are folded to their
       plain capital; every other character is upper-cased.
    2. Leading separators (``,.;:`` and whitespace) are discarded so the code
       starts on a significant character.
    3. The first character is kept as is; in the rest of the string each
       consonant is replaced by its digit and vowels, ``W``, ``H`` and
       separators are removed.  Characters absent from the tables are left
       untouched.
    4. Runs of identical adjacent characters are collapsed to one.
    5. The result is right-padded with ``0`` and cut to four characters.

    Args:
        chaine: Text to encode.  A byte string is decoded as UTF-8, falling
            back to Latin-1 when it is not valid UTF-8.
        verbose: When true, print the intermediate strings of the
            computation.  Defaults to ``False`` so the function is silent.

    Returns:
        The four-character code, of the same string type as ``chaine``
        (bytes in, bytes out).  An empty or separator-only input yields
        ``"0000"``.
    """
    # Work on text, not bytes: iterating a UTF-8 byte string would split
    # accented letters into meaningless single bytes.
    encoding = None
    if isinstance(chaine, bytes):
        try:
            texte = chaine.decode("utf-8")
            encoding = "utf-8"
        except UnicodeDecodeError:
            texte = chaine.decode("latin-1")
            encoding = "latin-1"
    else:
        texte = chaine

    # Fold accents and case in a single pass, then drop leading separators.
    canonique = u"".join(_SOUNDEX_FOLD.get(c, c.upper()) for c in texte)
    canonique = canonique.lstrip(_SOUNDEX_SEPARATORS)

    if canonique:
        # The first character is preserved; only the tail is encoded.
        brut = canonique[0] + u"".join(
            _SOUNDEX_CODE.get(c, c) for c in canonique[1:]
        )
        # Collapse adjacent duplicates (e.g. the "11" produced by "BB").
        uniques = [brut[0]]
        for caractere in brut[1:]:
            if caractere != uniques[-1]:
                uniques.append(caractere)
        sans_doublons = u"".join(uniques)
    else:
        brut = sans_doublons = u""

    # Letter-digit-digit-digit layout: pad with zeros, keep four characters.
    code = (sans_doublons + u"0000")[:4]

    if verbose:
        # repr() keeps the trace printable whatever the console encoding is.
        print("forme canonique = %r" % (canonique,))
        print("code brut = %r" % (brut,))
        print("chaine sans doublons = %r" % (sans_doublons,))

    if encoding is not None:
        # Give byte-string callers a byte string back.
        return code.encode(encoding)
    return code
# Demo: encode a sample sentence that starts with every accented letter the
# encoder knows about, followed by plain uppercase words.
ch = "âäà éèêë îï ôö ûùü ç BEFORE THE END OF THE DAY, THE RED CAT WAS DEAD"
chaine = soundex(ch)
# Single-argument print(...) behaves the same on Python 2 and Python 3; the
# format string reproduces the spacing of the former print statement.
print('\n-----------------------\n\nquadruplet final = %s \n' % chaine)
Partager