1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49
"""Report on two DNA text files and concatenate their sequences into one file.

For each input file the script prints the size of the raw content, the number
of occurrences of each base symbol, the same figures once line breaks have been
removed, and the span of the DNA run that is kept. The kept runs are then
written back to back into the output file.
"""
import re

# Symbols that are counted in the reports, in the order they are printed.
_BASES = ('A', 'G', 'C', 'T', 'N', '-')

# An uninterrupted run of base symbols; only the first such run of a file is kept.
_SEQUENCE_RE = re.compile(r'[AGTCN-]+')


def _print_base_counts(text):
    """Print how many times each base symbol occurs in *text*, then the sum."""
    total = 0
    for base in _BASES:
        count = text.count(base)
        total += count
        print(repr(base), count)
    print('total =', total)


def _extract_sequence(fichier):
    """Print a report about the file *fichier* and return its DNA run.

    The returned string is the first uninterrupted run of base symbols found
    in the file content once every newline character has been removed;
    whatever precedes or follows that run is discarded.

    Raises:
        ValueError: if the file contains none of the base symbols.
    """
    with open(fichier) as f:
        raw = f.read()

    # Look at the raw content of the file.
    print('uh =', fichier + '.read()', '\nlen(uh) =', len(raw))
    _print_base_counts(raw)
    print()

    # Remove the '\n' characters.
    flat = raw.replace('\n', '')
    print("uht = uh.replace('\\n','')", '\n', 'len(uht) =', len(flat))

    # Look at the DNA sequence itself.
    match = _SEQUENCE_RE.search(flat)
    if match is None:
        # re.search returns None when nothing matches; fail with a message
        # that names the file instead of an AttributeError on None.
        raise ValueError('no DNA sequence found in %r' % (fichier,))
    x, y = match.span()
    sequence = flat[x:y]
    print('(x,y) =', repr((x, y)))
    print('len(uht[x:y]) =', len(sequence))
    _print_base_counts(sequence)

    # Sanity check that the kept run holds nothing but base symbols. The run
    # is exactly what the pattern matched, so this is not expected to print.
    for symbol in sequence:
        if symbol not in _BASES:
            print(repr(symbol), "est un caractere exotique dans le chaine ADN")
    print('\n\n')
    return sequence


def main(fichiers=('adn1.txt', 'adn2.txt'), sortie='adn.txt'):
    """Report on every file of *fichiers* and write their DNA runs to *sortie*.

    Args:
        fichiers: paths of the input files, processed and concatenated in order.
        sortie: path of the file that receives the concatenated sequences.

    Returns:
        A dict mapping each input path to the DNA run extracted from it.

    Raises:
        ValueError: if one of the input files contains no base symbol.
    """
    adn = {}
    for fichier in fichiers:
        adn[fichier] = _extract_sequence(fichier)

    with open(sortie, 'w') as g:
        g.write(''.join(adn[fichier] for fichier in fichiers))

    # Read the result back so that the reported length is the one on disk.
    with open(sortie) as f:
        written = f.read()
    print("Nouveau fichier {0} = {1} : len({0!r}) =".format(
        sortie, ' + '.join(fichiers)), len(written))
    return adn


if __name__ == '__main__':
    main()
Partager