# -*- coding: utf-8 -*-
# Demo script: wrap every word found in the text parts of an HTML fragment
# in a numbered <span id="word_N"> element, leaving the tags untouched.
#
# The encoding line above is required by Python 2 because the sample text
# contains a non-ASCII character; it only takes effect on the first two
# lines of a file.
#
# NOTE(review): the original layout of this script was lost (no indentation,
# stray table characters around the code).  The loop nesting below is
# reconstructed from the data flow -- confirm against the author's version.
import re

# Sample HTML fragment to process.
ch = '''<div>
<div class="lc">
<h2 class="what">
Salut à tous</h2>
<p>
<strong>Il faut faire</strong> quelsue chose pour que ca marche <p>
</div>'''

# One or more consecutive tags, each followed by optional whitespace.
# Because the whole pattern is a capturing group, split() keeps the matched
# tag runs in its result: text pieces land on even indexes, tag runs on odd.
pat = re.compile(r'((?:<.+?>\s*)+)')
liS = pat.split(ch)

# Every print below passes one pre-built string, a form that produces the
# same output with Python 2's print statement and Python 3's print function.
print('liS =\n' + '[' + '\n'.join(repr(u) for u in liS) + ']')
print('\nlen(liS) = ' + str(len(liS)))
print('')

# Running word counter; it keeps increasing across all text pieces so that
# every generated id is unique in the final document.
n = 1
# A word is a run of non-space characters plus at most one trailing space.
patmot = re.compile(r'[^ ]+ ?')

# Step 2 visits only the text pieces; the tag runs in between are skipped.
for s in range(0, len(liS), 2):
    print('\nliS[' + str(s) + '] = ' + repr(liS[s]))
    lispaned = []
    for m in patmot.finditer(liS[s]):
        lispaned.append('<span id="word_' + str(n) + '">' + m.group() + '</span>')
        n += 1
    print('lispaned =\n' + str(lispaned))
    print("''.join(lispaned) =\n" + repr(''.join(lispaned)))
    # Replace the text piece in place so the final join rebuilds the page.
    liS[s] = ''.join(lispaned)

# NOTE(review): these two blank lines are assumed to come after the loop.
print('')
print('')

# Reassemble the document: untouched tag runs plus the span-wrapped text.
ch_spaned = ''.join(liS)
print(ch_spaned)