import re
import hashlib
from contextlib import closing
import sys
class Tk(object):
    """Interned token pairing a word with the MD5 hex digest of that word.

    Instances are cached per word in ``_dico_``: calling ``Tk(mot)`` twice
    with the same ``mot`` returns the very same object, so the digest is
    computed only once per distinct word.
    """

    # Cache of the tokens built so far, keyed by the word they wrap.
    _dico_ = {}

    # Text type of the running interpreter (``unicode`` on Python 2,
    # ``str`` on Python 3); such values must be encoded before hashing.
    _TEXT_TYPE = type(u'')

    def __new__(cls, mot):
        """Return the cached token for ``mot``, creating it on first use."""
        if mot not in cls._dico_:
            instance = object.__new__(cls)
            cls._dico_[mot] = instance._my_setup_(mot)
        return cls._dico_[mot]

    def _my_setup_(self, mot):
        """Store ``mot`` and its MD5 hex digest on the instance; return self.

        Text is encoded as UTF-8 before hashing because ``hashlib.md5`` only
        accepts bytes-like data. Any other value is handed to ``hashlib.md5``
        unchanged.
        """
        self._mot = mot
        if isinstance(mot, self._TEXT_TYPE):
            payload = mot.encode('utf-8')
        else:
            payload = mot
        self._md5 = hashlib.md5(payload).hexdigest()
        return self

    def __str__(self):
        """Format the token as ``"<word>: <md5 hex digest>"``."""
        return "%s: %s" % (self._mot, self._md5)
def readlines(filein):
    """Yield every line of ``filein`` in order, then close it.

    ``filein`` must provide ``readlines()`` and ``close()``. Nothing is read
    until the generator is first advanced; the object is closed when the
    generator finishes or is itself closed.
    """
    with closing(filein) as source:
        # All lines are fetched in one call before any of them is yielded.
        fetched = source.readlines()
        for current in fetched:
            yield current
# Default word separator: any run of non-word characters.
_MOT_SEPARATOR = re.compile(r'\W+')


def get_mots(filein, fileout, separator=None):
    """Write one ``"<word>: <md5>"`` line to ``fileout`` per word in ``filein``.

    Each input line is split on ``separator`` and every non-empty piece is
    wrapped in a ``Tk`` token whose string form is written followed by a
    newline. Repeated words are written again each time they occur.

    Args:
        filein: file-like object to read; it is consumed through
            ``readlines`` (which also closes it).
        fileout: file-like object with ``write()`` and ``close()``; it is
            closed when this function returns, even on error.
        separator: compiled pattern (anything with a ``split(line)`` method)
            used to cut lines into words. Defaults to ``\\W+``, so the
            function no longer depends on a global being defined by the
            caller beforehand.
    """
    if separator is None:
        separator = _MOT_SEPARATOR
    with closing(fileout) as out:
        for line in readlines(filein):
            for mot in separator.split(line):
                # split() produces '' when a line starts or ends with a
                # separator; those empty pieces are not words.
                if mot:
                    out.write(str(Tk(mot)) + "\n")
if __name__ == '__main__':
    # Demo run: tokenise a sample paragraph and print one line per word
    # on standard output.
    try:
        # Python 2 location of the in-memory text buffer.
        from StringIO import StringIO
    except ImportError:
        # The StringIO module does not exist on Python 3; io.StringIO is
        # the in-memory text buffer there.
        from io import StringIO

    mots = '''\
If allow_nan is True (the default), then NaN, Infinity, and -Infinity
will be encoded as such. This behavior is not JSON specification compliant,
but is consistent with most JavaScript based encoders and decoders.
Otherwise, it will be a ValueError to encode such floats.'''
    # Separator pattern: one or more non-word characters.
    rg = re.compile(r'\W+')
    f = StringIO(mots)
    o = sys.stdout
    get_mots(f, o)