#!/usr/bin/env python
#coding=utf-8
import re,urllib,pprint
# Scrape the bookmark links of a delicious.com profile, page by page, and
# pretty-print them as a {link text: quoted URL} dictionary.

# Profile page whose bookmarks are listed.
url = 'http://delicious.com/projetmbc'

# Matches one bookmark anchor. Group 1 is the href value *including* its
# surrounding double quotes; group 2 is the link text.
pat = re.compile(
    r'<a\s+rel="nofollow"\s+class="taggedlink[\s"]+'
    r'href=("http://[^"]+")[\s>]+([^<]+)</a>'
)

# Keyed by link text, so two bookmarks with the same text collapse into one
# entry (the later one wins).
myDeliciousTag = {}
cnt = 0
# Number of the last page that contributed at least one new entry.
pagesWithNewLinks = 0

for nbPage in xrange(1, 1000):
    response = urllib.urlopen(url + '/?page=' + str(nbPage))
    try:
        content = response.read()
    finally:
        # Always release the connection, even if the read fails.
        response.close()

    myDeliciousTag.update(m.group(2, 1) for m in pat.finditer(content))

    # A page that adds no new key is taken as the end of the listing.
    if len(myDeliciousTag) == cnt:
        break
    cnt = len(myDeliciousTag)
    pagesWithNewLinks = nbPage

prettyPrint = pprint.PrettyPrinter(indent=2)
# Equals nbPage - 1 when the loop stopped on a page without new links, and
# stays correct when the loop ran through its whole range instead.
print(str(pagesWithNewLinks) + ' page(s) opened.\n')
prettyPrint.pprint(myDeliciousTag)