1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53
|
# -*- coding: utf-8 -*-
from html.parser import HTMLParser
class RATPParser(HTMLParser):
    """Collect a station, a direction and following text entries from HTML.

    The parser is a small state machine driven by the text it encounters:

    * the text ``Station`` means the next non-empty text is the station name;
    * the text ``Direction`` means the next non-empty text is the direction;
      once the direction is stored, the "timing" section starts;
    * inside the timing section, a ``<div>`` whose first attribute is a
      ``class`` starting with ``bg`` or ``schmsg`` switches on collection,
      after which every non-empty text is appended to ``next_trains``;
    * any tag whose attribute list is exactly ``class="seppage"`` ends the
      timing section.

    Results are exposed through ``self.timing``, a dict that may contain the
    keys ``'station'``, ``'direction'`` and ``'next_trains'`` (a list of
    strings). It stays empty when none of the markers are found.
    """

    def __init__(self):
        super().__init__()
        # True right after the 'Station' label: next text is the station name.
        self.in_station = False
        # True right after the 'Direction' label: next text is the direction.
        self.in_direction = False
        # True between the direction value and the 'seppage' separator.
        self.in_timing = False
        # True once a 'bg*' / 'schmsg*' div was opened in the timing section.
        self.in_bg = False
        self.timing = {}

    def handle_starttag(self, tag, attrs):
        """Track entry into collected divs and the end of the timing section.

        ``attrs`` is the list of ``(name, value)`` pairs supplied by
        ``HTMLParser``; it is empty for attribute-less tags and ``value`` is
        ``None`` for attributes written without a value.
        """
        if not self.in_timing:
            return

        if attrs == [('class', 'seppage')]:
            # Separator reached: stop collecting entries.
            self.in_timing = False
            self.in_bg = False
            return

        # Guard against attribute-less tags before looking at the first
        # attribute (previously an IndexError on a bare <div>).
        if tag != "div" or not attrs:
            return

        # Only the first attribute is inspected, as in the original logic.
        name, value = attrs[0]
        # ``value`` is None for a valueless attribute such as <div class>.
        if name == 'class' and value and value.startswith(('bg', 'schmsg')):
            self.in_bg = True

    def handle_data(self, data):
        """Route each non-empty text node according to the current state."""
        data = data.strip()
        if not data:
            return

        if self.in_timing and self.in_bg:
            self.timing['next_trains'].append(data)

        if self.in_station:
            self.timing['station'] = data
            self.in_station = False
        elif self.in_direction:
            self.timing['direction'] = data
            # The list is created together with in_timing, so it always
            # exists by the time entries are appended above.
            self.timing['next_trains'] = []
            self.in_direction = False
            self.in_timing = True
        elif data == 'Station':
            self.in_station = True
        elif data == 'Direction':
            self.in_direction = True
# Script entry point: parse a saved HTML page and print what was extracted.
if __name__ == '__main__':
    with open('/home/vincent/Bureau/ratp.html', 'r') as inf:
        content = inf.read()

    parser = RATPParser()
    parser.feed(content)
    tmg = parser.timing
    if tmg:
        # .get() keeps a partially parsed page (e.g. a station without a
        # direction) from raising KeyError.
        print('Station:\t%s\nDirections:\t%s'
              % (tmg.get('station', ''), tmg.get('direction', '')))

        # Entries are printed two by two, as in the original loop.
        # NOTE(review): presumably each pair is (destination, waiting time);
        # confirm against the page layout.
        trains = tmg.get('next_trains', [])
        for i in range(0, len(trains), 2):
            first = trains[i]
            # An odd number of entries leaves the last one unpaired; print it
            # with an empty second column instead of raising IndexError.
            second = trains[i + 1] if i + 1 < len(trains) else ''
            print(' %-28s:%-10s' % (first, second))
Partager