1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39
|
# -*- coding: utf-8 -*-
import urllib.request
import urllib.parse
import requests
from lxml import html
from datetime import datetime
# Journey looked up on the SNCF mobile site.
gare_depart = 'Paris (Toutes gares intramuros)'
gare_arrive = 'Strasbourg'
heure_depart = '0700'  # NOTE(review): looks like HHMM; confirm the format the site expects

# Android mobile-browser User-Agent sent with the request
# (presumably so the mobile site serves its mobile markup; TODO confirm).
user_agent = 'Mozilla/5.0 (Linux; U; Android 4.0.4; en-gb; GT-I9300 Build/IMM76D) AppleWebKit/534.30 (KHTML, like Gecko) Version/4.0 Mobile Safari/534.30'
headers = {'User-Agent': user_agent}
# Defined for use behind a proxy; it is NOT passed to requests.get() below.
proxies = {'http': 'http://proxy:8080/'}

# Seconds to wait for the server; without a timeout requests can block forever.
REQUEST_TIMEOUT = 10

# Deletion table for the layout characters stripped from scraped text nodes.
_LAYOUT_CHARS = str.maketrans('', '', '\n\r\t')


def build_url(depart, arrivee, date, heure):
    """Return the timetable URL for a journey.

    Each value becomes one path segment and is percent-encoded so that
    spaces, slashes or other reserved characters in a station name cannot
    break the path. Parentheses are left as-is.

    Args:
        depart: Departure station name.
        arrivee: Arrival station name.
        date: Travel date, already formatted as ``YYYY-MM-DD``.
        heure: Departure time string (see ``heure_depart``).

    Returns:
        The full URL as a string.
    """
    segments = [
        urllib.parse.quote(part, safe='()')
        for part in (depart, arrivee, date, heure)
    ]
    return (
        'http://m.sncf.com/fr/horaires-info-trafic/trajet/'
        + '/'.join(segments)
        + '/sens/depart/'
    )


def clean_texts(texts):
    """Drop whitespace-only strings and strip newlines, CRs and tabs.

    Ordinary spaces inside the remaining strings are kept.

    Args:
        texts: Iterable of strings (e.g. the result of an XPath ``text()``).

    Returns:
        A new list of cleaned strings, in the original order.
    """
    return [t.translate(_LAYOUT_CHARS) for t in texts if not t.isspace()]


def group_trains(horaires, heure):
    """Flatten the scraped values into ``[h0, h1, x0, h2, h3, x1, ...]``.

    Two consecutive ``horaires`` entries are consumed for every ``heure``
    entry (presumably departure and arrival time of one train; verify
    against the page markup). Grouping stops at the shorter input, so a
    page with missing spans yields a shorter list instead of an IndexError.

    Args:
        horaires: Sequence of strings from the ``horaire`` spans.
        heure: Sequence of cleaned strings from the ``inner`` divs.

    Returns:
        A flat list with three items per complete train.
    """
    train = []
    pairs = zip(horaires[0::2], horaires[1::2])
    for (first, second), inner in zip(pairs, heure):
        train.extend((first, second, inner))
    return train


def main():
    """Fetch today's timetable page and print the scraped values."""
    date_inv = "{:%Y-%m-%d}".format(datetime.now())
    url = build_url(gare_depart, gare_arrive, date_inv, heure_depart)
    print(url)

    page = requests.get(url, headers=headers, timeout=REQUEST_TIMEOUT)
    # Fail loudly on 4xx/5xx rather than scraping an error page.
    page.raise_for_status()

    tree = html.fromstring(page.content)
    print(tree.text)

    horaires = tree.xpath('//span[@class="horaire"]/text()')
    perturbations = tree.xpath('//span[@class="state-perturbation"]/text()')
    heure = clean_texts(tree.xpath('//div[@class="inner"]/text()'))

    print(len(heure))
    print(group_trains(horaires, heure))
    print(perturbations)


if __name__ == '__main__':
    main()
Partager