# coding: utf8
import requests
import pandas as pd
# librairies BeautifoulSoup du package bs4
from bs4 import BeautifulSoup
# Module-level accumulators that getCanyonDescription() appends to.

# Canyon identity: 'Name: ...' and 'Num: ...' strings.
name_list = list()
List_numURL = list()

# Raw text of the "list-group-item" entries and of the "badge" spans.
List_canyon_notation = list()
List_canyon_tech = list()

# Coordinate snippets cut out of the map page's first <script> element.
coord_list = list()

# Description text: <h3> titles interleaved with <p> paragraphs.
canyon_descr = list()
List_canyon_Description = list()
def getCanyonDescription(numcanyon):
    """Scrape one canyon from descente-canyon.com into the module-level lists.

    Downloads the topo page for ``numcanyon``; unless it answers 404, the
    matching map page is downloaded too and both are parsed.

    On success exactly ONE entry is appended to each of ``name_list``,
    ``List_numURL``, ``List_canyon_tech``, ``coord_list`` and
    ``List_canyon_Description``, so the lists stay aligned row by row.
    ``List_canyon_notation`` is extended with the text of every
    "list-group-item" element of the page.

    If anything goes wrong while parsing, a message is printed and none of
    the lists is modified, so a failing canyon never leaves a partial row.

    Args:
        numcanyon: Canyon identifier as used in the site's URLs.

    Returns:
        None. Results are recorded in the module-level lists.
    """
    base_url = "https://www.descente-canyon.com/canyoning/"
    response = requests.get(
        base_url + "canyon-description/" + str(numcanyon) + "/topo.html")
    if response.status_code == 404:
        # No such canyon: skip it without fetching the map page.
        return
    responseMap = requests.get(
        base_url + "canyon-carte/" + str(numcanyon) + "/carte.html")
    print(numcanyon)

    # Parse the downloaded pages with BeautifulSoup.
    parser = BeautifulSoup(response.content, 'html.parser')
    parserMap = BeautifulSoup(responseMap.content, 'lxml')

    try:
        canyon_Name = parser.find('h1').find('strong').text

        # Use THIS page's items: indexing the shared global list would always
        # return the first canyon's note.
        notation_texts = [
            item.text
            for item in parser.find_all('li', class_="list-group-item")
        ]
        note = notation_texts[2]
        # Drop the first character and everything from ' (' onwards.
        note = note[1:note.find(' (')]

        techniques = [
            badge.text for badge in parser.find_all('span', class_="badge")
        ]
        techniques.append('Note: ' + note)

        # The text before the first 'var point' is not a point declaration,
        # hence the [1:].
        map_script = parserMap.find_all('script')[0].text
        coordinates = []
        start_marker = "LatLng("
        for chunk in map_script.split('var point')[1:]:
            start = chunk.find(start_marker)
            end = chunk.find(",remarque")
            if start == -1 or end < start:
                # Chunk without a recognisable coordinate pair.
                continue
            # NOTE(review): the original offsets relied on the undefined names
            # `j` and `longueur`; this keeps the text strictly between the two
            # markers - confirm against the live page format.
            coordinates.append(chunk[start + len(start_marker):end])

        # Interleave each <h3> title with the <p> at the same position.
        # A fresh list per canyon, so rows do not alias one shared object.
        # Titles without a matching paragraph are ignored.
        description = []
        titles = parser.find_all('h3')
        paragraphs = parser.find_all('p')
        for title, paragraph in zip(titles, paragraphs):
            description.append(title.text)
            description.append(paragraph.text)
    except Exception as err:
        # Best effort: report the canyon and carry on with the next one, but
        # show the cause instead of hiding it.
        print('problem numéro' + str(numcanyon))
        print(repr(err))
        return

    # Commit everything only after parsing succeeded, so the lists stay the
    # same length.
    name_list.append('Name: ' + canyon_Name)
    List_numURL.append('Num: ' + str(numcanyon))
    List_canyon_notation.extend(notation_texts)
    List_canyon_tech.append(techniques)
    coord_list.append(coordinates)
    List_canyon_Description.append(description)
# A single canyon id; not referenced by the statements below.
canyon = 2136
# Small sample of canyon ids used for the run below.
canyon2 = (22094, 21554, 2136)
# Every id range that can be crawled.
canyonnumlist = [
    *range(20, 30),
    *range(200, 300),
    *range(2000, 3000),
    *range(20000, 24213),
]

# Iterate over `canyonnumlist` instead of `canyon2` to crawl every id range.
for numcanyon in canyon2:
    getCanyonDescription(numcanyon)

# Print the size of each accumulator list.
print(
    len(name_list),
    len(List_numURL),
    len(List_canyon_tech),
    len(coord_list),
    len(List_canyon_Description),
    len(List_canyon_notation),
)

# One DataFrame column per accumulator list.
df = pd.DataFrame(
    {
        "Name": name_list,
        "NumUrl": List_numURL,
        "Technique": List_canyon_tech,
        "Coordonnées": coord_list,
        "Description": List_canyon_Description,
    }
)
df.head()
# df.to_csv('All_Canyon_Data_ok.csv', index=True)
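# Partager  (presumably a stray "Share" button label captured from the source web page; not part of the script)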