1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38
|
from bs4 import BeautifulSoup as bs
import requests
import pandas as pd
# Sample job posting used to try the scraper out.
wttj_url = 'https://www.welcometothejungle.com/fr/companies/choco/jobs/account-executive_bruxelles_CHOCO_gkZGmzz?q=c242c9d932142436e2b1fb8bff93f768&o=1673356'

# Always pass a timeout: without one, requests waits indefinitely on a
# server that accepts the connection but never answers.
page = requests.get(wttj_url, timeout=10)
soup = bs(page.text, "lxml")
_NOT_FOUND = "Non trouvé"  # placeholder stored for any field missing from the page


def _text_or_default(tag):
    """Return ``tag.string``, or the placeholder when ``tag`` is ``None``."""
    return _NOT_FOUND if tag is None else tag.string


def _value_after(spans, index):
    """Return the text of the second ``<span>`` that follows ``spans[index]``.

    The placeholder is returned when ``spans`` is too short or when fewer
    than two ``<span>`` elements follow the selected one.
    """
    if index >= len(spans):
        return _NOT_FOUND
    node = spans[index]
    for _ in range(2):
        node = node.find_next("span")
        if node is None:
            return _NOT_FOUND
    return node.string


def get_info(link, timeout=10):
    """Download a job-posting page and extract one row of fields from it.

    Args:
        link: URL of the page to download and parse.
        timeout: Seconds to wait for the server before giving up.

    Returns:
        A list of nine items, in this order:
        ``[company, domaine, taille, lieu, debut, contrat, education,
        experience, link]``. Any field whose element cannot be located in
        the page is set to ``"Non trouvé"``. Values are whatever
        BeautifulSoup's ``.string`` yields for the matched element.

    Raises:
        requests.RequestException: if the request fails, times out, or the
            server answers with an HTTP error status.
    """
    page = requests.get(link, timeout=timeout)
    # Fail loudly on 4xx/5xx instead of scraping an error page.
    page.raise_for_status()
    soup = bs(page.text, "lxml")

    company = _text_or_default(
        soup.find('h3', attrs={'class': "sc-12bzhsi-11 iQbVXI"})
    )
    lieu = _text_or_default(soup.find('span', attrs={'class': "wui-text"}))

    # The same span class is used for several fields; they are told apart
    # by their position in the page.
    spans = soup.find_all('span', class_="sc-16yjgsd-3 jpPsCr")
    contrat = _text_or_default(spans[0].span) if spans else _NOT_FOUND
    education = _value_after(spans, 2)
    experience = _value_after(spans, 3)

    # Only a <time> element that carries a datetime attribute is used.
    time_tag = soup.find("time")
    if time_tag is not None and time_tag.has_attr('datetime'):
        debut = time_tag.string
    else:
        debut = _NOT_FOUND

    domaine = taille = _NOT_FOUND
    columns = soup.find_all('ul', attrs={'class': "sc-16yjgsd-4 ezvNLf"})
    if columns:
        # When several lists match, the values come from the last one.
        last_column = columns[-1]
        domaine = _text_or_default(
            last_column.find_next('span', class_="sc-16yjgsd-3 lcqDxo")
        )
        taille = _text_or_default(
            last_column.find_next('span', class_="sc-16yjgsd-3 keLjPw")
        )

    return [company, domaine, taille, lieu, debut, contrat, education,
            experience, link]
# Scrape the sample posting defined above instead of repeating its URL.
a = get_info(wttj_url)
# Bare expression kept from the original, presumably so a notebook/REPL
# displays the extracted row.
a
Partager