1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38
   |  
from bs4 import BeautifulSoup as bs
import requests
import pandas as pd
 
# Sample Welcome to the Jungle job posting used by this script.
wttj_url = 'https://www.welcometothejungle.com/fr/companies/choco/jobs/account-executive_bruxelles_CHOCO_gkZGmzz?q=c242c9d932142436e2b1fb8bff93f768&o=1673356'
# requests waits forever by default; an explicit timeout keeps a stalled
# connection from hanging the whole script.
page = requests.get(wttj_url, timeout=30)

# Parse the downloaded HTML with the lxml parser.
soup = bs(page.text, "lxml")
 
# Placeholder stored for any field that cannot be located on the page.
_NOT_FOUND = "Non trouvé"


def _string_of(tag, default=_NOT_FOUND):
    """Return ``tag.string``, or *default* when *tag* is ``None``."""
    return default if tag is None else tag.string


def _value_after(spans, index):
    """Return the second ``<span>`` following ``spans[index]``, or ``None``.

    ``None`` is returned when ``spans`` has no element at *index* or when
    fewer than two ``<span>`` tags follow it in the document.
    """
    tag = spans[index] if len(spans) > index else None
    for _ in range(2):
        if tag is None:
            return None
        tag = tag.find_next("span")
    return tag


def get_info(link, timeout=30):
    """Download a job posting and extract its main attributes.

    Args:
        link: URL of the job posting page.
        timeout: Seconds to wait for the server before giving up; passed
            straight to ``requests.get``.

    Returns:
        A list ``[company, domain, size, location, start date, contract,
        education, experience, link]``. A field whose tag cannot be found
        is set to ``"Non trouvé"``. A tag that is found contributes its
        ``.string`` value unchanged.

    Raises:
        requests.RequestException: If the request fails, times out, or the
            server answers with an HTTP error status.
    """
    page = requests.get(link, timeout=timeout)
    # Fail loudly on 4xx/5xx instead of returning a row of placeholders
    # scraped from an error page.
    page.raise_for_status()
    soup = bs(page.text, "lxml")

    company = _string_of(
        soup.find("h3", attrs={"class": "sc-12bzhsi-11 iQbVXI"})
    )
    location = _string_of(soup.find("span", attrs={"class": "wui-text"}))

    # The contract, education and experience values are all located
    # relative to spans carrying this class, at positions 0, 2 and 3.
    spans = soup.find_all("span", class_="sc-16yjgsd-3 jpPsCr")
    contract = _string_of(spans[0].span if spans else None)
    education = _string_of(_value_after(spans, 2))
    experience = _string_of(_value_after(spans, 3))

    # Only a <time> tag that carries a ``datetime`` attribute is used as
    # the start date; anything else yields the placeholder.
    time_tag = soup.find("time")
    if time_tag is not None and time_tag.has_attr("datetime"):
        start = time_tag.string
    else:
        start = _NOT_FOUND

    # When several matching lists exist, the values looked up from the
    # last one are the ones kept.
    lists = soup.find_all("ul", attrs={"class": "sc-16yjgsd-4 ezvNLf"})
    last_list = lists[-1] if lists else None
    if last_list is None:
        domain = size = _NOT_FOUND
    else:
        domain = _string_of(
            last_list.find_next("span", class_="sc-16yjgsd-3 lcqDxo")
        )
        size = _string_of(
            last_list.find_next("span", class_="sc-16yjgsd-3 keLjPw")
        )

    return [
        company,
        domain,
        size,
        location,
        start,
        contract,
        education,
        experience,
        link,
    ]
 
# Scrape the sample posting; reuse the URL already bound to ``wttj_url``
# instead of repeating the literal.
a = get_info(wttj_url)
# Bare expression kept from the original; presumably a notebook cell that
# displays the extracted row (in a plain script it has no effect).
a
Partager