from urllib import request
from bs4 import BeautifulSoup
import csv

page_url = 'https://alansimpson.me/python/scrape_sample.html'

# Download the page and parse it with the html5lib parser
ouverture = request.urlopen(page_url)
lecture = BeautifulSoup(ouverture, 'html5lib')
recup_2 = lecture.article  # grab the <article> tag of the web page

# Collect the href, image source and link text of every <a> tag
liste = []
for link in recup_2.find_all('a'):
    try:
        url = link.get('href')
        img = link.img.get('src')
        text = link.span.text
        liste.append({'url': url, 'img': img, 'text': text})
    except AttributeError:
        # skip links that have no <img> or <span> child
        pass

# Write the collected links to a CSV file
with open(r'C:\Users\Youcef\Documents\page_web.csv', 'w', newline='') as fichier_csv:
    recup_3 = csv.writer(fichier_csv)
    recup_3.writerow(['url', 'img', 'text'])
    for ligne in liste:
        recup_3.writerow([str(ligne['url']), str(ligne['img']), str(ligne['text'])])
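
As a side note, since each entry of liste is already a dictionary with the keys 'url', 'img' and 'text', the CSV step can also be written with csv.DictWriter, which maps each dictionary straight to a row. A minimal sketch, assuming liste has been filled as in the script above and reusing the same output path:

import csv

with open(r'C:\Users\Youcef\Documents\page_web.csv', 'w', newline='') as fichier_csv:
    writer = csv.DictWriter(fichier_csv, fieldnames=['url', 'img', 'text'])
    writer.writeheader()       # writes the 'url,img,text' header row
    writer.writerows(liste)    # each dict in liste becomes one CSV row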