import requests
from bs4 import BeautifulSoup
from tqdm import tqdm
from PIL import Image
import os
import xlwt
def Rempalcer(textString, cle=('Comparer', '(voir tous les produits)', '(voir tous les articles)')):
    """Remove every occurrence of the given marker strings from a text value.

    Args:
        textString: The text to clean.
        cle: Marker strings to delete. Defaults to the three labels this
            script strips from scraped table cells.

    Returns:
        ``textString`` with all markers removed. Surrounding whitespace is
        left untouched; callers strip it themselves.
    """
    for Q in cle:
        if not Q:
            # '' is contained in every string and replace('', '') changes
            # nothing, so an empty marker would make the loop below spin
            # forever.
            continue
        # Repeat until stable: deleting one occurrence can splice the
        # surrounding text into a new occurrence ("ComComparerparer").
        while Q in textString:
            textString = textString.replace(Q, '')
    return textString
# Running log of every value scraped by load_product. It is still extended on
# each call for backward compatibility, but load_product no longer returns it.
productlist = []


def load_product(url, nume, timeout=30):
    """Scrape one product page and return its values as a new list.

    Reads the ``table-value`` cell of every row after the first in the
    ``#sellers`` table inside the ``#overview`` section, cleans each one with
    ``Rempalcer`` and strips whitespace, then appends the ``src`` of the last
    ``<img>`` in the ``section-images`` section. That image is also passed to
    ``download_image`` with the ``image_data`` directory.

    Args:
        url: Address of the product page.
        nume: Base name (without extension) handed to ``download_image``.
        timeout: Seconds to wait for the HTTP response before giving up.

    Returns:
        A fresh list holding only this product's cleaned cell values followed
        by the image ``src``. Previously the shared module-level list was
        returned, so every call also contained all earlier products' values.

    Raises:
        AttributeError: If an expected section, table or cell is missing
            (BeautifulSoup's ``find`` returns ``None`` in that case).
        IndexError: If the images section contains no ``<img>`` tag.
    """
    page = requests.get(url, timeout=timeout)
    soup = BeautifulSoup(page.content, 'html.parser')

    sellers_table = soup.find('section', id='overview').find('table', id='sellers')
    # The first <tr> is skipped, exactly as in the original slice.
    rows = sellers_table.find('tbody').find_all('tr')[1:]
    image_src = soup.find('section', class_='section-images').find_all("img")[-1].get("src")

    values = [
        Rempalcer(row.find('td', class_='table-value').text).strip()
        for row in rows
    ]
    values.append(image_src)
    productlist.extend(values)

    # An <img> without a src attribute yields None; there is nothing to fetch.
    if image_src:
        download_image(image_src, "image_data", nume)
    return values
# Product page URLs collected so far. load_url appends to this list and
# module-level code later in the file reads it.
links = []


def load_url(link, timeout=30):
    """Collect 3D-printer product links from a listing page.

    Looks inside the ``div.products`` container for ``<a class="title">``
    anchors and appends every ``href`` containing ``produit/imprimantes-3d``
    to the module-level ``links`` list.

    Args:
        link: Address of the listing page.
        timeout: Seconds to wait for the HTTP response before giving up.

    Returns:
        The module-level ``links`` list (the same object on every call).

    Raises:
        AttributeError: If the page has no ``div.products`` container.
    """
    page = requests.get(link, timeout=timeout)
    soup = BeautifulSoup(page.content, 'html.parser')
    products = soup.find('div', class_='products')
    path_marker = "produit/imprimantes-3d"
    for anchor in products.find_all('a', class_="title"):
        href = anchor.get("href")
        # An anchor without href yields None, which would make the `in`
        # test raise TypeError.
        if href and path_marker in href:
            links.append(href)
    return links
def download_image(url, pathname, nume, timeout=30):
    """Download an aniwaa.fr upload to ``<pathname>/<nume>.png``.

    The target directory is always created. The download itself only happens
    when ``url`` contains ``www.aniwaa.fr/wp-content/uploads/``; any other
    URL (or a missing one) is ignored.

    Args:
        url: Image address, or ``None``/empty when the page had none.
        pathname: Directory to store the image in; created if absent.
        nume: File name without extension. ``.png`` is always appended,
            whatever the real image format is.
        timeout: Seconds to wait for the HTTP response before giving up.

    Returns:
        None.
    """
    os.makedirs(pathname, exist_ok=True)

    uploads_marker = "www.aniwaa.fr/wp-content/uploads/"
    if not url or uploads_marker not in url:
        return

    filename = os.path.join(pathname, f"{nume}.png")
    # The context manager releases the streamed connection even on error.
    with requests.get(url, stream=True, timeout=timeout) as response:
        if not response.ok:
            # Do not save an HTTP error page under an image file name.
            return
        file_size = int(response.headers.get("Content-Length", 0))
        with open(filename, "wb") as f, tqdm(
            desc=f"Downloading {filename}",
            # Without a Content-Length header the size is unknown.
            total=file_size or None,
            unit="B",
            unit_scale=True,
            unit_divisor=1024,
        ) as progress:
            # Advance the bar manually by bytes written. Iterating a tqdm
            # wrapper and calling update() as well would count each chunk
            # twice.
            for chunk in response.iter_content(1024):
                f.write(chunk)
                progress.update(len(chunk))
# Fill the module-level ``links`` list with the product page URLs found on
# the comparison listing.
load_url("https://www.aniwaa.fr/comparatif/imprimantes-3d/?sort=date&order=desc&show=5")

workbook = xlwt.Workbook()
sheet = workbook.add_sheet("Sheet Name", cell_overwrite_ok=True)
style = xlwt.easyxf('font: bold 1')

# Header row (row 0), written in bold.
HEADERS = (
    'Id',
    'Marque',
    'Catégorie',
    'Thématique',
    'Technologie',
    'Matériaux',
    'Volume d',
    'Date de sortie',
    'Pays ',
    'Image ',
)
for col, title in enumerate(HEADERS):
    sheet.write(0, col, title, style)
# Saved before scraping, so a header-only file exists if a product page fails.
workbook.save("AA.xls")

for num, product_url in enumerate(links):
    # Call load_product once per link: every call performs an HTTP request
    # and an image download.
    values = load_product(product_url, str(num))
    # NOTE(review): values are written from column 0, under the 'Id' header.
    # Confirm whether the first scraped value is an id or whether the data
    # should start at column 1.
    for a, value in enumerate(values):
        print(num, a, value)
        # Data rows start at row 1 so they no longer overwrite the header.
        sheet.write(num + 1, a, value)
workbook.save("AA.xls")