Bonjour,

J'arrive pas a enregistrer la data dans un fichier Excel .
et meme y a des doublon :


Code : Sélectionner tout - Visualiser dans une fenêtre à part
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
import requests
from bs4 import BeautifulSoup
from tqdm import tqdm
from PIL import Image
import os
import xlwt
 
 
def Rempalcer(textString):
    cle = ['Comparer', '(voir tous les produits)', '(voir tous les articles)']
    for Q in cle:
        while Q in textString:
            textString = textString.replace(Q, '')
    return textString
 
productlist = []
def load_product(url,nume):
    page = requests.get(url)
    BS = BeautifulSoup(page.content, 'html.parser')
    getOver = BS.find('section', id='overview').find('table', id='sellers')
    K = getOver.find('tbody').find_all('tr')[1:]
    contImage = BS.find('section', class_='section-images').find_all("img")[-1]
    for e in K:
        Val = e.find('td', class_='table-value').text
        productlist.append(Rempalcer(Val).strip())
    productlist.append(contImage.get("src"))
    download_image(contImage.get("src"),"image_data",nume)
    return productlist
 
 
links = []
def load_url(link):
    page = requests.get(link)
    BS = BeautifulSoup(page.content, 'html.parser')
    contPage = BS.find('div', class_='products')
    elems = contPage.find_all('a', class_="title")
    keys = "produit/imprimantes-3d"
    for el in elems:
        link = el.get("href")
        if keys in link:
            links.append(link)
    return(links)
 
 
def download_image(url, pathname,nume):
    if not os.path.isdir(pathname):
        os.makedirs(pathname)
    data = "www.aniwaa.fr/wp-content/uploads/"
    if data in url:
        response = requests.get(url, stream=True)
        file_size = int(response.headers.get("Content-Length", 0))
        filename = os.path.join(pathname,nume+".png")
        progress = tqdm(response.iter_content(1024), f"Downloading {filename}", total=file_size, unit="B", unit_scale=True, unit_divisor=1024)
        with open(filename, "wb") as f:
            for data in progress:
                f.write(data)
                progress.update(len(data))
 
load_url("https://www.aniwaa.fr/comparatif/imprimantes-3d/?sort=date&order=desc&show=5")
 
 
workbook = xlwt.Workbook()
sheet = workbook.add_sheet("Sheet Name", cell_overwrite_ok=True)
style = xlwt.easyxf('font: bold 1')
 
# Specifying column
 
sheet.write(0, 0, 'Id', style)
sheet.write(0, 1, 'Marque', style)
sheet.write(0, 2, 'Catégorie', style)
sheet.write(0, 3, 'Thématique', style)
sheet.write(0, 4, 'Technologie', style)
sheet.write(0, 5, 'Matériaux', style)
sheet.write(0, 6, 'Volume d', style)
sheet.write(0, 7, 'Date de sortie', style)
sheet.write(0, 8, 'Pays ', style)
sheet.write(0, 9, 'Image ', style)
 
workbook.save("AA.xls")
 
 
for num in range(len(links)):
    num_ligne = links[num]
    for a in range(len(load_product(num_ligne,str(num)))):
        print(num, a, load_product(num_ligne,str(num))[a] )
        sheet.write(num, a, load_product(num_ligne,str(num))[a] )
        workbook.save("AA.xls")

Voici un Extrait du Résultat donné :
Downloading image_data\0.jpg: 0%| | 7.00/6.11k [00:00<00:00, 6.62kB/s]
Downloading image_data\0.jpg: 0%| | 7.00/6.11k [00:00<00:01, 3.48kB/s]
Downloading image_data\0.jpg: 0%| | 7.00/6.11k [00:00<00:03, 1.64kB/s]
1 0 P
Downloading image_data\0.jpg: 0%| | 7.00/6.11k [00:00<00:06, 993B/s]
Downloading image_data\0.jpg: 0%| | 7.00/6.11k [00:00<00:06, 1.01kB/s]
1 1 r
Downloading image_data\0.jpg: 0%| | 7.00/6.11k [00:00<00:04, 1.56kB/s]
1 2 o
Downloading image_data\0.jpg: 0%| | 7.00/6.11k [00:00<00:01, 3.49kB/s]
Downloading image_data\0.jpg: 0%| | 7.00/6.11k [00:00<00:02, 2.34kB/s]
Downloading image_data\0.jpg: 0%| | 7.00/6.11k [00:00<00:05, 1.17kB/s]
1 3 f