Code : Sélectionner tout - Visualiser dans une fenêtre à part
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
import requests as rq
from bs4 import BeautifulSoup
import pandas as pd
from random import randint
from time import sleep
 
headers = ({'User-Agent':
            'Mozilla/5.0 (Windows NT 6.1) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/41.0.2228.0 Safari/537.36'})
 
def initial_scrape(url): # scrapes initial page - used to find number of pages
    r = rq.get(url, headers=headers)
    soup = BeautifulSoup(r.text, 'html.parser')
    return soup
 
 
def find_pages(soup): # finds total number of pages
    pages_html = soup.select('.page-link')
    totalPages = pages_html[-4].get_text()
    return totalPages
 
 
def convertUnit(price): # converts price format from string to integer e.g. 100K -> 100000
    if 'K' in price:
        price = price.partition('K')[0]
        return int(float(price) * 1000)
    elif 'M' in price:
        price = price.partition('M')[0]
        return int((float(price) * 1000000))
    else:
        return int(price)
 
def gather_data(totalPages): # gathers the data of each player from each page
 
    database = []
    playerContainers = []
    pageCounter = 0
    totalCounter = 0
 
    for page in range(int(totalPages)): # for every page
        url = f'https://www.futbin.com/players?page=1&xbox_price=10000-50000&version=if_gold' # change url accordingly
        r = rq.get(url, headers=headers)
        soup = BeautifulSoup(r.text, 'html.parser')
 
        playerContainers = soup.select('tbody > tr')[2:] 
 
        counter = 0
 
        for player in playerContainers: # for every player
 
            stats = {} # adds stats to a dictionary (one for each player), which is then added to a list
 
            values = player.select('.num_td')
            name_position = player.select('.player_name_players_table')
 
            stats['name'] = name_position[0].get_text().partition('(')[0].strip()
            stats['position'] = name_position[0].get_text().partition('(')[2].strip(')')
            stats['rating'] = int(player.select('span[class*="form rating"]')[0].get_text())
            stats['price'] = convertUnit(player.select('.ps4_color')[0].get_text())
 
 
            database.append(stats)
 
            counter += 1
            print(f'Player: {counter}/{len(playerContainers)} ; {stats["name"]}') # tracks progress
 
        totalCounter += counter
        pageCounter += 1
        print(f'Page: {pageCounter}/{totalPages}') # more progress tracking
        sleep(randint(1, 3)) # timeout - can be removed
 
    return database
Bonjour, je suis débutant en python et je cherche a comprendre pourquoi le script voit le nombre de page du site web mais ne scrap seulement que la première. je ne cherche pas une réponse toute faite plus de l'aiguillage pour comprendre ou se situe le problème

merci d'avance