1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85
|
# -*- coding: utf-8 -*-
import requests
import bs4
import json
# Load the recipes saved by a previous run so that this run extends them
# instead of starting over.
try:
    with open('recipe.json', 'r', encoding='utf-8') as file:
        recipe = json.load(file)
except (OSError, ValueError):
    # OSError: the file is missing or unreadable.
    # ValueError: invalid JSON (json.JSONDecodeError) or invalid UTF-8.
    recipe = {}
if not isinstance(recipe, dict):
    # The rest of the script indexes `recipe` by category name, so any other
    # JSON payload (list, number, null, ...) is unusable: start empty.
    recipe = {}
def get_link_recipe(url):
    """Collect the item page URLs listed on an encyclopedia listing page.

    The page at ``url`` is fetched and every
    ``<table class="ak-table ak-responsivetable">`` is scanned for
    ``<span class="ak-linker">`` elements; the ``href`` of the link inside
    each span is prefixed with ``https://www.dofus.com``.

    Args:
        url: Address of the listing page to fetch.

    Returns:
        A set of URL strings. The set is empty when the page contains no
        matching table or no link.
    """
    # NOTE(review): the page is requested with POST, as the script always
    # did; confirm whether a plain GET would be more appropriate.
    with requests.session() as sess:  # closes the connection pool on exit
        r = sess.post(url)
    soup = bs4.BeautifulSoup(r.text, 'html.parser')

    links = set()
    for table in soup.find_all("table", attrs={"class": "ak-table ak-responsivetable"}):
        for span in table.find_all("span", attrs={"class": "ak-linker"}):
            # Read the href through the parser rather than splitting the
            # serialized tag on quotes, which depends on attribute order.
            anchor = span.find("a", href=True)
            if anchor is not None:
                links.add('https://www.dofus.com' + anchor["href"])
    return links
# Number of recipes saved during this run; get_recipe() increments it and the
# total is printed once all pages have been processed.
nb = 0
def get_recipe(url):
    """Scrape the crafting recipe of one item page and save it.

    The page at ``url`` is fetched. When it contains a crafts panel
    (``div.ak-container.ak-panel.ak-crafts``), the recipe is stored in the
    module-level ``recipe`` dict as
    ``recipe[category][item_name] = [(quantity, name, category), ...]``,
    ``recipe.json`` is rewritten with the whole dict, a line is printed and
    the module-level counter ``nb`` is incremented.

    Pages without a crafts panel, or whose category / title markup is
    missing or empty, are skipped without modifying anything.

    Args:
        url: Address of the item page to fetch.

    Returns:
        None.
    """
    global nb
    # NOTE(review): the page is requested with POST, as the script always
    # did; confirm whether a plain GET would be more appropriate.
    with requests.session() as sess:  # closes the connection pool on exit
        r = sess.post(url)
    soup = bs4.BeautifulSoup(r.text, 'html.parser')

    craft_panels = soup.find_all("div", class_="ak-container ak-panel ak-crafts")
    if not craft_panels:  # no recipe on this page
        return

    category_tag = soup.find("div", class_="ak-encyclo-detail-type col-xs-6")  # category of the parsed item
    title_tag = soup.find("h1", class_="ak-return-link")  # name of the parsed item
    if category_tag is None or title_tag is None:
        return

    category_words = category_tag.text.split()
    title_lines = [line for line in title_tag.text.split('\n') if line]
    if not category_words or not title_lines:
        # Without a textual category and name there is no valid JSON key.
        return
    category = category_words[-1]  # the last word of the block is the category
    name_of_parsed_item = title_lines[-1]  # the last non-empty line is the name

    # When several crafts panels exist, only the last one is used.
    panel = craft_panels[-1]

    list_name_of_ressource = []
    for span in panel.find_all("span", class_="ak-linker"):
        for line in span.text.split('\n'):
            if line:
                list_name_of_ressource.append(line)
    list_quantity_of_ressource = [
        int(div.text.split()[0])  # the leading token of the text is the count
        for div in panel.find_all("div", class_="ak-front")
    ]
    list_category_of_ressource = [
        div.text for div in panel.find_all("div", class_="ak-text")
    ]

    # zip() stops at the shortest list, so an incomplete row is dropped.
    ingredients = list(zip(list_quantity_of_ressource,
                           list_name_of_ressource,
                           list_category_of_ressource))
    recipe.setdefault(category, {})[name_of_parsed_item] = ingredients

    # The file is rewritten after every recipe so that an interrupted run
    # keeps what it has collected so far.
    with open('recipe.json', 'w', encoding='utf-8') as f:
        json.dump(recipe, f, sort_keys=True, indent="\t", ensure_ascii=False)
    print('recette de : ', name_of_parsed_item)
    nb += 1
# Script driver: collect the item links from the equipment listing, scrape
# each item page, then report how many recipes were saved.
#
# Alternative listing URL kept for reference; the size=2620 query parameter
# presumably asks for more entries on one page -- TODO confirm.
# urls = get_link_recipe('https://www.dofus.com/fr/mmorpg/encyclopedie/equipements?size=2620')
urls = get_link_recipe('https://www.dofus.com/fr/mmorpg/encyclopedie/equipements')
for url in urls:
    get_recipe(url)
print(nb, "recettes ajoutées")
Partager