# Fetch a recipe web page and print the recipe found in its JSON-LD metadata.
# -*- coding: utf-8 -*-
import urllib.request
import json
from html.parser import HTMLParser
def get_page(url):
    """Download *url* and return its body decoded as UTF-8 text.

    Bytes that are not valid UTF-8 are replaced with U+FFFD instead of
    raising.

    Args:
        url: Address to fetch; passed straight to
            ``urllib.request.urlopen``.

    Returns:
        The decoded page as ``str``, or ``False`` when the download failed
        (the failure is reported on stdout).
    """
    try:
        # The context manager closes the connection even if read() raises,
        # so the response is never leaked.
        with urllib.request.urlopen(url) as response:
            content = response.read()
    except Exception as why:
        # Deliberately best-effort: any failure while fetching is reported
        # and turned into a False result that callers test for.
        print('urllib2 error: %s, %s' % (url, why))
        return False
    return content.decode('utf-8', 'replace')
def get_recipe(link):
    """Fetch the page at *link* and print the recipe embedded in it.

    Nothing is printed when the page could not be downloaded (or is empty)
    or when the parser did not find a recipe in it.

    Args:
        link: URL of the recipe page.
    """
    html = get_page(link)
    if not html:
        return

    extractor = Parser()
    extractor.feed(html)
    if not extractor.recipe:
        return

    miam(extractor.recipe)
def miam(jsn):
    """Print the name, ingredients and instructions of a JSON recipe.

    Args:
        jsn: JSON text of an object with the keys ``name``,
            ``recipeIngredient`` (a list of strings) and
            ``recipeInstructions``.

    Raises:
        KeyError: If one of the three keys is missing.
    """
    recipe = json.loads(jsn)

    title = recipe["name"]
    print(f"\n * {title}\n")

    # Ingredients are shown on one line, comma-separated.
    ingredients = ', '.join(recipe["recipeIngredient"])
    print(f"Dépends: {ingredients}\n")

    steps = recipe["recipeInstructions"]
    print(steps)
class Parser(HTMLParser):
    """HTML parser that extracts the JSON-LD payload embedded in a page.

    After ``feed()``, ``recipe`` holds the extracted JSON text, or ``False``
    when no usable ``<script type="application/ld+json">`` was found. If a
    page contains several usable JSON-LD scripts, the last one wins.
    """

    def __init__(self):
        super().__init__()
        # Extracted JSON text; stays False until a usable script is seen.
        self.recipe = False
        # True while the parser is inside a JSON-LD <script> element.
        self.injson = False
        # Text chunks received for the JSON-LD script currently being read.
        self._chunks = []

    def handle_starttag(self, tag, attrs):
        """Start collecting text when a JSON-LD ``<script>`` element opens."""
        if tag != "script":
            return
        # attrs is a list of (name, value) pairs such as
        # [('type', 'application/ld+json')]; the type attribute is not
        # necessarily the first one, and value is None for bare attributes.
        for name, value in attrs:
            if (name == "type" and value
                    and value.strip().lower() == "application/ld+json"):
                self.injson = True
                self._chunks = []
                return

    def handle_data(self, data):
        """Buffer text that belongs to the current JSON-LD script."""
        # The script content may arrive in more than one call, so it is
        # only processed once the closing tag is seen.
        if self.injson:
            self._chunks.append(data)

    def handle_endtag(self, tag):
        """Process the buffered text when the JSON-LD script closes."""
        if tag == "script" and self.injson:
            # Always leave JSON mode here, even for an empty script, so
            # later text nodes are never mistaken for JSON-LD.
            self.injson = False
            self.clean_data(''.join(self._chunks))
            self._chunks = []

    def clean_data(self, txt):
        """Store in ``recipe`` the JSON object found in *txt*.

        The object is taken from its ``{"@context"`` opening and cut just
        before the ``aggregateRating`` member when that member is present.
        ``recipe`` is left untouched when *txt* has no ``{"@context"``.

        Args:
            txt: Raw text content of a JSON-LD script element.
        """
        begin = '{"@context"'
        end = ',"aggregateRating"'
        pieces = txt.split(begin)
        if len(pieces) < 2:
            # No JSON-LD object in this text: nothing to extract.
            return
        r = begin + pieces[1]
        head, marker, _ = r.partition(end)
        # The closing brace is only re-added when the object was actually
        # truncated; otherwise the text is already complete.
        self.recipe = head + "}" if marker else r
if __name__ == "__main__":
    # Script entry point: fetch and print one sample recipe page.
    get_recipe("http://www.marmiton.org/recettes/recette_cookies-maison_86989.aspx")