# Extract a digit-only contact value and a link from the "contactbox" div of a saved HTML page.
from html.parser import HTMLParser
class Parser(HTMLParser):
    """Collect a digit-only text and a link from a ``contactbox`` div.

    After ``feed()``:

    * ``info`` holds the most recent stripped text made only of decimal
      digits and spaces that was seen after a ``<p>`` opened inside the
      contact box, or ``False`` if there was none.
    * ``facebook`` holds the ``href`` of an ``<a>`` opened in that same
      region, or ``False`` if there was none.
    """

    def __init__(self):
        super().__init__()
        # State flags driven by the tag callbacks below.
        self.in_contact = False  # inside a <div> whose class has "contactbox"
        self.in_value = False    # a <p> has been opened inside that div
        # Results. False (rather than None) is the "not found" marker.
        self.info = False
        self.facebook = False

    def handle_starttag(self, tag, attrs):
        """Track entry into the contact box and capture ``<a href=...>``.

        ``attrs`` is the list of ``(name, value)`` pairs supplied by
        ``HTMLParser``; ``value`` is ``None`` for attributes without one.
        """
        if tag == "div":
            # ``class`` is a space-separated token list, so test membership
            # instead of equality; ``or ""`` guards a value-less attribute.
            if any(
                name == "class" and "contactbox" in (value or "").split()
                for name, value in attrs
            ):
                self.in_contact = True
        elif self.in_contact and tag == "p":
            self.in_value = True
        elif self.in_value and tag == "a":
            for name, value in attrs:
                if name == "href":
                    self.facebook = value
                    # Stop collecting once a link has been captured.
                    self.in_contact = False
                    self.in_value = False

    def handle_endtag(self, tag):
        """Leave the contact box on any ``</div>`` seen while inside it."""
        if self.in_contact and tag == "div":
            self.in_contact = False
            self.in_value = False

    def handle_data(self, data):
        """Record text that is only digits and spaces while collecting."""
        if self.in_value:
            txt = data.strip()
            # Spaces are ignored for the digit test but kept in the result.
            if txt and txt.replace(" ", "").isdecimal():
                self.info = txt
# Read the saved page, run it through Parser and print what was found.
with open("3RunPl", "r") as inf:
    content = inf.read()

parser = Parser()
parser.feed(content)
# Flush any text HTMLParser is still buffering at end of input so that
# trailing data reaches handle_data() before the results are read.
parser.close()
print("Contact info: %s" % parser.info)
print("Facebook: %s" % parser.facebook)
# Partager  (the web page's "Share" button label, captured with the snippet; not part of the program)