# Extract a numeric contact value and a link (stored as "facebook") from the
# "contactbox" <div> of a saved HTML page.
from html.parser import HTMLParser
 
class Parser(HTMLParser):
    """Collect a numeric contact value and a link from a ``contactbox`` div.

    The parser watches for a ``<div>`` whose ``class`` attribute contains the
    token ``contactbox``.  Inside that div, once a ``<p>`` has been opened:

    * text made only of decimal digits and spaces is stored in ``info``;
    * the first ``<a>`` encountered supplies ``facebook`` (its ``href``) and
      ends the scan of the contact box.

    Attributes:
        in_contact: True while the parser is inside the contact box.
        in_value: True once a ``<p>`` has been opened inside the contact box.
        info: last matching numeric text, or ``False`` if none was seen.
        facebook: ``href`` of the anchor that ended the scan, or ``False``.
    """

    def __init__(self):
        super().__init__()
        self.in_contact = False
        self.in_value = False
        # ``False`` (not ``None``) is the "not found" sentinel callers print.
        self.info = False
        self.facebook = False
        # Number of <div> elements currently open, counting the contact box
        # itself; lets nested divs close without ending the scan early.
        self._div_depth = 0

    @staticmethod
    def _is_contact_box(attrs):
        """Return True if *attrs* carries a class list containing ``contactbox``."""
        return any(
            name == "class" and value is not None and "contactbox" in value.split()
            for name, value in attrs
        )

    def _leave_contact(self):
        """Reset all state that marks the parser as being inside the box."""
        self.in_contact = False
        self.in_value = False
        self._div_depth = 0

    def handle_starttag(self, tag, attrs):
        """Track entry into the contact box, its paragraphs and its link.

        Args:
            tag: lower-cased tag name supplied by ``HTMLParser``.
            attrs: list of ``(name, value)`` pairs; ``value`` is ``None`` for
                attributes written without a value.
        """
        if tag == "div":
            if self.in_contact:
                # Nested div: remember it so its closing tag is not mistaken
                # for the end of the contact box.
                self._div_depth += 1
            elif self._is_contact_box(attrs):
                self.in_contact = True
                self._div_depth = 1

        elif self.in_contact and tag == "p":
            self.in_value = True

        elif self.in_value and tag == "a":
            for name, value in attrs:
                # Skip a valueless ``href`` so the sentinel is not replaced
                # by ``None``.
                if name == "href" and value is not None:
                    self.facebook = value
            # The first anchor ends the scan, whether or not it had an href.
            self._leave_contact()

    def handle_endtag(self, tag):
        """Leave the contact box when its own ``</div>`` is reached."""
        if self.in_contact and tag == "div":
            self._div_depth -= 1
            # ``<=`` also covers state that was switched on externally
            # without a matching depth.
            if self._div_depth <= 0:
                self._leave_contact()

    def handle_data(self, data):
        """Store text consisting only of decimal digits and spaces in ``info``."""
        if self.in_value:
            txt = data.strip()
            if txt and txt.replace(" ", "").isdecimal():
                self.info = txt
 
# Read the saved HTML page; the path is relative to the working directory.
with open("3RunPl", "r") as inf:
    content = inf.read()

# Feed the whole document in one call, then report what the parser captured.
parser = Parser()
parser.feed(content)
print("Contact info: %s" % parser.info)
print("Facebook: %s" % parser.facebook)