| 12
 3
 4
 5
 6
 7
 8
 9
 10
 11
 12
 13
 14
 15
 16
 17
 18
 19
 20
 21
 22
 23
 24
 25
 26
 27
 28
 29
 30
 31
 32
 33
 34
 35
 36
 37
 38
 39
 40
 41
 42
 43
 44
 45
 
 |  
from html.parser import HTMLParser
 
class Parser(HTMLParser):
    """Extract a digits-only contact value and a link from a ``contactbox`` div.

    The parser looks for a ``<div>`` whose ``class`` contains the token
    ``contactbox``. Inside that div, the first ``<p>`` switches on value
    capture: every text node made up only of decimal digits and spaces is
    stored in ``info`` (later matches overwrite earlier ones). The first
    ``<a>`` seen after that stores its ``href`` in ``facebook`` and ends the
    scan of the current box.

    Attributes:
        in_contact: True while the parser is inside a ``contactbox`` div.
        in_value: True once a ``<p>`` has been opened inside the box.
        info: The captured digits-only text, or False if none was found.
        facebook: The captured ``href`` value, or False if none was found.
    """

    def __init__(self):
        super().__init__()
        self.in_contact = False
        self.in_value = False
        self.info = False
        self.facebook = False
        # Number of currently open <div> elements counted from the contactbox
        # div itself; needed so a nested </div> does not end the box early.
        self._div_depth = 0

    @staticmethod
    def _is_contactbox(attrs):
        """Return True if *attrs* has a ``class`` containing ``contactbox``."""
        # A value-less attribute is reported with value None, hence the guard.
        return any(
            name == "class" and value and "contactbox" in value.split()
            for name, value in attrs
        )

    def _leave_contact(self):
        """Reset all state that tracks the current contactbox."""
        self.in_contact = False
        self.in_value = False
        self._div_depth = 0

    def handle_starttag(self, tag, attrs):
        """Track entry into the contactbox, its ``<p>`` and its ``<a>``."""
        if tag == "div":
            if self.in_contact:
                # Nested div: count it so its end tag is matched correctly.
                self._div_depth += 1
            elif self._is_contactbox(attrs):
                self.in_contact = True
                self._div_depth = 1

        elif self.in_contact and tag == "p":
            self.in_value = True

        elif self.in_value and tag == "a":
            for name, value in attrs:
                if name == "href":
                    self.facebook = value
            # The first link after the <p> ends the scan of this box, whether
            # or not it carried an href.
            self._leave_contact()

    def handle_endtag(self, tag):
        """Leave the contactbox when its own ``</div>`` is reached."""
        if not (self.in_contact and tag == "div"):
            return
        self._div_depth -= 1
        if self._div_depth <= 0:
            self._leave_contact()

    def handle_data(self, data):
        """Store text consisting only of decimal digits and spaces in ``info``."""
        if not self.in_value:
            return
        txt = data.strip()
        if txt and txt.replace(" ", "").isdecimal():
            self.info = txt
 
# Read the whole input document from the file "3RunPl" in the working directory.
with open("3RunPl", "r") as inf:
    content = inf.read()

# Feed the document through the parser, then report what it captured.
# Either value prints as "False" when nothing matching was found.
parser = Parser()
parser.feed(content)
print("Contact info: %s" % parser.info)
print("Facebook: %s" % parser.facebook)
Partager