# HTMLParser demo: detect tags whose class is "companyName" and print the
# text data reported for a matching <a> tag.
try:
    from html.parser import HTMLParser  # Python 3
except ImportError:
    from HTMLParser import HTMLParser  # Python 2 fallback
class MyHTMLParser(HTMLParser):
    """Print the text found inside elements carrying a given CSS class.

    The parser watches for a start tag whose ``class`` attribute contains
    ``target_class``.  While that element is open, and provided it is a
    ``target_tag`` element, every text chunk reported by the parser is
    printed.  Capturing stops when the end tag with the same name as the
    matched start tag is seen.

    Args:
        target_class: Class name to look for in the ``class`` attribute.
        target_tag: Tag name whose text is printed.  HTMLParser reports tag
            names in lower case, so this should be lower case too.

    Attributes:
        inLink: True while the parser is inside a matched element.
        starttag: Name of the most recently matched start tag, or None.
        endtag: Name of the most recent end tag seen, or None.
    """

    def __init__(self, target_class='companyName', target_tag='a'):
        # Explicit base-class call (rather than super()) keeps this working
        # when the base class comes from the Python 2 HTMLParser module.
        HTMLParser.__init__(self)
        self.target_class = target_class
        self.target_tag = target_tag
        self.inLink = False
        self.starttag = None
        self.endtag = None

    def handle_starttag(self, tag, attrs):
        """Start capturing when ``tag`` carries the target class."""
        for name, value in attrs:
            # A class attribute is a space-separated list of names, and an
            # attribute written without a value is reported as None.
            if name == 'class' and value and self.target_class in value.split():
                print("Encountered a start tag:", tag)
                self.inLink = True
                self.starttag = tag
                break

    def handle_endtag(self, tag):
        """Record the end tag and stop capturing once the match is closed."""
        print("Encountered an end tag :", tag)
        self.endtag = tag
        # Without this reset every piece of text after the first matched
        # element would keep being printed for the rest of the document.
        if self.inLink and tag == self.starttag:
            self.inLink = False

    def handle_data(self, data):
        """Print ``data`` only while inside a matched ``target_tag`` element."""
        if self.inLink and self.starttag == self.target_tag:
            print("Encountered some data :", data)
# Run the parser over a sample page fragment.
# NOTE(review): the data-pjstats attributes below contain bare double quotes
# inside a double-quoted value; they were presumably &quot; entities in the
# original page -- confirm before relying on how they are parsed.
# The second companyName link is never closed in this excerpt.
parser = MyHTMLParser()
parser.feed(""" <ol class="">
<li id="lrVisitCard-1" class="visitCard ">
<span class="number">1</span><!-- -->
<div class="visitCardContent">
<h2 class="titleMain">
<a href="http://www.xxxx.fr#onglet-infos" class="companyName"
data-pjstats="{"idRequete":1}"><span>Hello world</span>
<span class="buttonDetails">+ détails</span>
</a></h2>
<div class="category">
<ul><li class="first">autres données</li> </ul></div>
<div class="dataCard sc">
<div class="localisationBlock">
<a href="http://www.xxxx.fr#onglet-infos" class="companyName"
data-pjstats="{"idRequete":1}"><span>Hello world</span>
<p>autre données<br />autre données</p>""")
# Flush any text the parser is still buffering at end of input.
parser.close()
# "Partager" (Share) -- page link text captured with the snippet; not part of the script.