#!/usr/bin/env python2
#coding=utf-8
import argparse
import errno
import os
import re
import urllib
import urllib2
def download(tab, base_url=None):
    """Download every link in *tab*; return the links that are directories.

    Each entry of *tab* is appended to *base_url* (defaulting to the
    module-level ``arguments['url']``) and retrieved into a local file of
    the same name.  Entries that turn out to be directories cannot be
    saved as plain files; they are collected and returned so the caller
    can recurse into them.

    :param tab: iterable of href strings scraped from the index page
    :param base_url: base URL to prefix each entry with; falls back to
        the global ``arguments['url']`` for backward compatibility
    :return: list of entries that failed with "is a directory"
    """
    if base_url is None:
        base_url = arguments['url']
    subDirectory = []
    for i in tab:
        try:
            urllib.urlretrieve(base_url + i, i)
            print('Telechargement de : ' + i)
        except IOError as err:
            # err.strerror can be None for non-OS errors; guard the concat.
            print('Download Failed ' + i + ' ' + str(err.strerror))
            # Compare the errno, not the locale-dependent message string;
            # keep the old string check as a fallback for odd platforms.
            if err.errno == errno.EISDIR or err.strerror == 'Is a directory':
                subDirectory.append(i)
    return subDirectory
# --- Command-line entry point --------------------------------------------
# Scrape an index page for <a href="..."> links, download each one, then
# recurse one level into any links that turned out to be directories.
parser = argparse.ArgumentParser(description='Get All Document from a page')
parser.add_argument('url', action="store", help="Give an url to scan")
args = parser.parse_args()
# vars() is the public equivalent of the private args._get_kwargs().
arguments = vars(args)

requete = urllib2.Request(arguments['url'])
page = urllib2.urlopen(requete).read()
# Raw string; '<' needs no backslash escape in a regex.
variable = re.findall(r'<a href="(.+?)"', page)

firstSub = download(variable)
print(firstSub)

# Recurse one level into each directory link (skip the parent link '/').
for i in firstSub:
    if i != '/':
        # Don't crash if the directory is left over from a previous run.
        if not os.path.isdir(i):
            os.mkdir(i)
        print('Telechargement de : ' + arguments['url'] + i)
        requete = urllib2.Request(arguments['url'] + i)
        page = urllib2.urlopen(requete).read()
        variable = re.findall(r'<a href="(.+?)"', page)
        download(variable)