Optimisation code, exemple de gestion des threads
Bonjour à toutes et tous,
J'ai testé, dans le but d'apprendre, un exemple de programmation parallèle donné dans GNU/Linux Magazine Hors-Série n° 73. Voici le code :
Code:
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73
|
import urllib.request
from queue import Queue
from threading import Lock
from threading import Thread
from timeit import timeit

import requests
from bs4 import BeautifulSoup #https://www.crummy.com/software/BeautifulSoup/
# Number used by download() to name the next saved file ("<NAME_NUMBER>.png");
# download() increments it for every file, so numbering starts at 1.
NAME_NUMBER = 1
def sequential(images_uri):
    """Download every image one after another, in the order given.

    Prints a progress line before each download, then a completion message
    and a 60-dash separator once the whole list has been processed.

    Args:
        images_uri: Iterable of image URIs; each one is passed to download().
    """
    for uri in images_uri:
        progress = '\rDownload of %s' % uri
        print(progress)
        download(uri)

    print('\rDownload complete')
    separator = '-' * 60
    print(separator)
def parallel(images_uri):
    """Download all images concurrently, using one thread per URI.

    The Queue is used purely as a completion counter: one entry is put per
    worker, each worker calls task_done() when it finishes, and q.join()
    blocks until every entry has been accounted for.

    Args:
        images_uri: Sized collection of image URIs (len() is taken on it);
            each URI is passed to download() in its own thread.
    """

    class Worker(Thread):
        """Thread that downloads a single image and reports completion."""

        def __init__(self, queue, image_uri):
            # Initialise the Thread machinery first, then attach our state.
            super().__init__()
            self.queue = queue
            self.image_uri = image_uri

        def run(self):
            print('\rDownload of %s' % self.image_uri)
            try:
                download(self.image_uri)
            finally:
                # Always report completion, even when the download raises;
                # otherwise q.join() in the caller would block forever.
                self.queue.task_done()

    try:
        q = Queue(len(images_uri))
        for image_uri in images_uri:
            task = Worker(q, image_uri)
            # Register the task BEFORE starting the thread: a fast worker
            # could otherwise call task_done() on a still-empty queue, which
            # raises ValueError in that thread.
            q.put(task)
            task.start()
        print('\rWaiting for the download to finish')
        q.join()  # Returns once every worker has called task_done()
        print('\rDownload complete ')
    except RuntimeError:
        # Thread.start() raises RuntimeError when a thread cannot be started.
        # Anything else (e.g. KeyboardInterrupt) is no longer swallowed.
        print("Error, unable to start the tasks")
    print('-' * 60)
def get_image_uri(url, prefix='http://dam'):
    """Return the sources of the <img> tags on a page that start with *prefix*.

    Args:
        url: Address of the HTML page to fetch.
        prefix: Only image sources starting with this string are kept. The
            default is the filter that used to be hard-coded here.

    Returns:
        List of the matching "src" attribute values, in document order.

    Raises:
        requests.exceptions.RequestException: If the page cannot be fetched
            (connection error, or no answer within the timeout).
    """
    # A timeout keeps the script from hanging forever on a silent server.
    response = requests.get(url, timeout=10)
    soup = BeautifulSoup(response.text, "lxml")
    sources = (img.get("src") for img in soup.find_all('img'))
    # An <img> tag without a src attribute yields None; skip it instead of
    # crashing on None.startswith().
    return [src for src in sources if src and src.startswith(prefix)]
# Serialises access to NAME_NUMBER: download() is called from several threads
# at once by parallel().
_NAME_NUMBER_LOCK = Lock()


def download(images_uri):
    """Save the resource at *images_uri* as "<NAME_NUMBER>.png".

    The file is written relative to the current working directory, and the
    global NAME_NUMBER counter is incremented so that the next call uses a
    new name.

    NOTE: the '.png' extension is applied whatever the real image format is.

    Args:
        images_uri: URI of the single image to download.
    """
    global NAME_NUMBER
    # Reserve a unique number atomically. Without the lock, two threads can
    # read the same NAME_NUMBER and overwrite each other's file. The number
    # is consumed even if the download below fails, which leaves a gap in
    # the numbering but never a collision.
    with _NAME_NUMBER_LOCK:
        number = NAME_NUMBER
        NAME_NUMBER += 1
    full_file_name = str(number) + '.png'
    urllib.request.urlretrieve(images_uri, full_file_name)
# Benchmark: time one sequential run, then one parallel run, over the same
# list of image URIs. Guarded so that importing this module does not start
# any download.
if __name__ == "__main__":
    images_uri = get_image_uri('http://www.formation-python.com/')

    print('--- Starting sequential download ---')
    # timeit accepts a callable, which avoids the "from __main__ import ..."
    # setup string.
    print(timeit(lambda: sequential(images_uri), number=1))
    print()
    print('--- Starting parallel download ---')
    print(timeit(lambda: parallel(images_uri), number=1))
Les fonctions download et get_image_uri n'ont pas été données dans l'exemple du magazine ; je les ai écrites en cherchant des solutions sur le web, mais je ne suis pas sûr de leur exactitude. J'aimerais avoir un code correct auquel je puisse me référer par la suite.
Merci d'avance pour votre analyse d'expert.