urllib2 : pb recuperation headers

**killass** · 29/08/2006, 23h41

Salut,
Je viens demander de l'aide ici car ça fait trop longtemps que je cherche une solution à mon problème et je ne trouve pas.

Donc, en fait, je cherche à récupérer une pièce jointe sur un webmail sécurisé Exchange (Python 2.4, Windows).
L'adresse à récupérer est du genre : https://X.X.X/exchange/boiteauxlettr...EML/getgal.txt

Le problème est que j'aimerais récupérer le header Content-Length pour faire une barre de progression, ou du moins montrer l'avancement en %.
Avec urllib, je récupère bien le Content-Length, mais je ne peux pas l'utiliser puisque je passe par un proxy et que c'est une URL https.

Avec urllib2, je n'ai que ces headers-là, que je sois derrière un proxy ou pas :
Connection: close
Date: Tue, 29 Aug 2006 20:23:42 GMT
Server: Microsoft-IIS/6.0
X-Powered-By: ASP.NET
Content-Type: text/plain
MS-WebStorage: 6.5.7226
MS-WebStorage: 6.5.7226
Transfer-Encoding: chunked
X-Powered-By: ASP.NET
Cache-Control: no-cache

Si j'essaie avec wget, urllib ou Delphi (Indy), le header Content-Length est bien présent.

urllib2 :

Code :

Sélectionner tout - Visualiser dans une fenêtre à part

1
2
3
4
5
6
7
 
# Build the request for the attachment on the Exchange webmail
req = urllib2.Request("https://X.X.X/exchange/boiteauxlettres/Bo%C3%AEte%20de%20r%C3%A9ception/test-2.EML/getgal.txt")
 
# Pre-compute the HTTP Basic credentials; encodestring() appends a trailing
# newline, which the [:-1] slice removes
base64string = base64.encodestring('%s:%s' % ('login', 'password'))[:-1]
req.add_header("Authorization", "Basic %s" % base64string)
temp = urllib2.urlopen(url=req)
# Dump the response headers (Content-Length is the one being looked for)
print temp.info()

urllib :

Code :

Sélectionner tout - Visualiser dans une fenêtre à part

1
2
3
 
# Same request through urllib, with the credentials embedded in the URL
# as login:password@host
f = urllib.urlopen('https://%s:%s@%s' % ("login", "password", "X.X.X/exchange/boiteauxlettres/Bo%C3%AEte%20de%20r%C3%A9ception/test-2.EML/getgal.txt"))
# Dump the response headers
print f.info()

Je veux bien utiliser urllib, mais ça ne marche pas si on passe par un proxy en https.
Auriez-vous une idée ?

Je vous remercie d'avance

**killass** · 29/08/2006, 23h46

J'avais regardé cette source http://aspn.activestate.com/ASPN/Coo.../Recipe/301740 mais je n'ai pas réussi à l'adapter (https + proxy + autorisation Basic)... je n'obtenais que des pages "this page has been moved".

Sinon, j'ai aussi un serveur Exchange chez moi, accessible en https, et cela marche très bien avec urllib2 :
Server: Microsoft-IIS/5.0
Date: Tue, 29 Aug 2006 20:51:23 GMT
Connection: close
Content-Type: text/plain
Content-Length: 599388
ETag: "30d21636833b404aa8af57b2f6f79f09000000012ff8"
Last-Modified: Mon, 14 Aug 2006 17:00:28 GMT
Accept-Ranges: bytes
Content-Disposition: inline;filename="getgal.txt"

J'ai juste changé l'adresse et les logins

**killass** · 30/08/2006, 14h29

Avec pycurl, ça pourrait peut-être passer. Je suis en train de chercher un exemple, mais si vous en avez un sous le coude, je suis preneur.

**killass** · 30/08/2006, 22h07

Bon, eh bien je me suis débrouillé avec pycurl en cherchant dans leur mailing-list.

C'est un peu du bricolage, mais ça tient la route et c'est ce que je veux.

Code :

Sélectionner tout - Visualiser dans une fenêtre à part

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
 
# -*- coding: iso-8859-1 -*-
 
import pycurl, Tkinter
 
class var:
    """Plain namespace of class attributes shared by download() and progress()."""

    # Every value starts at zero and is overwritten as the transfers run.
    size = time = speed = TR_speed = 0
 
def download(action):
    """Perform one pycurl request against the hard-coded URL through the proxy.

    action -- "headers": send the request without fetching the body (NOBODY)
              and store the announced Content-Length in var.size.
              "body": download into body.txt, reporting through progress(),
              then store the total time and the download speed divided by
              1000 in var.time and var.speed.

    Errors raised by pycurl propagate to the caller; the curl handle and the
    output file are closed in every case.
    """
    url = "https://blabla"
    f = open("body.txt", "wb")
    try:
        c = pycurl.Curl()
        try:
            c.setopt(c.URL, url)
            # port_proxy and 'ip_proxy' are placeholders to be filled in
            c.setopt(c.PROXYPORT, port_proxy)  # proxy port
            c.setopt(c.PROXY, 'ip_proxy')      # proxy IP address or DNS name

            # NOTE(review): SSLVERSION 3 pins SSLv3 and SSL_VERIFYPEER 0 turns
            # off certificate verification -- confirm this is still wanted.
            c.setopt(c.SSLVERSION, 3)
            c.setopt(c.SSL_VERIFYPEER, 0)
            # HTTP auth scheme: HTTPAUTH_BASIC or pycurl.HTTPAUTH_DIGEST
            c.setopt(c.HTTPAUTH, c.HTTPAUTH_BASIC)
            # HTTP auth credentials
            c.setopt(c.USERPWD, "login:password")
            # Always give curl a sink so no payload ends up on stdout
            c.setopt(c.WRITEDATA, f)
            c.setopt(c.FOLLOWLOCATION, 1)
            c.setopt(c.MAXREDIRS, 5)
            c.setopt(c.OPT_FILETIME, 1)

            if action == "headers":
                c.setopt(pycurl.NOBODY, 1)
            elif action == "body":
                # Progress reporting only makes sense while the body is
                # transferred; during the header request var.size is still 0.
                c.setopt(c.NOPROGRESS, 0)
                c.setopt(c.PROGRESSFUNCTION, progress)

            c.perform()

            if action == "headers":
                var.size = c.getinfo(c.CONTENT_LENGTH_DOWNLOAD)
            elif action == "body":
                var.time = c.getinfo(c.TOTAL_TIME)
                var.speed = c.getinfo(c.SPEED_DOWNLOAD)/1000
        finally:
            c.close()
    finally:
        f.close()
 
 
## Callback function invoked when progress information is updated
def progress(download_t, download_d, upload_t, upload_d):
    """Refresh the two status labels from pycurl's progress callback.

    download_d is used as the amount downloaded so far; the other three
    arguments are accepted because pycurl passes them, but are not used.
    The expected total comes from var.size (filled in by the header request).
    """
    lab1.configure(text='%s' % int(download_d/1000) + "Ko sur " + str(int(var.size/1000)) + " Ko")
    # Amount received since the previous callback; this only approximates a
    # per-second rate because the callback interval is not fixed.
    lab2.configure(text='%s' % int((download_d-var.TR_speed)/1000) + " Ko/s")
    # The transfer runs outside mainloop(), so repaint the window by hand
    root.update()
    var.TR_speed = download_d

    # Require a known, non-zero size: otherwise 0 == 0 would announce
    # completion before anything has been downloaded.
    if var.size > 0 and var.size == download_d:
        lab1.configure(text='Téléchargement terminé')
        # Convert to Ko so the value matches the unit shown in the label
        lab2.configure(text = "Taille : %s Ko" % int(var.size/1000))
 
# Build the window with the two status labels updated by progress()
root = Tkinter.Tk()
lab1 = Tkinter.Label(root, text = 'Démarrage du téléchargement')
lab1.pack()
lab2 = Tkinter.Label(root, text = '')
lab2.pack()
# Both transfers run before mainloop(); progress() calls root.update()
# to repaint the window in the meantime.
# First request fills var.size, second one fetches the file itself.
download("headers")
download("body")
# Summary labels built from the values download("body") stored in var
Tkinter.Label(root, text = "Temps : " + str(var.time) + " sec").pack()
Tkinter.Label(root, text = "Vitesse : %.2f Ko/s" % var.speed).pack()
root.mainloop()

**killass** · 30/08/2006, 22h13

À noter qu'il faut la version SSL de pycurl, sinon vous aurez droit à un "libcurl was built with SSL disabled, https: not supported!"

**killass** · 03/09/2006, 14h44

Voilà, j'ai fini mon projet avec une progress bar faite avec un canvas et un thread

Code :

Sélectionner tout - Visualiser dans une fenêtre à part

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
# -*- coding: iso-8859-1 -*-
 
import pycurl, Tkinter, sys, os, threading
 
class var:
    """Plain namespace of class attributes shared by the download code and the GUI."""

    # Transfer figures; all start at zero and are overwritten as the transfers run.
    size = time = speed = TR_speed = 0
    # HTTP status code of the most recent completed request (0 = none yet).
    status_HTTP = 0
 
class gui(threading.Thread):
    """Worker thread that runs the two-step download and reports the outcome.

    run() calls download("headers") then download("body"), then updates the
    labels according to var.status_HTTP. On failure (exception, 404 or 401)
    the partial body.txt is deleted and the backup body.txt.old, if present,
    is renamed back to body.txt.

    NOTE(review): the widgets are modified from this worker thread; Tkinter
    is generally not thread-safe -- confirm this is acceptable here.
    NOTE(review): the failure paths use absolute C:/Python24/Freeze paths
    while the success path and download() use relative ones; they only agree
    when the working directory is C:/Python24/Freeze.
    """

    def __init__(self, root):
        threading.Thread.__init__(self)
        # Keep the Tk root handed over by the caller instead of dropping it
        self.root = root

    def _restore_previous_file(self):
        """Delete the partial body.txt and put the backup copy back, if any."""
        if os.path.isfile("C:/Python24/Freeze/body.txt"):
            os.remove("C:/Python24/Freeze/body.txt")
        if os.path.isfile("C:/Python24/Freeze/body.txt.old"):
            os.rename('C:/Python24/Freeze/body.txt.old', 'C:/Python24/Freeze/body.txt')

    def run(self):
        """Run both transfers, then display the result or the error."""
        try:
            download("headers")
            download("body")
        except Exception:
            lab1.configure(text = "Impossible de se connecter au site ou au proxy")
            self._restore_previous_file()
            # var.status_HTTP may still hold the status of an earlier,
            # successful request; do not evaluate it after a failure.
            return

        if var.status_HTTP == 404:
            lab1.configure(text = "Téléchargement impossible : page non trouvée, veuillez verifier que l'adresse est correcte ou que le message est bien présent dans la boite aux lettres")
            lab2.destroy()
            self._restore_previous_file()
        elif var.status_HTTP == 401:
            lab1.configure(text = "Les logins de la messagerie sont incorrects")
            lab2.destroy()
            self._restore_previous_file()
        elif var.status_HTTP == 200:
            Tkinter.Label(self.root, text = "Temps : " + str(var.time) + " sec").pack()
            Tkinter.Label(self.root, text = "Vitesse : %.2f Ko/s" % var.speed).pack()
            # The new file is complete, the backup is no longer needed
            if os.path.isfile('body.txt.old'):
                os.remove('body.txt.old')
 
 
def download(action):
    """Perform one pycurl request against the hard-coded URL.

    action -- "headers": send the request without fetching the body (NOBODY)
              and store the announced Content-Length in var.size.
              "body": download into body.txt, reporting through progress(),
              then store the total time and the download speed divided by
              1000 in var.time and var.speed.

    After a successful perform() the HTTP status code is stored in
    var.status_HTTP. Errors raised by pycurl propagate to the caller; the
    curl handle and the output file are closed in every case.
    """
    url = "http://nchc.dl.sourceforge.net/sourceforge/emule/eMule0.47a-Installer.exe"

    f = open("body.txt", "wb")
    try:
        c = pycurl.Curl()
        try:
            c.setopt(c.URL, url)
            # To go through a proxy, set c.PROXYPORT (e.g. 8080) and
            # c.PROXY (e.g. '10.0.0.30') here.

            # NOTE(review): SSLVERSION 3 pins SSLv3 and SSL_VERIFYPEER 0 turns
            # off certificate verification -- confirm this is still wanted.
            c.setopt(c.SSLVERSION, 3)
            c.setopt(c.SSL_VERIFYPEER, 0)
            c.setopt(c.HTTPAUTH, c.HTTPAUTH_BASIC)
            c.setopt(c.USERPWD, "user:passwd")
            # Always give curl a sink so no payload ends up on stdout
            c.setopt(c.WRITEDATA, f)
            c.setopt(c.FOLLOWLOCATION, 1)
            c.setopt(c.MAXREDIRS, 5)
            c.setopt(c.OPT_FILETIME, 1)

            if action == "headers":
                c.setopt(c.NOBODY, 1)
            elif action == "body":
                # Progress reporting is only needed while the body is fetched
                c.setopt(c.NOPROGRESS, 0)
                c.setopt(c.PROGRESSFUNCTION, progress)

            c.perform()

            if action == "headers":
                var.size = c.getinfo(c.CONTENT_LENGTH_DOWNLOAD)
            elif action == "body":
                var.time = c.getinfo(c.TOTAL_TIME)
                var.speed = c.getinfo(c.SPEED_DOWNLOAD)/1000

            # Not reset beforehand on purpose: progress() relies on the
            # status left by the header request while the body is downloading.
            var.status_HTTP = c.getinfo(c.HTTP_CODE)
        finally:
            c.close()
    finally:
        f.close()
 
## Callback function invoked when progress information is updated
def progress(download_t, download_d, upload_t, upload_d):
    """Redraw the progress bar and status labels from pycurl's progress callback.

    download_d is used as the amount downloaded so far; the other three
    arguments are accepted because pycurl passes them, but are not used.
    The expected total comes from var.size (filled in by the header request).
    Nothing is drawn unless the last recorded HTTP status is 200 and a
    positive size is known.

    NOTE(review): when called from the worker thread this touches Tkinter
    widgets outside the main thread -- confirm this is acceptable here.
    """
    if var.status_HTTP != 200:
        return
    # A size of 0 or a negative value (length not announced) would raise
    # ZeroDivisionError or give a negative percentage below.
    if var.size <= 0:
        return

    percent = int(download_d/var.size*100)

    canevas.pack()
    # The canvas is 250 px wide, hence 2.5 px per percent
    canevas.coords(rect, 1, 1, percent*2.5, 40)
    canevas.itemconfig(pourcentage, text = str(percent) + '%')
    lab1.configure(text='%s' % int(download_d/1000) + "Ko sur " + str(int(var.size/1000)) + " Ko")
    # Amount received since the previous callback; this only approximates a
    # per-second rate because the callback interval is not fixed.
    lab2.configure(text='%s' % int((download_d-var.TR_speed)/1000) + " Ko/s")

    if var.size == download_d:
        # Transfer complete: collapse the bar and show the final size
        canevas.configure(height=0, width=0)
        lab1.configure(text='Téléchargement terminé')
        lab2.configure(text = "Taille : %s Ko" % int(var.size/1000))

    var.TR_speed = download_d
 
if __name__ == "__main__":
    # Rotate the previous download: drop the old backup, then keep the
    # last body.txt as body.txt.old
    if os.path.isfile('body.txt.old'):
        os.remove('body.txt.old');
    if os.path.isfile('body.txt'):
        os.rename('body.txt', 'body.txt.old')
 
    root = Tkinter.Tk()
 
    # Progress-bar canvas: packed, then hidden right away with pack_forget();
    # progress() packs it again while a download is running
    canevas = Tkinter.Canvas(root, height=40, width=250, bg="white")
    canevas.place(x=1, y=1)
    canevas.pack()
    canevas.pack_forget()
    # Bar rectangle (resized by progress()) and the percentage text item
    rect = canevas.create_rectangle(1, 1, 0, 40, fill='red')
    pourcentage = canevas.create_text(125,20,text='')
 
    # Status labels updated by progress() and by the gui thread
    lab1 = Tkinter.Label(root, text = 'Début du téléchargement')
    lab1.pack()
    lab2 = Tkinter.Label(root, text = '')
    lab2.pack()
    # Run the download in a background thread, then enter the Tk event loop
    ihm = gui(root)
    ihm.start()
    root.mainloop()

urllib2 : pb recuperation headers

Réseau/Web Python

Vue hybride

Discussions similaires

Partager

Partager