Bonjour à tous les amis,

Dans le cadre d'un programme informatique, je souhaiterais extraire, d'une très longue chaîne de caractères (le code HTML d'une page web), certaines informations qui me seraient précieuses.

Dans cette page web, quatre "tableaux" de ce type sont inclus :

Code HTML :
<div class="fcsttabf" id="div_wgfcst2">       
            <script language="JavaScript" type="text/javascript">
                //<![CDATA[        
        var wg_fcst_tab_data_2 = {"id_spot":802733,"id_user":1389136,"nickname":"other user's custom spot","custom_onlypro":1,"spot":"Canada - Coaticook","lat":45.1335,"lon":-71.8228,"alt":290,"id_model":38,"model":"usnmm","model_alt":367,"levels":1,"sst":null,"sunrise":"07:10","sunset":"16:50","tz":"EST","tzutc":"(UTC-5)","utc_offset":-5,"tzid":"Canada\/Eastern","tides":0,"md5chk":"98e9facefbacac0733d504b8638cf836","fcst":{"38":{"initstamp":1549022400,"TMP":[-18.5,-18.7,-18,-17,-15.9,-15.3,-14.6,-14.2,-14.1,-14.7,-16,-16.3,-16.7,-17,-17.1,-16.9,-16.7,-16.4,-16.6,-16.5,-16.5,-16.3,-15.1,-14,-13.6,-13.3,-13.1,-11.9,-10.3,-9.2,-7.8,-7.6,-7.3,-7.4,-9.3,-10,-11,-9.6,-11.3,-13.2,-14.3,-15.3,-16.6,-17.8,-18.7,-19.2,-20,-19.5,-19.6],"TCDC":[null,14,0,0,0,0,25,0,0,0,0,0,0,0,1,0,0,8,0,1,2,38,100,100,100,100,100,100,100,100,100,100,100,98,60,14,97,19,29,4,4,79,20,6,9,73,80,29,90],"HCDC":[0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,1,13,39,100,82,33,21,87,53,78,0,1,30,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0],"MCDC":[0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,18,100,100,100,100,100,100,66,35,3,0,0,0,0,0,0,0,0,0,0,0,0,0,0,2,14,19,1],"LCDC":[0,14,0,0,0,0,25,0,0,0,0,0,0,0,1,0,0,8,0,0,1,14,100,100,100,100,100,100,100,100,100,100,100,98,60,14,97,19,29,4,4,79,20,6,9,73,78,11,89],"RH":[75,75,74,73,72,71,72,71,69,67,65,63,63,63,65,65,64,70,72,71,75,79,78,78,74,71,83,82,82,83,84,85,88,84,83,84,83,88,85,73,73,81,91,99,100,99,100,92,93],"GUST":[26.1,27.8,27.8,28.6,28.5,28.7,27.3,26.4,24.1,24.7,23.8,25.7,25.4,22.7,21.8,22.3,21.7,22.3,22.2,21.2,20.5,19.2,20.7,20.7,22.2,25.1,27.6,29.6,31.2,30.2,28.7,26.8,25.8,28.3,24.7,24.4,23.5,23.5,20.7,14.9,12.1,10.2,6.9,2.7,2.8,2.8,2.6,1.8,3],"SLP":[1024,1025,1024,1025,1025,1024,1023,1024,1024,1024,1024,1024,1025,1025,1024,1024,1024,1023,1023,1022,1021,1020,1018,1017,1017,1016,1014,1012,1011,1010,1008,1008,1008,1010,1011,1013,1014,1016,1016,1017,1018,1019,1021,1021,1022,1022,1022,1022,1023],"APCP1":[null,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0.1,0.3,0.1,0
.1,0.1,0.1,0.1,0.1,0,0,0,0.1,0,0,0,0,0,0,0,0,0,0,0],"WINDSPD":[10.1,10.6,11.5,12,12.4,12.7,12,12.1,11.9,10.7,8.6,8.4,7.7,6.8,6.4,6.3,5.9,6.5,6.5,6.7,7.1,7.1,7.5,8,8.5,8.9,10.5,11.2,11.5,11.3,11.5,11.6,11.5,15.1,11.9,10.2,7,10.4,9.1,5.8,3.7,3.1,2.7,2.8,2.7,3,2.8,2.6,3.5],"WINDDIR":[247,248,253,252,251,249,247,246,243,244,239,240,240,235,231,225,214,214,213,205,204,201,196,193,197,195,190,197,214,230,245,252,264,285,285,284,273,284,309,309,312,315,254,230,206,182,164,137,151],"SMERN":["11","11","11","11","11","11","11","11","11","11","11","11","11","10","10","10","10","10","9","9","9","9","9","9","9","9","8","9","10","10","11","11","12","13","13","13","12","13","14","14","14","14","11","10","9","8","7","6","7"],"TMPE":[-18,-18.3,-17.5,-16.6,-15.6,-14.9,-14.2,-13.8,-13.7,-14.3,-15.4,-15.7,-16.2,-16.4,-16.5,-16.3,-16.1,-15.9,-16.1,-16.1,-16.1,-15.9,-14.6,-13.5,-13.1,-12.7,-12.5,-11.3,-9.7,-8.6,-7.2,-7,-6.7,-6.8,-8.8,-9.5,-10.5,-9.1,-10.8,-12.7,-13.7,-14.8,-16,-17.2,-18.1,-18.5,-19.4,-18.9,-19],"hr_weekday":[5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,0,0,0,0,0,0,0,0],"hr_h":["07","08","09","10","11","12","13","14","15","16","17","18","19","20","21","22","23","00","01","02","03","04","05","06","07","08","09","10","11","12","13","14","15","16","17","18","19","20","21","22","23","00","01","02","03","04","05","06","07"],"hr_d":["01","01","01","01","01","01","01","01","01","01","01","01","01","01","01","01","01","02","02","02","02","02","02","02","02","02","02","02","02","02","02","02","02","02","02","02","02","02","02","02","02","03","03","03","03","03","03","03","03"],"hours":[0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25,26,27,28,29,30,31,32,33,34,35,36,37,38,39,40,41,42,43,44,45,46,47,48],"vars":["TMP","TCDC","HCDC","MCDC","LCDC","RH","GUST","SLP","APCP1","WINDSPD","WINDDIR","SMERN","TMPE"],"initdate":"2019-02-01 
12:00:00","init_d":"01.02.2019","init_dm":"01.02.","init_h":"12","initstr":"2019020112","model_name":"HRW 4 km","model_longname":"HRW 4 km (US)","id_model":38,"update_last":"2019-02-01 17:19:03","update_next":"2019-02-02 05:25:00","img_param":[],"img_var_map":{"WINDSPD":"windspd","MWINDSPD":"windspd","SMER":"windspd","SMERN":"windspd","WINDDIR":"windspd","TMP":"t2m","TMPE":"t2m","APCP1":"tcdc_apcp1","APCP1s":"tcdc_apcp1","HCDC":"tcdc_apcp1","MCDC":"tcdc_apcp1","LCDC":"tcdc_apcp1","CDC":"tcdc_apcp1","TCDC":"tcdc_apcp1","SLP":"press"}}}};
var wgopts_2 = {"id_user":0,"wj":"knots","tj":"c","waj":"m","odh":3,"doh":22,"wrap":40,"fhours":240,"limit1":10.63,"limit2":15.57,"limit3":19.41,"tlimit":10,"vt":1,"params":["WINDSPD","GUST","SMER","TMPE","CDC","APCP1","RATING"],"first_row_mwinfo":true,"path_lng":"\/fr\/"};
wgopts_2.lang = WgLang;
WgFcst.showForecast(wg_fcst_tab_data_2,wgopts_2);
            //]]>
            </script>
        </div>

De ce tableau, je souhaite récupérer, entre autres, les séries "APCP1" et "PCPT" sous forme de listes.

J'ai commencé à travailler avec le module re afin de chercher dans le texte les informations mentionnées. Le code suit :

Code Python :
import re
code = 'blab,"id_user":1389136,"nickname":"other users custom spot","id_spot":123546,"TMP":[-18.7,-16.1,-13.6,-13.8,-16.3,-15.7,-15.6,-17,-15.2,-12.3,-10.2,-9.4,-9.9,-12.7,-12.9,-14.5,-12.9,-10,-7,-4.2,-4,-2.9,-1.9,-1.5,-0,1.2,1.8,2.4,2.3]'
# La variable code sert à tester le programme, à terme il faudrait que ça soit la page web entière
 
m=re.search(r'"id_user":(\w+)', code)
result=m.groups()
print(result)
Pour "id_user" ou "id_spot", je récupère bien la valeur qui suit, jusqu'à la virgule. En revanche, si je remplace "id_user" par "nickname" ou par "TMP", par exemple, j'obtiens un message d'erreur que je n'arrive pas à résoudre malgré mes différentes tentatives, et je ne comprends pas pourquoi.



Mes questions sont les suivantes :

- Le module re est-il le bon outil à utiliser ?
- Auriez-vous une idée de la façon de pallier mes problèmes ?

Merci d'avance à toutes les personnes qui passeront sur ce sujet.

Bonne journée, UL