IdentifiantMot de passe
Loading...
Mot de passe oublié ?Je m'inscris ! (gratuit)
Navigation

Inscrivez-vous gratuitement
pour pouvoir participer, suivre les réponses en temps réel, voter pour les messages, poser vos propres questions et recevoir la newsletter

Python Discussion :

Problème package re [Python 3.X]


Sujet :

Python

Vue hybride

Message précédent Message précédent   Message suivant Message suivant
  1. #1
    Membre averti
    Homme Profil pro
    Étudiant
    Inscrit en
    Février 2019
    Messages
    12
    Détails du profil
    Informations personnelles :
    Sexe : Homme
    Âge : 29
    Localisation : Canada

    Informations professionnelles :
    Activité : Étudiant
    Secteur : Industrie

    Informations forums :
    Inscription : Février 2019
    Messages : 12
    Par défaut Problème package re
    Bonjour à tous les amis,

    Dans le cadre d'un programme informatique, je souhaiterais récupérer d'une très longue chaîne de caractères (code html d'une page web) certaines informations qui me seraient précieuses.

    Dans cette page web, quatre "tableaux" de ce type sont inclus :

    Code HTML : Sélectionner tout - Visualiser dans une fenêtre à part
    1
    2
    3
    4
    5
    6
    7
    8
    9
    10
    <div class="fcsttabf" id="div_wgfcst2">       
                <script language="JavaScript" type="text/javascript">
                    //<![CDATA[        
            var wg_fcst_tab_data_2 = {"id_spot":802733,"id_user":1389136,"nickname":"other user's custom spot","custom_onlypro":1,"spot":"Canada - Coaticook","lat":45.1335,"lon":-71.8228,"alt":290,"id_model":38,"model":"usnmm","model_alt":367,"levels":1,"sst":null,"sunrise":"07:10","sunset":"16:50","tz":"EST","tzutc":"(UTC-5)","utc_offset":-5,"tzid":"Canada\/Eastern","tides":0,"md5chk":"98e9facefbacac0733d504b8638cf836","fcst":{"38":{"initstamp":1549022400,"TMP":[-18.5,-18.7,-18,-17,-15.9,-15.3,-14.6,-14.2,-14.1,-14.7,-16,-16.3,-16.7,-17,-17.1,-16.9,-16.7,-16.4,-16.6,-16.5,-16.5,-16.3,-15.1,-14,-13.6,-13.3,-13.1,-11.9,-10.3,-9.2,-7.8,-7.6,-7.3,-7.4,-9.3,-10,-11,-9.6,-11.3,-13.2,-14.3,-15.3,-16.6,-17.8,-18.7,-19.2,-20,-19.5,-19.6],"TCDC":[null,14,0,0,0,0,25,0,0,0,0,0,0,0,1,0,0,8,0,1,2,38,100,100,100,100,100,100,100,100,100,100,100,98,60,14,97,19,29,4,4,79,20,6,9,73,80,29,90],"HCDC":[0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,1,13,39,100,82,33,21,87,53,78,0,1,30,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0],"MCDC":[0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,18,100,100,100,100,100,100,66,35,3,0,0,0,0,0,0,0,0,0,0,0,0,0,0,2,14,19,1],"LCDC":[0,14,0,0,0,0,25,0,0,0,0,0,0,0,1,0,0,8,0,0,1,14,100,100,100,100,100,100,100,100,100,100,100,98,60,14,97,19,29,4,4,79,20,6,9,73,78,11,89],"RH":[75,75,74,73,72,71,72,71,69,67,65,63,63,63,65,65,64,70,72,71,75,79,78,78,74,71,83,82,82,83,84,85,88,84,83,84,83,88,85,73,73,81,91,99,100,99,100,92,93],"GUST":[26.1,27.8,27.8,28.6,28.5,28.7,27.3,26.4,24.1,24.7,23.8,25.7,25.4,22.7,21.8,22.3,21.7,22.3,22.2,21.2,20.5,19.2,20.7,20.7,22.2,25.1,27.6,29.6,31.2,30.2,28.7,26.8,25.8,28.3,24.7,24.4,23.5,23.5,20.7,14.9,12.1,10.2,6.9,2.7,2.8,2.8,2.6,1.8,3],"SLP":[1024,1025,1024,1025,1025,1024,1023,1024,1024,1024,1024,1024,1025,1025,1024,1024,1024,1023,1023,1022,1021,1020,1018,1017,1017,1016,1014,1012,1011,1010,1008,1008,1008,1010,1011,1013,1014,1016,1016,1017,1018,1019,1021,1021,1022,1022,1022,1022,1023],"APCP1":[null,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0.1,0.3,0
.1,0.1,0.1,0.1,0.1,0.1,0,0,0,0.1,0,0,0,0,0,0,0,0,0,0,0],"WINDSPD":[10.1,10.6,11.5,12,12.4,12.7,12,12.1,11.9,10.7,8.6,8.4,7.7,6.8,6.4,6.3,5.9,6.5,6.5,6.7,7.1,7.1,7.5,8,8.5,8.9,10.5,11.2,11.5,11.3,11.5,11.6,11.5,15.1,11.9,10.2,7,10.4,9.1,5.8,3.7,3.1,2.7,2.8,2.7,3,2.8,2.6,3.5],"WINDDIR":[247,248,253,252,251,249,247,246,243,244,239,240,240,235,231,225,214,214,213,205,204,201,196,193,197,195,190,197,214,230,245,252,264,285,285,284,273,284,309,309,312,315,254,230,206,182,164,137,151],"SMERN":["11","11","11","11","11","11","11","11","11","11","11","11","11","10","10","10","10","10","9","9","9","9","9","9","9","9","8","9","10","10","11","11","12","13","13","13","12","13","14","14","14","14","11","10","9","8","7","6","7"],"TMPE":[-18,-18.3,-17.5,-16.6,-15.6,-14.9,-14.2,-13.8,-13.7,-14.3,-15.4,-15.7,-16.2,-16.4,-16.5,-16.3,-16.1,-15.9,-16.1,-16.1,-16.1,-15.9,-14.6,-13.5,-13.1,-12.7,-12.5,-11.3,-9.7,-8.6,-7.2,-7,-6.7,-6.8,-8.8,-9.5,-10.5,-9.1,-10.8,-12.7,-13.7,-14.8,-16,-17.2,-18.1,-18.5,-19.4,-18.9,-19],"hr_weekday":[5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,0,0,0,0,0,0,0,0],"hr_h":["07","08","09","10","11","12","13","14","15","16","17","18","19","20","21","22","23","00","01","02","03","04","05","06","07","08","09","10","11","12","13","14","15","16","17","18","19","20","21","22","23","00","01","02","03","04","05","06","07"],"hr_d":["01","01","01","01","01","01","01","01","01","01","01","01","01","01","01","01","01","02","02","02","02","02","02","02","02","02","02","02","02","02","02","02","02","02","02","02","02","02","02","02","02","03","03","03","03","03","03","03","03"],"hours":[0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25,26,27,28,29,30,31,32,33,34,35,36,37,38,39,40,41,42,43,44,45,46,47,48],"vars":["TMP","TCDC","HCDC","MCDC","LCDC","RH","GUST","SLP","APCP1","WINDSPD","WINDDIR","SMERN","TMPE"],"initdate":"2019-02-01 
12:00:00","init_d":"01.02.2019","init_dm":"01.02.","init_h":"12","initstr":"2019020112","model_name":"HRW 4 km","model_longname":"HRW 4 km (US)","id_model":38,"update_last":"2019-02-01 17:19:03","update_next":"2019-02-02 05:25:00","img_param":[],"img_var_map":{"WINDSPD":"windspd","MWINDSPD":"windspd","SMER":"windspd","SMERN":"windspd","WINDDIR":"windspd","TMP":"t2m","TMPE":"t2m","APCP1":"tcdc_apcp1","APCP1s":"tcdc_apcp1","HCDC":"tcdc_apcp1","MCDC":"tcdc_apcp1","LCDC":"tcdc_apcp1","CDC":"tcdc_apcp1","TCDC":"tcdc_apcp1","SLP":"press"}}}};
    var wgopts_2 = {"id_user":0,"wj":"knots","tj":"c","waj":"m","odh":3,"doh":22,"wrap":40,"fhours":240,"limit1":10.63,"limit2":15.57,"limit3":19.41,"tlimit":10,"vt":1,"params":["WINDSPD","GUST","SMER","TMPE","CDC","APCP1","RATING"],"first_row_mwinfo":true,"path_lng":"\/fr\/"};
    wgopts_2.lang = WgLang;
    WgFcst.showForecast(wg_fcst_tab_data_2,wgopts_2);
                //]]>
                </script>
            </div>

    De ce tableau, je souhaite récupérer les listes "APCP1" et "PCPT" entre autres en tant que listes.

    J'ai commencé à travailler avec le module re afin de chercher dans le texte les informations mentionnées. Le code suit :

    Code : Sélectionner tout - Visualiser dans une fenêtre à part
    1
    2
    3
    4
    5
    6
    7
    import re
    code = 'blab,"id_user":1389136,"nickname":"other users custom spot","id_spot":123546,"TMP":[-18.7,-16.1,-13.6,-13.8,-16.3,-15.7,-15.6,-17,-15.2,-12.3,-10.2,-9.4,-9.9,-12.7,-12.9,-14.5,-12.9,-10,-7,-4.2,-4,-2.9,-1.9,-1.5,-0,1.2,1.8,2.4,2.3]'
    # La variable code sert à tester le programme, à terme il faudrait que ça soit la page web entière
     
    m=re.search(r'"id_user":(\w+)', code)
    result=m.groups()
    print(result)
    Pour "id_user" ou "id_spot", je récupère bien ce qui suit jusqu'à la virgule. Cependant, je ne comprends pas pourquoi si je change "id_user" par "nickname" par exemple ou "TMP", ça m'affiche un message d'erreur que je n'arrive pas à résoudre malgré mes différentes tentatives.



    Mes questions sont les suivantes :

    - le package re est-il le bon à utiliser ?
    - auriez-vous une idée de comment pallier mes problèmes ?

    Merci d'avance à toutes les personnes qui passeront sur ce sujet.

    Bonne journée, UL

  2. #2
    Expert confirmé
    Avatar de tyrtamos
    Homme Profil pro
    Retraité
    Inscrit en
    Décembre 2007
    Messages
    4 486
    Détails du profil
    Informations personnelles :
    Sexe : Homme
    Localisation : France, Var (Provence Alpes Côte d'Azur)

    Informations professionnelles :
    Activité : Retraité

    Informations forums :
    Inscription : Décembre 2007
    Messages : 4 486
    Billets dans le blog
    6
    Par défaut
    Bonjour,

    C'est normal que "nickname" ne marche pas avec \w+ puisque le texte qui va avec contient en plus une apostrophe et plusieurs espaces.

    Essaie ça:

    Code : Sélectionner tout - Visualiser dans une fenêtre à part
    1
    2
    3
    4
    5
    6
    7
    8
    9
    10
    11
    12
    13
    14
    15
    16
    17
    18
    19
    20
    import re
     
    code = """<div class="fcsttabf" id="div_wgfcst2">       
                <script language="JavaScript" type="text/javascript">
                    //<![CDATA[        
            var wg_fcst_tab_data_2 = {"id_spot":802733,"id_user":1389136,"nickname":"other user's custom spot","custom_onlypro":1,"spot":"Canada - Coaticook","lat":45.1335,"lon":-71.8228,"alt":290,"id_model":38,"model":"usnmm","model_alt":367,"levels":1,"sst":null,"sunrise":"07:10","sunset":"16:50","tz":"EST","tzutc":"(UTC-5)","utc_offset":-5,"tzid":"Canada\/Eastern","tides":0,"md5chk":"98e9facefbacac0733d504b8638cf836","fcst":{"38":{"initstamp":1549022400,"TMP":[-18.5,-18.7,-18,-17,-15.9,-15.3,-14.6,-14.2,-14.1,-14.7,-16,-16.3,-16.7,-17,-17.1,-16.9,-16.7,-16.4,-16.6,-16.5,-16.5,-16.3,-15.1,-14,-13.6,-13.3,-13.1,-11.9,-10.3,-9.2,-7.8,-7.6,-7.3,-7.4,-9.3,-10,-11,-9.6,-11.3,-13.2,-14.3,-15.3,-16.6,-17.8,-18.7,-19.2,-20,-19.5,-19.6],"TCDC":[null,14,0,0,0,0,25,0,0,0,0,0,0,0,1,0,0,8,0,1,2,38,100,100,100,100,100,100,100,100,100,100,100,98,60,14,97,19,29,4,4,79,20,6,9,73,80,29,90],"HCDC":[0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,1,13,39,100,82,33,21,87,53,78,0,1,30,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0],"MCDC":[0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,18,100,100,100,100,100,100,66,35,3,0,0,0,0,0,0,0,0,0,0,0,0,0,0,2,14,19,1],"LCDC":[0,14,0,0,0,0,25,0,0,0,0,0,0,0,1,0,0,8,0,0,1,14,100,100,100,100,100,100,100,100,100,100,100,98,60,14,97,19,29,4,4,79,20,6,9,73,78,11,89],"RH":[75,75,74,73,72,71,72,71,69,67,65,63,63,63,65,65,64,70,72,71,75,79,78,78,74,71,83,82,82,83,84,85,88,84,83,84,83,88,85,73,73,81,91,99,100,99,100,92,93],"GUST":[26.1,27.8,27.8,28.6,28.5,28.7,27.3,26.4,24.1,24.7,23.8,25.7,25.4,22.7,21.8,22.3,21.7,22.3,22.2,21.2,20.5,19.2,20.7,20.7,22.2,25.1,27.6,29.6,31.2,30.2,28.7,26.8,25.8,28.3,24.7,24.4,23.5,23.5,20.7,14.9,12.1,10.2,6.9,2.7,2.8,2.8,2.6,1.8,3],"SLP":[1024,1025,1024,1025,1025,1024,1023,1024,1024,1024,1024,1024,1025,1025,1024,1024,1024,1023,1023,1022,1021,1020,1018,1017,1017,1016,1014,1012,1011,1010,1008,1008,1008,1010,1011,1013,1014,1016,1016,1017,1018,1019,1021,1021,1022,1022,1022,1022,1023],"APCP1":[null,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0.1,0.3,0
.1,0.1,0.1,0.1,0.1,0.1,0,0,0,0.1,0,0,0,0,0,0,0,0,0,0,0],"WINDSPD":[10.1,10.6,11.5,12,12.4,12.7,12,12.1,11.9,10.7,8.6,8.4,7.7,6.8,6.4,6.3,5.9,6.5,6.5,6.7,7.1,7.1,7.5,8,8.5,8.9,10.5,11.2,11.5,11.3,11.5,11.6,11.5,15.1,11.9,10.2,7,10.4,9.1,5.8,3.7,3.1,2.7,2.8,2.7,3,2.8,2.6,3.5],"WINDDIR":[247,248,253,252,251,249,247,246,243,244,239,240,240,235,231,225,214,214,213,205,204,201,196,193,197,195,190,197,214,230,245,252,264,285,285,284,273,284,309,309,312,315,254,230,206,182,164,137,151],"SMERN":["11","11","11","11","11","11","11","11","11","11","11","11","11","10","10","10","10","10","9","9","9","9","9","9","9","9","8","9","10","10","11","11","12","13","13","13","12","13","14","14","14","14","11","10","9","8","7","6","7"],"TMPE":[-18,-18.3,-17.5,-16.6,-15.6,-14.9,-14.2,-13.8,-13.7,-14.3,-15.4,-15.7,-16.2,-16.4,-16.5,-16.3,-16.1,-15.9,-16.1,-16.1,-16.1,-15.9,-14.6,-13.5,-13.1,-12.7,-12.5,-11.3,-9.7,-8.6,-7.2,-7,-6.7,-6.8,-8.8,-9.5,-10.5,-9.1,-10.8,-12.7,-13.7,-14.8,-16,-17.2,-18.1,-18.5,-19.4,-18.9,-19],"hr_weekday":[5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,0,0,0,0,0,0,0,0],"hr_h":["07","08","09","10","11","12","13","14","15","16","17","18","19","20","21","22","23","00","01","02","03","04","05","06","07","08","09","10","11","12","13","14","15","16","17","18","19","20","21","22","23","00","01","02","03","04","05","06","07"],"hr_d":["01","01","01","01","01","01","01","01","01","01","01","01","01","01","01","01","01","02","02","02","02","02","02","02","02","02","02","02","02","02","02","02","02","02","02","02","02","02","02","02","02","03","03","03","03","03","03","03","03"],"hours":[0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25,26,27,28,29,30,31,32,33,34,35,36,37,38,39,40,41,42,43,44,45,46,47,48],"vars":["TMP","TCDC","HCDC","MCDC","LCDC","RH","GUST","SLP","APCP1","WINDSPD","WINDDIR","SMERN","TMPE"],"initdate":"2019-02-01 
12:00:00","init_d":"01.02.2019","init_dm":"01.02.","init_h":"12","initstr":"2019020112","model_name":"HRW 4 km","model_longname":"HRW 4 km (US)","id_model":38,"update_last":"2019-02-01 17:19:03","update_next":"2019-02-02 05:25:00","img_param":[],"img_var_map":{"WINDSPD":"windspd","MWINDSPD":"windspd","SMER":"windspd","SMERN":"windspd","WINDDIR":"windspd","TMP":"t2m","TMPE":"t2m","APCP1":"tcdc_apcp1","APCP1s":"tcdc_apcp1","HCDC":"tcdc_apcp1","MCDC":"tcdc_apcp1","LCDC":"tcdc_apcp1","CDC":"tcdc_apcp1","TCDC":"tcdc_apcp1","SLP":"press"}}}};
    var wgopts_2 = {"id_user":0,"wj":"knots","tj":"c","waj":"m","odh":3,"doh":22,"wrap":40,"fhours":240,"limit1":10.63,"limit2":15.57,"limit3":19.41,"tlimit":10,"vt":1,"params":["WINDSPD","GUST","SMER","TMPE","CDC","APCP1","RATING"],"first_row_mwinfo":true,"path_lng":"\/fr\/"};
    wgopts_2.lang = WgLang;
    WgFcst.showForecast(wg_fcst_tab_data_2,wgopts_2);
                //]]>
                </script>
            </div>"""
     
    motif = r'"nickname":"([\w \']+)"'
     
    m = re.search(motif, code)
    if m==None:
        print("Pas trouvé!")
    else:    
        print(m.groups(0)[0])
    Ce qui affiche bien:

    other user's custom spot

  3. #3
    Membre très actif

    Homme Profil pro
    Bidouilleur
    Inscrit en
    Avril 2016
    Messages
    721
    Détails du profil
    Informations personnelles :
    Sexe : Homme
    Localisation : France, Paris (Île de France)

    Informations professionnelles :
    Activité : Bidouilleur

    Informations forums :
    Inscription : Avril 2016
    Messages : 721
    Billets dans le blog
    1
    Par défaut
    Salut.

    Une autre solution simple serait de récupérer le contenu entier de la variable wg_fcst_tab_data_2 et donner ça à manger à json.

    Quelque chose comme :

    Code : Sélectionner tout - Visualiser dans une fenêtre à part
    1
    2
    3
    4
    5
    6
    7
    import json
    m = re.search('var wg_fcst_tab_data_2 = ({.+?});', code)
    if m :
        dico = json.loads(m.group(1))
        print("id spot =>", dico['id_spot'])
        print("id user =>", dico['id_user'])
        print("name =>", dico['nickname'])

  4. #4
    Membre averti
    Homme Profil pro
    Étudiant
    Inscrit en
    Février 2019
    Messages
    12
    Détails du profil
    Informations personnelles :
    Sexe : Homme
    Âge : 29
    Localisation : Canada

    Informations professionnelles :
    Activité : Étudiant
    Secteur : Industrie

    Informations forums :
    Inscription : Février 2019
    Messages : 12
    Par défaut
    Bonjour et merci à tous les deux pour vos réponses.

    Effectivement, les deux solutions marchent très efficacement.

    Cependant, un de mes objectifs principaux est de récupérer une liste du texte (comme "TMP" par exemple) afin de la transformer en une liste dans python pour pouvoir travailler dessus, et je n'y arrive toujours pas. De plus, le fait qu'en fin de texte, l'on retrouve ces mêmes noms de liste ("TMP", etc.) avec du texte derrière complique la chose car je ne souhaite pas exploiter cette information...

    Auriez-vous une idée de comment faire ?

    Merci d'avance à tous pour votre aide et bonne fin de journée !

  5. #5
    Membre très actif

    Homme Profil pro
    Bidouilleur
    Inscrit en
    Avril 2016
    Messages
    721
    Détails du profil
    Informations personnelles :
    Sexe : Homme
    Localisation : France, Paris (Île de France)

    Informations professionnelles :
    Activité : Bidouilleur

    Informations forums :
    Inscription : Avril 2016
    Messages : 721
    Billets dans le blog
    1
    Par défaut
    Salut.

    json fait ce travail pour toi.
    Code : Sélectionner tout - Visualiser dans une fenêtre à part
    print(dico['fcst']['38']['TMP'])
    Comme des TMP, il y en a plusieurs : s'il se situe dans une autre variable js, bah il suffit de faire pareil en chargeant le contenu de la variable dans une autre instance de json.

  6. #6
    Membre averti
    Homme Profil pro
    Étudiant
    Inscrit en
    Février 2019
    Messages
    12
    Détails du profil
    Informations personnelles :
    Sexe : Homme
    Âge : 29
    Localisation : Canada

    Informations professionnelles :
    Activité : Étudiant
    Secteur : Industrie

    Informations forums :
    Inscription : Février 2019
    Messages : 12
    Par défaut
    Salut bistouille,

    Ça marche parfaitement.

    Merci énormément et bonne continuation,
    UL

+ Répondre à la discussion
Cette discussion est résolue.

Discussions similaires

  1. Problème package et getClass
    Par nek_kro_kvlt dans le forum Langage
    Réponses: 3
    Dernier message: 20/09/2007, 17h28
  2. problème packages à la compilation de servlet
    Par laurent333 dans le forum Tomcat et TomEE
    Réponses: 4
    Dernier message: 28/05/2007, 10h41
  3. Problème packages SSIS (mise en production)
    Par kince dans le forum MS SQL Server
    Réponses: 2
    Dernier message: 17/04/2007, 19h40
  4. Problème Package SSIS
    Par mic_schum dans le forum MS SQL Server
    Réponses: 4
    Dernier message: 26/03/2007, 16h20
  5. [AS2] Problème package
    Par wwave dans le forum ActionScript 1 & ActionScript 2
    Réponses: 3
    Dernier message: 27/01/2006, 09h31

Partager

Partager
  • Envoyer la discussion sur Viadeo
  • Envoyer la discussion sur Twitter
  • Envoyer la discussion sur Google
  • Envoyer la discussion sur Facebook
  • Envoyer la discussion sur Digg
  • Envoyer la discussion sur Delicious
  • Envoyer la discussion sur MySpace
  • Envoyer la discussion sur Yahoo