IdentifiantMot de passe
Loading...
Mot de passe oublié ?Je m'inscris ! (gratuit)
Navigation

Inscrivez-vous gratuitement
pour pouvoir participer, suivre les réponses en temps réel, voter pour les messages, poser vos propres questions et recevoir la newsletter

Python Discussion :

Problème package re [Python 3.X]


Sujet :

Python

  1. #1
    Membre averti
    Homme Profil pro
    Étudiant
    Inscrit en
    Février 2019
    Messages
    12
    Détails du profil
    Informations personnelles :
    Sexe : Homme
    Âge : 29
    Localisation : Canada

    Informations professionnelles :
    Activité : Étudiant
    Secteur : Industrie

    Informations forums :
    Inscription : Février 2019
    Messages : 12
    Par défaut Problème package re
    Bonjour à tous les amis,

    Dans le cadre d'un programme informatique, je souhaiterais récupérer d'une très longue chaîne de caractères (code html d'une page web) certaines informations qui me seraient précieuses.

    Dans cette page web, quatre "tableaux" de ce type sont inclus :

    Code HTML : Sélectionner tout - Visualiser dans une fenêtre à part
    1
    2
    3
    4
    5
    6
    7
    8
    9
    10
    <div class="fcsttabf" id="div_wgfcst2">       
                <script language="JavaScript" type="text/javascript">
                    //<![CDATA[        
            var wg_fcst_tab_data_2 = {"id_spot":802733,"id_user":1389136,"nickname":"other user's custom spot","custom_onlypro":1,"spot":"Canada - Coaticook","lat":45.1335,"lon":-71.8228,"alt":290,"id_model":38,"model":"usnmm","model_alt":367,"levels":1,"sst":null,"sunrise":"07:10","sunset":"16:50","tz":"EST","tzutc":"(UTC-5)","utc_offset":-5,"tzid":"Canada\/Eastern","tides":0,"md5chk":"98e9facefbacac0733d504b8638cf836","fcst":{"38":{"initstamp":1549022400,"TMP":[-18.5,-18.7,-18,-17,-15.9,-15.3,-14.6,-14.2,-14.1,-14.7,-16,-16.3,-16.7,-17,-17.1,-16.9,-16.7,-16.4,-16.6,-16.5,-16.5,-16.3,-15.1,-14,-13.6,-13.3,-13.1,-11.9,-10.3,-9.2,-7.8,-7.6,-7.3,-7.4,-9.3,-10,-11,-9.6,-11.3,-13.2,-14.3,-15.3,-16.6,-17.8,-18.7,-19.2,-20,-19.5,-19.6],"TCDC":[null,14,0,0,0,0,25,0,0,0,0,0,0,0,1,0,0,8,0,1,2,38,100,100,100,100,100,100,100,100,100,100,100,98,60,14,97,19,29,4,4,79,20,6,9,73,80,29,90],"HCDC":[0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,1,13,39,100,82,33,21,87,53,78,0,1,30,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0],"MCDC":[0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,18,100,100,100,100,100,100,66,35,3,0,0,0,0,0,0,0,0,0,0,0,0,0,0,2,14,19,1],"LCDC":[0,14,0,0,0,0,25,0,0,0,0,0,0,0,1,0,0,8,0,0,1,14,100,100,100,100,100,100,100,100,100,100,100,98,60,14,97,19,29,4,4,79,20,6,9,73,78,11,89],"RH":[75,75,74,73,72,71,72,71,69,67,65,63,63,63,65,65,64,70,72,71,75,79,78,78,74,71,83,82,82,83,84,85,88,84,83,84,83,88,85,73,73,81,91,99,100,99,100,92,93],"GUST":[26.1,27.8,27.8,28.6,28.5,28.7,27.3,26.4,24.1,24.7,23.8,25.7,25.4,22.7,21.8,22.3,21.7,22.3,22.2,21.2,20.5,19.2,20.7,20.7,22.2,25.1,27.6,29.6,31.2,30.2,28.7,26.8,25.8,28.3,24.7,24.4,23.5,23.5,20.7,14.9,12.1,10.2,6.9,2.7,2.8,2.8,2.6,1.8,3],"SLP":[1024,1025,1024,1025,1025,1024,1023,1024,1024,1024,1024,1024,1025,1025,1024,1024,1024,1023,1023,1022,1021,1020,1018,1017,1017,1016,1014,1012,1011,1010,1008,1008,1008,1010,1011,1013,1014,1016,1016,1017,1018,1019,1021,1021,1022,1022,1022,1022,1023],"APCP1":[null,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0.1,0.3,0
.1,0.1,0.1,0.1,0.1,0.1,0,0,0,0.1,0,0,0,0,0,0,0,0,0,0,0],"WINDSPD":[10.1,10.6,11.5,12,12.4,12.7,12,12.1,11.9,10.7,8.6,8.4,7.7,6.8,6.4,6.3,5.9,6.5,6.5,6.7,7.1,7.1,7.5,8,8.5,8.9,10.5,11.2,11.5,11.3,11.5,11.6,11.5,15.1,11.9,10.2,7,10.4,9.1,5.8,3.7,3.1,2.7,2.8,2.7,3,2.8,2.6,3.5],"WINDDIR":[247,248,253,252,251,249,247,246,243,244,239,240,240,235,231,225,214,214,213,205,204,201,196,193,197,195,190,197,214,230,245,252,264,285,285,284,273,284,309,309,312,315,254,230,206,182,164,137,151],"SMERN":["11","11","11","11","11","11","11","11","11","11","11","11","11","10","10","10","10","10","9","9","9","9","9","9","9","9","8","9","10","10","11","11","12","13","13","13","12","13","14","14","14","14","11","10","9","8","7","6","7"],"TMPE":[-18,-18.3,-17.5,-16.6,-15.6,-14.9,-14.2,-13.8,-13.7,-14.3,-15.4,-15.7,-16.2,-16.4,-16.5,-16.3,-16.1,-15.9,-16.1,-16.1,-16.1,-15.9,-14.6,-13.5,-13.1,-12.7,-12.5,-11.3,-9.7,-8.6,-7.2,-7,-6.7,-6.8,-8.8,-9.5,-10.5,-9.1,-10.8,-12.7,-13.7,-14.8,-16,-17.2,-18.1,-18.5,-19.4,-18.9,-19],"hr_weekday":[5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,0,0,0,0,0,0,0,0],"hr_h":["07","08","09","10","11","12","13","14","15","16","17","18","19","20","21","22","23","00","01","02","03","04","05","06","07","08","09","10","11","12","13","14","15","16","17","18","19","20","21","22","23","00","01","02","03","04","05","06","07"],"hr_d":["01","01","01","01","01","01","01","01","01","01","01","01","01","01","01","01","01","02","02","02","02","02","02","02","02","02","02","02","02","02","02","02","02","02","02","02","02","02","02","02","02","03","03","03","03","03","03","03","03"],"hours":[0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25,26,27,28,29,30,31,32,33,34,35,36,37,38,39,40,41,42,43,44,45,46,47,48],"vars":["TMP","TCDC","HCDC","MCDC","LCDC","RH","GUST","SLP","APCP1","WINDSPD","WINDDIR","SMERN","TMPE"],"initdate":"2019-02-01 
12:00:00","init_d":"01.02.2019","init_dm":"01.02.","init_h":"12","initstr":"2019020112","model_name":"HRW 4 km","model_longname":"HRW 4 km (US)","id_model":38,"update_last":"2019-02-01 17:19:03","update_next":"2019-02-02 05:25:00","img_param":[],"img_var_map":{"WINDSPD":"windspd","MWINDSPD":"windspd","SMER":"windspd","SMERN":"windspd","WINDDIR":"windspd","TMP":"t2m","TMPE":"t2m","APCP1":"tcdc_apcp1","APCP1s":"tcdc_apcp1","HCDC":"tcdc_apcp1","MCDC":"tcdc_apcp1","LCDC":"tcdc_apcp1","CDC":"tcdc_apcp1","TCDC":"tcdc_apcp1","SLP":"press"}}}};
    var wgopts_2 = {"id_user":0,"wj":"knots","tj":"c","waj":"m","odh":3,"doh":22,"wrap":40,"fhours":240,"limit1":10.63,"limit2":15.57,"limit3":19.41,"tlimit":10,"vt":1,"params":["WINDSPD","GUST","SMER","TMPE","CDC","APCP1","RATING"],"first_row_mwinfo":true,"path_lng":"\/fr\/"};
    wgopts_2.lang = WgLang;
    WgFcst.showForecast(wg_fcst_tab_data_2,wgopts_2);
                //]]>
                </script>
            </div>

    De ce tableau, je souhaite récupérer les listes "APCP1" et "PCPT" entre autres en tant que listes.

    J'ai commencé à travailler avec le module re afin de chercher dans le texte les informations mentionnées. Le code suit :

    Code : Sélectionner tout - Visualiser dans une fenêtre à part
    1
    2
    3
    4
    5
    6
    7
    import re
    code = 'blab,"id_user":1389136,"nickname":"other users custom spot","id_spot":123546,"TMP":[-18.7,-16.1,-13.6,-13.8,-16.3,-15.7,-15.6,-17,-15.2,-12.3,-10.2,-9.4,-9.9,-12.7,-12.9,-14.5,-12.9,-10,-7,-4.2,-4,-2.9,-1.9,-1.5,-0,1.2,1.8,2.4,2.3]'
    # La variable code sert à tester le programme, à terme il faudrait que ça soit la page web entière
     
    m=re.search(r'"id_user":(\w+)', code)
    result=m.groups()
    print(result)
    Pour "id_user" ou "id_spot", je récupère bien ce qui suit jusqu'à la virgule. Cependant, je ne comprends pas pourquoi si je change "id_user" par "nickname" par exemple ou "TMP", ça m'affiche un message d'erreur que je n'arrive pas à résoudre malgré mes différentes tentatives.



    Mes questions sont les suivantes :

    - le package re est-il le bon à utiliser ?
    - auriez-vous une idée de comment pallier mes problèmes ?

    Merci d'avance à toutes les personnes qui passeront sur ce sujet.

    Bonne journée, UL

  2. #2
    Expert confirmé
    Avatar de tyrtamos
    Homme Profil pro
    Retraité
    Inscrit en
    Décembre 2007
    Messages
    4 486
    Détails du profil
    Informations personnelles :
    Sexe : Homme
    Localisation : France, Var (Provence Alpes Côte d'Azur)

    Informations professionnelles :
    Activité : Retraité

    Informations forums :
    Inscription : Décembre 2007
    Messages : 4 486
    Billets dans le blog
    6
    Par défaut
    Bonjour,

    C'est normal que "nickname" ne marche pas avec \w+ puisque le texte qui va avec contient en plus une apostrophe et plusieurs espaces.

    Essaie ça:

    Code : Sélectionner tout - Visualiser dans une fenêtre à part
    1
    2
    3
    4
    5
    6
    7
    8
    9
    10
    11
    12
    13
    14
    15
    16
    17
    18
    19
    20
    import re
     
    code = """<div class="fcsttabf" id="div_wgfcst2">       
                <script language="JavaScript" type="text/javascript">
                    //<![CDATA[        
            var wg_fcst_tab_data_2 = {"id_spot":802733,"id_user":1389136,"nickname":"other user's custom spot","custom_onlypro":1,"spot":"Canada - Coaticook","lat":45.1335,"lon":-71.8228,"alt":290,"id_model":38,"model":"usnmm","model_alt":367,"levels":1,"sst":null,"sunrise":"07:10","sunset":"16:50","tz":"EST","tzutc":"(UTC-5)","utc_offset":-5,"tzid":"Canada\/Eastern","tides":0,"md5chk":"98e9facefbacac0733d504b8638cf836","fcst":{"38":{"initstamp":1549022400,"TMP":[-18.5,-18.7,-18,-17,-15.9,-15.3,-14.6,-14.2,-14.1,-14.7,-16,-16.3,-16.7,-17,-17.1,-16.9,-16.7,-16.4,-16.6,-16.5,-16.5,-16.3,-15.1,-14,-13.6,-13.3,-13.1,-11.9,-10.3,-9.2,-7.8,-7.6,-7.3,-7.4,-9.3,-10,-11,-9.6,-11.3,-13.2,-14.3,-15.3,-16.6,-17.8,-18.7,-19.2,-20,-19.5,-19.6],"TCDC":[null,14,0,0,0,0,25,0,0,0,0,0,0,0,1,0,0,8,0,1,2,38,100,100,100,100,100,100,100,100,100,100,100,98,60,14,97,19,29,4,4,79,20,6,9,73,80,29,90],"HCDC":[0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,1,13,39,100,82,33,21,87,53,78,0,1,30,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0],"MCDC":[0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,18,100,100,100,100,100,100,66,35,3,0,0,0,0,0,0,0,0,0,0,0,0,0,0,2,14,19,1],"LCDC":[0,14,0,0,0,0,25,0,0,0,0,0,0,0,1,0,0,8,0,0,1,14,100,100,100,100,100,100,100,100,100,100,100,98,60,14,97,19,29,4,4,79,20,6,9,73,78,11,89],"RH":[75,75,74,73,72,71,72,71,69,67,65,63,63,63,65,65,64,70,72,71,75,79,78,78,74,71,83,82,82,83,84,85,88,84,83,84,83,88,85,73,73,81,91,99,100,99,100,92,93],"GUST":[26.1,27.8,27.8,28.6,28.5,28.7,27.3,26.4,24.1,24.7,23.8,25.7,25.4,22.7,21.8,22.3,21.7,22.3,22.2,21.2,20.5,19.2,20.7,20.7,22.2,25.1,27.6,29.6,31.2,30.2,28.7,26.8,25.8,28.3,24.7,24.4,23.5,23.5,20.7,14.9,12.1,10.2,6.9,2.7,2.8,2.8,2.6,1.8,3],"SLP":[1024,1025,1024,1025,1025,1024,1023,1024,1024,1024,1024,1024,1025,1025,1024,1024,1024,1023,1023,1022,1021,1020,1018,1017,1017,1016,1014,1012,1011,1010,1008,1008,1008,1010,1011,1013,1014,1016,1016,1017,1018,1019,1021,1021,1022,1022,1022,1022,1023],"APCP1":[null,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0.1,0.3,0
.1,0.1,0.1,0.1,0.1,0.1,0,0,0,0.1,0,0,0,0,0,0,0,0,0,0,0],"WINDSPD":[10.1,10.6,11.5,12,12.4,12.7,12,12.1,11.9,10.7,8.6,8.4,7.7,6.8,6.4,6.3,5.9,6.5,6.5,6.7,7.1,7.1,7.5,8,8.5,8.9,10.5,11.2,11.5,11.3,11.5,11.6,11.5,15.1,11.9,10.2,7,10.4,9.1,5.8,3.7,3.1,2.7,2.8,2.7,3,2.8,2.6,3.5],"WINDDIR":[247,248,253,252,251,249,247,246,243,244,239,240,240,235,231,225,214,214,213,205,204,201,196,193,197,195,190,197,214,230,245,252,264,285,285,284,273,284,309,309,312,315,254,230,206,182,164,137,151],"SMERN":["11","11","11","11","11","11","11","11","11","11","11","11","11","10","10","10","10","10","9","9","9","9","9","9","9","9","8","9","10","10","11","11","12","13","13","13","12","13","14","14","14","14","11","10","9","8","7","6","7"],"TMPE":[-18,-18.3,-17.5,-16.6,-15.6,-14.9,-14.2,-13.8,-13.7,-14.3,-15.4,-15.7,-16.2,-16.4,-16.5,-16.3,-16.1,-15.9,-16.1,-16.1,-16.1,-15.9,-14.6,-13.5,-13.1,-12.7,-12.5,-11.3,-9.7,-8.6,-7.2,-7,-6.7,-6.8,-8.8,-9.5,-10.5,-9.1,-10.8,-12.7,-13.7,-14.8,-16,-17.2,-18.1,-18.5,-19.4,-18.9,-19],"hr_weekday":[5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,0,0,0,0,0,0,0,0],"hr_h":["07","08","09","10","11","12","13","14","15","16","17","18","19","20","21","22","23","00","01","02","03","04","05","06","07","08","09","10","11","12","13","14","15","16","17","18","19","20","21","22","23","00","01","02","03","04","05","06","07"],"hr_d":["01","01","01","01","01","01","01","01","01","01","01","01","01","01","01","01","01","02","02","02","02","02","02","02","02","02","02","02","02","02","02","02","02","02","02","02","02","02","02","02","02","03","03","03","03","03","03","03","03"],"hours":[0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25,26,27,28,29,30,31,32,33,34,35,36,37,38,39,40,41,42,43,44,45,46,47,48],"vars":["TMP","TCDC","HCDC","MCDC","LCDC","RH","GUST","SLP","APCP1","WINDSPD","WINDDIR","SMERN","TMPE"],"initdate":"2019-02-01 
12:00:00","init_d":"01.02.2019","init_dm":"01.02.","init_h":"12","initstr":"2019020112","model_name":"HRW 4 km","model_longname":"HRW 4 km (US)","id_model":38,"update_last":"2019-02-01 17:19:03","update_next":"2019-02-02 05:25:00","img_param":[],"img_var_map":{"WINDSPD":"windspd","MWINDSPD":"windspd","SMER":"windspd","SMERN":"windspd","WINDDIR":"windspd","TMP":"t2m","TMPE":"t2m","APCP1":"tcdc_apcp1","APCP1s":"tcdc_apcp1","HCDC":"tcdc_apcp1","MCDC":"tcdc_apcp1","LCDC":"tcdc_apcp1","CDC":"tcdc_apcp1","TCDC":"tcdc_apcp1","SLP":"press"}}}};
    var wgopts_2 = {"id_user":0,"wj":"knots","tj":"c","waj":"m","odh":3,"doh":22,"wrap":40,"fhours":240,"limit1":10.63,"limit2":15.57,"limit3":19.41,"tlimit":10,"vt":1,"params":["WINDSPD","GUST","SMER","TMPE","CDC","APCP1","RATING"],"first_row_mwinfo":true,"path_lng":"\/fr\/"};
    wgopts_2.lang = WgLang;
    WgFcst.showForecast(wg_fcst_tab_data_2,wgopts_2);
                //]]>
                </script>
            </div>"""
     
    motif = r'"nickname":"([\w \']+)"'
     
    m = re.search(motif, code)
    if m==None:
        print("Pas trouvé!")
    else:    
        print(m.groups(0)[0])
    Ce qui affiche bien:

    other user's custom spot

  3. #3
    Membre très actif

    Homme Profil pro
    Bidouilleur
    Inscrit en
    Avril 2016
    Messages
    721
    Détails du profil
    Informations personnelles :
    Sexe : Homme
    Localisation : France, Paris (Île de France)

    Informations professionnelles :
    Activité : Bidouilleur

    Informations forums :
    Inscription : Avril 2016
    Messages : 721
    Billets dans le blog
    1
    Par défaut
    Salut.

    Une autre solution simple serait de récupérer le contenu entier de la variable wg_fcst_tab_data_2 et donner ça à manger à json.

    Quelque chose comme.

    Code : Sélectionner tout - Visualiser dans une fenêtre à part
    1
    2
    3
    4
    5
    6
    7
    import json
    m = re.search('var wg_fcst_tab_data_2 = ({.+?});', code)
    if m :
        dico = json.loads(m.group(1))
        print("id spot =>", dico['id_spot'])
        print("id user =>", dico['id_user'])
        print("name =>", dico['nickname'])

  4. #4
    Membre averti
    Homme Profil pro
    Étudiant
    Inscrit en
    Février 2019
    Messages
    12
    Détails du profil
    Informations personnelles :
    Sexe : Homme
    Âge : 29
    Localisation : Canada

    Informations professionnelles :
    Activité : Étudiant
    Secteur : Industrie

    Informations forums :
    Inscription : Février 2019
    Messages : 12
    Par défaut
    Bonjour et merci à tous les deux pour vos réponses.

    Effectivement, les deux solutions marchent très efficacement.

    Cependant, un de mes objectifs principaux est de récupérer une liste du texte (comme "TMP" par exemple) afin de la transformer en une liste dans python pour pouvoir travailler dessus, et je n'y arrive toujours pas. De plus, le fait qu'en fin de texte, l'on retrouve ces mêmes noms de liste ("TMP", etc.) avec du texte derrière complique la chose car je ne souhaite pas exploiter cette information...

    Auriez-vous une idée de comment faire ?

    Merci d'avance à tous pour votre aide et bonne fin de journée !

  5. #5
    Membre très actif

    Homme Profil pro
    Bidouilleur
    Inscrit en
    Avril 2016
    Messages
    721
    Détails du profil
    Informations personnelles :
    Sexe : Homme
    Localisation : France, Paris (Île de France)

    Informations professionnelles :
    Activité : Bidouilleur

    Informations forums :
    Inscription : Avril 2016
    Messages : 721
    Billets dans le blog
    1
    Par défaut
    Salut.

    json fait ce travail pour toi.
    Code : Sélectionner tout - Visualiser dans une fenêtre à part
    print(dico['fcst']['38']['TMP'])
    Comme il y a plusieurs TMP, si celui que tu cherches se situe dans une autre variable js, il suffit de faire pareil en chargeant le contenu de cette variable avec un autre appel à json.loads.

  6. #6
    Membre averti
    Homme Profil pro
    Étudiant
    Inscrit en
    Février 2019
    Messages
    12
    Détails du profil
    Informations personnelles :
    Sexe : Homme
    Âge : 29
    Localisation : Canada

    Informations professionnelles :
    Activité : Étudiant
    Secteur : Industrie

    Informations forums :
    Inscription : Février 2019
    Messages : 12
    Par défaut
    Salut bistouille,

    Ça marche parfaitement.

    Merci énormément et bonne continuation,
    UL

  7. #7
    Membre averti
    Homme Profil pro
    Étudiant
    Inscrit en
    Février 2019
    Messages
    12
    Détails du profil
    Informations personnelles :
    Sexe : Homme
    Âge : 29
    Localisation : Canada

    Informations professionnelles :
    Activité : Étudiant
    Secteur : Industrie

    Informations forums :
    Inscription : Février 2019
    Messages : 12
    Par défaut
    En fait, j'ai désormais un autre problème.

    Lorsque je donne la page internet entière à json, il me renvoie une erreur :

    Code : Sélectionner tout - Visualiser dans une fenêtre à part
    1
    2
    3
    4
    5
    6
    7
    8
    9
    10
    Traceback (most recent call last):
      File "C:\Users\xxx\Documents\Tests\texte.py", line 22, in <module>
        dico = json.loads(m.group(1))
      File "C:\Users\xxx\AppData\Local\Programs\Python\Python37\lib\json\__init__.py", line 348, in loads
        return _default_decoder.decode(s)
      File "C:\Users\xxx\AppData\Local\Programs\Python\Python37\lib\json\decoder.py", line 337, in decode
        obj, end = self.raw_decode(s, idx=_w(s, 0).end())
      File "C:\Users\xxx\AppData\Local\Programs\Python\Python37\lib\json\decoder.py", line 353, in raw_decode
        obj, end = self.scan_once(s, idx)
    json.decoder.JSONDecodeError: Invalid \escape: line 1 column 59 (char 58)
    Après quelques recherches, il apparaît que le problème vient du fait qu'il y a notamment des \n et autres \' dans le fichier, que je ne peux modifier...
    J'ai donc cherché à modifier le code fourni en :

    Code : Sélectionner tout - Visualiser dans une fenêtre à part
    1
    2
    json_str = json.loads(code, strict=False)
    m = re.search(r'''var wg_fcst_tab_data_2 = ({.+?});''', json_str)
    Mais cela me renvoie alors
    Code : Sélectionner tout - Visualiser dans une fenêtre à part
    1
    2
    3
    4
    5
    6
    7
    8
    9
    10
    Traceback (most recent call last):
      File "C:\Users\xxx\Documents\Tests\texte.py", line 19, in <module>
        json_str = json.loads(code, strict=False)
      File "C:\Users\xxx\AppData\Local\Programs\Python\Python37\lib\json\__init__.py", line 361, in loads
        return cls(**kw).decode(s)
      File "C:\Users\xxx\AppData\Local\Programs\Python\Python37\lib\json\decoder.py", line 337, in decode
        obj, end = self.raw_decode(s, idx=_w(s, 0).end())
      File "C:\Users\xxx\AppData\Local\Programs\Python\Python37\lib\json\decoder.py", line 355, in raw_decode
        raise JSONDecodeError("Expecting value", s, err.value) from None
    json.decoder.JSONDecodeError: Expecting value: line 1 column 1 (char 0)
    Je n'arrive pas à résoudre ces erreurs...

    Merci d'avance, UL

    EDIT : En fait, après plusieurs tests, ce ne sont pas les \n et autres qui posent problème... Ça semble venir directement de mon fichier texte contenant le code mais je ne parviens pas à savoir d'où...

  8. #8
    Membre très actif

    Homme Profil pro
    Bidouilleur
    Inscrit en
    Avril 2016
    Messages
    721
    Détails du profil
    Informations personnelles :
    Sexe : Homme
    Localisation : France, Paris (Île de France)

    Informations professionnelles :
    Activité : Bidouilleur

    Informations forums :
    Inscription : Avril 2016
    Messages : 721
    Billets dans le blog
    1
    Par défaut
    Que contient m.group(1) ?

    C'est dans cette valeur qu'il faut voir ce qui cloche, et éventuellement supprimer ce qui pose problème.

  9. #9
    Membre averti
    Homme Profil pro
    Étudiant
    Inscrit en
    Février 2019
    Messages
    12
    Détails du profil
    Informations personnelles :
    Sexe : Homme
    Âge : 29
    Localisation : Canada

    Informations professionnelles :
    Activité : Étudiant
    Secteur : Industrie

    Informations forums :
    Inscription : Février 2019
    Messages : 12
    Par défaut
    Alors voici m.group(1) avec le document html directement :

    Code : Sélectionner tout - Visualiser dans une fenêtre à part
    '{"id_spot":802733,"id_user":1389136,"nickname":"other user\\\'s custom spot","custom_onlypro":1,"spot":"Canada - Coaticook","lat":45.1335,"lon":-71.8228,"alt":290,"id_model":38,"model":"usnmm","model_alt":367,"levels":1,"sst":null,"sunrise":"07:07","sunset":"16:55","tz":"EST","tzutc":"(UTC-5)","utc_offset":-5,"tzid":"Canada\\\\/Eastern","tides":0,"md5chk":"46dd3b300fb9e161427316f83ec4afa5","fcst":{"38":{"initstamp":1549281600,"TMP":[1.8,1.9,1.7,1.5,1.2,1.3,1.5,1.6,1.5,1.6,1.4,2.4,2.7,3,3,2.9,3.1,3.3,3.5,3.4,3.5,3.7,3.7,3.8,3.8,3.6,3.6,3.7,3.7,3.4,1.4,0.1,-1.8,-4.2,-7.2,-8.1,-8.6,-8.6,-9.3,-10.5,-11.2,-11.8,-12.3,-12.7,-13.2,-13.7,-14.2,-14.3,-14.6],"TCDC":[null,100,100,100,100,100,57,100,100,100,100,85,36,100,100,100,100,100,29,26,100,100,100,100,93,27,47,93,100,100,100,100,31,100,100,100,100,100,99,74,47,44,27,31,27,22,16,10,21],"HCDC":[0,0,0,0,0,1,1,0,0,0,4,2,8,100,100,100,100,100,11,1,0,6,95,44,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,9,19,0,0,0,0,0,0,0],"MCDC":[0,0,0,0,0,0,0,24,6,7,13,0,0,25,100,100,100,78,17,7,100,100,100,100,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0],"LCDC":[0,100,100,100,100,100,56,100,100,100,100,84,31,22,74,28,6,6,6,20,3,100,100,100,93,27,47,93,100,100,100,100,31,100,100,100,100,100,99,74,42,30,27,31,27,22,16,10,21],"RH":[100,98,99,99,99,99,97,98,99,99,100,99,99,94,98,99,99,98,99,97,96,94,94,97,98,99,99,99,99,99,99,92,85,88,89,87,88,89,86,85,84,84,84,83,82,82,81,80,81],"GUST":[9.3,13.8,11.8,9.3,6.2,6.1,7.4,3.5,0.6,1.9,2.6,5.5,7.3,9.8,12.2,17.3,21.1,23.7,26,29.7,31.1,36.9,32.4,38.7,34.1,29.3,26.8,26.6,24.9,25.1,27.9,30.3,34.3,29.5,26.2,26.4,22.9,20.8,20.3,20.2,19.7,18.5,16.6,13.7,12.2,10.3,6.7,5.1,3.5],"SLP":[1014,1016,1014,1015,1015,1015,1013,1014,1014,1014,1014,1013,1013,1013,1012,1011,1010,1009,1008,1007,1006,1005,1004,1003,1004,1004,1003,1003,1004,1004,1006,1007,1009,1011,1014,1016,1018,1019,1020,1022,1023,1023,1024,1024,1025,1026,1026,1028,1028],"APCP1":[null,0.5,0.9,0.3,0.2,0.1,0.1,0.1,0.3,0.2,0.1,0.1,0,0,0,0,0,0,0,0,0,0,0.1,1.1,0.
8,0.2,0.1,0.1,0.1,0.1,0.9,0.2,0.1,0,0.1,0.2,0.1,0.1,0.1,0,0,0,0,0,0,0,0,0,0],"WINDSPD":[5.2,3.2,2.7,2.1,1.9,2.9,2.5,0.9,0.4,2.2,3,4.1,4.6,5.8,7.3,8.5,9.3,10.3,12.2,12.8,13.1,13.4,11.9,13.8,12.6,10.8,9.5,8.7,8.7,8.8,14,13.9,15.6,14.3,14.3,13.3,12,11.9,12.2,11.1,10,8.9,7.9,7.3,6.8,6,4.3,3.8,2.8],"WINDDIR":[231,236,273,290,231,192,196,236,152,131,141,152,155,154,152,151,154,163,166,164,166,169,167,182,187,189,190,196,206,228,267,259,254,259,272,276,283,297,303,302,303,306,310,314,318,317,328,320,353],"SMERN":["10","10","12","13","10","9","9","10","7","6","6","7","7","7","7","7","7","7","7","7","7","8","7","8","8","8","8","9","9","10","12","12","11","12","12","12","13","13","13","13","13","14","14","14","14","14","15","14","0"],"TMPE":[2.2,2.3,2.2,2,1.8,1.7,1.8,1.8,1.9,2,1.6,2.5,2.7,2.7,2.6,2.8,3.1,3.5,3.7,3.7,3.8,4.1,4.1,4.3,4.2,4.1,4.1,4.1,4.2,3.9,2,0.7,-1.2,-3.6,-6.5,-7.5,-8,-8.1,-8.8,-9.9,-10.7,-11.3,-11.8,-12.2,-12.7,-13.2,-13.7,-13.8,-14.1],"hr_weekday":[1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,3,3,3,3,3,3,3,3],"hr_h":["07","08","09","10","11","12","13","14","15","16","17","18","19","20","21","22","23","00","01","02","03","04","05","06","07","08","09","10","11","12","13","14","15","16","17","18","19","20","21","22","23","00","01","02","03","04","05","06","07"],"hr_d":["04","04","04","04","04","04","04","04","04","04","04","04","04","04","04","04","04","05","05","05","05","05","05","05","05","05","05","05","05","05","05","05","05","05","05","05","05","05","05","05","05","06","06","06","06","06","06","06","06"],"hours":[0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25,26,27,28,29,30,31,32,33,34,35,36,37,38,39,40,41,42,43,44,45,46,47,48],"vars":["TMP","TCDC","HCDC","MCDC","LCDC","RH","GUST","SLP","APCP1","WINDSPD","WINDDIR","SMERN","TMPE"],"initdate":"2019-02-04 12:00:00","init_d":"04.02.2019","init_dm":"04.02.","init_h":"12","initstr":"2019020412","model_name":"HRW 4 km","model_longname":"HRW 4 km 
(US)","id_model":38,"update_last":"2019-02-04 17:19:03","update_next":"2019-02-05 05:25:00","img_param":[],"img_var_map":{"WINDSPD":"windspd","MWINDSPD":"windspd","SMER":"windspd","SMERN":"windspd","WINDDIR":"windspd","TMP":"t2m","TMPE":"t2m","APCP1":"tcdc_apcp1","APCP1s":"tcdc_apcp1","HCDC":"tcdc_apcp1","MCDC":"tcdc_apcp1","LCDC":"tcdc_apcp1","CDC":"tcdc_apcp1","TCDC":"tcdc_apcp1","SLP":"press"}}}}'
    Et avec le code collé directement dans le programme :

    Code : Sélectionner tout - Visualiser dans une fenêtre à part
    '{"id_spot":802733,"id_user":1389136,"nickname":"other user\'s custom spot","custom_onlypro":1,"spot":"Canada - Coaticook","lat":45.1335,"lon":-71.8228,"alt":290,"id_model":38,"model":"usnmm","model_alt":367,"levels":1,"sst":null,"sunrise":"07:10","sunset":"16:50","tz":"EST","tzutc":"(UTC-5)","utc_offset":-5,"tzid":"Canada\\/Eastern","tides":0,"md5chk":"98e9facefbacac0733d504b8638cf836","fcst":{"38":{"initstamp":1549022400,"TMP":[-18.5,-18.7,-18,-17,-15.9,-15.3,-14.6,-14.2,-14.1,-14.7,-16,-16.3,-16.7,-17,-17.1,-16.9,-16.7,-16.4,-16.6,-16.5,-16.5,-16.3,-15.1,-14,-13.6,-13.3,-13.1,-11.9,-10.3,-9.2,-7.8,-7.6,-7.3,-7.4,-9.3,-10,-11,-9.6,-11.3,-13.2,-14.3,-15.3,-16.6,-17.8,-18.7,-19.2,-20,-19.5,-19.6],"TCDC":[null,14,0,0,0,0,25,0,0,0,0,0,0,0,1,0,0,8,0,1,2,38,100,100,100,100,100,100,100,100,100,100,100,98,60,14,97,19,29,4,4,79,20,6,9,73,80,29,90],"HCDC":[0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,1,13,39,100,82,33,21,87,53,78,0,1,30,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0],"MCDC":[0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,18,100,100,100,100,100,100,66,35,3,0,0,0,0,0,0,0,0,0,0,0,0,0,0,2,14,19,1],"LCDC":[0,14,0,0,0,0,25,0,0,0,0,0,0,0,1,0,0,8,0,0,1,14,100,100,100,100,100,100,100,100,100,100,100,98,60,14,97,19,29,4,4,79,20,6,9,73,78,11,89],"RH":[75,75,74,73,72,71,72,71,69,67,65,63,63,63,65,65,64,70,72,71,75,79,78,78,74,71,83,82,82,83,84,85,88,84,83,84,83,88,85,73,73,81,91,99,100,99,100,92,93],"GUST":[26.1,27.8,27.8,28.6,28.5,28.7,27.3,26.4,24.1,24.7,23.8,25.7,25.4,22.7,21.8,22.3,21.7,22.3,22.2,21.2,20.5,19.2,20.7,20.7,22.2,25.1,27.6,29.6,31.2,30.2,28.7,26.8,25.8,28.3,24.7,24.4,23.5,23.5,20.7,14.9,12.1,10.2,6.9,2.7,2.8,2.8,2.6,1.8,3],"SLP":[1024,1025,1024,1025,1025,1024,1023,1024,1024,1024,1024,1024,1025,1025,1024,1024,1024,1023,1023,1022,1021,1020,1018,1017,1017,1016,1014,1012,1011,1010,1008,1008,1008,1010,1011,1013,1014,1016,1016,1017,1018,1019,1021,1021,1022,1022,1022,1022,1023],"APCP1":[null,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0.1,0.3,0.1,0.1,0.1,0.1,0.1,0.1,0,0,0,0
.1,0,0,0,0,0,0,0,0,0,0,0],"WINDSPD":[10.1,10.6,11.5,12,12.4,12.7,12,12.1,11.9,10.7,8.6,8.4,7.7,6.8,6.4,6.3,5.9,6.5,6.5,6.7,7.1,7.1,7.5,8,8.5,8.9,10.5,11.2,11.5,11.3,11.5,11.6,11.5,15.1,11.9,10.2,7,10.4,9.1,5.8,3.7,3.1,2.7,2.8,2.7,3,2.8,2.6,3.5],"WINDDIR":[247,248,253,252,251,249,247,246,243,244,239,240,240,235,231,225,214,214,213,205,204,201,196,193,197,195,190,197,214,230,245,252,264,285,285,284,273,284,309,309,312,315,254,230,206,182,164,137,151],"SMERN":["11","11","11","11","11","11","11","11","11","11","11","11","11","10","10","10","10","10","9","9","9","9","9","9","9","9","8","9","10","10","11","11","12","13","13","13","12","13","14","14","14","14","11","10","9","8","7","6","7"],"TMPE":[-18,-18.3,-17.5,-16.6,-15.6,-14.9,-14.2,-13.8,-13.7,-14.3,-15.4,-15.7,-16.2,-16.4,-16.5,-16.3,-16.1,-15.9,-16.1,-16.1,-16.1,-15.9,-14.6,-13.5,-13.1,-12.7,-12.5,-11.3,-9.7,-8.6,-7.2,-7,-6.7,-6.8,-8.8,-9.5,-10.5,-9.1,-10.8,-12.7,-13.7,-14.8,-16,-17.2,-18.1,-18.5,-19.4,-18.9,-19],"hr_weekday":[5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,0,0,0,0,0,0,0,0],"hr_h":["07","08","09","10","11","12","13","14","15","16","17","18","19","20","21","22","23","00","01","02","03","04","05","06","07","08","09","10","11","12","13","14","15","16","17","18","19","20","21","22","23","00","01","02","03","04","05","06","07"],"hr_d":["01","01","01","01","01","01","01","01","01","01","01","01","01","01","01","01","01","02","02","02","02","02","02","02","02","02","02","02","02","02","02","02","02","02","02","02","02","02","02","02","02","03","03","03","03","03","03","03","03"],"hours":[0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25,26,27,28,29,30,31,32,33,34,35,36,37,38,39,40,41,42,43,44,45,46,47,48],"vars":["TMP","TCDC","HCDC","MCDC","LCDC","RH","GUST","SLP","APCP1","WINDSPD","WINDDIR","SMERN","TMPE"],"initdate":"2019-02-01 12:00:00","init_d":"01.02.2019","init_dm":"01.02.","init_h":"12","initstr":"2019020112","model_name":"HRW 4 
km","model_longname":"HRW 4 km (US)","id_model":38,"update_last":"2019-02-01 17:19:03","update_next":"2019-02-02 05:25:00","img_param":[],"img_var_map":{"WINDSPD":"windspd","MWINDSPD":"windspd","SMER":"windspd","SMERN":"windspd","WINDDIR":"windspd","TMP":"t2m","TMPE":"t2m","APCP1":"tcdc_apcp1","APCP1s":"tcdc_apcp1","HCDC":"tcdc_apcp1","MCDC":"tcdc_apcp1","LCDC":"tcdc_apcp1","CDC":"tcdc_apcp1","TCDC":"tcdc_apcp1","SLP":"press"}}}}'
    Les seules différences que je vois entre ces deux contenus sont l'insertion de \\\ dans le premier cas contre des simples \ dans le deuxième cas... Mais la variable m.group(1) contient bien ce qu'elle doit contenir dans le premier cas, et le programme me renvoie tout de même l'erreur suivante :

    Code : Sélectionner tout - Visualiser dans une fenêtre à part
    1
    2
    3
    4
    5
    6
    7
    8
    9
    10
    Traceback (most recent call last):
      File "C:\Users\Université Laval\Documents\Tests\texte.py", line 22, in <module>
        dico = json.loads(m.group(1))
      File "C:\Users\Université Laval\AppData\Local\Programs\Python\Python37\lib\json\__init__.py", line 348, in loads
        return _default_decoder.decode(s)
      File "C:\Users\Université Laval\AppData\Local\Programs\Python\Python37\lib\json\decoder.py", line 337, in decode
        obj, end = self.raw_decode(s, idx=_w(s, 0).end())
      File "C:\Users\Université Laval\AppData\Local\Programs\Python\Python37\lib\json\decoder.py", line 353, in raw_decode
        obj, end = self.scan_once(s, idx)
    json.decoder.JSONDecodeError: Invalid \escape: line 1 column 59 (char 58)
    Merci de m'aider bistouille !

  10. #10
    Membre très actif

    Homme Profil pro
    Bidouilleur
    Inscrit en
    Avril 2016
    Messages
    721
    Détails du profil
    Informations personnelles :
    Sexe : Homme
    Localisation : France, Paris (Île de France)

    Informations professionnelles :
    Activité : Bidouilleur

    Informations forums :
    Inscription : Avril 2016
    Messages : 721
    Billets dans le blog
    1
    Par défaut
    Salut.

    Comment et avec quoi est chargée la page html ? Ces backslashs doivent être ajoutés à cette étape, enfin je suppose.

    En attendant de comprendre où ils sont ajoutés, rien ne t'empêche de les supprimer avec un simple str.replace("\\", "")

  11. #11
    Membre averti
    Homme Profil pro
    Étudiant
    Inscrit en
    Février 2019
    Messages
    12
    Détails du profil
    Informations personnelles :
    Sexe : Homme
    Âge : 29
    Localisation : Canada

    Informations professionnelles :
    Activité : Étudiant
    Secteur : Industrie

    Informations forums :
    Inscription : Février 2019
    Messages : 12
    Par défaut
    Salut bistouille,

    Ma page internet est chargée avec :

    Code : Sélectionner tout - Visualiser dans une fenêtre à part
    1
    2
    page = urllib.request.urlopen('site web')
    html = str(page.read())
    Effectivement, ta solution avec str.replace("\\", "") solutionne tout, et le programme marche désormais parfaitement en récupérant la page web directement sur internet.

    Ils sont bien ajoutés à la lecture de la page directement car en affichant le code dans un fichier ils sont présents alors qu'ils ne le sont pas dans le code source directement...

    Merci énormément ! UL

  12. #12
    Membre très actif

    Homme Profil pro
    Bidouilleur
    Inscrit en
    Avril 2016
    Messages
    721
    Détails du profil
    Informations personnelles :
    Sexe : Homme
    Localisation : France, Paris (Île de France)

    Informations professionnelles :
    Activité : Bidouilleur

    Informations forums :
    Inscription : Avril 2016
    Messages : 721
    Billets dans le blog
    1
    Par défaut
    Remplace :
    Code : Sélectionner tout - Visualiser dans une fenêtre à part
    html = str(page.read())
    Par :
    Code : Sélectionner tout - Visualiser dans une fenêtre à part
    html = page.read().decode()
    Et tu n'auras plus ces problèmes de caractères échappés, et donc plus besoin de faire de remplacements

  13. #13
    Membre averti
    Homme Profil pro
    Étudiant
    Inscrit en
    Février 2019
    Messages
    12
    Détails du profil
    Informations personnelles :
    Sexe : Homme
    Âge : 29
    Localisation : Canada

    Informations professionnelles :
    Activité : Étudiant
    Secteur : Industrie

    Informations forums :
    Inscription : Février 2019
    Messages : 12
    Par défaut
    Merci beaucoup, ça marche et c'est même bien plus rapide

    Saurais-tu m'expliquer d'où vient l'ajout des \\ si l'on ne met pas .decode() ?

    Bonne journée !

  14. #14
    Membre très actif

    Homme Profil pro
    Bidouilleur
    Inscrit en
    Avril 2016
    Messages
    721
    Détails du profil
    Informations personnelles :
    Sexe : Homme
    Localisation : France, Paris (Île de France)

    Informations professionnelles :
    Activité : Bidouilleur

    Informations forums :
    Inscription : Avril 2016
    Messages : 721
    Billets dans le blog
    1
    Par défaut
    Salut ULaval.

    La méthode read() de l'objet retourné par urllib.request.urlopen retourne un type bytes ; pour convertir un bytes en str, il faut utiliser bytes.decode.

    Maintenant pourquoi le fait de transformer un bytes avec str fait échapper les apostrophes, je ne m'avance pas, mais je pense que c'est lié à la façon dont les chaînes de caractères sont représentées par Python, entre apostrophes donc.

    Un petit test permet de s'en assurer.

    Code : Sélectionner tout - Visualiser dans une fenêtre à part
    1
    2
    3
    4
    5
    6
    7
    8
    >>> s =  "un p'tiot \"apostrophe\""
    >>> s
    'un p\'tiot "apostrophe"'
    >>> b = s.encode()
    >>> b
    b'un p\'tiot "apostrophe"'
    >>> str(b)
    'b\'un p\\\'tiot "apostrophe"\''
    str étant représenté entre apostrophes, les échappements de guillemets sont supprimés, en revanche chaque apostrophe de la chaîne est échappée, logique puisque le délimiteur de chaîne est justement l'apostrophe.
    Le fait de transformer la chaîne en bytes ne change rien.
    En revanche en convertissant le bytes en str (sans decode), il y a une nouvelle conversion en une nouvelle chaîne de la représentation du bytes, ce qui induit une nouvelle séquence d'échappements, et les \' sont transformés en \\\', ce qui est en y réfléchissant bien somme toute logique, car sinon on se retrouverait avec des caractères exotiques ou comme tu as eu, des problèmes de caractères invalides.

+ Répondre à la discussion
Cette discussion est résolue.

Discussions similaires

  1. Problème package et getClass
    Par nek_kro_kvlt dans le forum Langage
    Réponses: 3
    Dernier message: 20/09/2007, 17h28
  2. problème packages à la compilation de servlet
    Par laurent333 dans le forum Tomcat et TomEE
    Réponses: 4
    Dernier message: 28/05/2007, 10h41
  3. Problème packages SSIS (mise en production)
    Par kince dans le forum MS SQL Server
    Réponses: 2
    Dernier message: 17/04/2007, 19h40
  4. Problème Package SSIS
    Par mic_schum dans le forum MS SQL Server
    Réponses: 4
    Dernier message: 26/03/2007, 16h20
  5. [AS2] Problème package
    Par wwave dans le forum ActionScript 1 & ActionScript 2
    Réponses: 3
    Dernier message: 27/01/2006, 09h31

Partager

Partager
  • Envoyer la discussion sur Viadeo
  • Envoyer la discussion sur Twitter
  • Envoyer la discussion sur Google
  • Envoyer la discussion sur Facebook
  • Envoyer la discussion sur Digg
  • Envoyer la discussion sur Delicious
  • Envoyer la discussion sur MySpace
  • Envoyer la discussion sur Yahoo