Voici mon code avec les imports bloqués :
Code : Sélectionner tout - Visualiser dans une fenêtre à part
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
from __future__ import annotations
 
import os
import re
#import pandas
import numpy as np
import sys
sys.path = ["src"] + sys.path
 
#from datasets import load_dataset
from googlesearch import search
from langchain_chroma import Chroma
from langchain_community.chat_models import ChatOllama
from langchain_community.document_loaders import PyMuPDFLoader
from langchain_community.embeddings.ollama import OllamaEmbeddings
from langchain_core.documents import Document
from langchain_core.output_parsers import StrOutputParser
from langchain_core.prompts import ChatPromptTemplate
from langchain_core.vectorstores import VectorStore
from langchain_text_splitters import CharacterTextSplitter
from pathlib import Path
from typing import List, TypedDict, Optional
 
from api.config import OLLAMA_URL
from api.services.llm._chat import BaseMessage, HumanMessage, AIMessage, SourcedAIMessage, SystemMessage, DetectbotChat
 
def output_parser_test(ratings, moy):
    """
    Test-only helper: extract a mark out of 10 from each rating text.

    Do not import this function from other modules.

    For every text in ``ratings`` the first "<n>/10" mark is searched, then
    "<n> out of 10" as a fallback. A decimal mark keeps its integer part
    ("7.5/10" -> 7), marks are capped at 10, and a text without any
    recognisable mark scores 0.

    Args:
        ratings: sequence of rating texts.
        moy: list of lists; the mark of ``ratings[i]`` is appended to
            ``moy[i]`` (mutated in place).

    Raises:
        IndexError: if ``moy`` has fewer entries than ``ratings``.
    """
    # The look-behind prevents matching in the middle of a number, e.g. the
    # "5" of "7.5/10" or the "7" of "17/10"; the look-ahead rejects "/100".
    mark = r"(?<![\d.,])(\d{1,2})(?:[.,]\d+)?"
    patterns = (
        re.compile(mark + r"\s*/\s*10(?!\d)"),
        re.compile(mark + r"\s+out of 10(?!\d)"),
    )
    for i, rating in enumerate(ratings):
        note = 0
        for pattern in patterns:
            found = pattern.search(rating)
            if found:
                note = min(int(found.group(1)), 10)
                break
        moy[i].append(note)
 
class ReWOO_args:
    """Settings read by ``ReWOO_Agent.__init__`` and ``compare``: prompts, model names, embedding function and document paths."""
    # Must hold the information needed to create the agent before a task is assigned.
    # TODO: add elements about DocSearch.
    # NOTE(review): the worked example inside planner_prompt uses WolframAlpha[...] and
    # Calculator[...], which are not among the three tools listed in the same prompt
    # (Google, LLM, DocSearch) — confirm this is intended.
    # {task} is the only placeholder of this template.
    planner_prompt = """For the following task, make plans that can solve the problem step by step. For each plan, indicate \
    which external tool together with tool input to retrieve evidence. You can store the evidence into a \
    variable #E that can be called by later tools. (Plan, #E1, Plan, #E2, Plan, ...)
 
    Tools can be one of the following:
    (1) Google[input]: Worker that searches results from Google. Useful when you need to find short
    and succinct answers about a specific topic. The input should be a search query.
    (2) LLM[input]: A pretrained LLM like yourself. Useful when you need to act with general
    world knowledge and common sense. Prioritize it when you are confident in solving the problem
    yourself. Input can be any instruction.
    (3) DocSearch[input]: Research in documents. Useful when you need to find precise knowledge on 
    a specific subject. The input should be a query
 
    For example,
    Task: Thomas, Toby, and Rebecca worked a total of 157 hours in one week. Thomas worked x
    hours. Toby worked 10 hours less than twice what Thomas worked, and Rebecca worked 8 hours
    less than Toby. How many hours did Rebecca work?
    Plan: Given Thomas worked x hours, translate the problem into algebraic expressions and solve
    with Wolfram Alpha. #E1 = WolframAlpha[Solve x + (2x − 10) + ((2x − 10) − 8) = 157]
    Plan: Find out the number of hours Thomas worked. #E2 = LLM[What is x, given #E1]
    Plan: Calculate the number of hours Rebecca worked. #E3 = Calculator[(2 ∗ #E2 − 10) − 8]
 
    Begin! 
    Describe your plans with rich details. Each Plan should be followed by no number and only one #E.
 
    Task: {task}"""
 
    # Template with two placeholders: {plan} and {task}.
    solver_prompt = """Solve the following task or problem. To solve the problem, we have made step-by-step Plan and \
    retrieved corresponding Evidence to each Plan. Use them with caution since long evidence might \
    contain irrelevant information.
 
    {plan}
 
    Now solve the question or task according to provided Evidence above. Respond with the answer
    directly with no extra words.
 
    Task: {task}
    Response:"""
 
    # Model names passed to ChatOllama for each of the three roles.
    planner_llm_model = "mistral"
    worker_llm_model = "mistral"
    solver_llm_model = "mistral"
    # Created once, when this class body is executed (i.e. when the module is imported).
    embedding_fn=OllamaEmbeddings(base_url=OLLAMA_URL,model='nomic-embed-text')
    # Class-level default list: shared by every instance that does not reassign it.
    docpaths : List[str] = ["C:\\Users\\JaufreHACQUARD\\Documents\\detebot_api\\test\\test_data\\guide_myndea.pdf"]
    # Annotation only: no class-level value is assigned to ``task``.
    task : str
 
def chargeDocs(docpaths:List[str], embedding) -> Optional[VectorStore]:
    """Load the given PDF files, split them and index the chunks in Chroma.

    Each path is loaded with ``PyMuPDFLoader``; the loaded documents are split
    with ``CharacterTextSplitter(chunk_size=1000, chunk_overlap=200)`` and
    every chunk is re-wrapped in a ``Document`` whose metadata holds only
    ``document_id`` (file name of the source) and ``detail`` ("page <n>").

    Args:
        docpaths: paths of the documents to load.
        embedding: embedding function handed to ``Chroma.from_documents``.

    Returns:
        The store returned by ``Chroma.from_documents``.
    """
    pages = []
    for docpath in docpaths:
        pages.extend(PyMuPDFLoader(docpath).load())
    splitter = CharacterTextSplitter(chunk_size=1000, chunk_overlap=200)
    chunks = splitter.split_documents(pages)
    docs: List[Document] = [
        Document(
            page_content=chunk.page_content,
            metadata={
                # Split on both separators: the paths used in this file are
                # Windows paths, for which split("/") kept the whole path.
                "document_id": re.split(r"[\\/]", chunk.metadata['source'])[-1],
                "detail": f"page {chunk.metadata['page']}",
            },
        )
        for chunk in chunks
    ]
    return Chroma.from_documents(documents=docs, embedding=embedding)
 
class ReWOO(TypedDict, total=False):
    """State dictionary of one ``ReWOO_Agent`` run.

    Declared with ``total=False`` because the agent fills it incrementally:
    ``lancement`` starts from ``{"task": task}`` and adds the plan keys,
    ``tool_execution`` fills ``results`` and ``solve`` adds ``result``.
    """
    # Task text given to the planner and to the solver.
    task: str
    # Raw text returned by the planner.
    plan_string: str
    # Regex matches extracted from the plan: (plan text, "#E<n>", tool, tool input).
    steps: List
    # Evidence label ("#E<n>") -> stringified tool output.
    results: dict
    # Output of the solver chain.
    # NOTE(review): ``solve`` stores the object returned by ``solver.invoke`` and
    # ``compare`` reads ``.content`` on it, so this is probably not a plain str.
    result: str
 
class ReWOO_Agent:
    """Planner / worker / solver agent.

    ``usage(task)`` runs the whole pipeline: the planner chain writes a plan,
    each plan step is executed with the tool it names (Google, LLM or
    DocSearch) and the solver chain answers the task from the plan and the
    collected evidence.

    TODO (from the original note): create a generation method.
    """
    planner : ChatPromptTemplate | ChatOllama
    worker_llm: ChatOllama
    solver: ChatPromptTemplate | ChatOllama
    state : ReWOO
    vector_store: Optional[VectorStore]

    # One match per plan step: (plan text, "#E<n>", tool name, tool input).
    _STEP_PATTERN = r"Plan:\s*(.+)\s*(#E\d+)\s*=\s*(\w+)\s*\[([^\]]+)\]"

    def __init__(self, params: ReWOO_args) -> None:
        """Build the planner/solver chains, the worker LLM and the vector store.

        Args:
            params: settings object providing the prompts, the Ollama model
                names, the document paths and the embedding function.
        """
        self.planner = ChatPromptTemplate.from_messages([("user", params.planner_prompt)]) | ChatOllama(base_url=OLLAMA_URL, model=params.planner_llm_model)
        self.worker_llm = ChatOllama(base_url=OLLAMA_URL, model=params.worker_llm_model)
        self.solver = ChatPromptTemplate.from_messages([("user", params.solver_prompt)]) | ChatOllama(base_url=OLLAMA_URL, model=params.solver_llm_model)
        self.vector_store = chargeDocs(params.docpaths, params.embedding_fn)

    @staticmethod
    def _substitute_evidence(text: str, results: dict) -> str:
        """Replace each ``#E<n>`` reference of ``text`` by its stored evidence.

        A single regex pass is used instead of chained ``str.replace`` calls so
        that "#E1" never eats the start of "#E10" and so that evidence text
        which itself contains "#E<n>" is not substituted a second time.
        References without a stored result are left untouched.
        """
        return re.sub(
            r"#E\d+",
            lambda ref: results.get(ref.group(0), ref.group(0)),
            text,
        )

    def lancement(self, task:str):
        """Ask the planner for a plan for ``task`` and reset the agent state."""
        self.state = {"task": task}
        result = self.planner.invoke({"task": task})
        self.state["plan_string"] = result.content
        self.state["steps"] = re.findall(self._STEP_PATTERN, result.content)
        self.state["results"] = {}

    def _get_current_task(self):
        """Return the 1-based index of the next step to run, or None when done.

        Progress is measured by the number of stored results, so a plan that
        reuses an evidence label never reaches the end through this counter
        alone (``usage`` bounds its loop for that reason).
        """
        done = len(self.state.get("results") or {})
        if done >= len(self.state["steps"]):
            return None
        return done + 1

    def tool_execution(self):
        """Worker node that executes the tools of a given plan.

        Runs the next pending step and stores ``str(result)`` under the step's
        evidence label. Does nothing when every step already has a result.

        Raises:
            ValueError: if the step names an unknown tool, or names DocSearch
                while no vector store is available.
        """
        _step = self._get_current_task()
        if _step is None:
            # Nothing left to execute.
            return
        results = self.state.get("results")
        if results is None:
            results = self.state["results"] = {}
        _, step_name, tool, tool_input = self.state["steps"][_step - 1]
        tool_input = self._substitute_evidence(tool_input, results)
        if tool == "Google":
            search_results = search(tool_input, advanced=True)
            result = [(search_result.url, search_result.description) for search_result in search_results]
        elif tool == "LLM":
            # Keep only the message text: str() of the whole message object
            # would add metadata noise to the evidence fed to later steps.
            result = self.worker_llm.invoke(tool_input).content
        elif tool == "DocSearch":
            if self.vector_store is None:
                raise ValueError("DocSearch requested but no vector store is loaded")
            result = self.vector_store.similarity_search_with_relevance_scores(tool_input)
        else:
            raise ValueError(f"Unknown tool {tool!r} in plan step {step_name}")
        results[step_name] = str(result)

    def solve(self):
        """Build the evidence-filled plan and store the solver output in the state.

        In every step both the evidence label and the tool input have their
        ``#E<n>`` references replaced by the collected evidence; steps are
        separated by a newline. The object returned by the solver chain is
        stored as-is under ``state["result"]``.
        """
        results = self.state.get("results") or {}
        lines = []
        for _plan, step_name, tool, tool_input in self.state["steps"]:
            tool_input = self._substitute_evidence(tool_input, results)
            step_name = self._substitute_evidence(step_name, results)
            lines.append(f"Plan: {_plan}\n{step_name} = {tool}[{tool_input}]")
        plan = "\n".join(lines)
        result = self.solver.invoke({"plan": plan, "task": self.state["task"]})
        self.state["result"] = result

    def _route(self):
        """Return "tool" while steps remain to be executed, "solve" otherwise."""
        if self._get_current_task() is None:
            # We have executed all tasks
            return "solve"
        # We are still executing tasks, loop back to the "tool" node
        return "tool"

    def usage(self, task):
        """Plan, execute every step, solve, and return the final state dict."""
        self.lancement(task=task)
        # At most one execution per planned step: this bounds the loop even if
        # the planner reused an evidence label (the result count would then
        # never reach the step count).
        for _ in range(len(self.state["steps"])):
            if self._route() != "tool":
                break
            self.tool_execution()
        self.solve()
        return self.state
 
def compare(args : ReWOO_args, *, dataset_path: str = "C:\\Users\\JaufreHACQUARD\\Documents\\detebot_api\\notebooks\\ds.jsonl", nb_turn: int = 3, stride: int = 5):
    """Compare the ReWOO agent with the plain RAG chat on a question dataset.

    One record out of ``stride`` is taken from the dataset. For ``nb_turn``
    rounds, each sampled question is answered by both systems, each answer is
    rated by a judge LLM against the record's ``cot_answer``, and the marks
    are parsed with ``output_parser_test``.

    Args:
        args: settings used to build the ReWOO agent; its ``worker_llm_model``
            is also used for the ``DetectbotChat`` baseline.
        dataset_path: JSON Lines file, one object per line with at least the
            keys ``question`` and ``cot_answer``.
        nb_turn: number of evaluation rounds.
        stride: sampling step over the dataset records.

    Returns:
        One row per sampled question:
        ``[mean_naif, variance_naif, mean_ReWOO, variance_ReWOO]`` where
        "naif" is the ``DetectbotChat`` baseline. Rows contain NaN for a
        question that received no mark.
    """
    # Function-scope import: the module does not import json at top level.
    import json

    agent1 = ReWOO_Agent(params=args)
    agent2 = DetectbotChat(ChatOllama(base_url=OLLAMA_URL, model=args.worker_llm_model), vector_store=agent1.vector_store)

    # Read with the standard library: the ``datasets`` import is commented out
    # in this module, so ``load_dataset`` was an undefined name here.
    # Assumes one JSON object per line (.jsonl) — TODO confirm for other files.
    with open(dataset_path, encoding="utf-8") as dataset_file:
        train = [json.loads(line) for line in dataset_file if line.strip()]
    sampled = train[::stride]

    # One list of marks per sampled question, filled round after round.
    moy1 = [[] for _ in sampled]
    moy2 = [[] for _ in sampled]

    # The judge chain does not depend on the question: build it once.
    prompt = ChatPromptTemplate.from_template("""Down below is the LLM output from a QA-type chatbot. This response is made up of a prompt, a question and documents
                                                                    useful to answer the question. The answer sequence to analyze should start with the tag <ANSWER> :
                                                                    {LLM_output}. Compare this sequence to the true answer given here : {true_answer} . Rate the LLM_output out of 10 based with the on its resemblance to the true_answer in the format X/10.""")
    judge = prompt | ChatOllama(base_url=OLLAMA_URL, model="mistral:latest") | StrOutputParser()

    try:
        for i in range(nb_turn):
            answer_list1 = []
            answer_list2 = []
            for index, record in enumerate(sampled):
                question = record['question']
                chat = agent2.chat_with_rag(question)
                # presumably the 4th chat message is the assistant answer — TODO confirm
                answer_list2.append(chat.chat_list[3].content)
                answer_list1.append(agent1.usage(task=question)["result"].content)
                print('generating output ', i, index * stride) #~1min / line
            rating_list1 = []
            rating_list2 = []
            for record, answer1, answer2 in zip(sampled, answer_list1, answer_list2):
                true_answer = record['cot_answer']
                # Plain strings: wrapping them in {...} built one-element sets,
                # which were rendered as "{'...'}" inside the judge prompt.
                rating_list2.append(judge.invoke({"LLM_output": answer2, "true_answer": true_answer}))
                rating_list1.append(judge.invoke({"LLM_output": answer1, "true_answer": true_answer}))
            output_parser_test(rating_list2, moy2)
            output_parser_test(rating_list1, moy1)
    except PermissionError:
        print("nb_turn trop grand, veuillez bien diminuer")

    # Iterate over the accumulators rather than a variable bound inside the
    # try block, which is undefined when the first round fails early.
    stats = []
    for naif, rewoo in zip(moy2, moy1):
        naif_marks = np.asarray(naif)
        rewoo_marks = np.asarray(rewoo)
        stats.append([np.mean(naif_marks), np.var(naif_marks), np.mean(rewoo_marks), np.var(rewoo_marks)])
    return stats
 
if __name__ == "__main__":
    # Script entry point: run the comparison on four PDF documents.
    pdf_paths = [
        "C:\\Users\\JaufreHACQUARD\\Documents\\detebot_api\\test\\test_data\\guide_myndea.pdf",
        "C:\\Users\\JaufreHACQUARD\\Documents\\detebot_api\\Détails.pdf",
        "C:\\Users\\JaufreHACQUARD\\Documents\\detebot_api\\Kober_affect.pdf",
        "C:\\Users\\JaufreHACQUARD\\Documents\\detebot_api\\Kober-grille-orientation.pdf",
    ]
    rewoo_args = ReWOO_args()
    # Instance-level override of the class default document list.
    rewoo_args.docpaths = pdf_paths
    comparison = compare(args=rewoo_args)
    # The CSV export below is currently commented out:
    #comparison.to_csv(Path("C:\\Users\\JaufreHACQUARD\\Documents\\detebot_api\\notebooks/question_evals.csv"), sep=';')
Quand je ne bloque pas pandas et datasets, ça s'arrête sans erreur à la ligne Chroma.from_documents.