Commit: add tools

Files changed:
- app.py (+3, -3)
- requirements.txt (+2, -1)
- services/model_handler.py (+282, -82)
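In short, the commit moves the app to an asynchronous flow and gives each agent its own model: app.py's run() becomes a coroutine driven by asyncio.run(), ModelHandler exposes a new generate_answer_async() method, model_handler.py loads a separate model per role (translator, researcher, presenter) from a new MODEL_CONFIG table with per-role fallbacks, every log line gains a model-name prefix, the researcher agent is wired to ArxivTools and PubmedTools through a new _run_with_tools() helper, and a default research template covers the case where generation fails.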
app.py (CHANGED)

@@ -22,7 +22,7 @@ class AutismResearchApp:
         Pergunte o que quiser e eu vou analisar os últimos artigos científicos e fornecer uma resposta baseada em evidências.
         """)

-    def run(self):
+    async def run(self):
         """Run the main application loop"""
         self._setup_streamlit()

@@ -49,7 +49,7 @@ class AutismResearchApp:
             # Sempre usar o modelo, nunca a resposta padrão
             self.model_handler.force_default_response = False

-            answer = self.model_handler.generate_answer(query)
+            answer = await self.model_handler.generate_answer_async(query)

             status.write("✨ Resposta gerada! Exibindo resultados...")

@@ -61,7 +61,7 @@ class AutismResearchApp:

 def main():
     app = AutismResearchApp()
-    app.run()
+    asyncio.run(app.run())

 if __name__ == "__main__":
     main()
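For context, here is a minimal sketch of the entrypoint pattern this file now follows; the class below is illustrative only and the AutismResearchApp internals are elided. Note that the hunks above do not show an `import asyncio` being added to app.py, so the import is presumably already present or added elsewhere.

import asyncio

class App:  # hypothetical stand-in for AutismResearchApp
    async def run(self):
        # Awaitable work (model calls, tool-using agents) can now be awaited directly.
        answer = await self.generate("example query")
        print(answer)

    async def generate(self, query: str) -> str:
        # Stand-in for ModelHandler.generate_answer_async.
        await asyncio.sleep(0)
        return f"answer to {query!r}"

def main():
    # asyncio.run() creates an event loop, runs the coroutine to completion,
    # and closes the loop, mirroring the change to main() above.
    asyncio.run(App().run())

if __name__ == "__main__":
    main()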
requirements.txt (CHANGED)

@@ -9,4 +9,5 @@ agno==1.1.5
 pypdf>=3.11.1
 watchdog>=2.3.1
 sentencepiece>=0.1.99
-tenacity>=8.2.2
+tenacity>=8.2.2
+asyncio
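One caveat on the new dependency: asyncio has shipped with the Python standard library since 3.4, so the pip entry is redundant, and the asyncio package that pip would install from PyPI is an old Python 3.3-era backport. A quick sanity check (hypothetical, run in the target environment):

import asyncio
print(asyncio.__file__)  # should resolve inside the stdlib, not site-packages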
services/model_handler.py (CHANGED)

@@ -9,8 +9,27 @@ from tenacity import retry, stop_after_attempt, wait_exponential
 import time
 import datetime
 import os
+from typing import Tuple, Optional, Dict, Any, List

-
+# Configuração de logging
+logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(name)s - %(levelname)s - %(message)s')
+logger = logging.getLogger(__name__)
+
+# Configurações dos modelos
+MODEL_CONFIG = {
+    "translator": {
+        "primary": "facebook/nllb-200-distilled-600M",
+        "fallback": "google/flan-t5-base"
+    },
+    "researcher": {
+        "primary": "google/flan-t5-large",
+        "fallback": "google/flan-t5-base"
+    },
+    "presenter": {
+        "primary": "bigscience/bloomz-1b7",
+        "fallback": "google/flan-t5-base"
+    }
+}

 # Simple Response class to wrap the model output
 class Response:
@@ -56,55 +75,56 @@ class Response:
         return self.content if self.content else ""

     def __repr__(self):
-        return f"Response(content='{self.content}')"
+        return f"Response(content='{self.content[:50]}{'...' if len(self.content) > 50 else ''}')"

-#
+# Personalizada classe para modelos locais
 class LocalHuggingFaceModel(Model):
-    def __init__(self, model, tokenizer, max_length=512):
-        super().__init__(id="local-huggingface")
+    def __init__(self, model, tokenizer, model_id="local-huggingface", max_length=512):
+        super().__init__(id=model_id)
         self.model = model
         self.tokenizer = tokenizer
         self.max_length = max_length
+        self.model_name = model_id

     async def ainvoke(self, prompt: str, **kwargs) -> str:
         """Async invoke method"""
         try:
-            logging.info(f"ainvoke called with prompt: {prompt[:100] if prompt and isinstance(prompt, str) else 'None'}...")
+            logging.info(f"[{self.model_name}] ainvoke called with prompt: {prompt[:100] if prompt and isinstance(prompt, str) else 'None'}...")
             return await self.invoke(prompt, **kwargs)
         except Exception as e:
-            logging.error(f"Error in ainvoke: {str(e)}")
+            logging.error(f"[{self.model_name}] Error in ainvoke: {str(e)}")
             return Response(f"Error in ainvoke: {str(e)}")

     async def ainvoke_stream(self, prompt: str, **kwargs):
         """Async streaming invoke method"""
         try:
-            logging.info(f"ainvoke_stream called with prompt: {prompt[:100] if prompt and isinstance(prompt, str) else 'None'}...")
+            logging.info(f"[{self.model_name}] ainvoke_stream called with prompt: {prompt[:100] if prompt and isinstance(prompt, str) else 'None'}...")
             result = await self.invoke(prompt, **kwargs)
             yield result
         except Exception as e:
-            logging.error(f"Error in ainvoke_stream: {str(e)}")
+            logging.error(f"[{self.model_name}] Error in ainvoke_stream: {str(e)}")
             yield Response(f"Error in ainvoke_stream: {str(e)}")

     def invoke(self, prompt: str, **kwargs) -> str:
         """Synchronous invoke method"""
         try:
-            logging.info(f"Invoking model with prompt: {prompt[:100] if prompt and isinstance(prompt, str) else 'None'}...")
+            logging.info(f"[{self.model_name}] Invoking model with prompt: {prompt[:100] if prompt and isinstance(prompt, str) else 'None'}...")

             # Check if prompt is None or empty
             if prompt is None:
-                logging.warning("None prompt provided to invoke method")
+                logging.warning(f"[{self.model_name}] None prompt provided to invoke method")
                 return Response("No input provided. Please provide a valid prompt.")

             if not isinstance(prompt, str):
-                logging.warning(f"Non-string prompt provided: {type(prompt)}")
+                logging.warning(f"[{self.model_name}] Non-string prompt provided: {type(prompt)}")
                 try:
                     prompt = str(prompt)
-                    logging.info(f"Converted prompt to string: {prompt[:100]}...")
+                    logging.info(f"[{self.model_name}] Converted prompt to string: {prompt[:100]}...")
                 except:
                     return Response("Invalid input type. Please provide a string prompt.")

             if not prompt.strip():
-                logging.warning("Empty prompt provided to invoke method")
+                logging.warning(f"[{self.model_name}] Empty prompt provided to invoke method")
                 return Response("No input provided. Please provide a non-empty prompt.")

             inputs = self.tokenizer(prompt, return_tensors="pt", padding=True)
@@ -124,13 +144,13 @@ class LocalHuggingFaceModel(Model):

             # Check if output is empty
             if not decoded_output or not decoded_output.strip():
-                logging.warning("Model generated empty output")
+                logging.warning(f"[{self.model_name}] Model generated empty output")
                 return Response("The model did not generate any output. Please try with a different prompt.")

-            logging.info(f"Model generated output: {decoded_output[:100]}...")
+            logging.info(f"[{self.model_name}] Model generated output: {decoded_output[:100]}...")
             return Response(decoded_output)
         except Exception as e:
-            logging.error(f"Error in local model generation: {str(e)}")
+            logging.error(f"[{self.model_name}] Error in local model generation: {str(e)}")
             if hasattr(e, 'args') and len(e.args) > 0:
                 error_message = e.args[0]
             else:
@@ -140,11 +160,11 @@ class LocalHuggingFaceModel(Model):
     def invoke_stream(self, prompt: str, **kwargs):
         """Synchronous streaming invoke method"""
         try:
-            logging.info(f"invoke_stream called with prompt: {prompt[:100] if prompt and isinstance(prompt, str) else 'None'}...")
+            logging.info(f"[{self.model_name}] invoke_stream called with prompt: {prompt[:100] if prompt and isinstance(prompt, str) else 'None'}...")
             result = self.invoke(prompt, **kwargs)
             yield result
         except Exception as e:
-            logging.error(f"Error in invoke_stream: {str(e)}")
+            logging.error(f"[{self.model_name}] Error in invoke_stream: {str(e)}")
             yield Response(f"Error in invoke_stream: {str(e)}")

     def parse_provider_response(self, response: str) -> str:
@@ -159,7 +179,7 @@ class LocalHuggingFaceModel(Model):
         """Async response method - required abstract method"""
         try:
             # Log detalhado de todos os argumentos
-            logging.info(f"aresponse args: prompt={prompt}, kwargs keys={list(kwargs.keys())}")
+            logging.info(f"[{self.model_name}] aresponse args: prompt={prompt}, kwargs keys={list(kwargs.keys())}")

             # Extrair o prompt das mensagens se estiverem disponíveis
             if prompt is None and 'messages' in kwargs and kwargs['messages']:
@@ -168,32 +188,32 @@ class LocalHuggingFaceModel(Model):
                 for message in messages:
                     if hasattr(message, 'role') and message.role == 'user' and hasattr(message, 'content'):
                         prompt = message.content
-                        logging.info(f"Extracted prompt from user message: {prompt[:100] if prompt and isinstance(prompt, str) else 'None'}")
+                        logging.info(f"[{self.model_name}] Extracted prompt from user message: {prompt[:100] if prompt and isinstance(prompt, str) else 'None'}")
                         break

             # Verificar se o prompt está em kwargs['input']
             if prompt is None:
                 if 'input' in kwargs:
                     prompt = kwargs.get('input')
-                    logging.info(f"Found prompt in kwargs['input']: {prompt[:100] if prompt and isinstance(prompt, str) else 'None'}")
+                    logging.info(f"[{self.model_name}] Found prompt in kwargs['input']: {prompt[:100] if prompt and isinstance(prompt, str) else 'None'}")

-            logging.info(f"aresponse called with prompt: {prompt[:100] if prompt and isinstance(prompt, str) else 'None'}...")
+            logging.info(f"[{self.model_name}] aresponse called with prompt: {prompt[:100] if prompt and isinstance(prompt, str) else 'None'}...")

             if not prompt or not isinstance(prompt, str) or not prompt.strip():
-                logging.warning("Empty or invalid prompt in aresponse")
+                logging.warning(f"[{self.model_name}] Empty or invalid prompt in aresponse")
                 return Response("No input provided. Please provide a valid prompt.")

             content = await self.ainvoke(prompt, **kwargs)
             return content if isinstance(content, Response) else Response(content)
         except Exception as e:
-            logging.error(f"Error in aresponse: {str(e)}")
+            logging.error(f"[{self.model_name}] Error in aresponse: {str(e)}")
             return Response(f"Error in aresponse: {str(e)}")

     def response(self, prompt=None, **kwargs):
         """Synchronous response method - required abstract method"""
         try:
             # Log detalhado de todos os argumentos
-            logging.info(f"response args: prompt={prompt}, kwargs keys={list(kwargs.keys())}")
+            logging.info(f"[{self.model_name}] response args: prompt={prompt}, kwargs keys={list(kwargs.keys())}")

             # Extrair o prompt das mensagens se estiverem disponíveis
             if prompt is None and 'messages' in kwargs and kwargs['messages']:
@@ -202,32 +222,32 @@ class LocalHuggingFaceModel(Model):
                 for message in messages:
                     if hasattr(message, 'role') and message.role == 'user' and hasattr(message, 'content'):
                         prompt = message.content
-                        logging.info(f"Extracted prompt from user message: {prompt[:100] if prompt and isinstance(prompt, str) else 'None'}")
+                        logging.info(f"[{self.model_name}] Extracted prompt from user message: {prompt[:100] if prompt and isinstance(prompt, str) else 'None'}")
                         break

             # Verificar se o prompt está em kwargs['input']
             if prompt is None:
                 if 'input' in kwargs:
                     prompt = kwargs.get('input')
-                    logging.info(f"Found prompt in kwargs['input']: {prompt[:100] if prompt and isinstance(prompt, str) else 'None'}")
+                    logging.info(f"[{self.model_name}] Found prompt in kwargs['input']: {prompt[:100] if prompt and isinstance(prompt, str) else 'None'}")

-            logging.info(f"response called with prompt: {prompt[:100] if prompt and isinstance(prompt, str) else 'None'}...")
+            logging.info(f"[{self.model_name}] response called with prompt: {prompt[:100] if prompt and isinstance(prompt, str) else 'None'}...")

             if not prompt or not isinstance(prompt, str) or not prompt.strip():
-                logging.warning("Empty or invalid prompt in response")
+                logging.warning(f"[{self.model_name}] Empty or invalid prompt in response")
                 return Response("No input provided. Please provide a valid prompt.")

             content = self.invoke(prompt, **kwargs)
             return content if isinstance(content, Response) else Response(content)
         except Exception as e:
-            logging.error(f"Error in response: {str(e)}")
+            logging.error(f"[{self.model_name}] Error in response: {str(e)}")
             return Response(f"Error in response: {str(e)}")

     def response_stream(self, prompt=None, **kwargs):
         """Synchronous streaming response method - required abstract method"""
         try:
             # Log detalhado de todos os argumentos
-            logging.info(f"response_stream args: prompt={prompt}, kwargs keys={list(kwargs.keys())}")
+            logging.info(f"[{self.model_name}] response_stream args: prompt={prompt}, kwargs keys={list(kwargs.keys())}")

             # Extrair o prompt das mensagens se estiverem disponíveis
             if prompt is None and 'messages' in kwargs and kwargs['messages']:
@@ -236,26 +256,26 @@ class LocalHuggingFaceModel(Model):
                 for message in messages:
                     if hasattr(message, 'role') and message.role == 'user' and hasattr(message, 'content'):
                         prompt = message.content
-                        logging.info(f"Extracted prompt from user message: {prompt[:100] if prompt and isinstance(prompt, str) else 'None'}")
+                        logging.info(f"[{self.model_name}] Extracted prompt from user message: {prompt[:100] if prompt and isinstance(prompt, str) else 'None'}")
                         break

             # Verificar se o prompt está em kwargs['input']
             if prompt is None:
                 if 'input' in kwargs:
                     prompt = kwargs.get('input')
-                    logging.info(f"Found prompt in kwargs['input']: {prompt[:100] if prompt and isinstance(prompt, str) else 'None'}")
+                    logging.info(f"[{self.model_name}] Found prompt in kwargs['input']: {prompt[:100] if prompt and isinstance(prompt, str) else 'None'}")

-            logging.info(f"response_stream called with prompt: {prompt[:100] if prompt and isinstance(prompt, str) else 'None'}...")
+            logging.info(f"[{self.model_name}] response_stream called with prompt: {prompt[:100] if prompt and isinstance(prompt, str) else 'None'}...")

             if not prompt or not isinstance(prompt, str) or not prompt.strip():
-                logging.warning("Empty or invalid prompt in response_stream")
+                logging.warning(f"[{self.model_name}] Empty or invalid prompt in response_stream")
                 yield Response("No input provided. Please provide a valid prompt.")
                 return

             for chunk in self.invoke_stream(prompt, **kwargs):
                 yield chunk if isinstance(chunk, Response) else Response(chunk)
         except Exception as e:
-            logging.error(f"Error in response_stream: {str(e)}")
+            logging.error(f"[{self.model_name}] Error in response_stream: {str(e)}")
             yield Response(f"Error in response_stream: {str(e)}")

     def generate(self, prompt: str, **kwargs):
@@ -277,7 +297,7 @@ class LocalHuggingFaceModel(Model):

             return decoded_output
         except Exception as e:
-            logging.error(f"Error in generate method: {str(e)}")
+            logging.error(f"[{self.model_name}] Error in generate method: {str(e)}")
             if hasattr(e, 'args') and len(e.args) > 0:
                 error_message = e.args[0]
             else:
@@ -286,17 +306,18 @@ class LocalHuggingFaceModel(Model):

 class ModelHandler:
     """
-    Classe para gerenciar modelos e gerar respostas.
+    Classe para gerenciar múltiplos modelos e gerar respostas.
     """

     def __init__(self):
         """
-        Inicializa o ModelHandler.
+        Inicializa o ModelHandler com múltiplos modelos.
         """
         self.translator = None
         self.researcher = None
         self.presenter = None
         self.force_default_response = False
+        self.models = {}

         # Inicializar modelos
         self._load_models()
@@ -360,6 +381,10 @@ Please provide a detailed explanation about the topic, including:
 - Recent developments or research
 - Real-world implications and applications

+Search for relevant academic papers and medical resources using the provided tools.
+Make sure to include findings from recent research in your response.
+Use ArxivTools and PubmedTools to find the most relevant and up-to-date information.
+
 Aim to write at least 4-5 paragraphs with detailed information.
 Be thorough and informative, covering all important aspects of the topic.
 Use clear and accessible language suitable for a general audience.
@@ -388,16 +413,45 @@ Output:"""
         else:
             logging.error(f"Unknown prompt type: {prompt_type}")
             return f"Unknown prompt type: {prompt_type}"
+
+    @staticmethod
+    def _load_specific_model(model_name: str, purpose: str) -> Tuple[Optional[Any], Optional[Any]]:
+        """
+        Load a specific model with retry logic
+
+        Args:
+            model_name: The name of the model to load
+            purpose: What the model will be used for (logging purposes)
+
+        Returns:
+            A tuple of (model, tokenizer) or (None, None) if loading fails
+        """
+        try:
+            logging.info(f"Attempting to load {purpose} model: {model_name}")
+
+            # Criar diretório de cache se não existir
+            cache_dir = os.path.join(os.path.dirname(os.path.dirname(os.path.abspath(__file__))), "model_cache")
+            os.makedirs(cache_dir, exist_ok=True)
+
+            # Carregar modelo e tokenizer
+            tokenizer = AutoTokenizer.from_pretrained(model_name, cache_dir=cache_dir)
+            model = AutoModelForSeq2SeqLM.from_pretrained(model_name, cache_dir=cache_dir)
+
+            logging.info(f"Successfully loaded {purpose} model: {model_name}")
+            return model, tokenizer
+        except Exception as e:
+            logging.error(f"Error loading {purpose} model {model_name}: {str(e)}")
+            return None, None

     @staticmethod
     @st.cache_resource
-    def
-    """Load
+    def _load_fallback_model():
+        """Load a fallback model"""
         # Define retry decorator for model loading
         @retry(stop=stop_after_attempt(3), wait=wait_exponential(multiplier=1, min=4, max=10))
         def load_with_retry(model_name):
             try:
-                logging.info(f"Attempting to load model from {model_name}")
+                logging.info(f"Attempting to load fallback model from {model_name}")

                 # Criar diretório de cache se não existir
                 cache_dir = os.path.join(os.path.dirname(os.path.dirname(os.path.abspath(__file__))), "model_cache")
@@ -407,10 +461,10 @@ Output:"""
                 tokenizer = AutoTokenizer.from_pretrained(model_name, cache_dir=cache_dir)
                 model = AutoModelForSeq2SeqLM.from_pretrained(model_name, cache_dir=cache_dir)

-                logging.info(f"Successfully loaded model from {model_name}")
+                logging.info(f"Successfully loaded fallback model from {model_name}")
                 return model, tokenizer
             except Exception as e:
-                logging.error(f"Error loading model {model_name}: {str(e)}")
+                logging.error(f"Error loading fallback model {model_name}: {str(e)}")
                 raise

         # Lista de modelos para tentar, em ordem de preferência
@@ -421,50 +475,179 @@ Output:"""
             try:
                 return load_with_retry(model_name)
             except Exception as e:
-                logging.error(f"Failed to load {model_name}: {str(e)}")
+                logging.error(f"Failed to load fallback model {model_name}: {str(e)}")
                 continue

         # Se todos os modelos falharem, retornar None
-        logging.error("All models failed to load")
+        logging.error("All fallback models failed to load")
         return None, None

+    def _get_default_research_content(self, topic):
+        """
+        Gera conteúdo de pesquisa padrão quando não for possível gerar com o modelo.
+
+        Args:
+            topic: O tópico da pesquisa
+
+        Returns:
+            Conteúdo de pesquisa padrão
+        """
+        return f"""
+# Research on {topic}
+
+## Definition and Key Characteristics
+
+{topic} is a subject of significant interest in various fields. While detailed information is currently limited in our system, we understand that it encompasses several key characteristics and has important implications.
+
+## Current Understanding
+
+Research on {topic} continues to evolve, with new findings emerging regularly. The current understanding suggests multiple dimensions to consider when approaching this topic.
+
+## Applications and Implications
+
+The study of {topic} has several real-world applications and implications that affect various sectors including healthcare, education, and social services.
+
+## Conclusion
+
+While our current information on {topic} is limited, it represents an important area for continued research and understanding. For more detailed information, consulting specialized literature and experts is recommended.
+"""
+
     def _load_models(self):
-        """Carrega
-        #
-
+        """Carrega múltiplos modelos para diferentes propósitos"""
+        # Carregar modelo de tradução
+        translator_model, translator_tokenizer = self._load_specific_model(
+            MODEL_CONFIG["translator"]["primary"], "translator"
+        )

+        # Carregar modelo de pesquisa
+        researcher_model, researcher_tokenizer = self._load_specific_model(
+            MODEL_CONFIG["researcher"]["primary"], "researcher"
+        )
+
+        # Carregar modelo de apresentação
+        presenter_model, presenter_tokenizer = self._load_specific_model(
+            MODEL_CONFIG["presenter"]["primary"], "presenter"
+        )
+
+        # Carregar modelo de fallback
+        fallback_model, fallback_tokenizer = self._load_fallback_model()
+
+        # Criar modelos locais
+        if translator_model and translator_tokenizer:
+            self.models["translator"] = LocalHuggingFaceModel(
+                translator_model,
+                translator_tokenizer,
+                model_id=MODEL_CONFIG["translator"]["primary"]
+            )
+        else:
+            # Tentar carregar o modelo fallback para tradutor
+            fallback_translator, fallback_translator_tokenizer = self._load_specific_model(
+                MODEL_CONFIG["translator"]["fallback"], "translator fallback"
+            )
+
+            if fallback_translator and fallback_translator_tokenizer:
+                self.models["translator"] = LocalHuggingFaceModel(
+                    fallback_translator,
+                    fallback_translator_tokenizer,
+                    model_id=MODEL_CONFIG["translator"]["fallback"]
+                )
+            else:
+                self.models["translator"] = LocalHuggingFaceModel(
+                    fallback_model,
+                    fallback_tokenizer,
+                    model_id="fallback-model"
+                )
+
+        if researcher_model and researcher_tokenizer:
+            self.models["researcher"] = LocalHuggingFaceModel(
+                researcher_model,
+                researcher_tokenizer,
+                model_id=MODEL_CONFIG["researcher"]["primary"]
+            )
+        else:
+            # Tentar carregar o modelo fallback para pesquisador
+            fallback_researcher, fallback_researcher_tokenizer = self._load_specific_model(
+                MODEL_CONFIG["researcher"]["fallback"], "researcher fallback"
+            )
+
+            if fallback_researcher and fallback_researcher_tokenizer:
+                self.models["researcher"] = LocalHuggingFaceModel(
+                    fallback_researcher,
+                    fallback_researcher_tokenizer,
+                    model_id=MODEL_CONFIG["researcher"]["fallback"]
+                )
+            else:
+                self.models["researcher"] = LocalHuggingFaceModel(
+                    fallback_model,
+                    fallback_tokenizer,
+                    model_id="fallback-model"
+                )
+
+        if presenter_model and presenter_tokenizer:
+            self.models["presenter"] = LocalHuggingFaceModel(
+                presenter_model,
+                presenter_tokenizer,
+                model_id=MODEL_CONFIG["presenter"]["primary"]
+            )
+        else:
+            # Tentar carregar o modelo fallback para apresentador
+            fallback_presenter, fallback_presenter_tokenizer = self._load_specific_model(
+                MODEL_CONFIG["presenter"]["fallback"], "presenter fallback"
+            )
+
+            if fallback_presenter and fallback_presenter_tokenizer:
+                self.models["presenter"] = LocalHuggingFaceModel(
+                    fallback_presenter,
+                    fallback_presenter_tokenizer,
+                    model_id=MODEL_CONFIG["presenter"]["fallback"]
+                )
+            else:
+                self.models["presenter"] = LocalHuggingFaceModel(
+                    fallback_model,
+                    fallback_tokenizer,
+                    model_id="fallback-model"
+                )
+
+        # Configurar agentes com seus respectivos modelos
         self.translator = Agent(
             name="Translator",
             role="You will translate the query to English",
-            model=
+            model=self.models["translator"],
             goal="Translate to English",
             instructions=[
-                "Translate the query to English"
+                "Translate the query to English",
+                "Preserve all key information from the original query",
+                "Return only the translated text without additional comments"
             ]
         )

+        # Configurar o agente de pesquisa com as ferramentas ArxivTools e PubmedTools
         self.researcher = Agent(
             name="Researcher",
             role="You are a research scholar who specializes in autism research.",
-            model=
+            model=self.models["researcher"],
             instructions=[
                 "You need to understand the context of the question to provide the best answer.",
                 "Be precise and provide detailed information.",
                 "You must create an accessible explanation.",
                 "The content must be for people without autism knowledge.",
                 "Focus on providing comprehensive information about the topic.",
-                "Include definition, characteristics, causes, and current understanding."
+                "Include definition, characteristics, causes, and current understanding.",
+                "ALWAYS use the provided tools (ArxivTools and PubmedTools) to search for relevant information.",
+                "Cite specific papers and studies in your response when appropriate.",
+                "When using tools, specify the search query clearly in your thoughts before making the call."
             ],
             tools=[
-                ArxivTools(),
-                PubmedTools()
-            ]
+                ArxivTools(),  # Usar ferramentas ArxivTools
+                PubmedTools()  # Usar ferramentas PubmedTools
+            ],
+            verbose=True  # Ativar modo verbose para depuração
         )

         self.presenter = Agent(
             name="Presenter",
             role="You are a professional researcher who presents the results of the research.",
-            model=
+            model=self.models["presenter"],
             instructions=[
                 "You are multilingual",
                 "You must present the results in a clear and engaging manner.",
@@ -472,19 +655,38 @@ Output:"""
                 "Provide simple explanations of complex concepts.",
                 "Include a brief conclusion or summary.",
                 "Add emojis to make the presentation more interactive.",
-                "Translate the answer to Portuguese."
+                "Translate the answer to Portuguese.",
+                "Maintain any citations or references from the research in your presentation.",
+                "Do not add fictional information not present in the research."
             ]
         )
+
+        logging.info("Models and agents loaded successfully.")

-    def
-    """
-
+    async def _run_with_tools(self, agent, prompt, max_steps=5):
+        """
+        Executa um agente com suporte a ferramentas e gerencia a execução.

-
+        Args:
+            agent: O agente a ser executado
+            prompt: O prompt a ser enviado para o agente
+            max_steps: Número máximo de passos para execução
+
+        Returns:
+            O resultado da execução do agente
+        """
+        try:
+            logging.info(f"Running agent {agent.name} with tools")
+            result = await agent.arun(prompt, max_steps=max_steps)
+            logging.info(f"Agent {agent.name} execution complete")
+            return result
+        except Exception as e:
+            logging.error(f"Error during agent {agent.name} execution: {str(e)}")
+            return f"Error during {agent.name} execution: {str(e)}"

-    def generate_answer(self, query):
+    async def generate_answer_async(self, query: str) -> str:
         """
-        Gera uma resposta baseada na consulta do usuário.
+        Gera uma resposta baseada na consulta do usuário usando execução assíncrona.

         Args:
             query: A consulta do usuário
@@ -509,7 +711,7 @@ Output:"""
             logging.info(f"Translation prompt: {translation_prompt}")

             try:
-                translation_result = self.translator.run(translation_prompt)
+                translation_result = await self.translator.arun(translation_prompt)
                 logging.info(f"Translation result type: {type(translation_result)}")

                 # Extrair o conteúdo da resposta
@@ -520,12 +722,11 @@ Output:"""
                     logging.error("Empty translation result")
                     return "Desculpe, não foi possível processar sua consulta. Por favor, tente novamente com uma pergunta diferente."

-
-                # Realizar a pesquisa
+                # Realizar a pesquisa com ferramentas
                 research_prompt = self._format_prompt("research", translation_content)
                 logging.info(f"Research prompt: {research_prompt}")

-                research_result = self.researcher.run(research_prompt)
+                research_result = await self._run_with_tools(self.researcher, research_prompt)
                 logging.info(f"Research result type: {type(research_result)}")

                 # Extrair o conteúdo da pesquisa
@@ -541,16 +742,16 @@ Output:"""
                     # Tentar novamente com um prompt mais específico
                     enhanced_prompt = f"""Task: Detailed Research

-Instructions:
-Provide a comprehensive explanation about '{translation_content}'.
-Include definition, characteristics, causes, and current understanding.
-Write at least 4-5 paragraphs with detailed information.
-Be thorough and informative, covering all important aspects of the topic.
-Use clear and accessible language suitable for a general audience.
+Instructions:
+Provide a comprehensive explanation about '{translation_content}'.
+Include definition, characteristics, causes, and current understanding.
+Write at least 4-5 paragraphs with detailed information.
+Be thorough and informative, covering all important aspects of the topic.
+Use clear and accessible language suitable for a general audience.

-Output:"""
+Output:"""
                     logging.info(f"Enhanced research prompt: {enhanced_prompt}")
-                    research_result = self.researcher.run(enhanced_prompt)
+                    research_result = await self._run_with_tools(self.researcher, enhanced_prompt)
                     research_content = self._extract_content(research_result)
                     research_length = len(research_content.strip()) if research_content and isinstance(research_content, str) else 0
                     logging.info(f"Enhanced research content: {research_content}")
@@ -562,11 +763,11 @@ Output:"""
                     logging.info("Using default research content")
                     research_content = self._get_default_research_content(translation_content)

-
+                # Gerar a apresentação
                 presentation_prompt = self._format_prompt("presentation", research_content)
                 logging.info(f"Presentation prompt: {presentation_prompt}")

-                presentation_result = self.presenter.run(presentation_prompt)
+                presentation_result = await self.presenter.arun(presentation_prompt)
                 logging.info(f"Presentation type: {type(presentation_result)}")

                 presentation_content = self._extract_content(presentation_result)
@@ -586,6 +787,5 @@ Output:"""
                 return f"Desculpe, ocorreu um erro ao processar sua consulta: {str(e)}. Por favor, tente novamente mais tarde."

         except Exception as e:
-            logging.error(f"Unexpected error in generate_answer: {str(e)}")
-            return "Desculpe, ocorreu um erro inesperado. Por favor, tente novamente mais tarde."
-
+            logging.error(f"Unexpected error in generate_answer_async: {str(e)}")
+            return "Desculpe, ocorreu um erro inesperado. Por favor, tente novamente mais tarde."
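The loading strategy above combines tenacity retries with an ordered fallback list. A condensed sketch of that pattern, reusing the same decorator arguments from the diff (the CANDIDATE_MODELS list is illustrative, not from the commit):

import logging
from tenacity import retry, stop_after_attempt, wait_exponential
from transformers import AutoModelForSeq2SeqLM, AutoTokenizer

CANDIDATE_MODELS = ["google/flan-t5-base", "google/flan-t5-small"]  # hypothetical order

@retry(stop=stop_after_attempt(3), wait=wait_exponential(multiplier=1, min=4, max=10))
def load_with_retry(model_name):
    # Up to 3 attempts per model; waits between attempts grow exponentially
    # and are clamped to the 4-10 second range by min/max.
    tokenizer = AutoTokenizer.from_pretrained(model_name)
    model = AutoModelForSeq2SeqLM.from_pretrained(model_name)
    return model, tokenizer

def load_first_available():
    # Walk the preference list; fall through to the next candidate on failure.
    for name in CANDIDATE_MODELS:
        try:
            return load_with_retry(name)
        except Exception as e:
            logging.error(f"Failed to load {name}: {e}")
    return None, None

The same try-primary-then-fallback branching is written out three times in _load_models above (translator, researcher, presenter); a loop over MODEL_CONFIG in this style could remove that repetition.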
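Finally, a hypothetical end-to-end use of the new async API, matching how app.py drives it (assumes the Space's dependencies, including agno and transformers, are installed):

import asyncio
from services.model_handler import ModelHandler

async def main():
    handler = ModelHandler()                # loads translator/researcher/presenter models plus fallbacks
    handler.force_default_response = False  # always use the model, as app.py now does
    answer = await handler.generate_answer_async("O que é autismo?")
    print(answer)

if __name__ == "__main__":
    asyncio.run(main())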
|
|