Update agents/research_agent.py
agents/research_agent.py (CHANGED): +82 -84
@@ -1,85 +1,83 @@
-from
-from
-from
-from
-
-class ResearchAgent:
-    def __init__(self):
-        """
-        Initialize the research agent with local Ollama LLM.
-        """
-        print("Initializing ResearchAgent with
 ... (remaining removed lines of the previous Ollama-based implementation were not captured) ...
-        "draft_answer": draft_answer,
-        "context_used": context
+from transformers import AutoTokenizer, AutoModelForSeq2SeqLM
+from typing import Dict, List
+from langchain_core.documents.base import Document
+from config.settings import settings
+import torch
+
+class ResearchAgent:
+    def __init__(self):
+        """
+        Initialize the research agent with a local Hugging Face Transformers model.
+        """
+        print("Initializing ResearchAgent with Hugging Face Transformers...")
+        model_name = getattr(settings, "HF_MODEL_RESEARCH", "google/flan-t5-large")
+
+        self.tokenizer = AutoTokenizer.from_pretrained(model_name)
+        self.model = AutoModelForSeq2SeqLM.from_pretrained(model_name)
+
+        self.device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
+        self.model.to(self.device)
+
+        print(f"Model '{model_name}' initialized successfully on {self.device}.")
+
+    def sanitize_response(self, response_text: str) -> str:
+        """
+        Sanitize the LLM's response by stripping unnecessary whitespace.
+        """
+        return response_text.strip()
+
+    def generate_prompt(self, question: str, context: str) -> str:
+        """
+        Generate a structured prompt that asks the LLM for a precise, factual answer.
+        """
+        prompt = f"""
+You are an AI assistant designed to provide precise and factual answers based on the given context.
+
+**Instructions:**
+- Answer the following question using only the provided context.
+- Be clear, concise, and factual.
+- Return as much information as you can get from the context.
+
+**Question:** {question}
+**Context:**
+{context}
+
+**Provide your answer below:**
+"""
+        return prompt
+
+    def generate(self, question: str, documents: List[Document]) -> Dict:
+        """
+        Generate an initial answer using the provided documents.
+        """
+        print(f"ResearchAgent.generate called with question='{question}' and {len(documents)} documents.")
+
+        # Combine the top document contents into one string
+        context = "\n\n".join([doc.page_content for doc in documents])
+        print(f"Combined context length: {len(context)} characters.")
+
+        # Create a prompt for the LLM
+        prompt = self.generate_prompt(question, context)
+        print("Prompt created for the LLM.")
+
+        # Call the LLM to generate the answer
+        try:
+            print("Running inference with Transformers...")
+            inputs = self.tokenizer(prompt, return_tensors="pt", truncation=True, max_length=1024).to(self.device)
+            # do_sample=True so the temperature setting actually takes effect
+            outputs = self.model.generate(**inputs, max_new_tokens=300, do_sample=True, temperature=0.3)
+            llm_response = self.tokenizer.decode(outputs[0], skip_special_tokens=True)
+            print("Model response received.")
+        except Exception as e:
+            print(f"Error during model inference: {e}")
+            raise RuntimeError("Failed to generate answer due to a model error.") from e
+
+        # Sanitize the response
+        draft_answer = self.sanitize_response(llm_response) if llm_response else "I cannot answer this question based on the provided documents."
+
+        print(f"Generated answer: {draft_answer}")
+
+        return {
+            "draft_answer": draft_answer,
+            "context_used": context
         }
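For reference, a minimal usage sketch of the updated class (not part of the commit). It assumes the repository's agents/ and config/ packages are importable and that the flan-t5 weights can be downloaded; the question and documents below are made-up examples.

# Hypothetical usage -- the sample question and documents are illustrative only.
from agents.research_agent import ResearchAgent
from langchain_core.documents.base import Document

agent = ResearchAgent()  # loads the tokenizer and model shown in the diff above

docs = [
    Document(page_content="The Eiffel Tower was completed in 1889 and stands 330 metres tall."),
    Document(page_content="It was designed and built by Gustave Eiffel's engineering company."),
]

result = agent.generate("When was the Eiffel Tower completed?", docs)
print(result["draft_answer"])       # answer drafted from the documents
print(len(result["context_used"]))  # size of the combined context passed to the model

AutoModelForSeq2SeqLM fits here because flan-t5 is an encoder-decoder model; a decoder-only checkpoint would need AutoModelForCausalLM and a different decoding setup instead.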