Commit d8f03cc
Parent(s): 6cb46f3

refactor(agent): Use human-friendly prompt for reliable chatbot responses

core/support_agent.py CHANGED (+69 -23)
@@ -1,21 +1,14 @@
-
 import traceback
 from typing import Dict, Any, List
 from llama_cpp import Llama
 
-# ✅ THE FIX IS HERE: The new, correct import paths for LangChain
 from langchain_core.language_models.llms import LLM
 from langchain.chains import ConversationalRetrievalChain
 from langchain.memory import ConversationBufferMemory
 from langchain_community.embeddings import HuggingFaceEmbeddings
 from langchain_community.vectorstores import Chroma
 from langchain_core.prompts import PromptTemplate
-from langchain_core.output_parsers import StrOutputParser
-from dotenv import load_dotenv
-
-load_dotenv()
 
-# This class allows us to use our already-loaded llama_cpp model with LangChain
 class LlamaLangChain(LLM):
     llama_instance: Llama
 
@@ -23,16 +16,12 @@ class LlamaLangChain(LLM):
     def _llm_type(self) -> str:
         return "custom"
 
-    # Changed stop to List[str] for better type hinting
     def _call(self, prompt: str, stop: List[str] | None = None, **kwargs) -> str:
-
-
-        )
+        # Give a generous token limit for the answer
+        response = self.llama_instance(prompt, max_tokens=512, stop=stop, stream=False, echo=False)
        return response["choices"][0]["text"]
 
-    # Required for async operations, even if not used, to match the base class
     async def _acall(self, prompt: str, stop: List[str] | None = None, **kwargs) -> str:
-        # For simplicity, we call the sync method. For production, you might want a true async implementation.
         return self._call(prompt, stop, **kwargs)
 
 def format_docs(docs):
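Aside: the LlamaLangChain wrapper above can be smoke-tested on its own before it is wired into any chain. A minimal sketch, assuming a hypothetical local GGUF model path:

from llama_cpp import Llama

# Hypothetical model path; any llama.cpp-compatible GGUF file works here.
llm = LlamaLangChain(llama_instance=Llama(model_path="models/llama-3-8b.Q4_K_M.gguf"))

# invoke() comes from the Runnable interface that langchain_core LLM subclasses
# inherit; a plain string prompt is routed through _call() above.
print(llm.invoke("Say hello in one short sentence."))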
@@ -41,23 +30,80 @@ def format_docs(docs):
 class SupportAgent:
     def __init__(self, llm_instance: Llama, embedding_path: str, db_path: str):
         print("--- Initializing Support Agent (Optimized for Low RAM) ---")
-
         if llm_instance is None:
             raise ValueError("SupportAgent received an invalid LLM instance.")
-
-        # This wrapper is correct
         self.langchain_llm_wrapper = LlamaLangChain(llama_instance=llm_instance)
-
         self.embeddings = HuggingFaceEmbeddings(model_name=embedding_path)
         self.vector_store = Chroma(persist_directory=db_path, embedding_function=self.embeddings)
-        self.conversations = {}
-
-        router_template = """Classify: 'live_data' or 'general_knowledge'. Question: {question} Classification:"""
-        self.router_prompt = PromptTemplate.from_template(router_template)
-        self.router_chain = self.router_prompt | self.langchain_llm_wrapper | StrOutputParser()
-
+        self.conversations: Dict[str, ConversationBufferMemory] = {}
         print("✅ Agent and core components initialized successfully.")
 
+    def _get_or_create_memory(self, conversation_id: str) -> ConversationBufferMemory:
+        if conversation_id not in self.conversations:
+            self.conversations[conversation_id] = ConversationBufferMemory(
+                memory_key="chat_history", return_messages=True, input_key="question", output_key='answer'
+            )
+        return self.conversations[conversation_id]
+
+    def answer(self, payload: dict, conversation_id: str) -> dict:
+        question = payload.get("question", "")
+        live_data_context = payload.get("live_data", "")  # Get the live data from the backend
+        user_role = payload.get("role", "user")
+
+        memory = self._get_or_create_memory(conversation_id)
+
+        try:
+            # === ✅ THE FIX: a simple, human-like prompt that combines everything ✅ ===
+            # No more complex [CONTEXT] blocks.
+            human_friendly_template = """You are a helpful and professional support assistant for the Reachify platform.
+Answer the user's question based on their chat history and the context provided below.
+
+Chat History:
+{chat_history}
+
+Additional Context (if available):
+{context}
+
+Live Data about the User (Role: {role}):
+{live_data}
+
+User's Question: {question}
+
+Your Answer:
+"""
+            # Create a LangChain PromptTemplate from the new string
+            final_prompt = PromptTemplate.from_template(human_friendly_template)
+
+            retriever = self.vector_store.as_retriever()
+
+            # Pass the simplified prompt to the combine-docs step of the chain
+            qa_chain = ConversationalRetrievalChain.from_llm(
+                llm=self.langchain_llm_wrapper,
+                retriever=retriever,
+                memory=memory,
+                combine_docs_chain_kwargs={"prompt": final_prompt}
+            )
+
+            # Supply every variable the new prompt requires
+            result = qa_chain.invoke({
+                "question": question,
+                "live_data": live_data_context,
+                "role": user_role
+            })
+
+            final_answer = result.get("answer", "I'm sorry, I could not find an answer.").strip()
+
+            # Final safety check: bail out if the model echoed the template back
+            if "[NODE_NAME]" in final_answer or "Your Answer:" in final_answer:
+                return {"response": "I'm having trouble generating a clear response right now. Can you please rephrase the question?", "context": "AI returned a template."}
+
+            return {"response": final_answer, "context": format_docs(result.get('source_documents', []))}
+
+        except Exception as e:
+            traceback.print_exc()
+            return {"response": "A critical server error occurred in the AI agent.", "context": str(e)}
+
 
     def _get_or_create_memory(self, conversation_id: str) -> ConversationBufferMemory:
         if conversation_id not in self.conversations: