galbendavids committed on
Commit
37bbf25
·
verified ·
1 Parent(s): 0b17d83

agentic rag update

Browse files
__pycache__/agent.cpython-311.pyc ADDED
Binary file (9.78 kB). View file
 
__pycache__/app.cpython-311.pyc ADDED
Binary file (12.5 kB). View file
 
__pycache__/rag_engine.cpython-311.pyc CHANGED
Binary files a/__pycache__/rag_engine.cpython-311.pyc and b/__pycache__/rag_engine.cpython-311.pyc differ
 
agent.py ADDED
@@ -0,0 +1,166 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """
2
+ LangGraph agent: orchestrates RAG pipeline and verifies answer quality.
3
+ If the planned answer is not good, the agent directs the pipeline toward correction (retry with feedback).
4
+ """
5
+
6
+ from typing import Optional, List, TypedDict, Literal
7
+ from langgraph.graph import StateGraph, END
8
+
9
+ from rag_engine import RAGEngine
10
+
11
+
12
class AgentState(TypedDict, total=False):
    """State carried between LangGraph nodes (total=False: every key is optional,
    so each node may return only the keys it updates)."""
    query: str               # original user question
    api_key: str             # Gemini API key forwarded to the engine per call
    refusal: Optional[str]   # set by retrieve when the question concerns an unsupported car
    system_prompt: Optional[str]  # built by retrieve via engine.prepare_generation
    user_prompt: Optional[str]    # built by retrieve via engine.prepare_generation
    steps_log: List[str]     # human-readable progress lines streamed to the UI
    draft_answer: Optional[str]   # latest LLM output from generate
    is_good: bool            # evaluator verdict for the current draft
    feedback: Optional[str]  # evaluator's improvement hint (only when is_good is False)
    iteration: int           # number of completed evaluate passes


# Cap on evaluate passes: route_after_evaluate ends the loop once
# iteration >= this value, even if the draft is still judged "not good".
MAX_REFINE_ITERATIONS = 2
26
+
27
+
28
def build_agent_graph(engine: RAGEngine):
    """Build the LangGraph: retrieve -> generate -> evaluate -> (end | generate with feedback).

    Args:
        engine: RAGEngine supplying retrieval (prepare_generation), API setup
            (configure_api) and the Gemini call helper (_call_api_with_backoff).

    Returns:
        The compiled graph; drive it with an AgentState dict (see run_stream).
    """

    def retrieve(state: AgentState) -> dict:
        """Run RAG up to (not including) LLM. Fill refusal or prompts + steps_log."""
        query = state["query"]
        refusal, system_prompt, user_prompt, steps_log = engine.prepare_generation(query)
        if refusal:
            # Unsupported car / bad comparison: surface the refusal and let the
            # router short-circuit to END without calling the LLM.
            return {"refusal": refusal, "steps_log": steps_log}
        return {
            "system_prompt": system_prompt,
            "user_prompt": user_prompt,
            "steps_log": steps_log,
            "iteration": 0,
        }

    def generate(state: AgentState) -> dict:
        """Call LLM with current prompt + optional feedback. Set draft_answer and append to steps_log."""
        engine.configure_api(state["api_key"])
        system_prompt = state["system_prompt"]
        user_prompt = state["user_prompt"]
        feedback = state.get("feedback") or ""
        steps_log = list(state.get("steps_log") or [])

        if feedback:
            # FIX: evaluate() has already incremented `iteration`, so it is the
            # 1-based refine number; the previous `+ 1` double-counted in the log.
            steps_log.append(f"🔄 Refining (iteration {state.get('iteration', 0)}): {feedback[:80]}...")
        else:
            steps_log.append("💭 Generating response with Gemini...")

        full_prompt = user_prompt
        if feedback:
            # Append the quality checker's correction so the model revises the draft.
            full_prompt = user_prompt + "\n\n[Correction requested by quality check]: " + feedback + "\n\nRevised answer:"

        models = ["gemini-2.0-flash", "gemini-1.5-flash"]
        draft = engine._call_api_with_backoff(system_prompt, full_prompt, models)
        steps_log.append("✅ Draft generated")
        return {"draft_answer": draft, "steps_log": steps_log}

    def evaluate(state: AgentState) -> dict:
        """Check if the answer is good. Set is_good and optionally feedback for refinement."""
        query = state["query"]
        draft = state.get("draft_answer") or ""
        steps_log = list(state.get("steps_log") or [])
        iteration = state.get("iteration", 0)

        # Error / timeout / rate limit responses are not "good" but we don't refine them
        if draft.startswith(("⚠️", "❌", "⏱️")):
            return {"is_good": True, "steps_log": steps_log}  # Treat as final

        steps_log.append("🔍 Evaluating answer quality...")
        engine.configure_api(state["api_key"])
        eval_prompt = f"""You are a quality checker. Given the user question and the assistant's answer, decide if the answer is good.

User question: {query[:300]}

Assistant answer: {draft[:1500]}

Reply with exactly one of:
- YES
- NO: <one short line explaining what to improve>

Reply:"""
        models = ["gemini-2.0-flash", "gemini-1.5-flash"]
        try:
            raw = engine._call_api_with_backoff(
                "You reply only with YES or NO: <feedback>. No other text.",
                eval_prompt,
                models,
            )
        except Exception:
            # Evaluation is best-effort: on any API failure accept the draft.
            raw = "YES"
        # FIX: keep the checker's original text and only compare the verdict
        # case-insensitively. The old code upper-cased `raw` before slicing the
        # feedback out, so the correction was sent back to the LLM in ALL CAPS.
        raw_text = (raw or "").strip()
        verdict = raw_text.upper()
        is_good = verdict.startswith("YES") or "NO" not in verdict[:10]
        feedback = ""
        if not is_good and "NO" in verdict:
            idx = raw_text.find(":")
            if idx != -1:
                feedback = raw_text[idx + 1:].strip()[:200]
            else:
                feedback = "Improve relevance and completeness."

        steps_log.append("✅ Good" if is_good else f"⚠️ Needs improvement: {feedback[:60]}...")
        return {
            "is_good": is_good,
            "feedback": feedback if not is_good else None,
            "iteration": iteration + 1,  # counts completed evaluate passes
            "steps_log": steps_log,
        }

    def route_after_retrieve(state: AgentState) -> Literal["end", "generate"]:
        """Skip generation entirely when retrieve produced a refusal."""
        if state.get("refusal"):
            return "end"
        return "generate"

    def route_after_evaluate(state: AgentState) -> Literal["end", "generate"]:
        """Stop when the draft is good or the refine budget is exhausted."""
        if state.get("is_good") or (state.get("iteration") or 0) >= MAX_REFINE_ITERATIONS:
            return "end"
        return "generate"

    workflow = StateGraph(AgentState)
    workflow.add_node("retrieve", retrieve)
    workflow.add_node("generate", generate)
    workflow.add_node("evaluate", evaluate)

    workflow.set_entry_point("retrieve")
    workflow.add_conditional_edges("retrieve", route_after_retrieve, {"end": END, "generate": "generate"})
    workflow.add_edge("generate", "evaluate")
    workflow.add_conditional_edges("evaluate", route_after_evaluate, {"end": END, "generate": "generate"})

    return workflow.compile()
138
+
139
+
140
def run_stream(engine: RAGEngine, graph, query: str, api_key: str):
    """
    Run the agent graph and yield progress (steps + draft) for each step.
    Updates engine cache and history with the final answer. Yields strings for Gradio.

    Args:
        engine: RAGEngine used for cache-key computation and history updates.
        graph: compiled LangGraph app from build_agent_graph().
        query: the user's question.
        api_key: Gemini API key placed into the initial agent state.

    Yields:
        Display strings: the joined steps log, optionally followed by a blank
        line and the current refusal or draft body; one final string at the end.
    """
    initial: AgentState = {"query": query, "api_key": api_key}
    last_state: AgentState = initial
    # NOTE(review): unpacking each streamed item into (node_name, state)
    # assumes graph.stream() yields 2-tuples; current langgraph releases
    # yield {node_name: state_update} dicts by default, which would fail to
    # unpack here — confirm against the pinned langgraph version.
    # Also, `state` looks like a per-node partial update rather than the
    # merged graph state, so `last_state` may only hold the last node's keys.
    for _node_name, state in graph.stream(initial):
        last_state = state
        steps_log = state.get("steps_log") or []
        refusal = state.get("refusal")
        draft = state.get("draft_answer") or ""
        text = "\n".join(steps_log)
        body = refusal if refusal else draft
        if body:
            text = f"{text}\n\n{body}"
        yield text

    # Final state: update cache and history
    final_answer = last_state.get("refusal") or last_state.get("draft_answer") or ""
    steps_log = last_state.get("steps_log") or []
    # Only cache/record real answers — error/timeout drafts start with these markers.
    if not any(final_answer.startswith(p) for p in ("⚠️", "❌", "⏱️")):
        cache_key = engine._get_cache_key(query)
        engine.response_cache[cache_key] = final_answer
        engine._maintain_conversation_history(query, final_answer)
    steps_log.append("✅ Done")
    # chr(10) is "\n" — avoids a backslash inside the f-string expression.
    yield f"{chr(10).join(steps_log)}\n\n{final_answer}"
app.py CHANGED
@@ -18,18 +18,22 @@ import gradio as gr
18
  import os
19
  import traceback
20
  from rag_engine import RAGEngine
 
21
 
22
- # Initialize RAG Engine - Loads data and builds indices
23
- # This runs once at startup for performance
 
24
  try:
25
  print("🚀 Initializing RAG Engine...")
26
  engine = RAGEngine()
27
  print(f"✅ Engine ready with {len(engine.chunks)} smart chunks")
 
 
28
  except Exception as e:
29
- print(f"❌ Error initializing RAG Engine: {e}")
30
- print("Full traceback:")
31
  traceback.print_exc()
32
  engine = None
 
33
 
34
 
35
  def chat_function(message: str, history: list) -> str:
@@ -58,10 +62,10 @@ The Gemini API key is not set. To fix this:
58
  Get your key from: [Google AI Studio](https://aistudio.google.com/apikey)"""
59
  return
60
 
61
- if not engine:
62
  yield """❌ **Initialization Error**
63
 
64
- The RAG Engine failed to load. This usually means:
65
  - Data files are missing
66
  - Environment is misconfigured
67
  - Check the Space logs for specific error details
@@ -73,9 +77,13 @@ Common solutions:
73
  return
74
 
75
  try:
76
- # Generate response using the RAG engine
77
- # Stream progress updates so the user sees what's happening live.
78
- yield from engine.generate_response_stream(message, history, api_key)
 
 
 
 
79
  except Exception as e:
80
  yield f"""❌ **Error Processing Query**
81
 
 
18
  import os
19
  import traceback
20
  from rag_engine import RAGEngine
21
+ from agent import build_agent_graph, run_stream
22
 
23
+ # Initialize RAG Engine and LangGraph agent (once at startup)
24
+ engine = None
25
+ agent_graph = None
26
  try:
27
  print("🚀 Initializing RAG Engine...")
28
  engine = RAGEngine()
29
  print(f"✅ Engine ready with {len(engine.chunks)} smart chunks")
30
+ agent_graph = build_agent_graph(engine)
31
+ print("✅ LangGraph agent ready (retrieve → generate → evaluate → refine)")
32
  except Exception as e:
33
+ print(f"❌ Error initializing: {e}")
 
34
  traceback.print_exc()
35
  engine = None
36
+ agent_graph = None
37
 
38
 
39
  def chat_function(message: str, history: list) -> str:
 
62
  Get your key from: [Google AI Studio](https://aistudio.google.com/apikey)"""
63
  return
64
 
65
+ if not engine or not agent_graph:
66
  yield """❌ **Initialization Error**
67
 
68
+ The RAG Engine or agent failed to load. This usually means:
69
  - Data files are missing
70
  - Environment is misconfigured
71
  - Check the Space logs for specific error details
 
77
  return
78
 
79
  try:
80
+ # Check cache before running the agent
81
+ cache_key = engine._get_cache_key(message)
82
+ if cache_key in engine.response_cache:
83
+ yield f"🔁 Returned cached result\n\n{engine.response_cache[cache_key]}"
84
+ return
85
+ # Run LangGraph agent: retrieve → generate → evaluate → (refine if needed)
86
+ yield from run_stream(engine, agent_graph, message, api_key)
87
  except Exception as e:
88
  yield f"""❌ **Error Processing Query**
89
 
rag_engine.py CHANGED
@@ -732,6 +732,107 @@ class RAGEngine:
732
 
733
  yield "❌ Failed to get response from API"
734
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
735
  def generate_response(self, query: str, history, api_key: str):
736
  """יצירת תשובה חכמה עם כל 10 העצות"""
737
  if not api_key:
 
732
 
733
  yield "❌ Failed to get response from API"
734
 
735
def configure_api(self, api_key: str) -> None:
    """Configure Gemini API key (for use by external agent).

    Note: genai.configure sets process-global SDK state, not per-instance state.
    """
    genai.configure(api_key=api_key)
738
+
739
def prepare_generation(self, query: str) -> Tuple[Optional[str], Optional[str], Optional[str], List[str]]:
    """
    Run RAG pipeline up to (but not including) the LLM call.

    Steps: normalize the car name, classify the question (comparison vs.
    single model), refuse unsupported cars, hybrid-search the knowledge
    base, then assemble system/user prompts.

    Returns:
        (refusal_message, system_prompt, user_prompt, steps_log).
        If refusal_message is set, the prompts are None; otherwise pass the
        prompts to the LLM. steps_log is always populated for UI streaming.
    """
    steps_log: List[str] = []

    # Map free-form car mentions onto a canonical id; if found, search with
    # the canonical id instead of the raw query for better retrieval.
    steps_log.append("🔍 Normalizing car names...")
    canonical = self._normalize_car_name(query)
    if canonical:
        steps_log.append(f"✅ Recognized canonical id: {canonical}")
        search_query = canonical
    else:
        steps_log.append("ℹ️ No canonical car found; using full query for search")
        search_query = query

    # Rule-based comparison detection (no LLM involved at this stage).
    is_comparison = self._is_comparison_question(query)
    if is_comparison:
        steps_log.append("📋 Detected: comparison question (rule-based)")
    else:
        steps_log.append("📋 Detected: single-model question (rule-based)")

    # Refuse early when the question targets cars the knowledge base lacks:
    # comparisons need at least two supported models; single-model questions
    # are refused only when they clearly name a specific, unsupported car.
    mentioned_supported = self._find_supported_canonicals_in_text(query)
    if is_comparison:
        if len(mentioned_supported) < 2:
            return (self._unsupported_car_refusal(query, is_comparison=True), None, None, steps_log)
    else:
        if not canonical and not mentioned_supported and self._looks_like_specific_car_question(query):
            return (self._unsupported_car_refusal(query, is_comparison=False), None, None, steps_log)

    steps_log.append("🔎 Searching knowledge base (vectors + keywords)...")
    comparison_prompt = ""
    context_results = []

    if is_comparison:
        # Tokenize Latin/Hebrew/digit words; the first two tokens are treated
        # as the two car names for structured extraction.
        # NOTE(review): this picks the first two tokens of the sentence, not
        # necessarily the two car names — verify against typical phrasings.
        cars = re.findall(r'\b[A-Za-z\u05D0-\u05EA0-9]+\b', query)
        if len(cars) >= 2:
            steps_log.append("📊 Extracting structured comparison data (regex)...")
            comparison_data = self._extract_comparison_data(cars[0], cars[1])
            context_results = self._hybrid_search(search_query, top_k=self.max_chunks_comparison)
            steps_log.append(f"✅ Retrieved {len(context_results)} chunks for comparison")
            # Hebrew section headers below are part of the required output
            # format ("advantages of X/Y", "recommendation by user profile").
            comparison_prompt = f"""
Based on the car reviews, create a structured comparison between {cars[0]} and {cars[1]}:

Format your response as:
**יתרונות {cars[0]}:**
- [list advantages]

**יתרונות {cars[1]}:**
- [list advantages]

**המלצה לפי פרופיל משתמש:**
- [personalized recommendation]

Structured Data:
{json.dumps(comparison_data, ensure_ascii=False, indent=2)}

Context from reviews:
"""
        else:
            # Comparison detected but fewer than two name tokens extracted:
            # fall back to a general search with a language instruction.
            context_results = self._hybrid_search(search_query, top_k=self.max_chunks_general)
            comparison_prompt = "Answer in the same language as the user's question. "
            steps_log.append(f"✅ Retrieved {len(context_results)} chunks")
    else:
        context_results = self._hybrid_search(search_query, top_k=self.max_chunks_general)
        steps_log.append(f"✅ Retrieved {len(context_results)} relevant chunks")

    # Flatten retrieved chunks into a context block; each chunk's text is
    # truncated to max_context_chars_per_chunk to bound prompt size.
    context_text = ""
    for r in context_results:
        meta = r['metadata']
        context_text += f"""
Source: {meta['title']}
Topic: {meta['topic']}
Content: {r['text'][:self.max_context_chars_per_chunk]}...

"""
    conversation_context = self._get_context_from_history()
    system_prompt = """You are an expert automotive assistant specializing in car comparisons.
Use the provided context and structured data to answer questions accurately.
Always respond in the same language as the user (Hebrew or English).
For comparison questions, provide a structured analysis with clear advantages for each vehicle.
Focus on facts from the reviews provided.
"""
    user_prompt = f"""Context from car reviews:
{context_text}

Previous conversation context (last turns):
{conversation_context}

User Question: {query}

{comparison_prompt}

Answer:"""
    return (None, system_prompt, user_prompt, steps_log)
835
+
836
  def generate_response(self, query: str, history, api_key: str):
837
  """יצירת תשובה חכמה עם כל 10 העצות"""
838
  if not api_key:
requirements.txt CHANGED
@@ -5,3 +5,5 @@ requests
5
  sentence-transformers
6
  numpy<2.0.0
7
  torch>=2.0.0
 
 
 
5
  sentence-transformers
6
  numpy<2.0.0
7
  torch>=2.0.0
8
+ langgraph>=0.2.0
9
+ langchain-core>=0.3.0
test_agent.py ADDED
@@ -0,0 +1,72 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """
2
+ Test script for the LangGraph agent pipeline.
3
+ Runs several queries with a short wait between them to verify the full flow.
4
+ Requires gemini_api in environment for real LLM calls; otherwise only tests prepare_generation (no API).
5
+ """
6
+
7
+ import os
8
+ import time
9
+ from rag_engine import RAGEngine
10
+ from agent import build_agent_graph, run_stream
11
+
12
+
13
def main():
    """Smoke-test the agent pipeline.

    With the gemini_api environment variable set, run the full LangGraph agent
    over several queries; without it, exercise only prepare_generation.
    """
    print("Loading RAG Engine and building agent graph...")
    engine = RAGEngine()
    graph = build_agent_graph(engine)
    print("OK.\n")

    api_key = os.environ.get("gemini_api")
    if api_key:
        _run_full_agent(engine, graph, api_key)
    else:
        print("⚠️ gemini_api not set. Testing only prepare_generation (no LLM calls).\n")
        _run_offline_checks(engine)


def _run_offline_checks(engine):
    """Exercise engine.prepare_generation only — no API key required."""
    queries = [
        "Tell me about the Audi RS3",
        "Compare Audi RS3 vs Hyundai Elantra N",
        "מה דעתך על BMW X5?",  # should trigger refusal
    ]
    for num, question in enumerate(queries, 1):
        print(f"--- Test {num}: prepare_generation ---")
        print(f"Query: {question!r}")
        refusal, sys_p, user_p, steps = engine.prepare_generation(question)
        if refusal:
            print(f"Refusal (expected for unsupported car): {refusal[:150]}...")
        else:
            print(f"Steps: {len(steps)}; system_prompt length: {len(sys_p or '')}; user_prompt length: {len(user_p or '')}")
        print()
    print("Done (prepare_generation only). Set gemini_api to run full agent.")


def _run_full_agent(engine, graph, api_key):
    """Run the complete agent graph per query, pausing between runs to spare the API."""
    queries = [
        "Tell me about the Audi RS3",
        "Compare Audi RS3 vs Hyundai Elantra N",
        "מה היתרונות של קיה EV9?",
        "מה דעתך על BMW X5?",  # should trigger refusal (unsupported model)
    ]
    wait_seconds = 8

    for num, question in enumerate(queries, 1):
        print(f"--- Test {num}/{len(queries)} ---")
        print(f"Query: {question!r}")
        final_text = None
        yield_count = 0
        try:
            for chunk in run_stream(engine, graph, question, api_key):
                final_text = chunk
                yield_count += 1
            if final_text:
                preview = final_text[:400] + "..." if len(final_text) > 400 else final_text
                print(f"Steps yielded: {yield_count}; final length: {len(final_text)}")
                print(f"Final preview:\n{preview}\n")
            else:
                print("No output received.\n")
        except Exception as e:
            print(f"Error: {e}\n")
        if num < len(queries):
            print(f"Waiting {wait_seconds}s before next query...")
            time.sleep(wait_seconds)

    print("All tests finished.")


if __name__ == "__main__":
    main()