Spaces:

pluto90
/

Smart-Notes-backend

Running

App Files Files Community

pluto90 committed on Apr 12

Commit

bb25312

verified ·

1 Parent(s): 5ac77c8

Upload 7 files

Browse files

Files changed (5) hide show

app/graph/nodes/evaluator.py +92 -109
app/graph/nodes/hybrid_agent.py +34 -0
app/graph/nodes/rag_agent.py +19 -6
app/graph/nodes/router.py +216 -33
app/graph/nodes/synthesizer.py +17 -19

app/graph/nodes/evaluator.py CHANGED Viewed

@@ -1,109 +1,92 @@
-# # app/graph/nodes/evaluator.py
-# from app.core.llm_engine import llm
-# from app.core.prompts.evaluator_prompt import evaluator_prompt
-# from langchain_core.output_parsers import StrOutputParser
-# import json
-# chain = evaluator_prompt | llm | StrOutputParser()
-# def evaluator_node(state):
-#     query = state.get("query")
-#     answer = state.get("final_answer")
-#     context = state.get("context", "")
-#     try:
-#         response = chain.invoke({
-#                 "query": query,
-#                 "answer": answer,
-#                 "context": context
-#             })
-#         # 🔥 clean response (important)
-#         response = response.strip()
-#         # sometimes model adds ```json
-#         if response.startswith("```"):
-#             response = response.replace("```json", "").replace("```", "").strip()
-#         evaluation = json.loads(response)
-#     except Exception as e:
-#         print("EVALUATOR ERROR →", e)
-#         evaluation = {
-#             "relevance_score": 0.5,
-#             "context_usage": 0.5,
-#             "hallucination": True
-#         }
-#     return {
-#         **state,
-#         "evaluation": evaluation
-#     }
-# app/graph/nodes/evaluator.py
-from app.core.llm_engine import llm
-from app.core.prompts.evaluator_prompt import evaluator_prompt
-from langchain_core.output_parsers import StrOutputParser
-import json, re
-chain = evaluator_prompt | llm | StrOutputParser()
-def evaluator_node(state):
-    query = state.get("query")
-    answer = state.get("final_answer")
-    context = state.get("context", "")
-    try:
-        response = chain.invoke({
-            "query": query,
-            "answer": answer,
-            "context": context
-        }).strip()
-        # 🔥 remove markdown/code blocks
-        response = re.sub(r"```.*?```", "", response, flags=re.DOTALL).strip()
-        # 🔥 extract JSON only
-        match = re.search(r"\{.*\}", response, re.DOTALL)
-        if match:
-            response = match.group(0)
-        # 🔥 validate JSON start
-        if not response.startswith("{"):
-            raise ValueError("Invalid JSON from LLM")
-        evaluation = json.loads(response)
-        # 🔥 clamp values
-        evaluation = {
-            "relevance_score": min(max(evaluation.get("relevance_score", 0), 0), 1),
-            "context_usage": min(max(evaluation.get("context_usage", 0), 0), 1),
-            "hallucination": bool(evaluation.get("hallucination", True))
-        }
-    except Exception as e:
-        print("EVALUATOR ERROR →", e)
-        evaluation = {
-            "relevance_score": 0.5,
-            "context_usage": 0.5,
-            "hallucination": True
-        }
-    return {
-        **state,
-        "evaluation": evaluation
-    }

+# app/graph/nodes/evaluator.py
+from app.core.llm_engine import eval_llm
+from app.core.prompts.evaluator_prompt import evaluator_prompt
+from langchain_core.output_parsers import StrOutputParser
+import json, re
+# Evaluation pipeline: evaluator prompt -> eval_llm -> plain-string output.
+chain = evaluator_prompt | eval_llm | StrOutputParser()
+def _extract_json(text: str) -> dict:
+    """Extract the first JSON object embedded in an LLM response.
+
+    Thinking/reasoning blocks and markdown code-fence markers are removed
+    first. Each "{" is then tried as the start of an object, so prose that
+    follows the JSON (even prose containing braces) does not break parsing
+    the way a single greedy "{...}" match would.
+
+    Args:
+        text: Raw model output.
+
+    Returns:
+        The first JSON object that decodes successfully.
+
+    Raises:
+        ValueError: If no decodable JSON object is present.
+    """
+    # Strip model thinking/reasoning blocks before looking for JSON.
+    text = re.sub(r"<thinking>.*?</thinking>", "", text, flags=re.DOTALL)
+    text = re.sub(r"<thought>.*?</thought>", "", text, flags=re.DOTALL)
+    # Remove the fence markers only; the fenced payload itself is kept.
+    text = re.sub(r"```(?:json)?", "", text).strip()
+    decoder = json.JSONDecoder()
+    for brace in re.finditer(r"\{", text):
+        try:
+            # raw_decode stops at the end of the object, ignoring what follows.
+            parsed, _ = decoder.raw_decode(text, brace.start())
+        except json.JSONDecodeError:
+            continue
+        return parsed
+    raise ValueError(f"No JSON found. Raw: {text[:300]}")
+def _fallback_evaluation():
+    """Return the default evaluation dict used when the LLM evaluation fails.
+
+    A new dict is built on every call.
+    """
+    fallback = dict(
+        relevance_score=0.5,
+        context_usage=0.5,
+        hallucination=True,
+        route="rag",
+    )
+    return fallback
+def evaluator_node(state):
+    """Score the final answer against the retrieved context.
+
+    When the route is "general" or there is no context, the LLM call is
+    skipped and a fixed evaluation is returned. Otherwise the evaluator chain
+    is invoked on the query, the answer and the first 600 characters of the
+    context, and its JSON reply is normalised.
+
+    Args:
+        state: Graph state; reads "query", "final_answer", "context", "route".
+
+    Returns:
+        A copy of ``state`` with an added "evaluation" dict. Any failure in
+        the chain or in parsing yields the fallback evaluation instead of
+        raising.
+    """
+    query = state.get("query")
+    answer = state.get("final_answer")
+    context = state.get("context", "")
+    route = state.get("route", "general")
+    # Don't evaluate general answers against RAG context — they'll always score 0
+    if route == "general" or not context:
+        return {
+            **state,
+            "evaluation": {
+                "relevance_score": 1.0,
+                "context_usage": None,   # N/A for general
+                "hallucination": False,
+                "route": "general"
+            }
+        }
+    try:
+        raw_response = chain.invoke({
+            "query": query,
+            "answer": answer,
+            "context": context[:600]
+        }).strip()
+        print(f"EVALUATOR RAW → {raw_response[:300]}")  # log first 300 chars to debug
+        parsed = _extract_json(raw_response)
+        hallucination = parsed.get("hallucination", True)
+        if isinstance(hallucination, str):
+            # bool("false") is True, so textual booleans are mapped explicitly.
+            hallucination = hallucination.strip().lower() not in ("false", "no", "0", "")
+        evaluation = {
+            # Scores are coerced to float, clamped to [0, 1] and rounded.
+            "relevance_score": round(min(max(float(parsed.get("relevance_score", 0)), 0), 1), 3),
+            "context_usage": round(min(max(float(parsed.get("context_usage", 0)), 0), 1), 3),
+            "hallucination": bool(hallucination),
+            "route": "rag"
+        }
+        print(f"EVALUATOR SUCCESS → {evaluation}")
+        return {**state, "evaluation": evaluation}
+    except Exception as e:
+        # Best-effort node: a failed evaluation must not break the graph run.
+        print("EVALUATOR ERROR →", e)
+        return {**state, "evaluation": _fallback_evaluation()}

app/graph/nodes/hybrid_agent.py ADDED Viewed

	@@ -0,0 +1,34 @@

+# app/graph/nodes/hybrid_agent.py
+from app.core.llm_engine import llm
+from langchain_core.prompts import PromptTemplate
+from langchain_core.output_parsers import StrOutputParser
+# Prompt for the hybrid route: the model is told to lead with what the
+# document excerpt says, then extend the answer from general knowledge while
+# keeping the two clearly separated.
+hybrid_prompt = PromptTemplate(
+    input_variables=["context", "query", "history"],
+    template=(
+        "You are a document-aware assistant.\n"
+        "The uploaded document has LIMITED information on this topic.\n\n"
+        "INSTRUCTIONS:\n"
+        "- Start your answer using what the document says (cite it briefly)\n"
+        "- Then expand with your general knowledge to give a complete answer\n"
+        "- Clearly separate what came from the document vs general knowledge\n"
+        "- Be concise and helpful\n\n"
+        "Conversation History:\n{history}\n\n"
+        "Document excerpt:\n{context}\n\n"
+        "Question:\n{query}\n\n"
+        "Answer:"
+    )
+)
+# Hybrid pipeline: prompt -> llm -> plain-string output.
+chain = hybrid_prompt | llm | StrOutputParser()
+def hybrid_agent_node(state):
+    """Run the hybrid chain and store its reply as ``general_answer``.
+
+    Reads "context", "query" and "history" from ``state`` (each defaulting to
+    an empty string) and returns a copy of ``state`` with the stripped reply.
+    """
+    prompt_inputs = {
+        "context": state.get("context", ""),
+        "query": state.get("query", ""),
+        "history": state.get("history", ""),
+    }
+    hybrid_answer = chain.invoke(prompt_inputs)
+    # Stored under "general_answer": synthesizer picks this up for hybrid route
+    return {**state, "general_answer": hybrid_answer.strip()}

app/graph/nodes/rag_agent.py CHANGED Viewed

@@ -23,19 +23,32 @@
 def rag_agent_node(state):
-    print("DEBUG → state received:", state)
     # ✅ context already comes from router now
-    context = state.get("context")
-    sources = state.get("sources")
-    print("DEBUG → context:", context[:200] if context else "EMPTY")
     return {
         **state,
-        "context": context,
-        "sources": sources
     }

 def rag_agent_node(state):
+    """Validate the context fetched by router_node and pass state through.
+
+    Retrieval already happened in router_node, so this node only logs the
+    context length and score. It is the intended place to add reranking later.
+
+    Args:
+        state: Graph state; reads "context" and "score".
+
+    Returns:
+        A copy of ``state``. When the context is empty, "route" is set to
+        "general" in the returned copy.
+    """
+    # `or` also covers keys that are present but None, which would otherwise
+    # crash len() and the float format below.
+    context = state.get("context") or ""
+    score = state.get("score") or 0.0
+    print(f"RAG AGENT → context length: {len(context)} | score: {score:.3f}")
+    if not context:
+        # Fallback: if somehow context is empty, reroute to general
+        return {
+            **state,
+            "route": "general",
+        }
+    return {**state}

app/graph/nodes/router.py CHANGED Viewed

@@ -1,52 +1,235 @@
 # app/graph/nodes/router.py
 from app.core.rag_service import get_rag_context
 def router_node(state):
     query = state.get("query")
     doc_id = state.get("doc_id")
-    # 🔥 Step 1: Try retrieving context
-    context, sources, scores = get_rag_context(query, doc_id)
-    print("ROUTER DEBUG → scores:", scores)
-    # print("ROUTER DEBUG → context:", context[:100] if context else "EMPTY")
-    # # 🔥 Step 2: Decide route based on context presence
-    # if context and len(context.strip()) > 50:
-    #     route = "rag"
-    # else:
-    #     route = "general"
-    # print("ROUTER DECISION →", route)
-    # return {
-    #     **state,
-    #     "route": route,
-    #     "context": context,   # ✅ pass forward (important)
-    #     "sources": sources
-    # }
-     # 🔥 Step 1: get best score
-    max_score = max(scores) if scores else 0
-    # 🔥 Step 2: threshold decision
-    THRESHOLD = 0.75   # 👈 tune this
-    if max_score >= THRESHOLD:
-        route = "rag"
-    else:
-        route = "general"
-        context = ""   # ❗ important: clear bad context
-    print("ROUTER DECISION →", route, "| score:", max_score)
-    return {
-        **state,
-        "route": route,
-        "context": context,
-        "sources": sources,
-        "score": max_score
-    }

 # app/graph/nodes/router.py
 from app.core.rag_service import get_rag_context
+from app.core.llm_engine import eval_llm        # ✅ use eval_llm — faster, no thinking
+from langchain_core.output_parsers import StrOutputParser
+from langchain_core.prompts import PromptTemplate
+import ast
+# Query expansion prompt: asks for three alternative phrasings, formatted as a
+# Python list literal of strings.
+expansion_prompt = PromptTemplate(
+    input_variables=["query"],
+    template=(
+        "Generate 3 short alternative phrasings of this question for document search.\n"
+        "Cover singular/plural, synonyms, and sub-concepts.\n"
+        "Return ONLY a Python list of strings, nothing else.\n"
+        "Example: ['What is an array?', 'array data structure', 'arrays in programming']\n\n"
+        "Question: {query}\n\n"
+        "List:"
+    )
+)
+# Sanity check prompt: asks the LLM for a one-word yes/no on whether the
+# retrieved context contains enough information to answer the query.
+relevance_check_prompt = PromptTemplate(
+    input_variables=["query", "context"],
+    template=(
+        "Does the following context contain enough information to answer the query?\n"
+        "Reply with ONLY one word: yes or no\n\n"
+        "Query: {query}\n\n"
+        "Context: {context}\n\n"
+        "Answer:"
+    )
+)
+# Both chains run on eval_llm and return the model reply as a plain string.
+expansion_chain = expansion_prompt | eval_llm | StrOutputParser()
+relevance_chain = relevance_check_prompt | eval_llm | StrOutputParser()
+def expand_query(query: str) -> list:
+    """Return the original query followed by up to three LLM rephrasings.
+
+    Args:
+        query: The user's question.
+
+    Returns:
+        A list whose first item is ``query``. If the chain fails or its reply
+        cannot be parsed as a list, only ``[query]`` is returned.
+    """
+    try:
+        raw = expansion_chain.invoke({"query": query}).strip()
+        # The model may wrap the list in code fences or prose; keep only the
+        # outermost [...] span before parsing.
+        start, end = raw.find("["), raw.rfind("]")
+        if start != -1 and end > start:
+            raw = raw[start:end + 1]
+        # literal_eval accepts Python literals only, so the reply is never executed.
+        expansions = ast.literal_eval(raw)
+        if isinstance(expansions, list):
+            # Drop non-string and blank items so only usable search queries remain.
+            alternatives = [
+                q.strip() for q in expansions
+                if isinstance(q, str) and q.strip() and q.strip() != query
+            ]
+            return [query] + alternatives[:3]
+    except Exception as e:
+        # Expansion is optional: log and fall back to the original query.
+        print(f"QUERY EXPANSION FAILED → {e}")
+    return [query]
+def is_context_relevant(query: str, context: str) -> bool:
+    """Ask the LLM whether the retrieved context can answer the query.
+
+    Only the first 800 characters of ``context`` are sent. Returns True when
+    the lower-cased reply starts with "yes"; any exception is logged and
+    treated as "not relevant".
+    """
+    try:
+        # Cap the context sent to the LLM to save tokens.
+        payload = {"query": query, "context": context[:800]}
+        verdict = relevance_chain.invoke(payload).strip().lower()
+        print(f"RELEVANCE CHECK → '{verdict}'")
+        confirmed = verdict.startswith("yes")
+    except Exception as e:
+        print(f"RELEVANCE CHECK FAILED → {e}")
+        # Fail safe: a failed check counts as "context does not answer".
+        confirmed = False
+    return confirmed
+# def router_node(state):
+#     query = state.get("query")
+#     doc_id = state.get("doc_id")
+#     # ── Stage 1: score original query ──────────────────────────────────────
+#     original_context, original_sources, original_scores = get_rag_context(
+#         query, doc_id, top_k=3
+#     )
+#     original_max_score = max(original_scores) if original_scores else 0.0
+#     print(f"ORIGINAL QUERY SCORE → {original_max_score:.3f}")
+#     HIGH_THRESHOLD = 0.70   # ✅ auto-RAG — very confident
+#     LOW_THRESHOLD  = 0.50   # ✅ below this → always general, no LLM check needed
+#     # ── Stage 2: definitive general (score too low) ─────────────────────────
+#     if original_max_score < LOW_THRESHOLD:
+#         print(f"ROUTER DECISION → general | score too low: {original_max_score:.3f}")
+#         return {
+#             **state,
+#             "route": "general",
+#             "context": "",
+#             "sources": [],
+#             "score": original_max_score
+#         }
+#     # ── Stage 3: definitive RAG (score very high) ──────────────────────────
+#     if original_max_score >= HIGH_THRESHOLD:
+#         # Still expand to get more chunks, but don't need LLM sanity check
+#         expanded_queries = expand_query(query)
+#         print(f"EXPANDED QUERIES → {expanded_queries}")
+#         all_contexts, all_scores, seen = _collect_chunks(
+#             expanded_queries, original_context, original_scores, doc_id
+#         )
+#         merged = "\n\n---\n\n".join(all_contexts)
+#         print(f"ROUTER DECISION → rag (high confidence) | score: {original_max_score:.3f} | chunks: {len(all_contexts)}")
+#         return {
+#             **state,
+#             "route": "rag",
+#             "context": merged,
+#             "sources": all_contexts,
+#             "score": original_max_score
+#         }
+#     # ── Stage 4: ambiguous zone (0.50–0.70) → LLM sanity check ────────────
+#     print(f"AMBIGUOUS SCORE → {original_max_score:.3f} | running relevance check...")
+#     context_is_relevant = is_context_relevant(query, original_context)
+#     if not context_is_relevant:
+#         print(f"ROUTER DECISION → general | LLM says context doesn't answer query")
+#         return {
+#             **state,
+#             "route": "general",
+#             "context": "",
+#             "sources": [],
+#             "score": original_max_score
+#         }
+#     # Context confirmed relevant — expand and collect chunks
+#     expanded_queries = expand_query(query)
+#     print(f"EXPANDED QUERIES → {expanded_queries}")
+#     all_contexts, all_scores, seen = _collect_chunks(
+#         expanded_queries, original_context, original_scores, doc_id
+#     )
+#     merged = "\n\n---\n\n".join(all_contexts)
+#     print(f"ROUTER DECISION → rag (llm confirmed) | score: {original_max_score:.3f} | chunks: {len(all_contexts)}")
+#     return {
+#         **state,
+#         "route": "rag",
+#         "context": merged,
+#         "sources": all_contexts,
+#         "score": original_max_score
+#     }
+# app/graph/nodes/router.py
+# Add a third threshold zone between general and ambiguous
 def router_node(state):
+    """Pick the answering route from the retrieval score of the query.
+
+    Routes:
+        - score < LOW_THRESHOLD: "general", context cleared.
+        - score >= HIGH_THRESHOLD: "rag", context expanded via query expansion.
+        - otherwise: the LLM relevance check decides between "hybrid"
+          (original context passed on unchanged) and "rag" with expansion.
+
+    Args:
+        state: Graph state; reads "query" and "doc_id".
+
+    Returns:
+        A copy of ``state`` with "route", "context", "sources" and "score" set.
+    """
     query = state.get("query")
     doc_id = state.get("doc_id")
+    original_context, _, original_scores = get_rag_context(
+        query, doc_id, top_k=3
+    )
+    original_max_score = max(original_scores) if original_scores else 0.0
+    print(f"ORIGINAL QUERY SCORE → {original_max_score:.3f}")
+    HIGH_THRESHOLD = 0.70   # at or above → RAG without an LLM check
+    LOW_THRESHOLD = 0.40    # below this  → pure general
+    # Pure general — no document relevance at all
+    if original_max_score < LOW_THRESHOLD:
+        print(f"ROUTER DECISION → general | score: {original_max_score:.3f}")
+        return {**state, "route": "general", "context": "", "sources": [], "score": original_max_score}
+    if original_max_score >= HIGH_THRESHOLD:
+        # Strong match — full RAG
+        decision = "rag"
+    else:
+        # Ambiguous zone (0.40–0.70) — LLM sanity check first
+        print(f"AMBIGUOUS SCORE → {original_max_score:.3f} | running relevance check...")
+        if not is_context_relevant(query, original_context):
+            # Doc has weak overlap but context doesn't actually answer it → hybrid
+            print("ROUTER DECISION → hybrid | LLM says context partial")
+            return {
+                **state,
+                "route": "hybrid",
+                "context": original_context,   # pass what we have unchanged
+                "sources": [original_context],
+                "score": original_max_score
+            }
+        decision = "rag (confirmed)"
+    # Shared RAG path: widen retrieval with rephrased queries and merge chunks.
+    expanded_queries = expand_query(query)
+    print(f"EXPANDED QUERIES → {expanded_queries}")
+    all_contexts, _, _ = _collect_chunks(
+        expanded_queries, original_context, original_scores, doc_id
+    )
+    merged = "\n\n---\n\n".join(all_contexts)
+    print(f"ROUTER DECISION → {decision} | score: {original_max_score:.3f} | chunks: {len(all_contexts)}")
+    return {**state, "route": "rag", "context": merged, "sources": all_contexts, "score": original_max_score}
+def _collect_chunks(expanded_queries, original_context, original_scores, doc_id):
+    """Gather unique chunks for the original query and its expansions.
+
+    Chunks are split on the "\n\n---\n\n" separator, stripped, and
+    de-duplicated by exact text; each kept chunk is paired with its score.
+
+    Returns:
+        A tuple ``(all_contexts, all_scores, seen)``: the chunk texts in
+        insertion order, their scores, and the set of texts already kept.
+    """
+    seen = set()
+    all_contexts = []
+    all_scores = []
+
+    def _absorb(context_text, chunk_scores):
+        # Keep only new, non-empty chunks together with their paired score.
+        pieces = context_text.split("\n\n---\n\n")
+        for piece, piece_score in zip(pieces, chunk_scores):
+            piece = piece.strip()
+            if not piece or piece in seen:
+                continue
+            seen.add(piece)
+            all_contexts.append(piece)
+            all_scores.append(piece_score)
+
+    # Seed with the results already retrieved for the original query.
+    _absorb(original_context, original_scores)
+    # The first entry is skipped; only the remaining queries are retrieved here.
+    for alt_query in expanded_queries[1:]:
+        alt_context, _, alt_scores = get_rag_context(alt_query, doc_id, top_k=2)
+        if alt_context:
+            _absorb(alt_context, alt_scores)
+    return all_contexts, all_scores, seen

app/graph/nodes/synthesizer.py CHANGED Viewed

@@ -1,38 +1,36 @@
 from app.core.llm_engine import llm
 from app.core.prompts.rag_prompt import rag_prompt
 from langchain_core.output_parsers import StrOutputParser
-def synthesizer_node(state):
-    query= state["query"]
-    context= state.get("context", "")
-    history= state.get("histroy", "")
     general_answer = state.get("general_answer")
-    # If general route, skip RAG context
-    if state.get("route") == "general":
         return {
             **state,
-            "final_answer": general_answer or "No answer generated."
         }
-    full_context= f"""
-    Conversation History:
-    {history}
-    Retrieved Context:
-    {context}
-    """
     chain = rag_prompt | llm | StrOutputParser()
     answer = chain.invoke({
-        "context": full_context,
-        "query": query
     })
     return {
         **state,
         "final_answer": answer.strip()
-    }

+# app/graph/nodes/synthesizer.py
 from app.core.llm_engine import llm
 from app.core.prompts.rag_prompt import rag_prompt
 from langchain_core.output_parsers import StrOutputParser
+def synthesizer_node(state):
+    """Produce ``final_answer`` for the current route.
+
+    For the "general" and "hybrid" routes the answer already stored in
+    "general_answer" is reused (with a fixed message when it is missing).
+    Any other route runs the RAG chain on the context, query and history.
+
+    Args:
+        state: Graph state; reads "route", "general_answer", "query",
+            "context" and "history".
+
+    Returns:
+        A copy of ``state`` with "final_answer" set.
+
+    Raises:
+        KeyError: If "query" is missing on the RAG path.
+    """
+    route = state.get("route")
     general_answer = state.get("general_answer")
+    # Membership test: `route == ("general", "hybrid")` compared a string with
+    # a tuple and was never true, so these routes fell through to the RAG chain.
+    if route in ("general", "hybrid"):
         return {
             **state,
+            "final_answer": general_answer or "I couldn't find a relevant answer."
         }
+    query = state["query"]
+    context = state.get("context", "")
+    history = state.get("history", "")
     chain = rag_prompt | llm | StrOutputParser()
     answer = chain.invoke({
+        "context": context,
+        "query": query,
+        "history": history
     })
     return {
         **state,
         "final_answer": answer.strip()
+    }