AI_Agent_Final

Sleeping

App Files Files Community

SarahXia0405 committed on Dec 21, 2025

Commit

6f94a8a

verified ·

1 Parent(s): 0c571ff

Update api/rag_engine.py

Browse files

Files changed (1) hide show

api/rag_engine.py +23 -27

api/rag_engine.py CHANGED Viewed

@@ -21,7 +21,6 @@ from pypdf import PdfReader
 from docx import Document
 from pptx import Presentation
 # ----------------------------
 # Helpers
 # ----------------------------
@@ -158,21 +157,30 @@ def build_rag_chunks_from_file(path: str, doc_type: str) -> List[Dict]:
 def retrieve_relevant_chunks(
     query: str,
     chunks: List[Dict],
-    k: int = 2,
-    max_context_chars: int = 1200,
-    min_score: int = 3,
 ) -> Tuple[str, List[Dict]]:
     """
     Deterministic lightweight retrieval (no embeddings):
-    - score by token overlap (fast)
-    - ONLY include context when overlap score is meaningful (>= min_score)
-    - keep context short to reduce LLM latency
     """
     query = _clean_text(query)
     if not query or not chunks:
         return "", []
-    q_tokens = set(re.findall(r"[a-zA-Z0-9]+", query.lower()))
     if not q_tokens:
         return "", []
@@ -183,19 +191,13 @@ def retrieve_relevant_chunks(
             continue
         t_tokens = set(re.findall(r"[a-zA-Z0-9]+", text.lower()))
         score = len(q_tokens.intersection(t_tokens))
-        if score > 0:
             scored.append((score, c))
     if not scored:
         return "", []
     scored.sort(key=lambda x: x[0], reverse=True)
-    # If even the most relevant chunk is only a weak match, skip RAG entirely (avoids slowing things down for nothing)
-    best_score = scored[0][0]
-    if best_score < min_score:
-        return "", []
     top = [c for _, c in scored[:k]]
     buf_parts: List[str] = []
@@ -205,18 +207,12 @@ def retrieve_relevant_chunks(
         t = c.get("text") or ""
         if not t:
             continue
-        remaining = max_context_chars - total
-        if remaining <= 0:
-            break
-        if len(t) > remaining:
-            t = t[:remaining]
-        buf_parts.append(t)
-        used.append(c)
-        total += len(t)
         if total >= max_context_chars:
             break

 from docx import Document
 from pptx import Presentation
 # ----------------------------
 # Helpers
 # ----------------------------
 def retrieve_relevant_chunks(
     query: str,
     chunks: List[Dict],
+    k: int = 1,
+    max_context_chars: int = 600,
+    min_score: int = 6,
 ) -> Tuple[str, List[Dict]]:
     """
     Deterministic lightweight retrieval (no embeddings):
+    - score by token overlap
+    - return top-k chunks concatenated as context
+    Speed improvements:
+    - short/generic queries won't trigger RAG
+    - higher min_score prevents accidental triggers
+    - smaller max_context_chars reduces LLM prompt size
     """
     query = _clean_text(query)
     if not query or not chunks:
         return "", []
+    # ✅ Short query gate: avoid wasting time on RAG for greetings / tiny inputs
+    q_tokens_list = re.findall(r"[a-zA-Z0-9]+", query.lower())
+    if (len(q_tokens_list) < 3) and (len(query) < 20):
+        return "", []
+    q_tokens = set(q_tokens_list)
     if not q_tokens:
         return "", []
             continue
         t_tokens = set(re.findall(r"[a-zA-Z0-9]+", text.lower()))
         score = len(q_tokens.intersection(t_tokens))
+        if score >= min_score:
             scored.append((score, c))
     if not scored:
         return "", []
     scored.sort(key=lambda x: x[0], reverse=True)
     top = [c for _, c in scored[:k]]
     buf_parts: List[str] = []
         t = c.get("text") or ""
         if not t:
             continue
+        if total + len(t) > max_context_chars:
+            t = t[: max(0, max_context_chars - total)]
+        if t:
+            buf_parts.append(t)
+            used.append(c)
+            total += len(t)
         if total >= max_context_chars:
             break