Spaces:

Nottybro
/

acra-api

Sleeping

App Files Community

Nottybro committed on Apr 28

Commit

92d9d21

verified ·

1 Parent(s): 9d77494

fix: L0 always retrieves then lets Gemma decide context vs knowledge

Browse files

Files changed (1) hide show

acra.py +42 -33

acra.py CHANGED Viewed

@@ -5,22 +5,20 @@ from db import supabase
 from classifier_inference import classify_query
 from typing import List
-client     = genai.Client(api_key=os.environ["GEMINI_API_KEY"])
 EMBED_MODEL = "gemini-embedding-001"
 GEN_MODEL   = "gemma-3-27b-it"
 DEPTH       = {0: 3, 1: 3, 2: 6, 3: 10}
 def embed_texts(texts):
-    result = client.models.embed_content(
-        model=EMBED_MODEL, contents=texts,
         config=types.EmbedContentConfig(task_type="RETRIEVAL_DOCUMENT", output_dimensionality=768))
-    return [e.values for e in result.embeddings]
 def embed_query(q):
-    result = client.models.embed_content(
-        model=EMBED_MODEL, contents=[q],
         config=types.EmbedContentConfig(task_type="RETRIEVAL_QUERY", output_dimensionality=768))
-    return result.embeddings[0].values
 def adaptive_chunk(text, max_tok=512):
     paras = [p.strip() for p in text.split("\n\n") if p.strip()]
@@ -39,13 +37,13 @@ def web_search(query, max_results=5):
         r = httpx.get("https://api.duckduckgo.com/",
             params={"q": query, "format": "json", "no_html": "1", "skip_disambig": "1"},
             headers={"User-Agent": "ACRA/1.0"}, timeout=10.0)
-        data, results = r.json(), []
         if data.get("AbstractText"):
-            results.append({"title": data.get("Heading","Web"), "snippet": data["AbstractText"], "url": data.get("AbstractURL","")})
-        for t in data.get("RelatedTopics", [])[:max_results]:
-            if isinstance(t, dict) and t.get("Text"):
-                results.append({"title": t.get("Name","Web"), "snippet": t["Text"], "url": t.get("FirstURL","")})
-        return results[:max_results]
     except: return []
 def decompose(query):
@@ -71,7 +69,6 @@ def vsearch(query, namespace, user_id, k):
     }).execute().data or [])
 PROMPTS = {
-    0: "Answer this from your knowledge:\n\n{q}",
     1: "Answer using ONLY the context. Be concise.\n\nContext:\n{ctx}\n\nQuestion: {q}\nAnswer:",
     2: "Synthesize the context step by step.\n\nContext:\n{ctx}\n\nQuestion: {q}\nAnswer:",
     3: "Use chain-of-thought reasoning.\n\nContext:\n{ctx}\n\nQuestion: {q}\nAnswer:",
@@ -86,7 +83,8 @@ async def ingest_pipeline(texts, metadata, namespace, user_id):
     chunks, meta = [], []
     for i, t in enumerate(texts):
         for j, c in enumerate(adaptive_chunk(t)):
-            chunks.append(c); meta.append({**metadata[i], "source_index": i, "chunk_index": j})
     rows = [{"content": c, "embedding": e, "metadata": m, "namespace": namespace, "user_id": user_id}
             for c, e, m in zip(chunks, embed_texts(chunks), meta)]
     for i in range(0, len(rows), 50):
@@ -97,27 +95,35 @@ async def query_pipeline(query, namespace, top_k, rerank, user_id, use_web=False
     cls   = classify_query(query)
     level = cls["level"]
     k     = DEPTH[level]
-    model = client
-    # ── L0: try docs first (similarity > 0.75), fall back to model knowledge
     if level == 0:
-        l0_hits     = vsearch(query, namespace, user_id, 3)
-        strong_hits = [h for h in l0_hits if h.get("similarity", 0) > 0.75]
-        if strong_hits:
-            ctx = "\n\n---\n\n".join(h["content"] for h in strong_hits)
             r   = client.models.generate_content(model=GEN_MODEL,
-                contents=f"Answer using ONLY the context. Be concise.\n\nContext:\n{ctx}\n\nQuestion: {query}\nAnswer:")
             return {
                 "answer":           r.text.strip(),
-                "sources":          [{"content": h["content"][:200], "metadata": h.get("metadata", {}),
-                                      "score": h.get("similarity", 0), "source": "local"} for h in strong_hits],
                 "complexity":       cls,
-                "retrieval_source": "local"
             }
-        r = client.models.generate_content(model=GEN_MODEL, contents=PROMPTS[0].format(q=query))
         return {"answer": r.text.strip(), "sources": [], "complexity": cls, "retrieval_source": "model_knowledge"}
-    # ── L1-L3: standard retrieval
     hits = []
     if level == 3:
         seen = set()
@@ -131,7 +137,8 @@ async def query_pipeline(query, namespace, top_k, rerank, user_id, use_web=False
     if use_web or not hits:
         web_hits = web_search(query, max_results=k)
         if not hits and not web_hits:
-            return {"answer": "Nothing found locally or on the web.", "sources": [], "complexity": cls, "retrieval_source": "none"}
         retrieval_source = "web" if not hits else "local_and_web"
     all_chunks, all_sources = [], []
@@ -139,19 +146,21 @@ async def query_pipeline(query, namespace, top_k, rerank, user_id, use_web=False
         lc = [h["content"] for h in hits]
         if rerank and level >= 2: lc = [c for c in compress(query, lc) if c.strip()]
         all_chunks  += lc[:k]
-        all_sources += [{"content": h["content"][:200], "metadata": h.get("metadata", {}),
-                         "score": h.get("similarity", 0), "source": "local"} for h in hits[:len(lc)]]
     if web_hits:
         all_chunks  += [f"{h['title']}: {h['snippet']}" for h in web_hits]
         all_sources += [{"content": h["snippet"][:200], "metadata": {"title": h["title"], "url": h["url"]},
                          "score": 1.0, "source": "web"} for h in web_hits]
     ctx    = "\n\n---\n\n".join(all_chunks)
-    prompt = (WEB_PROMPTS if retrieval_source == "web" else PROMPTS).get(level, PROMPTS[level])
     r      = client.models.generate_content(model=GEN_MODEL, contents=prompt.format(ctx=ctx, q=query))
-    return {"answer": r.text.strip(), "sources": all_sources, "complexity": cls, "retrieval_source": retrieval_source}
 async def run_acra_pipeline(mode, **kw):
     if mode == "ingest":
         return await ingest_pipeline(kw["texts"], kw["metadata"], kw["namespace"], kw["user_id"])
-    return await query_pipeline(kw["query"], kw["namespace"], kw["top_k"], kw["rerank"], kw["user_id"], use_web=kw.get("use_web", False))

 from classifier_inference import classify_query
 from typing import List
+client      = genai.Client(api_key=os.environ["GEMINI_API_KEY"])
 EMBED_MODEL = "gemini-embedding-001"
 GEN_MODEL   = "gemma-3-27b-it"
 DEPTH       = {0: 3, 1: 3, 2: 6, 3: 10}
 def embed_texts(texts):
+    r = client.models.embed_content(model=EMBED_MODEL, contents=texts,
         config=types.EmbedContentConfig(task_type="RETRIEVAL_DOCUMENT", output_dimensionality=768))
+    return [e.values for e in r.embeddings]
 def embed_query(q):
+    r = client.models.embed_content(model=EMBED_MODEL, contents=[q],
         config=types.EmbedContentConfig(task_type="RETRIEVAL_QUERY", output_dimensionality=768))
+    return r.embeddings[0].values
 def adaptive_chunk(text, max_tok=512):
     paras = [p.strip() for p in text.split("\n\n") if p.strip()]
         r = httpx.get("https://api.duckduckgo.com/",
             params={"q": query, "format": "json", "no_html": "1", "skip_disambig": "1"},
             headers={"User-Agent": "ACRA/1.0"}, timeout=10.0)
+        data, out = r.json(), []
         if data.get("AbstractText"):
+            out.append({"title": data.get("Heading","Web"), "snippet": data["AbstractText"], "url": data.get("AbstractURL","")})
+        for t in data.get("RelatedTopics",[])[:max_results]:
+            if isinstance(t,dict) and t.get("Text"):
+                out.append({"title": t.get("Name","Web"), "snippet": t["Text"], "url": t.get("FirstURL","")})
+        return out[:max_results]
     except: return []
 def decompose(query):
     }).execute().data or [])
 PROMPTS = {
     1: "Answer using ONLY the context. Be concise.\n\nContext:\n{ctx}\n\nQuestion: {q}\nAnswer:",
     2: "Synthesize the context step by step.\n\nContext:\n{ctx}\n\nQuestion: {q}\nAnswer:",
     3: "Use chain-of-thought reasoning.\n\nContext:\n{ctx}\n\nQuestion: {q}\nAnswer:",
     chunks, meta = [], []
     for i, t in enumerate(texts):
         for j, c in enumerate(adaptive_chunk(t)):
+            chunks.append(c)
+            meta.append({**metadata[i], "source_index": i, "chunk_index": j})
     rows = [{"content": c, "embedding": e, "metadata": m, "namespace": namespace, "user_id": user_id}
             for c, e, m in zip(chunks, embed_texts(chunks), meta)]
     for i in range(0, len(rows), 50):
     cls   = classify_query(query)
     level = cls["level"]
     k     = DEPTH[level]
+    # L0: always retrieve first — give Gemma the context and let it decide
+    # whether to use it or answer from its own knowledge. This prevents
+    # hallucination when the answer exists in the user docs.
     if level == 0:
+        hits = vsearch(query, namespace, user_id, 2)
+        if hits:
+            ctx = "\n\n---\n\n".join(h["content"] for h in hits)
             r   = client.models.generate_content(model=GEN_MODEL,
+                contents=(
+                    f"Use the context below if it contains a relevant answer to the question. "
+                    f"If the context is not relevant, answer from your own knowledge instead.\n\n"
+                    f"Context:\n{ctx}\n\nQuestion: {query}\nAnswer:"
+                ))
+            top_score = hits[0].get("similarity", 0)
             return {
                 "answer":           r.text.strip(),
+                "sources":          [{"content": h["content"][:200], "metadata": h.get("metadata",{}),
+                                      "score": h.get("similarity",0), "source": "local"}
+                                     for h in hits if h.get("similarity",0) > 0.5],
                 "complexity":       cls,
+                "retrieval_source": "local" if top_score > 0.5 else "model_knowledge",
             }
+        # No docs at all — answer from model knowledge
+        r = client.models.generate_content(model=GEN_MODEL,
+            contents=f"Answer this from your knowledge:\n\n{query}")
         return {"answer": r.text.strip(), "sources": [], "complexity": cls, "retrieval_source": "model_knowledge"}
+    # L1–L3: standard retrieval
     hits = []
     if level == 3:
         seen = set()
     if use_web or not hits:
         web_hits = web_search(query, max_results=k)
         if not hits and not web_hits:
+            return {"answer": "Nothing found locally or on the web.", "sources": [],
+                    "complexity": cls, "retrieval_source": "none"}
         retrieval_source = "web" if not hits else "local_and_web"
     all_chunks, all_sources = [], []
         lc = [h["content"] for h in hits]
         if rerank and level >= 2: lc = [c for c in compress(query, lc) if c.strip()]
         all_chunks  += lc[:k]
+        all_sources += [{"content": h["content"][:200], "metadata": h.get("metadata",{}),
+                         "score": h.get("similarity",0), "source": "local"} for h in hits[:len(lc)]]
     if web_hits:
         all_chunks  += [f"{h['title']}: {h['snippet']}" for h in web_hits]
         all_sources += [{"content": h["snippet"][:200], "metadata": {"title": h["title"], "url": h["url"]},
                          "score": 1.0, "source": "web"} for h in web_hits]
     ctx    = "\n\n---\n\n".join(all_chunks)
+    prompt = (WEB_PROMPTS if retrieval_source == "web" else PROMPTS).get(level, PROMPTS[1])
     r      = client.models.generate_content(model=GEN_MODEL, contents=prompt.format(ctx=ctx, q=query))
+    return {"answer": r.text.strip(), "sources": all_sources,
+            "complexity": cls, "retrieval_source": retrieval_source}
 async def run_acra_pipeline(mode, **kw):
     if mode == "ingest":
         return await ingest_pipeline(kw["texts"], kw["metadata"], kw["namespace"], kw["user_id"])
+    return await query_pipeline(kw["query"], kw["namespace"], kw["top_k"],
+                                kw["rerank"], kw["user_id"], use_web=kw.get("use_web", False))