Spaces:

Nottybro
/

acra-api

Sleeping

App Files Community

Nottybro committed 28 days ago

Commit

9d77494

verified ·

1 Parent(s): 45cb177

fix: L0 checks docs first (similarity>0.75) before model fallback

Browse files

Files changed (1) hide show

acra.py +41 -26

acra.py CHANGED Viewed

@@ -5,31 +5,21 @@ from db import supabase
 from classifier_inference import classify_query
 from typing import List
-client = genai.Client(api_key=os.environ["GEMINI_API_KEY"])
-EMBED_MODEL = "gemini-embedding-001"   # replaces shut-down text-embedding-004
 GEN_MODEL   = "gemma-3-27b-it"
-DEPTH       = {0: 0, 1: 3, 2: 6, 3: 10}
 def embed_texts(texts):
     result = client.models.embed_content(
-        model=EMBED_MODEL,
-        contents=texts,
-        config=types.EmbedContentConfig(
-            task_type="RETRIEVAL_DOCUMENT",
-            output_dimensionality=768   # keeps existing Supabase vector(768) schema
-        )
-    )
     return [e.values for e in result.embeddings]
 def embed_query(q):
     result = client.models.embed_content(
-        model=EMBED_MODEL,
-        contents=[q],
-        config=types.EmbedContentConfig(
-            task_type="RETRIEVAL_QUERY",
-            output_dimensionality=768
-        )
-    )
     return result.embeddings[0].values
 def adaptive_chunk(text, max_tok=512):
@@ -81,7 +71,7 @@ def vsearch(query, namespace, user_id, k):
     }).execute().data or [])
 PROMPTS = {
-    0: "Answer from your knowledge:\n\n{q}",
     1: "Answer using ONLY the context. Be concise.\n\nContext:\n{ctx}\n\nQuestion: {q}\nAnswer:",
     2: "Synthesize the context step by step.\n\nContext:\n{ctx}\n\nQuestion: {q}\nAnswer:",
     3: "Use chain-of-thought reasoning.\n\nContext:\n{ctx}\n\nQuestion: {q}\nAnswer:",
@@ -104,10 +94,30 @@ async def ingest_pipeline(texts, metadata, namespace, user_id):
     return len(chunks)
 async def query_pipeline(query, namespace, top_k, rerank, user_id, use_web=False):
-    cls = classify_query(query); level = cls["level"]; k = DEPTH[level]
     if level == 0:
         r = client.models.generate_content(model=GEN_MODEL, contents=PROMPTS[0].format(q=query))
         return {"answer": r.text.strip(), "sources": [], "complexity": cls, "retrieval_source": "model_knowledge"}
     hits = []
     if level == 3:
         seen = set()
@@ -116,24 +126,29 @@ async def query_pipeline(query, namespace, top_k, rerank, user_id, use_web=False
                 if h["id"] not in seen: seen.add(h["id"]); hits.append(h)
     else:
         hits = vsearch(query, namespace, user_id, k)
     web_hits, retrieval_source = [], "local"
     if use_web or not hits:
         web_hits = web_search(query, max_results=k)
         if not hits and not web_hits:
             return {"answer": "Nothing found locally or on the web.", "sources": [], "complexity": cls, "retrieval_source": "none"}
         retrieval_source = "web" if not hits else "local_and_web"
     all_chunks, all_sources = [], []
     if hits:
         lc = [h["content"] for h in hits]
         if rerank and level >= 2: lc = [c for c in compress(query, lc) if c.strip()]
-        all_chunks += lc[:k]
-        all_sources += [{"content": h["content"][:200], "metadata": h.get("metadata",{}), "score": h.get("similarity",0), "source": "local"} for h in hits[:len(lc)]]
     if web_hits:
-        all_chunks += [f"{h['title']}: {h['snippet']}" for h in web_hits]
-        all_sources += [{"content": h["snippet"][:200], "metadata": {"title": h["title"], "url": h["url"]}, "score": 1.0, "source": "web"} for h in web_hits]
-    ctx = "\n\n---\n\n".join(all_chunks)
-    prompt = (WEB_PROMPTS if retrieval_source=="web" else PROMPTS).get(level, PROMPTS[level])
-    r = client.models.generate_content(model=GEN_MODEL, contents=prompt.format(ctx=ctx, q=query))
     return {"answer": r.text.strip(), "sources": all_sources, "complexity": cls, "retrieval_source": retrieval_source}
 async def run_acra_pipeline(mode, **kw):

 from classifier_inference import classify_query
 from typing import List
+client     = genai.Client(api_key=os.environ["GEMINI_API_KEY"])
+EMBED_MODEL = "gemini-embedding-001"
 GEN_MODEL   = "gemma-3-27b-it"
+DEPTH       = {0: 3, 1: 3, 2: 6, 3: 10}
 def embed_texts(texts):
     result = client.models.embed_content(
+        model=EMBED_MODEL, contents=texts,
+        config=types.EmbedContentConfig(task_type="RETRIEVAL_DOCUMENT", output_dimensionality=768))
     return [e.values for e in result.embeddings]
 def embed_query(q):
     result = client.models.embed_content(
+        model=EMBED_MODEL, contents=[q],
+        config=types.EmbedContentConfig(task_type="RETRIEVAL_QUERY", output_dimensionality=768))
     return result.embeddings[0].values
 def adaptive_chunk(text, max_tok=512):
     }).execute().data or [])
 PROMPTS = {
+    0: "Answer this from your knowledge:\n\n{q}",
     1: "Answer using ONLY the context. Be concise.\n\nContext:\n{ctx}\n\nQuestion: {q}\nAnswer:",
     2: "Synthesize the context step by step.\n\nContext:\n{ctx}\n\nQuestion: {q}\nAnswer:",
     3: "Use chain-of-thought reasoning.\n\nContext:\n{ctx}\n\nQuestion: {q}\nAnswer:",
     return len(chunks)
 async def query_pipeline(query, namespace, top_k, rerank, user_id, use_web=False):
+    cls   = classify_query(query)
+    level = cls["level"]
+    k     = DEPTH[level]
+    model = client
+    # ── L0: try docs first (similarity > 0.75), fall back to model knowledge
     if level == 0:
+        l0_hits     = vsearch(query, namespace, user_id, 3)
+        strong_hits = [h for h in l0_hits if h.get("similarity", 0) > 0.75]
+        if strong_hits:
+            ctx = "\n\n---\n\n".join(h["content"] for h in strong_hits)
+            r   = client.models.generate_content(model=GEN_MODEL,
+                contents=f"Answer using ONLY the context. Be concise.\n\nContext:\n{ctx}\n\nQuestion: {query}\nAnswer:")
+            return {
+                "answer":           r.text.strip(),
+                "sources":          [{"content": h["content"][:200], "metadata": h.get("metadata", {}),
+                                      "score": h.get("similarity", 0), "source": "local"} for h in strong_hits],
+                "complexity":       cls,
+                "retrieval_source": "local"
+            }
         r = client.models.generate_content(model=GEN_MODEL, contents=PROMPTS[0].format(q=query))
         return {"answer": r.text.strip(), "sources": [], "complexity": cls, "retrieval_source": "model_knowledge"}
+    # ── L1-L3: standard retrieval
     hits = []
     if level == 3:
         seen = set()
                 if h["id"] not in seen: seen.add(h["id"]); hits.append(h)
     else:
         hits = vsearch(query, namespace, user_id, k)
     web_hits, retrieval_source = [], "local"
     if use_web or not hits:
         web_hits = web_search(query, max_results=k)
         if not hits and not web_hits:
             return {"answer": "Nothing found locally or on the web.", "sources": [], "complexity": cls, "retrieval_source": "none"}
         retrieval_source = "web" if not hits else "local_and_web"
     all_chunks, all_sources = [], []
     if hits:
         lc = [h["content"] for h in hits]
         if rerank and level >= 2: lc = [c for c in compress(query, lc) if c.strip()]
+        all_chunks  += lc[:k]
+        all_sources += [{"content": h["content"][:200], "metadata": h.get("metadata", {}),
+                         "score": h.get("similarity", 0), "source": "local"} for h in hits[:len(lc)]]
     if web_hits:
+        all_chunks  += [f"{h['title']}: {h['snippet']}" for h in web_hits]
+        all_sources += [{"content": h["snippet"][:200], "metadata": {"title": h["title"], "url": h["url"]},
+                         "score": 1.0, "source": "web"} for h in web_hits]
+    ctx    = "\n\n---\n\n".join(all_chunks)
+    prompt = (WEB_PROMPTS if retrieval_source == "web" else PROMPTS).get(level, PROMPTS[level])
+    r      = client.models.generate_content(model=GEN_MODEL, contents=prompt.format(ctx=ctx, q=query))
     return {"answer": r.text.strip(), "sources": all_sources, "complexity": cls, "retrieval_source": retrieval_source}
 async def run_acra_pipeline(mode, **kw):