Spaces:

Nottybro
/

acra-api

Sleeping

App Files Files Community

Nottybro committed 23 days ago

Commit

ea894c2

verified ·

1 Parent(s): aa7d131

fix: acra.py — switch to google-genai + gemini-embedding-001

Browse files

Files changed (1) hide show

acra.py +82 -64

acra.py CHANGED Viewed

@@ -1,21 +1,36 @@
-import os
-import google.generativeai as genai
 from db import supabase
 from classifier_inference import classify_query
 from typing import List
-genai.configure(api_key=os.environ["GEMINI_API_KEY"])
-EMBED_MODEL = "models/text-embedding-004"
 GEN_MODEL   = "gemma-3-27b-it"
 DEPTH       = {0: 0, 1: 3, 2: 6, 3: 10}
 def embed_texts(texts):
-    return [genai.embed_content(model=EMBED_MODEL, content=t,
-            task_type="retrieval_document")["embedding"] for t in texts]
 def embed_query(q):
-    return genai.embed_content(model=EMBED_MODEL, content=q,
-           task_type="retrieval_query")["embedding"]
 def adaptive_chunk(text, max_tok=512):
     paras = [p.strip() for p in text.split("\n\n") if p.strip()]
@@ -29,96 +44,99 @@ def adaptive_chunk(text, max_tok=512):
     if cur: chunks.append(cur)
     return chunks or [text]
 def decompose(query):
-    m = genai.GenerativeModel(GEN_MODEL)
-    r = m.generate_content(
-        f"Decompose into 2-4 simpler sub-queries. "
-        f"Return numbered list only.\n\nQuery: {query}")
-    lines = [l.strip().lstrip("1234567890.). ")
-             for l in r.text.strip().split("\n") if l.strip()]
     return lines[:4] or [query]
 def compress(query, chunks):
-    m = genai.GenerativeModel(GEN_MODEL)
     out = []
     for c in chunks:
-        r = m.generate_content(
-            f"Extract only sentences relevant to the query. "
-            f"Return empty if none.\n\nQuery: {query}\nChunk: {c}")
         if r.text.strip(): out.append(r.text.strip())
     return out
 def vsearch(query, namespace, user_id, k):
     return (supabase.rpc("match_documents", {
-        "query_embedding":  embed_query(query),
-        "match_count":      k,
         "filter_namespace": namespace,
-        "filter_user_id":   user_id
     }).execute().data or [])
 PROMPTS = {
-    1: "Answer using ONLY the context below. Be concise.\n\nContext:\n{ctx}\n\nQuestion: {q}\nAnswer:",
-    2: "Synthesize the context to answer. Think step by step.\n\nContext:\n{ctx}\n\nQuestion: {q}\nAnswer:",
-    3: "Use chain-of-thought to answer this complex question.\nAddress each aspect. Note any gaps.\n\nContext:\n{ctx}\n\nQuestion: {q}\nReasoning and answer:",
 }
 async def ingest_pipeline(texts, metadata, namespace, user_id):
     chunks, meta = [], []
     for i, t in enumerate(texts):
         for j, c in enumerate(adaptive_chunk(t)):
-            chunks.append(c)
-            meta.append({**metadata[i], "source_index": i, "chunk_index": j})
-    rows = [{"content": c, "embedding": e, "metadata": m,
-             "namespace": namespace, "user_id": user_id}
             for c, e, m in zip(chunks, embed_texts(chunks), meta)]
     for i in range(0, len(rows), 50):
         supabase.table("documents").insert(rows[i:i+50]).execute()
     return len(chunks)
-async def query_pipeline(query, namespace, top_k, rerank, user_id):
-    cls   = classify_query(query)
-    level = cls["level"]
-    k     = DEPTH[level]
-    model = genai.GenerativeModel(GEN_MODEL)
     if level == 0:
-        r = model.generate_content(
-            f"Answer concisely from your knowledge:\n\n{query}")
-        return {"answer": r.text.strip(), "sources": [], "complexity": cls}
     hits = []
     if level == 3:
         seen = set()
         for sq in decompose(query):
             for h in vsearch(sq, namespace, user_id, 4):
-                if h["id"] not in seen:
-                    seen.add(h["id"]); hits.append(h)
     else:
         hits = vsearch(query, namespace, user_id, k)
-    if not hits:
-        return {"answer": "No relevant documents found. Ingest some first.",
-                "sources": [], "complexity": cls}
-    chunks = [h["content"] for h in hits]
-    if rerank and level >= 2:
-        chunks = [c for c in compress(query, chunks) if c.strip()]
-    ctx = "\n\n---\n\n".join(chunks[:k])
-    r   = model.generate_content(PROMPTS[level].format(ctx=ctx, q=query))
-    return {
-        "answer": r.text.strip(),
-        "sources": [{"content": h["content"][:200],
-                     "metadata": h.get("metadata", {}),
-                     "score": h.get("similarity", 0)}
-                    for h in hits[:len(chunks)]],
-        "complexity": cls
-    }
 async def run_acra_pipeline(mode, **kw):
     if mode == "ingest":
-        return await ingest_pipeline(kw["texts"], kw["metadata"],
-                                     kw["namespace"], kw["user_id"])
-    return await query_pipeline(kw["query"], kw["namespace"],
-                                kw["top_k"], kw["rerank"], kw["user_id"])

+import os, httpx
+from google import genai
+from google.genai import types
 from db import supabase
 from classifier_inference import classify_query
 from typing import List
+client = genai.Client(api_key=os.environ["GEMINI_API_KEY"])
+EMBED_MODEL = "gemini-embedding-001"   # replaces shut-down text-embedding-004
 GEN_MODEL   = "gemma-3-27b-it"
 DEPTH       = {0: 0, 1: 3, 2: 6, 3: 10}
 def embed_texts(texts):
+    result = client.models.embed_content(
+        model=EMBED_MODEL,
+        contents=texts,
+        config=types.EmbedContentConfig(
+            task_type="RETRIEVAL_DOCUMENT",
+            output_dimensionality=768   # keeps existing Supabase vector(768) schema
+        )
+    )
+    return [e.values for e in result.embeddings]
 def embed_query(q):
+    result = client.models.embed_content(
+        model=EMBED_MODEL,
+        contents=[q],
+        config=types.EmbedContentConfig(
+            task_type="RETRIEVAL_QUERY",
+            output_dimensionality=768
+        )
+    )
+    return result.embeddings[0].values
 def adaptive_chunk(text, max_tok=512):
     paras = [p.strip() for p in text.split("\n\n") if p.strip()]
     if cur: chunks.append(cur)
     return chunks or [text]
+def web_search(query, max_results=5):
+    try:
+        r = httpx.get("https://api.duckduckgo.com/",
+            params={"q": query, "format": "json", "no_html": "1", "skip_disambig": "1"},
+            headers={"User-Agent": "ACRA/1.0"}, timeout=10.0)
+        data, results = r.json(), []
+        if data.get("AbstractText"):
+            results.append({"title": data.get("Heading","Web"), "snippet": data["AbstractText"], "url": data.get("AbstractURL","")})
+        for t in data.get("RelatedTopics", [])[:max_results]:
+            if isinstance(t, dict) and t.get("Text"):
+                results.append({"title": t.get("Name","Web"), "snippet": t["Text"], "url": t.get("FirstURL","")})
+        return results[:max_results]
+    except: return []
 def decompose(query):
+    r = client.models.generate_content(model=GEN_MODEL,
+        contents=f"Decompose into 2-4 simpler sub-queries. Numbered list only.\n\nQuery: {query}")
+    lines = [l.strip().lstrip("1234567890.). ") for l in r.text.strip().split("\n") if l.strip()]
     return lines[:4] or [query]
 def compress(query, chunks):
     out = []
     for c in chunks:
+        r = client.models.generate_content(model=GEN_MODEL,
+            contents=f"Extract only sentences relevant to the query. Return empty if none.\n\nQuery: {query}\nChunk: {c}")
         if r.text.strip(): out.append(r.text.strip())
     return out
 def vsearch(query, namespace, user_id, k):
     return (supabase.rpc("match_documents", {
+        "query_embedding": embed_query(query),
+        "match_count": k,
         "filter_namespace": namespace,
+        "filter_user_id": user_id,
     }).execute().data or [])
+# Generation prompt templates keyed by the complexity level that
+# query_pipeline reads from classify_query. Level 0 is formatted with {q}
+# only; levels 1-3 are formatted with retrieved context {ctx} plus {q}.
 PROMPTS = {
+    0: "Answer from your knowledge:\n\n{q}",
+    1: "Answer using ONLY the context. Be concise.\n\nContext:\n{ctx}\n\nQuestion: {q}\nAnswer:",
+    2: "Synthesize the context step by step.\n\nContext:\n{ctx}\n\nQuestion: {q}\nAnswer:",
+    3: "Use chain-of-thought reasoning.\n\nContext:\n{ctx}\n\nQuestion: {q}\nAnswer:",
+}
+# Templates query_pipeline selects instead of PROMPTS when its
+# retrieval_source is "web" (no local hits, web results only).
+WEB_PROMPTS = {
+    1: "Answer using these web results:\n\n{ctx}\n\nQuestion: {q}\nAnswer:",
+    2: "Synthesize these web results:\n\n{ctx}\n\nQuestion: {q}\nAnswer:",
+    3: "Reason through this using web results:\n\n{ctx}\n\nQuestion: {q}\nAnswer:",
 }
 async def ingest_pipeline(texts, metadata, namespace, user_id):
     chunks, meta = [], []
     for i, t in enumerate(texts):
         for j, c in enumerate(adaptive_chunk(t)):
+            chunks.append(c); meta.append({**metadata[i], "source_index": i, "chunk_index": j})
+    rows = [{"content": c, "embedding": e, "metadata": m, "namespace": namespace, "user_id": user_id}
             for c, e, m in zip(chunks, embed_texts(chunks), meta)]
     for i in range(0, len(rows), 50):
         supabase.table("documents").insert(rows[i:i+50]).execute()
     return len(chunks)
+async def query_pipeline(query, namespace, top_k, rerank, user_id, use_web=False):
+    cls = classify_query(query); level = cls["level"]; k = DEPTH[level]
     if level == 0:
+        r = client.models.generate_content(model=GEN_MODEL, contents=PROMPTS[0].format(q=query))
+        return {"answer": r.text.strip(), "sources": [], "complexity": cls, "retrieval_source": "model_knowledge"}
     hits = []
     if level == 3:
         seen = set()
         for sq in decompose(query):
             for h in vsearch(sq, namespace, user_id, 4):
+                if h["id"] not in seen: seen.add(h["id"]); hits.append(h)
     else:
         hits = vsearch(query, namespace, user_id, k)
+    web_hits, retrieval_source = [], "local"
+    if use_web or not hits:
+        web_hits = web_search(query, max_results=k)
+        if not hits and not web_hits:
+            return {"answer": "Nothing found locally or on the web.", "sources": [], "complexity": cls, "retrieval_source": "none"}
+        retrieval_source = "web" if not hits else "local_and_web"
+    all_chunks, all_sources = [], []
+    if hits:
+        lc = [h["content"] for h in hits]
+        if rerank and level >= 2: lc = [c for c in compress(query, lc) if c.strip()]
+        all_chunks += lc[:k]
+        all_sources += [{"content": h["content"][:200], "metadata": h.get("metadata",{}), "score": h.get("similarity",0), "source": "local"} for h in hits[:len(lc)]]
+    if web_hits:
+        all_chunks += [f"{h['title']}: {h['snippet']}" for h in web_hits]
+        all_sources += [{"content": h["snippet"][:200], "metadata": {"title": h["title"], "url": h["url"]}, "score": 1.0, "source": "web"} for h in web_hits]
+    ctx = "\n\n---\n\n".join(all_chunks)
+    prompt = (WEB_PROMPTS if retrieval_source=="web" else PROMPTS).get(level, PROMPTS[level])
+    r = client.models.generate_content(model=GEN_MODEL, contents=prompt.format(ctx=ctx, q=query))
+    return {"answer": r.text.strip(), "sources": all_sources, "complexity": cls, "retrieval_source": retrieval_source}
 async def run_acra_pipeline(mode, **kw):
     if mode == "ingest":
+        return await ingest_pipeline(kw["texts"], kw["metadata"], kw["namespace"], kw["user_id"])
+    return await query_pipeline(kw["query"], kw["namespace"], kw["top_k"], kw["rerank"], kw["user_id"], use_web=kw.get("use_web", False))