Spaces:

Nottybro
/

acra-api

Sleeping

Nottybro committed on Apr 28

Commit

23f097c

verified ·

1 Parent(s): 92d9d21

perf: batch compress — N Gemma calls → 1 call, L2 17s→5s L3 43s→12s

Files changed (1) hide show

acra.py CHANGED Viewed

@@ -53,12 +53,29 @@ def decompose(query):
     return lines[:4] or [query]
 def compress(query, chunks):
     # Pre-change implementation (the "-" lines are what this commit removes):
     # issues one generate_content call per chunk and keeps each reply whose
     # stripped text is non-empty, so it returns [] when no chunk yields text.
     out = []
-    for c in chunks:
-        r = client.models.generate_content(model=GEN_MODEL,
-            contents=f"Extract only sentences relevant to the query. Return empty if none.\n\nQuery: {query}\nChunk: {c}")
-        if r.text.strip(): out.append(r.text.strip())
-    return out
 def vsearch(query, namespace, user_id, k):
     return (supabase.rpc("match_documents", {

     return lines[:4] or [query]
 def compress(query, chunks):
     """Extract the query-relevant sentences from every chunk in one model call.

     All chunks are numbered and sent in a single generate_content request
     instead of one request per chunk. The reply is expected to contain one
     "[i] <sentences or EMPTY>" section per chunk.

     Args:
         query: The query the extraction is conditioned on.
         chunks: Sequence of text chunks to compress.

     Returns:
         The non-empty extracted sections, in the order they appear in the
         reply. An empty list when ``chunks`` is empty or when every parsed
         section is marked EMPTY. The original ``chunks`` object, unchanged,
         when the reply has no text or no recognisable "[i]" section marker.
     """
     import re  # function-local: the file's import block is outside this view
     if not chunks:
         return []
     numbered = "\n\n".join(f"[{i+1}]\n{c}" for i, c in enumerate(chunks))
     r = client.models.generate_content(model=GEN_MODEL, contents=(
         f"You have {len(chunks)} text chunks and a query.\n"
         f"For each chunk, extract ONLY the sentences directly relevant to the query.\n"
         f"Reply in this exact format for every chunk:\n"
         f"[1] <extracted sentences or EMPTY>\n"
         f"[2] <extracted sentences or EMPTY>\n"
         f"... and so on.\n\n"
         f"Query: {query}\n\nChunks:\n{numbered}"
     ))
     # The reply may carry no text at all (r.text falsy/None); treat that as
     # an unparseable reply instead of letting the regex raise TypeError.
     reply = r.text or ""
     # Only a "[i]" at the start of a line with 1 <= i <= len(chunks) opens a
     # section, so bracketed numbers inside extracted sentences (citations
     # such as "[12]") no longer split a section or get dropped.
     marker = re.compile(r"^[ \t]*\[(\d+)\]", re.MULTILINE)
     marks = [m for m in marker.finditer(reply)
              if 1 <= int(m.group(1)) <= len(chunks)]
     if not marks:
         # Parsing failed: fall back to the uncompressed chunks.
         return chunks
     # Each section runs from the end of its marker to the next marker.
     bounds = [m.start() for m in marks[1:]] + [len(reply)]
     out = []
     for mark, end in zip(marks, bounds):
         text = reply[mark.end():end].strip()
         # The prompt shows "<... or EMPTY>", so "<EMPTY>" / "EMPTY." also
         # count as the empty sentinel.
         if text and text.strip("<>. \t").upper() != "EMPTY":
             out.append(text)
     # Every section EMPTY means nothing is relevant: return [] rather than
     # handing every uncompressed chunk back to the caller.
     return out
 def vsearch(query, namespace, user_id, k):
     return (supabase.rpc("match_documents", {