perf: batch compress — N Gemma calls → 1 call, L2 17s→5s L3 43s→12s
Browse files
acra.py
CHANGED
|
@@ -53,12 +53,29 @@ def decompose(query):
|
|
| 53 |
return lines[:4] or [query]
|
| 54 |
|
| 55 |
def compress(query, chunks):
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 56 |
out = []
|
| 57 |
-
|
| 58 |
-
|
| 59 |
-
|
| 60 |
-
if
|
| 61 |
-
|
|
|
|
|
|
|
| 62 |
|
| 63 |
def vsearch(query, namespace, user_id, k):
|
| 64 |
return (supabase.rpc("match_documents", {
|
|
|
|
| 53 |
return lines[:4] or [query]
|
| 54 |
|
| 55 |
def compress(query, chunks):
    """Batch-compress all chunks in ONE Gemma call instead of N calls.

    Every chunk is numbered ``[1]..[N]`` and sent in a single prompt that
    asks the model to return, per chunk, only the sentences directly
    relevant to ``query`` (or the word ``EMPTY``).
    Reported effect of batching: L2 from ~17s to ~5s, L3 from ~43s to ~12s.

    Args:
        query: The query the extracted sentences must be relevant to.
        chunks: Sequence of chunk texts to compress.

    Returns:
        A list of non-empty extractions ordered by chunk number; chunks the
        model answered with ``EMPTY`` are dropped. Returns ``[]`` when
        ``chunks`` is empty, and the original ``chunks`` unchanged when no
        usable section could be parsed from the reply.
    """
    if not chunks:
        return []

    numbered = "\n\n".join(f"[{i+1}]\n{c}" for i, c in enumerate(chunks))
    prompt = (
        f"You have {len(chunks)} text chunks and a query.\n"
        "For each chunk, extract ONLY the sentences directly relevant to the query.\n"
        "Reply in this exact format for every chunk:\n"
        "[1] <extracted sentences or EMPTY>\n"
        "[2] <extracted sentences or EMPTY>\n"
        "... and so on.\n\n"
        f"Query: {query}\n\nChunks:\n{numbered}"
    )
    r = client.models.generate_content(model=GEN_MODEL, contents=prompt)

    # The response may carry no text at all (e.g. an empty or blocked reply);
    # treat that as "nothing parsed" instead of crashing inside the regex.
    out = _parse_batched_reply(r.text or "", len(chunks))

    # Fallback: if parsing produced nothing, return the original chunks.
    return out if out else chunks


def _parse_batched_reply(reply, chunk_count):
    """Split a ``[1] ... [2] ...`` model reply into per-chunk extractions.

    Only ``[n]`` markers that start a line and whose ``n`` lies in
    ``1..chunk_count`` are treated as section boundaries, so bracketed
    numbers inside the extracted text (citations, indices) stay part of the
    section they appear in. The first non-empty answer per chunk number wins.
    """
    import re

    marker = re.compile(r"^[ \t]*\[(\d{1,9})\][ \t]*", re.MULTILINE)
    marks = [
        m for m in marker.finditer(reply)
        if 1 <= int(m.group(1)) <= chunk_count
    ]

    sections = {}
    for pos, mark in enumerate(marks):
        # A section runs from just after its marker to the next valid marker.
        end = marks[pos + 1].start() if pos + 1 < len(marks) else len(reply)
        body = reply[mark.end():end].strip()
        index = int(mark.group(1))
        if body and body.upper() != "EMPTY" and index not in sections:
            sections[index] = body

    # Emit in chunk order so the caller's ranking of chunks is preserved.
    return [sections[i] for i in sorted(sections)]
|
| 79 |
|
| 80 |
def vsearch(query, namespace, user_id, k):
|
| 81 |
return (supabase.rpc("match_documents", {
|