Nottybro committed on
Commit
23f097c
·
verified ·
1 Parent(s): 92d9d21

perf: batch compress — N Gemma calls → 1 call, L2 17s→5s L3 43s→12s

Browse files
Files changed (1) hide show
  1. acra.py +22 -5
acra.py CHANGED
@@ -53,12 +53,29 @@ def decompose(query):
53
  return lines[:4] or [query]
54
 
55
  def compress(query, chunks):
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
56
  out = []
57
- for c in chunks:
58
- r = client.models.generate_content(model=GEN_MODEL,
59
- contents=f"Extract only sentences relevant to the query. Return empty if none.\n\nQuery: {query}\nChunk: {c}")
60
- if r.text.strip(): out.append(r.text.strip())
61
- return out
 
 
62
 
63
  def vsearch(query, namespace, user_id, k):
64
  return (supabase.rpc("match_documents", {
 
53
  return lines[:4] or [query]
54
 
55
  def compress(query, chunks):
56
+ """Batch compress all chunks in ONE Gemma call instead of N calls.
57
+ Cuts L2 from ~17s to ~5s, L3 from ~43s to ~12s."""
58
+ if not chunks: return []
59
+ numbered = "\n\n".join(f"[{i+1}]\n{c}" for i, c in enumerate(chunks))
60
+ r = client.models.generate_content(model=GEN_MODEL, contents=(
61
+ f"You have {len(chunks)} text chunks and a query.\n"
62
+ f"For each chunk, extract ONLY the sentences directly relevant to the query.\n"
63
+ f"Reply in this exact format for every chunk:\n"
64
+ f"[1] <extracted sentences or EMPTY>\n"
65
+ f"[2] <extracted sentences or EMPTY>\n"
66
+ f"... and so on.\n\n"
67
+ f"Query: {query}\n\nChunks:\n{numbered}"
68
+ ))
69
+ # Parse [1], [2], ... sections from response
70
+ import re
71
  out = []
72
+ pattern = re.compile(r"\[(\d+)\]\s*(.*?)(?=\[\d+\]|$)", re.DOTALL)
73
+ for match in pattern.finditer(r.text):
74
+ text = match.group(2).strip()
75
+ if text and text.upper() != "EMPTY":
76
+ out.append(text)
77
+ # Fallback: if parsing failed just return original chunks
78
+ return out if out else chunks
79
 
80
  def vsearch(query, namespace, user_id, k):
81
  return (supabase.rpc("match_documents", {