imtrt004 committed on
Commit
b5be2eb
·
0 Parent(s):

Initial backend

Browse files
.env.example ADDED
@@ -0,0 +1,5 @@
 
 
 
 
 
 
1
+ # hf-backend HuggingFace Space environment variables
2
+ # Set these in your HF Space settings → Variables and Secrets
3
+
4
+ SUPABASE_URL=https://YOUR_PROJECT_REF.supabase.co
5
+ # NOT the anon key — use the service role key. (Kept on its own line:
+ # some dotenv parsers treat inline "#" text as part of the value.)
+ SUPABASE_KEY=your_service_role_key_here
Dockerfile ADDED
@@ -0,0 +1,22 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
FROM python:3.12-slim

# Unbuffered stdout/stderr so startup and request logs appear immediately
# in the HF Spaces console (otherwise Python buffers them in the container).
ENV PYTHONUNBUFFERED=1

WORKDIR /app

# Build toolchain needed to compile llama-cpp-python from source
RUN apt-get update && apt-get install -y \
    build-essential cmake git curl \
    && rm -rf /var/lib/apt/lists/*

# Copy and install Python deps first so this layer caches across code edits
COPY requirements.txt .

# Build llama-cpp-python for plain CPU (no BLAS, no -march=native)
RUN CMAKE_ARGS="-DLLAMA_BLAS=OFF -DLLAMA_NATIVE=OFF" \
    pip install llama-cpp-python==0.3.8 --no-cache-dir

RUN pip install -r requirements.txt --no-cache-dir

COPY . .

EXPOSE 7860
CMD ["uvicorn", "app:app", "--host", "0.0.0.0", "--port", "7860", "--workers", "1", "--timeout-keep-alive", "120"]
app.py ADDED
@@ -0,0 +1,206 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from contextlib import asynccontextmanager
2
+ from fastapi import FastAPI, UploadFile, HTTPException, BackgroundTasks
3
+ from fastapi.responses import StreamingResponse
4
+ from fastapi.middleware.cors import CORSMiddleware
5
+ from pydantic import BaseModel
6
+ from supabase import create_client
7
+ import uuid
8
+ import os
9
+
10
+ from model.loader import get_llm
11
+ from retrieval.embedder import get_model, embed_chunks, embed_query
12
+ from retrieval.vectorstore import store_chunks, similarity_search
13
+ from ingestion.parser import parse_file
14
+ from ingestion.chunker import smart_chunk
15
+ from generation.llm import stream_answer
16
+ from generation.quiz import generate_quiz
17
+ from persistence.tier import (
18
+ get_user_tier,
19
+ get_expiry,
20
+ can_upload,
21
+ check_message_limit,
22
+ Tier,
23
+ )
24
+
25
+
26
def _supa():
    """Build a fresh Supabase client from the env-configured URL and key."""
    url = os.environ["SUPABASE_URL"]
    key = os.environ["SUPABASE_KEY"]
    return create_client(url, key)
28
+
29
+
30
+ # ─── Lifespan (replaces deprecated @app.on_event) ───────────────────────────
31
+
32
@asynccontextmanager
async def lifespan(app: FastAPI):
    """Warm both models at startup so the first user request is not slow."""
    print("🚀 Warming up models...")
    get_model()  # embedding model (BGE-small) — fast to load
    get_llm()    # Qwen3-4B GGUF — slow on first boot
    print("✅ Ready")
    yield
    # No teardown needed: models are freed when the process exits.
41
+
42
+
43
app = FastAPI(title="RAG Backend", lifespan=lifespan)

# NOTE(review): CORS is wide open — restrict allow_origins to the real
# frontend domain before production.
app.add_middleware(
    CORSMiddleware,
    allow_origins=["*"],
    allow_credentials=True,
    allow_methods=["*"],
    allow_headers=["*"],
)
52
+
53
+
54
+ # ─── Upload ──────────────────────────────────────────────────────────────────
55
+
56
@app.post("/upload")
async def upload(
    file: UploadFile,
    user_id: str,
    bg: BackgroundTasks,
):
    """Accept a document, enforce tier quotas, store it, queue processing.

    Returns immediately with status "processing"; the heavy pipeline runs
    as a background task and flips the document row's status when done.
    """
    content = await file.read()
    allowed, reason = can_upload(user_id, len(content))
    if not allowed:
        raise HTTPException(status_code=403, detail=reason)

    tier = get_user_tier(user_id)
    expires = get_expiry(tier)
    doc_id = str(uuid.uuid4())
    supa = _supa()

    # Keep the raw upload in Supabase Storage under <user>/<doc>/<name>.
    storage_path = f"{user_id}/{doc_id}/{file.filename}"
    supa.storage.from_("documents").upload(
        path=storage_path,
        file=content,
        file_options={"content-type": file.content_type or "application/octet-stream"},
    )

    # Metadata row; status becomes "ready" or "error" after processing.
    supa.table("documents").insert({
        "id": doc_id,
        "user_id": user_id,
        "filename": file.filename,
        "status": "processing",
        "tier_at_upload": str(tier),
        "expires_at": expires.isoformat(),
    }).execute()

    # Heavy work (parse → chunk → embed → store) runs after the response.
    bg.add_task(_process_doc, content, doc_id, user_id, expires, file.filename)

    return {"doc_id": doc_id, "status": "processing", "expires_at": expires.isoformat()}
93
+
94
+
95
def _process_doc(content, doc_id, user_id, expires, filename):
    """Parse → chunk → embed → store one uploaded document.

    Deliberately a plain (sync) function: Starlette runs sync background
    tasks in a worker thread, so the CPU-bound parsing and embedding below
    no longer block the event loop (the original ``async def`` version ran
    this heavy work directly on the loop, stalling all other requests).

    On success the document row is set to status "ready" with its
    chunk_count; on any failure it is set to "error" with the message.
    """
    supa = _supa()
    try:
        text = parse_file(content, filename)
        chunks = smart_chunk(text)
        embeds = embed_chunks(chunks)
        store_chunks(doc_id, user_id, chunks, embeds, expires)
        supa.table("documents").update(
            {"status": "ready", "chunk_count": len(chunks)}
        ).eq("id", doc_id).execute()
    except Exception as e:  # boundary: record the failure on the doc row
        supa.table("documents").update(
            {"status": "error", "error": str(e)}
        ).eq("id", doc_id).execute()
107
+
108
+
109
+ # ─── Chat ────────────────────────────────────────────────────────────────────
110
+
111
class ChatRequest(BaseModel):
    """Payload for POST /chat."""

    doc_id: str      # document to retrieve context from
    query: str       # the user's question
    user_id: str     # used for tier lookup and history rows
    session_id: str  # groups messages in chat_history
116
+
117
+
118
@app.post("/chat")
async def chat(req: ChatRequest):
    """RAG chat endpoint: retrieve context, stream the answer over SSE.

    The user message is persisted before streaming starts; the assistant
    message is persisted once the stream completes. Both rows inherit the
    tier-based expiry timestamp.
    """
    allowed, reason = check_message_limit(req.user_id, req.session_id)
    if not allowed:
        raise HTTPException(status_code=429, detail=reason)

    tier = get_user_tier(req.user_id)
    expires = get_expiry(tier)

    # Retrieve the most relevant chunks for this question.
    chunks = similarity_search(req.doc_id, embed_query(req.query), top_k=5)
    if not chunks:
        raise HTTPException(status_code=404, detail="Document expired or not found.")

    use_thinking = tier == Tier.SCHOLAR  # Scholar unlocks Qwen3 thinking mode
    supa = _supa()
    full_resp: list[str] = []

    # Record the user's turn before generation begins.
    supa.table("chat_history").insert({
        "doc_id": req.doc_id,
        "session_id": req.session_id,
        "user_id": req.user_id,
        "role": "user",
        "content": req.query,
        "expires_at": expires.isoformat(),
    }).execute()

    def generate():
        for token in stream_answer(req.query, chunks, thinking_mode=use_thinking):
            full_resp.append(token)
            yield f"data: {token}\n\n"

        # Stream finished — persist the assistant's full reply.
        supa.table("chat_history").insert({
            "doc_id": req.doc_id,
            "session_id": req.session_id,
            "user_id": req.user_id,
            "role": "assistant",
            "content": "".join(full_resp),
            "expires_at": expires.isoformat(),
        }).execute()
        yield "data: [DONE]\n\n"

    return StreamingResponse(
        generate(),
        media_type="text/event-stream",
        headers={"X-Accel-Buffering": "no"},  # keep nginx from buffering SSE
    )
168
+
169
+
170
+ # ─── Quiz ────────────────────────────────────────────────────────────────────
171
+
172
class QuizRequest(BaseModel):
    """Payload for POST /quiz."""

    doc_id: str
    query: str  # last question asked — quiz reuses the same retrieval context
    user_id: str
176
+
177
+
178
@app.post("/quiz")
async def quiz(req: QuizRequest):
    """Generate a multiple-choice quiz from context near the last query.

    Gated to paid tiers; 404s when the document's chunks are gone.
    """
    tier = get_user_tier(req.user_id)
    if tier not in (Tier.SCHOLAR, Tier.PRO):
        raise HTTPException(status_code=403, detail="Quiz mode requires Pro or Scholar plan.")

    chunks = similarity_search(req.doc_id, embed_query(req.query), top_k=3)
    if not chunks:
        raise HTTPException(status_code=404, detail="Document not found or expired.")

    questions = generate_quiz(chunks)
    return {"questions": questions}
192
+
193
+
194
+ # ─── Utility ─────────────────────────────────────────────────────────────────
195
+
196
@app.get("/doc-status/{doc_id}")
async def doc_status(doc_id: str):
    """Return processing status, chunk count, and expiry for a document.

    Uses ``maybe_single()`` so an unknown id yields a clean 404 instead of
    the unhandled error (→ HTTP 500) that ``.single()`` raises when zero
    rows match.
    """
    supa = _supa()
    result = (
        supa.table("documents")
        .select("status,chunk_count,expires_at")
        .eq("id", doc_id)
        .maybe_single()
        .execute()
    )
    if result is None or result.data is None:
        raise HTTPException(status_code=404, detail="Document not found.")
    return result.data
202
+
203
+
204
@app.get("/health")
def health():
    """Liveness probe; also reports which LLM build this backend serves."""
    return {"status": "alive", "model": "Qwen3-4B-Instruct-Q4_K_M"}
generation/__init__.py ADDED
File without changes
generation/llm.py ADDED
@@ -0,0 +1,49 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from model.loader import get_llm
2
+ from typing import Generator
3
+
4
# Grounding contract for the chat model: context-only answers, plus a fixed
# refusal phrase that a frontend can match on verbatim.
SYSTEM_PROMPT = """You are a precise document study assistant by Md Tusar Akon.
Answer ONLY from the provided context. Be concise and factual.
If the answer is not in the context, say exactly: "I couldn't find that in your document."
Never make up or infer information not present in the context."""
8
+
9
+
10
def stream_answer(
    query: str,
    context_chunks: list[str],
    thinking_mode: bool = False,
) -> Generator[str, None, None]:
    """Stream answer tokens for *query* grounded in *context_chunks*.

    When ``thinking_mode`` is on, Qwen3's ``/think`` tag is appended to the
    user message; otherwise ``/no_think``. Either way, any
    ``<think>...</think>`` reasoning block is filtered out of the stream.

    Fix over the original filter: a delta that mixes visible text with a
    ``<think>`` or ``</think>`` tag no longer loses the visible part — the
    delta is split around the tags instead of being skipped wholesale.
    """
    llm = get_llm()
    context = "\n\n---\n\n".join(context_chunks)

    # Qwen3 native thinking toggle — appended to the user message.
    think_tag = "/think" if thinking_mode else "/no_think"

    messages = [
        {"role": "system", "content": SYSTEM_PROMPT},
        {
            "role": "user",
            "content": f"Context:\n{context}\n\nQuestion: {query} {think_tag}",
        },
    ]

    in_think_block = False
    for chunk in llm.create_chat_completion(
        messages=messages,
        max_tokens=600,
        temperature=0.2,
        top_p=0.95,
        top_k=20,
        stream=True,
    ):
        delta = chunk["choices"][0]["delta"].get("content", "")
        if not delta:
            continue

        # Split the delta around think tags so adjacent visible text survives.
        if "<think>" in delta:
            before, _, delta = delta.partition("<think>")
            if before and not in_think_block:
                yield before
            in_think_block = True
        if "</think>" in delta:
            _, _, delta = delta.partition("</think>")
            in_think_block = False
        if delta and not in_think_block:
            yield delta
generation/quiz.py ADDED
@@ -0,0 +1,50 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from model.loader import get_llm
2
+ import json
3
+ import re
4
+
5
# Prompt template for quiz generation. {context} is filled per call; the
# doubled braces keep str.format from consuming the JSON example's braces.
QUIZ_PROMPT = """Based on the context below, generate exactly 3 multiple-choice quiz questions.
Each question must test understanding of the content, not trivia.

Context:
{context}

Respond ONLY with a JSON array, no markdown, no explanation:
[
{{
"question": "...",
"options": ["A) ...", "B) ...", "C) ...", "D) ..."],
"answer": "A",
"explanation": "Brief explanation why"
}},
...
]"""
21
+
22
+
23
def generate_quiz(context_chunks: list[str]) -> list[dict]:
    """Generate 3 multiple-choice questions from the top context chunks.

    Returns a list of question dicts, or [] on any parse failure — callers
    treat an empty list as "quiz unavailable" rather than an error.
    """
    llm = get_llm()
    context = "\n\n".join(context_chunks[:3])  # top-3 chunks keep the prompt small

    messages = [
        {
            "role": "user",
            # /no_think suppresses Qwen3's reasoning block for clean JSON
            "content": QUIZ_PROMPT.format(context=context) + " /no_think",
        }
    ]

    result = llm.create_chat_completion(
        messages=messages,
        max_tokens=800,
        temperature=0.4,
        stream=False,
    )

    raw = result["choices"][0]["message"]["content"]

    # Models often wrap JSON in markdown fences or surrounding prose despite
    # the instructions: strip fences, then extract the outermost [...] array
    # so stray leading/trailing text no longer breaks json.loads.
    raw = re.sub(r"```json|```", "", raw).strip()
    match = re.search(r"\[.*\]", raw, re.DOTALL)
    if match:
        raw = match.group(0)

    try:
        questions = json.loads(raw)
        return questions if isinstance(questions, list) else []
    except json.JSONDecodeError:
        return []
ingestion/__init__.py ADDED
File without changes
ingestion/chunker.py ADDED
@@ -0,0 +1,11 @@
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from langchain_text_splitters import RecursiveCharacterTextSplitter
2
+
3
+
4
def smart_chunk(text: str, chunk_size: int = 512, overlap: int = 64) -> list[str]:
    """Split *text* into overlapping chunks, dropping fragments of <= 30 chars.

    Splits preferentially on paragraph, line, and sentence boundaries before
    falling back to words and characters.
    """
    splitter = RecursiveCharacterTextSplitter(
        chunk_size=chunk_size,
        chunk_overlap=overlap,
        separators=["\n\n", "\n", ".", "!", "?", " ", ""],
        length_function=len,
    )
    pieces = splitter.split_text(text)
    return [piece for piece in pieces if len(piece.strip()) > 30]
ingestion/parser.py ADDED
@@ -0,0 +1,22 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import io
2
+ import pymupdf # pymupdf 1.25+ import (not fitz)
3
+ from docx import Document
4
+
5
+
6
def parse_file(content: bytes, filename: str) -> str:
    """Extract plain text from an uploaded file, dispatching on extension.

    Supports .pdf (PyMuPDF), .docx (python-docx), and .txt/.md (UTF-8 with
    replacement for undecodable bytes).

    Raises:
        ValueError: for any other extension. Fix over the original: the
        message now names the offending file instead of the dead
        "(unknown)" placeholder, and the PDF handle is closed via
        try/finally so a failed extraction no longer leaks it.
    """
    fname = filename.lower()

    if fname.endswith(".pdf"):
        doc = pymupdf.open(stream=content, filetype="pdf")
        try:
            pages = [page.get_text() for page in doc]
        finally:
            doc.close()
        return "\n\n".join(pages)

    if fname.endswith(".docx"):
        doc = Document(io.BytesIO(content))
        return "\n\n".join(p.text for p in doc.paragraphs if p.text.strip())

    if fname.endswith((".txt", ".md")):
        return content.decode("utf-8", errors="replace")

    raise ValueError(f"Unsupported file type: {filename}")
model/__init__.py ADDED
File without changes
model/loader.py ADDED
@@ -0,0 +1,23 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from llama_cpp import Llama
2
+ from contextlib import asynccontextmanager
3
+
4
+ _llm: Llama | None = None
5
+
6
+
7
def get_llm() -> Llama:
    """Return the process-wide Llama instance, loading it on first call."""
    global _llm
    if _llm is not None:
        return _llm

    print("⏳ Loading Qwen3-4B-Instruct Q4_K_M...")
    _llm = Llama.from_pretrained(
        repo_id="Qwen/Qwen3-4B-GGUF",
        filename="qwen3-4b-q4_k_m.gguf",
        # chat_format=None → use the jinja template embedded in the GGUF,
        # the recommended path for Qwen3 (avoids chat_format mismatches).
        chat_format=None,
        n_ctx=8192,
        n_threads=2,     # HF free CPU tier exposes 2 vCPUs
        n_gpu_layers=0,  # CPU-only deployment
        verbose=False,
    )
    print("✅ Qwen3-4B loaded and ready")
    return _llm
persistence/__init__.py ADDED
File without changes
persistence/tier.py ADDED
@@ -0,0 +1,90 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from datetime import datetime, timedelta, UTC
2
+ from enum import StrEnum
3
+ from supabase import create_client
4
+ import os
5
+
6
+
7
def _client():
    """Build a Supabase client from the env-configured URL and service key."""
    url, key = os.environ["SUPABASE_URL"], os.environ["SUPABASE_KEY"]
    return create_client(url, key)
9
+
10
+
11
+ class Tier(StrEnum):
12
+ FREE = "free"
13
+ PRO = "pro"
14
+ SCHOLAR = "scholar"
15
+
16
+
17
# Per-tier retention: how long uploaded content and chat rows live.
TTL: dict[Tier, timedelta] = {
    Tier.FREE: timedelta(hours=3),
    Tier.PRO: timedelta(weeks=1),
    Tier.SCHOLAR: timedelta(days=30),
}

# Quota tables (None = unlimited).
FILE_LIMIT_MB: dict[Tier, int] = {Tier.FREE: 5, Tier.PRO: 25, Tier.SCHOLAR: 50}   # max upload size
DOC_LIMIT: dict[Tier, int | None] = {Tier.FREE: 1, Tier.PRO: 10, Tier.SCHOLAR: None}   # max stored docs
MSG_LIMIT: dict[Tier, int | None] = {Tier.FREE: 5, Tier.PRO: 100, Tier.SCHOLAR: None}  # see check_message_limit
26
+
27
+
28
def get_user_tier(user_id: str) -> Tier:
    """Look up the user's plan from profiles.tier.

    Falls back to FREE for a missing *or NULL* tier value: the original
    ``r.data.get("tier", "free")`` still returned None when the column was
    present but NULL, making ``Tier(None)`` raise ValueError.
    """
    r = _client().table("profiles").select("tier").eq("id", user_id).single().execute()
    return Tier(r.data.get("tier") or "free")
31
+
32
+
33
def get_expiry(tier: Tier) -> datetime:
    """UTC timestamp at which content uploaded now under *tier* expires."""
    ttl = TTL[tier]
    return datetime.now(UTC) + ttl
35
+
36
+
37
def can_upload(user_id: str, file_bytes: int) -> tuple[bool, str]:
    """Check file-size and document-count quotas before accepting an upload.

    Returns (True, "ok") when allowed, else (False, human-readable reason).
    """
    tier = get_user_tier(user_id)

    # Size cap first — cheapest check, needs no extra query.
    limit_mb = FILE_LIMIT_MB[tier]
    if file_bytes > limit_mb * 1024 * 1024:
        return False, f"File exceeds {limit_mb}MB limit on {tier} plan."

    # Document-count cap (None means unlimited).
    max_docs = DOC_LIMIT[tier]
    if max_docs is not None:
        resp = (
            _client()
            .table("documents")
            .select("id", count="exact")
            .eq("user_id", user_id)
            .execute()
        )
        if resp.count >= max_docs:
            return False, f"{tier.capitalize()} allows {max_docs} doc(s). Upgrade to store more."

    return True, "ok"
58
+
59
+
60
def check_message_limit(user_id: str, session_id: str) -> tuple[bool, str]:
    """Enforce the per-tier chat quota.

    FREE counts user messages within the current session; other limited
    tiers count user messages sent since midnight UTC. Unlimited tiers
    short-circuit. Returns (True, "ok") or (False, reason).
    """
    tier = get_user_tier(user_id)
    limit = MSG_LIMIT[tier]
    if limit is None:
        return True, "ok"

    client = _client()
    query = client.table("chat_history").select("id", count="exact").eq("role", "user")
    if tier == Tier.FREE:
        # FREE: the cap applies per chat session.
        query = query.eq("session_id", session_id)
    else:
        # Paid limited tiers: daily cap across all sessions.
        today = datetime.now(UTC).date().isoformat()
        query = query.eq("user_id", user_id).gte("created_at", today)

    used = query.execute().count
    if used >= limit:
        return False, f"Message limit reached on {tier} plan. Upgrade to continue."
    return True, "ok"
requirements.txt ADDED
@@ -0,0 +1,12 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ fastapi==0.129.0
2
+ uvicorn[standard]==0.34.0
3
+ llama-cpp-python==0.3.8
4
+ sentence-transformers==4.1.0
5
+ huggingface-hub==0.29.1
6
+ supabase==2.13.0
7
+ pymupdf==1.25.3
8
+ python-docx==1.1.2
9
+ langchain-text-splitters==0.3.8
10
+ pydantic==2.11.0
11
+ python-multipart==0.0.20
12
+ httpx==0.28.1
retrieval/__init__.py ADDED
File without changes
retrieval/embedder.py ADDED
@@ -0,0 +1,26 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from sentence_transformers import SentenceTransformer
2
+ import numpy as np
3
+
4
+ _model: SentenceTransformer | None = None
5
+
6
+
7
def get_model() -> SentenceTransformer:
    """Lazily load and cache the shared BGE-small embedding model."""
    global _model
    if _model is None:
        # bge-small-en-v1.5: ~130MB, 384-dim — good speed/quality on CPU.
        _model = SentenceTransformer("BAAI/bge-small-en-v1.5")
    return _model
13
+
14
+
15
def embed_chunks(chunks: list[str]) -> list[list[float]]:
    """Embed document chunks as L2-normalized vectors (one per chunk)."""
    vectors = get_model().encode(chunks, normalize_embeddings=True, batch_size=32)
    return vectors.tolist()
19
+
20
+
21
def embed_query(query: str) -> list[float]:
    """Embed a search query with the BGE retrieval instruction prefix.

    bge-en v1.5 models expect *queries* (not passages) to be prefixed with
    "Represent this sentence for searching relevant passages: ". The
    original prefix was truncated ("...for searching: "), which degrades
    retrieval quality against passages embedded without any prefix.
    """
    model = get_model()
    prefixed = f"Represent this sentence for searching relevant passages: {query}"
    vec = model.encode(prefixed, normalize_embeddings=True)
    return vec.tolist()
retrieval/vectorstore.py ADDED
@@ -0,0 +1,48 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from supabase import create_client, Client
2
+ from datetime import datetime
3
+ import os
4
+
5
+
6
def _client() -> Client:
    """Supabase client built from SUPABASE_URL / SUPABASE_KEY env vars."""
    url, key = os.environ["SUPABASE_URL"], os.environ["SUPABASE_KEY"]
    return create_client(url, key)
8
+
9
+
10
def store_chunks(
    doc_id: str,
    user_id: str,
    chunks: list[str],
    embeddings: list[list[float]],
    expires_at: datetime,
) -> None:
    """Persist chunk texts and their embeddings to the `chunks` table.

    Rows are inserted in batches of 100 to stay under payload limits.
    """
    expiry = expires_at.isoformat()
    rows = []
    for index, (text, vector) in enumerate(zip(chunks, embeddings)):
        rows.append(
            {
                "doc_id": doc_id,
                "user_id": user_id,
                "chunk_text": text,
                "embedding": vector,
                "chunk_index": index,
                "expires_at": expiry,
            }
        )

    client = _client()
    batch_size = 100
    for start in range(0, len(rows), batch_size):
        client.table("chunks").insert(rows[start : start + batch_size]).execute()
32
+
33
+
34
def similarity_search(
    doc_id: str,
    query_embedding: list[float],
    top_k: int = 5,
) -> list[str]:
    """Return the top_k most similar chunk texts for one document.

    Delegates ranking to the `match_chunks` RPC on the database side.
    """
    params = {
        "query_embedding": query_embedding,
        "doc_id_filter": doc_id,
        "match_count": top_k,
    }
    response = _client().rpc("match_chunks", params).execute()
    return [row["chunk_text"] for row in response.data]