Update main.py
Browse files
main.py
CHANGED
|
@@ -176,6 +176,8 @@ async def lifespan(app: FastAPI):
|
|
| 176 |
|
| 177 |
|
| 178 |
# ─────────────────────────── App ─────────────────────────────────────────────
|
|
|
|
|
|
|
| 179 |
app = FastAPI(
|
| 180 |
title="Code Search API",
|
| 181 |
description=(
|
|
@@ -195,10 +197,20 @@ app.add_middleware(
|
|
| 195 |
|
| 196 |
|
| 197 |
# ─────────────────────────── Embedding helpers ────────────────────────────────
|
|
|
|
|
|
|
| 198 |
def encode(texts: list[str]) -> np.ndarray:
|
| 199 |
-
"""
|
| 200 |
-
|
| 201 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 202 |
|
| 203 |
|
| 204 |
async def encode_async(texts: list[str]) -> np.ndarray:
|
|
@@ -396,6 +408,13 @@ async def index_document(
|
|
| 396 |
raise HTTPException(503, "Model not loaded yet — please retry in a few seconds.")
|
| 397 |
|
| 398 |
content = await file.read()
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 399 |
source = content.decode("utf-8", errors="replace")
|
| 400 |
filename = file.filename or "unknown"
|
| 401 |
resolved_id = doc_id.strip() or os.path.splitext(filename)[0]
|
|
@@ -436,6 +455,14 @@ async def index_batch(req: BatchIndexRequest):
|
|
| 436 |
if not all_chunks:
|
| 437 |
raise HTTPException(400, "No chunks produced from provided files.")
|
| 438 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 439 |
embeddings = await encode_async(all_chunks)
|
| 440 |
index = build_faiss_index(embeddings.astype("float32"))
|
| 441 |
store[req.doc_id] = {"chunks": all_chunks, "index": index}
|
|
|
|
| 176 |
|
| 177 |
|
| 178 |
# ─────────────────────────── App ─────────────────────────────────────────────
|
| 179 |
+
MAX_UPLOAD_BYTES = int(os.getenv("MAX_UPLOAD_MB", "50")) * 1024 * 1024 # default 50 MB
|
| 180 |
+
|
| 181 |
app = FastAPI(
|
| 182 |
title="Code Search API",
|
| 183 |
description=(
|
|
|
|
| 197 |
|
| 198 |
|
| 199 |
# ─────────────────────────── Embedding helpers ────────────────────────────────
|
| 200 |
+
ENCODE_BATCH_SIZE = 32 # chunks per model forward pass — tune down if OOM
|
| 201 |
+
|
| 202 |
def encode(texts: list[str]) -> np.ndarray:
|
| 203 |
+
"""
|
| 204 |
+
Synchronous encode with micro-batching.
|
| 205 |
+
Processes ENCODE_BATCH_SIZE chunks at a time to avoid OOM on large files.
|
| 206 |
+
Works for both document chunks and queries.
|
| 207 |
+
"""
|
| 208 |
+
all_embeddings = []
|
| 209 |
+
for i in range(0, len(texts), ENCODE_BATCH_SIZE):
|
| 210 |
+
batch = texts[i : i + ENCODE_BATCH_SIZE]
|
| 211 |
+
embs = models["model"].encode(batch, show_progress_bar=False)
|
| 212 |
+
all_embeddings.append(np.array(embs))
|
| 213 |
+
return np.vstack(all_embeddings).astype("float32")
|
| 214 |
|
| 215 |
|
| 216 |
async def encode_async(texts: list[str]) -> np.ndarray:
|
|
|
|
| 408 |
raise HTTPException(503, "Model not loaded yet — please retry in a few seconds.")
|
| 409 |
|
| 410 |
content = await file.read()
|
| 411 |
+
if len(content) > MAX_UPLOAD_BYTES:
|
| 412 |
+
raise HTTPException(
|
| 413 |
+
413,
|
| 414 |
+
f"File too large ({len(content) / 1024 / 1024:.1f} MB). "
|
| 415 |
+
f"Max allowed: {MAX_UPLOAD_BYTES // 1024 // 1024} MB. "
|
| 416 |
+
"Use /index/batch for large codebases.",
|
| 417 |
+
)
|
| 418 |
source = content.decode("utf-8", errors="replace")
|
| 419 |
filename = file.filename or "unknown"
|
| 420 |
resolved_id = doc_id.strip() or os.path.splitext(filename)[0]
|
|
|
|
| 455 |
if not all_chunks:
|
| 456 |
raise HTTPException(400, "No chunks produced from provided files.")
|
| 457 |
|
| 458 |
+
MAX_CHUNKS = int(os.getenv("MAX_CHUNKS", "20000"))
|
| 459 |
+
if len(all_chunks) > MAX_CHUNKS:
|
| 460 |
+
raise HTTPException(
|
| 461 |
+
413,
|
| 462 |
+
f"Too many chunks ({len(all_chunks):,}). Max: {MAX_CHUNKS:,}. "
|
| 463 |
+
"Split your project into smaller doc_id groups.",
|
| 464 |
+
)
|
| 465 |
+
|
| 466 |
embeddings = await encode_async(all_chunks)
|
| 467 |
index = build_faiss_index(embeddings.astype("float32"))
|
| 468 |
store[req.doc_id] = {"chunks": all_chunks, "index": index}
|