kamp0010 committed on
Commit
75a4aee
·
verified ·
1 Parent(s): 06a598f

Update main.py

Browse files
Files changed (1) hide show
  1. main.py +30 -3
main.py CHANGED
@@ -176,6 +176,8 @@ async def lifespan(app: FastAPI):
176
 
177
 
178
  # ─────────────────────────── App ─────────────────────────────────────────────
 
 
179
  app = FastAPI(
180
  title="Code Search API",
181
  description=(
@@ -195,10 +197,20 @@ app.add_middleware(
195
 
196
 
197
  # ─────────────────────────── Embedding helpers ────────────────────────────────
 
 
198
def encode(texts: list[str]) -> np.ndarray:
    """Embed *texts* synchronously; used for both document chunks and queries."""
    vectors = models["model"].encode(texts, show_progress_bar=False)
    # Normalize whatever the model returns (list / ndarray) to float32.
    return np.asarray(vectors, dtype="float32")
 
 
 
 
 
 
 
 
202
 
203
 
204
  async def encode_async(texts: list[str]) -> np.ndarray:
@@ -396,6 +408,13 @@ async def index_document(
396
  raise HTTPException(503, "Model not loaded yet β€” please retry in a few seconds.")
397
 
398
  content = await file.read()
 
 
 
 
 
 
 
399
  source = content.decode("utf-8", errors="replace")
400
  filename = file.filename or "unknown"
401
  resolved_id = doc_id.strip() or os.path.splitext(filename)[0]
@@ -436,6 +455,14 @@ async def index_batch(req: BatchIndexRequest):
436
  if not all_chunks:
437
  raise HTTPException(400, "No chunks produced from provided files.")
438
 
 
 
 
 
 
 
 
 
439
  embeddings = await encode_async(all_chunks)
440
  index = build_faiss_index(embeddings.astype("float32"))
441
  store[req.doc_id] = {"chunks": all_chunks, "index": index}
 
176
 
177
 
178
  # ─────────────────────────── App ─────────────────────────────────────────────
179
# Largest accepted single-file upload; tune via the MAX_UPLOAD_MB env var (default 50 MB).
MAX_UPLOAD_BYTES = 1024 * 1024 * int(os.getenv("MAX_UPLOAD_MB", "50"))
181
  app = FastAPI(
182
  title="Code Search API",
183
  description=(
 
197
 
198
 
199
  # ─────────────────────────── Embedding helpers ────────────────────────────────
200
# Number of chunks embedded per model forward pass; lower this if the encoder OOMs.
ENCODE_BATCH_SIZE = 32
202
def encode(texts: list[str]) -> np.ndarray:
    """
    Synchronously embed *texts* with micro-batching.

    Processes ENCODE_BATCH_SIZE chunks per model forward pass to avoid
    OOM on large files. Works for both document chunks and queries.

    Args:
        texts: strings to embed.

    Returns:
        2-D float32 array, one row per input text (assumes the model
        yields fixed-size vectors — TODO confirm with the model config).
        An empty input returns an empty float32 array.
    """
    # Guard: with no input the loop below produces no batches, and
    # np.vstack([]) raises ValueError — return an empty array instead
    # (matches the behavior of the previous non-batched implementation).
    if not texts:
        return np.empty((0,), dtype="float32")
    all_embeddings = []
    for i in range(0, len(texts), ENCODE_BATCH_SIZE):
        batch = texts[i : i + ENCODE_BATCH_SIZE]
        embs = models["model"].encode(batch, show_progress_bar=False)
        all_embeddings.append(np.array(embs))
    return np.vstack(all_embeddings).astype("float32")
214
 
215
 
216
  async def encode_async(texts: list[str]) -> np.ndarray:
 
408
  raise HTTPException(503, "Model not loaded yet β€” please retry in a few seconds.")
409
 
410
  content = await file.read()
411
+ if len(content) > MAX_UPLOAD_BYTES:
412
+ raise HTTPException(
413
+ 413,
414
+ f"File too large ({len(content) / 1024 / 1024:.1f} MB). "
415
+ f"Max allowed: {MAX_UPLOAD_BYTES // 1024 // 1024} MB. "
416
+ "Use /index/batch for large codebases.",
417
+ )
418
  source = content.decode("utf-8", errors="replace")
419
  filename = file.filename or "unknown"
420
  resolved_id = doc_id.strip() or os.path.splitext(filename)[0]
 
455
  if not all_chunks:
456
  raise HTTPException(400, "No chunks produced from provided files.")
457
 
458
+ MAX_CHUNKS = int(os.getenv("MAX_CHUNKS", "20000"))
459
+ if len(all_chunks) > MAX_CHUNKS:
460
+ raise HTTPException(
461
+ 413,
462
+ f"Too many chunks ({len(all_chunks):,}). Max: {MAX_CHUNKS:,}. "
463
+ "Split your project into smaller doc_id groups.",
464
+ )
465
+
466
  embeddings = await encode_async(all_chunks)
467
  index = build_faiss_index(embeddings.astype("float32"))
468
  store[req.doc_id] = {"chunks": all_chunks, "index": index}