Update main.py
Browse files
main.py
CHANGED
|
@@ -176,6 +176,8 @@ async def lifespan(app: FastAPI):
|
|
| 176 |
|
| 177 |
|
| 178 |
# ─────────────────────────── App ─────────────────────────────────────────────
|
|
|
|
|
|
|
| 179 |
app = FastAPI(
|
| 180 |
title="Code Search API",
|
| 181 |
description=(
|
|
@@ -195,10 +197,20 @@ app.add_middleware(
|
|
| 195 |
|
| 196 |
|
| 197 |
# ─────────────────────────── Embedding helpers ────────────────────────────────
|
|
|
|
|
|
|
| 198 |
def encode(texts: list[str]) -> np.ndarray:
|
| 199 |
-
"""
|
| 200 |
-
|
| 201 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 202 |
|
| 203 |
|
| 204 |
async def encode_async(texts: list[str]) -> np.ndarray:
|
|
@@ -396,6 +408,13 @@ async def index_document(
|
|
| 396 |
raise HTTPException(503, "Model not loaded yet — please retry in a few seconds.")
|
| 397 |
|
| 398 |
content = await file.read()
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 399 |
source = content.decode("utf-8", errors="replace")
|
| 400 |
filename = file.filename or "unknown"
|
| 401 |
resolved_id = doc_id.strip() or os.path.splitext(filename)[0]
|
|
@@ -436,6 +455,14 @@ async def index_batch(req: BatchIndexRequest):
|
|
| 436 |
if not all_chunks:
|
| 437 |
raise HTTPException(400, "No chunks produced from provided files.")
|
| 438 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 439 |
embeddings = await encode_async(all_chunks)
|
| 440 |
index = build_faiss_index(embeddings.astype("float32"))
|
| 441 |
store[req.doc_id] = {"chunks": all_chunks, "index": index}
|
|
|
|
| 176 |
|
| 177 |
|
| 178 |
# ─────────────────────────── App ─────────────────────────────────────────────
|
| 179 |
+
MAX_UPLOAD_BYTES = int(os.getenv("MAX_UPLOAD_MB", "50")) * 1024 * 1024 # default 50 MB
|
| 180 |
+
|
| 181 |
app = FastAPI(
|
| 182 |
title="Code Search API",
|
| 183 |
description=(
|
|
|
|
| 197 |
|
| 198 |
|
| 199 |
# ─────────────────────────── Embedding helpers ────────────────────────────────
|
| 200 |
+
ENCODE_BATCH_SIZE = 32 # chunks per model forward pass — tune down if OOM
|
| 201 |
+
|
| 202 |
def encode(texts: list[str]) -> np.ndarray:
|
| 203 |
+
"""
|
| 204 |
+
Synchronous encode with micro-batching.
|
| 205 |
+
Processes ENCODE_BATCH_SIZE chunks at a time to avoid OOM on large files.
|
| 206 |
+
Works for both document chunks and queries.
|
| 207 |
+
"""
|
| 208 |
+
all_embeddings = []
|
| 209 |
+
for i in range(0, len(texts), ENCODE_BATCH_SIZE):
|
| 210 |
+
batch = texts[i : i + ENCODE_BATCH_SIZE]
|
| 211 |
+
embs = models["model"].encode(batch, show_progress_bar=False)
|
| 212 |
+
all_embeddings.append(np.array(embs))
|
| 213 |
+
return np.vstack(all_embeddings).astype("float32")
|
| 214 |
|
| 215 |
|
| 216 |
async def encode_async(texts: list[str]) -> np.ndarray:
|
|
|
|
| 408 |
raise HTTPException(503, "Model not loaded yet — please retry in a few seconds.")
|
| 409 |
|
| 410 |
content = await file.read()
|
| 411 |
+
if len(content) > MAX_UPLOAD_BYTES:
|
| 412 |
+
raise HTTPException(
|
| 413 |
+
413,
|
| 414 |
+
f"File too large ({len(content) / 1024 / 1024:.1f} MB). "
|
| 415 |
+
f"Max allowed: {MAX_UPLOAD_BYTES // 1024 // 1024} MB. "
|
| 416 |
+
"Use /index/batch for large codebases.",
|
| 417 |
+
)
|
| 418 |
source = content.decode("utf-8", errors="replace")
|
| 419 |
filename = file.filename or "unknown"
|
| 420 |
resolved_id = doc_id.strip() or os.path.splitext(filename)[0]
|
|
|
|
| 455 |
if not all_chunks:
|
| 456 |
raise HTTPException(400, "No chunks produced from provided files.")
|
| 457 |
|
| 458 |
+
MAX_CHUNKS = int(os.getenv("MAX_CHUNKS", "20000"))
|
| 459 |
+
if len(all_chunks) > MAX_CHUNKS:
|
| 460 |
+
raise HTTPException(
|
| 461 |
+
413,
|
| 462 |
+
f"Too many chunks ({len(all_chunks):,}). Max: {MAX_CHUNKS:,}. "
|
| 463 |
+
"Split your project into smaller doc_id groups.",
|
| 464 |
+
)
|
| 465 |
+
|
| 466 |
embeddings = await encode_async(all_chunks)
|
| 467 |
index = build_faiss_index(embeddings.astype("float32"))
|
| 468 |
store[req.doc_id] = {"chunks": all_chunks, "index": index}
|