Spaces:

Antaram
/

server1

Sleeping

App Files Files Community

Antaram committed on Dec 6, 2025

Commit

06d16fa

verified ·

1 Parent(s): 787b02e

Upload 3 files

Browse files

Files changed (1) hide show

app.py +66 -66

app.py CHANGED Viewed

@@ -368,72 +368,72 @@ async def fast_chat(prompt: str = "", max_tokens: int = 512):
     return {"response": ''.join(response_parts)}
- # ===== Mini-server load tracking & coordination endpoints =====
- # How many concurrent requests this mini should handle
- MAX_CONCURRENT_REQUESTS = int(os.environ.get("MAX_CONCURRENT_REQUESTS", "1"))
- # In-memory tracking per process
- current_requests = 0
- # For identification / debugging
- MINI_SERVER_ID = os.environ.get("MINI_SERVER_ID", "mini-1")
- class MiniStatus(BaseModel):
-     server_id: str
-     max_concurrent: int
-     current_requests: int
-     status: str
- @app.get("/status")
- async def mini_status():
-     """
-     Used by the main server to know if this mini is idle/busy.
-     """
-     status = "busy" if current_requests >= MAX_CONCURRENT_REQUESTS else "idle"
-     return MiniStatus(
-         server_id=MINI_SERVER_ID,
-         max_concurrent=MAX_CONCURRENT_REQUESTS,
-         current_requests=current_requests,
-         status=status,
-     )
- @app.post("/reserve")
- async def reserve_slot():
-     """
-     Called by the main server BEFORE it forwards a chat request.
-     If this mini is full, returns 429 so main server can try another mini.
-     """
-     global current_requests
-     if current_requests >= MAX_CONCURRENT_REQUESTS:
-         raise HTTPException(status_code=429, detail="Mini server busy")
-     current_requests += 1
-     return {
-         "server_id": MINI_SERVER_ID,
-         "current_requests": current_requests,
-         "max_concurrent": MAX_CONCURRENT_REQUESTS,
-     }
- @app.post("/release")
- async def release_slot():
-     """
-     Called by the main server after request is finished (stream closed/response sent).
-     """
-     global current_requests
-     if current_requests > 0:
-         current_requests -= 1
-     return {
-         "server_id": MINI_SERVER_ID,
-         "current_requests": current_requests,
-         "max_concurrent": MAX_CONCURRENT_REQUESTS,
-     }
- if __name__ == "__main__":
     uvicorn.run(
         app,
         host="0.0.0.0",

     return {"response": ''.join(response_parts)}
+# ===== Mini-server load tracking & coordination endpoints =====
+# How many concurrent requests this mini should handle
+MAX_CONCURRENT_REQUESTS = int(os.environ.get("MAX_CONCURRENT_REQUESTS", "1"))
+# In-memory tracking per process
+current_requests = 0
+# For identification / debugging
+MINI_SERVER_ID = os.environ.get("MINI_SERVER_ID", "mini-1")
+class MiniStatus(BaseModel):
+    server_id: str
+    max_concurrent: int
+    current_requests: int
+    status: str
+@app.get("/status")
+async def mini_status():
+    """
+    Used by the main server to know if this mini is idle/busy.
+    """
+    status = "busy" if current_requests >= MAX_CONCURRENT_REQUESTS else "idle"
+    return MiniStatus(
+        server_id=MINI_SERVER_ID,
+        max_concurrent=MAX_CONCURRENT_REQUESTS,
+        current_requests=current_requests,
+        status=status,
+    )
+@app.post("/reserve")
+async def reserve_slot():
+    """
+    Called by the main server BEFORE it forwards a chat request.
+    If this mini is full, returns 429 so main server can try another mini.
+    """
+    global current_requests
+    if current_requests >= MAX_CONCURRENT_REQUESTS:
+        raise HTTPException(status_code=429, detail="Mini server busy")
+    current_requests += 1
+    return {
+        "server_id": MINI_SERVER_ID,
+        "current_requests": current_requests,
+        "max_concurrent": MAX_CONCURRENT_REQUESTS,
+    }
+@app.post("/release")
+async def release_slot():
+    """
+    Called by the main server after request is finished (stream closed/response sent).
+    """
+    global current_requests
+    if current_requests > 0:
+        current_requests -= 1
+    return {
+        "server_id": MINI_SERVER_ID,
+        "current_requests": current_requests,
+        "max_concurrent": MAX_CONCURRENT_REQUESTS,
+    }
+if __name__ == "__main__":
     uvicorn.run(
         app,
         host="0.0.0.0",