Spaces:

Amogh1221
/

deepcastle-api

Running

App Files Community

Amogh1221 committed on Mar 30

Commit

7953192

verified ·

1 Parent(s): c60b8ec

Update main.py

Browse files

Files changed (1) hide show

main.py +29 -102

main.py CHANGED Viewed

@@ -122,13 +122,6 @@ _GLOBAL_DEEPCASTLE_ENGINE = None
 _ENGINE_LOCK = asyncio.Lock()
 _ENGINE_IO_LOCK = asyncio.Lock()
-# Global state for monitoring
-_PENDING_ENGINE_REQUESTS = 0
-_MAX_PENDING_ENGINE_REQUESTS = 10  # Drop requests if queue is too long to prevent OOM
-_MAX_PENDING_GAME_REVIEWS = 2     # Max concurrent full game reviews (heavy RAM)
-_PENDING_GAME_REVIEWS = 0
-_LOCK_TIMEOUT_SEC = 20.0            # Max wait time in queue before giving up
 def _engine_hash_mb() -> int:
     try:
@@ -268,13 +261,8 @@ async def _engine_call(engine, coro, timeout_sec: float):
 # ─── Background Memory Cleanup Task ───────────────────────────────────────────
-_RAM_CLEANUP_THRESHOLD_MB = float(os.environ.get("RAM_CLEANUP_THRESHOLD_MB", "1024"))
-# Global System Limit: 18GB (All analysis sessions freed)
-_RAM_PANIC_THRESHOLD_MB = 18000.0
-# Individual Session Limit: 4GB
-_RAM_SESSION_LIMIT_MB = 4000.0
-# How often to run the cleanup in seconds (5s)
-_RAM_CLEANUP_INTERVAL_SEC = 5
 async def memory_cleanup_task():
     """
@@ -288,14 +276,7 @@ async def memory_cleanup_task():
             process = psutil.Process(os.getpid())
             mem_mb = process.memory_info().rss / 1024 / 1024
-            # PANIC CASE: Over 18GB - total server reset
-            if mem_mb > _RAM_PANIC_THRESHOLD_MB:
-                print(f"[PANIC] RAM at {mem_mb:.1f}MB! Exceeded 18GB System limit. Resetting everything...")
-                await shutdown_engine_async()
-                force_memory_release()
-            # NORMAL CLEANUP
-            elif mem_mb > _RAM_CLEANUP_THRESHOLD_MB:
                 print(f"[CLEANUP] RAM at {mem_mb:.1f}MB (threshold {_RAM_CLEANUP_THRESHOLD_MB}MB) — clearing engine hash")
                 engine = _GLOBAL_DEEPCASTLE_ENGINE
                 if engine is not None:
@@ -463,35 +444,25 @@ def normalize_search_stats(info: dict) -> Tuple[int, int, int]:
 # ─── Bot Move (/move) ──────────────────────────────────────────────────────────
 @app.post("/move", response_model=MoveResponse)
 async def get_move(request: MoveRequest):
-    global _PENDING_ENGINE_REQUESTS
-    if _PENDING_ENGINE_REQUESTS > _MAX_PENDING_ENGINE_REQUESTS:
-        force_memory_release()
-        raise HTTPException(status_code=429, detail="Server busy — too many analysis requests.")
-    _PENDING_ENGINE_REQUESTS += 1
     try:
         engine = await get_deepcastle_engine()
         board = chess.Board(request.fen)
         limit = chess.engine.Limit(time=request.time, depth=request.depth)
         tsec = _search_timeout_sec(request.time, request.depth)
-        try:
-            async with asyncio.timeout(_LOCK_TIMEOUT_SEC):
-                async with _ENGINE_IO_LOCK:
-                    result = await _engine_call(
-                        engine,
-                        engine.play(board, limit, info=chess.engine.INFO_ALL),
-                        tsec,
-                    )
-                    info = dict(result.info)
-                    if not info:
-                        info = await _engine_call(
-                            engine,
-                            engine.analyse(board, limit, info=chess.engine.INFO_ALL),
-                            tsec,
-                        )
-        except asyncio.TimeoutError:
-            raise HTTPException(status_code=503, detail="Server overloaded — lock wait timeout.")
         score_cp, mate_in = get_normalized_score(info)
         depth, nodes, nps = normalize_search_stats(info)
@@ -517,11 +488,6 @@ async def get_move(request: MoveRequest):
         del result
         del info
-        # Match Shield: Force cleanup and hash clear after every move to prevent "Idle Growth"
-        async with _ENGINE_IO_LOCK:
-            await _clear_engine_hash(engine)
-        force_memory_release()
         return MoveResponse(
             bestmove=best_move,
@@ -538,42 +504,30 @@ async def get_move(request: MoveRequest):
     except Exception as e:
         print(f"Error: {e}")
         raise HTTPException(status_code=500, detail=str(e))
-    finally:
-        _PENDING_ENGINE_REQUESTS -= 1
 # ─── Hint Move (/analysis-move) ───────────────────────────────────────────────
 @app.post("/analysis-move", response_model=MoveResponse)
 async def get_analysis_move(request: MoveRequest):
-    global _PENDING_ENGINE_REQUESTS
-    if _PENDING_ENGINE_REQUESTS > _MAX_PENDING_ENGINE_REQUESTS:
-        force_memory_release()
-        raise HTTPException(status_code=429, detail="Server busy — too many analysis requests.")
-    _PENDING_ENGINE_REQUESTS += 1
     try:
         engine = await get_stockfish_engine()
         board = chess.Board(request.fen)
         limit = chess.engine.Limit(time=request.time, depth=request.depth)
         tsec = _search_timeout_sec(request.time, request.depth)
-        try:
-            async with asyncio.timeout(_LOCK_TIMEOUT_SEC):
-                async with _ENGINE_IO_LOCK:
-                    result = await _engine_call(
-                        engine,
-                        engine.play(board, limit, info=chess.engine.INFO_ALL),
-                        tsec,
-                    )
-                    info = dict(result.info)
-                    if not info:
-                        info = await _engine_call(
-                            engine,
-                            engine.analyse(board, limit, info=chess.engine.INFO_ALL),
-                            tsec,
-                        )
-        except asyncio.TimeoutError:
-            raise HTTPException(status_code=503, detail="Server overloaded — lock wait timeout.")
         score_cp, mate_in = get_normalized_score(info)
         depth, nodes, nps = normalize_search_stats(info)
@@ -620,8 +574,6 @@ async def get_analysis_move(request: MoveRequest):
     except Exception as e:
         print(f"Analysis move error: {e}")
         raise HTTPException(status_code=500, detail=str(e))
-    finally:
-        _PENDING_ENGINE_REQUESTS -= 1
 # ─── Openings DB ───────────────────────────────────────────────────────────────
@@ -777,16 +729,6 @@ def get_move_classification(
 # ─── Game Analysis (/analyze-game) ────────────────────────────────────────────
 @app.post("/analyze-game", response_model=AnalyzeResponse)
 async def analyze_game(request: AnalyzeRequest):
-    global _PENDING_ENGINE_REQUESTS, _PENDING_GAME_REVIEWS
-    if _PENDING_GAME_REVIEWS >= _MAX_PENDING_GAME_REVIEWS:
-        raise HTTPException(status_code=429, detail="Analysis queue full. Only 2 games can be reviewed at once.")
-    if _PENDING_ENGINE_REQUESTS >= _MAX_PENDING_ENGINE_REQUESTS:
-        raise HTTPException(status_code=429, detail="Server busy — too many moving parts.")
-    _PENDING_ENGINE_REQUESTS += 1
-    _PENDING_GAME_REVIEWS += 1
     try:
         engine = await get_stockfish_engine()
         board = chess.Board(request.start_fen) if request.start_fen else chess.Board()
@@ -819,19 +761,7 @@ async def analyze_game(request: AnalyzeRequest):
         player_moves_count = 0
         current_score, _ = get_normalized_score(infos_before[0])
-        process = psutil.Process()
-        mem_start = process.memory_info().rss / 1024 / 1024
         for i, san_move in enumerate(request.moves):
-            # Check individual session growth (4GB limit)
-            mem_now = process.memory_info().rss / 1024 / 1024
-            if (mem_now - mem_start) > _RAM_SESSION_LIMIT_MB:
-                print(f"[ANALYSIS-RESET] Individual session exceeds 4GB growth. Clearing RAM mid-analysis.")
-                async with _ENGINE_IO_LOCK:
-                    await _clear_engine_hash(engine)
-                force_memory_release()
-                mem_start = process.memory_info().rss / 1024 / 1024
             is_white_turn = board.turn == chess.WHITE
             is_player_turn = is_white_turn if player_is_white else not is_white_turn
@@ -958,9 +888,6 @@ async def analyze_game(request: AnalyzeRequest):
     except Exception as e:
         print(f"Analysis Error: {e}")
         raise HTTPException(status_code=500, detail=str(e))
-    finally:
-        _PENDING_ENGINE_REQUESTS -= 1
-        _PENDING_GAME_REVIEWS -= 1
 if __name__ == "__main__":

 _ENGINE_LOCK = asyncio.Lock()
 _ENGINE_IO_LOCK = asyncio.Lock()
 def _engine_hash_mb() -> int:
     try:
 # ─── Background Memory Cleanup Task ───────────────────────────────────────────
+_RAM_CLEANUP_THRESHOLD_MB = float(os.environ.get("RAM_CLEANUP_THRESHOLD_MB", "400"))
+_RAM_CLEANUP_INTERVAL_SEC = int(os.environ.get("RAM_CLEANUP_INTERVAL_SEC", "300"))
 async def memory_cleanup_task():
     """
             process = psutil.Process(os.getpid())
             mem_mb = process.memory_info().rss / 1024 / 1024
+            if mem_mb > _RAM_CLEANUP_THRESHOLD_MB:
                 print(f"[CLEANUP] RAM at {mem_mb:.1f}MB (threshold {_RAM_CLEANUP_THRESHOLD_MB}MB) — clearing engine hash")
                 engine = _GLOBAL_DEEPCASTLE_ENGINE
                 if engine is not None:
 # ─── Bot Move (/move) ──────────────────────────────────────────────────────────
 @app.post("/move", response_model=MoveResponse)
 async def get_move(request: MoveRequest):
     try:
         engine = await get_deepcastle_engine()
         board = chess.Board(request.fen)
         limit = chess.engine.Limit(time=request.time, depth=request.depth)
         tsec = _search_timeout_sec(request.time, request.depth)
+        async with _ENGINE_IO_LOCK:
+            result = await _engine_call(
+                engine,
+                engine.play(board, limit, info=chess.engine.INFO_ALL),
+                tsec,
+            )
+            info = dict(result.info)
+            if not info:
+                info = await _engine_call(
+                    engine,
+                    engine.analyse(board, limit, info=chess.engine.INFO_ALL),
+                    tsec,
+                )
         score_cp, mate_in = get_normalized_score(info)
         depth, nodes, nps = normalize_search_stats(info)
         del result
         del info
         return MoveResponse(
             bestmove=best_move,
     except Exception as e:
         print(f"Error: {e}")
         raise HTTPException(status_code=500, detail=str(e))
 # ─── Hint Move (/analysis-move) ───────────────────────────────────────────────
 @app.post("/analysis-move", response_model=MoveResponse)
 async def get_analysis_move(request: MoveRequest):
     try:
         engine = await get_stockfish_engine()
         board = chess.Board(request.fen)
         limit = chess.engine.Limit(time=request.time, depth=request.depth)
         tsec = _search_timeout_sec(request.time, request.depth)
+        async with _ENGINE_IO_LOCK:
+            result = await _engine_call(
+                engine,
+                engine.play(board, limit, info=chess.engine.INFO_ALL),
+                tsec,
+            )
+            info = dict(result.info)
+            if not info:
+                info = await _engine_call(
+                    engine,
+                    engine.analyse(board, limit, info=chess.engine.INFO_ALL),
+                    tsec,
+                )
         score_cp, mate_in = get_normalized_score(info)
         depth, nodes, nps = normalize_search_stats(info)
     except Exception as e:
         print(f"Analysis move error: {e}")
         raise HTTPException(status_code=500, detail=str(e))
 # ─── Openings DB ───────────────────────────────────────────────────────────────
 # ─── Game Analysis (/analyze-game) ────────────────────────────────────────────
 @app.post("/analyze-game", response_model=AnalyzeResponse)
 async def analyze_game(request: AnalyzeRequest):
     try:
         engine = await get_stockfish_engine()
         board = chess.Board(request.start_fen) if request.start_fen else chess.Board()
         player_moves_count = 0
         current_score, _ = get_normalized_score(infos_before[0])
         for i, san_move in enumerate(request.moves):
             is_white_turn = board.turn == chess.WHITE
             is_player_turn = is_white_turn if player_is_white else not is_white_turn
     except Exception as e:
         print(f"Analysis Error: {e}")
         raise HTTPException(status_code=500, detail=str(e))
 if __name__ == "__main__":