Spaces:
Running
Running
Update main.py
Browse files
main.py
CHANGED
|
@@ -11,6 +11,7 @@ import chess.engine
|
|
| 11 |
import asyncio
|
| 12 |
import json
|
| 13 |
import gc
|
|
|
|
| 14 |
|
| 15 |
# ─── Multiplayer / Challenge Manager ──────────────────────────────────────────
|
| 16 |
class ConnectionManager:
|
|
@@ -242,9 +243,59 @@ async def _engine_call(engine, coro, timeout_sec: float):
|
|
| 242 |
raise HTTPException(status_code=504, detail="Engine search timed out")
|
| 243 |
|
| 244 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 245 |
@asynccontextmanager
|
| 246 |
async def lifespan(app: FastAPI):
|
|
|
|
|
|
|
|
|
|
| 247 |
yield
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 248 |
await shutdown_engine_async()
|
| 249 |
|
| 250 |
|
|
@@ -277,7 +328,6 @@ async def websocket_endpoint(websocket: WebSocket, match_id: str):
|
|
| 277 |
await manager.broadcast(data, match_id, exclude=websocket)
|
| 278 |
except WebSocketDisconnect:
|
| 279 |
manager.disconnect(websocket, match_id)
|
| 280 |
-
# FIX: Broadcast disconnect then nudge GC to free room state
|
| 281 |
await manager.broadcast(json.dumps({"type": "opponent_disconnected"}), match_id)
|
| 282 |
gc.collect()
|
| 283 |
except Exception:
|
|
@@ -286,7 +336,7 @@ async def websocket_endpoint(websocket: WebSocket, match_id: str):
|
|
| 286 |
gc.collect()
|
| 287 |
|
| 288 |
|
| 289 |
-
# ─── Health ───────────────────────────────────────────────────────
|
| 290 |
@app.get("/")
|
| 291 |
def home():
|
| 292 |
return {"status": "online", "engine": "Deepcastle Hybrid Neural", "platform": "Hugging Face Spaces"}
|
|
@@ -297,7 +347,6 @@ def home():
|
|
| 297 |
def health():
|
| 298 |
if not os.path.exists(DEEPCASTLE_ENGINE_PATH):
|
| 299 |
return {"status": "error", "message": "Missing engine binary: deepcastle"}
|
| 300 |
-
# FIX: Nudge GC on every health ping
|
| 301 |
gc.collect()
|
| 302 |
return {"status": "ok", "engine": "deepcastle"}
|
| 303 |
|
|
@@ -317,7 +366,26 @@ async def health_ready():
|
|
| 317 |
raise HTTPException(status_code=503, detail=str(e))
|
| 318 |
|
| 319 |
|
| 320 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 321 |
@app.post("/new-game")
|
| 322 |
async def new_game():
|
| 323 |
"""
|
|
@@ -389,7 +457,6 @@ async def get_move(request: MoveRequest):
|
|
| 389 |
score_cp, mate_in = get_normalized_score(info)
|
| 390 |
depth, nodes, nps = normalize_search_stats(info)
|
| 391 |
|
| 392 |
-
# FIX: Use a local pv_board and delete it after use
|
| 393 |
pv_board = board.copy()
|
| 394 |
pv_parts = []
|
| 395 |
for m in info.get("pv", [])[:5]:
|
|
@@ -402,17 +469,15 @@ async def get_move(request: MoveRequest):
|
|
| 402 |
else:
|
| 403 |
break
|
| 404 |
pv = " ".join(pv_parts)
|
| 405 |
-
del pv_board # FIX:
|
| 406 |
|
| 407 |
score_pawns = score_cp / 100.0 if abs(score_cp) < 9900 else (100.0 if score_cp > 0 else -100.0)
|
| 408 |
-
|
| 409 |
board_fen_only = board.fen().split(" ")[0]
|
| 410 |
opening_name = openings_db.get(board_fen_only)
|
| 411 |
best_move = result.move.uci()
|
| 412 |
|
| 413 |
-
# FIX:
|
| 414 |
-
del
|
| 415 |
-
del info
|
| 416 |
|
| 417 |
return MoveResponse(
|
| 418 |
bestmove=best_move,
|
|
@@ -457,7 +522,6 @@ async def get_analysis_move(request: MoveRequest):
|
|
| 457 |
score_cp, mate_in = get_normalized_score(info)
|
| 458 |
depth, nodes, nps = normalize_search_stats(info)
|
| 459 |
|
| 460 |
-
# FIX: Use a local pv_board and delete it after use
|
| 461 |
pv_board = board.copy()
|
| 462 |
pv_parts = []
|
| 463 |
for m in info.get("pv", [])[:5]:
|
|
@@ -470,19 +534,17 @@ async def get_analysis_move(request: MoveRequest):
|
|
| 470 |
else:
|
| 471 |
break
|
| 472 |
pv = " ".join(pv_parts)
|
| 473 |
-
del pv_board # FIX:
|
| 474 |
|
| 475 |
score_pawns = score_cp / 100.0 if abs(score_cp) < 9900 else (100.0 if score_cp > 0 else -100.0)
|
| 476 |
-
|
| 477 |
board_fen_only = board.fen().split(" ")[0]
|
| 478 |
opening_name = openings_db.get(board_fen_only)
|
| 479 |
best_move = result.move.uci()
|
| 480 |
|
| 481 |
-
# FIX:
|
| 482 |
-
del
|
| 483 |
-
del info
|
| 484 |
|
| 485 |
-
# FIX: Clear hash after hint —
|
| 486 |
async with _ENGINE_IO_LOCK:
|
| 487 |
await _clear_engine_hash(engine)
|
| 488 |
gc.collect()
|
|
@@ -681,8 +743,7 @@ async def analyze_game(request: AnalyzeRequest):
|
|
| 681 |
|
| 682 |
player_is_white = (request.player_color.lower() == "white")
|
| 683 |
|
| 684 |
-
# FIX: Sliding window
|
| 685 |
-
# We only ever need the last 3 FENs and last 2 moves for classification
|
| 686 |
fen_window: List[str] = [board.fen()]
|
| 687 |
move_window: List[chess.Move] = []
|
| 688 |
|
|
@@ -713,11 +774,10 @@ async def analyze_game(request: AnalyzeRequest):
|
|
| 713 |
alt_win_pct_before = get_win_percentage(line)
|
| 714 |
break
|
| 715 |
|
| 716 |
-
# FIX: Copy board before move, delete right after classification
|
| 717 |
board_before_move = board.copy()
|
| 718 |
board.push(move)
|
| 719 |
|
| 720 |
-
# FIX: Sliding window — discard oldest
|
| 721 |
move_window.append(move)
|
| 722 |
if len(move_window) > 2:
|
| 723 |
move_window.pop(0)
|
|
@@ -739,13 +799,11 @@ async def analyze_game(request: AnalyzeRequest):
|
|
| 739 |
win_pct_after = get_win_percentage(info_after_dict)
|
| 740 |
score_after, _ = get_normalized_score(info_after_dict)
|
| 741 |
current_score = score_after
|
| 742 |
-
|
| 743 |
best_pv_after = info_after_dict.get("pv", [])
|
| 744 |
|
| 745 |
fen_two_moves_ago = fen_window[0] if len(fen_window) == 3 else None
|
| 746 |
uci_next_two_moves = tuple(move_window[-2:]) if len(move_window) >= 2 else None
|
| 747 |
|
| 748 |
-
# Classify
|
| 749 |
cls = "Book"
|
| 750 |
opening_name = None
|
| 751 |
board_fen_only = board.fen().split(" ")[0]
|
|
@@ -767,7 +825,7 @@ async def analyze_game(request: AnalyzeRequest):
|
|
| 767 |
best_pv_after=best_pv_after
|
| 768 |
)
|
| 769 |
|
| 770 |
-
# FIX: Free board copy immediately after classification
|
| 771 |
del board_before_move
|
| 772 |
|
| 773 |
move_gain = score_after - score_before if is_white_turn else score_before - score_after
|
|
|
|
| 11 |
import asyncio
|
| 12 |
import json
|
| 13 |
import gc
|
| 14 |
+
import psutil
|
| 15 |
|
| 16 |
# ─── Multiplayer / Challenge Manager ──────────────────────────────────────────
|
| 17 |
class ConnectionManager:
|
|
|
|
| 243 |
raise HTTPException(status_code=504, detail="Engine search timed out")
|
| 244 |
|
| 245 |
|
| 246 |
+
# ─── Background Memory Cleanup Task ───────────────────────────────────────────
|
| 247 |
+
# RAM threshold in MB above which engine hash is also cleared (tune to your HF plan)
|
| 248 |
+
_RAM_CLEANUP_THRESHOLD_MB = float(os.environ.get("RAM_CLEANUP_THRESHOLD_MB", "400"))
|
| 249 |
+
# How often to run the cleanup in seconds (default: 5 minutes)
|
| 250 |
+
_RAM_CLEANUP_INTERVAL_SEC = int(os.environ.get("RAM_CLEANUP_INTERVAL_SEC", "300"))
|
| 251 |
+
|
| 252 |
+
async def memory_cleanup_task():
|
| 253 |
+
"""
|
| 254 |
+
Background task that runs every _RAM_CLEANUP_INTERVAL_SEC seconds (default: 300, i.e. 5 minutes).
|
| 255 |
+
- Always nudges Python GC to free unreferenced objects.
|
| 256 |
+
- If RAM exceeds threshold, also clears engine hash table.
|
| 257 |
+
"""
|
| 258 |
+
while True:
|
| 259 |
+
await asyncio.sleep(_RAM_CLEANUP_INTERVAL_SEC)
|
| 260 |
+
try:
|
| 261 |
+
process = psutil.Process(os.getpid())
|
| 262 |
+
mem_mb = process.memory_info().rss / 1024 / 1024
|
| 263 |
+
|
| 264 |
+
# Always run Python GC
|
| 265 |
+
gc.collect()
|
| 266 |
+
|
| 267 |
+
if mem_mb > _RAM_CLEANUP_THRESHOLD_MB:
|
| 268 |
+
print(f"[CLEANUP] RAM at {mem_mb:.1f}MB (threshold {_RAM_CLEANUP_THRESHOLD_MB}MB) β clearing engine hash")
|
| 269 |
+
engine = _GLOBAL_DEEPCASTLE_ENGINE
|
| 270 |
+
if engine is not None:
|
| 271 |
+
try:
|
| 272 |
+
if not engine.is_terminated():
|
| 273 |
+
async with _ENGINE_IO_LOCK:
|
| 274 |
+
await _clear_engine_hash(engine)
|
| 275 |
+
except Exception:
|
| 276 |
+
pass
|
| 277 |
+
gc.collect()
|
| 278 |
+
after_mb = process.memory_info().rss / 1024 / 1024
|
| 279 |
+
print(f"[CLEANUP] Done. RAM: {mem_mb:.1f}MB β {after_mb:.1f}MB")
|
| 280 |
+
else:
|
| 281 |
+
print(f"[CLEANUP] RAM at {mem_mb:.1f}MB β OK, no action needed")
|
| 282 |
+
|
| 283 |
+
except Exception as e:
|
| 284 |
+
print(f"[CLEANUP] Error during cleanup: {e}")
|
| 285 |
+
|
| 286 |
+
|
| 287 |
@asynccontextmanager
|
| 288 |
async def lifespan(app: FastAPI):
|
| 289 |
+
# Start background memory cleanup task on boot
|
| 290 |
+
cleanup_task = asyncio.create_task(memory_cleanup_task())
|
| 291 |
+
print(f"[STARTUP] Memory cleanup task started (every {_RAM_CLEANUP_INTERVAL_SEC}s, threshold {_RAM_CLEANUP_THRESHOLD_MB}MB)")
|
| 292 |
yield
|
| 293 |
+
# On shutdown: cancel cleanup task then quit engine
|
| 294 |
+
cleanup_task.cancel()
|
| 295 |
+
try:
|
| 296 |
+
await cleanup_task
|
| 297 |
+
except asyncio.CancelledError:
|
| 298 |
+
pass
|
| 299 |
await shutdown_engine_async()
|
| 300 |
|
| 301 |
|
|
|
|
| 328 |
await manager.broadcast(data, match_id, exclude=websocket)
|
| 329 |
except WebSocketDisconnect:
|
| 330 |
manager.disconnect(websocket, match_id)
|
|
|
|
| 331 |
await manager.broadcast(json.dumps({"type": "opponent_disconnected"}), match_id)
|
| 332 |
gc.collect()
|
| 333 |
except Exception:
|
|
|
|
| 336 |
gc.collect()
|
| 337 |
|
| 338 |
|
| 339 |
+
# ─── Health & Monitoring ───────────────────────────────────────────────────────
|
| 340 |
@app.get("/")
|
| 341 |
def home():
|
| 342 |
return {"status": "online", "engine": "Deepcastle Hybrid Neural", "platform": "Hugging Face Spaces"}
|
|
|
|
| 347 |
def health():
|
| 348 |
if not os.path.exists(DEEPCASTLE_ENGINE_PATH):
|
| 349 |
return {"status": "error", "message": "Missing engine binary: deepcastle"}
|
|
|
|
| 350 |
gc.collect()
|
| 351 |
return {"status": "ok", "engine": "deepcastle"}
|
| 352 |
|
|
|
|
| 366 |
raise HTTPException(status_code=503, detail=str(e))
|
| 367 |
|
| 368 |
|
| 369 |
+
@app.get("/ram")
|
| 370 |
+
def ram_usage():
|
| 371 |
+
"""Monitor RAM usage — call this anytime to check memory health."""
|
| 372 |
+
process = psutil.Process(os.getpid())
|
| 373 |
+
mem = process.memory_info()
|
| 374 |
+
mem_mb = mem.rss / 1024 / 1024
|
| 375 |
+
return {
|
| 376 |
+
"rss_mb": round(mem_mb, 2), # actual RAM used
|
| 377 |
+
"vms_mb": round(mem.vms / 1024 / 1024, 2), # virtual memory
|
| 378 |
+
"threshold_mb": _RAM_CLEANUP_THRESHOLD_MB, # cleanup trigger
|
| 379 |
+
"cleanup_interval_sec": _RAM_CLEANUP_INTERVAL_SEC, # how often cleanup runs
|
| 380 |
+
"status": "high" if mem_mb > _RAM_CLEANUP_THRESHOLD_MB else "ok",
|
| 381 |
+
"active_rooms": len(manager.active_connections), # live websocket rooms
|
| 382 |
+
"active_connections": sum(
|
| 383 |
+
len(v) for v in manager.active_connections.values()
|
| 384 |
+
),
|
| 385 |
+
}
|
| 386 |
+
|
| 387 |
+
|
| 388 |
+
# FIX: Call from frontend on game start/end to clear engine hash
|
| 389 |
@app.post("/new-game")
|
| 390 |
async def new_game():
|
| 391 |
"""
|
|
|
|
| 457 |
score_cp, mate_in = get_normalized_score(info)
|
| 458 |
depth, nodes, nps = normalize_search_stats(info)
|
| 459 |
|
|
|
|
| 460 |
pv_board = board.copy()
|
| 461 |
pv_parts = []
|
| 462 |
for m in info.get("pv", [])[:5]:
|
|
|
|
| 469 |
else:
|
| 470 |
break
|
| 471 |
pv = " ".join(pv_parts)
|
| 472 |
+
del pv_board # FIX: Free board copy
|
| 473 |
|
| 474 |
score_pawns = score_cp / 100.0 if abs(score_cp) < 9900 else (100.0 if score_cp > 0 else -100.0)
|
|
|
|
| 475 |
board_fen_only = board.fen().split(" ")[0]
|
| 476 |
opening_name = openings_db.get(board_fen_only)
|
| 477 |
best_move = result.move.uci()
|
| 478 |
|
| 479 |
+
del result # FIX: Free engine result
|
| 480 |
+
del info # FIX: Free info dict
|
|
|
|
| 481 |
|
| 482 |
return MoveResponse(
|
| 483 |
bestmove=best_move,
|
|
|
|
| 522 |
score_cp, mate_in = get_normalized_score(info)
|
| 523 |
depth, nodes, nps = normalize_search_stats(info)
|
| 524 |
|
|
|
|
| 525 |
pv_board = board.copy()
|
| 526 |
pv_parts = []
|
| 527 |
for m in info.get("pv", [])[:5]:
|
|
|
|
| 534 |
else:
|
| 535 |
break
|
| 536 |
pv = " ".join(pv_parts)
|
| 537 |
+
del pv_board # FIX: Free board copy
|
| 538 |
|
| 539 |
score_pawns = score_cp / 100.0 if abs(score_cp) < 9900 else (100.0 if score_cp > 0 else -100.0)
|
|
|
|
| 540 |
board_fen_only = board.fen().split(" ")[0]
|
| 541 |
opening_name = openings_db.get(board_fen_only)
|
| 542 |
best_move = result.move.uci()
|
| 543 |
|
| 544 |
+
del result # FIX: Free engine result
|
| 545 |
+
del info # FIX: Free info dict
|
|
|
|
| 546 |
|
| 547 |
+
# FIX: Clear hash after hint — one-shot search, no continuity needed
|
| 548 |
async with _ENGINE_IO_LOCK:
|
| 549 |
await _clear_engine_hash(engine)
|
| 550 |
gc.collect()
|
|
|
|
| 743 |
|
| 744 |
player_is_white = (request.player_color.lower() == "white")
|
| 745 |
|
| 746 |
+
# FIX: Sliding window — only keep last 3 FENs and last 2 moves, never grows
|
|
|
|
| 747 |
fen_window: List[str] = [board.fen()]
|
| 748 |
move_window: List[chess.Move] = []
|
| 749 |
|
|
|
|
| 774 |
alt_win_pct_before = get_win_percentage(line)
|
| 775 |
break
|
| 776 |
|
|
|
|
| 777 |
board_before_move = board.copy()
|
| 778 |
board.push(move)
|
| 779 |
|
| 780 |
+
# FIX: Sliding window — discard oldest beyond what we need
|
| 781 |
move_window.append(move)
|
| 782 |
if len(move_window) > 2:
|
| 783 |
move_window.pop(0)
|
|
|
|
| 799 |
win_pct_after = get_win_percentage(info_after_dict)
|
| 800 |
score_after, _ = get_normalized_score(info_after_dict)
|
| 801 |
current_score = score_after
|
|
|
|
| 802 |
best_pv_after = info_after_dict.get("pv", [])
|
| 803 |
|
| 804 |
fen_two_moves_ago = fen_window[0] if len(fen_window) == 3 else None
|
| 805 |
uci_next_two_moves = tuple(move_window[-2:]) if len(move_window) >= 2 else None
|
| 806 |
|
|
|
|
| 807 |
cls = "Book"
|
| 808 |
opening_name = None
|
| 809 |
board_fen_only = board.fen().split(" ")[0]
|
|
|
|
| 825 |
best_pv_after=best_pv_after
|
| 826 |
)
|
| 827 |
|
| 828 |
+
# FIX: Free board copy immediately after classification
|
| 829 |
del board_before_move
|
| 830 |
|
| 831 |
move_gain = score_after - score_before if is_white_turn else score_before - score_after
|