Spaces:
Running
Running
Update main.py
Browse files
main.py
CHANGED
|
@@ -11,8 +11,25 @@ import chess.engine
|
|
| 11 |
import asyncio
|
| 12 |
import json
|
| 13 |
import gc
|
|
|
|
| 14 |
import psutil
|
| 15 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 16 |
# ─── Multiplayer / Challenge Manager ──────────────────────────────────────────
|
| 17 |
class ConnectionManager:
|
| 18 |
def __init__(self):
|
|
@@ -244,15 +261,13 @@ async def _engine_call(engine, coro, timeout_sec: float):
|
|
| 244 |
|
| 245 |
|
| 246 |
# ─── Background Memory Cleanup Task ───────────────────────────────────────────
|
| 247 |
-
# RAM threshold in MB above which engine hash is also cleared (tune to your HF plan)
|
| 248 |
_RAM_CLEANUP_THRESHOLD_MB = float(os.environ.get("RAM_CLEANUP_THRESHOLD_MB", "400"))
|
| 249 |
-
# How often to run the cleanup in seconds (default: 5 minutes)
|
| 250 |
_RAM_CLEANUP_INTERVAL_SEC = int(os.environ.get("RAM_CLEANUP_INTERVAL_SEC", "300"))
|
| 251 |
|
| 252 |
async def memory_cleanup_task():
|
| 253 |
"""
|
| 254 |
Background task that runs every 5 minutes.
|
| 255 |
-
- Always
|
| 256 |
- If RAM exceeds threshold, also clears engine hash table.
|
| 257 |
"""
|
| 258 |
while True:
|
|
@@ -261,9 +276,6 @@ async def memory_cleanup_task():
|
|
| 261 |
process = psutil.Process(os.getpid())
|
| 262 |
mem_mb = process.memory_info().rss / 1024 / 1024
|
| 263 |
|
| 264 |
-
# Always run Python GC
|
| 265 |
-
gc.collect()
|
| 266 |
-
|
| 267 |
if mem_mb > _RAM_CLEANUP_THRESHOLD_MB:
|
| 268 |
print(f"[CLEANUP] RAM at {mem_mb:.1f}MB (threshold {_RAM_CLEANUP_THRESHOLD_MB}MB) β clearing engine hash")
|
| 269 |
engine = _GLOBAL_DEEPCASTLE_ENGINE
|
|
@@ -274,11 +286,13 @@ async def memory_cleanup_task():
|
|
| 274 |
await _clear_engine_hash(engine)
|
| 275 |
except Exception:
|
| 276 |
pass
|
| 277 |
-
|
| 278 |
after_mb = process.memory_info().rss / 1024 / 1024
|
| 279 |
print(f"[CLEANUP] Done. RAM: {mem_mb:.1f}MB β {after_mb:.1f}MB")
|
| 280 |
else:
|
| 281 |
-
|
|
|
|
|
|
|
| 282 |
|
| 283 |
except Exception as e:
|
| 284 |
print(f"[CLEANUP] Error during cleanup: {e}")
|
|
@@ -286,11 +300,9 @@ async def memory_cleanup_task():
|
|
| 286 |
|
| 287 |
@asynccontextmanager
|
| 288 |
async def lifespan(app: FastAPI):
|
| 289 |
-
# Start background memory cleanup task on boot
|
| 290 |
cleanup_task = asyncio.create_task(memory_cleanup_task())
|
| 291 |
print(f"[STARTUP] Memory cleanup task started (every {_RAM_CLEANUP_INTERVAL_SEC}s, threshold {_RAM_CLEANUP_THRESHOLD_MB}MB)")
|
| 292 |
yield
|
| 293 |
-
# On shutdown: cancel cleanup task then quit engine
|
| 294 |
cleanup_task.cancel()
|
| 295 |
try:
|
| 296 |
await cleanup_task
|
|
@@ -329,11 +341,11 @@ async def websocket_endpoint(websocket: WebSocket, match_id: str):
|
|
| 329 |
except WebSocketDisconnect:
|
| 330 |
manager.disconnect(websocket, match_id)
|
| 331 |
await manager.broadcast(json.dumps({"type": "opponent_disconnected"}), match_id)
|
| 332 |
-
|
| 333 |
except Exception:
|
| 334 |
manager.disconnect(websocket, match_id)
|
| 335 |
await manager.broadcast(json.dumps({"type": "opponent_disconnected"}), match_id)
|
| 336 |
-
|
| 337 |
|
| 338 |
|
| 339 |
# ─── Health & Monitoring ──────────────────────────────────────────────────────
|
|
@@ -347,7 +359,7 @@ def home():
|
|
| 347 |
def health():
|
| 348 |
if not os.path.exists(DEEPCASTLE_ENGINE_PATH):
|
| 349 |
return {"status": "error", "message": "Missing engine binary: deepcastle"}
|
| 350 |
-
|
| 351 |
return {"status": "ok", "engine": "deepcastle"}
|
| 352 |
|
| 353 |
|
|
@@ -368,20 +380,18 @@ async def health_ready():
|
|
| 368 |
|
| 369 |
@app.get("/ram")
|
| 370 |
def ram_usage():
|
| 371 |
-
"""Monitor RAM usage — call
|
| 372 |
process = psutil.Process(os.getpid())
|
| 373 |
mem = process.memory_info()
|
| 374 |
mem_mb = mem.rss / 1024 / 1024
|
| 375 |
return {
|
| 376 |
-
"rss_mb": round(mem_mb, 2),
|
| 377 |
-
"vms_mb": round(mem.vms / 1024 / 1024, 2),
|
| 378 |
-
"threshold_mb": _RAM_CLEANUP_THRESHOLD_MB,
|
| 379 |
-
"cleanup_interval_sec": _RAM_CLEANUP_INTERVAL_SEC,
|
| 380 |
"status": "high" if mem_mb > _RAM_CLEANUP_THRESHOLD_MB else "ok",
|
| 381 |
-
"active_rooms": len(manager.active_connections),
|
| 382 |
-
"active_connections": sum(
|
| 383 |
-
len(v) for v in manager.active_connections.values()
|
| 384 |
-
),
|
| 385 |
}
|
| 386 |
|
| 387 |
|
|
@@ -400,7 +410,7 @@ async def new_game():
|
|
| 400 |
engine = await get_deepcastle_engine()
|
| 401 |
async with _ENGINE_IO_LOCK:
|
| 402 |
await _clear_engine_hash(engine)
|
| 403 |
-
|
| 404 |
return {"status": "ok", "message": "Engine hash cleared"}
|
| 405 |
except HTTPException:
|
| 406 |
raise
|
|
@@ -469,15 +479,15 @@ async def get_move(request: MoveRequest):
|
|
| 469 |
else:
|
| 470 |
break
|
| 471 |
pv = " ".join(pv_parts)
|
| 472 |
-
del pv_board
|
| 473 |
|
| 474 |
score_pawns = score_cp / 100.0 if abs(score_cp) < 9900 else (100.0 if score_cp > 0 else -100.0)
|
| 475 |
board_fen_only = board.fen().split(" ")[0]
|
| 476 |
opening_name = openings_db.get(board_fen_only)
|
| 477 |
best_move = result.move.uci()
|
| 478 |
|
| 479 |
-
del result
|
| 480 |
-
del info
|
| 481 |
|
| 482 |
return MoveResponse(
|
| 483 |
bestmove=best_move,
|
|
@@ -534,20 +544,20 @@ async def get_analysis_move(request: MoveRequest):
|
|
| 534 |
else:
|
| 535 |
break
|
| 536 |
pv = " ".join(pv_parts)
|
| 537 |
-
del pv_board
|
| 538 |
|
| 539 |
score_pawns = score_cp / 100.0 if abs(score_cp) < 9900 else (100.0 if score_cp > 0 else -100.0)
|
| 540 |
board_fen_only = board.fen().split(" ")[0]
|
| 541 |
opening_name = openings_db.get(board_fen_only)
|
| 542 |
best_move = result.move.uci()
|
| 543 |
|
| 544 |
-
del result
|
| 545 |
-
del info
|
| 546 |
|
| 547 |
-
# FIX: Clear hash
|
| 548 |
async with _ENGINE_IO_LOCK:
|
| 549 |
await _clear_engine_hash(engine)
|
| 550 |
-
|
| 551 |
|
| 552 |
return MoveResponse(
|
| 553 |
bestmove=best_move,
|
|
@@ -615,7 +625,7 @@ def is_simple_recapture(fen_two_moves_ago: str, previous_move: chess.Move, playe
|
|
| 615 |
return False
|
| 616 |
b = chess.Board(fen_two_moves_ago)
|
| 617 |
result = b.piece_at(previous_move.to_square) is not None
|
| 618 |
-
del b
|
| 619 |
return result
|
| 620 |
|
| 621 |
def get_material_difference(board: chess.Board) -> int:
|
|
@@ -671,7 +681,7 @@ def get_is_piece_sacrifice(board: chess.Board, played_move: chess.Move, best_pv:
|
|
| 671 |
return False
|
| 672 |
|
| 673 |
end_diff = get_material_difference(sim_board)
|
| 674 |
-
del sim_board
|
| 675 |
mat_diff = end_diff - start_diff
|
| 676 |
player_rel = mat_diff if white_to_play else -mat_diff
|
| 677 |
return player_rel < 0
|
|
@@ -861,10 +871,10 @@ async def analyze_game(request: AnalyzeRequest):
|
|
| 861 |
accuracy = max(10.0, min(100.0, 100.0 * math.exp(-0.005 * avg_cpl)))
|
| 862 |
estimated_elo = int(max(400, min(3600, round(3600 * math.exp(-0.015 * avg_cpl)))))
|
| 863 |
|
| 864 |
-
# FIX: Clear engine hash
|
| 865 |
async with _ENGINE_IO_LOCK:
|
| 866 |
await _clear_engine_hash(engine)
|
| 867 |
-
|
| 868 |
|
| 869 |
return AnalyzeResponse(
|
| 870 |
accuracy=round(accuracy, 1),
|
|
|
|
| 11 |
import asyncio
|
| 12 |
import json
|
| 13 |
import gc
|
| 14 |
+
import ctypes
|
| 15 |
import psutil
|
| 16 |
|
| 17 |
+
# ─── Force memory back to OS (Linux/HF compatible) ────────────────────────────
|
| 18 |
+
def force_memory_release():
|
| 19 |
+
"""
|
| 20 |
+
Run GC twice (catches cyclic references missed on first pass),
|
| 21 |
+
then call malloc_trim to return freed pages back to the OS.
|
| 22 |
+
Without this, Python holds freed memory in its own pool and
|
| 23 |
+
the OS still shows high RAM even after objects are deleted.
|
| 24 |
+
"""
|
| 25 |
+
gc.collect()
|
| 26 |
+
gc.collect()
|
| 27 |
+
try:
|
| 28 |
+
ctypes.CDLL("libc.so.6").malloc_trim(0)
|
| 29 |
+
except Exception:
|
| 30 |
+
pass
|
| 31 |
+
|
| 32 |
+
|
| 33 |
# ─── Multiplayer / Challenge Manager ──────────────────────────────────────────
|
| 34 |
class ConnectionManager:
|
| 35 |
def __init__(self):
|
|
|
|
| 261 |
|
| 262 |
|
| 263 |
# ─── Background Memory Cleanup Task ───────────────────────────────────────────
|
|
|
|
| 264 |
_RAM_CLEANUP_THRESHOLD_MB = float(os.environ.get("RAM_CLEANUP_THRESHOLD_MB", "400"))
|
|
|
|
| 265 |
_RAM_CLEANUP_INTERVAL_SEC = int(os.environ.get("RAM_CLEANUP_INTERVAL_SEC", "300"))
|
| 266 |
|
| 267 |
async def memory_cleanup_task():
|
| 268 |
"""
|
| 269 |
Background task that runs every 5 minutes.
|
| 270 |
+
- Always runs GC twice and malloc_trim to return memory to OS.
|
| 271 |
- If RAM exceeds threshold, also clears engine hash table.
|
| 272 |
"""
|
| 273 |
while True:
|
|
|
|
| 276 |
process = psutil.Process(os.getpid())
|
| 277 |
mem_mb = process.memory_info().rss / 1024 / 1024
|
| 278 |
|
|
|
|
|
|
|
|
|
|
| 279 |
if mem_mb > _RAM_CLEANUP_THRESHOLD_MB:
|
| 280 |
print(f"[CLEANUP] RAM at {mem_mb:.1f}MB (threshold {_RAM_CLEANUP_THRESHOLD_MB}MB) β clearing engine hash")
|
| 281 |
engine = _GLOBAL_DEEPCASTLE_ENGINE
|
|
|
|
| 286 |
await _clear_engine_hash(engine)
|
| 287 |
except Exception:
|
| 288 |
pass
|
| 289 |
+
force_memory_release()
|
| 290 |
after_mb = process.memory_info().rss / 1024 / 1024
|
| 291 |
print(f"[CLEANUP] Done. RAM: {mem_mb:.1f}MB β {after_mb:.1f}MB")
|
| 292 |
else:
|
| 293 |
+
# Always nudge GC + malloc_trim even when RAM is fine
|
| 294 |
+
force_memory_release()
|
| 295 |
+
print(f"[CLEANUP] RAM at {mem_mb:.1f}MB β OK")
|
| 296 |
|
| 297 |
except Exception as e:
|
| 298 |
print(f"[CLEANUP] Error during cleanup: {e}")
|
|
|
|
| 300 |
|
| 301 |
@asynccontextmanager
|
| 302 |
async def lifespan(app: FastAPI):
|
|
|
|
| 303 |
cleanup_task = asyncio.create_task(memory_cleanup_task())
|
| 304 |
print(f"[STARTUP] Memory cleanup task started (every {_RAM_CLEANUP_INTERVAL_SEC}s, threshold {_RAM_CLEANUP_THRESHOLD_MB}MB)")
|
| 305 |
yield
|
|
|
|
| 306 |
cleanup_task.cancel()
|
| 307 |
try:
|
| 308 |
await cleanup_task
|
|
|
|
| 341 |
except WebSocketDisconnect:
|
| 342 |
manager.disconnect(websocket, match_id)
|
| 343 |
await manager.broadcast(json.dumps({"type": "opponent_disconnected"}), match_id)
|
| 344 |
+
force_memory_release()
|
| 345 |
except Exception:
|
| 346 |
manager.disconnect(websocket, match_id)
|
| 347 |
await manager.broadcast(json.dumps({"type": "opponent_disconnected"}), match_id)
|
| 348 |
+
force_memory_release()
|
| 349 |
|
| 350 |
|
| 351 |
# ─── Health & Monitoring ──────────────────────────────────────────────────────
|
|
|
|
| 359 |
def health():
|
| 360 |
if not os.path.exists(DEEPCASTLE_ENGINE_PATH):
|
| 361 |
return {"status": "error", "message": "Missing engine binary: deepcastle"}
|
| 362 |
+
force_memory_release()
|
| 363 |
return {"status": "ok", "engine": "deepcastle"}
|
| 364 |
|
| 365 |
|
|
|
|
| 380 |
|
| 381 |
@app.get("/ram")
|
| 382 |
def ram_usage():
|
| 383 |
+
"""Monitor RAM usage — call anytime to check memory health."""
|
| 384 |
process = psutil.Process(os.getpid())
|
| 385 |
mem = process.memory_info()
|
| 386 |
mem_mb = mem.rss / 1024 / 1024
|
| 387 |
return {
|
| 388 |
+
"rss_mb": round(mem_mb, 2),
|
| 389 |
+
"vms_mb": round(mem.vms / 1024 / 1024, 2),
|
| 390 |
+
"threshold_mb": _RAM_CLEANUP_THRESHOLD_MB,
|
| 391 |
+
"cleanup_interval_sec": _RAM_CLEANUP_INTERVAL_SEC,
|
| 392 |
"status": "high" if mem_mb > _RAM_CLEANUP_THRESHOLD_MB else "ok",
|
| 393 |
+
"active_rooms": len(manager.active_connections),
|
| 394 |
+
"active_connections": sum(len(v) for v in manager.active_connections.values()),
|
|
|
|
|
|
|
| 395 |
}
|
| 396 |
|
| 397 |
|
|
|
|
| 410 |
engine = await get_deepcastle_engine()
|
| 411 |
async with _ENGINE_IO_LOCK:
|
| 412 |
await _clear_engine_hash(engine)
|
| 413 |
+
force_memory_release()
|
| 414 |
return {"status": "ok", "message": "Engine hash cleared"}
|
| 415 |
except HTTPException:
|
| 416 |
raise
|
|
|
|
| 479 |
else:
|
| 480 |
break
|
| 481 |
pv = " ".join(pv_parts)
|
| 482 |
+
del pv_board
|
| 483 |
|
| 484 |
score_pawns = score_cp / 100.0 if abs(score_cp) < 9900 else (100.0 if score_cp > 0 else -100.0)
|
| 485 |
board_fen_only = board.fen().split(" ")[0]
|
| 486 |
opening_name = openings_db.get(board_fen_only)
|
| 487 |
best_move = result.move.uci()
|
| 488 |
|
| 489 |
+
del result
|
| 490 |
+
del info
|
| 491 |
|
| 492 |
return MoveResponse(
|
| 493 |
bestmove=best_move,
|
|
|
|
| 544 |
else:
|
| 545 |
break
|
| 546 |
pv = " ".join(pv_parts)
|
| 547 |
+
del pv_board
|
| 548 |
|
| 549 |
score_pawns = score_cp / 100.0 if abs(score_cp) < 9900 else (100.0 if score_cp > 0 else -100.0)
|
| 550 |
board_fen_only = board.fen().split(" ")[0]
|
| 551 |
opening_name = openings_db.get(board_fen_only)
|
| 552 |
best_move = result.move.uci()
|
| 553 |
|
| 554 |
+
del result
|
| 555 |
+
del info
|
| 556 |
|
| 557 |
+
# FIX: Clear hash + force memory back to OS after hint
|
| 558 |
async with _ENGINE_IO_LOCK:
|
| 559 |
await _clear_engine_hash(engine)
|
| 560 |
+
force_memory_release()
|
| 561 |
|
| 562 |
return MoveResponse(
|
| 563 |
bestmove=best_move,
|
|
|
|
| 625 |
return False
|
| 626 |
b = chess.Board(fen_two_moves_ago)
|
| 627 |
result = b.piece_at(previous_move.to_square) is not None
|
| 628 |
+
del b
|
| 629 |
return result
|
| 630 |
|
| 631 |
def get_material_difference(board: chess.Board) -> int:
|
|
|
|
| 681 |
return False
|
| 682 |
|
| 683 |
end_diff = get_material_difference(sim_board)
|
| 684 |
+
del sim_board
|
| 685 |
mat_diff = end_diff - start_diff
|
| 686 |
player_rel = mat_diff if white_to_play else -mat_diff
|
| 687 |
return player_rel < 0
|
|
|
|
| 871 |
accuracy = max(10.0, min(100.0, 100.0 * math.exp(-0.005 * avg_cpl)))
|
| 872 |
estimated_elo = int(max(400, min(3600, round(3600 * math.exp(-0.015 * avg_cpl)))))
|
| 873 |
|
| 874 |
+
# FIX: Clear engine hash + force memory back to OS after full game analysis
|
| 875 |
async with _ENGINE_IO_LOCK:
|
| 876 |
await _clear_engine_hash(engine)
|
| 877 |
+
force_memory_release()
|
| 878 |
|
| 879 |
return AnalyzeResponse(
|
| 880 |
accuracy=round(accuracy, 1),
|