Spaces:
Running
Running
Update main.py
Browse files
main.py
CHANGED
|
@@ -271,9 +271,7 @@ async def _engine_call(engine, coro, timeout_sec: float):
|
|
| 271 |
|
| 272 |
# ─── Background Memory Cleanup Task ───────────────────────────────────────────
|
| 273 |
_RAM_CLEANUP_THRESHOLD_MB = float(os.environ.get("RAM_CLEANUP_THRESHOLD_MB", "300"))
|
| 274 |
-
|
| 275 |
-
_RAM_CLEANUP_INTERVAL_SEC = int(os.environ.get("RAM_CLEANUP_INTERVAL_SEC", "30"))
|
| 276 |
-
_ENGINE_NODES_LIMIT = int(os.environ.get("ENGINE_NODES_LIMIT", "1500000"))
|
| 277 |
_CLEAR_HASH_AFTER_MOVE = os.environ.get("CLEAR_HASH_AFTER_MOVE", "1").strip().lower() not in {"0", "false", "no", "off"}
|
| 278 |
_RESTART_ENGINE_AFTER_MOVE = os.environ.get("RESTART_ENGINE_AFTER_MOVE", "1").strip().lower() not in {"0", "false", "no", "off"}
|
| 279 |
|
|
@@ -288,61 +286,27 @@ async def memory_cleanup_task():
|
|
| 288 |
try:
|
| 289 |
process = psutil.Process(os.getpid())
|
| 290 |
mem_mb = process.memory_info().rss / 1024 / 1024
|
| 291 |
-
|
| 292 |
-
|
| 293 |
-
|
| 294 |
-
child_count = 0
|
| 295 |
-
for child in process.children(recursive=True):
|
| 296 |
-
try:
|
| 297 |
-
total_mb += child.memory_info().rss / 1024 / 1024
|
| 298 |
-
child_count += 1
|
| 299 |
-
except (psutil.NoSuchProcess, psutil.AccessDenied):
|
| 300 |
-
pass
|
| 301 |
-
|
| 302 |
-
# ─── RESTART CHECK (The Last Resort) ─────────────────────────────
|
| 303 |
-
if total_mb > _RAM_RESTART_THRESHOLD_MB:
|
| 304 |
-
active_count = len(manager.active_connections)
|
| 305 |
-
print(f"[CRITICAL] RAM at {total_mb:.1f}MB β RESTARTING SERVER (Limit: {_RAM_RESTART_THRESHOLD_MB}MB, Active: {active_count})")
|
| 306 |
-
# Immediate exit: Hugging Face or Docker will automatically restart the process/container
|
| 307 |
-
# This clears all leaky memory and hung engine processes at once.
|
| 308 |
-
os._exit(0)
|
| 309 |
-
|
| 310 |
-
# ─── REGULAR CLEANUP ─────────────────────────────────────────────
|
| 311 |
-
if total_mb > _RAM_CLEANUP_THRESHOLD_MB:
|
| 312 |
-
print(f"[CLEANUP] total_tree_ram={total_mb:.1f}MB (threshold {_RAM_CLEANUP_THRESHOLD_MB}MB) β purging engines & GC")
|
| 313 |
-
|
| 314 |
-
# 1. Purge global engine
|
| 315 |
engine = _GLOBAL_DEEPCASTLE_ENGINE
|
| 316 |
if engine is not None:
|
| 317 |
try:
|
| 318 |
-
|
| 319 |
-
async with
|
| 320 |
-
|
| 321 |
-
await asyncio.wait_for(engine.quit(), timeout=3.0)
|
| 322 |
-
except Exception:
|
| 323 |
-
pass
|
| 324 |
-
|
| 325 |
-
# 2. Hard purge any orphaned engine processes
|
| 326 |
-
for child in process.children(recursive=True):
|
| 327 |
-
try:
|
| 328 |
-
if "deepcastle" in child.name().lower() or "stockfish" in child.name().lower():
|
| 329 |
-
print(f"[CLEANUP] Killing orphaned engine: {child.pid}")
|
| 330 |
-
child.kill()
|
| 331 |
except Exception:
|
| 332 |
pass
|
| 333 |
-
|
| 334 |
-
# 3. Release memory
|
| 335 |
force_memory_release()
|
| 336 |
after_mb = process.memory_info().rss / 1024 / 1024
|
| 337 |
-
print(f"[CLEANUP]
|
| 338 |
else:
|
| 339 |
-
#
|
| 340 |
force_memory_release()
|
| 341 |
-
|
| 342 |
-
print(f"[CLEANUP] Maintenance. RAM {total_mb:.1f}MB (OK)")
|
| 343 |
|
| 344 |
except Exception as e:
|
| 345 |
-
print(f"[CLEANUP]
|
| 346 |
|
| 347 |
|
| 348 |
@asynccontextmanager
|
|
@@ -454,7 +418,6 @@ def ram_usage():
|
|
| 454 |
"child_process_count": child_count,
|
| 455 |
"vms_mb": round(mem.vms / 1024 / 1024, 2),
|
| 456 |
"threshold_mb": _RAM_CLEANUP_THRESHOLD_MB,
|
| 457 |
-
"restart_threshold_mb": _RAM_RESTART_THRESHOLD_MB,
|
| 458 |
"cleanup_interval_sec": _RAM_CLEANUP_INTERVAL_SEC,
|
| 459 |
"status": "high" if total_mb > _RAM_CLEANUP_THRESHOLD_MB else "ok",
|
| 460 |
"active_rooms": len(manager.active_connections),
|
|
@@ -812,7 +775,7 @@ async def analyze_game(request: AnalyzeRequest):
|
|
| 812 |
try:
|
| 813 |
engine = await get_stockfish_engine(hash_mb=2048)
|
| 814 |
board = chess.Board(request.start_fen) if request.start_fen else chess.Board()
|
| 815 |
-
limit = chess.engine.Limit(time=request.time_per_move
|
| 816 |
|
| 817 |
analysis_results = []
|
| 818 |
ply_timeout = _analyze_ply_timeout(request.time_per_move)
|
|
|
|
| 271 |
|
| 272 |
# ─── Background Memory Cleanup Task ───────────────────────────────────────────
|
| 273 |
_RAM_CLEANUP_THRESHOLD_MB = float(os.environ.get("RAM_CLEANUP_THRESHOLD_MB", "300"))
|
| 274 |
+
_RAM_CLEANUP_INTERVAL_SEC = int(os.environ.get("RAM_CLEANUP_INTERVAL_SEC", "60"))
|
|
|
|
|
|
|
| 275 |
_CLEAR_HASH_AFTER_MOVE = os.environ.get("CLEAR_HASH_AFTER_MOVE", "1").strip().lower() not in {"0", "false", "no", "off"}
|
| 276 |
_RESTART_ENGINE_AFTER_MOVE = os.environ.get("RESTART_ENGINE_AFTER_MOVE", "1").strip().lower() not in {"0", "false", "no", "off"}
|
| 277 |
|
|
|
|
| 286 |
try:
|
| 287 |
process = psutil.Process(os.getpid())
|
| 288 |
mem_mb = process.memory_info().rss / 1024 / 1024
|
| 289 |
+
|
| 290 |
+
if mem_mb > _RAM_CLEANUP_THRESHOLD_MB:
|
| 291 |
+
print(f"[CLEANUP] RAM at {mem_mb:.1f}MB (threshold {_RAM_CLEANUP_THRESHOLD_MB}MB) β clearing engine hash")
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 292 |
engine = _GLOBAL_DEEPCASTLE_ENGINE
|
| 293 |
if engine is not None:
|
| 294 |
try:
|
| 295 |
+
if not engine.is_terminated():
|
| 296 |
+
async with _ENGINE_IO_LOCK:
|
| 297 |
+
await _clear_engine_hash(engine)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 298 |
except Exception:
|
| 299 |
pass
|
|
|
|
|
|
|
| 300 |
force_memory_release()
|
| 301 |
after_mb = process.memory_info().rss / 1024 / 1024
|
| 302 |
+
print(f"[CLEANUP] Done. RAM: {mem_mb:.1f}MB β {after_mb:.1f}MB")
|
| 303 |
else:
|
| 304 |
+
# Always nudge GC + malloc_trim even when RAM is fine
|
| 305 |
force_memory_release()
|
| 306 |
+
print(f"[CLEANUP] RAM at {mem_mb:.1f}MB β OK")
|
|
|
|
| 307 |
|
| 308 |
except Exception as e:
|
| 309 |
+
print(f"[CLEANUP] Error during cleanup: {e}")
|
| 310 |
|
| 311 |
|
| 312 |
@asynccontextmanager
|
|
|
|
| 418 |
"child_process_count": child_count,
|
| 419 |
"vms_mb": round(mem.vms / 1024 / 1024, 2),
|
| 420 |
"threshold_mb": _RAM_CLEANUP_THRESHOLD_MB,
|
|
|
|
| 421 |
"cleanup_interval_sec": _RAM_CLEANUP_INTERVAL_SEC,
|
| 422 |
"status": "high" if total_mb > _RAM_CLEANUP_THRESHOLD_MB else "ok",
|
| 423 |
"active_rooms": len(manager.active_connections),
|
|
|
|
| 775 |
try:
|
| 776 |
engine = await get_stockfish_engine(hash_mb=2048)
|
| 777 |
board = chess.Board(request.start_fen) if request.start_fen else chess.Board()
|
| 778 |
+
limit = chess.engine.Limit(time=request.time_per_move)
|
| 779 |
|
| 780 |
analysis_results = []
|
| 781 |
ply_timeout = _analyze_ply_timeout(request.time_per_move)
|