Amogh1221 committed on
Commit
c2681fa
·
verified ·
1 Parent(s): 6ad9d35

Update main.py

Browse files
Files changed (1) hide show
  1. main.py +45 -35
main.py CHANGED
@@ -11,8 +11,25 @@ import chess.engine
11
  import asyncio
12
  import json
13
  import gc
 
14
  import psutil
15
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
16
  # ─── Multiplayer / Challenge Manager ──────────────────────────────────────────
17
  class ConnectionManager:
18
  def __init__(self):
@@ -244,15 +261,13 @@ async def _engine_call(engine, coro, timeout_sec: float):
244
 
245
 
246
  # ─── Background Memory Cleanup Task ───────────────────────────────────────────
247
- # RAM threshold in MB above which engine hash is also cleared (tune to your HF plan)
248
  _RAM_CLEANUP_THRESHOLD_MB = float(os.environ.get("RAM_CLEANUP_THRESHOLD_MB", "400"))
249
- # How often to run the cleanup in seconds (default: 5 minutes)
250
  _RAM_CLEANUP_INTERVAL_SEC = int(os.environ.get("RAM_CLEANUP_INTERVAL_SEC", "300"))
251
 
252
  async def memory_cleanup_task():
253
  """
254
  Background task that runs every 5 minutes.
255
- - Always nudges Python GC to free unreferenced objects.
256
  - If RAM exceeds threshold, also clears engine hash table.
257
  """
258
  while True:
@@ -261,9 +276,6 @@ async def memory_cleanup_task():
261
  process = psutil.Process(os.getpid())
262
  mem_mb = process.memory_info().rss / 1024 / 1024
263
 
264
- # Always run Python GC
265
- gc.collect()
266
-
267
  if mem_mb > _RAM_CLEANUP_THRESHOLD_MB:
268
  print(f"[CLEANUP] RAM at {mem_mb:.1f}MB (threshold {_RAM_CLEANUP_THRESHOLD_MB}MB) — clearing engine hash")
269
  engine = _GLOBAL_DEEPCASTLE_ENGINE
@@ -274,11 +286,13 @@ async def memory_cleanup_task():
274
  await _clear_engine_hash(engine)
275
  except Exception:
276
  pass
277
- gc.collect()
278
  after_mb = process.memory_info().rss / 1024 / 1024
279
  print(f"[CLEANUP] Done. RAM: {mem_mb:.1f}MB → {after_mb:.1f}MB")
280
  else:
281
- print(f"[CLEANUP] RAM at {mem_mb:.1f}MB — OK, no action needed")
 
 
282
 
283
  except Exception as e:
284
  print(f"[CLEANUP] Error during cleanup: {e}")
@@ -286,11 +300,9 @@ async def memory_cleanup_task():
286
 
287
  @asynccontextmanager
288
  async def lifespan(app: FastAPI):
289
- # Start background memory cleanup task on boot
290
  cleanup_task = asyncio.create_task(memory_cleanup_task())
291
  print(f"[STARTUP] Memory cleanup task started (every {_RAM_CLEANUP_INTERVAL_SEC}s, threshold {_RAM_CLEANUP_THRESHOLD_MB}MB)")
292
  yield
293
- # On shutdown: cancel cleanup task then quit engine
294
  cleanup_task.cancel()
295
  try:
296
  await cleanup_task
@@ -329,11 +341,11 @@ async def websocket_endpoint(websocket: WebSocket, match_id: str):
329
  except WebSocketDisconnect:
330
  manager.disconnect(websocket, match_id)
331
  await manager.broadcast(json.dumps({"type": "opponent_disconnected"}), match_id)
332
- gc.collect()
333
  except Exception:
334
  manager.disconnect(websocket, match_id)
335
  await manager.broadcast(json.dumps({"type": "opponent_disconnected"}), match_id)
336
- gc.collect()
337
 
338
 
339
  # ─── Health & Monitoring ──────────────────────────────────────────────────────
@@ -347,7 +359,7 @@ def home():
347
  def health():
348
  if not os.path.exists(DEEPCASTLE_ENGINE_PATH):
349
  return {"status": "error", "message": "Missing engine binary: deepcastle"}
350
- gc.collect()
351
  return {"status": "ok", "engine": "deepcastle"}
352
 
353
 
@@ -368,20 +380,18 @@ async def health_ready():
368
 
369
  @app.get("/ram")
370
  def ram_usage():
371
- """Monitor RAM usage — call this anytime to check memory health."""
372
  process = psutil.Process(os.getpid())
373
  mem = process.memory_info()
374
  mem_mb = mem.rss / 1024 / 1024
375
  return {
376
- "rss_mb": round(mem_mb, 2), # actual RAM used
377
- "vms_mb": round(mem.vms / 1024 / 1024, 2), # virtual memory
378
- "threshold_mb": _RAM_CLEANUP_THRESHOLD_MB, # cleanup trigger
379
- "cleanup_interval_sec": _RAM_CLEANUP_INTERVAL_SEC, # how often cleanup runs
380
  "status": "high" if mem_mb > _RAM_CLEANUP_THRESHOLD_MB else "ok",
381
- "active_rooms": len(manager.active_connections), # live websocket rooms
382
- "active_connections": sum(
383
- len(v) for v in manager.active_connections.values()
384
- ),
385
  }
386
 
387
 
@@ -400,7 +410,7 @@ async def new_game():
400
  engine = await get_deepcastle_engine()
401
  async with _ENGINE_IO_LOCK:
402
  await _clear_engine_hash(engine)
403
- gc.collect()
404
  return {"status": "ok", "message": "Engine hash cleared"}
405
  except HTTPException:
406
  raise
@@ -469,15 +479,15 @@ async def get_move(request: MoveRequest):
469
  else:
470
  break
471
  pv = " ".join(pv_parts)
472
- del pv_board # FIX: Free board copy
473
 
474
  score_pawns = score_cp / 100.0 if abs(score_cp) < 9900 else (100.0 if score_cp > 0 else -100.0)
475
  board_fen_only = board.fen().split(" ")[0]
476
  opening_name = openings_db.get(board_fen_only)
477
  best_move = result.move.uci()
478
 
479
- del result # FIX: Free engine result
480
- del info # FIX: Free info dict
481
 
482
  return MoveResponse(
483
  bestmove=best_move,
@@ -534,20 +544,20 @@ async def get_analysis_move(request: MoveRequest):
534
  else:
535
  break
536
  pv = " ".join(pv_parts)
537
- del pv_board # FIX: Free board copy
538
 
539
  score_pawns = score_cp / 100.0 if abs(score_cp) < 9900 else (100.0 if score_cp > 0 else -100.0)
540
  board_fen_only = board.fen().split(" ")[0]
541
  opening_name = openings_db.get(board_fen_only)
542
  best_move = result.move.uci()
543
 
544
- del result # FIX: Free engine result
545
- del info # FIX: Free info dict
546
 
547
- # FIX: Clear hash after hint — one-shot search, no continuity needed
548
  async with _ENGINE_IO_LOCK:
549
  await _clear_engine_hash(engine)
550
- gc.collect()
551
 
552
  return MoveResponse(
553
  bestmove=best_move,
@@ -615,7 +625,7 @@ def is_simple_recapture(fen_two_moves_ago: str, previous_move: chess.Move, playe
615
  return False
616
  b = chess.Board(fen_two_moves_ago)
617
  result = b.piece_at(previous_move.to_square) is not None
618
- del b # FIX: Free temp board
619
  return result
620
 
621
  def get_material_difference(board: chess.Board) -> int:
@@ -671,7 +681,7 @@ def get_is_piece_sacrifice(board: chess.Board, played_move: chess.Move, best_pv:
671
  return False
672
 
673
  end_diff = get_material_difference(sim_board)
674
- del sim_board # FIX: Free temp board
675
  mat_diff = end_diff - start_diff
676
  player_rel = mat_diff if white_to_play else -mat_diff
677
  return player_rel < 0
@@ -861,10 +871,10 @@ async def analyze_game(request: AnalyzeRequest):
861
  accuracy = max(10.0, min(100.0, 100.0 * math.exp(-0.005 * avg_cpl)))
862
  estimated_elo = int(max(400, min(3600, round(3600 * math.exp(-0.015 * avg_cpl)))))
863
 
864
- # FIX: Clear engine hash after full game analysis — analysis fills hash very fast
865
  async with _ENGINE_IO_LOCK:
866
  await _clear_engine_hash(engine)
867
- gc.collect()
868
 
869
  return AnalyzeResponse(
870
  accuracy=round(accuracy, 1),
 
11
  import asyncio
12
  import json
13
  import gc
14
+ import ctypes
15
  import psutil
16
 
17
+ # ─── Force memory back to OS (Linux/HF compatible) ────────────────────────────
18
+ def force_memory_release():
19
+ """
20
+ Run GC twice (catches cyclic references missed on first pass),
21
+ then call malloc_trim to return freed pages back to the OS.
22
+ Without this, Python holds freed memory in its own pool and
23
+ the OS still shows high RAM even after objects are deleted.
24
+ """
25
+ gc.collect()
26
+ gc.collect()
27
+ try:
28
+ ctypes.CDLL("libc.so.6").malloc_trim(0)
29
+ except Exception:
30
+ pass
31
+
32
+
33
  # ─── Multiplayer / Challenge Manager ──────────────────────────────────────────
34
  class ConnectionManager:
35
  def __init__(self):
 
261
 
262
 
263
  # ─── Background Memory Cleanup Task ───────────────────────────────────────────
 
264
  _RAM_CLEANUP_THRESHOLD_MB = float(os.environ.get("RAM_CLEANUP_THRESHOLD_MB", "400"))
 
265
  _RAM_CLEANUP_INTERVAL_SEC = int(os.environ.get("RAM_CLEANUP_INTERVAL_SEC", "300"))
266
 
267
  async def memory_cleanup_task():
268
  """
269
  Background task that runs every 5 minutes.
270
+ - Always runs GC twice and malloc_trim to return memory to OS.
271
  - If RAM exceeds threshold, also clears engine hash table.
272
  """
273
  while True:
 
276
  process = psutil.Process(os.getpid())
277
  mem_mb = process.memory_info().rss / 1024 / 1024
278
 
 
 
 
279
  if mem_mb > _RAM_CLEANUP_THRESHOLD_MB:
280
  print(f"[CLEANUP] RAM at {mem_mb:.1f}MB (threshold {_RAM_CLEANUP_THRESHOLD_MB}MB) — clearing engine hash")
281
  engine = _GLOBAL_DEEPCASTLE_ENGINE
 
286
  await _clear_engine_hash(engine)
287
  except Exception:
288
  pass
289
+ force_memory_release()
290
  after_mb = process.memory_info().rss / 1024 / 1024
291
  print(f"[CLEANUP] Done. RAM: {mem_mb:.1f}MB → {after_mb:.1f}MB")
292
  else:
293
+ # Always nudge GC + malloc_trim even when RAM is fine
294
+ force_memory_release()
295
+ print(f"[CLEANUP] RAM at {mem_mb:.1f}MB — OK")
296
 
297
  except Exception as e:
298
  print(f"[CLEANUP] Error during cleanup: {e}")
 
300
 
301
  @asynccontextmanager
302
  async def lifespan(app: FastAPI):
 
303
  cleanup_task = asyncio.create_task(memory_cleanup_task())
304
  print(f"[STARTUP] Memory cleanup task started (every {_RAM_CLEANUP_INTERVAL_SEC}s, threshold {_RAM_CLEANUP_THRESHOLD_MB}MB)")
305
  yield
 
306
  cleanup_task.cancel()
307
  try:
308
  await cleanup_task
 
341
  except WebSocketDisconnect:
342
  manager.disconnect(websocket, match_id)
343
  await manager.broadcast(json.dumps({"type": "opponent_disconnected"}), match_id)
344
+ force_memory_release()
345
  except Exception:
346
  manager.disconnect(websocket, match_id)
347
  await manager.broadcast(json.dumps({"type": "opponent_disconnected"}), match_id)
348
+ force_memory_release()
349
 
350
 
351
  # ─── Health & Monitoring ──────────────────────────────────────────────────────
 
359
  def health():
360
  if not os.path.exists(DEEPCASTLE_ENGINE_PATH):
361
  return {"status": "error", "message": "Missing engine binary: deepcastle"}
362
+ force_memory_release()
363
  return {"status": "ok", "engine": "deepcastle"}
364
 
365
 
 
380
 
381
  @app.get("/ram")
382
  def ram_usage():
383
+ """Monitor RAM usage — call anytime to check memory health."""
384
  process = psutil.Process(os.getpid())
385
  mem = process.memory_info()
386
  mem_mb = mem.rss / 1024 / 1024
387
  return {
388
+ "rss_mb": round(mem_mb, 2),
389
+ "vms_mb": round(mem.vms / 1024 / 1024, 2),
390
+ "threshold_mb": _RAM_CLEANUP_THRESHOLD_MB,
391
+ "cleanup_interval_sec": _RAM_CLEANUP_INTERVAL_SEC,
392
  "status": "high" if mem_mb > _RAM_CLEANUP_THRESHOLD_MB else "ok",
393
+ "active_rooms": len(manager.active_connections),
394
+ "active_connections": sum(len(v) for v in manager.active_connections.values()),
 
 
395
  }
396
 
397
 
 
410
  engine = await get_deepcastle_engine()
411
  async with _ENGINE_IO_LOCK:
412
  await _clear_engine_hash(engine)
413
+ force_memory_release()
414
  return {"status": "ok", "message": "Engine hash cleared"}
415
  except HTTPException:
416
  raise
 
479
  else:
480
  break
481
  pv = " ".join(pv_parts)
482
+ del pv_board
483
 
484
  score_pawns = score_cp / 100.0 if abs(score_cp) < 9900 else (100.0 if score_cp > 0 else -100.0)
485
  board_fen_only = board.fen().split(" ")[0]
486
  opening_name = openings_db.get(board_fen_only)
487
  best_move = result.move.uci()
488
 
489
+ del result
490
+ del info
491
 
492
  return MoveResponse(
493
  bestmove=best_move,
 
544
  else:
545
  break
546
  pv = " ".join(pv_parts)
547
+ del pv_board
548
 
549
  score_pawns = score_cp / 100.0 if abs(score_cp) < 9900 else (100.0 if score_cp > 0 else -100.0)
550
  board_fen_only = board.fen().split(" ")[0]
551
  opening_name = openings_db.get(board_fen_only)
552
  best_move = result.move.uci()
553
 
554
+ del result
555
+ del info
556
 
557
+ # FIX: Clear hash + force memory back to OS after hint
558
  async with _ENGINE_IO_LOCK:
559
  await _clear_engine_hash(engine)
560
+ force_memory_release()
561
 
562
  return MoveResponse(
563
  bestmove=best_move,
 
625
  return False
626
  b = chess.Board(fen_two_moves_ago)
627
  result = b.piece_at(previous_move.to_square) is not None
628
+ del b
629
  return result
630
 
631
  def get_material_difference(board: chess.Board) -> int:
 
681
  return False
682
 
683
  end_diff = get_material_difference(sim_board)
684
+ del sim_board
685
  mat_diff = end_diff - start_diff
686
  player_rel = mat_diff if white_to_play else -mat_diff
687
  return player_rel < 0
 
871
  accuracy = max(10.0, min(100.0, 100.0 * math.exp(-0.005 * avg_cpl)))
872
  estimated_elo = int(max(400, min(3600, round(3600 * math.exp(-0.015 * avg_cpl)))))
873
 
874
+ # FIX: Clear engine hash + force memory back to OS after full game analysis
875
  async with _ENGINE_IO_LOCK:
876
  await _clear_engine_hash(engine)
877
+ force_memory_release()
878
 
879
  return AnalyzeResponse(
880
  accuracy=round(accuracy, 1),