Amogh1221 committed on
Commit
7953192
·
verified ·
1 Parent(s): c60b8ec

Update main.py

Browse files
Files changed (1) hide show
  1. main.py +29 -102
main.py CHANGED
@@ -122,13 +122,6 @@ _GLOBAL_DEEPCASTLE_ENGINE = None
122
  _ENGINE_LOCK = asyncio.Lock()
123
  _ENGINE_IO_LOCK = asyncio.Lock()
124
 
125
- # Global state for monitoring
126
- _PENDING_ENGINE_REQUESTS = 0
127
- _MAX_PENDING_ENGINE_REQUESTS = 10 # Drop requests if queue is too long to prevent OOM
128
- _MAX_PENDING_GAME_REVIEWS = 2 # Max concurrent full game reviews (heavy RAM)
129
- _PENDING_GAME_REVIEWS = 0
130
- _LOCK_TIMEOUT_SEC = 20.0 # Max wait time in queue before giving up
131
-
132
 
133
  def _engine_hash_mb() -> int:
134
  try:
@@ -268,13 +261,8 @@ async def _engine_call(engine, coro, timeout_sec: float):
268
 
269
 
270
  # ─── Background Memory Cleanup Task ───────────────────────────────────────────
271
- _RAM_CLEANUP_THRESHOLD_MB = float(os.environ.get("RAM_CLEANUP_THRESHOLD_MB", "1024"))
272
- # Global System Limit: 18GB (All analysis sessions freed)
273
- _RAM_PANIC_THRESHOLD_MB = 18000.0
274
- # Individual Session Limit: 4GB
275
- _RAM_SESSION_LIMIT_MB = 4000.0
276
- # How often to run the cleanup in seconds (5s)
277
- _RAM_CLEANUP_INTERVAL_SEC = 5
278
 
279
  async def memory_cleanup_task():
280
  """
@@ -288,14 +276,7 @@ async def memory_cleanup_task():
288
  process = psutil.Process(os.getpid())
289
  mem_mb = process.memory_info().rss / 1024 / 1024
290
 
291
- # PANIC CASE: Over 18GB - total server reset
292
- if mem_mb > _RAM_PANIC_THRESHOLD_MB:
293
- print(f"[PANIC] RAM at {mem_mb:.1f}MB! Exceeded 18GB System limit. Resetting everything...")
294
- await shutdown_engine_async()
295
- force_memory_release()
296
-
297
- # NORMAL CLEANUP
298
- elif mem_mb > _RAM_CLEANUP_THRESHOLD_MB:
299
  print(f"[CLEANUP] RAM at {mem_mb:.1f}MB (threshold {_RAM_CLEANUP_THRESHOLD_MB}MB) β€” clearing engine hash")
300
  engine = _GLOBAL_DEEPCASTLE_ENGINE
301
  if engine is not None:
@@ -463,35 +444,25 @@ def normalize_search_stats(info: dict) -> Tuple[int, int, int]:
463
  # ─── Bot Move (/move) ──────────────────────────────────────────────────────────
464
  @app.post("/move", response_model=MoveResponse)
465
  async def get_move(request: MoveRequest):
466
- global _PENDING_ENGINE_REQUESTS
467
- if _PENDING_ENGINE_REQUESTS > _MAX_PENDING_ENGINE_REQUESTS:
468
- force_memory_release()
469
- raise HTTPException(status_code=429, detail="Server busy β€” too many analysis requests.")
470
-
471
- _PENDING_ENGINE_REQUESTS += 1
472
  try:
473
  engine = await get_deepcastle_engine()
474
  board = chess.Board(request.fen)
475
  limit = chess.engine.Limit(time=request.time, depth=request.depth)
476
  tsec = _search_timeout_sec(request.time, request.depth)
477
 
478
- try:
479
- async with asyncio.timeout(_LOCK_TIMEOUT_SEC):
480
- async with _ENGINE_IO_LOCK:
481
- result = await _engine_call(
482
- engine,
483
- engine.play(board, limit, info=chess.engine.INFO_ALL),
484
- tsec,
485
- )
486
- info = dict(result.info)
487
- if not info:
488
- info = await _engine_call(
489
- engine,
490
- engine.analyse(board, limit, info=chess.engine.INFO_ALL),
491
- tsec,
492
- )
493
- except asyncio.TimeoutError:
494
- raise HTTPException(status_code=503, detail="Server overloaded β€” lock wait timeout.")
495
 
496
  score_cp, mate_in = get_normalized_score(info)
497
  depth, nodes, nps = normalize_search_stats(info)
@@ -517,11 +488,6 @@ async def get_move(request: MoveRequest):
517
 
518
  del result
519
  del info
520
-
521
- # Match Shield: Force cleanup and hash clear after every move to prevent "Idle Growth"
522
- async with _ENGINE_IO_LOCK:
523
- await _clear_engine_hash(engine)
524
- force_memory_release()
525
 
526
  return MoveResponse(
527
  bestmove=best_move,
@@ -538,42 +504,30 @@ async def get_move(request: MoveRequest):
538
  except Exception as e:
539
  print(f"Error: {e}")
540
  raise HTTPException(status_code=500, detail=str(e))
541
- finally:
542
- _PENDING_ENGINE_REQUESTS -= 1
543
 
544
 
545
  # ─── Hint Move (/analysis-move) ───────────────────────────────────────────────
546
  @app.post("/analysis-move", response_model=MoveResponse)
547
  async def get_analysis_move(request: MoveRequest):
548
- global _PENDING_ENGINE_REQUESTS
549
- if _PENDING_ENGINE_REQUESTS > _MAX_PENDING_ENGINE_REQUESTS:
550
- force_memory_release()
551
- raise HTTPException(status_code=429, detail="Server busy β€” too many analysis requests.")
552
-
553
- _PENDING_ENGINE_REQUESTS += 1
554
  try:
555
  engine = await get_stockfish_engine()
556
  board = chess.Board(request.fen)
557
  limit = chess.engine.Limit(time=request.time, depth=request.depth)
558
  tsec = _search_timeout_sec(request.time, request.depth)
559
 
560
- try:
561
- async with asyncio.timeout(_LOCK_TIMEOUT_SEC):
562
- async with _ENGINE_IO_LOCK:
563
- result = await _engine_call(
564
- engine,
565
- engine.play(board, limit, info=chess.engine.INFO_ALL),
566
- tsec,
567
- )
568
- info = dict(result.info)
569
- if not info:
570
- info = await _engine_call(
571
- engine,
572
- engine.analyse(board, limit, info=chess.engine.INFO_ALL),
573
- tsec,
574
- )
575
- except asyncio.TimeoutError:
576
- raise HTTPException(status_code=503, detail="Server overloaded β€” lock wait timeout.")
577
 
578
  score_cp, mate_in = get_normalized_score(info)
579
  depth, nodes, nps = normalize_search_stats(info)
@@ -620,8 +574,6 @@ async def get_analysis_move(request: MoveRequest):
620
  except Exception as e:
621
  print(f"Analysis move error: {e}")
622
  raise HTTPException(status_code=500, detail=str(e))
623
- finally:
624
- _PENDING_ENGINE_REQUESTS -= 1
625
 
626
 
627
  # ─── Openings DB ───────────────────────────────────────────────────────────────
@@ -777,16 +729,6 @@ def get_move_classification(
777
  # ─── Game Analysis (/analyze-game) ────────────────────────────────────────────
778
  @app.post("/analyze-game", response_model=AnalyzeResponse)
779
  async def analyze_game(request: AnalyzeRequest):
780
- global _PENDING_ENGINE_REQUESTS, _PENDING_GAME_REVIEWS
781
-
782
- if _PENDING_GAME_REVIEWS >= _MAX_PENDING_GAME_REVIEWS:
783
- raise HTTPException(status_code=429, detail="Analysis queue full. Only 2 games can be reviewed at once.")
784
-
785
- if _PENDING_ENGINE_REQUESTS >= _MAX_PENDING_ENGINE_REQUESTS:
786
- raise HTTPException(status_code=429, detail="Server busy β€” too many moving parts.")
787
-
788
- _PENDING_ENGINE_REQUESTS += 1
789
- _PENDING_GAME_REVIEWS += 1
790
  try:
791
  engine = await get_stockfish_engine()
792
  board = chess.Board(request.start_fen) if request.start_fen else chess.Board()
@@ -819,19 +761,7 @@ async def analyze_game(request: AnalyzeRequest):
819
  player_moves_count = 0
820
  current_score, _ = get_normalized_score(infos_before[0])
821
 
822
- process = psutil.Process()
823
- mem_start = process.memory_info().rss / 1024 / 1024
824
-
825
  for i, san_move in enumerate(request.moves):
826
- # Check individual session growth (4GB limit)
827
- mem_now = process.memory_info().rss / 1024 / 1024
828
- if (mem_now - mem_start) > _RAM_SESSION_LIMIT_MB:
829
- print(f"[ANALYSIS-RESET] Individual session exceeds 4GB growth. Clearing RAM mid-analysis.")
830
- async with _ENGINE_IO_LOCK:
831
- await _clear_engine_hash(engine)
832
- force_memory_release()
833
- mem_start = process.memory_info().rss / 1024 / 1024
834
-
835
  is_white_turn = board.turn == chess.WHITE
836
  is_player_turn = is_white_turn if player_is_white else not is_white_turn
837
 
@@ -958,9 +888,6 @@ async def analyze_game(request: AnalyzeRequest):
958
  except Exception as e:
959
  print(f"Analysis Error: {e}")
960
  raise HTTPException(status_code=500, detail=str(e))
961
- finally:
962
- _PENDING_ENGINE_REQUESTS -= 1
963
- _PENDING_GAME_REVIEWS -= 1
964
 
965
 
966
  if __name__ == "__main__":
 
122
  _ENGINE_LOCK = asyncio.Lock()
123
  _ENGINE_IO_LOCK = asyncio.Lock()
124
 
 
 
 
 
 
 
 
125
 
126
  def _engine_hash_mb() -> int:
127
  try:
 
261
 
262
 
263
  # ─── Background Memory Cleanup Task ───────────────────────────────────────────
264
+ _RAM_CLEANUP_THRESHOLD_MB = float(os.environ.get("RAM_CLEANUP_THRESHOLD_MB", "400"))
265
+ _RAM_CLEANUP_INTERVAL_SEC = int(os.environ.get("RAM_CLEANUP_INTERVAL_SEC", "300"))
 
 
 
 
 
266
 
267
  async def memory_cleanup_task():
268
  """
 
276
  process = psutil.Process(os.getpid())
277
  mem_mb = process.memory_info().rss / 1024 / 1024
278
 
279
+ if mem_mb > _RAM_CLEANUP_THRESHOLD_MB:
 
 
 
 
 
 
 
280
  print(f"[CLEANUP] RAM at {mem_mb:.1f}MB (threshold {_RAM_CLEANUP_THRESHOLD_MB}MB) β€” clearing engine hash")
281
  engine = _GLOBAL_DEEPCASTLE_ENGINE
282
  if engine is not None:
 
444
  # ─── Bot Move (/move) ──────────────────────────────────────────────────────────
445
  @app.post("/move", response_model=MoveResponse)
446
  async def get_move(request: MoveRequest):
 
 
 
 
 
 
447
  try:
448
  engine = await get_deepcastle_engine()
449
  board = chess.Board(request.fen)
450
  limit = chess.engine.Limit(time=request.time, depth=request.depth)
451
  tsec = _search_timeout_sec(request.time, request.depth)
452
 
453
+ async with _ENGINE_IO_LOCK:
454
+ result = await _engine_call(
455
+ engine,
456
+ engine.play(board, limit, info=chess.engine.INFO_ALL),
457
+ tsec,
458
+ )
459
+ info = dict(result.info)
460
+ if not info:
461
+ info = await _engine_call(
462
+ engine,
463
+ engine.analyse(board, limit, info=chess.engine.INFO_ALL),
464
+ tsec,
465
+ )
 
 
 
 
466
 
467
  score_cp, mate_in = get_normalized_score(info)
468
  depth, nodes, nps = normalize_search_stats(info)
 
488
 
489
  del result
490
  del info
 
 
 
 
 
491
 
492
  return MoveResponse(
493
  bestmove=best_move,
 
504
  except Exception as e:
505
  print(f"Error: {e}")
506
  raise HTTPException(status_code=500, detail=str(e))
 
 
507
 
508
 
509
  # ─── Hint Move (/analysis-move) ───────────────────────────────────────────────
510
  @app.post("/analysis-move", response_model=MoveResponse)
511
  async def get_analysis_move(request: MoveRequest):
 
 
 
 
 
 
512
  try:
513
  engine = await get_stockfish_engine()
514
  board = chess.Board(request.fen)
515
  limit = chess.engine.Limit(time=request.time, depth=request.depth)
516
  tsec = _search_timeout_sec(request.time, request.depth)
517
 
518
+ async with _ENGINE_IO_LOCK:
519
+ result = await _engine_call(
520
+ engine,
521
+ engine.play(board, limit, info=chess.engine.INFO_ALL),
522
+ tsec,
523
+ )
524
+ info = dict(result.info)
525
+ if not info:
526
+ info = await _engine_call(
527
+ engine,
528
+ engine.analyse(board, limit, info=chess.engine.INFO_ALL),
529
+ tsec,
530
+ )
 
 
 
 
531
 
532
  score_cp, mate_in = get_normalized_score(info)
533
  depth, nodes, nps = normalize_search_stats(info)
 
574
  except Exception as e:
575
  print(f"Analysis move error: {e}")
576
  raise HTTPException(status_code=500, detail=str(e))
 
 
577
 
578
 
579
  # ─── Openings DB ───────────────────────────────────────────────────────────────
 
729
  # ─── Game Analysis (/analyze-game) ────────────────────────────────────────────
730
  @app.post("/analyze-game", response_model=AnalyzeResponse)
731
  async def analyze_game(request: AnalyzeRequest):
 
 
 
 
 
 
 
 
 
 
732
  try:
733
  engine = await get_stockfish_engine()
734
  board = chess.Board(request.start_fen) if request.start_fen else chess.Board()
 
761
  player_moves_count = 0
762
  current_score, _ = get_normalized_score(infos_before[0])
763
 
 
 
 
764
  for i, san_move in enumerate(request.moves):
 
 
 
 
 
 
 
 
 
765
  is_white_turn = board.turn == chess.WHITE
766
  is_player_turn = is_white_turn if player_is_white else not is_white_turn
767
 
 
888
  except Exception as e:
889
  print(f"Analysis Error: {e}")
890
  raise HTTPException(status_code=500, detail=str(e))
 
 
 
891
 
892
 
893
  if __name__ == "__main__":