Spaces:
Sleeping
Sleeping
Auto commit at 10-2025-08 0:59:06
Browse files- lily_llm_api/app_v2.py +35 -0
lily_llm_api/app_v2.py
CHANGED
|
@@ -165,6 +165,39 @@ model_loaded = False
|
|
| 165 |
image_processor = None
|
| 166 |
executor = concurrent.futures.ThreadPoolExecutor()
|
| 167 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 168 |
def select_model_interactive():
|
| 169 |
"""μΈν°λν°λΈ λͺ¨λΈ μ ν"""
|
| 170 |
available_models = list_available_models()
|
|
@@ -187,6 +220,8 @@ def select_model_interactive():
|
|
| 187 |
async def startup_event():
|
| 188 |
"""[볡μ] μλ² μμ μ μΈν°λν°λΈ λͺ¨λΈ μ ν λ° λ‘λ"""
|
| 189 |
global model_loaded
|
|
|
|
|
|
|
| 190 |
selected_model_id = select_model_interactive()
|
| 191 |
try:
|
| 192 |
await load_model_async(selected_model_id)
|
|
|
|
| 165 |
image_processor = None
|
| 166 |
executor = concurrent.futures.ThreadPoolExecutor()
|
| 167 |
|
| 168 |
+
def configure_cpu_threads():
    """Configure CPU thread pools to match the available vCPU count.

    Sets the OpenMP/MKL/numexpr environment variables and PyTorch's
    intra-op / inter-op thread counts. The thread count is taken from the
    ``CPU_THREADS`` environment variable when set (an invalid value falls
    back to auto-detection instead of aborting), otherwise from
    ``os.cpu_count()`` capped at 8 to avoid oversubscription.

    Returns:
        None. Failures are logged as warnings; this function never raises.
    """
    try:
        threads = None
        env_threads = os.getenv("CPU_THREADS")
        if env_threads is not None:
            try:
                threads = max(1, int(env_threads))
            except ValueError:
                # A malformed override must not disable thread configuration
                # entirely — log it and fall through to auto-detection.
                logger.warning(f"⚠️ Invalid CPU_THREADS value: {env_threads!r}")
        if threads is None:
            detected = os.cpu_count() or 2
            # Cap at 8 so containers/hosts exposing many vCPUs do not
            # oversubscribe the BLAS/OpenMP pools.
            threads = max(1, min(detected, 8))

        # OpenMP/MKL/numexpr pools.
        # NOTE(review): these env vars only take effect if the native
        # libraries have not initialized their pools yet — confirm this
        # runs early enough in process startup.
        os.environ["OMP_NUM_THREADS"] = str(threads)
        os.environ["MKL_NUM_THREADS"] = str(threads)
        os.environ.setdefault("NUMEXPR_NUM_THREADS", str(threads))
        os.environ.setdefault("TOKENIZERS_PARALLELISM", "false")

        # PyTorch intra-op threads; may raise if torch is unavailable or
        # already initialized, which we deliberately ignore (best-effort).
        try:
            torch.set_num_threads(threads)
        except Exception:
            pass
        try:
            # Keep inter-op parallelism at 1-2 to reduce context-switch
            # overhead between operators.
            torch.set_num_interop_threads(1 if threads <= 4 else 2)
        except Exception:
            # set_num_interop_threads raises if called after parallel work
            # has already started — safe to skip in that case.
            pass

        logger.info(f"🧵 CPU thread config -> OMP/MKL/numexpr={threads}, torch_threads={threads}")
    except Exception as e:
        # Best-effort: thread tuning must never prevent server startup.
        logger.warning(f"⚠️ CPU 스레드 설정 실패: {e}")
| 201 |
def select_model_interactive():
|
| 202 |
"""μΈν°λν°λΈ λͺ¨λΈ μ ν"""
|
| 203 |
available_models = list_available_models()
|
|
|
|
| 220 |
async def startup_event():
|
| 221 |
"""[볡μ] μλ² μμ μ μΈν°λν°λΈ λͺ¨λΈ μ ν λ° λ‘λ"""
|
| 222 |
global model_loaded
|
| 223 |
+
# CPU μ€λ λ μ΅μ ν μ μ©
|
| 224 |
+
configure_cpu_threads()
|
| 225 |
selected_model_id = select_model_interactive()
|
| 226 |
try:
|
| 227 |
await load_model_async(selected_model_id)
|