Spaces:
Sleeping
Sleeping
Auto commit at 10-2025-08 0:59:06
Browse files- lily_llm_api/app_v2.py +35 -0
lily_llm_api/app_v2.py
CHANGED
|
@@ -165,6 +165,39 @@ model_loaded = False
|
|
| 165 |
image_processor = None
|
| 166 |
executor = concurrent.futures.ThreadPoolExecutor()
|
| 167 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 168 |
def select_model_interactive():
|
| 169 |
"""μΈν°λν°λΈ λͺ¨λΈ μ ν"""
|
| 170 |
available_models = list_available_models()
|
|
@@ -187,6 +220,8 @@ def select_model_interactive():
|
|
| 187 |
async def startup_event():
|
| 188 |
"""[볡μ] μλ² μμ μ μΈν°λν°λΈ λͺ¨λΈ μ ν λ° λ‘λ"""
|
| 189 |
global model_loaded
|
|
|
|
|
|
|
| 190 |
selected_model_id = select_model_interactive()
|
| 191 |
try:
|
| 192 |
await load_model_async(selected_model_id)
|
|
|
|
| 165 |
image_processor = None
|
| 166 |
executor = concurrent.futures.ThreadPoolExecutor()
|
| 167 |
|
| 168 |
+
def configure_cpu_threads():
    """Configure CPU thread pools to match the available vCPU count.

    Sets the OpenMP/MKL/numexpr environment variables and PyTorch's
    intra-op / inter-op thread counts. The thread count is taken from the
    ``CPU_THREADS`` environment variable when set (an invalid value falls
    back to auto-detection instead of aborting), otherwise from
    ``os.cpu_count()`` capped at 8 to avoid oversubscription.

    Returns:
        None. Failures are logged as warnings; this function never raises.
    """
    try:
        threads = None
        env_threads = os.getenv("CPU_THREADS")
        if env_threads is not None:
            try:
                threads = max(1, int(env_threads))
            except ValueError:
                # A malformed override must not disable thread configuration
                # entirely — log it and fall through to auto-detection.
                logger.warning(f"⚠️ Invalid CPU_THREADS value: {env_threads!r}")
        if threads is None:
            detected = os.cpu_count() or 2
            # Cap at 8 so containers/hosts exposing many vCPUs do not
            # oversubscribe the BLAS/OpenMP pools.
            threads = max(1, min(detected, 8))

        # OpenMP/MKL/numexpr pools.
        # NOTE(review): these env vars only take effect if the native
        # libraries have not initialized their pools yet — confirm this
        # runs early enough in process startup.
        os.environ["OMP_NUM_THREADS"] = str(threads)
        os.environ["MKL_NUM_THREADS"] = str(threads)
        os.environ.setdefault("NUMEXPR_NUM_THREADS", str(threads))
        os.environ.setdefault("TOKENIZERS_PARALLELISM", "false")

        # PyTorch intra-op threads; may raise if torch is unavailable or
        # already initialized, which we deliberately ignore (best-effort).
        try:
            torch.set_num_threads(threads)
        except Exception:
            pass
        try:
            # Keep inter-op parallelism at 1-2 to reduce context-switch
            # overhead between operators.
            torch.set_num_interop_threads(1 if threads <= 4 else 2)
        except Exception:
            # set_num_interop_threads raises if called after parallel work
            # has already started — safe to skip in that case.
            pass

        logger.info(f"🧵 CPU thread config -> OMP/MKL/numexpr={threads}, torch_threads={threads}")
    except Exception as e:
        # Best-effort: thread tuning must never prevent server startup.
        logger.warning(f"⚠️ CPU 스레드 설정 실패: {e}")
| 201 |
def select_model_interactive():
|
| 202 |
"""μΈν°λν°λΈ λͺ¨λΈ μ ν"""
|
| 203 |
available_models = list_available_models()
|
|
|
|
| 220 |
async def startup_event():
|
| 221 |
"""[볡μ] μλ² μμ μ μΈν°λν°λΈ λͺ¨λΈ μ ν λ° λ‘λ"""
|
| 222 |
global model_loaded
|
| 223 |
+
# CPU μ€λ λ μ΅μ ν μ μ©
|
| 224 |
+
configure_cpu_threads()
|
| 225 |
selected_model_id = select_model_interactive()
|
| 226 |
try:
|
| 227 |
await load_model_async(selected_model_id)
|