gbrabbit committed on
Commit
4a42f62
·
1 Parent(s): e3a8fc8

Auto commit at 10-2025-08 0:59:06

Browse files
Files changed (1) hide show
  1. lily_llm_api/app_v2.py +35 -0
lily_llm_api/app_v2.py CHANGED
@@ -165,6 +165,39 @@ model_loaded = False
165
  image_processor = None
166
  executor = concurrent.futures.ThreadPoolExecutor()
167
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
168
  def select_model_interactive():
169
  """μΈν„°λž™ν‹°λΈŒ λͺ¨λΈ 선택"""
170
  available_models = list_available_models()
@@ -187,6 +220,8 @@ def select_model_interactive():
187
  async def startup_event():
188
  """[볡원] μ„œλ²„ μ‹œμž‘ μ‹œ μΈν„°λž™ν‹°λΈŒ λͺ¨λΈ 선택 및 λ‘œλ“œ"""
189
  global model_loaded
 
 
190
  selected_model_id = select_model_interactive()
191
  try:
192
  await load_model_async(selected_model_id)
 
165
  image_processor = None
166
  executor = concurrent.futures.ThreadPoolExecutor()
167
 
168
def configure_cpu_threads():
    """Tune CPU thread counts for the numeric libraries used by the server.

    The thread count is taken from the ``CPU_THREADS`` environment variable
    when it holds a valid integer (clamped to a minimum of 1). Otherwise it
    is derived from the number of CPUs usable by this process, capped at 8
    to avoid oversubscription.

    Side effects:
        * ``OMP_NUM_THREADS`` and ``MKL_NUM_THREADS`` are overwritten.
        * ``NUMEXPR_NUM_THREADS`` and ``TOKENIZERS_PARALLELISM`` are only set
          when not already present in the environment.
        * PyTorch intra-op threads are set to the chosen count; inter-op
          threads are set to 1 (count <= 4) or 2 (count > 4).

    This function is best-effort: it never raises. Any failure is logged
    and the server continues with whatever settings were already applied.
    """
    try:
        threads = None

        # An explicit override wins, but a malformed value must not abort
        # the whole configuration; fall back to auto-detection instead.
        env_threads = os.getenv("CPU_THREADS")
        if env_threads is not None:
            try:
                threads = max(1, int(env_threads))
            except ValueError:
                logger.warning(
                    f"⚠️ Invalid CPU_THREADS={env_threads!r}; "
                    "falling back to detected CPU count"
                )

        if threads is None:
            # Prefer the CPUs this process may actually run on (respects
            # affinity/cpuset limits); os.cpu_count() reports every CPU on
            # the host. sched_getaffinity is not available on all platforms.
            try:
                detected = len(os.sched_getaffinity(0))
            except (AttributeError, OSError):
                detected = os.cpu_count() or 2
            # Use the detected vCPU count as-is, with an upper bound of 8.
            threads = max(1, min(detected, 8))

        # OpenMP / MKL / numexpr / tokenizers.
        # NOTE(review): these variables only influence libraries that read
        # them after this point — confirm against the file's import order.
        os.environ["OMP_NUM_THREADS"] = str(threads)
        os.environ["MKL_NUM_THREADS"] = str(threads)
        os.environ.setdefault("NUMEXPR_NUM_THREADS", str(threads))
        os.environ.setdefault("TOKENIZERS_PARALLELISM", "false")

        # PyTorch intra-op thread pool.
        try:
            torch.set_num_threads(threads)
        except Exception as e:
            logger.warning(f"⚠️ torch.set_num_threads failed: {e}")

        # PyTorch inter-op thread pool: keep it at 1-2 threads to limit
        # context-switching overhead. PyTorch rejects this call once
        # inter-op work has started, so a failure here is expected on
        # repeated invocation and is only logged at debug level.
        try:
            torch.set_num_interop_threads(1 if threads <= 4 else 2)
        except Exception as e:
            logger.debug(f"torch.set_num_interop_threads skipped: {e}")

        logger.info(f"🧵 CPU thread config -> OMP/MKL/numexpr={threads}, torch_threads={threads}")
    except Exception as e:
        logger.warning(f"⚠️ CPU 스레드 설정 실패: {e}")
200
+
201
  def select_model_interactive():
202
  """μΈν„°λž™ν‹°λΈŒ λͺ¨λΈ 선택"""
203
  available_models = list_available_models()
 
220
  async def startup_event():
221
  """[볡원] μ„œλ²„ μ‹œμž‘ μ‹œ μΈν„°λž™ν‹°λΈŒ λͺ¨λΈ 선택 및 λ‘œλ“œ"""
222
  global model_loaded
223
+ # CPU μŠ€λ ˆλ“œ μ΅œμ ν™” 적용
224
+ configure_cpu_threads()
225
  selected_model_id = select_model_interactive()
226
  try:
227
  await load_model_async(selected_model_id)