File size: 2,623 Bytes
84635f1
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
"""
System utilities for Lily LLM API
"""
import os
import torch
import logging

# Module-level logger, named after this module's import path.
logger = logging.getLogger(__name__)

def configure_cpu_threads():
    """CPU μŠ€λ ˆλ“œ ν™˜κ²½ μ΅œμ ν™” (vCPU μˆ˜μ— 맞게 μ‘°μ •)."""
    print(f"πŸ” [DEBUG] configure_cpu_threads μ‹œμž‘")
    try:
        # κΈ°λ³Έκ°’: ν™˜κ²½λ³€μˆ˜ λ˜λŠ” μ‹œμŠ€ν…œ CPU 수λ₯Ό μ‚¬μš©ν•˜λ˜ κ³Όλ„ν•œ μŠ€λ ˆλ“œ λ°©μ§€
        env_threads = os.getenv("CPU_THREADS")
        if env_threads is not None:
            threads = max(1, int(env_threads))
        else:
            detected = os.cpu_count() or 2
            # μ»¨ν…Œμ΄λ„ˆ/μ„œλ²„μ˜ vCPU 수λ₯Ό κ·ΈλŒ€λ‘œ μ‚¬μš©ν•˜λ˜ μƒν•œ 16 적용
            threads = max(1, min(detected, 16))

        # OpenMP/MKL/numexpr
        os.environ["OMP_NUM_THREADS"] = str(threads)
        os.environ["MKL_NUM_THREADS"] = str(threads)
        os.environ.setdefault("NUMEXPR_NUM_THREADS", str(threads))
        os.environ.setdefault("TOKENIZERS_PARALLELISM", "false")

        # PyTorch λ‚΄λΆ€ μŠ€λ ˆλ“œ μ„€μ •
        try:
            torch.set_num_threads(threads)
        except Exception:
            pass
        try:
            # μ—°μ‚° κ°„ μŠ€λ ˆλ“œ 풀은 1~2 ꢌμž₯(μ»¨ν…μŠ€νŠΈ μŠ€μœ„μΉ­ λΉ„μš© 절감)
            torch.set_num_interop_threads(1 if threads <= 4 else 2)
        except Exception:
            pass

        logger.info(f"🧡 CPU thread config -> OMP/MKL/numexpr={threads}, torch_threads={threads}")
    except Exception as e:
        logger.warning(f"⚠️ CPU μŠ€λ ˆλ“œ μ„€μ • μ‹€νŒ¨: {e}")
    print(f"πŸ” [DEBUG] configure_cpu_threads μ’…λ£Œ")

def select_model_interactive():
    """Print the available models and return the ID of the default choice.

    Despite the name, no input is read: the interactive prompt is disabled
    and the second listed model (index 1) is selected automatically. When
    only one model is available, that model is selected instead, so the
    function always terminates rather than retrying a selection that can
    never succeed.

    Returns:
        The ``model_id`` value of the selected model entry.

    Raises:
        RuntimeError: If ``list_available_models()`` returns no models.
    """
    from ..models import list_available_models

    available_models = list_available_models()

    print("\n" + "="*60 + "\nπŸ€– Lily LLM API v2 - λͺ¨λΈ 선택\n" + "="*60)
    for i, model_info in enumerate(available_models, 1):
        print(f"{i:2d}. {model_info['name']} ({model_info['model_id']})")

    if not available_models:
        raise RuntimeError("No models are available to select.")

    # The selection is hard-coded to the second entry; clamp the index so a
    # single-entry list still yields a valid model.
    default_index = min(1, len(available_models) - 1)
    selected_model = available_models[default_index]
    print(f"\nβœ… '{selected_model['name']}' λͺ¨λΈμ„ μ„ νƒν–ˆμŠ΅λ‹ˆλ‹€.")
    return selected_model['model_id']