File size: 1,358 Bytes
428ef01
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
# load_model.py
import torch
import os
from transformers import AutoModelForCausalLM, AutoTokenizer

_model = None
_tokenizer = None

def init_model(model_dir: str = "."):
    """Load the causal-LM model and tokenizer into the module-level cache.

    Call once at startup; subsequent calls are cheap no-ops that return
    the already-cached objects.

    Args:
        model_dir: Directory containing the local model files (default:
            current directory). Loading is strictly offline
            (``local_files_only=True``).

    Returns:
        Tuple ``(model, tokenizer)``.
    """
    global _model, _tokenizer

    # Idempotent: skip the expensive load if a model is already cached.
    if _model is not None:
        print("✅ Model already loaded!")
        return _model, _tokenizer

    device = "cuda" if torch.cuda.is_available() else "cpu"
    if device == "cpu":
        # os.cpu_count() may return None on some platforms; fall back to 1
        # so torch.set_num_threads() never receives an invalid argument.
        torch.set_num_threads(os.cpu_count() or 1)

    print("\n" + "=" * 70)
    print(f"Loading model from local files on {device.upper()}...")
    print("=" * 70)

    _model = AutoModelForCausalLM.from_pretrained(
        model_dir,
        device_map=device,
        torch_dtype="auto",
        trust_remote_code=True,
        local_files_only=True,
    )

    # trust_remote_code must match the model load above: checkpoints that
    # ship custom modeling code usually ship a custom tokenizer class too,
    # and loading it without the flag fails or falls back incorrectly.
    _tokenizer = AutoTokenizer.from_pretrained(
        model_dir,
        trust_remote_code=True,
        local_files_only=True,
    )

    print(f"✅ Model loaded! ({sum(p.numel() for p in _model.parameters()) / 1e9:.1f}B params)")
    print("=" * 70 + "\n")

    return _model, _tokenizer

def get_model():
    """Return the cached ``(model, tokenizer)`` pair loaded by init_model().

    Fast accessor — performs no loading itself.

    Raises:
        RuntimeError: If init_model() has not been called yet.
    """
    # Read-only access to the module globals; no `global` statement needed.
    if _model is None:
        raise RuntimeError("Model not initialized! Call init_model() first.")
    return _model, _tokenizer