File size: 1,275 Bytes
9f64c94 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 |
# load_model.py
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer
# Process-wide cache for the loaded model and its tokenizer. Both names are
# None until init_model() assigns them; get_model() reads them back.
_model = _tokenizer = None
def init_model(model_dir: str = "."):
    """Load the model and tokenizer once and cache them at module level.

    Intended to be called a single time at startup. If a model is already
    cached, the cached pair is returned immediately and ``model_dir`` is
    ignored.

    Args:
        model_dir: Directory handed to both ``from_pretrained`` calls.
            Defaults to the current directory.

    Returns:
        The ``(model, tokenizer)`` tuple that is now cached in the module
        globals ``_model`` and ``_tokenizer``.

    Raises:
        Exception: Whatever either ``from_pretrained`` call raises is
            propagated unchanged. Nothing is cached in that case, so a
            later call retries the load from scratch.
    """
    global _model, _tokenizer

    if _model is not None:
        print("✅ Model already loaded!")
        return _model, _tokenizer

    device = "cuda" if torch.cuda.is_available() else "cpu"
    banner = "=" * 70

    print("\n" + banner)
    print(f"Loading model from local files on {device.upper()}...")
    print(banner)

    # Load into locals first: if the tokenizer load fails, the module must
    # not be left holding a model with no tokenizer, because the
    # "already loaded" check above would then return (model, None) forever.
    model = AutoModelForCausalLM.from_pretrained(
        model_dir,
        device_map=device,
        torch_dtype="auto",
        trust_remote_code=True,
        local_files_only=True,
    )
    # NOTE(review): trust_remote_code is passed for the model but not for the
    # tokenizer; confirm that is intentional for checkpoints that ship a
    # custom tokenizer class.
    tokenizer = AutoTokenizer.from_pretrained(
        model_dir,
        local_files_only=True,
    )

    # Both loads succeeded, so publish the pair together.
    _model, _tokenizer = model, tokenizer

    param_count = sum(p.numel() for p in model.parameters())
    print(f"✅ Model loaded! ({param_count / 1e9:.1f}B params)")
    print(banner + "\n")
    return _model, _tokenizer
def get_model():
    """Return the cached ``(model, tokenizer)`` pair without loading anything.

    Returns:
        The module-level ``(_model, _tokenizer)`` tuple.

    Raises:
        RuntimeError: If ``_model`` is still ``None``, i.e. nothing has been
            cached yet.
    """
    # The globals are only read here, so no `global` declaration is needed.
    if _model is not None:
        return _model, _tokenizer
    raise RuntimeError("Model not initialized! Call init_model() first.")
|