Spaces:
Sleeping
Sleeping
File size: 1,209 Bytes
8f4b227 5bac7bb 8f4b227 5bac7bb 8f4b227 5bac7bb 8f4b227 a8fd17e f062705 5bac7bb 8f4b227 5bac7bb 8f4b227 dd95ee1 8f4b227 5bac7bb 8f4b227 5bac7bb 8f4b227 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 |
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer
# Module-level cache for the lazily loaded model and tokenizer.
# Both stay None until get_model() populates them on its first call.
_model = None
_tokenizer = None
def get_model():
    """Return the shared ``(model, tokenizer)`` pair, loading it on first use.

    The first call loads the tokenizer and the causal-LM weights for
    ``Qwen/Qwen2.5-3B-Instruct`` and stores them in the module-level
    ``_model`` / ``_tokenizer`` variables. Later calls see that ``_model``
    is already set and return the cached objects without reloading.

    Returns:
        A ``(model, tokenizer)`` tuple.
    """
    global _model, _tokenizer

    # Fast path: the model has already been loaded by an earlier call.
    if _model is not None:
        return _model, _tokenizer

    print("Loading Qwen2.5-3B-Instruct... (Lazy Loading)")
    repo_id = "Qwen/Qwen2.5-3B-Instruct"
    # A smaller, faster alternative is "Qwen/Qwen2.5-1.5B-Instruct".
    _tokenizer = AutoTokenizer.from_pretrained(repo_id)

    # Pick the weight dtype defensively. bfloat16 is used when CUDA is
    # available, or when this torch build exposes torch.cpu.is_bf16_supported
    # and it reports support; the hasattr() guard avoids an AttributeError on
    # builds that lack that function. Otherwise fall back to float32 (the
    # original author estimates roughly 12 GB, expected to fit in 16 GB RAM).
    bf16_ok = torch.cuda.is_available() or (
        hasattr(torch.cpu, "is_bf16_supported") and torch.cpu.is_bf16_supported()
    )
    weights_dtype = torch.bfloat16 if bf16_ok else torch.float32

    # NOTE(review): `dtype=` appears to be the newer spelling of
    # `torch_dtype=` -- confirm against the installed transformers version.
    # NOTE(review): trust_remote_code=True permits running code shipped in the
    # model repository -- confirm it is actually required for this model.
    _model = AutoModelForCausalLM.from_pretrained(
        repo_id,
        dtype=weights_dtype,
        trust_remote_code=True,
    )
    print(f"Model Loaded! (dtype: {weights_dtype})")
    return _model, _tokenizer
|