import torch
from transformers import AutoModelForCausalLM, AutoTokenizer

# Kept as module-level globals so the model is loaded once and reused
_model = None
_tokenizer = None


def get_model():
    global _model, _tokenizer

    if _model is None:
        print("Loading Qwen2.5-3B-Instruct... (Lazy Loading)")
        model_name = "Qwen/Qwen2.5-3B-Instruct"
        # model_name = "Qwen/Qwen2.5-1.5B-Instruct"  # faster alternative

        _tokenizer = AutoTokenizer.from_pretrained(model_name)

        # --- Fix: make the dtype selection logic safe ---
        dtype = torch.float32  # default: float32 (~12 GB, should fit in 16 GB of RAM)

        # Prefer bfloat16 only where the hardware actually supports it.
        if torch.cuda.is_available() and torch.cuda.is_bf16_supported():
            dtype = torch.bfloat16
        # torch.cpu.is_bf16_supported() is missing in some PyTorch versions,
        # so confirm it exists with hasattr before calling it.
        elif hasattr(torch.cpu, "is_bf16_supported") and torch.cpu.is_bf16_supported():
            dtype = torch.bfloat16
        # -----------------------------------------------

        _model = AutoModelForCausalLM.from_pretrained(
            model_name,
            torch_dtype=dtype,  # renamed to plain `dtype=` in newer transformers releases
            trust_remote_code=True,
        )
        print(f"Model Loaded! (dtype: {dtype})")

    return _model, _tokenizer
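

# A minimal usage sketch (assumption: this __main__ block was not in the
# original file, and the prompt below is illustrative only). Running the
# module directly exercises the lazy loader once and generates a short reply.
if __name__ == "__main__":
    model, tokenizer = get_model()

    messages = [{"role": "user", "content": "Explain lazy loading in one sentence."}]
    input_ids = tokenizer.apply_chat_template(
        messages,
        add_generation_prompt=True,
        return_tensors="pt",
    )

    with torch.no_grad():
        output_ids = model.generate(input_ids, max_new_tokens=64)

    # Decode only the newly generated tokens, skipping the echoed prompt.
    print(tokenizer.decode(output_ids[0][input_ids.shape[-1]:], skip_special_tokens=True))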