Spaces:

eddddyy
/

bot

Paused

bot

File size: 1,147 Bytes

27693fd
 
0204d4a
27693fd
c07da70
 
 
63e33f3
c07da70
 
63e33f3
c07da70
 
4236a98
c07da70
 
634e670
c07da70
63e33f3
c07da70
 
 
af4148c
63e33f3
27693fd
c07da70
 
 
 
63e33f3
 
 
 
c07da70
17ffe56
c07da70

import torch
from transformers import AutoTokenizer, AutoModelForCausalLM, pipeline
from config import HF_TOKEN, MODEL_ID

def load_model():
    """Load the tokenizer and causal LM for ``MODEL_ID`` as a generation pipeline.

    The tokenizer and model are fetched with ``from_pretrained`` using
    ``HF_TOKEN`` (an empty/falsy token is passed as ``None``). When CUDA is
    available the model is loaded with ``device_map="auto"`` in float16;
    otherwise it is loaded on the CPU in float32.

    Returns:
        A ``transformers`` "text-generation" pipeline configured with
        ``max_new_tokens=2048``, sampling enabled, ``temperature=0.7`` and
        ``top_p=0.9``.

    Raises:
        RuntimeError: If any step of loading fails. The original exception
            is attached as ``__cause__`` so the full traceback is preserved.
    """
    try:
        print(f"🔄 Loading tokenizer and model: {MODEL_ID}")

        # Evaluate the shared settings once instead of repeating them per call.
        auth_token = HF_TOKEN or None
        use_cuda = torch.cuda.is_available()

        # NOTE(review): trust_remote_code=True is passed to both loaders, which
        # lets the model repository supply its own code — confirm MODEL_ID is
        # a trusted repository.
        tokenizer = AutoTokenizer.from_pretrained(
            MODEL_ID,
            token=auth_token,
            trust_remote_code=True,
        )

        model = AutoModelForCausalLM.from_pretrained(
            MODEL_ID,
            token=auth_token,
            trust_remote_code=True,
            device_map="auto" if use_cuda else "cpu",
            torch_dtype=torch.float16 if use_cuda else torch.float32,
            low_cpu_mem_usage=True,
        )

        print("✅ Model loaded successfully.")

        return pipeline(
            "text-generation",
            model=model,
            tokenizer=tokenizer,
            max_new_tokens=2048,
            do_sample=True,
            temperature=0.7,
            top_p=0.9,
        )

    except Exception as e:
        print(f"❌ Failed to load model: {e}")
        # Chain explicitly so callers see the root cause as __cause__ rather
        # than an implicit "during handling of the above exception" context.
        raise RuntimeError(f"Model loading failed: {e}") from e