import torch
from transformers import AutoTokenizer, AutoModelForCausalLM, pipeline

from config import HF_TOKEN, MODEL_ID


def load_model():
    """Load the tokenizer and model, then wrap them in a text-generation pipeline."""
    try:
        print(f"🔄 Loading tokenizer and model: {MODEL_ID}")

        tokenizer = AutoTokenizer.from_pretrained(
            MODEL_ID,
            token=HF_TOKEN or None,
            trust_remote_code=True,
        )

        model = AutoModelForCausalLM.from_pretrained(
            MODEL_ID,
            token=HF_TOKEN or None,
            trust_remote_code=True,
            # Shard across available GPUs when CUDA is present; otherwise keep on CPU.
            device_map="auto" if torch.cuda.is_available() else "cpu",
            # Half precision saves GPU memory; CPU inference needs full float32.
            torch_dtype=torch.float16 if torch.cuda.is_available() else torch.float32,
            low_cpu_mem_usage=True,
        )

        print("✅ Model loaded successfully.")

        return pipeline(
            "text-generation",
            model=model,
            tokenizer=tokenizer,
            max_new_tokens=2048,
            do_sample=True,
            temperature=0.7,
            top_p=0.9,
        )
    except Exception as e:
        print(f"❌ Failed to load model: {e}")
        # Chain the original exception so the full traceback is preserved.
        raise RuntimeError(f"Model loading failed: {e}") from e
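
# Minimal usage sketch, assuming config.py defines MODEL_ID and HF_TOKEN as the
# imports above require. The prompt string below is illustrative only; the return
# value follows the standard transformers text-generation pipeline format
# (a list of dicts whose "generated_text" field holds prompt + completion).
if __name__ == "__main__":
    generator = load_model()
    result = generator("Explain what a tokenizer does in one sentence.")
    print(result[0]["generated_text"])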