import torch
from transformers import AutoTokenizer, AutoModelForCausalLM, pipeline

from config import HF_TOKEN, MODEL_ID


def load_model():
    """Load the tokenizer and model for MODEL_ID and return a text-generation pipeline."""
    try:
        print(f"Loading tokenizer and model: {MODEL_ID}")
        tokenizer = AutoTokenizer.from_pretrained(
            MODEL_ID,
            token=HF_TOKEN or None,  # fall back to anonymous access if no token is set
            trust_remote_code=True,
        )
        model = AutoModelForCausalLM.from_pretrained(
            MODEL_ID,
            token=HF_TOKEN or None,
            trust_remote_code=True,
            # Shard across available GPUs when present; otherwise keep the model on CPU.
            device_map="auto" if torch.cuda.is_available() else "cpu",
            # Half precision saves GPU memory; CPU inference needs float32.
            torch_dtype=torch.float16 if torch.cuda.is_available() else torch.float32,
            low_cpu_mem_usage=True,
        )
        print("Model loaded successfully.")
        return pipeline(
            "text-generation",
            model=model,
            tokenizer=tokenizer,
            max_new_tokens=2048,
            do_sample=True,
            temperature=0.7,
            top_p=0.9,
        )
    except Exception as e:
        print(f"Failed to load model: {e}")
        # Chain the original exception so the full traceback is preserved.
        raise RuntimeError(f"Model loading failed: {e}") from e
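

# A minimal usage sketch, assuming config.py provides a valid MODEL_ID and
# HF_TOKEN as imported above. The prompt is purely illustrative; the
# text-generation pipeline returns a list of dicts with a "generated_text" key.
if __name__ == "__main__":
    generator = load_model()
    prompt = "Explain what a tokenizer does in one paragraph."
    outputs = generator(prompt, num_return_sequences=1)
    print(outputs[0]["generated_text"])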