import os

import torch
from dotenv import load_dotenv
from transformers import AutoTokenizer, AutoModelForCausalLM, pipeline


class LLaMAHelper:
    def __init__(self, hf_token=None):
        self.device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
        self.model_id = "meta-llama/Llama-3.2-3B-Instruct"
        load_dotenv()
        hf_token = hf_token or os.getenv("HF_TOKEN")
        self.tokenizer = AutoTokenizer.from_pretrained(self.model_id, token=hf_token)
        self.model = AutoModelForCausalLM.from_pretrained(
            self.model_id,
            token=hf_token,
            torch_dtype=torch.float16 if torch.cuda.is_available() else torch.float32,
        ).to(self.device)
        self.pipe = pipeline(
            "text-generation",
            model=self.model,
            tokenizer=self.tokenizer,
            device=0 if torch.cuda.is_available() else -1,
        )
    def chat(self, system_prompt, prompt, max_new_tokens=1200, temperature=0.5):
        messages = [
            {"role": "system", "content": system_prompt},
            {"role": "user", "content": prompt},
        ]
        outputs = self.pipe(messages, max_new_tokens=max_new_tokens, do_sample=True, temperature=temperature)
        if not outputs:
            return ""
        # With chat-style (list of messages) input, the pipeline returns the
        # conversation with the assistant turn appended as the last message.
        last = outputs[0]["generated_text"][-1]
        if isinstance(last, dict) and "content" in last:
            full_response = last["content"].lower()
        else:
            # Fallback when the pipeline returns plain generated text as a string.
            full_response = str(last).lower()
        # Drop any echoed prompt text and surrounding whitespace before returning.
        return full_response.replace(prompt, "").strip()
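

# Minimal usage sketch (not part of the original file): assumes a valid HF_TOKEN
# with access to meta-llama/Llama-3.2-3B-Instruct is set in the environment or a
# .env file, and that enough GPU/CPU memory is available to load the model.
if __name__ == "__main__":
    helper = LLaMAHelper()
    reply = helper.chat(
        system_prompt="You are a concise assistant.",
        prompt="Summarize what this helper class does in one sentence.",
    )
    print(reply)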