Spaces:
Runtime error
Runtime error
| from transformers import AutoTokenizer, AutoModelForCausalLM | |
| import torch | |
| import gradio as gr | |
# Load tokenizer & model.
# The checkpoint id is named once so the tokenizer and the model can never
# drift apart.
MODEL_ID = "codellama/CodeLlama-7b-Instruct-hf"

tokenizer = AutoTokenizer.from_pretrained(MODEL_ID)
model = AutoModelForCausalLM.from_pretrained(
    MODEL_ID,
    device_map="auto",
    # Without an explicit dtype, from_pretrained materialises the weights in
    # float32. "auto" keeps the dtype recorded in the checkpoint config
    # instead (presumably a 16-bit type for this model -- confirm against the
    # model card), which avoids the float32 memory overhead.
    torch_dtype="auto",
)
def chat_fn(user_message):
    """Generate a single-turn reply to ``user_message``.

    The message is wrapped as one ``user`` turn, rendered through the
    tokenizer's chat template, and passed to ``model.generate`` with a budget
    of 100 new tokens.

    Args:
        user_message: Text typed by the user. ``None``, an empty string, or a
            whitespace-only string is treated as "nothing to answer".

    Returns:
        The decoded continuation with special tokens removed, or ``""`` when
        there was no usable input.
    """
    # Nothing to answer: skip tokenisation and the expensive generate() call.
    if not user_message or not user_message.strip():
        return ""

    messages = [{"role": "user", "content": user_message}]
    inputs = tokenizer.apply_chat_template(
        messages,
        add_generation_prompt=True,
        tokenize=True,
        return_dict=True,
        return_tensors="pt",
    ).to(model.device)
    outputs = model.generate(**inputs, max_new_tokens=100)

    # The returned sequence starts with the prompt tokens; slice them off so
    # only the newly generated part is decoded.
    prompt_length = inputs["input_ids"].shape[-1]
    new_tokens = outputs[0][prompt_length:]

    # skip_special_tokens keeps markers such as the end-of-sequence token out
    # of the text shown to the user.
    return tokenizer.decode(new_tokens, skip_special_tokens=True)
# Gradio UI: one text box in, one text box out, wired to chat_fn.
_prompt_box = gr.Textbox(label="User Input")
_reply_box = gr.Textbox(label="Model Output")

demo = gr.Interface(
    title="CodeLlama-7b-Instruct Chat",
    fn=chat_fn,
    inputs=_prompt_box,
    outputs=_reply_box,
)

# Start the web server only when this file is executed as a script.
if __name__ == "__main__":
    demo.launch()