Spaces:
Runtime error
import gradio as gr
import torch
from transformers import AutoTokenizer, AutoModelForCausalLM

# Meta's Llama Guard 2 safety classifier (8B parameters). The weights are
# gated on the Hub, so the environment needs an HF token with license access.
model_id = "meta-llama/Meta-Llama-Guard-2-8B"

# Fall back to CPU when no GPU is present — the original hard-coded "cuda",
# which raises at load time on CPU-only Spaces hardware (a likely cause of
# the Space's "Runtime error" status).
device = "cuda" if torch.cuda.is_available() else "cpu"
dtype = torch.bfloat16

# Load tokenizer and model once at module import so every request reuses them.
tokenizer = AutoTokenizer.from_pretrained(model_id)
model = AutoModelForCausalLM.from_pretrained(model_id, torch_dtype=dtype, device_map=device)
def moderate(chat):
    """Run Llama Guard 2 over a conversation and return its safety verdict.

    Args:
        chat: list of ``{"role": ..., "content": ...}`` message dicts in the
            format expected by the model's chat template.

    Returns:
        The generated verdict text (e.g. ``"safe"`` or ``"unsafe\\nS1"``) with
        the prompt tokens and special tokens stripped.
    """
    input_ids = tokenizer.apply_chat_template(chat, return_tensors="pt").to(device)
    # pad_token_id=0 silences the "no pad token" warning for single-sequence
    # generation; 100 new tokens is ample for a category verdict.
    output = model.generate(input_ids=input_ids, max_new_tokens=100, pad_token_id=0)
    # Decode only the newly generated tail, not the echoed prompt.
    prompt_len = input_ids.shape[-1]
    return tokenizer.decode(output[0][prompt_len:], skip_special_tokens=True)
def generate_response(user_input):
    """Moderate a single user message with Llama Guard 2.

    Args:
        user_input: raw text entered in the Gradio textbox.

    Returns:
        Llama Guard's verdict string for the user turn.
    """
    # BUG FIX: the original appended an empty assistant turn, which makes the
    # chat template ask Llama Guard to classify the (empty) *assistant*
    # response rather than the user's message. Per the model card, moderating
    # user input passes only the user turn.
    chat = [{"role": "user", "content": user_input}]
    return moderate(chat)
# Wire the moderation function into a minimal Gradio text-in / text-out UI.
iface = gr.Interface(
    fn=generate_response,
    inputs=gr.Textbox(lines=3, placeholder="Enter your message here ..."),
    outputs="text",
    title="Chat with Meta-Llama-Guard-2-8B",
    description="A Gradio interface to interact with the Meta-Llama-Guard-2-8B model for safe chat moderation.",
)

# Launch the web server only when executed as a script, not on import.
if __name__ == "__main__":
    iface.launch()