import gradio as gr
from transformers import AutoTokenizer, AutoModelForCausalLM, pipeline
import torch
|
|
# Load the Zephyr-7B chat model and its tokenizer. float16 halves the memory
# footprint (~14 GB of weights for 7B parameters) and device_map="auto" places
# the weights on the available GPU(s).
model_id = "HuggingFaceH4/zephyr-7b-beta"
tokenizer = AutoTokenizer.from_pretrained(model_id)
model = AutoModelForCausalLM.from_pretrained(model_id, torch_dtype=torch.float16, device_map="auto")
|
|
# Wrap the model and tokenizer in a text-generation pipeline for simple prompt-in, text-out calls.
pipe = pipeline("text-generation", model=model, tokenizer=tokenizer)
|
|
def chat_with_bot(user_input):
    system_message = "You are a helpful, honest, and friendly assistant."
    # Zephyr's chat format tags each turn and closes it with the </s> end-of-sequence token.
    prompt = f"<|system|>\n{system_message}</s>\n<|user|>\n{user_input}</s>\n<|assistant|>\n"
    # Sample a completion; the pipeline returns the prompt plus the generated text.
    response = pipe(prompt, max_new_tokens=256, do_sample=True, temperature=0.7, top_p=0.9)[0]["generated_text"]
    # Keep only what follows the final <|assistant|> tag, i.e. the model's reply.
    answer = response.split("<|assistant|>")[-1].strip()
    return answer
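
# An equivalent, more robust way to build the prompt is the tokenizer's built-in
# chat template, which emits Zephyr's special tokens for you (a sketch, not used above):
#
#     messages = [{"role": "system", "content": system_message},
#                 {"role": "user", "content": user_input}]
#     prompt = tokenizer.apply_chat_template(messages, tokenize=False, add_generation_prompt=True)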
|
|
# Build a minimal Gradio UI: a single textbox in, the model's answer out.
iface = gr.Interface(
    fn=chat_with_bot,
    inputs=gr.Textbox(lines=2, placeholder="Ask me anything..."),
    outputs="text",
    title="Zephyr Chatbot",
    description="Ask general questions and get helpful answers!",
)
|
|
# Start the local web server; launch(share=True) would also create a temporary public link.
iface.launch()
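
# A possible multi-turn variant (assumption: the installed Gradio version ships
# gr.ChatInterface, whose fn receives the message and the chat history):
#
#     def chat_fn(message, history):
#         return chat_with_bot(message)
#
#     gr.ChatInterface(chat_fn, title="Zephyr Chatbot").launch()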
|
|