Spaces:
Sleeping
Sleeping
| import gradio as gr | |
| from transformers import AutoModelForCausalLM, AutoTokenizer | |
| import torch | |
# Checkpoint identifier passed to both from_pretrained() calls below.
model_id = "microsoft/phi-2"

# NOTE(review): trust_remote_code=True presumably lets the checkpoint's
# repository supply its own Python code at load time — confirm this is
# intended for this model.
tokenizer = AutoTokenizer.from_pretrained(model_id, trust_remote_code=True)

# Use half precision only when CUDA is available; otherwise fall back to
# full precision.
if torch.cuda.is_available():
    _weights_dtype = torch.float16
else:
    _weights_dtype = torch.float32

model = AutoModelForCausalLM.from_pretrained(
    model_id,
    device_map="auto",
    torch_dtype=_weights_dtype,
    trust_remote_code=True,
)
def generate_response(history, user_input):
    """Generate the assistant's next reply and record the new turn.

    Args:
        history: Prior conversation as a sequence of ``(user, assistant)``
            pairs, or ``None``/empty for a fresh conversation. The caller's
            object is not mutated; an updated copy is returned.
        user_input: The new user message.

    Returns:
        A ``(history, history)`` tuple holding the same updated list twice,
        matching the two outputs this handler is wired to. A blank message
        returns the history unchanged without invoking the model.
    """
    # Work on a copy so the caller's list is never modified in place.
    history = list(history or [])

    # Nothing to answer: skip the model call entirely for blank messages.
    if not user_input or not user_input.strip():
        return history, history

    # Render the transcript as "User:" / "Assistant:" lines and finish with
    # an open "Assistant:" cue for the model to complete.
    parts = []
    for user, bot in history:
        parts.append(f"User: {user}\n")
        if bot:
            parts.append(f"Assistant: {bot}\n")
    parts.append(f"User: {user_input}\n")
    parts.append("Assistant:")
    prompt = "".join(parts)

    inputs = tokenizer(prompt, return_tensors="pt").to(model.device)
    outputs = model.generate(
        **inputs,
        max_new_tokens=200,
        do_sample=True,
        top_p=0.95,
        temperature=0.7,
        pad_token_id=tokenizer.eos_token_id,
    )

    # The returned sequence starts with the prompt tokens; decode only what
    # comes after them. Splitting the full decoded text on "Assistant:" would
    # pick the wrong segment whenever the model writes that marker itself.
    prompt_length = inputs["input_ids"].shape[-1]
    new_tokens = outputs[0][prompt_length:]
    response = tokenizer.decode(new_tokens, skip_special_tokens=True)

    # The model may keep going and invent further dialogue turns; keep only
    # the text before the first such turn marker.
    for marker in ("\nUser:", "\nAssistant:"):
        response = response.split(marker, 1)[0]
    response = response.strip()

    history.append((user_input, response))
    return history, history
# Chat interface: a chatbot display, a per-session history state, a message
# box, and a send button.
with gr.Blocks() as demo:
    chatbot = gr.Chatbot()
    state = gr.State([])
    msg = gr.Textbox(label="Enter your message")
    submit = gr.Button("Send")

    # Clicking the button and submitting the textbox both run the same
    # handler with the same inputs and outputs.
    for register in (submit.click, msg.submit):
        register(generate_response, inputs=[state, msg], outputs=[chatbot, state])

# Launch the app only when this file is executed as a script.
if __name__ == "__main__":
    demo.launch()