Hugging Face Spaces — Space status: Sleeping
| import gradio as gr | |
| from transformers import AutoModelForCausalLM, AutoTokenizer | |
# Small conversational model, loaded once at import time and pinned to CPU
# (device_map="cpu") so the demo runs on machines without a GPU.
MODEL_NAME = "microsoft/DialoGPT-small"

tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME)
model = AutoModelForCausalLM.from_pretrained(MODEL_NAME, device_map="cpu")
def chat(message, history):
    """Generate a bot reply and append the new turn to the conversation.

    Parameters:
        message (str): The new user message from the textbox.
        history (list[tuple[str, str]] | None): Prior turns as
            (user_message, bot_response) pairs — the pair format that
            gr.Chatbot renders.

    Returns:
        tuple[str, list]: An empty string (clears the input box) and the
        updated history for the chatbot display.
    """
    if history is None:
        history = []

    # Rebuild the running prompt from previous (user, bot) turn pairs.
    prompt = "".join(f"User: {user}\nBot: {bot}\n" for user, bot in history)
    prompt += f"User: {message}\nBot: "

    # eos_token marks end-of-turn for DialoGPT; cap new tokens for CPU speed.
    input_ids = tokenizer.encode(prompt + tokenizer.eos_token, return_tensors="pt")
    output_ids = model.generate(
        input_ids,
        max_length=input_ids.shape[1] + 50,
        pad_token_id=tokenizer.eos_token_id,
        do_sample=True,
        top_p=0.95,
        top_k=50,
    )

    # Decode only the newly generated tokens (skip the echoed prompt).
    response = tokenizer.decode(
        output_ids[0][input_ids.shape[1]:], skip_special_tokens=True
    )

    # BUG FIX: store each turn as one (user_message, bot_response) pair.
    # The original appended ("User", message) and ("Bot", response), which
    # gr.Chatbot would render as the literal labels "User"/"Bot" on one side
    # and the text on the other, instead of the actual conversation.
    history.append((message, response))
    return "", history
# Assemble the Gradio UI: a title, the chat transcript, and a text input.
with gr.Blocks() as demo:
    gr.Markdown(
        "# CPU LLM Chat Demo\nThis is a simple chat interface using DialoGPT-small."
    )
    chatbot = gr.Chatbot()
    message_input = gr.Textbox(
        placeholder="Type your message here...",
        show_label=False,
    )
    state = gr.State([])

    # Pressing Enter sends (message, state) to chat(); the returned pair
    # clears the textbox and refreshes the transcript.
    # NOTE(review): `state` is not listed in the outputs, so persistence
    # relies on chat() mutating the history list in place — confirm this
    # holds for the gradio version in use.
    message_input.submit(chat, [message_input, state], [message_input, chatbot])

demo.launch()