import gradio as gr
from transformers import AutoModelForCausalLM, AutoTokenizer

# Load a small conversational model on CPU.
# NOTE: plain from_pretrained already places the model on CPU; the previous
# device_map="cpu" needlessly required the `accelerate` package.
MODEL_NAME = "microsoft/DialoGPT-small"
tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME)
model = AutoModelForCausalLM.from_pretrained(MODEL_NAME)


def chat(message, history):
    """Generate a reply to *message* and append the turn to *history*.

    Args:
        message: The user's new input string.
        history: List of ``(user_msg, bot_msg)`` pairs — the tuples format
            expected by ``gr.Chatbot`` — or ``None`` on the first call.

    Returns:
        A ``("", history)`` pair: an empty string to clear the input box,
        and the updated history for the chatbot display.
    """
    if history is None:
        history = []

    # DialoGPT was trained on dialogue turns separated by the EOS token,
    # not on "User:/Bot:" labels — build the prompt accordingly.
    prompt = ""
    for user_msg, bot_msg in history:
        prompt += user_msg + tokenizer.eos_token + bot_msg + tokenizer.eos_token
    prompt += message + tokenizer.eos_token

    input_ids = tokenizer.encode(prompt, return_tensors="pt")

    # max_new_tokens bounds only the generated continuation (CPU-friendly).
    output_ids = model.generate(
        input_ids,
        max_new_tokens=50,
        pad_token_id=tokenizer.eos_token_id,
        do_sample=True,
        top_p=0.95,
        top_k=50,
    )

    # Decode only the newly generated tokens (everything after the prompt).
    response = tokenizer.decode(
        output_ids[0][input_ids.shape[1]:], skip_special_tokens=True
    )

    # One (user, bot) pair per turn — this is what gr.Chatbot renders as a
    # left/right message pair. The list is mutated in place, so the gr.State
    # object stays current across calls.
    history.append((message, response))
    return "", history


# Build the Gradio interface.
with gr.Blocks() as demo:
    gr.Markdown(
        "# CPU LLM Chat Demo\nThis is a simple chat interface using DialoGPT-small."
    )
    chatbot = gr.Chatbot()
    message_input = gr.Textbox(placeholder="Type your message here...", show_label=False)
    state = gr.State([])
    message_input.submit(chat, [message_input, state], [message_input, chatbot])

# Guard the launch so importing this module does not start a server.
if __name__ == "__main__":
    demo.launch()