import gradio as gr
from transformers import AutoModelForCausalLM, AutoTokenizer
import torch

# Load a SMALL and FAST model
print("Loading AI model...")
model_name = "microsoft/DialoGPT-small"  # Small = Fast!
tokenizer = AutoTokenizer.from_pretrained(model_name)
model = AutoModelForCausalLM.from_pretrained(model_name)
print("Model loaded!")

# Store chat history for context
# (module-level state: shared across all sessions of this demo)
chat_history_ids = None


def chat(message, history):
    """
    Fast AI chat using the DialoGPT-small model.
    """
    global chat_history_ids
    try:
        # Encode the user input, terminated by the end-of-sequence token
        new_input_ids = tokenizer.encode(message + tokenizer.eos_token, return_tensors="pt")

        # Append to chat history, or start fresh
        if chat_history_ids is not None and len(history) > 0:
            bot_input_ids = torch.cat([chat_history_ids, new_input_ids], dim=-1)
        else:
            bot_input_ids = new_input_ids

        # Generate a response (fast settings)
        chat_history_ids = model.generate(
            bot_input_ids,
            max_length=200,  # cap on prompt + reply tokens; very long chats hit this limit
            pad_token_id=tokenizer.eos_token_id,
            do_sample=True,
            top_k=50,
            temperature=0.7,
        )

        # Decode only the newly generated tokens (everything after the prompt)
        response = tokenizer.decode(
            chat_history_ids[:, bot_input_ids.shape[-1]:][0],
            skip_special_tokens=True,
        )

        return response if response.strip() else "Hmm, let me think... Could you say that differently?"
    except Exception as e:
        chat_history_ids = None  # Reset history on error
        return f"Let me try again: {e}"


# Create the Gradio Chat Interface
demo = gr.ChatInterface(
    fn=chat,
    title="🤖 AI Chat Assistant",
    description="Fast AI Chat - Powered by DialoGPT",
    examples=["Hello!", "Tell me a joke", "How are you?", "What's your name?"],
)

if __name__ == "__main__":
    demo.launch()
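
# Usage sketch (assumptions: this script is saved as app.py, and the
# gradio, transformers, and torch packages are installed):
#
#   python app.py   # launches the chat UI, by default at http://127.0.0.1:7860
#
# The chat() function can also be exercised without launching the UI,
# e.g. from a Python shell ("app" module name is an assumption):
#
#   from app import chat
#   print(chat("Hello!", history=[]))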