"""Minimal single-turn Gradio chatbot backed by TinyLlama-1.1B-Chat.

Loads the model and tokenizer once at import time, builds a text-generation
pipeline, and serves a simple question-in / answer-out web UI.
"""

import gradio as gr
from transformers import AutoModelForCausalLM, AutoTokenizer, pipeline

model_id = "TinyLlama/TinyLlama-1.1B-Chat-v1.0"

tokenizer = AutoTokenizer.from_pretrained(model_id)
model = AutoModelForCausalLM.from_pretrained(model_id)

generator = pipeline(
    "text-generation",
    model=model,
    tokenizer=tokenizer,
    max_new_tokens=200,
    # do_sample must be True, otherwise transformers falls back to greedy
    # decoding and silently ignores temperature/top_p (emitting a warning).
    do_sample=True,
    temperature=0.7,
    top_p=0.9,
)


def chat(user_input: str) -> str:
    """Generate a single-turn assistant reply to ``user_input``.

    Args:
        user_input: The user's question or message.

    Returns:
        The assistant's generated text only (the prompt is excluded via
        ``return_full_text=False``), stripped of surrounding whitespace.
    """
    # Guard against empty / whitespace-only submissions from the UI.
    if not user_input or not user_input.strip():
        return "Please enter a question."
    # Use the model's own chat template rather than a hand-rolled
    # "User:/Assistant:" prompt — TinyLlama-Chat was fine-tuned on a
    # specific chat format, and matching it markedly improves replies.
    messages = [
        {"role": "system", "content": "You are a helpful assistant."},
        {"role": "user", "content": user_input},
    ]
    prompt = tokenizer.apply_chat_template(
        messages, tokenize=False, add_generation_prompt=True
    )
    response = generator(prompt, return_full_text=False)
    return response[0]["generated_text"].strip()


iface = gr.Interface(
    fn=chat,
    inputs=gr.Textbox(lines=2, placeholder="Ask a full question, like 'Where is Delhi?'"),
    outputs="text",
    title="TinyLlama Chatbot 🤖",
    description="Lightweight chatbot powered by TinyLlama. Works better with complete questions!",
)

# Guard the server launch so importing this module (e.g. for testing)
# does not start a web server as a side effect.
if __name__ == "__main__":
    iface.launch()