"""Minimal Gradio chat UI for TinyLlama-1.1B-Chat.

Loads the model once at module import and serves a ChatInterface whose
callback feeds the full conversation history through the tokenizer's
chat template on every turn.
"""

import gradio as gr
from transformers import AutoModelForCausalLM, AutoTokenizer, pipeline

MODEL_ID = "TinyLlama/TinyLlama-1.1B-Chat-v1.0"

tokenizer = AutoTokenizer.from_pretrained(MODEL_ID)
model = AutoModelForCausalLM.from_pretrained(MODEL_ID, device_map="auto")
pipe = pipeline("text-generation", model=model, tokenizer=tokenizer)


def chat_fn(message, history):
    """Generate a reply to *message*, conditioning on the chat *history*.

    Args:
        message: The user's latest utterance.
        history: Gradio's conversation log — either a list of
            ``{"role", "content"}`` dicts (Gradio >= 4 "messages" format)
            or legacy ``(user, assistant)`` pairs; both are normalized.

    Returns:
        The assistant's generated reply as a plain string.
    """
    # Rebuild the full conversation so the model sees prior turns —
    # previously `history` was dropped, making the bot stateless.
    messages = []
    for turn in history:
        if isinstance(turn, dict):  # "messages" format
            messages.append({"role": turn["role"], "content": turn["content"]})
        else:  # legacy (user, assistant) tuple format
            user_msg, assistant_msg = turn
            messages.append({"role": "user", "content": user_msg})
            if assistant_msg:
                messages.append({"role": "assistant", "content": assistant_msg})
    messages.append({"role": "user", "content": message})

    # Use the model's own chat template rather than a hand-rolled prompt:
    # TinyLlama-Chat expects Zephyr-style role tags with </s> terminators,
    # which the bare f-string prompt omitted.
    prompt = tokenizer.apply_chat_template(
        messages, tokenize=False, add_generation_prompt=True
    )

    out = pipe(
        prompt,
        max_new_tokens=200,
        temperature=0.7,
        do_sample=True,
        return_full_text=False,  # completion only — no fragile split() needed
        pad_token_id=tokenizer.eos_token_id,  # silence open-ended-gen warning
    )
    return out[0]["generated_text"].strip()


if __name__ == "__main__":
    # Guarded launch: importing this module no longer starts a web server.
    gr.ChatInterface(chat_fn, title="TinyLlama Chatbot").launch()