import torch
import gradio as gr
from transformers import pipeline

# -------------------------------
# Load Model (CPU-safe)
# -------------------------------
# float32 + device_map=None keeps the model on CPU so the demo runs
# without a GPU (TinyLlama-1.1B fits comfortably in RAM).
pipe = pipeline(
    "text-generation",
    model="TinyLlama/TinyLlama-1.1B-Chat-v1.0",
    torch_dtype=torch.float32,
    device_map=None,
)


# -------------------------------
# Chat Function (messages format)
# -------------------------------
def chat(user_message, history, system_prompt, temperature, max_tokens):
    """Generate one assistant reply and return the updated chat history.

    Args:
        user_message: Text typed by the user.
        history: Conversation so far in Gradio "messages" format
            (list of ``{"role", "content"}`` dicts); may be ``None``/empty.
        system_prompt: System instruction prepended to the conversation.
        temperature: Sampling temperature (from the slider).
        max_tokens: Maximum number of new tokens to generate (from the slider).

    Returns:
        Tuple of (updated history, "") — the empty string clears the
        input textbox in the UI.
    """
    messages = [{"role": "system", "content": system_prompt}]
    # Copy only role/content: Gradio message dicts may carry extra keys
    # (e.g. "metadata") that the chat template does not expect.
    for msg in history or []:
        messages.append({"role": msg["role"], "content": msg["content"]})
    messages.append({"role": "user", "content": user_message})

    prompt = pipe.tokenizer.apply_chat_template(
        messages,
        tokenize=False,
        add_generation_prompt=True,
    )

    output = pipe(
        prompt,
        max_new_tokens=int(max_tokens),
        temperature=float(temperature),
        top_p=0.9,
        do_sample=True,
    )

    # generated_text echoes the prompt; the new reply is everything after
    # the final assistant marker of TinyLlama's chat template.
    assistant_reply = output[0]["generated_text"].split("<|assistant|>")[-1].strip()

    # Work on a copy so the caller's list is never mutated in place.
    history = list(history or [])
    history.append({"role": "user", "content": user_message})
    history.append({"role": "assistant", "content": assistant_reply})
    return history, ""


# -------------------------------
# Gradio UI
# -------------------------------
with gr.Blocks(title="TinyLLaMA Chatbot") as demo:
    gr.Markdown("## 🦙 TinyLLaMA Chatbot")

    preset_prompts = {
        "Pirate 🏴‍☠️": "You are a pirate chatbot. Speak like a pirate.",
        "Teacher 👨‍🏫": "You are a patient teacher.",
        "Coder 👨‍💻": "You are a programming assistant.",
        "Friendly 🤖": "You are a friendly assistant.",
    }

    personality = gr.Dropdown(
        choices=list(preset_prompts.keys()),
        value="Pirate 🏴‍☠️",
        label="Choose Personality",
    )
    system_prompt = gr.Textbox(
        value=preset_prompts["Pirate 🏴‍☠️"],
        label="System Prompt",
    )
    # Swap the system prompt whenever a new personality is picked.
    personality.change(
        lambda choice: preset_prompts[choice],
        inputs=personality,
        outputs=system_prompt,
    )

    chatbot = gr.Chatbot(type="messages", height=400)
    user_input = gr.Textbox(label="Your Message")
    temperature = gr.Slider(0.1, 1.2, value=0.85, label="Temperature")
    max_tokens = gr.Slider(32, 128, value=96, step=16, label="Max New Tokens")

    send = gr.Button("Send 🚀")
    clear = gr.Button("Clear 🧹")

    # chat() returns (history, "") so the input box empties after sending.
    send.click(
        chat,
        inputs=[user_input, chatbot, system_prompt, temperature, max_tokens],
        outputs=[chatbot, user_input],
    )
    user_input.submit(
        chat,
        inputs=[user_input, chatbot, system_prompt, temperature, max_tokens],
        outputs=[chatbot, user_input],
    )
    # Clear both the conversation and any half-typed message.
    clear.click(lambda: ([], ""), outputs=[chatbot, user_input])

demo.launch()