"""Minimal single-turn Gradio chatbot backed by TinyLlama-1.1B-Chat.

Loads the model and tokenizer once at import time, builds a text-generation
pipeline, and serves a simple question-in / answer-out web UI.
"""

import gradio as gr
from transformers import AutoModelForCausalLM, AutoTokenizer, pipeline

model_id = "TinyLlama/TinyLlama-1.1B-Chat-v1.0"

tokenizer = AutoTokenizer.from_pretrained(model_id)
model = AutoModelForCausalLM.from_pretrained(model_id)

generator = pipeline(
    "text-generation",
    model=model,
    tokenizer=tokenizer,
    max_new_tokens=200,
    # do_sample must be True, otherwise transformers falls back to greedy
    # decoding and silently ignores temperature/top_p (emitting a warning).
    do_sample=True,
    temperature=0.7,
    top_p=0.9,
)


def chat(user_input: str) -> str:
    """Generate a single-turn assistant reply to ``user_input``.

    Args:
        user_input: The user's question or message.

    Returns:
        The assistant's generated text only (the prompt is excluded via
        ``return_full_text=False``), stripped of surrounding whitespace.
    """
    # Guard against empty / whitespace-only submissions from the UI.
    if not user_input or not user_input.strip():
        return "Please enter a question."
    # Use the model's own chat template rather than a hand-rolled
    # "User:/Assistant:" prompt — TinyLlama-Chat was fine-tuned on a
    # specific chat format, and matching it markedly improves replies.
    messages = [
        {"role": "system", "content": "You are a helpful assistant."},
        {"role": "user", "content": user_input},
    ]
    prompt = tokenizer.apply_chat_template(
        messages, tokenize=False, add_generation_prompt=True
    )
    response = generator(prompt, return_full_text=False)
    return response[0]["generated_text"].strip()


iface = gr.Interface(
    fn=chat,
    inputs=gr.Textbox(lines=2, placeholder="Ask a full question, like 'Where is Delhi?'"),
    outputs="text",
    title="TinyLlama Chatbot 🤖",
    description="Lightweight chatbot powered by TinyLlama. Works better with complete questions!",
)

# Guard the server launch so importing this module (e.g. for testing)
# does not start a web server as a side effect.
if __name__ == "__main__":
    iface.launch()