# app.py
import gradio as gr
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer
from peft import PeftModel

# --- 1. Load your Fine-Tuned Model ---
# This is the core of your application.
print("Loading model and tokenizer...")

# Define the names of the base model and your adapter
base_model_name = "TinyLlama/TinyLlama-1.1B-Chat-v1.0"
adapter_name = "Hrushi02/Root_Math-TinyLlama-CPU"  # Use your HF username

# Load the base model (TinyLlama)
base_model = AutoModelForCausalLM.from_pretrained(base_model_name)

# Load the tokenizer from your fine-tuned model repository
tokenizer = AutoTokenizer.from_pretrained(adapter_name)

# Apply your fine-tuned LoRA adapter to the base model
model = PeftModel.from_pretrained(base_model, adapter_name)

# Inference-only app: switch off training-mode behavior (dropout etc.)
# in both the base model and the LoRA layers.
model.eval()

print("✅ Model loaded successfully!")


# --- 2. Define the Chat Function ---
# This function takes user input and chat history, then returns the model's response.
def respond(message, chat_history):
    """Generate the model's reply for one chat turn.

    Parameters
    ----------
    message : str
        The user's latest message.
    chat_history : list[tuple[str, str]]
        Prior (user, assistant) message pairs, as supplied by
        ``gr.ChatInterface`` in its default tuple format.

    Returns
    -------
    str
        The assistant's reply text for ``message``.
    """
    # Format the conversation history into the model's expected chat template.
    # NOTE(review): each turn repeats the system prompt and no </s> turn
    # separator is emitted between turns — confirm this matches the exact
    # format used during fine-tuning before changing it.
    instruction = "Solve the following math problem:"
    prompt_list = []
    for user, assistant in chat_history:
        prompt_list.append(
            f"<|system|>\n{instruction}\n<|user|>\n{user}\n<|assistant|>\n{assistant}"
        )
    # Add the current user message, leaving the assistant slot open
    # so the model continues from there.
    prompt_list.append(
        f"<|system|>\n{instruction}\n<|user|>\n{message}\n<|assistant|>\n"
    )
    prompt = "".join(prompt_list)

    # Tokenize the full prompt
    inputs = tokenizer(prompt, return_tensors="pt")

    # Generate a response. This will be slow on a CPU.
    # inference_mode() skips autograd bookkeeping entirely — generation
    # never needs gradients, and tracking them wastes memory and time here.
    with torch.inference_mode():
        outputs = model.generate(
            **inputs,
            max_new_tokens=256,
            eos_token_id=tokenizer.eos_token_id,
            # Explicit pad token silences the per-call "Setting pad_token_id
            # to eos_token_id" warning; value is unchanged from the default
            # transformers would pick, so output is identical.
            pad_token_id=tokenizer.eos_token_id,
        )

    # Decode the full output (prompt + completion).
    full_response = tokenizer.decode(outputs[0], skip_special_tokens=True)

    # Extract only the last assistant response. The <|assistant|> markers are
    # plain text in this template, so they survive skip_special_tokens=True.
    new_response = full_response.split("<|assistant|>")[-1].strip()

    return new_response
# --- 3. Create the Gradio Interface ---
# gr.ChatInterface supplies the classic chatbot UI (message box, history
# pane, submit handling) around our `respond` callback — no layout code needed.
demo = gr.ChatInterface(
    fn=respond,
    title="Root_Math CPU Chatbot",
    description="A fine-tuned TinyLlama model for solving math problems. Running on a free CPU, so please be patient.",
)

# Launch the web server only when this file is run as a script,
# not when it is imported by another module.
if __name__ == "__main__":
    demo.launch()