# app.py
import gradio as gr
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer
from peft import PeftModel

# --- 1. Load your Fine-Tuned Model ---
# This is the core of your application.
print("Loading model and tokenizer...")

# Define the names of the base model and your adapter
base_model_name = "TinyLlama/TinyLlama-1.1B-Chat-v1.0"
adapter_name = "Hrushi02/Root_Math-TinyLlama-CPU"  # Use your HF username

# Load the base model (TinyLlama)
base_model = AutoModelForCausalLM.from_pretrained(base_model_name)

# Load the tokenizer from your fine-tuned model repository
tokenizer = AutoTokenizer.from_pretrained(adapter_name)

# Apply your fine-tuned LoRA adapter to the base model
model = PeftModel.from_pretrained(base_model, adapter_name)

# Inference-only app: switch off training-mode behavior (dropout etc.)
# in both the base model and the LoRA layers.
model.eval()

print("✅ Model loaded successfully!")


# --- 2. Define the Chat Function ---
# This function takes user input and chat history, then returns the model's response.
def respond(message, chat_history):
    """Generate the model's reply for one chat turn.

    Parameters
    ----------
    message : str
        The user's latest message.
    chat_history : list[tuple[str, str]]
        Prior (user, assistant) message pairs, as supplied by
        ``gr.ChatInterface`` in its default tuple format.

    Returns
    -------
    str
        The assistant's reply text for ``message``.
    """
    # Format the conversation history into the model's expected chat template.
    # NOTE(review): each turn repeats the system prompt and no </s> turn
    # separator is emitted between turns — confirm this matches the exact
    # format used during fine-tuning before changing it.
    instruction = "Solve the following math problem:"
    prompt_list = []
    for user, assistant in chat_history:
        prompt_list.append(
            f"<|system|>\n{instruction}\n<|user|>\n{user}\n<|assistant|>\n{assistant}"
        )
    # Add the current user message, leaving the assistant slot open
    # so the model continues from there.
    prompt_list.append(
        f"<|system|>\n{instruction}\n<|user|>\n{message}\n<|assistant|>\n"
    )
    prompt = "".join(prompt_list)

    # Tokenize the full prompt
    inputs = tokenizer(prompt, return_tensors="pt")

    # Generate a response. This will be slow on a CPU.
    # inference_mode() skips autograd bookkeeping entirely — generation
    # never needs gradients, and tracking them wastes memory and time here.
    with torch.inference_mode():
        outputs = model.generate(
            **inputs,
            max_new_tokens=256,
            eos_token_id=tokenizer.eos_token_id,
            # Explicit pad token silences the per-call "Setting pad_token_id
            # to eos_token_id" warning; value is unchanged from the default
            # transformers would pick, so output is identical.
            pad_token_id=tokenizer.eos_token_id,
        )

    # Decode the full output (prompt + completion).
    full_response = tokenizer.decode(outputs[0], skip_special_tokens=True)

    # Extract only the last assistant response. The <|assistant|> markers are
    # plain text in this template, so they survive skip_special_tokens=True.
    new_response = full_response.split("<|assistant|>")[-1].strip()

    return new_response
# --- 3. Create the Gradio Interface ---
# gr.ChatInterface supplies the classic chatbot UI (message box, history
# pane, submit handling) around our `respond` callback — no layout code needed.
demo = gr.ChatInterface(
    fn=respond,
    title="Root_Math CPU Chatbot",
    description="A fine-tuned TinyLlama model for solving math problems. Running on a free CPU, so please be patient.",
)

# Launch the web server only when this file is run as a script,
# not when it is imported by another module.
if __name__ == "__main__":
    demo.launch()