# ddd / app.py
# Vivek16's picture
# Rename app (2).py to app.py
# 7da7082 verified
# app.py
import gradio as gr
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer
from peft import PeftModel
# --- 1. Load your Fine-Tuned Model ---
# Module-level setup: runs once at import time and downloads weights from the
# Hugging Face Hub, so expect network I/O here.
print("Loading model and tokenizer...")

# Repository IDs: the frozen base model and the LoRA adapter trained on top of it.
base_model_name = "TinyLlama/TinyLlama-1.1B-Chat-v1.0"
adapter_name = "Hrushi02/Root_Math-TinyLlama-CPU"  # Use your HF username

# The tokenizer comes from the fine-tuned adapter repository.
tokenizer = AutoTokenizer.from_pretrained(adapter_name)

# Load the TinyLlama base weights, then stack the LoRA adapter onto them.
foundation = AutoModelForCausalLM.from_pretrained(base_model_name)
model = PeftModel.from_pretrained(foundation, adapter_name)
print("✅ Model loaded successfully!")
# --- 2. Define the Chat Function ---
def respond(message, chat_history):
    """Generate a model reply for ``message`` given the prior conversation.

    Parameters
    ----------
    message : str
        The latest user message.
    chat_history : list[tuple[str, str]]
        Previous (user, assistant) exchanges, as supplied by gr.ChatInterface.

    Returns
    -------
    str
        The assistant's reply text.
    """
    instruction = "Solve the following math problem:"

    # Rebuild the whole conversation in the model's expected chat-template format.
    prompt_list = []
    for user, assistant in chat_history:
        prompt_list.append(
            f"<|system|>\n{instruction}</s>\n<|user|>\n{user}</s>\n<|assistant|>\n{assistant}</s>"
        )
    # Append the current user turn with an open assistant tag for generation.
    prompt_list.append(
        f"<|system|>\n{instruction}</s>\n<|user|>\n{message}</s>\n<|assistant|>\n"
    )
    prompt = "".join(prompt_list)

    # Tokenize and move the tensors to wherever the model lives. The original
    # code left them on CPU, which crashes if the model is ever placed on a
    # GPU; `.to(model.device)` is a no-op on a CPU-only Space.
    inputs = tokenizer(prompt, return_tensors="pt").to(model.device)

    # Generation is slow on a free CPU. pad_token_id is set explicitly because
    # this tokenizer defines no pad token, which otherwise triggers a runtime
    # warning and can mis-handle padding; no_grad avoids building any autograd
    # state during inference.
    with torch.no_grad():
        outputs = model.generate(
            **inputs,
            max_new_tokens=256,
            eos_token_id=tokenizer.eos_token_id,
            pad_token_id=tokenizer.eos_token_id,
        )

    # Decode the full output; the <|assistant|> markers are plain text (not
    # special tokens), so they survive skip_special_tokens and we can split on
    # the last one to isolate the newest reply.
    full_response = tokenizer.decode(outputs[0], skip_special_tokens=True)
    return full_response.split("<|assistant|>")[-1].strip()
# --- 3. Create the Gradio Interface ---
# gr.ChatInterface wires the respond() callback into a ready-made chatbot UI.
demo = gr.ChatInterface(
    fn=respond,
    title="Root_Math CPU Chatbot",
    description=(
        "A fine-tuned TinyLlama model for solving math problems. "
        "Running on a free CPU, so please be patient."
    ),
)

# Start the web server only when run as a script, not when imported.
if __name__ == "__main__":
    demo.launch()