|
|
|
|
|
|
|
|
import gradio as gr |
|
|
import torch |
|
|
from transformers import AutoModelForCausalLM, AutoTokenizer |
|
|
from peft import PeftModel |
|
|
|
|
|
|
|
|
|
|
|
# Flat script section: load everything once at import time so the Gradio
# request handler can reuse the same model/tokenizer across chat turns.
print("Loading model and tokenizer...")

# Base checkpoint the adapter was fine-tuned on top of.
base_model_name = "TinyLlama/TinyLlama-1.1B-Chat-v1.0"

# Hugging Face Hub repo holding the PEFT/LoRA adapter weights
# (and, presumably, the matching tokenizer files — see below).
adapter_name = "Hrushi02/Root_Math-TinyLlama-CPU"

# Download/load the full base model. No device_map/dtype overrides: this is
# intended to run on CPU in default float32.
base_model = AutoModelForCausalLM.from_pretrained(base_model_name)

# NOTE(review): the tokenizer is loaded from the adapter repo rather than the
# base model — assumes that repo ships tokenizer files; verify on the Hub.
tokenizer = AutoTokenizer.from_pretrained(adapter_name)

# Attach the LoRA adapter weights onto the base model. `model` and `tokenizer`
# are module-level globals consumed by respond().
model = PeftModel.from_pretrained(base_model, adapter_name)

print("✅ Model loaded successfully!")
|
|
|
|
|
|
|
|
|
|
|
def respond(message, chat_history):
    """Generate a model reply for *message*, conditioned on the chat so far.

    Parameters
    ----------
    message : str
        The user's latest math question.
    chat_history : list[tuple[str, str]]
        Prior turns as (user, assistant) pairs, as supplied by
        gr.ChatInterface in tuple format.

    Returns
    -------
    str
        The assistant's newly generated reply, with the prompt and special
        tokens stripped.
    """
    instruction = "Solve the following math problem:"

    # Rebuild the whole conversation in the TinyLlama chat template so the
    # model sees the full context on every turn.
    prompt_list = []
    for user, assistant in chat_history:
        prompt_list.append(f"<|system|>\n{instruction}</s>\n<|user|>\n{user}</s>\n<|assistant|>\n{assistant}</s>")

    # Open an assistant turn for the new message; the model completes it.
    prompt_list.append(f"<|system|>\n{instruction}</s>\n<|user|>\n{message}</s>\n<|assistant|>\n")

    prompt = "".join(prompt_list)

    inputs = tokenizer(prompt, return_tensors="pt")

    # inference_mode avoids building autograd state during generation:
    # less memory and faster on the CPU-only deployment this targets.
    with torch.inference_mode():
        outputs = model.generate(
            **inputs,
            max_new_tokens=256,
            eos_token_id=tokenizer.eos_token_id,
        )

    # generate() returns prompt + continuation; decode only the newly
    # generated token slice. This is more robust than decoding everything
    # and splitting on the literal "<|assistant|>" marker, which breaks if
    # that string ever appears inside a user message.
    prompt_len = inputs["input_ids"].shape[1]
    new_response = tokenizer.decode(outputs[0][prompt_len:], skip_special_tokens=True).strip()

    return new_response
|
|
|
|
|
|
|
|
|
|
|
# Wire the respond() handler into Gradio's ready-made chat UI. With a plain
# callable, ChatInterface calls respond(message, history) per user turn.
demo = gr.ChatInterface(
    respond,
    title="Root_Math CPU Chatbot",
    description="A fine-tuned TinyLlama model for solving math problems. Running on a free CPU, so please be patient.",
)

# Launch the web server only when run as a script (not when imported,
# e.g. by a Hugging Face Spaces runner that calls demo itself).
if __name__ == "__main__":
    demo.launch()