|
|
|
|
|
|
|
|
import gradio as gr |
|
|
import torch |
|
|
from transformers import AutoModelForCausalLM, AutoTokenizer |
|
|
from peft import PeftModel |
|
|
|
|
|
|
|
|
|
|
|
# Flat script section: load everything once at import time so the Gradio
# request handler can reuse the same model/tokenizer across chat turns.
print("Loading model and tokenizer...")

# Base checkpoint the adapter was fine-tuned on top of.
base_model_name = "TinyLlama/TinyLlama-1.1B-Chat-v1.0"

# Hugging Face Hub repo holding the PEFT/LoRA adapter weights
# (and, presumably, the matching tokenizer files — see below).
adapter_name = "Hrushi02/Root_Math-TinyLlama-CPU"

# Download/load the full base model. No device_map/dtype overrides: this is
# intended to run on CPU in default float32.
base_model = AutoModelForCausalLM.from_pretrained(base_model_name)

# NOTE(review): the tokenizer is loaded from the adapter repo rather than the
# base model — assumes that repo ships tokenizer files; verify on the Hub.
tokenizer = AutoTokenizer.from_pretrained(adapter_name)

# Attach the LoRA adapter weights onto the base model. `model` and `tokenizer`
# are module-level globals consumed by respond().
model = PeftModel.from_pretrained(base_model, adapter_name)

print("✅ Model loaded successfully!")
|
|
|
|
|
|
|
|
|
|
|
def respond(message, chat_history):
    """Generate a model reply for *message*, conditioned on the chat so far.

    Parameters
    ----------
    message : str
        The user's latest math question.
    chat_history : list[tuple[str, str]]
        Prior turns as (user, assistant) pairs, as supplied by
        gr.ChatInterface in tuple format.

    Returns
    -------
    str
        The assistant's newly generated reply, with the prompt and special
        tokens stripped.
    """
    instruction = "Solve the following math problem:"

    # Rebuild the whole conversation in the TinyLlama chat template so the
    # model sees the full context on every turn.
    prompt_list = []
    for user, assistant in chat_history:
        prompt_list.append(f"<|system|>\n{instruction}</s>\n<|user|>\n{user}</s>\n<|assistant|>\n{assistant}</s>")

    # Open an assistant turn for the new message; the model completes it.
    prompt_list.append(f"<|system|>\n{instruction}</s>\n<|user|>\n{message}</s>\n<|assistant|>\n")

    prompt = "".join(prompt_list)

    inputs = tokenizer(prompt, return_tensors="pt")

    # inference_mode avoids building autograd state during generation:
    # less memory and faster on the CPU-only deployment this targets.
    with torch.inference_mode():
        outputs = model.generate(
            **inputs,
            max_new_tokens=256,
            eos_token_id=tokenizer.eos_token_id,
        )

    # generate() returns prompt + continuation; decode only the newly
    # generated token slice. This is more robust than decoding everything
    # and splitting on the literal "<|assistant|>" marker, which breaks if
    # that string ever appears inside a user message.
    prompt_len = inputs["input_ids"].shape[1]
    new_response = tokenizer.decode(outputs[0][prompt_len:], skip_special_tokens=True).strip()

    return new_response
|
|
|
|
|
|
|
|
|
|
|
# Wire the respond() handler into Gradio's ready-made chat UI. With a plain
# callable, ChatInterface calls respond(message, history) per user turn.
demo = gr.ChatInterface(
    respond,
    title="Root_Math CPU Chatbot",
    description="A fine-tuned TinyLlama model for solving math problems. Running on a free CPU, so please be patient.",
)

# Launch the web server only when run as a script (not when imported,
# e.g. by a Hugging Face Spaces runner that calls demo itself).
if __name__ == "__main__":
    demo.launch()