# savysachi / app.py — Hugging Face Space by Devishetty100 (revision a6e2d37)
import gradio as gr
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer
from peft import PeftModel
# Base and adapter models: a small instruction-tuned Gemma checkpoint plus a
# LoRA adapter hosted on the Hub.
BASE_MODEL = "unsloth/gemma-3-270m-it"
ADAPTER_MODEL = "Devishetty100/savyasachi"

# Load tokenizer from base model.
# NOTE(review): assumes the adapter training did not add new tokens — confirm.
tokenizer = AutoTokenizer.from_pretrained(BASE_MODEL)

# Load base model: fp16 only when a CUDA device is available, fp32 on CPU.
# device_map="auto" lets accelerate place the weights on available devices.
base_model = AutoModelForCausalLM.from_pretrained(
    BASE_MODEL,
    torch_dtype=torch.float16 if torch.cuda.is_available() else torch.float32,
    device_map="auto",
)

# Load LORA adapter weights on top of the base model.
model = PeftModel.from_pretrained(base_model, ADAPTER_MODEL)
model.eval()  # set to eval mode (disables dropout for inference)
# Chat function
def chat(user_input, history, max_new_tokens=200, temperature=1.0):
    """Generate one assistant reply for the latest user message.

    Parameters
    ----------
    user_input : str
        The new user message.
    history : list[tuple[str, str]]
        Prior (user, assistant) exchanges, oldest first.
    max_new_tokens : int
        Cap on the number of tokens to generate.
    temperature : float
        Sampling temperature for generation.

    Returns
    -------
    str
        The decoded reply, containing only the newly generated tokens.
    """
    # Rebuild the conversation in chat-message form, then add the new turn.
    messages = [
        {"role": role, "content": text}
        for user_msg, bot_msg in history
        for role, text in (("user", user_msg), ("assistant", bot_msg))
    ]
    messages.append({"role": "user", "content": user_input})

    # Let the tokenizer's chat template lay out the prompt text.
    prompt = tokenizer.apply_chat_template(
        messages,
        tokenize=False,
        add_generation_prompt=True,
    )

    # Tokenize and move tensors to wherever the model lives.
    encoded = tokenizer(prompt, return_tensors="pt").to(model.device)

    generated = model.generate(
        **encoded,
        max_new_tokens=max_new_tokens,
        temperature=temperature,
        top_p=0.95,
        top_k=64,
        do_sample=True,
        eos_token_id=tokenizer.eos_token_id,
    )

    # Slice off the prompt tokens so only the fresh completion is decoded.
    prompt_len = encoded["input_ids"].shape[-1]
    return tokenizer.decode(generated[0][prompt_len:], skip_special_tokens=True)
# Gradio UI
# Gradio UI: a simple chat layout wired to the `chat` generation function.
with gr.Blocks() as demo:
    gr.Markdown("## 🕉️ Savyasachi — Devotee of Lord Krishna")
    chatbot = gr.Chatbot()
    user_input = gr.Textbox(label="Ask Krishna")
    send = gr.Button("Send")

    def respond(message, history):
        """Generate a reply for `message`, extend the chat, clear the textbox.

        Returns the updated history (for the Chatbot) and "" (to reset the
        Textbox), matching the outputs wired below.
        """
        reply = chat(message, history)
        # Return a new list instead of mutating Gradio's state in place —
        # avoids surprises with Gradio's change detection on session state.
        return history + [(message, reply)], ""

    send.click(respond, [user_input, chatbot], [chatbot, user_input])
    # Also submit on Enter in the textbox; same handler as the button.
    user_input.submit(respond, [user_input, chatbot], [chatbot, user_input])

demo.launch()