thoshan_Flash / app.py
lingadevaruhp's picture
Update app.py
ba9e0c6 verified
import gradio as gr
from transformers import AutoTokenizer, AutoModelForCausalLM, BitsAndBytesConfig
import torch
# PEFT supplies the wrapper that attaches the LoRA adapter to the base model.
from peft import PeftModel

# Hub repository holding both the tokenizer files and the LoRA adapter weights.
model_name = "lingadevaruhp/thoshan_Flash"

# Tokenizer is taken from the adapter repository.
tokenizer = AutoTokenizer.from_pretrained(model_name)

# 4-bit bitsandbytes quantization with float16 compute (no unsloth needed).
bnb_config = BitsAndBytesConfig(
    load_in_4bit=True,
    bnb_4bit_compute_dtype=torch.float16,
)

# Base checkpoint; device placement is delegated to `device_map="auto"`.
base_model = AutoModelForCausalLM.from_pretrained(
    "unsloth/gemma-2-9b-it-bnb-4bit",
    quantization_config=bnb_config,
    device_map="auto",
)

# Wrap the base model with the LoRA adapter and switch to evaluation mode.
model = PeftModel.from_pretrained(base_model, model_name)
model.eval()
def chat(prompt, history):
    """Generate a single reply to ``prompt`` with the LoRA-adapted model.

    Args:
        prompt: The user's latest message.
        history: Earlier turns supplied by the chat UI. Currently unused:
            every reply is generated from ``prompt`` alone.

    Returns:
        The newly generated text with surrounding whitespace stripped.
    """
    input_text = f"<s>### Instruction:\n{prompt}\n### Response:\n"
    inputs = tokenizer(input_text, return_tensors="pt").to(model.device)
    # Remember how many tokens the prompt occupies so they can be dropped
    # from the generated sequence below.
    prompt_length = inputs["input_ids"].shape[-1]

    with torch.no_grad():
        outputs = model.generate(
            **inputs,
            max_new_tokens=200,
            do_sample=True,
            temperature=0.8,
            eos_token_id=tokenizer.eos_token_id,
        )

    # The generated sequence starts with the prompt tokens. Decode only the
    # continuation instead of splitting the full text on "### Response:",
    # which would discard part of the reply whenever the model itself
    # emits that marker.
    new_tokens = outputs[0][prompt_length:]
    response = tokenizer.decode(new_tokens, skip_special_tokens=True)
    return response.strip()
# Sample prompts offered as clickable examples in the chat UI.
example_prompts = [
    "Hey, yeno madtha idiya?",
    "Ninna hesarenu helu",
    "What's your plan tonight?",
]

# Chat front-end that routes every user message through `chat`.
iface = gr.ChatInterface(
    fn=chat,
    title="thoshan_Flash 🔥",
    description="Kannada-English FlirtAI — Chat in Kanglish!",
    examples=example_prompts,
)

# Start the Gradio app.
iface.launch()