# newingtest / app.py
# Hugging Face Space app (uploaded by nitya001, commit a3ec0fc)
import torch
from transformers import AutoTokenizer, AutoModelForCausalLM
from peft import PeftModel
import gradio as gr
# --------------------
# Model setup
# --------------------
# Base checkpoint plus a LoRA adapter fine-tuned with AutoTrain.
BASE_MODEL = "TinyLlama/TinyLlama-1.1B-Chat-v1.0"
LORA_REPO = "nitya001/autotrain-oa5ez-0dtoc"

device = "cuda" if torch.cuda.is_available() else "cpu"
# fp16 halves GPU memory; CPU stays fp32 (fp16 ops are slow/unsupported on CPU).
dtype = torch.float16 if device == "cuda" else torch.float32

print("Loading tokenizer...")
tokenizer = AutoTokenizer.from_pretrained(BASE_MODEL)

print("Loading base model...")
base_model = AutoModelForCausalLM.from_pretrained(
    BASE_MODEL,
    torch_dtype=dtype,
    # On GPU let accelerate place the weights; on CPU load normally.
    device_map="auto" if device == "cuda" else None,
)

print("Loading LoRA adapter:", LORA_REPO)
model = PeftModel.from_pretrained(base_model, LORA_REPO)
# With device_map="auto" the model is already dispatched by accelerate and
# calling .to() on it warns/raises — only move it manually on the CPU path.
if device != "cuda":
    model.to(device)
model.eval()

# Generic system prompt used as the <|system|> turn of every conversation.
SYSTEM_PROMPT = (
    "You are a helpful AI assistant. "
    "Answer clearly, accurately, and concisely. "
    "If you do not know something, say so honestly."
)
# --------------------
# Generation function
# --------------------
def generate_reply(message: str, history: list) -> str:
    """Generate one assistant reply for a Gradio ChatInterface callback.

    Args:
        message: The latest user message.
        history: Prior turns. Supports both Gradio history formats:
            a list of {"role": ..., "content": ...} dicts (type="messages")
            or the legacy list of (user, assistant) pairs.

    Returns:
        The model's reply text, or a fallback sentence when generation
        produced only whitespace/special tokens.
    """
    # Build the TinyLlama-chat prompt: <|role|>text</s> per turn.
    parts = [f"<|system|>{SYSTEM_PROMPT}</s>\n"]
    for msg in history or []:
        if isinstance(msg, dict):
            # "messages" format: one role/content dict per turn.
            role = msg.get("role", "user")
            content = msg.get("content", "")
            if role == "user":
                parts.append(f"<|user|>{content}</s>\n")
            elif role == "assistant":
                parts.append(f"<|assistant|>{content}</s>\n")
        else:
            # Legacy format: one (user_text, assistant_text) pair per exchange;
            # assistant_text may be None for the in-flight turn.
            user_text, assistant_text = msg
            if user_text:
                parts.append(f"<|user|>{user_text}</s>\n")
            if assistant_text:
                parts.append(f"<|assistant|>{assistant_text}</s>\n")
    parts.append(f"<|user|>{message}</s>\n<|assistant|>")
    conversation = "".join(parts)

    # Truncate to the context window and move tensors to the model's device.
    inputs = tokenizer(
        conversation,
        return_tensors="pt",
        truncation=True,
        max_length=2048,
    ).to(device)

    with torch.no_grad():
        output_ids = model.generate(
            **inputs,
            max_new_tokens=256,
            temperature=0.7,
            top_p=0.9,
            do_sample=True,
            # Silences the missing-pad-token warning on open-ended generation.
            pad_token_id=tokenizer.eos_token_id,
        )

    # Decode only the newly generated tokens (drop the echoed prompt).
    generated_ids = output_ids[0][inputs["input_ids"].shape[-1]:]
    answer = tokenizer.decode(generated_ids, skip_special_tokens=True).strip()

    if not answer:
        answer = "I'm not sure how to answer that. Could you rephrase?"
    return answer
# --------------------
# Gradio UI
# --------------------
# --------------------
# Gradio UI
# --------------------
demo = gr.ChatInterface(
    fn=generate_reply,
    # type="messages" guarantees history arrives as role/content dicts,
    # which is the format generate_reply parses.
    type="messages",
    title="Custom AI Assistant",
    description="Ask anything.",
)

if __name__ == "__main__":
    demo.launch()