llmchat / app.py
aberbossio's picture
Upload 3 files
7d64ae1 verified
import gradio as gr
import torch
from transformers import AutoTokenizer, AutoModelForCausalLM
# Checkpoint identifier handed to both ``from_pretrained`` calls below.
MODEL_ID = "TinyLlama/TinyLlama-1.1B-Chat-v1.0"

# The tokenizer and model are created once, at import time, as module globals.
tokenizer = AutoTokenizer.from_pretrained(MODEL_ID)

# Weights are requested in float32 precision.
model = AutoModelForCausalLM.from_pretrained(MODEL_ID, torch_dtype=torch.float32)
def build_prompt(message, history):
    """Render the conversation so far plus the new message as one prompt string.

    Each turn is written as ``<|user|>\\n{text}\\n`` or
    ``<|assistant|>\\n{text}\\n``; the prompt always ends with the new user
    message followed by an open ``<|assistant|>\\n`` cue for the model to
    continue from.

    Args:
        message: The new user message.
        history: Previous turns, or ``None``/empty for a fresh conversation.
            Two layouts are accepted and may not be mixed up silently:

            * pair style: an iterable of ``(user_msg, bot_msg)`` 2-sequences;
              a side that is ``None`` is left out of the prompt.
            * messages style: an iterable of dicts with ``"role"`` and
              ``"content"`` keys; only the ``"user"`` and ``"assistant"``
              roles are rendered, other roles are skipped.

    Returns:
        The assembled prompt string.

    Raises:
        ValueError: If a non-dict history entry does not unpack into exactly
            two items.
    """
    # NOTE(review): the model's own chat template may also expect an
    # end-of-sequence marker after each turn — confirm against
    # tokenizer.apply_chat_template before changing the layout here.
    segments = []

    def add_turn(role, text):
        # A missing side of an exchange must not show up as the text "None".
        if text is not None:
            segments.append(f"<|{role}|>\n{text}\n")

    for entry in history or ():
        if isinstance(entry, dict):
            # Messages-style history: unpacking a dict would yield its KEYS,
            # so it has to be read by key instead.
            role = entry.get("role")
            if role in ("user", "assistant"):
                add_turn(role, entry.get("content"))
        else:
            user_msg, bot_msg = entry
            add_turn("user", user_msg)
            add_turn("assistant", bot_msg)

    segments.append(f"<|user|>\n{message}\n<|assistant|>\n")
    return "".join(segments)
def chat(message, history):
    """Generate the assistant's reply to ``message`` given the prior ``history``.

    The conversation is rendered with ``build_prompt``, tokenized, and passed
    to ``model.generate`` with sampling enabled. Only the newly generated
    tokens are decoded and returned.

    Args:
        message: The new user message.
        history: Previous turns, forwarded unchanged to ``build_prompt``.

    Returns:
        The generated reply with surrounding whitespace stripped, or a fixed
        fallback sentence when the model produced no visible text.
    """
    # Upper bound on the number of prompt tokens given to the model.
    max_prompt_tokens = 1024

    prompt = build_prompt(message, history)
    encoded = tokenizer(prompt, return_tensors="pt")

    # When the conversation is too long, keep the LAST tokens: the newest user
    # message and the trailing assistant cue sit at the end of the prompt, so
    # cutting from the right would discard exactly what must be answered.
    # (This may drop a leading special token on over-long prompts.)
    inputs = {
        name: tensor[:, -max_prompt_tokens:] for name, tensor in encoded.items()
    }
    prompt_length = inputs["input_ids"].shape[1]

    with torch.no_grad():
        outputs = model.generate(
            **inputs,
            max_new_tokens=200,
            do_sample=True,
            temperature=0.7,
            top_p=0.9,
            repetition_penalty=1.1,
            pad_token_id=tokenizer.eos_token_id,
        )

    # The returned sequence starts with the prompt tokens; slicing by token
    # count isolates the continuation without relying on the decoded prompt
    # text being an exact prefix of the decoded full text.
    new_token_ids = outputs[0][prompt_length:]
    answer = tokenizer.decode(new_token_ids, skip_special_tokens=True).strip()

    return answer or "I am here. Ask me something."
# Chat UI wired to ``chat``; the example strings are passed to the interface
# as-is.
demo = gr.ChatInterface(
    fn=chat,
    title="My Local LLM Chat",
    description="TinyLlama chatbot running locally without HF_TOKEN",
    examples=[
        "Hello",
        "Who are you?",
        "Explain pain in simple words",
        "Write a short Python code",
    ],
)

# Start the app only when this file is executed as a script.
if __name__ == "__main__":
    demo.launch()