Test / app.py
Hirooo00oo's picture
Update app.py
eb24f8f verified
import gradio as gr
from transformers import AutoTokenizer, AutoModelForCausalLM, TextIteratorStreamer
import torch
from threading import Thread
# Hub repository id shared by the tokenizer and the model weights below.
model_id = "TheDrummer/Tiger-Gemma-9B-v3"

# The tokenizer is loaded first; `respond` uses it to render the chat
# template and to decode streamed tokens.
tokenizer = AutoTokenizer.from_pretrained(model_id)

# Weights are requested in bfloat16; device_map="auto" leaves device
# placement to the loader.
model = AutoModelForCausalLM.from_pretrained(model_id, device_map="auto", torch_dtype=torch.bfloat16)
def respond(message, history):
    """Stream the model's reply to ``message`` for the chat UI.

    Args:
        message: Text of the newest user message.
        history: Earlier turns of the conversation. Accepts either
            ``(user, assistant)`` pairs or ``{"role": ..., "content": ...}``
            dicts; ``None`` or an empty list means a fresh conversation.

    Yields:
        The reply accumulated so far, growing each time the streamer
        hands over newly decoded text.

    Raises:
        Exception: Whatever ``model.generate`` raised in the worker thread,
            re-raised here once streaming has stopped.
    """
    # Build conversation (NO system prompt)
    messages = []
    for turn in history or []:
        if isinstance(turn, dict):
            # Dict-style history: forward only role/content, dropping any
            # extra keys the UI may attach to a turn.
            messages.append({"role": turn["role"], "content": turn["content"]})
            continue
        user_msg, bot_msg = turn
        if user_msg is None or bot_msg is None:
            # Skip half-finished exchanges (e.g. a reply that never arrived)
            # rather than feeding None to the chat template.
            continue
        messages.append({"role": "user", "content": user_msg})
        messages.append({"role": "assistant", "content": bot_msg})
    messages.append({"role": "user", "content": message})

    # Ask for a dict explicitly so the result does not depend on the
    # library's default return type and the attention mask is available.
    inputs = tokenizer.apply_chat_template(
        messages,
        add_generation_prompt=True,
        return_tensors="pt",
        return_dict=True,
    ).to(model.device)

    streamer = TextIteratorStreamer(tokenizer, skip_prompt=True, skip_special_tokens=True)
    errors = []

    def _run_generation():
        """Run generation; on failure record the error and unblock the reader."""
        try:
            model.generate(
                input_ids=inputs["input_ids"],
                attention_mask=inputs["attention_mask"],
                max_new_tokens=512,
                temperature=0.7,
                do_sample=True,
                streamer=streamer,
            )
        except Exception as exc:  # thread boundary: re-raised by the consumer below
            errors.append(exc)
            # Without this the consumer loop would wait forever for an
            # end-of-stream signal that a failed generate() never sends.
            streamer.end()

    worker = Thread(target=_run_generation)
    worker.start()

    partial = ""
    for token in streamer:
        partial += token
        yield partial

    worker.join()
    if errors:
        raise errors[0]
# Build the chat UI around the streaming `respond` handler.
demo = gr.ChatInterface(
    fn=respond,
    title="Tiger-Gemma 9B Chat",
    description="Powered by TheDrummer/Tiger-Gemma-9B-v3",
)

# Start serving; share=True additionally requests a public share link.
demo.launch(share=True)