PLAZMAstudios's picture
app.py
9f63a22 verified
import gradio as gr
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer
# Run on the GPU when CUDA is available, otherwise fall back to the CPU.
if torch.cuda.is_available():
    device = "cuda"
else:
    device = "cpu"

model_path = "ibm-granite/granite-4.0-h-350M"

# Model and tokenizer: both are loaded once at import time from `model_path`.
tokenizer = AutoTokenizer.from_pretrained(model_path)
model = AutoModelForCausalLM.from_pretrained(
    model_path,
    device_map=device,
)
# Put the model in evaluation mode; this script only runs generation.
model.eval()
def respond(message, history, max_new_tokens, temperature):
    """Generate the assistant's reply to ``message`` given the prior turns.

    Args:
        message: The new user message.
        history: Previous turns as a list of ``{"role": ..., "content": ...}``
            dicts, or ``None``/empty for a fresh conversation. The list that
            is passed in is not modified.
        max_new_tokens: Upper bound on the number of tokens to generate.
        temperature: Sampling temperature. When it is ``None`` or not greater
            than zero, generation falls back to the model's default decoding.

    Returns:
        A ``(reply, updated_history)`` tuple: the newly generated text only
        (prompt and special tokens removed), and a new history list with the
        user message and the reply appended.
    """
    # Work on a copy so the caller's list is never mutated in place.
    history = list(history or [])

    # Convert the message history to the chat format. Both user and assistant
    # turns are kept so the model sees its own earlier replies as context.
    chat = [
        {"role": turn["role"], "content": turn["content"]}
        for turn in history
        if turn.get("role") in ("user", "assistant")
    ]
    chat.append({"role": "user", "content": message})

    chat_text = tokenizer.apply_chat_template(
        chat, tokenize=False, add_generation_prompt=True
    )
    input_tokens = tokenizer(chat_text, return_tensors="pt").to(device)

    # Response generation. The slider value may arrive as a float, so coerce
    # it; the temperature only takes effect when sampling is enabled.
    generation_kwargs = {"max_new_tokens": int(max_new_tokens)}
    if temperature is not None and temperature > 0:
        generation_kwargs["do_sample"] = True
        generation_kwargs["temperature"] = float(temperature)

    output_tokens = model.generate(**input_tokens, **generation_kwargs)

    # `generate` returns prompt + continuation; slice off the prompt so only
    # the new reply is decoded, and drop special tokens from the text.
    prompt_length = input_tokens["input_ids"].shape[-1]
    output_text = tokenizer.decode(
        output_tokens[0][prompt_length:], skip_special_tokens=True
    ).strip()

    # Update the history with this exchange.
    history.append({"role": "user", "content": message})
    history.append({"role": "assistant", "content": output_text})

    # NOTE(review): gr.ChatInterface normally expects only the reply unless
    # `additional_outputs` is configured; the original (reply, history) return
    # shape is preserved here -- confirm against the caller.
    return output_text, history
# Gradio chat interface: `respond` receives the two slider values below as
# its extra arguments, in this order.
max_new_tokens_slider = gr.Slider(
    label="Max new tokens",
    minimum=1,
    maximum=1024,
    step=1,
    value=200,
)
temperature_slider = gr.Slider(
    label="Temperature",
    minimum=0.1,
    maximum=2.0,
    step=0.05,
    value=0.7,
)
chatbot = gr.ChatInterface(
    fn=respond,
    type="messages",
    additional_inputs=[max_new_tokens_slider, temperature_slider],
)
# Wrap the chat interface in a Blocks app so it can be launched as `demo`.
demo = gr.Blocks()
with demo:
    chatbot.render()

# Start the web server only when this file is executed as a script.
if __name__ == "__main__":
    demo.launch()