|
|
from threading import Thread |
|
|
|
|
|
import gradio as gr |
|
|
import torch |
|
|
from transformers import (AutoModelForCausalLM, AutoTokenizer, |
|
|
TextIteratorStreamer) |
|
|
|
|
|
# Hub identifier of the checkpoint this app serves.
MODEL_ID = "alibayram/gemma3-tr-v64k-it"

# Tokenizer and model are created once, at import time, and shared by
# every request handled below.
tokenizer = AutoTokenizer.from_pretrained(MODEL_ID)
model = AutoModelForCausalLM.from_pretrained(
    MODEL_ID,
    torch_dtype=torch.bfloat16,  # load the weights in bfloat16
    device_map="auto",  # leave device placement to the loader
)
|
|
|
|
|
def build_prompt(gecmis, kullanici_mesaji):
    """Render the conversation plus the new user turn as a prompt string.

    Args:
        gecmis: Iterable of earlier chat messages; they are forwarded to
            the chat template unchanged.
        kullanici_mesaji: Content of the new user message, appended as the
            final turn.

    Returns:
        Whatever ``tokenizer.apply_chat_template`` produces with
        ``tokenize=False`` and ``add_generation_prompt=True``.
    """
    # Build a fresh list so the caller's history is never mutated.
    konusma = [*gecmis, {"role": "user", "content": kullanici_mesaji}]
    return tokenizer.apply_chat_template(
        konusma,
        tokenize=False,
        add_generation_prompt=True,
    )
|
|
|
|
|
|
|
|
def respond(
    mesaj,
    gecmis: list[dict[str, str]],
    max_tokens,
    temperature,
    top_p,
):
    """Stream the model's reply to ``mesaj`` as a progressively longer string.

    Generator intended as the chat callback: generation runs on a worker
    thread while this function relays the text produced so far.

    Args:
        mesaj: The new user message.
        gecmis: Earlier conversation messages, passed to ``build_prompt``.
        max_tokens: Upper bound on newly generated tokens.
        temperature: Sampling temperature forwarded to ``model.generate``.
        top_p: Nucleus-sampling threshold forwarded to ``model.generate``.

    Yields:
        The accumulated reply text after each chunk emitted by the streamer.
    """
    prompt = build_prompt(gecmis, mesaj)

    # The prompt was rendered by the chat template, which is expected to
    # emit its own special tokens (e.g. BOS). Tokenizing with the default
    # add_special_tokens=True would insert them a second time.
    girisler = tokenizer(
        prompt,
        return_tensors="pt",
        add_special_tokens=False,
    ).to(model.device)

    streamer = TextIteratorStreamer(
        tokenizer,
        skip_prompt=True,
        skip_special_tokens=True,
    )

    uretim_parametreleri = dict(
        **girisler,
        streamer=streamer,
        # A token count must be an integer; coerce in case the UI control
        # delivers the value as a float.
        max_new_tokens=int(max_tokens),
        temperature=temperature,
        top_p=top_p,
        do_sample=True,
    )

    # generate() blocks until it finishes, so it runs on its own thread
    # while this generator drains the streamer.
    thread = Thread(target=model.generate, kwargs=uretim_parametreleri)
    thread.start()

    cevap = ""
    for parca in streamer:
        cevap += parca
        yield cevap

    # The streamer is exhausted, so generation is over; reap the worker
    # instead of leaving it dangling.
    thread.join()
|
|
|
|
|
|
|
|
# Chat UI wired to `respond`; the three sliders are passed to it, in order,
# as max_tokens, temperature and top_p.
chatbot = gr.ChatInterface(
    fn=respond,
    type="messages",
    additional_inputs=[
        gr.Slider(
            minimum=1,
            maximum=1024,
            value=64,
            step=1,
            label="Maksimum Yeni Token",
        ),
        gr.Slider(
            minimum=0.1,
            maximum=1.99,
            value=0.7,
            step=0.1,
            label="Sıcaklık (Temperature)",
        ),
        gr.Slider(
            minimum=0.1,
            maximum=1.0,
            value=0.95,
            step=0.05,
            label="Top-p",
        ),
    ],
)

# Host the chat interface inside a Blocks container.
with gr.Blocks() as demo:
    chatbot.render()

# Only start the web server when executed as a script.
if __name__ == "__main__":
    demo.launch()