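"""Gradio demo for Efe2898/gemma3-1b-sft-reasoning-25: streams answers from the model on CPU and hides its <think> reasoning block."""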
import gradio as gr
import torch
import re
from transformers import AutoTokenizer, AutoModelForCausalLM, TextIteratorStreamer
from threading import Thread
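# Load the tokenizer and model once at startup; the Space runs on CPU, so plain float32 weights are used.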
MODEL_ID = "Efe2898/gemma3-1b-sft-reasoning-25"
print("Model yükleniyor (CPU)...")
tokenizer = AutoTokenizer.from_pretrained(MODEL_ID)
model = AutoModelForCausalLM.from_pretrained(
MODEL_ID,
torch_dtype=torch.float32,
low_cpu_mem_usage=True,
)
model.eval()
print(f"Hazır — {sum(p.numel() for p in model.parameters())/1e6:.0f}M params — CPU")
def clean_output(text: str) -> str:
text = re.sub(r"<think>.*?</think>", "", text, flags=re.DOTALL | re.IGNORECASE)
return text.strip()
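# Streaming generation: apply the chat template when one exists, run model.generate in a background thread, and yield progressively cleaned text.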
def generate(prompt, max_new_tokens, temperature, top_p, rep_penalty):
if not prompt.strip():
yield "Lütfen bir şeyler yazın."
return
messages = [{"role": "user", "content": prompt.strip()}]
if hasattr(tokenizer, "apply_chat_template") and tokenizer.chat_template is not None:
        formatted_text = tokenizer.apply_chat_template(
            messages,
            tokenize=False,
            add_generation_prompt=True,
        )
        # The chat template is expected to insert BOS itself, so skip adding special tokens a second time.
        inputs = tokenizer(formatted_text, return_tensors="pt", add_special_tokens=False)
else:
inputs = tokenizer(prompt.strip(), return_tensors="pt")
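    # TextIteratorStreamer yields decoded text chunks as they are produced; skip_prompt keeps the echoed input out of the stream.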
streamer = TextIteratorStreamer(
tokenizer,
skip_prompt=True,
skip_special_tokens=True,
)
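    # Sampling is disabled when temperature is 0; otherwise temperature, top_p and repetition_penalty shape the output.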
gen_kwargs = dict(
**inputs,
max_new_tokens=int(max_new_tokens),
do_sample=float(temperature) > 0,
temperature=float(temperature) if float(temperature) > 0 else 1.0,
top_p=float(top_p),
repetition_penalty=float(rep_penalty),
pad_token_id=tokenizer.eos_token_id,
eos_token_id=tokenizer.eos_token_id,
streamer=streamer,
)
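    # model.generate blocks, so run it in a background thread and consume the streamer on this one.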
thread = Thread(target=model.generate, kwargs=gen_kwargs)
thread.start()
raw_output = ""
last_cleaned = ""
for chunk in streamer:
raw_output += chunk
cleaned = clean_output(raw_output)
last_cleaned = cleaned
yield cleaned
thread.join()
yield last_cleaned
# ── CSS ───────────────────────────────────────────────────────────────────────
CSS = """
@import url('https://fonts.googleapis.com/css2?family=IBM+Plex+Mono:wght@400;500;600&family=IBM+Plex+Sans:wght@300;400;500;600&display=swap');
*, *::before, *::after { box-sizing: border-box; margin: 0; padding: 0; }
:root {
--bg: #0f1117;
--surface: #171b26;
--border: #252a38;
--accent: #4f8ef7;
--warn: #f7a24f;
--text: #cdd6f4;
--muted: #6c7086;
--r: 6px;
}
body, .gradio-container {
background: var(--bg) !important;
font-family: 'IBM Plex Sans', sans-serif !important;
color: var(--text) !important;
}
.gradio-container {
max-width: 1100px !important;
margin: 0 auto !important;
padding: 1.5rem !important;
}
/* Header */
#header {
border-bottom: 1px solid var(--border);
padding-bottom: 1rem;
margin-bottom: 1.25rem;
}
#header h1 {
font-family: 'IBM Plex Mono', monospace;
font-size: 1.4rem;
font-weight: 600;
color: #fff;
letter-spacing: -0.02em;
}
#header h1 span { color: var(--accent); }
/* Warning banner */
#warning {
background: rgba(247,162,79,.08);
border: 1px solid rgba(247,162,79,.3);
border-radius: var(--r);
padding: .6rem 1rem;
margin-bottom: 1.25rem;
font-size: .82rem;
color: var(--warn);
font-family: 'IBM Plex Mono', monospace;
line-height: 1.5;
}
/* Layout */
#main-row { gap: 1rem !important; }
/* Textboxes */
textarea, .gr-textbox textarea {
background: var(--surface) !important;
border: 1px solid var(--border) !important;
border-radius: var(--r) !important;
color: var(--text) !important;
font-family: 'IBM Plex Mono', monospace !important;
font-size: .85rem !important;
line-height: 1.6 !important;
resize: vertical !important;
}
textarea:focus {
border-color: var(--accent) !important;
outline: none !important;
box-shadow: 0 0 0 2px rgba(79,142,247,.12) !important;
}
/* Output stream */
#output-box textarea {
color: #a6e3a1 !important;
min-height: 220px !important;
}
/* Labels */
label span, .gr-form label {
font-family: 'IBM Plex Mono', monospace !important;
font-size: .72rem !important;
color: var(--muted) !important;
text-transform: uppercase !important;
letter-spacing: .07em !important;
}
/* Sliders */
input[type=range] { accent-color: var(--accent) !important; }
/* Slider value */
.gr-number input {
background: var(--surface) !important;
border: 1px solid var(--border) !important;
color: var(--text) !important;
font-family: 'IBM Plex Mono', monospace !important;
}
/* Button */
#gen-btn {
background: var(--accent) !important;
border: none !important;
border-radius: var(--r) !important;
color: #fff !important;
font-family: 'IBM Plex Sans', sans-serif !important;
font-weight: 600 !important;
font-size: .9rem !important;
padding: .65rem 0 !important;
width: 100% !important;
cursor: pointer !important;
transition: opacity .15s !important;
margin-top: .5rem !important;
}
#gen-btn:hover { opacity: .85 !important; }
/* Stop button */
#stop-btn {
background: transparent !important;
border: 1px solid var(--border) !important;
border-radius: var(--r) !important;
color: var(--muted) !important;
font-size: .82rem !important;
padding: .5rem 0 !important;
width: 100% !important;
cursor: pointer !important;
margin-top: .35rem !important;
}
/* Right panel */
#params-panel {
background: var(--surface);
border: 1px solid var(--border);
border-radius: var(--r);
padding: 1rem;
}
#params-title {
font-family: 'IBM Plex Mono', monospace;
font-size: .72rem;
color: var(--muted);
text-transform: uppercase;
letter-spacing: .1em;
margin-bottom: .85rem;
padding-bottom: .5rem;
border-bottom: 1px solid var(--border);
}
/* Examples */
.gr-samples-table {
background: var(--surface) !important;
border: 1px solid var(--border) !important;
border-radius: var(--r) !important;
}
.gr-samples-table td {
color: var(--muted) !important;
font-size: .82rem !important;
}
.gr-samples-table tr:hover td {
color: var(--text) !important;
}
/* Footer */
#footer {
margin-top: 1.25rem;
padding-top: .75rem;
border-top: 1px solid var(--border);
font-family: 'IBM Plex Mono', monospace;
font-size: .72rem;
color: var(--muted);
display: flex;
justify-content: space-between;
}
"""
HEADER = """
<div id="header">
<h1>Gemma3 1B <span>SFT Reasoning</span></h1>
</div>
"""
WARNING = """
<div id="warning">
⚠ This model is still in the SFT stage. It has seen roughly 3-4B of reasoning data so far. Once the targeted reasoning capacity
is reached, training will move on to the GRPO stage. For now, uneven answers and overthinking are to be expected.
</div>
"""
FOOTER = """
<div id="footer">
<span>Designed by Claude</span>
<span>Gemma3-1B-Turkish</span>
</div>
"""
with gr.Blocks(css=CSS, title="gemma3-1b-sft-reasoning TR") as demo:
gr.HTML(HEADER)
gr.HTML(WARNING)
with gr.Row(elem_id="main-row"):
with gr.Column(scale=3):
prompt = gr.Textbox(
label="İstem (Prompt)",
placeholder="Türkçe bir soru yazın.",
lines=5,
)
output = gr.Textbox(
label="Yanıtınız burda gözükecek. Hatalar olabilir, yüksek derecede halüsinasyon görebilir. Güvenmeyin.",
lines=10,
interactive=False,
elem_id="output-box",
)
gen_btn = gr.Button("Cevap üret", elem_id="gen-btn", variant="primary")
stop_btn = gr.Button("Durdur", elem_id="stop-btn")
gr.Examples(
examples=[
["Bana kısa ve sade bir şiir yaz."],
["Türkiye'nin başkenti neresidir? Kısaca cevapla"],
["Mail yazmama yardımcı olur musun? İş arkadaşıma yollamam lazım. Konu : Taşınmazların satışı hkk."],
["Aşağıdaki soruyu adım adım çöz: 2x + 5 = 17"],
["Benim için 4x+17=57 denklemini çözüp, özetler misin?"],
["Aşağıdaki metni özetle: Yapay zeka son yıllarda birçok alanda kullanılmaya başladı. Bu kullanım alanları başlıca şunlardır : Yazılım, Ofis...."],
],
inputs=prompt,
label="Örnek İstemler",
)
with gr.Column(scale=1, elem_id="params-panel"):
gr.HTML('<div id="params-title">Parametreler</div>')
max_new = gr.Slider(
16, 4096, value=512, step=16,
label="max_new_tokens",
info="Üretilecek maksimum token sayısı",
)
temperature = gr.Slider(
0.0, 1.5, value=0.7, step=0.05,
label="temperature",
info="0 = Daha az risk, 1 = daha yaratıcı",
)
top_p = gr.Slider(
0.1, 1.0, value=0.9, step=0.05,
label="top_p",
info="Nucleus sampling eşiği",
)
rep_penalty = gr.Slider(
1.0, 2.0, value=1.15, step=0.05,
label="repetition_penalty",
info="Tekrar bastırma (>1 = daha az tekrar)",
)
gr.HTML(FOOTER)
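    # Wire generation to both the button and Enter in the prompt box; the Stop button cancels either run.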
gen_event = gen_btn.click(
fn=generate,
inputs=[prompt, max_new, temperature, top_p, rep_penalty],
outputs=output,
)
    submit_event = prompt.submit(
        fn=generate,
        inputs=[prompt, max_new, temperature, top_p, rep_penalty],
        outputs=output,
    )
    stop_btn.click(fn=None, cancels=[gen_event, submit_event])
demo.queue(max_size=3).launch()