alibayram committed on
Commit
a42856d
·
1 Parent(s): d68b785

Refactor chatbot implementation to use local model with transformers. Updated README to reflect new dependencies and usage instructions. Removed Hugging Face Inference API integration.

Browse files
Files changed (4) hide show
  1. .gitignore +2 -0
  2. README.md +14 -2
  3. app.py +67 -42
  4. requirements.txt +5 -0
.gitignore ADDED
@@ -0,0 +1,2 @@
 
 
 
1
+ __pycache__/
2
+ .DS_Store
README.md CHANGED
@@ -9,7 +9,19 @@ app_file: app.py
9
  pinned: false
10
  hf_oauth: true
11
  hf_oauth_scopes:
12
- - inference-api
13
  ---
14
 
15
- An example chatbot using [Gradio](https://gradio.app), [`huggingface_hub`](https://huggingface.co/docs/huggingface_hub/v0.22.2/en/index), and the [Hugging Face Inference API](https://huggingface.co/docs/api-inference/index).
 
 
 
 
 
 
 
 
 
 
 
 
 
9
  pinned: false
10
  hf_oauth: true
11
  hf_oauth_scopes:
12
+ - read-repos
13
  ---
14
 
15
+ An example chatbot using [Gradio](https://gradio.app) and [`transformers`](https://huggingface.co/docs/transformers/index) to run a model locally (downloaded from the Hugging Face Hub).
16
+
17
+ ### Notes
18
+
19
+ - **Model**: `alibayram/gemma3-tr-v64k-it`
20
+ - **Auth (optional)**: If the model is gated/private, sign in via the app’s Hugging Face login button or set `HF_TOKEN` / `HUGGINGFACEHUB_API_TOKEN` in your environment.
21
+
22
+ ### Local install
23
+
24
+ ```bash
25
+ pip install -U gradio transformers torch
26
+ python app.py
27
+ ```
app.py CHANGED
@@ -1,70 +1,95 @@
 
 
1
  import gradio as gr
2
- from huggingface_hub import InferenceClient
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
3
 
4
 
5
  def respond(
6
- message,
7
- history: list[dict[str, str]],
8
- system_message,
9
  max_tokens,
10
  temperature,
11
  top_p,
12
- hf_token: gr.OAuthToken,
13
  ):
14
- """
15
- For more information on `huggingface_hub` Inference API support, please check the docs: https://huggingface.co/docs/huggingface_hub/v0.22.2/en/guides/inference
16
- """
17
- client = InferenceClient(token=hf_token.token, model="openai/gpt-oss-20b")
18
-
19
- messages = [{"role": "system", "content": system_message}]
20
 
21
- messages.extend(history)
22
 
23
- messages.append({"role": "user", "content": message})
 
 
 
 
24
 
25
- response = ""
26
-
27
- for message in client.chat_completion(
28
- messages,
29
- max_tokens=max_tokens,
30
- stream=True,
31
  temperature=temperature,
32
  top_p=top_p,
33
- ):
34
- choices = message.choices
35
- token = ""
36
- if len(choices) and choices[0].delta.content:
37
- token = choices[0].delta.content
38
 
39
- response += token
40
- yield response
 
 
41
 
42
 
43
- """
44
- For information on how to customize the ChatInterface, peruse the gradio docs: https://www.gradio.app/docs/chatinterface
45
- """
46
  chatbot = gr.ChatInterface(
47
  respond,
48
  type="messages",
49
  additional_inputs=[
50
- gr.Textbox(value="You are a friendly Chatbot.", label="System message"),
51
- gr.Slider(minimum=1, maximum=2048, value=512, step=1, label="Max new tokens"),
52
- gr.Slider(minimum=0.1, maximum=4.0, value=0.7, step=0.1, label="Temperature"),
53
- gr.Slider(
54
- minimum=0.1,
55
- maximum=1.0,
56
- value=0.95,
57
- step=0.05,
58
- label="Top-p (nucleus sampling)",
59
  ),
 
 
 
60
  ],
61
  )
62
 
63
  with gr.Blocks() as demo:
64
- with gr.Sidebar():
65
- gr.LoginButton()
66
  chatbot.render()
67
 
68
-
69
  if __name__ == "__main__":
70
- demo.launch()
 
1
+ from threading import Thread
2
+
3
  import gradio as gr
4
+ import torch
5
+ from transformers import (AutoModelForCausalLM, AutoTokenizer,
6
+ TextIteratorStreamer)
7
+
8
MODEL_ID = "alibayram/gemma3-tr-v64k-it"

# Load the tokenizer and model once at import time so every request reuses them.
tokenizer = AutoTokenizer.from_pretrained(MODEL_ID)
model = AutoModelForCausalLM.from_pretrained(
    MODEL_ID,
    torch_dtype=torch.bfloat16,  # half-precision load; assumes bf16-capable hardware — TODO confirm
    device_map="auto",  # NOTE(review): requires the `accelerate` package at runtime — verify it is installed
)
17
+
18
def build_prompt(sistem_mesaji, gecmis, kullanici_mesaji):
    """Assemble the chat history into a single prompt string.

    Prepends the optional system message, appends the newest user turn,
    and renders the whole conversation through the tokenizer's chat
    template (untokenized, with the generation prompt appended).
    """
    system_part = (
        [{"role": "system", "content": sistem_mesaji}] if sistem_mesaji else []
    )
    conversation = (
        system_part
        + list(gecmis)
        + [{"role": "user", "content": kullanici_mesaji}]
    )
    return tokenizer.apply_chat_template(
        conversation,
        tokenize=False,
        add_generation_prompt=True,
    )
39
 
40
 
41
def respond(
    mesaj,
    gecmis: list[dict[str, str]],
    sistem_mesaji,
    max_tokens,
    temperature,
    top_p,
):
    """Stream a model reply for the latest user message.

    Runs ``model.generate`` in a worker thread and consumes a
    ``TextIteratorStreamer`` on this one, yielding the accumulated partial
    response after every token so Gradio can render it incrementally.
    """
    prompt = build_prompt(sistem_mesaji, gecmis, mesaj)

    girisler = tokenizer(prompt, return_tensors="pt").to(model.device)

    # skip_prompt avoids echoing the input back; skip_special_tokens drops
    # EOS/BOS markers from the visible output.
    streamer = TextIteratorStreamer(
        tokenizer,
        skip_prompt=True,
        skip_special_tokens=True,
    )

    uretim_parametreleri = dict(
        **girisler,
        streamer=streamer,
        max_new_tokens=int(max_tokens),  # slider values may arrive as floats
        temperature=temperature,
        top_p=top_p,
        do_sample=True,  # sampling must be on for temperature/top_p to apply
    )

    # generate() blocks until completion, so it runs in a worker thread
    # while this generator drains the streamer.
    thread = Thread(target=model.generate, kwargs=uretim_parametreleri)
    thread.start()
    try:
        cevap = ""
        for token in streamer:
            cevap += token
            yield cevap
    finally:
        # Always reclaim the worker — even if the client disconnects and
        # Gradio closes this generator early — instead of leaking the thread.
        thread.join()
75
 
76
 
 
 
 
77
# Extra controls shown under the chat box; their values are passed to
# `respond` after the message and history, in this exact order.
_system_box = gr.Textbox(
    value="Sen yardımsever, nazik ve açık şekilde cevap veren bir Türkçe yapay zekâ asistanısın.",
    label="Sistem Mesajı",
)
_max_tokens = gr.Slider(1, 4096, value=512, step=1, label="Maksimum Yeni Token")
_temperature = gr.Slider(0.1, 2.0, value=0.7, step=0.1, label="Sıcaklık (Temperature)")
_top_p = gr.Slider(0.1, 1.0, value=0.95, step=0.05, label="Top-p")

chatbot = gr.ChatInterface(
    respond,
    type="messages",
    additional_inputs=[_system_box, _max_tokens, _temperature, _top_p],
)
90
 
91
with gr.Blocks() as demo:
    # Render the pre-built ChatInterface inside this Blocks layout.
    chatbot.render()

if __name__ == "__main__":
    demo.launch()
requirements.txt ADDED
@@ -0,0 +1,5 @@
 
 
 
 
 
 
1
+ gradio==5.42.0
+ transformers>=4.45.0
+ accelerate>=0.26.0
+ torch>=2.2.0
4
+
5
+