alibayram committed on
Commit
a42856d
·
1 Parent(s): d68b785

Refactor chatbot implementation to use local model with transformers. Updated README to reflect new dependencies and usage instructions. Removed Hugging Face Inference API integration.

Browse files
Files changed (4) hide show
  1. .gitignore +2 -0
  2. README.md +14 -2
  3. app.py +67 -42
  4. requirements.txt +5 -0
.gitignore ADDED
@@ -0,0 +1,2 @@
 
 
 
1
+ __pycache__/
2
+ .DS_Store
README.md CHANGED
@@ -9,7 +9,19 @@ app_file: app.py
9
  pinned: false
10
  hf_oauth: true
11
  hf_oauth_scopes:
12
- - inference-api
13
  ---
14
 
15
- An example chatbot using [Gradio](https://gradio.app), [`huggingface_hub`](https://huggingface.co/docs/huggingface_hub/v0.22.2/en/index), and the [Hugging Face Inference API](https://huggingface.co/docs/api-inference/index).
 
 
 
 
 
 
 
 
 
 
 
 
 
9
  pinned: false
10
  hf_oauth: true
11
  hf_oauth_scopes:
12
+ - read-repos
13
  ---
14
 
15
+ An example chatbot using [Gradio](https://gradio.app) and [`transformers`](https://huggingface.co/docs/transformers/index) to run a model locally (downloaded from the Hugging Face Hub).
16
+
17
+ ### Notes
18
+
19
+ - **Model**: `alibayram/gemma3-tr-v64k-it`
20
+ - **Auth (optional)**: If the model is gated/private, sign in via the app’s Hugging Face login button or set `HF_TOKEN` / `HUGGINGFACEHUB_API_TOKEN` in your environment.
21
+
22
+ ### Local install
23
+
24
+ ```bash
25
+ pip install -U gradio transformers torch
26
+ python app.py
27
+ ```
app.py CHANGED
@@ -1,70 +1,95 @@
 
 
1
  import gradio as gr
2
- from huggingface_hub import InferenceClient
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
3
 
4
 
5
  def respond(
6
- message,
7
- history: list[dict[str, str]],
8
- system_message,
9
  max_tokens,
10
  temperature,
11
  top_p,
12
- hf_token: gr.OAuthToken,
13
  ):
14
- """
15
- For more information on `huggingface_hub` Inference API support, please check the docs: https://huggingface.co/docs/huggingface_hub/v0.22.2/en/guides/inference
16
- """
17
- client = InferenceClient(token=hf_token.token, model="openai/gpt-oss-20b")
18
-
19
- messages = [{"role": "system", "content": system_message}]
20
 
21
- messages.extend(history)
22
 
23
- messages.append({"role": "user", "content": message})
 
 
 
 
24
 
25
- response = ""
26
-
27
- for message in client.chat_completion(
28
- messages,
29
- max_tokens=max_tokens,
30
- stream=True,
31
  temperature=temperature,
32
  top_p=top_p,
33
- ):
34
- choices = message.choices
35
- token = ""
36
- if len(choices) and choices[0].delta.content:
37
- token = choices[0].delta.content
38
 
39
- response += token
40
- yield response
 
 
41
 
42
 
43
- """
44
- For information on how to customize the ChatInterface, peruse the gradio docs: https://www.gradio.app/docs/chatinterface
45
- """
46
  chatbot = gr.ChatInterface(
47
  respond,
48
  type="messages",
49
  additional_inputs=[
50
- gr.Textbox(value="You are a friendly Chatbot.", label="System message"),
51
- gr.Slider(minimum=1, maximum=2048, value=512, step=1, label="Max new tokens"),
52
- gr.Slider(minimum=0.1, maximum=4.0, value=0.7, step=0.1, label="Temperature"),
53
- gr.Slider(
54
- minimum=0.1,
55
- maximum=1.0,
56
- value=0.95,
57
- step=0.05,
58
- label="Top-p (nucleus sampling)",
59
  ),
 
 
 
60
  ],
61
  )
62
 
63
  with gr.Blocks() as demo:
64
- with gr.Sidebar():
65
- gr.LoginButton()
66
  chatbot.render()
67
 
68
-
69
  if __name__ == "__main__":
70
- demo.launch()
 
1
+ from threading import Thread
2
+
3
  import gradio as gr
4
+ import torch
5
+ from transformers import (AutoModelForCausalLM, AutoTokenizer,
6
+ TextIteratorStreamer)
7
+
8
MODEL_ID = "alibayram/gemma3-tr-v64k-it"

# Load the tokenizer and model once at import time so every request reuses them.
tokenizer = AutoTokenizer.from_pretrained(MODEL_ID)
model = AutoModelForCausalLM.from_pretrained(
    MODEL_ID,
    torch_dtype=torch.bfloat16,  # half-precision load; assumes bf16-capable hardware — TODO confirm
    device_map="auto",  # NOTE(review): requires the `accelerate` package at runtime — verify it is installed
)
17
+
18
def build_prompt(sistem_mesaji, gecmis, kullanici_mesaji):
    """Assemble the chat history into a single prompt string.

    Prepends the optional system message, appends the newest user turn,
    and renders the whole conversation through the tokenizer's chat
    template (untokenized, with the generation prompt appended).
    """
    system_part = (
        [{"role": "system", "content": sistem_mesaji}] if sistem_mesaji else []
    )
    conversation = (
        system_part
        + list(gecmis)
        + [{"role": "user", "content": kullanici_mesaji}]
    )
    return tokenizer.apply_chat_template(
        conversation,
        tokenize=False,
        add_generation_prompt=True,
    )
39
 
40
 
41
def respond(
    mesaj,
    gecmis: list[dict[str, str]],
    sistem_mesaji,
    max_tokens,
    temperature,
    top_p,
):
    """Stream a model reply for the latest user message.

    Runs ``model.generate`` in a worker thread and consumes a
    ``TextIteratorStreamer`` on this one, yielding the accumulated partial
    response after every token so Gradio can render it incrementally.
    """
    prompt = build_prompt(sistem_mesaji, gecmis, mesaj)

    girisler = tokenizer(prompt, return_tensors="pt").to(model.device)

    # skip_prompt avoids echoing the input back; skip_special_tokens drops
    # EOS/BOS markers from the visible output.
    streamer = TextIteratorStreamer(
        tokenizer,
        skip_prompt=True,
        skip_special_tokens=True,
    )

    uretim_parametreleri = dict(
        **girisler,
        streamer=streamer,
        max_new_tokens=int(max_tokens),  # slider values may arrive as floats
        temperature=temperature,
        top_p=top_p,
        do_sample=True,  # sampling must be on for temperature/top_p to apply
    )

    # generate() blocks until completion, so it runs in a worker thread
    # while this generator drains the streamer.
    thread = Thread(target=model.generate, kwargs=uretim_parametreleri)
    thread.start()
    try:
        cevap = ""
        for token in streamer:
            cevap += token
            yield cevap
    finally:
        # Always reclaim the worker — even if the client disconnects and
        # Gradio closes this generator early — instead of leaking the thread.
        thread.join()
75
 
76
 
 
 
 
77
# Extra controls shown under the chat box; their values are passed to
# `respond` after the message and history, in this exact order.
_system_box = gr.Textbox(
    value="Sen yardımsever, nazik ve açık şekilde cevap veren bir Türkçe yapay zekâ asistanısın.",
    label="Sistem Mesajı",
)
_max_tokens = gr.Slider(1, 4096, value=512, step=1, label="Maksimum Yeni Token")
_temperature = gr.Slider(0.1, 2.0, value=0.7, step=0.1, label="Sıcaklık (Temperature)")
_top_p = gr.Slider(0.1, 1.0, value=0.95, step=0.05, label="Top-p")

chatbot = gr.ChatInterface(
    respond,
    type="messages",
    additional_inputs=[_system_box, _max_tokens, _temperature, _top_p],
)
90
 
91
with gr.Blocks() as demo:
    # Render the pre-built ChatInterface inside this Blocks layout.
    chatbot.render()

if __name__ == "__main__":
    demo.launch()
requirements.txt ADDED
@@ -0,0 +1,5 @@
 
 
 
 
 
 
1
+ gradio==5.42.0
+ transformers>=4.45.0
+ accelerate>=0.26.0
+ torch>=2.2.0
4
+
5
+