TobDeBer commited on
Commit
17d36ff
·
verified ·
1 Parent(s): 2bfa2c0

Update app.py (#2)

Browse files

- Update app.py (165564616650f1f6ed61dc26d3277e9a4e965acf)

Files changed (1) hide show
  1. app.py +37 -12
app.py CHANGED
@@ -15,6 +15,9 @@ llm_model = None
15
  # TODO: have a list of repo/filename pairs
16
  # TODO: Derive the Dropdown list and default from it. First entry is default.
17
 
 
 
 
18
  hf_hub_download(
19
  repo_id="unsloth/Llama-3.2-1B-Instruct-GGUF",
20
  filename = "Llama-3.2-1B-Instruct-UD-Q2_K_XL.gguf",
@@ -40,6 +43,8 @@ hf_hub_download(
40
  filename="UD-Q3_K_XL/GLM-4.5-Air-UD-Q3_K_XL-00002-of-00002.gguf",
41
  local_dir = "./models"
42
  )
 
 
43
  css = """
44
  .bubble-wrap {
45
  padding-top: calc(var(--spacing-xl) * 3) !important;
@@ -91,6 +96,7 @@ css = """
91
  }
92
  """
93
 
 
94
  def get_messages_formatter_type(model_name):
95
  print(f"getting type for model: {model_name}")
96
  if "Llama" in model_name:
@@ -101,9 +107,9 @@ def get_messages_formatter_type(model_name):
101
  return MessagesFormatterType.CHATML
102
  else:
103
  print("formatter type not found, trying default")
104
- # raise ValueError(f"Unsupported model: {model_name}")
105
  return MessagesFormatterType.CHATML
106
 
 
107
  @spaces.GPU(duration=45)
108
  def respond(
109
  message,
@@ -121,6 +127,7 @@ def respond(
121
 
122
  chat_template = get_messages_formatter_type(model)
123
 
 
124
  if llm is None or llm_model != model:
125
  llm = Llama(
126
  model_path=f"models/{model}",
@@ -140,6 +147,7 @@ def respond(
140
  debug_output=True
141
  )
142
 
 
143
  settings = provider.get_provider_default_settings()
144
  settings.temperature = temperature
145
  settings.top_k = top_k
@@ -148,6 +156,7 @@ def respond(
148
  settings.repeat_penalty = repeat_penalty
149
  settings.stream = True
150
 
 
151
  messages = BasicChatHistory()
152
 
153
  for msn in history:
@@ -162,6 +171,7 @@ def respond(
162
  messages.add_message(user)
163
  messages.add_message(assistant)
164
 
 
165
  stream = agent.get_chat_response(
166
  message,
167
  llm_sampling_settings=settings,
@@ -175,6 +185,7 @@ def respond(
175
  outputs += output
176
  yield outputs
177
 
 
178
  PLACEHOLDER = """
179
  <div class="message-bubble-border" style="display:flex; max-width: 600px; border-radius: 6px; border-width: 1px; border-color: #e5e7eb; box-shadow: 0 4px 6px rgba(0, 0, 0, 0.1); backdrop-filter: blur(10px);">
180
  <div style="padding: .5rem 1.5rem;display: flex;flex-direction: column;justify-content: space-evenly;">
@@ -200,8 +211,9 @@ PLACEHOLDER = """
200
  </div>
201
  """
202
 
203
- # Definiere die Komponenten für die zusätzlichen Eingaben
204
- # Diese müssen als Liste in 'additional_inputs' übergeben werden
 
205
  model_dropdown = gr.Dropdown([
206
  'granite-4.0-h-tiny-UD-Q3_K_XL.gguf',
207
  'Llama-3.2-1B-Instruct-UD-Q2_K_XL.gguf',
@@ -212,9 +224,16 @@ model_dropdown = gr.Dropdown([
212
  label="Model"
213
  )
214
 
 
215
  system_textbox = gr.Textbox(value="You are a deep thinking AI, you may use extremely long chains of thought to deeply consider the problem and deliberate with yourself via systematic reasoning processes to help come to a correct solution prior to answering. You should enclose your thoughts and internal monologue inside <think> </think> tags, and then provide your solution or response to the problem.", label="System message")
 
 
216
  max_tokens_slider = gr.Slider(minimum=1, maximum=4096, value=2048, step=1, label="Max tokens")
 
 
217
  temperature_slider = gr.Slider(minimum=0.1, maximum=4.0, value=0.7, step=0.1, label="Temperature")
 
 
218
  top_p_slider = gr.Slider(
219
  minimum=0.1,
220
  maximum=1.0,
@@ -222,6 +241,8 @@ top_p_slider = gr.Slider(
222
  step=0.05,
223
  label="Top-p",
224
  )
 
 
225
  top_k_slider = gr.Slider(
226
  minimum=0,
227
  maximum=100,
@@ -229,6 +250,8 @@ top_k_slider = gr.Slider(
229
  step=1,
230
  label="Top-k",
231
  )
 
 
232
  repeat_penalty_slider = gr.Slider(
233
  minimum=0.0,
234
  maximum=2.0,
@@ -238,9 +261,10 @@ repeat_penalty_slider = gr.Slider(
238
  )
239
 
240
 
 
241
  demo = gr.ChatInterface(
242
  respond,
243
- # Übergabe der Komponenten als Liste
244
  additional_inputs=[
245
  model_dropdown,
246
  system_textbox,
@@ -250,6 +274,7 @@ demo = gr.ChatInterface(
250
  top_k_slider,
251
  repeat_penalty_slider
252
  ],
 
253
  theme=gr.themes.Soft(primary_hue="violet", secondary_hue="violet", neutral_hue="gray",font=[gr.themes.GoogleFont("Exo"), "ui-sans-serif", "system-ui", "sans-serif"]).set(
254
  body_background_fill_dark="#16141c",
255
  block_background_fill_dark="#16141c",
@@ -264,17 +289,17 @@ demo = gr.ChatInterface(
264
  code_background_fill_dark="#292733",
265
  ),
266
  css=css,
267
- retry_btn="Retry",
268
- undo_btn="Undo",
269
- clear_btn="Clear",
270
- submit_btn="Send",
 
271
  description="Llama-cpp-agent: Chat multi llm selection",
272
- # Die Platzhalter-Anzeige für den Chatbot wird jetzt direkt über 'chatbot_placeholder' oder
273
- # über die 'placeholder' Eigenschaft des ChatInterfaces (wie hier) gesetzt,
274
- # nicht mehr durch ein gr.Chatbot Objekt.
275
  placeholder=PLACEHOLDER,
276
- # Der Parameter chatbot wird nicht mehr verwendet
277
  )
278
 
 
279
  if __name__ == "__main__":
 
280
  demo.launch()
 
15
  # TODO: have a list of repo/filename pairs
16
  # TODO: Derive the Dropdown list and default from it. First entry is default.
17
 
18
+ # --- Modell-Downloads ---
19
+ # Beachte: Beim ersten Ausführen werden diese Modelle heruntergeladen und im Ordner ./models gespeichert.
20
+
21
  hf_hub_download(
22
  repo_id="unsloth/Llama-3.2-1B-Instruct-GGUF",
23
  filename = "Llama-3.2-1B-Instruct-UD-Q2_K_XL.gguf",
 
43
  filename="UD-Q3_K_XL/GLM-4.5-Air-UD-Q3_K_XL-00002-of-00002.gguf",
44
  local_dir = "./models"
45
  )
46
+
47
+ # --- CSS Styling ---
48
  css = """
49
  .bubble-wrap {
50
  padding-top: calc(var(--spacing-xl) * 3) !important;
 
96
  }
97
  """
98
 
99
+ # --- Hilfsfunktion für den Message Formatter Typ ---
100
  def get_messages_formatter_type(model_name):
101
  print(f"getting type for model: {model_name}")
102
  if "Llama" in model_name:
 
107
  return MessagesFormatterType.CHATML
108
  else:
109
  print("formatter type not found, trying default")
 
110
  return MessagesFormatterType.CHATML
111
 
112
+ # --- Haupt-Antwortfunktion für ChatInterface ---
113
  @spaces.GPU(duration=45)
114
  def respond(
115
  message,
 
127
 
128
  chat_template = get_messages_formatter_type(model)
129
 
130
+ # Llama-Modell initialisieren oder wechseln, falls nötig
131
  if llm is None or llm_model != model:
132
  llm = Llama(
133
  model_path=f"models/{model}",
 
147
  debug_output=True
148
  )
149
 
150
+ # Sampling-Einstellungen setzen
151
  settings = provider.get_provider_default_settings()
152
  settings.temperature = temperature
153
  settings.top_k = top_k
 
156
  settings.repeat_penalty = repeat_penalty
157
  settings.stream = True
158
 
159
+ # Chat-Verlauf vorbereiten
160
  messages = BasicChatHistory()
161
 
162
  for msn in history:
 
171
  messages.add_message(user)
172
  messages.add_message(assistant)
173
 
174
+ # Antwort streamen
175
  stream = agent.get_chat_response(
176
  message,
177
  llm_sampling_settings=settings,
 
185
  outputs += output
186
  yield outputs
187
 
188
+ # --- HTML Platzhalter für den Chatbot ---
189
  PLACEHOLDER = """
190
  <div class="message-bubble-border" style="display:flex; max-width: 600px; border-radius: 6px; border-width: 1px; border-color: #e5e7eb; box-shadow: 0 4px 6px rgba(0, 0, 0, 0.1); backdrop-filter: blur(10px);">
191
  <div style="padding: .5rem 1.5rem;display: flex;flex-direction: column;justify-content: space-evenly;">
 
211
  </div>
212
  """
213
 
214
+ # --- Zusätzliche Eingabekomponenten erstellen (Gradio V4+) ---
215
+
216
+ # Komponente 1: Model Dropdown
217
  model_dropdown = gr.Dropdown([
218
  'granite-4.0-h-tiny-UD-Q3_K_XL.gguf',
219
  'Llama-3.2-1B-Instruct-UD-Q2_K_XL.gguf',
 
224
  label="Model"
225
  )
226
 
227
+ # Komponente 2: System Message
228
  system_textbox = gr.Textbox(value="You are a deep thinking AI, you may use extremely long chains of thought to deeply consider the problem and deliberate with yourself via systematic reasoning processes to help come to a correct solution prior to answering. You should enclose your thoughts and internal monologue inside <think> </think> tags, and then provide your solution or response to the problem.", label="System message")
229
+
230
+ # Komponente 3: Max Tokens
231
  max_tokens_slider = gr.Slider(minimum=1, maximum=4096, value=2048, step=1, label="Max tokens")
232
+
233
+ # Komponente 4: Temperature
234
  temperature_slider = gr.Slider(minimum=0.1, maximum=4.0, value=0.7, step=0.1, label="Temperature")
235
+
236
+ # Komponente 5: Top-p
237
  top_p_slider = gr.Slider(
238
  minimum=0.1,
239
  maximum=1.0,
 
241
  step=0.05,
242
  label="Top-p",
243
  )
244
+
245
+ # Komponente 6: Top-k
246
  top_k_slider = gr.Slider(
247
  minimum=0,
248
  maximum=100,
 
250
  step=1,
251
  label="Top-k",
252
  )
253
+
254
+ # Komponente 7: Repetition penalty
255
  repeat_penalty_slider = gr.Slider(
256
  minimum=0.0,
257
  maximum=2.0,
 
261
  )
262
 
263
 
264
+ # --- Gradio Chat Interface Definition ---
265
  demo = gr.ChatInterface(
266
  respond,
267
+ # Zusätzliche Komponenten als Liste übergeben
268
  additional_inputs=[
269
  model_dropdown,
270
  system_textbox,
 
274
  top_k_slider,
275
  repeat_penalty_slider
276
  ],
277
+ # Theme und CSS
278
  theme=gr.themes.Soft(primary_hue="violet", secondary_hue="violet", neutral_hue="gray",font=[gr.themes.GoogleFont("Exo"), "ui-sans-serif", "system-ui", "sans-serif"]).set(
279
  body_background_fill_dark="#16141c",
280
  block_background_fill_dark="#16141c",
 
289
  code_background_fill_dark="#292733",
290
  ),
291
  css=css,
292
+ # HINWEIS: Schaltflächenparameter wurden von *_btn zu *_label geändert (Gradio V4+ Fix)
293
+ retry_label="Retry",
294
+ undo_label="Undo",
295
+ clear_label="Clear",
296
+ submit_label="Send",
297
  description="Llama-cpp-agent: Chat multi llm selection",
298
+ # Platzhalter für den Chatbot (gr.Chatbot-Objekt wird in V4+ nicht mehr direkt übergeben)
 
 
299
  placeholder=PLACEHOLDER,
 
300
  )
301
 
302
+ # --- App starten ---
303
  if __name__ == "__main__":
304
+ # gr.ChatInterface erfordert Gradio v4+
305
  demo.launch()