dtometzki committed on
Commit
d9a2dc6
·
verified ·
1 Parent(s): 1579a10

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +47 -138
app.py CHANGED
@@ -1,7 +1,5 @@
1
  import os
2
  import logging
3
- import base64
4
- import mimetypes
5
  from datetime import datetime
6
  from zoneinfo import ZoneInfo
7
  from functools import lru_cache
@@ -35,8 +33,6 @@ MODELS = {
35
  "meta-llama/Llama-3.3-70B-Instruct": {"max_tokens": 8192},
36
  "deepseek-ai/DeepSeek-V3": {"max_tokens": 131072},
37
  "openai/gpt-oss-120b": {"max_tokens": 8192},
38
- # Hier könnten weitere Vision-Modelle stehen, sofern vom Anbieter unterstützt
39
- "meta-llama/Llama-4-Maverick-17B-128E": {"max_tokens": 8192},
40
  }
41
  MODEL_CHOICES = list(MODELS.keys())
42
  MAX_TOKENS_GLOBAL = max(v["max_tokens"] for v in MODELS.values())
@@ -77,37 +73,29 @@ def clamp_tokens(model: str, max_tokens) -> int:
77
  except: v = 2048
78
  return max(1, min(v, model_max))
79
 
 
80
  def _clean_response(text: str) -> str:
81
  marker = "<|channel|>final<|message|>"
 
82
  if marker in text:
83
  return text.split(marker, 1)[-1]
 
84
  return text
85
 
86
  def content_to_text(content) -> str:
87
- """Extrahiert nur den Textteil aus komplexen Nachrichten für Logs/Suche."""
88
  if content is None: return ""
89
  if isinstance(content, str): return content
90
  if isinstance(content, list):
91
- # Filtert Dateipfade (die meist Tupel oder Dicts sind) heraus
92
- return "\n".join([str(p.get("text", "") or p.get("content", "")) for p in content if isinstance(p, dict) and "text" in p]).strip()
93
- if isinstance(content, tuple): # Falls Gradio Datei-Tupel sendet
94
- return ""
95
  return str(content)
96
 
97
  def normalize_history_messages(history):
98
- """Bereinigt die History, behält aber die Struktur für Gradio bei."""
99
  history = history or []
100
- # Hier machen wir nichts destruktives, da Gradio 5+ komplexe Objekte (Bilder) in der History braucht.
101
- return history
102
-
103
- def encode_image(image_path):
104
- """Wandelt Bilddatei in Base64 String um."""
105
- try:
106
- with open(image_path, "rb") as image_file:
107
- return base64.b64encode(image_file.read()).decode('utf-8')
108
- except Exception as e:
109
- logging.error(f"Fehler beim Kodieren des Bildes: {e}")
110
- return None
111
 
112
  # ==============================================================================
113
  # 3) GOOGLE SEARCH
@@ -122,6 +110,7 @@ session = create_session()
122
 
123
  @lru_cache(maxsize=128)
124
  def search_web(query: str) -> str | None:
 
125
  if not GOOGLE_API_KEY or not SEARCH_ENGINE_ID or not query:
126
  return None
127
  try:
@@ -145,62 +134,33 @@ def search_web(query: str) -> str | None:
145
  # ==============================================================================
146
  # 4) CHAT STREAM LOGIK
147
  # ==============================================================================
148
- def add_user_message(msg_data, history, profile: gr.OAuthProfile | None = None):
149
- """Verarbeitet Input von MultimodalTextbox (Dict mit 'text' und 'files')."""
150
- if history is None: history = []
151
-
152
  if not _is_allowed(profile):
153
  history.append({"role": "assistant", "content": "🔒 Nicht autorisiert."})
154
- return gr.MultimodalTextbox(value=None, interactive=False), history, ""
155
-
156
- # msg_data ist bei MultimodalTextbox ein Dictionary: {'text': "...", 'files': ["path..."]}
157
- text = ""
158
- files = []
159
-
160
- if isinstance(msg_data, dict):
161
- text = msg_data.get("text", "")
162
- files = msg_data.get("files", [])
163
- elif isinstance(msg_data, str):
164
- text = msg_data
165
 
166
- # Nachricht zusammenbauen für Gradio Chatbot (Lokale Anzeige)
167
- if text or files:
168
- # User Message Block erstellen
169
- content_block = []
170
- if text:
171
- content_block.append(text) # Einfacher Text wird direkt angezeigt
172
-
173
- # Dateien (Bilder) hinzufügen
174
- for f in files:
175
- # Gradio Chatbot (type='messages') erwartet (path, alt_text) oder component tuple
176
- # Wir nutzen hier den Pfad direkt oder verpacken ihn als Gradio Image Tuple
177
- content_block.append((f,))
178
-
179
- # WICHTIG: Wenn Text und Bild gemischt sind, muss es als Liste übergeben werden
180
- # Wenn nur Text, reicht String. Wenn nur Bild, Tuple.
181
- # Gradio 5+ ist hier flexibel, aber am sichersten ist die Multimodal-Logik.
182
-
183
- if len(content_block) == 1 and isinstance(content_block[0], str):
184
- history.append({"role": "user", "content": content_block[0]})
185
- else:
186
- # Gemischter Content oder nur Bild
187
- # Für die reine Anzeige im Chatbot fügen wir es so hinzu:
188
- history.append({"role": "user", "content": content_block})
189
-
190
- return gr.MultimodalTextbox(value=None, interactive=True), history, ""
191
 
192
  def chat_stream(
193
  history, model, system_prompt, max_tokens, temp, top_p, use_search,
194
  profile: gr.OAuthProfile | None = None,
195
  ):
 
196
  usage_text = ""
197
 
 
198
  if not _is_allowed(profile):
 
199
  yield history, "🔒"
200
  return
201
 
 
202
  if not client:
203
- history.append({"role": "assistant", "content": "⚠️ **Konfigurations-Fehler:** `HYPERBOLIC_API_KEY` fehlt."})
204
  yield history, "❌ Key fehlt"
205
  return
206
 
@@ -208,25 +168,15 @@ def chat_stream(
208
  yield history, usage_text
209
  return
210
 
211
- # Letzte User-Nachricht analysieren (kann String oder Liste sein)
212
- last_msg_content = history[-1]["content"]
213
- user_text_for_search = ""
214
 
215
- if isinstance(last_msg_content, str):
216
- user_text_for_search = last_msg_content
217
- elif isinstance(last_msg_content, list):
218
- # Text extrahieren für Suche
219
- for item in last_msg_content:
220
- if isinstance(item, str):
221
- user_text_for_search += item + " "
222
-
223
- # Web Search Logic
224
  context_add = ""
225
- if use_search and user_text_for_search.strip():
226
  if not GOOGLE_API_KEY or not SEARCH_ENGINE_ID:
227
- history.append({"role": "assistant", "content": "⚠️ Google Suche an, aber Keys fehlen. Mache weiter..."})
228
  else:
229
- search_res = search_web(user_text_for_search)
230
  if search_res:
231
  now = _local_now()
232
  short_res = _truncate(search_res, WEB_CONTEXT_MAX_CHARS)
@@ -235,59 +185,21 @@ def chat_stream(
235
  f"{short_res}\n----------------------------------"
236
  )
237
 
238
- # Messages für API aufbauen
239
  messages = []
240
  if system_prompt.strip():
241
  messages.append({"role": "system", "content": system_prompt})
 
 
 
 
 
242
 
243
- # History durchgehen und für API formatieren
244
- for m in history:
245
- role = m["role"]
246
- content = m["content"]
247
-
248
- if role == "assistant":
249
- messages.append({"role": role, "content": content_to_text(content)})
250
- continue
251
-
252
- if role == "user":
253
- # Fall 1: Nur Text
254
- if isinstance(content, str):
255
- # Kontext zur letzten Nachricht hinzufügen falls nötig
256
- final_text = content
257
- if m == history[-1] and context_add:
258
- final_text += context_add
259
- messages.append({"role": role, "content": final_text})
260
-
261
- # Fall 2: Multimodal (Liste von Text/Dateien)
262
- elif isinstance(content, list):
263
- api_content_list = []
264
- for item in content:
265
- if isinstance(item, str):
266
- # Textteil
267
- txt_val = item
268
- if m == history[-1] and context_add: # Context nur ans Ende
269
- txt_val += context_add
270
- api_content_list.append({"type": "text", "text": txt_val})
271
- elif isinstance(item, tuple) and len(item) > 0:
272
- # Dateipfad (Bild)
273
- file_path = item[0]
274
- mime_type, _ = mimetypes.guess_type(file_path)
275
- if not mime_type: mime_type = "image/jpeg"
276
-
277
- b64_img = encode_image(file_path)
278
- if b64_img:
279
- api_content_list.append({
280
- "type": "image_url",
281
- "image_url": {
282
- "url": f"data:{mime_type};base64,{b64_img}"
283
- }
284
- })
285
- messages.append({"role": role, "content": api_content_list})
286
-
287
- # Placeholder für Antwort
288
  history.append({"role": "assistant", "content": ""})
289
  yield history, usage_text
290
 
 
291
  try:
292
  completion = client.chat.completions.create(
293
  model=model,
@@ -303,24 +215,30 @@ def chat_stream(
303
  completion_tokens = 0
304
 
305
  for chunk in completion:
 
306
  delta = ""
307
  if hasattr(chunk, "choices") and chunk.choices and len(chunk.choices) > 0:
308
  delta = chunk.choices[0].delta.content or ""
309
 
310
  if delta:
311
  full_response += delta
 
 
312
  clean_text = _clean_response(full_response)
313
  history[-1]["content"] = clean_text
314
  yield history, usage_text
315
 
 
316
  if hasattr(chunk, "usage") and chunk.usage:
317
  completion_tokens = chunk.usage.completion_tokens or 0
318
 
 
319
  if hasattr(chunk, "choices") and chunk.choices and len(chunk.choices) > 0:
320
  finish = getattr(chunk.choices[0], "finish_reason", None)
321
  if finish in ["stop", "length"]:
322
  break
323
 
 
324
  if completion_tokens > 0:
325
  cost = cost_from_completion_tokens(model, completion_tokens)
326
  usage_text = f"Tokens: {completion_tokens} | Kosten: ${cost:.5f}"
@@ -338,22 +256,14 @@ def update_tokens_ui(model):
338
  val = int(MODELS.get(model, {}).get("max_tokens", 2048))
339
  return gr.update(maximum=val, value=min(2048, val))
340
 
341
- with gr.Blocks(title="Hyperbolic Chat (Multimodal)", fill_height=True) as demo:
342
- gr.Markdown("## 🚀 Hyperbolic Chat (MultimodalEnv Vars)")
343
 
344
  with gr.Row():
345
  with gr.Column(scale=4):
346
- # Type='messages' ist wichtig für Multimodal Rendering
347
- chatbot = gr.Chatbot(height=700)
348
  with gr.Row():
349
- # MultimodalTextbox statt normaler Textbox
350
- msg_input = gr.MultimodalTextbox(
351
- file_count="multiple",
352
- placeholder="Eingabe (Text oder Bild)...",
353
- show_label=False,
354
- scale=4,
355
- file_types=["image"]
356
- )
357
  submit_btn = gr.Button("Senden", variant="primary", scale=1)
358
 
359
  clear_btn = gr.Button("🗑️ Verlauf leeren")
@@ -376,7 +286,6 @@ with gr.Blocks(title="Hyperbolic Chat (Multimodal)", fill_height=True) as demo:
376
  # Event Wiring
377
  params = [chatbot, model_dd, system_txt, tokens_sld, temp_sld, top_p_sld, use_search_chk]
378
 
379
- # Multimodal Input triggert direkt beim Absenden
380
  msg_input.submit(add_user_message, [msg_input, chatbot], [msg_input, chatbot], queue=False).then(
381
  chat_stream, params, [chatbot, usage_md], queue=True
382
  )
@@ -392,10 +301,10 @@ def check_keys_startup():
392
  print("\n" + "="*40)
393
  print("🔎 STARTUP CHECK:")
394
  if HYPERBOLIC_API_KEY: print("✅ HYPERBOLIC_API_KEY gefunden.")
395
- else: print("❌ HYPERBOLIC_API_KEY fehlt!")
396
 
397
  if GOOGLE_API_KEY and SEARCH_ENGINE_ID: print("✅ Google Search Keys gefunden.")
398
- else: print("⚠️ Google Search Keys fehlen.")
399
  print("="*40 + "\n")
400
 
401
  check_keys_startup()
 
1
  import os
2
  import logging
 
 
3
  from datetime import datetime
4
  from zoneinfo import ZoneInfo
5
  from functools import lru_cache
 
33
  "meta-llama/Llama-3.3-70B-Instruct": {"max_tokens": 8192},
34
  "deepseek-ai/DeepSeek-V3": {"max_tokens": 131072},
35
  "openai/gpt-oss-120b": {"max_tokens": 8192},
 
 
36
  }
37
  MODEL_CHOICES = list(MODELS.keys())
38
  MAX_TOKENS_GLOBAL = max(v["max_tokens"] for v in MODELS.values())
 
73
  except: v = 2048
74
  return max(1, min(v, model_max))
75
 
76
+ # --- WICHTIG: Clean Response ohne Blockieren ---
77
  def _clean_response(text: str) -> str:
78
  marker = "<|channel|>final<|message|>"
79
+ # Wenn der Marker da ist -> alles davor abschneiden (sauber)
80
  if marker in text:
81
  return text.split(marker, 1)[-1]
82
+ # Wenn der Marker NICHT da ist -> Text trotzdem anzeigen
83
  return text
84
 
85
def content_to_text(content) -> str:
    """Flatten a chat-message payload of unknown shape into plain text.

    Handles the content forms Gradio may store in history: ``None`` and
    plain strings pass through; a list of part-dicts is reduced to the
    joined ``text``/``content`` fields (non-dict parts are dropped);
    anything else is stringified.
    """
    if content is None:
        return ""
    if isinstance(content, str):
        return content
    if not isinstance(content, list):
        return str(content)
    pieces = []
    for part in content:
        if isinstance(part, dict):
            # Prefer "text", fall back to "content"; missing keys yield "".
            pieces.append(str(part.get("text", "") or part.get("content", "")))
    return "\n".join(pieces).strip()
91
 
92
def normalize_history_messages(history):
    """Sanitize a Gradio chatbot history into plain message dicts.

    Keeps only dict entries with a recognized role and coerces each
    content payload to a string via ``content_to_text``, so downstream
    API calls always see ``{"role": str, "content": str}`` messages.
    """
    allowed_roles = ("user", "assistant", "system")
    return [
        {"role": entry["role"], "content": content_to_text(entry["content"])}
        for entry in (history or [])
        if isinstance(entry, dict) and entry.get("role") in allowed_roles
    ]
 
 
 
 
 
 
99
 
100
  # ==============================================================================
101
  # 3) GOOGLE SEARCH
 
110
 
111
  @lru_cache(maxsize=128)
112
  def search_web(query: str) -> str | None:
113
+ # Sicherheit: Wenn Keys fehlen, direkt None
114
  if not GOOGLE_API_KEY or not SEARCH_ENGINE_ID or not query:
115
  return None
116
  try:
 
134
  # ==============================================================================
135
  # 4) CHAT STREAM LOGIK
136
  # ==============================================================================
137
def add_user_message(msg, history, profile: gr.OAuthProfile | None = None):
    """Append the submitted user message to the chat history.

    Wired to the Gradio textbox submit event: ``msg`` is the textbox
    value, ``history`` the chatbot state, ``profile`` the OAuth login
    (``None`` when not logged in).  Returns a 3-tuple of
    (cleared textbox value, updated history, cleared status text).

    NOTE(review): this returns three values, but the visible
    ``msg_input.submit(...)`` wiring lists only two outputs
    (``[msg_input, chatbot]``) — confirm the outputs list matches.
    """
    # Coerce whatever Gradio stored into plain {"role", "content": str} dicts.
    history = normalize_history_messages(history)

    # Allowlist gate: unauthorized users get a lock notice instead of a turn.
    if not _is_allowed(profile):
        history.append({"role": "assistant", "content": "🔒 Nicht autorisiert."})
        return "", history, ""

    # Ignore empty / whitespace-only submissions; ``msg`` may be None.
    msg = (msg or "").strip()
    if msg:
        history.append({"role": "user", "content": msg})
    return "", history, ""
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
147
 
148
  def chat_stream(
149
  history, model, system_prompt, max_tokens, temp, top_p, use_search,
150
  profile: gr.OAuthProfile | None = None,
151
  ):
152
+ history = normalize_history_messages(history)
153
  usage_text = ""
154
 
155
+ # 1. Auth Check
156
  if not _is_allowed(profile):
157
+ history.append({"role": "assistant", "content": "🔒 Nicht autorisiert."})
158
  yield history, "🔒"
159
  return
160
 
161
+ # 2. Key Check (Kritisch)
162
  if not client:
163
+ history.append({"role": "assistant", "content": "⚠️ **Konfigurations-Fehler:** `HYPERBOLIC_API_KEY` fehlt in den Umgebungsvariablen."})
164
  yield history, "❌ Key fehlt"
165
  return
166
 
 
168
  yield history, usage_text
169
  return
170
 
171
+ user_text = history[-1]["content"]
 
 
172
 
173
+ # 3. Web Search Check (Warnung statt Crash)
 
 
 
 
 
 
 
 
174
  context_add = ""
175
+ if use_search:
176
  if not GOOGLE_API_KEY or not SEARCH_ENGINE_ID:
177
+ history.append({"role": "assistant", "content": "⚠️ Google Suche an, aber `GOOGLE_API_KEY` oder `GOOGLE_CX` fehlen. Mache ohne Suche weiter..."})
178
  else:
179
+ search_res = search_web(user_text)
180
  if search_res:
181
  now = _local_now()
182
  short_res = _truncate(search_res, WEB_CONTEXT_MAX_CHARS)
 
185
  f"{short_res}\n----------------------------------"
186
  )
187
 
188
+ # 4. Message Assembly
189
  messages = []
190
  if system_prompt.strip():
191
  messages.append({"role": "system", "content": system_prompt})
192
+
193
+ for m in history[:-1]:
194
+ messages.append(m)
195
+
196
+ messages.append({"role": "user", "content": user_text + context_add})
197
 
198
+ # Placeholder
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
199
  history.append({"role": "assistant", "content": ""})
200
  yield history, usage_text
201
 
202
+ # 5. API Call
203
  try:
204
  completion = client.chat.completions.create(
205
  model=model,
 
215
  completion_tokens = 0
216
 
217
  for chunk in completion:
218
+ # Text Content sicher extrahieren
219
  delta = ""
220
  if hasattr(chunk, "choices") and chunk.choices and len(chunk.choices) > 0:
221
  delta = chunk.choices[0].delta.content or ""
222
 
223
  if delta:
224
  full_response += delta
225
+ # Hier der Fix: Wir zeigen immer Text an, damit nichts hängt.
226
+ # Wenn der Clean-Marker kommt, springt der Text um auf "sauber".
227
  clean_text = _clean_response(full_response)
228
  history[-1]["content"] = clean_text
229
  yield history, usage_text
230
 
231
+ # Usage Stats
232
  if hasattr(chunk, "usage") and chunk.usage:
233
  completion_tokens = chunk.usage.completion_tokens or 0
234
 
235
+ # Finish Reason Check (Safety gegen NoneType Fehler)
236
  if hasattr(chunk, "choices") and chunk.choices and len(chunk.choices) > 0:
237
  finish = getattr(chunk.choices[0], "finish_reason", None)
238
  if finish in ["stop", "length"]:
239
  break
240
 
241
+ # Final Costs
242
  if completion_tokens > 0:
243
  cost = cost_from_completion_tokens(model, completion_tokens)
244
  usage_text = f"Tokens: {completion_tokens} | Kosten: ${cost:.5f}"
 
256
  val = int(MODELS.get(model, {}).get("max_tokens", 2048))
257
  return gr.update(maximum=val, value=min(2048, val))
258
 
259
+ with gr.Blocks(title="Hyperbolic Chat", fill_height=True) as demo:
260
+ gr.Markdown("## 🚀 Hyperbolic Chat (Env Vars Allowlist: dtometzki)")
261
 
262
  with gr.Row():
263
  with gr.Column(scale=4):
264
+ chatbot = gr.Chatbot(height=700)
 
265
  with gr.Row():
266
+ msg_input = gr.Textbox(placeholder="Eingabe...", show_label=False, scale=4)
 
 
 
 
 
 
 
267
  submit_btn = gr.Button("Senden", variant="primary", scale=1)
268
 
269
  clear_btn = gr.Button("🗑️ Verlauf leeren")
 
286
  # Event Wiring
287
  params = [chatbot, model_dd, system_txt, tokens_sld, temp_sld, top_p_sld, use_search_chk]
288
 
 
289
  msg_input.submit(add_user_message, [msg_input, chatbot], [msg_input, chatbot], queue=False).then(
290
  chat_stream, params, [chatbot, usage_md], queue=True
291
  )
 
301
  print("\n" + "="*40)
302
  print("🔎 STARTUP CHECK:")
303
  if HYPERBOLIC_API_KEY: print("✅ HYPERBOLIC_API_KEY gefunden.")
304
+ else: print("❌ HYPERBOLIC_API_KEY fehlt! Chat wird Fehler zeigen.")
305
 
306
  if GOOGLE_API_KEY and SEARCH_ENGINE_ID: print("✅ Google Search Keys gefunden.")
307
+ else: print("⚠️ Google Search Keys fehlen (Suche wird ignoriert).")
308
  print("="*40 + "\n")
309
 
310
  check_keys_startup()