# hyperbolic / app.py
# NOTE: the Hugging Face file-viewer header ("dtometzki's picture",
# "Update app.py", commit d9a2dc6 verified) was pasted in as bare text and
# broke parsing; it is preserved here as a comment instead.
import os
import logging
from datetime import datetime
from zoneinfo import ZoneInfo
from functools import lru_cache
import requests
from requests.adapters import HTTPAdapter
from urllib3.util.retry import Retry
import gradio as gr
from openai import OpenAI
# ==============================================================================
# 1) SETUP & CONFIGURATION
# ==============================================================================
logging.basicConfig(level=logging.INFO, format="%(asctime)s - %(levelname)s - %(message)s")

# All secrets come from environment variables only.
HYPERBOLIC_API_KEY = os.environ.get("HYPERBOLIC_API_KEY")
GOOGLE_API_KEY = os.environ.get("GOOGLE_API_KEY")
SEARCH_ENGINE_ID = os.environ.get("GOOGLE_CX")

# Hugging Face accounts allowed to use the app (matched case-insensitively).
ALLOWED_HF_USERS = {"dtometzki"}
ALLOWED_HF_USERS_LOWER = {name.lower() for name in ALLOWED_HF_USERS}

# Hard cap on how much web-search context may be appended to a prompt.
WEB_CONTEXT_MAX_CHARS = 800

# Build the OpenAI-compatible Hyperbolic client only when a key is present;
# otherwise leave it None and let the chat handler report the missing key.
client = None
if HYPERBOLIC_API_KEY:
    client = OpenAI(api_key=HYPERBOLIC_API_KEY, base_url="https://api.hyperbolic.xyz/v1")

# Supported models with their per-request output-token ceilings.
MODELS = {
    "Qwen/Qwen3-Next-80B-A3B-Instruct": {"max_tokens": 8192},
    "meta-llama/Llama-3.3-70B-Instruct": {"max_tokens": 8192},
    "deepseek-ai/DeepSeek-V3": {"max_tokens": 131072},
    "openai/gpt-oss-120b": {"max_tokens": 8192},
}
MODEL_CHOICES = list(MODELS.keys())
MAX_TOKENS_GLOBAL = max(spec["max_tokens"] for spec in MODELS.values())

# USD per one million completion (output) tokens, per model.
PRICE_PER_1M_OUTPUT = {
    "Qwen/Qwen3-Next-80B-A3B-Instruct": 0.30,
    "meta-llama/Llama-3.3-70B-Instruct": 0.40,
    "deepseek-ai/DeepSeek-V3": 0.25,
    "openai/gpt-oss-120b": 0.30,
}
# ==============================================================================
# 2) HELPERS
# ==============================================================================
def cost_from_completion_tokens(model: str, completion_tokens: int) -> float:
    """Return the output-token cost in USD for *completion_tokens* on *model*.

    Models not present in PRICE_PER_1M_OUTPUT are priced at 0.0.
    """
    per_million = float(PRICE_PER_1M_OUTPUT.get(model, 0.0))
    tokens = float(completion_tokens)
    return (tokens / 1_000_000.0) * per_million
def _local_now(tz="Europe/Berlin") -> datetime:
return datetime.now(ZoneInfo(tz))
def _truncate(text: str, max_chars: int) -> str:
text = (text or "").strip()
if len(text) <= max_chars:
return text
return text[: max_chars - 1].rstrip() + "โ€ฆ"
def _profile_username(profile) -> str:
if profile is None: return ""
return (getattr(profile, "username", None) or getattr(profile, "name", None) or "").strip()
def _is_allowed(profile) -> bool:
    """True when the profile's username is on the case-insensitive allowlist."""
    username = _profile_username(profile).lower()
    return username in ALLOWED_HF_USERS_LOWER
def clamp_tokens(model: str, max_tokens) -> int:
    """Coerce *max_tokens* to an int clamped to [1, the model's ceiling].

    Unknown models default to a 2048 ceiling, and non-numeric input falls
    back to 2048. Fix: the original used a bare ``except:``, which would
    also swallow SystemExit/KeyboardInterrupt; narrowed to the exceptions
    ``int()`` actually raises.
    """
    model_max = int(MODELS.get(model, {}).get("max_tokens", 2048))
    try:
        value = int(max_tokens)
    except (TypeError, ValueError):  # e.g. None, "", or a non-numeric string
        value = 2048
    return max(1, min(value, model_max))
# --- WICHTIG: Clean Response ohne Blockieren ---
def _clean_response(text: str) -> str:
marker = "<|channel|>final<|message|>"
# Wenn der Marker da ist -> alles davor abschneiden (sauber)
if marker in text:
return text.split(marker, 1)[-1]
# Wenn der Marker NICHT da ist -> Text trotzdem anzeigen
return text
def content_to_text(content) -> str:
    """Flatten an OpenAI-style message content into plain text.

    Accepts None, a plain string, or a list of content parts (dicts with a
    "text" or "content" field — other list entries are skipped); any other
    value is stringified.
    """
    if content is None:
        return ""
    if isinstance(content, str):
        return content
    if isinstance(content, list):
        parts = []
        for part in content:
            if isinstance(part, dict):
                parts.append(str(part.get("text", "") or part.get("content", "")))
        return "\n".join(parts).strip()
    return str(content)
def normalize_history_messages(history):
    """Return a sanitized copy of *history* as role/content dicts.

    Entries that are not dicts, or whose role is not user/assistant/system,
    are dropped; content is flattened to plain text via content_to_text.
    """
    normalized = []
    for message in history or []:
        if not isinstance(message, dict):
            continue
        role = message.get("role")
        if role not in ("user", "assistant", "system"):
            continue
        normalized.append({"role": role, "content": content_to_text(message["content"])})
    return normalized
# ==============================================================================
# 3) GOOGLE SEARCH
# ==============================================================================
def create_session() -> requests.Session:
    """Build a requests session that retries transient GET failures (HTTPS only)."""
    retry_policy = Retry(
        total=3,
        backoff_factor=1,
        status_forcelist=[429, 500, 502, 503],
        allowed_methods=["GET"],
    )
    http_session = requests.Session()
    http_session.mount("https://", HTTPAdapter(max_retries=retry_policy))
    return http_session


# One shared session for all outbound Google API calls.
session = create_session()
@lru_cache(maxsize=128)
def search_web(query: str) -> str | None:
    """Run a Google Custom Search for *query* and return the top hits.

    Returns up to three "- title / snippet / link" lines joined by newlines,
    or None when the query is empty, the API keys are missing, no results
    come back, or the request fails. Results are memoized per query.
    """
    # Safety: bail out immediately when keys or the query are missing.
    if not GOOGLE_API_KEY or not SEARCH_ENGINE_ID or not query:
        return None
    try:
        logging.info(f"๐Ÿ” Google Suche: {query}")
        response = session.get(
            "https://www.googleapis.com/customsearch/v1",
            params={"key": GOOGLE_API_KEY, "cx": SEARCH_ENGINE_ID, "q": query, "num": 3},
            timeout=8,
        )
        response.raise_for_status()
        results = response.json().get("items", [])
        if not results:
            return None
        formatted = [
            f"- {item.get('title', '')}\n {item.get('snippet', '')}\n {item.get('link', '')}"
            for item in results
        ]
        return "\n".join(formatted)
    except Exception as exc:  # best-effort: a failed search must never crash the chat
        logging.error(f"Search Fail: {exc}")
        return None
# ==============================================================================
# 4) CHAT STREAM LOGIK
# ==============================================================================
def add_user_message(msg, history, profile: gr.OAuthProfile | None = None):
    """Append the user's message to the chat history (auth-gated).

    Returns (cleared_textbox_value, updated_history, cleared_usage_text);
    unauthorized users get an assistant-side refusal instead.
    """
    history = normalize_history_messages(history)
    if not _is_allowed(profile):
        history.append({"role": "assistant", "content": "๐Ÿ”’ Nicht autorisiert."})
        return "", history, ""
    text = (msg or "").strip()
    if text:
        history.append({"role": "user", "content": text})
    return "", history, ""
def chat_stream(
    history, model, system_prompt, max_tokens, temp, top_p, use_search,
    profile: gr.OAuthProfile | None = None,
):
    """Stream the assistant's reply for the most recent user turn.

    Generator used as a Gradio event handler: it yields
    ``(history, usage_text)`` after every streamed chunk so the chatbot
    updates incrementally. Expects *history* in "messages" format with the
    newest entry being a user turn; otherwise it yields once unchanged and
    returns.
    """
    history = normalize_history_messages(history)
    usage_text = ""
    # 1. Auth check: only allow-listed HF users may trigger API calls.
    if not _is_allowed(profile):
        history.append({"role": "assistant", "content": "๐Ÿ”’ Nicht autorisiert."})
        yield history, "๐Ÿ”’"
        return
    # 2. Key check (critical): client is None when HYPERBOLIC_API_KEY is unset.
    if not client:
        history.append({"role": "assistant", "content": "โš ๏ธ **Konfigurations-Fehler:** `HYPERBOLIC_API_KEY` fehlt in den Umgebungsvariablen."})
        yield history, "โŒ Key fehlt"
        return
    # Nothing to answer unless the most recent message is from the user.
    if not history or history[-1]["role"] != "user":
        yield history, usage_text
        return
    user_text = history[-1]["content"]
    # 3. Optional web search (warn instead of crashing when keys are missing).
    context_add = ""
    if use_search:
        if not GOOGLE_API_KEY or not SEARCH_ENGINE_ID:
            history.append({"role": "assistant", "content": "โš ๏ธ Google Suche an, aber `GOOGLE_API_KEY` oder `GOOGLE_CX` fehlen. Mache ohne Suche weiter..."})
        else:
            search_res = search_web(user_text)
            if search_res:
                now = _local_now()
                # Truncate so the web context cannot blow up the prompt size.
                short_res = _truncate(search_res, WEB_CONTEXT_MAX_CHARS)
                context_add = (
                    f"\n\n--- WEB INFO ({now:%Y-%m-%d %H:%M}) ---\n"
                    f"{short_res}\n----------------------------------"
                )
    # 4. Message assembly: system prompt + prior turns + augmented user turn.
    messages = []
    if system_prompt.strip():
        messages.append({"role": "system", "content": system_prompt})
    for m in history[:-1]:
        messages.append(m)
    messages.append({"role": "user", "content": user_text + context_add})
    # Placeholder assistant bubble that the stream below fills chunk by chunk.
    history.append({"role": "assistant", "content": ""})
    yield history, usage_text
    # 5. API call (streaming; usage stats are requested on the final chunk).
    try:
        completion = client.chat.completions.create(
            model=model,
            messages=messages,
            max_tokens=clamp_tokens(model, max_tokens),
            temperature=float(temp),
            top_p=float(top_p),
            stream=True,
            stream_options={"include_usage": True},
        )
        full_response = ""
        completion_tokens = 0
        for chunk in completion:
            # Extract the text delta defensively (usage-only chunks carry no choices).
            delta = ""
            if hasattr(chunk, "choices") and chunk.choices and len(chunk.choices) > 0:
                delta = chunk.choices[0].delta.content or ""
            if delta:
                full_response += delta
                # Always display text so the UI never appears stuck; once the
                # final-channel marker arrives, the shown text switches to the
                # cleaned answer.
                clean_text = _clean_response(full_response)
                history[-1]["content"] = clean_text
                yield history, usage_text
            # Usage stats (present only on the chunk that reports usage).
            if hasattr(chunk, "usage") and chunk.usage:
                completion_tokens = chunk.usage.completion_tokens or 0
            # Finish-reason check (guards against NoneType on partial chunks).
            if hasattr(chunk, "choices") and chunk.choices and len(chunk.choices) > 0:
                finish = getattr(chunk.choices[0], "finish_reason", None)
                if finish in ["stop", "length"]:
                    break
        # Final cost summary based on the reported completion-token count.
        if completion_tokens > 0:
            cost = cost_from_completion_tokens(model, completion_tokens)
            usage_text = f"Tokens: {completion_tokens} | Kosten: ${cost:.5f}"
        yield history, usage_text
    except Exception as e:
        # Surface API failures inside the chat bubble instead of crashing the UI.
        history[-1]["content"] += f"\n\nโš ๏ธ **API Fehler:** {str(e)}"
        yield history, "โŒ Fehler"
# ==============================================================================
# 5) UI LAYOUT
# ==============================================================================
def update_tokens_ui(model):
    """Re-clamp the max-tokens slider to the newly selected model's ceiling."""
    ceiling = int(MODELS.get(model, {}).get("max_tokens", 2048))
    return gr.update(maximum=ceiling, value=min(2048, ceiling))
# Declarative Gradio layout: component creation order inside the context
# managers defines the on-screen arrangement, and event listeners must be
# registered inside the Blocks context.
with gr.Blocks(title="Hyperbolic Chat", fill_height=True) as demo:
    gr.Markdown("## ๐Ÿš€ Hyperbolic Chat (Env Vars โ€ข Allowlist: dtometzki)")
    with gr.Row():
        # Left column: chat transcript plus input row and clear button.
        with gr.Column(scale=4):
            chatbot = gr.Chatbot(height=700)
            with gr.Row():
                msg_input = gr.Textbox(placeholder="Eingabe...", show_label=False, scale=4)
                submit_btn = gr.Button("Senden", variant="primary", scale=1)
            clear_btn = gr.Button("๐Ÿ—‘๏ธ Verlauf leeren")
        # Right column: login plus model/search/sampling controls.
        with gr.Column(scale=1, variant="panel"):
            gr.LoginButton()
            model_dd = gr.Dropdown(MODEL_CHOICES, value=MODEL_CHOICES[0], label="Modell")
            use_search_chk = gr.Checkbox(label="๐ŸŒ Google Suche nutzen", value=False)
            gr.Markdown("---")
            system_txt = gr.Textbox("Du bist ein hilfreicher Assistent.", label="System Prompt", lines=2)
            tokens_sld = gr.Slider(1, MAX_TOKENS_GLOBAL, value=2048, label="Max Tokens")
            temp_sld = gr.Slider(0.0, 2.0, value=0.7, label="Temperature")
            top_p_sld = gr.Slider(0.1, 1.0, value=0.95, label="Top-P")
            gr.Markdown("---")
            usage_md = gr.Markdown("Kosten: -")
    # Event wiring: a fast non-queued step appends the user message, then the
    # queued streaming generator fills in the assistant reply.
    params = [chatbot, model_dd, system_txt, tokens_sld, temp_sld, top_p_sld, use_search_chk]
    msg_input.submit(add_user_message, [msg_input, chatbot], [msg_input, chatbot], queue=False).then(
        chat_stream, params, [chatbot, usage_md], queue=True
    )
    submit_btn.click(add_user_message, [msg_input, chatbot], [msg_input, chatbot], queue=False).then(
        chat_stream, params, [chatbot, usage_md], queue=True
    )
    # Selecting a different model re-clamps the token slider.
    model_dd.change(update_tokens_ui, model_dd, tokens_sld)
    clear_btn.click(lambda: ([], ""), None, [chatbot, usage_md])
def check_keys_startup():
    """Print a startup banner reporting which API keys are configured."""
    separator = "=" * 40
    print("\n" + separator)
    print("๐Ÿ”Ž STARTUP CHECK:")
    if HYPERBOLIC_API_KEY:
        print("โœ… HYPERBOLIC_API_KEY gefunden.")
    else:
        print("โŒ HYPERBOLIC_API_KEY fehlt! Chat wird Fehler zeigen.")
    if GOOGLE_API_KEY and SEARCH_ENGINE_ID:
        print("โœ… Google Search Keys gefunden.")
    else:
        print("โš ๏ธ Google Search Keys fehlen (Suche wird ignoriert).")
    print(separator + "\n")


# Run the banner once at import time so misconfiguration is visible in logs.
check_keys_startup()
if __name__ == "__main__":
    # NOTE(review): SSR is disabled here — presumably required by the hosting
    # environment; confirm before changing.
    os.environ["GRADIO_SSR_MODE"] = "False"
    app = demo.queue()
    app.launch(server_name="0.0.0.0", server_port=7860, share=False)