Spaces:

MachineFi
/

QuickSilverPro-Chat

Running

App Files Files Community

QuickSilverPro-Chat / app.py

Raullen

fix(gradio6): drop theme + removed btn args

7c937ef verified about 2 months ago

raw

history blame

6.92 kB

	"""
	QuickSilver Pro Chat — Hugging Face Space.

	A zero-friction try-it demo for QuickSilver Pro. Anyone on HF can chat with
	DeepSeek V3 / R1 / Qwen 3.5 through our OpenAI-compatible endpoint, without
	creating an account first. The goal is top-of-funnel discoverability: the
	banner at the bottom sends them to quicksilverpro.io for their own key.

	Single-tenant QSP key (stored as the `QSP_KEY` Space secret) with a monthly
	budget cap configured on the QSP side. In-process per-session rate-limit
	keeps casual spam from spiking the bill.
	"""

	from __future__ import annotations

	import os
	import time
	from collections import deque
	from typing import Iterable

	import gradio as gr
	from openai import OpenAI

	# ────────────────────────── Configuration ──────────────────────────

	# Credentials and endpoint come from the environment (the key is a Space
	# secret). An unset key becomes "" so the module still imports.
	QSP_KEY = os.getenv("QSP_KEY", "").strip()
	QSP_BASE = os.getenv("QSP_BASE", "https://api.quicksilverpro.io/v1")

	# (model id sent to the API, human-readable description) pairs.
	MODELS = [
	    ("deepseek-v3", "DeepSeek V3 — general-purpose, fast"),
	    ("deepseek-r1", "DeepSeek R1 — reasoning, slower, deeper"),
	    ("qwen3.5-35b", "Qwen 3.5-35B-A3B — 262K context, multilingual"),
	]
	# Dropdown labels have the form "<model id> — <description>"; the model id
	# is recovered later by cutting the label at the first " — ".
	MODEL_CHOICES = [" — ".join(entry) for entry in MODELS]
	DEFAULT_MODEL_LABEL = MODEL_CHOICES[0]

	DEFAULT_SYSTEM_PROMPT = "You are a helpful assistant."

	# Per-session soft rate limit. Not a security boundary — the QSP-side budget
	# cap on the shared key is. This just keeps one noisy session from blowing
	# through the daily allowance in 90 seconds.
	RATE_WINDOW_SEC = 60
	RATE_MAX_MSGS = 8

	_session_buckets: dict[str, deque] = {}


	def _rate_limited(session_hash: str) -> bool:
	now = time.time()
	bucket = _session_buckets.setdefault(session_hash, deque())
	while bucket and now - bucket[0] > RATE_WINDOW_SEC:
	bucket.popleft()
	if len(bucket) >= RATE_MAX_MSGS:
	return True
	bucket.append(now)
	return False


	# ────────────────────────── OpenAI client ──────────────────────────

	# A missing key must not crash the import: `client` stays None and the chat
	# handler reports "QSP_KEY secret not set" in the UI, so the Space owner
	# sees a clear message rather than a 500.
	client = OpenAI(base_url=QSP_BASE, api_key=QSP_KEY) if QSP_KEY else None


	def _parse_model_label(label: str) -> str:
	return label.split(" — ", 1)[0]


	def _content_to_text(content: object) -> str:
	    """Flatten one chat-history ``content`` value into plain text.

	    Accepts a plain string, a dict carrying a ``"text"`` entry, or a list of
	    such parts. Anything else (e.g. file attachments) contributes nothing.
	    """
	    if isinstance(content, str):
	        return content
	    if isinstance(content, dict):
	        text = content.get("text")
	        return text if isinstance(text, str) else ""
	    if isinstance(content, list):
	        parts = [_content_to_text(part) for part in content]
	        return "\n".join(part for part in parts if part)
	    return ""


	def _history_to_messages(history: object) -> list[dict[str, str]]:
	    """Convert chat history into OpenAI-style ``role``/``content`` messages.

	    Handles both shapes the UI layer may pass: message dicts
	    (``{"role": ..., "content": ...}``) and legacy ``(user, assistant)``
	    pairs. Empty or non-text turns are skipped.
	    """
	    converted: list[dict[str, str]] = []
	    for turn in history or []:
	        if isinstance(turn, dict):
	            role = turn.get("role")
	            text = _content_to_text(turn.get("content"))
	            if role in ("user", "assistant") and text:
	                converted.append({"role": role, "content": text})
	        elif isinstance(turn, (list, tuple)) and len(turn) == 2:
	            for role, raw in zip(("user", "assistant"), turn):
	                text = _content_to_text(raw)
	                if text:
	                    converted.append({"role": role, "content": text})
	    return converted


	def respond(
	    message: str,
	    history: list,
	    model_label: str,
	    system_prompt: str,
	    temperature: float,
	    max_tokens: int,
	    request: gr.Request | None = None,
	) -> Iterable[str]:
	    """Stream a chat completion for the chat UI.

	    Args:
	        message: The new user message.
	        history: Earlier turns, either message dicts with ``role`` and
	            ``content`` or legacy ``(user, assistant)`` pairs.
	        model_label: Dropdown label; the model id is the text before the
	            first " — ".
	        system_prompt: Optional system message; blank means none is sent.
	        temperature: Sampling temperature forwarded to the API.
	        max_tokens: Completion token cap forwarded to the API.
	        request: Request object used to read ``session_hash`` for rate
	            limiting; all requests without one share the "anon" bucket.

	    Yields:
	        The accumulated reply text after each non-empty streamed delta, or a
	        single status line when the Space is misconfigured, the session is
	        rate limited, or the API call fails.
	    """
	    if client is None:
	        yield (
	            "⚠️ Space misconfigured: `QSP_KEY` secret is not set. "
	            "Owner: configure it in Settings → Variables and secrets."
	        )
	        return

	    session_hash = (request.session_hash if request else "anon") or "anon"
	    if _rate_limited(session_hash):
	        yield (
	            f"⏳ Rate limit reached ({RATE_MAX_MSGS} messages / "
	            f"{RATE_WINDOW_SEC}s). Take a breath, then try again."
	        )
	        return

	    model = _parse_model_label(model_label)
	    messages: list[dict[str, str]] = []
	    system_text = (system_prompt or "").strip()
	    if system_text:
	        messages.append({"role": "system", "content": system_text})
	    # Unpacking every turn as a (user, assistant) pair breaks on dict-shaped
	    # history (it yields the dict's keys or raises), so normalise both shapes.
	    messages.extend(_history_to_messages(history))
	    messages.append({"role": "user", "content": message})

	    try:
	        stream = client.chat.completions.create(
	            model=model,
	            messages=messages,
	            temperature=float(temperature),
	            max_tokens=int(max_tokens),
	            stream=True,
	        )
	    except Exception as e:
	        yield f"❌ API error: {type(e).__name__}: {str(e)[:300]}"
	        return

	    accumulated = ""
	    try:
	        for chunk in stream:
	            try:
	                delta = chunk.choices[0].delta.content or ""
	            except (AttributeError, IndexError):
	                # Chunks without choices or content carry no text; skip them.
	                delta = ""
	            if delta:
	                accumulated += delta
	                yield accumulated
	    except Exception as e:
	        # Failures can also surface mid-stream; keep what was already
	        # produced and report the error instead of raising into the UI.
	        prefix = f"{accumulated}\n\n" if accumulated else ""
	        yield f"{prefix}❌ API error: {type(e).__name__}: {str(e)[:300]}"
	    finally:
	        # Release the HTTP response even if the consumer stops early.
	        close = getattr(stream, "close", None)
	        if callable(close):
	            close()


	# ────────────────────────── UI ──────────────────────────

	# Markdown rendered at the top of the page: title, one-line pitch, and links
	# to quicksilverpro.io for getting a personal key.
	HEADER_MD = """
	# ⚡ QuickSilver Pro Chat

	Try DeepSeek V3 / R1 and Qwen 3.5-35B-A3B via an OpenAI-compatible API — no signup needed here.

	<sub>Running on [QuickSilver Pro](https://quicksilverpro.io) · Get your own key ($1 free credits): [quicksilverpro.io](https://quicksilverpro.io) · CLI: `pip install quicksilverpro`</sub>
	"""

	# Markdown rendered below the chat area: horizontal rule plus attribution.
	FOOTER_MD = """
	---
	<sub>Powered by <a href="https://quicksilverpro.io">QuickSilver Pro</a> — open-source LLM inference, OpenAI-compatible, ~20% below OpenRouter / Together / Fireworks. Built by <a href="https://quicksilverpro.io">MachineFi Labs</a>.</sub>
	"""

	# Prompts passed to the chat interface as examples; each row holds only the
	# message text.
	_EXAMPLE_PROMPTS = [
	    ["Write a concise git commit message for: fixed off-by-one error in pagination"],
	    ["Explain closures in JavaScript in 2 sentences"],
	    ["What's the fastest sorting algorithm for 100k integers and why?"],
	    ["Translate 'Hello, how are you?' into formal Japanese, Hindi, and Russian"],
	]

	# No `theme=` here: Gradio 6 moved it to launch(), so omitting it keeps this
	# constructor call forward-compatible.
	with gr.Blocks(title="QuickSilver Pro Chat") as demo:
	    gr.Markdown(HEADER_MD)

	    with gr.Row():
	        # Narrow column: model choice and generation settings.
	        with gr.Column(scale=1):
	            model_dropdown = gr.Dropdown(
	                choices=MODEL_CHOICES,
	                value=DEFAULT_MODEL_LABEL,
	                label="Model",
	                interactive=True,
	            )
	            system_prompt = gr.Textbox(
	                label="System prompt",
	                value=DEFAULT_SYSTEM_PROMPT,
	                lines=3,
	                max_lines=8,
	            )
	            temperature = gr.Slider(
	                label="Temperature",
	                minimum=0.0,
	                maximum=2.0,
	                step=0.1,
	                value=0.7,
	            )
	            max_tokens = gr.Slider(
	                label="Max tokens",
	                minimum=64,
	                maximum=4096,
	                step=64,
	                value=1024,
	            )

	        # Wide column: the chat itself.
	        with gr.Column(scale=3):
	            # Values of these components are passed to `respond` after the
	            # message and history, in this order.
	            _extra_inputs = [model_dropdown, system_prompt, temperature, max_tokens]
	            # submit_btn / retry_btn / undo_btn / clear_btn were removed in
	            # Gradio 6.0 in favor of an opinionated default layout; leaving
	            # them out keeps this call valid on both 5.x and 6.x.
	            gr.ChatInterface(
	                fn=respond,
	                additional_inputs=_extra_inputs,
	                examples=_EXAMPLE_PROMPTS,
	                cache_examples=False,
	            )

	    gr.Markdown(FOOTER_MD)


	if __name__ == "__main__":
	    # Enable the request queue with the same limits as before
	    # (default_concurrency_limit=4, max_size=64), then start the server.
	    demo.queue(default_concurrency_limit=4, max_size=64)
	    demo.launch()