"""
QuickSilver Pro Chat — Hugging Face Space.
A zero-friction try-it demo for QuickSilver Pro. Anyone on HF can chat with
DeepSeek V3 / R1 / Qwen 3.5 through our OpenAI-compatible endpoint, without
creating an account first. The goal is top-of-funnel discoverability: the
links in the page header send them to quicksilverpro.io for their own key.
Single-tenant QSP key (stored as the `QSP_KEY` Space secret) with a monthly
budget cap configured on the QSP side. In-process per-session rate-limit
keeps casual spam from spiking the bill.
"""
from __future__ import annotations
import os
import time
from collections import deque
from typing import Iterable
import gradio as gr
from openai import OpenAI
# ────────────────────────── Configuration ──────────────────────────
# Shared API key, read from the `QSP_KEY` environment variable. Surrounding
# whitespace is stripped; an empty result means "not configured".
QSP_KEY = os.environ.get("QSP_KEY", "").strip()

# Base URL of the OpenAI-compatible endpoint. It is stripped the same way as
# QSP_KEY, and a blank / whitespace-only override falls back to the default
# rather than handing the client an empty base URL.
_DEFAULT_QSP_BASE = "https://api.quicksilverpro.io/v1"
QSP_BASE = os.environ.get("QSP_BASE", "").strip() or _DEFAULT_QSP_BASE

# (model id sent to the API, human-readable description shown to the user)
MODELS = [
    ("deepseek-v3", "DeepSeek V3 — general-purpose, fast"),
    ("deepseek-r1", "DeepSeek R1 — reasoning, slower, deeper"),
    ("qwen3.5-35b", "Qwen 3.5-35B-A3B — 262K context, multilingual"),
]

# Dropdown labels have the form "<model id> — <description>".
MODEL_CHOICES = [f"{model_id} — {description}" for model_id, description in MODELS]
DEFAULT_MODEL_LABEL = MODEL_CHOICES[0]
DEFAULT_SYSTEM_PROMPT = "You are a helpful assistant."
# Per-session soft rate limit. Not a security boundary — the QSP-side budget
# cap on the shared key is. This just keeps one noisy session from burning
# through the shared allowance in a short burst.
RATE_WINDOW_SEC = 60
RATE_MAX_MSGS = 8

# Once more than this many sessions are tracked, idle ones are swept so the
# table cannot keep growing for as long as the process stays up.
_MAX_TRACKED_SESSIONS = 1024

# session hash -> monotonic timestamps of that session's accepted messages,
# oldest first.
_session_buckets: dict[str, deque] = {}


def _evict_idle_sessions(now: float, keep: str) -> None:
    """Drop every bucket except *keep* that has no timestamp inside the window."""
    # Iterate over a snapshot: handlers may run concurrently, and deleting
    # from the live dict while iterating it would raise.
    for key, stamps in list(_session_buckets.items()):
        if key == keep:
            continue
        if not stamps or now - stamps[-1] > RATE_WINDOW_SEC:
            _session_buckets.pop(key, None)


def _rate_limited(session_hash: str) -> bool:
    """Record a message for *session_hash* unless it is over its quota.

    Args:
        session_hash: Key identifying the chat session.

    Returns:
        True when the session already has RATE_MAX_MSGS accepted messages in
        the last RATE_WINDOW_SEC seconds (the message is NOT recorded);
        False otherwise (the message is recorded against the session).
    """
    # Monotonic clock: a wall-clock adjustment must not stretch or shrink
    # the window.
    now = time.monotonic()

    if len(_session_buckets) > _MAX_TRACKED_SESSIONS:
        _evict_idle_sessions(now, keep=session_hash)

    bucket = _session_buckets.setdefault(session_hash, deque())
    # Expire timestamps that have slid out of the window.
    while bucket and now - bucket[0] > RATE_WINDOW_SEC:
        bucket.popleft()

    if len(bucket) >= RATE_MAX_MSGS:
        return True
    bucket.append(now)
    return False
# ────────────────────────── OpenAI client ──────────────────────────
# A missing key must not crash the import: `client` stays None so the UI can
# render a clear "QSP_KEY secret not set" message for the Space owner instead
# of the Space failing with a 500.
client = OpenAI(base_url=QSP_BASE, api_key=QSP_KEY) if QSP_KEY else None
def _parse_model_label(label: str) -> str:
    """Return the model id from a label of the form "<id> — <description>".

    Only the first " — " is treated as the separator, so a description that
    itself contains " — " is handled. A label without the separator is
    returned unchanged.
    """
    model_id, _separator, _description = label.partition(" — ")
    return model_id
def _content_to_text(content: object) -> str:
    """Flatten one history entry's content into plain text."""
    if isinstance(content, str):
        return content
    if isinstance(content, list):
        # Structured content: keep the text of each block, skip anything else.
        texts = []
        for part in content:
            if isinstance(part, str):
                texts.append(part)
            elif isinstance(part, dict) and isinstance(part.get("text"), str):
                texts.append(part["text"])
        return "\n".join(texts)
    return ""


def _history_to_messages(history: object) -> list[dict[str, str]]:
    """Convert chat history into OpenAI-style role/content messages.

    Accepts both shapes the chat UI may hand over: ``(user, assistant)``
    pairs, and ``{"role": ..., "content": ...}`` dicts. Empty turns and
    entries of any other shape are skipped.
    """
    messages: list[dict[str, str]] = []
    for turn in history or []:
        if isinstance(turn, dict):
            role = turn.get("role")
            text = _content_to_text(turn.get("content"))
            if role in ("user", "assistant") and text:
                messages.append({"role": role, "content": text})
        elif isinstance(turn, (list, tuple)) and len(turn) == 2:
            for role, raw in zip(("user", "assistant"), turn):
                text = _content_to_text(raw)
                if text:
                    messages.append({"role": role, "content": text})
    return messages


def respond(
    message: str,
    history: list[tuple[str, str]] | list[dict],
    model_label: str,
    system_prompt: str,
    temperature: float,
    max_tokens: int,
    request: gr.Request | None = None,
) -> Iterable[str]:
    """Stream a chat completion for *message*, yielding the reply so far.

    Args:
        message: The new user message.
        history: Earlier turns, as ``(user, assistant)`` pairs or as
            role/content dicts (both shapes are accepted).
        model_label: Dropdown label; the model id is parsed out of it.
        system_prompt: Optional system prompt; blank or None is omitted.
        temperature: Sampling temperature, forwarded as a float.
        max_tokens: Completion token limit, forwarded as an int.
        request: Request object whose ``session_hash`` keys the rate limit;
            sessions without one share the "anon" bucket.

    Yields:
        The accumulated reply text after each non-empty streamed delta, or a
        single user-facing notice when the Space is misconfigured, the
        session is rate limited, or the API call fails. If the stream fails
        part-way, the partial reply is yielded with the error appended.
    """
    if client is None:
        yield (
            "⚠️ Space misconfigured: `QSP_KEY` secret is not set. "
            "Owner: configure it in Settings → Variables and secrets."
        )
        return

    session_hash = (request.session_hash if request else "anon") or "anon"
    if _rate_limited(session_hash):
        yield (
            f"⏳ Rate limit reached ({RATE_MAX_MSGS} messages / "
            f"{RATE_WINDOW_SEC}s). Take a breath, then try again."
        )
        return

    model = _parse_model_label(model_label)

    messages: list[dict[str, str]] = []
    system_text = (system_prompt or "").strip()
    if system_text:
        messages.append({"role": "system", "content": system_text})
    messages.extend(_history_to_messages(history))
    messages.append({"role": "user", "content": message})

    accumulated = ""
    stream = None
    try:
        stream = client.chat.completions.create(
            model=model,
            messages=messages,
            temperature=float(temperature),
            max_tokens=int(max_tokens),
            stream=True,
        )
        for chunk in stream:
            # Some chunks carry no choices or no content; treat them as empty.
            try:
                delta = chunk.choices[0].delta.content or ""
            except (AttributeError, IndexError, TypeError):
                delta = ""
            if delta:
                accumulated += delta
                yield accumulated
    except Exception as e:
        # Covers both the initial request and failures in the middle of the
        # stream; keep whatever text already arrived visible to the user.
        error = f"❌ API error: {type(e).__name__}: {str(e)[:300]}"
        yield f"{accumulated}\n\n{error}" if accumulated else error
    finally:
        # Release the HTTP connection even when the consumer stops early.
        close = getattr(stream, "close", None)
        if callable(close):
            close()
# ────────────────────────── UI ──────────────────────────
# Markdown shown above the chat layout: title, one-line pitch, and the links
# to quicksilverpro.io.
HEADER_MD = """
# ⚡ QuickSilver Pro Chat
Try **DeepSeek V3 / R1** and **Qwen 3.5-35B-A3B** via an OpenAI-compatible API — no signup needed here.
Running on [QuickSilver Pro](https://quicksilverpro.io) · Get your own key ($1 free credits): [quicksilverpro.io](https://quicksilverpro.io) · CLI: `pip install quicksilverpro`
"""
# Markdown shown below the chat layout: a horizontal rule and attribution.
FOOTER_MD = """
---
Powered by QuickSilver Pro — open-source LLM inference, OpenAI-compatible, ~20% below OpenRouter / Together / Fireworks. Built by MachineFi Labs.
"""
# theme moved to launch() in Gradio 6, dropped here to stay forward-compatible
with gr.Blocks(title="QuickSilver Pro Chat") as demo:
    gr.Markdown(HEADER_MD)

    with gr.Row():
        # Narrow column: generation settings, forwarded to `respond` as
        # additional inputs in the order listed in `chat_settings`.
        with gr.Column(scale=1):
            model_dropdown = gr.Dropdown(
                label="Model",
                choices=MODEL_CHOICES,
                value=DEFAULT_MODEL_LABEL,
                interactive=True,
            )
            system_prompt = gr.Textbox(
                label="System prompt",
                value=DEFAULT_SYSTEM_PROMPT,
                lines=3,
                max_lines=8,
            )
            temperature = gr.Slider(
                label="Temperature",
                minimum=0.0,
                maximum=2.0,
                step=0.1,
                value=0.7,
            )
            max_tokens = gr.Slider(
                label="Max tokens",
                minimum=64,
                maximum=4096,
                step=64,
                value=1024,
            )

        # Wide column: the chat itself.
        with gr.Column(scale=3):
            chat_settings = [model_dropdown, system_prompt, temperature, max_tokens]
            starter_prompts = [
                ["Write a concise git commit message for: fixed off-by-one error in pagination"],
                ["Explain closures in JavaScript in 2 sentences"],
                ["What's the fastest sorting algorithm for 100k integers and why?"],
                ["Translate 'Hello, how are you?' into formal Japanese, Hindi, and Russian"],
            ]
            # Gradio 6.0 removed submit_btn / retry_btn / undo_btn / clear_btn args
            # in favor of a more opinionated default layout; dropping them keeps
            # this compatible with both 5.x and 6.x.
            gr.ChatInterface(
                fn=respond,
                additional_inputs=chat_settings,
                examples=starter_prompts,
                cache_examples=False,
            )

    gr.Markdown(FOOTER_MD)
if __name__ == "__main__":
    # Configure the request queue first, then start the server.
    demo.queue(default_concurrency_limit=4, max_size=64)
    demo.launch()