""" QuickSilver Pro Chat — Hugging Face Space. A zero-friction try-it demo for QuickSilver Pro. Anyone on HF can chat with DeepSeek V3 / R1 / Qwen 3.5 through our OpenAI-compatible endpoint, without creating an account first. The goal is top-of-funnel discoverability: the banner at the bottom sends them to quicksilverpro.io for their own key. Single-tenant QSP key (stored as the `QSP_KEY` Space secret) with a monthly budget cap configured on the QSP side. In-process per-session rate-limit keeps casual spam from spiking the bill. """ from __future__ import annotations import os import time from collections import deque from typing import Iterable import gradio as gr from openai import OpenAI # ────────────────────────── Configuration ────────────────────────── QSP_KEY = os.environ.get("QSP_KEY", "").strip() QSP_BASE = os.environ.get("QSP_BASE", "https://api.quicksilverpro.io/v1") MODELS = [ ("deepseek-v3", "DeepSeek V3 — general-purpose, fast"), ("deepseek-r1", "DeepSeek R1 — reasoning, slower, deeper"), ("qwen3.5-35b", "Qwen 3.5-35B-A3B — 262K context, multilingual"), ] MODEL_CHOICES = [f"{m} — {desc}" for m, desc in MODELS] DEFAULT_MODEL_LABEL = MODEL_CHOICES[0] DEFAULT_SYSTEM_PROMPT = "You are a helpful assistant." # Per-session soft rate limit. Not a security boundary — the QSP-side budget # cap on the shared key is. This just keeps one noisy session from blowing # through the daily allowance in 90 seconds. 
RATE_WINDOW_SEC = 60  # length of the sliding window, in seconds
RATE_MAX_MSGS = 8  # messages a single session may send within one window

# session_hash -> time.time() stamps of that session's accepted messages,
# oldest first.
_session_buckets: dict[str, deque] = {}


def _rate_limited(session_hash: str) -> bool:
    """Record one message for *session_hash* unless it is over the limit.

    Sliding-window limiter: a session may send at most ``RATE_MAX_MSGS``
    messages per ``RATE_WINDOW_SEC`` seconds.

    Args:
        session_hash: Key identifying the chat session.

    Returns:
        True if the message must be rejected (nothing is recorded), False if
        it was accepted and its timestamp recorded.
    """
    now = time.time()

    # Forget sessions whose newest message has already left the window.
    # Without this the dict keeps one entry per visitor for the lifetime of
    # the process. Iterate over a snapshot so the dict can be mutated.
    for key, stamps in list(_session_buckets.items()):
        if key != session_hash and (
            not stamps or now - stamps[-1] > RATE_WINDOW_SEC
        ):
            _session_buckets.pop(key, None)

    bucket = _session_buckets.setdefault(session_hash, deque())
    # Expire this session's timestamps that fell out of the window.
    while bucket and now - bucket[0] > RATE_WINDOW_SEC:
        bucket.popleft()
    if len(bucket) >= RATE_MAX_MSGS:
        return True
    bucket.append(now)
    return False


# ────────────────────────── OpenAI client ──────────────────────────

if not QSP_KEY:
    # Don't crash on import — let the UI render a clear error banner instead,
    # so the Space owner sees "QSP_KEY secret not set" rather than a 500.
    client = None
else:
    client = OpenAI(base_url=QSP_BASE, api_key=QSP_KEY)


def _parse_model_label(label: str) -> str:
    """Return the model id from a "<model id> — <description>" dropdown label."""
    return label.split(" — ", 1)[0]


def _content_to_text(content) -> str:
    """Flatten one chat message's content into plain text.

    Accepts a plain string, a dict carrying a ``"text"`` entry, or a list of
    such parts (joined with newlines). Anything else — ``None``, file tuples,
    non-text parts — contributes an empty string.
    """
    if isinstance(content, str):
        return content
    if isinstance(content, dict):
        text = content.get("text")
        return text if isinstance(text, str) else ""
    if isinstance(content, list):
        parts = (_content_to_text(part) for part in content)
        return "\n".join(part for part in parts if part)
    return ""


def _history_to_messages(history) -> list[dict[str, str]]:
    """Convert chat history into OpenAI-style role/content messages.

    Two history layouts are understood:

    * pairs — ``[(user_msg, assistant_msg), ...]``
    * messages — ``[{"role": ..., "content": ...}, ...]``

    Entries with no text, or with a role other than user/assistant, are
    skipped.
    """
    messages: list[dict[str, str]] = []
    for turn in history or []:
        if isinstance(turn, dict):
            role = turn.get("role")
            text = _content_to_text(turn.get("content"))
            if role in ("user", "assistant") and text:
                messages.append({"role": role, "content": text})
        elif isinstance(turn, (list, tuple)) and len(turn) == 2:
            for role, content in zip(("user", "assistant"), turn):
                text = _content_to_text(content)
                if text:
                    messages.append({"role": role, "content": text})
    return messages


def _api_error_text(exc: Exception) -> str:
    """Format an exception as the short error line shown in the chat."""
    return f"❌ API error: {type(exc).__name__}: {str(exc)[:300]}"


def respond(
    message: str,
    history: list[tuple[str, str]] | list[dict],
    model_label: str,
    system_prompt: str,
    temperature: float,
    max_tokens: int,
    request: gr.Request | None = None,
) -> Iterable[str]:
    """Stream a chat completion for the chat UI.

    Args:
        message: The user's new message.
        history: Earlier turns, either as (user, assistant) pairs or as
            role/content dicts. NOTE(review): which layout Gradio passes
            depends on its version and configuration, so both are accepted.
        model_label: Dropdown label; the model id is its part before " — ".
        system_prompt: Optional system prompt; blank means none is sent.
        temperature: Sampling temperature forwarded to the API.
        max_tokens: Completion token limit forwarded to the API.
        request: The Gradio request, used only for its ``session_hash``.

    Yields:
        The accumulated reply text after each non-empty streamed delta, or a
        single human-readable notice (misconfiguration, rate limit, API
        error) instead of a reply.
    """
    if client is None:
        yield (
            "⚠️ Space misconfigured: `QSP_KEY` secret is not set. "
            "Owner: configure it in Settings → Variables and secrets."
        )
        return

    # Requests without a session hash all share the "anon" bucket.
    session_hash = (request.session_hash if request else "anon") or "anon"
    if _rate_limited(session_hash):
        yield (
            f"⏳ Rate limit reached ({RATE_MAX_MSGS} messages / "
            f"{RATE_WINDOW_SEC}s). Take a breath, then try again."
        )
        return

    model = _parse_model_label(model_label)
    messages: list[dict[str, str]] = []
    if system_prompt.strip():
        messages.append({"role": "system", "content": system_prompt.strip()})
    messages.extend(_history_to_messages(history))
    messages.append({"role": "user", "content": message})

    try:
        stream = client.chat.completions.create(
            model=model,
            messages=messages,
            temperature=float(temperature),
            max_tokens=int(max_tokens),
            stream=True,
        )
    except Exception as e:
        yield _api_error_text(e)
        return

    accumulated = ""
    try:
        for chunk in stream:
            try:
                delta = chunk.choices[0].delta.content or ""
            except (AttributeError, IndexError):
                # Chunks without choices or without a content delta carry no
                # text to show.
                delta = ""
            if delta:
                accumulated += delta
                yield accumulated
    except Exception as e:
        # The connection can fail part-way through a stream. Keep whatever
        # text already arrived and append the error, rather than letting the
        # exception escape to the UI.
        prefix = f"{accumulated}\n\n" if accumulated else ""
        yield prefix + _api_error_text(e)


# ────────────────────────── UI ──────────────────────────

HEADER_MD = """
# ⚡ QuickSilver Pro Chat

Try **DeepSeek V3 / R1** and **Qwen 3.5-35B-A3B** via an OpenAI-compatible API — no signup needed here.

Running on [QuickSilver Pro](https://quicksilverpro.io) · Get your own key ($1 free credits): [quicksilverpro.io](https://quicksilverpro.io) · CLI: `pip install quicksilverpro`
"""

FOOTER_MD = """
---
Powered by QuickSilver Pro — open-source LLM inference, OpenAI-compatible, ~20% below OpenRouter / Together / Fireworks. Built by MachineFi Labs.
"""

# theme moved to launch() in Gradio 6, dropped here to stay forward-compatible
with gr.Blocks(title="QuickSilver Pro Chat") as demo:
    gr.Markdown(HEADER_MD)

    with gr.Row():
        # Left column: generation settings, passed to respond() as extra inputs.
        with gr.Column(scale=1):
            model_dropdown = gr.Dropdown(
                choices=MODEL_CHOICES,
                value=DEFAULT_MODEL_LABEL,
                label="Model",
                interactive=True,
            )
            system_prompt = gr.Textbox(
                label="System prompt",
                value=DEFAULT_SYSTEM_PROMPT,
                lines=3,
                max_lines=8,
            )
            temperature = gr.Slider(
                label="Temperature", minimum=0.0, maximum=2.0, step=0.1, value=0.7
            )
            max_tokens = gr.Slider(
                label="Max tokens", minimum=64, maximum=4096, step=64, value=1024
            )

        # Right column: the chat itself.
        with gr.Column(scale=3):
            # Gradio 6.0 removed submit_btn / retry_btn / undo_btn / clear_btn args
            # in favor of a more opinionated default layout; dropping them keeps
            # this compatible with both 5.x and 6.x.
            gr.ChatInterface(
                fn=respond,
                additional_inputs=[model_dropdown, system_prompt, temperature, max_tokens],
                examples=[
                    ["Write a concise git commit message for: fixed off-by-one error in pagination"],
                    ["Explain closures in JavaScript in 2 sentences"],
                    ["What's the fastest sorting algorithm for 100k integers and why?"],
                    ["Translate 'Hello, how are you?' into formal Japanese, Hindi, and Russian"],
                ],
                cache_examples=False,
            )

    gr.Markdown(FOOTER_MD)


if __name__ == "__main__":
    demo.queue(default_concurrency_limit=4, max_size=64).launch()