Spaces:
Running
Running
| """ | |
| QuickSilver Pro Chat — Hugging Face Space. | |
| A zero-friction try-it demo for QuickSilver Pro. Anyone on HF can chat with | |
| DeepSeek V3 / R1 / Qwen 3.5 through our OpenAI-compatible endpoint, without | |
| creating an account first. The goal is top-of-funnel discoverability: the | |
| banner at the bottom sends them to quicksilverpro.io for their own key. | |
| Single-tenant QSP key (stored as the `QSP_KEY` Space secret) with a monthly | |
| budget cap configured on the QSP side. In-process per-session rate-limit | |
| keeps casual spam from spiking the bill. | |
| """ | |
| from __future__ import annotations | |
| import os | |
| import time | |
| from collections import deque | |
| from typing import Iterable | |
| import gradio as gr | |
| from openai import OpenAI | |
| # ────────────────────────── Configuration ────────────────────────── | |
# Credentials and endpoint are read from the environment. An unset QSP_KEY
# becomes the empty string; QSP_BASE falls back to the public endpoint.
QSP_KEY = os.getenv("QSP_KEY", "").strip()
QSP_BASE = os.getenv("QSP_BASE", "https://api.quicksilverpro.io/v1")

# Each entry is (model id sent to the API, description shown to the user).
MODELS = [
    ("deepseek-v3", "DeepSeek V3 — general-purpose, fast"),
    ("deepseek-r1", "DeepSeek R1 — reasoning, slower, deeper"),
    ("qwen3.5-35b", "Qwen 3.5-35B-A3B — 262K context, multilingual"),
]

# Dropdown labels have the form "<model id> — <description>".
MODEL_CHOICES = [" — ".join(entry) for entry in MODELS]
DEFAULT_MODEL_LABEL = MODEL_CHOICES[0]

DEFAULT_SYSTEM_PROMPT = "You are a helpful assistant."
# Per-session soft rate limit. Not a security boundary — the QSP-side budget
# cap on the shared key is. This just keeps one noisy session from burning
# through the shared budget in a burst.
RATE_WINDOW_SEC = 60
RATE_MAX_MSGS = 8

# session hash -> timestamps (time.monotonic()) of that session's accepted
# messages, oldest first.
_session_buckets: dict[str, deque] = {}

# Monotonic timestamp of the last idle-bucket sweep; -inf makes the very first
# call sweep unconditionally.
_last_sweep = float("-inf")


def _evict_idle_sessions(now: float) -> None:
    """Drop buckets with no activity inside the current window.

    Without this, every session hash ever seen would keep an entry in
    ``_session_buckets`` for the lifetime of the process. The sweep runs at
    most once per ``RATE_WINDOW_SEC`` so the common path stays cheap.
    """
    global _last_sweep
    if now - _last_sweep < RATE_WINDOW_SEC:
        return
    _last_sweep = now
    # Iterate over a snapshot so the dict can be modified while sweeping.
    for session, bucket in list(_session_buckets.items()):
        if not bucket or now - bucket[-1] > RATE_WINDOW_SEC:
            _session_buckets.pop(session, None)


def _rate_limited(session_hash: str) -> bool:
    """Record a message for *session_hash* and report whether to reject it.

    Returns ``True`` when the session already has ``RATE_MAX_MSGS`` accepted
    messages within the last ``RATE_WINDOW_SEC`` seconds; in that case nothing
    is recorded. Otherwise the message is recorded and ``False`` is returned.
    """
    # Monotonic clock: wall-clock adjustments must not stretch or shrink the
    # window.
    now = time.monotonic()
    _evict_idle_sessions(now)

    bucket = _session_buckets.setdefault(session_hash, deque())
    # Discard timestamps that have aged out of the sliding window.
    while bucket and now - bucket[0] > RATE_WINDOW_SEC:
        bucket.popleft()

    if len(bucket) >= RATE_MAX_MSGS:
        return True
    bucket.append(now)
    return False
| # ────────────────────────── OpenAI client ────────────────────────── | |
# A missing key must not crash the import. Leaving the client unset lets the
# UI show a clear "QSP_KEY secret not set" message to the Space owner rather
# than a 500.
client = OpenAI(base_url=QSP_BASE, api_key=QSP_KEY) if QSP_KEY else None
| def _parse_model_label(label: str) -> str: | |
| return label.split(" — ", 1)[0] | |
def _content_to_text(content: object) -> str:
    """Flatten the ``content`` of a messages-format history entry to text.

    Strings are returned as-is. For a list, string items and the ``"text"``
    value of dict items are concatenated; anything else is ignored.
    """
    if isinstance(content, str):
        return content
    if isinstance(content, list):
        # NOTE(review): newer Gradio releases appear to pass content as a list
        # of typed parts such as {"type": "text", "text": ...} — confirm
        # against the installed version.
        pieces = []
        for part in content:
            if isinstance(part, str):
                pieces.append(part)
            elif isinstance(part, dict) and isinstance(part.get("text"), str):
                pieces.append(part["text"])
        return "".join(pieces)
    return ""


def _history_to_messages(history) -> list[dict[str, str]]:
    """Convert chat history into OpenAI-style ``role``/``content`` dicts.

    Accepts both shapes a chat UI may hand over: ``(user, assistant)`` pairs
    and messages-format dicts with ``role`` and ``content`` keys. Empty
    entries are skipped.
    """
    messages: list[dict[str, str]] = []
    for turn in history or []:
        if isinstance(turn, dict):
            role = turn.get("role")
            text = _content_to_text(turn.get("content"))
            if role in ("user", "assistant") and text:
                messages.append({"role": role, "content": text})
            continue
        user_msg, assistant_msg = turn
        if user_msg:
            messages.append({"role": "user", "content": user_msg})
        if assistant_msg:
            messages.append({"role": "assistant", "content": assistant_msg})
    return messages


def respond(
    message: str,
    history: list[tuple[str, str]] | list[dict],
    model_label: str,
    system_prompt: str,
    temperature: float,
    max_tokens: int,
    request: gr.Request | None = None,
) -> Iterable[str]:
    """Stream the assistant's reply to *message* as a growing string.

    Args:
        message: The user's new message.
        history: Earlier turns, as ``(user, assistant)`` pairs or as
            messages-format dicts with ``role``/``content``.
        model_label: Dropdown label; the model id is the part before the
            first " — ".
        system_prompt: Optional system message; blank means none is sent.
        temperature: Sampling temperature forwarded to the API.
        max_tokens: Completion token limit forwarded to the API.
        request: Current request, used only for its ``session_hash``.

    Yields:
        The reply accumulated so far, once per non-empty streamed delta. If
        the Space is misconfigured, the session is rate-limited, or the API
        fails, a single human-readable notice is yielded instead (appended to
        any partial text when the failure happens mid-stream).
    """
    if client is None:
        yield (
            "⚠️ Space misconfigured: `QSP_KEY` secret is not set. "
            "Owner: configure it in Settings → Variables and secrets."
        )
        return

    # Requests without a session hash all share the "anon" bucket.
    session_hash = (request.session_hash if request else "anon") or "anon"
    if _rate_limited(session_hash):
        yield (
            f"⏳ Rate limit reached ({RATE_MAX_MSGS} messages / "
            f"{RATE_WINDOW_SEC}s). Take a breath, then try again."
        )
        return

    model = _parse_model_label(model_label)

    messages: list[dict[str, str]] = []
    system_text = (system_prompt or "").strip()
    if system_text:
        messages.append({"role": "system", "content": system_text})
    messages.extend(_history_to_messages(history))
    messages.append({"role": "user", "content": message})

    try:
        stream = client.chat.completions.create(
            model=model,
            messages=messages,
            temperature=float(temperature),
            max_tokens=int(max_tokens),
            stream=True,
        )
    except Exception as e:
        # UI boundary: show the failure in the chat instead of raising.
        yield f"❌ API error: {type(e).__name__}: {str(e)[:300]}"
        return

    accumulated = ""
    try:
        for chunk in stream:
            try:
                delta = chunk.choices[0].delta.content or ""
            except (AttributeError, IndexError):
                # Chunks without choices or a delta carry no text; skip them.
                delta = ""
            if delta:
                accumulated += delta
                yield accumulated
    except Exception as e:
        # A failure after streaming has started keeps the partial reply and
        # reports the error the same way as a failure to start.
        notice = f"❌ Stream interrupted: {type(e).__name__}: {str(e)[:300]}"
        yield f"{accumulated}\n\n{notice}" if accumulated else notice
    finally:
        # Also runs when the consumer abandons the generator early, so the
        # underlying HTTP response is always released.
        stream.close()
| # ────────────────────────── UI ────────────────────────── | |
# Markdown shown at the top of the page: title, model line-up, and links for
# getting a personal key.
HEADER_MD = """
# ⚡ QuickSilver Pro Chat
Try **DeepSeek V3 / R1** and **Qwen 3.5-35B-A3B** via an OpenAI-compatible API — no signup needed here.
<sub>Running on [QuickSilver Pro](https://quicksilverpro.io) · Get your own key ($1 free credits): [quicksilverpro.io](https://quicksilverpro.io) · CLI: `pip install quicksilverpro`</sub>
"""
# Markdown shown below the chat: horizontal rule plus attribution line.
FOOTER_MD = """
---
<sub>Powered by <a href="https://quicksilverpro.io">QuickSilver Pro</a> — open-source LLM inference, OpenAI-compatible, ~20% below OpenRouter / Together / Fireworks. Built by <a href="https://quicksilverpro.io">MachineFi Labs</a>.</sub>
"""
# No theme is passed here: it moved to launch() in Gradio 6, and leaving it
# out keeps this file forward-compatible.
with gr.Blocks(title="QuickSilver Pro Chat") as demo:
    gr.Markdown(HEADER_MD)

    with gr.Row():
        # Narrow column: the generation settings forwarded to respond().
        with gr.Column(scale=1):
            model_dropdown = gr.Dropdown(
                label="Model",
                choices=MODEL_CHOICES,
                value=DEFAULT_MODEL_LABEL,
                interactive=True,
            )
            system_prompt = gr.Textbox(
                label="System prompt",
                value=DEFAULT_SYSTEM_PROMPT,
                lines=3,
                max_lines=8,
            )
            temperature = gr.Slider(
                label="Temperature",
                value=0.7,
                minimum=0.0,
                maximum=2.0,
                step=0.1,
            )
            max_tokens = gr.Slider(
                label="Max tokens",
                value=1024,
                minimum=64,
                maximum=4096,
                step=64,
            )

        # Wide column: the chat itself.
        with gr.Column(scale=3):
            # The submit_btn / retry_btn / undo_btn / clear_btn arguments are
            # deliberately not passed: Gradio 6.0 removed them in favor of a
            # more opinionated default layout, and omitting them keeps this
            # compatible with both 5.x and 6.x.
            gr.ChatInterface(
                fn=respond,
                additional_inputs=[
                    model_dropdown,
                    system_prompt,
                    temperature,
                    max_tokens,
                ],
                cache_examples=False,
                examples=[
                    ["Write a concise git commit message for: fixed off-by-one error in pagination"],
                    ["Explain closures in JavaScript in 2 sentences"],
                    ["What's the fastest sorting algorithm for 100k integers and why?"],
                    ["Translate 'Hello, how are you?' into formal Japanese, Hindi, and Russian"],
                ],
            )

    gr.Markdown(FOOTER_MD)
if __name__ == "__main__":
    # Configure the request queue first, then start the server.
    demo.queue(default_concurrency_limit=4, max_size=64)
    demo.launch()