jefmon01 commited on
Commit
f29395d
·
1 Parent(s): 285c545

Update space

Browse files
Files changed (1) hide show
  1. app.py +233 -52
app.py CHANGED
@@ -1,70 +1,251 @@
 
 
 
 
1
  import gradio as gr
2
- from huggingface_hub import InferenceClient
3
-
4
-
5
- def respond(
6
- message,
7
- history: list[dict[str, str]],
8
- system_message,
9
- max_tokens,
10
- temperature,
11
- top_p,
12
- hf_token: gr.OAuthToken,
13
- ):
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
14
  """
15
- For more information on `huggingface_hub` Inference API support, please check the docs: https://huggingface.co/docs/huggingface_hub/v0.22.2/en/guides/inference
 
 
 
 
 
 
 
 
 
 
 
 
16
  """
17
- client = InferenceClient(token=hf_token.token, model="openai/gpt-oss-20b")
18
 
19
- messages = [{"role": "system", "content": system_message}]
 
 
 
 
 
 
20
 
21
- messages.extend(history)
 
22
 
23
- messages.append({"role": "user", "content": message})
24
 
25
- response = ""
 
 
 
 
 
26
 
27
- for message in client.chat_completion(
28
- messages,
29
- max_tokens=max_tokens,
30
- stream=True,
31
- temperature=temperature,
32
- top_p=top_p,
33
- ):
34
- choices = message.choices
35
- token = ""
36
- if len(choices) and choices[0].delta.content:
37
- token = choices[0].delta.content
38
 
39
- response += token
40
- yield response
41
 
 
 
42
 
43
- """
44
- For information on how to customize the ChatInterface, peruse the gradio docs: https://www.gradio.app/docs/chatinterface
45
- """
46
- chatbot = gr.ChatInterface(
47
- respond,
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
48
  type="messages",
 
 
49
  additional_inputs=[
50
- gr.Textbox(value="You are a friendly Chatbot.", label="System message"),
51
- gr.Slider(minimum=1, maximum=2048, value=512, step=1, label="Max new tokens"),
52
- gr.Slider(minimum=0.1, maximum=4.0, value=0.7, step=0.1, label="Temperature"),
53
- gr.Slider(
54
- minimum=0.1,
55
- maximum=1.0,
56
- value=0.95,
57
- step=0.05,
58
- label="Top-p (nucleus sampling)",
59
- ),
60
  ],
 
 
 
61
  )
62
 
63
- with gr.Blocks() as demo:
64
- with gr.Sidebar():
65
- gr.LoginButton()
66
- chatbot.render()
67
-
68
 
69
  if __name__ == "__main__":
70
- demo.launch()
 
1
import os
import time
from collections import deque, defaultdict
from threading import Lock
import gradio as gr
from openai import OpenAI

"""
Hugging Face Space — Gradio app for a live, executive-focused Cybersecurity RPG
- Streams responses from OpenAI Chat Completions
- Per-IP cooldown + global RPM soft limit (token bucket)
- Optional event passcode gate
- OpenAI Moderation API on each incoming message
- Optional pre-game briefing field

Required Secrets/Variables (set in Space Settings → Secrets & Variables):
- OPENAI_API_KEY (Secret)
Optional Variables (provide sane defaults below):
- OPENAI_MODEL (e.g., "gpt-5-mini")
- SYSTEM_PROMPT (overrides the default RPG prompt below)
- EVENT_CODE (short passcode shown on your slide; leave blank to disable gate)
- USER_COOLDOWN_SECONDS (integer; default 25)
- GLOBAL_RPM_SOFT (integer; default 350)
- MAX_TOKENS (integer; default 320)
- TEMPERATURE (float; default 0.5)

Notes
- Gradio queue/concurrency is configured at the bottom of this file.
- This app never exposes your API key to the browser; it resides server-side.
- Keep messages concise to stay under TPM; adjust MAX_TOKENS as needed.
"""

# ---------------------------------------------------------------
# Default game-master system prompt (overridable via SYSTEM_PROMPT)
# ---------------------------------------------------------------
DEFAULT_RPG_PROMPT = (
    """
You are a game master for a cybersecurity role-playing game (RPG) designed for short, high-impact training sessions. You are a cybersecurity expert with over 25 years of experience and a deep understanding of gamification and instructional design. Your role is to guide players through an engaging 30-minute cybersecurity RPG, with a fast-paced structure of approximately one turn per minute.

Your audience consists of CEOs, CFOs, and other senior executives at small to mid-market privately owned companies. These participants typically have limited technical expertise, so you emphasize business impact, risk management, and decision-making over technical minutiae. Scenarios focus on real-world threats like ransomware, phishing, insider risk, third-party breaches, and regulatory concerns.

Scenarios are designed to be industry-agnostic but draw on a broad range of sector-relevant examples—from manufacturing to healthcare to finance—depending on context or user preference. You avoid assuming any specific industry background but remain ready to adapt if one is mentioned.

Before each session, you present optional pre-game briefing materials. These include a brief company profile (size, market, leadership structure), the current simulated date, and character roles such as CEO, General Counsel, IT Director, and Head of Operations. These materials help participants quickly get into role and better understand the situation they’re stepping into. You explain these materials clearly and briefly so the game can start smoothly.

You use plain, non-technical language and draw analogies to familiar business risks to explain complex ideas. Your tone is confident, engaging, and accessible, aiming to empower rather than intimidate. You avoid jargon unless requested and provide short, clear feedback after each decision to reinforce learning.

Participants respond freely rather than selecting from multiple choice options. You interpret open-ended answers and adapt the story naturally. You do not suggest example actions or options unless explicitly asked. Players are also free to ask questions or seek advice from in-game characters (e.g., CFO, legal counsel, IT lead) at any time, and you roleplay their responses to guide decision-making.

You adapt the game’s pace and tone to suit the audience, occasionally using humor or tension to build engagement. You ensure every session ends with 2–3 clear takeaways relevant to executive roles. You ask clarifying questions only when necessary and always prioritize immersion and flow.
    """
).strip()

# ---------------------------------------------------------------
# Runtime configuration (environment-driven, with defaults)
# ---------------------------------------------------------------
OPENAI_MODEL = os.getenv("OPENAI_MODEL", "gpt-5-mini")
SYSTEM_PROMPT = os.getenv("SYSTEM_PROMPT", DEFAULT_RPG_PROMPT)
EVENT_CODE = os.getenv("EVENT_CODE", "").strip()  # blank disables the gate
USER_COOLDOWN_SECONDS = int(os.getenv("USER_COOLDOWN_SECONDS", "25"))
GLOBAL_RPM_SOFT = int(os.getenv("GLOBAL_RPM_SOFT", "350"))  # soft limit; app self-throttles
MAX_TOKENS = int(os.getenv("MAX_TOKENS", "320"))
TEMPERATURE = float(os.getenv("TEMPERATURE", "0.5"))

# Fail fast at startup rather than on the first chat request.
if not os.getenv("OPENAI_API_KEY"):
    raise RuntimeError("OPENAI_API_KEY is not set. Add it in Space Settings → Secrets.")

client = OpenAI(api_key=os.environ["OPENAI_API_KEY"])

# ---------------------------------------------------------------
# Rate-limiting state (shared across requests, guarded by locks)
# ---------------------------------------------------------------
# Global RPM token bucket (soft cap): at most GLOBAL_RPM_SOFT requests per 60s.
_glob_lock = Lock()
_glob_requests: deque = deque()  # timestamps of recent requests

# Per-IP cooldown: one message per USER_COOLDOWN_SECONDS per IP.
_ip_lock = Lock()
_ip_last: dict[str, float] = defaultdict(float)  # float() == 0.0 default
 
 
 
 
 
 
 
 
77
 
 
 
78
 
79
def now() -> float:
    """Current wall-clock time in seconds since the epoch (thin, testable wrapper)."""
    return time.time()
81
 
82
+
83
def check_global_rpm() -> tuple[bool, int]:
    """Token-bucket check against the global soft RPM cap.

    Returns (allowed, wait_seconds); wait_seconds is 0 when allowed.
    """
    with _glob_lock:
        current = now()
        # Drop timestamps that have aged out of the rolling 60-second window.
        while _glob_requests and current - _glob_requests[0] > 60:
            _glob_requests.popleft()
        if len(_glob_requests) < GLOBAL_RPM_SOFT:
            # Reserve a slot for this request.
            _glob_requests.append(current)
            return True, 0
        # Saturated: advise waiting until the oldest request leaves the window.
        retry_in = int(60 - (current - _glob_requests[0])) + 1
        return False, max(retry_in, 1)
97
+
98
+
99
def check_ip_cooldown(ip: str) -> tuple[bool, int]:
    """Per-IP throttle: one message per USER_COOLDOWN_SECONDS per IP.

    Returns (allowed, wait_seconds); an empty/unknown IP is always allowed.
    """
    if not ip:
        return True, 0
    with _ip_lock:
        current = now()
        elapsed = current - _ip_last[ip]
        if elapsed >= USER_COOLDOWN_SECONDS:
            _ip_last[ip] = current
            return True, 0
        return False, int(USER_COOLDOWN_SECONDS - elapsed) + 1
111
+
112
+
113
+ # -----------------
114
+ # Moderation
115
+ # -----------------
116
+
117
def is_allowed_by_moderation(text: str) -> bool:
    """Screen *text* with the OpenAI moderation endpoint.

    Returns False only when the API flags the content. Any API failure is
    treated as allowed (fail-open); flip the except branch to return False
    for fail-closed behavior.
    """
    try:
        verdict = client.moderations.create(
            model="omni-moderation-latest", input=text
        )
        return not bool(verdict.results[0].flagged)
    except Exception:
        return True
125
+
126
+
127
+ # -----------------
128
+ # Chat logic
129
+ # -----------------
130
+
131
def build_messages(history: list[dict], message: str, briefing: str | None, turn_index: int) -> list[dict]:
    """Assemble the Chat Completions message list for one turn.

    Layers, in order: the RPG system prompt, a per-turn pacing hint, the
    optional user-supplied briefing (re-sent every turn so context survives
    without relying on prior system messages), the prior user/assistant
    history, and finally the new user message. Long texts are truncated
    (briefing to 4000 chars, each turn to 6000) to control TPM usage.
    """
    msgs: list[dict] = [{"role": "system", "content": SYSTEM_PROMPT}]

    # Dynamic pacing hint as an additional system instruction.
    pacing_hint = (
        f"Turn pacing: This is turn {turn_index} of ~30. Maintain ~one turn per minute. "
        "Keep language plain and business-focused. Provide short feedback after each decision. "
        "Do not list options unless asked."
    )
    msgs.append({"role": "system", "content": pacing_hint})

    # Include the pre-game briefing on every turn.
    if briefing:
        # BUG FIX: this f-string previously contained a raw newline inside a
        # single-quoted literal — a SyntaxError. Use an explicit "\n" escape.
        msgs.append({
            "role": "system",
            "content": f"Pre-game briefing (user-provided):\n{briefing[:4000]}",
        })

    # Replay prior turns, skipping anything malformed and truncating
    # pathological long turns in history to control TPM.
    for m in history or []:
        role = m.get("role")
        content = m.get("content")
        if role in ("user", "assistant") and isinstance(content, str):
            msgs.append({"role": role, "content": content[:6000]})

    msgs.append({"role": "user", "content": message[:6000]})
    return msgs
159
+
160
+
161
def extract_ip(req: gr.Request) -> str:
    """Best-effort client IP extraction for rate limiting.

    Prefers the first hop of X-Forwarded-For (HF Spaces sit behind a proxy),
    falling back to the direct client host. Returns "" when nothing usable
    is available — callers treat "" as "skip per-IP throttling".
    """
    # FIX: guard a missing request explicitly instead of letting
    # `req.headers` raise AttributeError and relying on the broad except.
    if req is None:
        return ""
    try:
        # Behind the HF proxy, X-Forwarded-For may contain a comma-separated list.
        forwarded = (req.headers.get("x-forwarded-for") or "").split(",")[0].strip()
        if forwarded:
            return forwarded
        return req.client.host if req.client else ""
    except Exception:
        return ""
168
+
169
+
170
def predict(message, history, event_code, briefing_text, request: gr.Request):
    """Gradio ChatInterface handler: gate, throttle, moderate, then stream.

    Yields progressively longer strings (Gradio's streaming convention).
    Short-circuits with a user-facing message when the event-code gate,
    per-IP cooldown, global RPM cap, or moderation check rejects the turn.
    """
    # 1) Event code gate (disabled when EVENT_CODE is blank)
    if EVENT_CODE and (event_code or "").strip() != EVENT_CODE:
        yield "Enter the event code shown on the slide to use the demo."
        return

    # 2) Per-IP cooldown
    ip = extract_ip(request)
    ok_ip, wait_ip = check_ip_cooldown(ip)
    if not ok_ip:
        yield f"You’re sending messages too fast. Try again in ~{wait_ip}s."
        return

    # 3) Global RPM soft limit
    ok_rpm, wait_rpm = check_global_rpm()
    if not ok_rpm:
        yield f"We’re a bit busy. Please retry in ~{wait_rpm}s."
        return

    # 4) Moderation (pre)
    if not is_allowed_by_moderation(str(message)):
        yield "Let’s keep it appropriate. Try another question."
        return

    # 5) Build messages & call OpenAI with streaming
    try:
        # Turn index = number of user messages so far + 1
        turn_index = sum(1 for m in (history or []) if m.get("role") == "user") + 1
        messages = build_messages(history, message, (briefing_text or "").strip(), turn_index)
        # NOTE(review): newer model families may reject `max_tokens` and
        # non-default `temperature` in favor of `max_completion_tokens` —
        # confirm against the configured OPENAI_MODEL.
        stream = client.chat.completions.create(
            model=OPENAI_MODEL,
            messages=messages,
            temperature=TEMPERATURE,
            max_tokens=MAX_TOKENS,
            stream=True,
        )
        partial = []
        for chunk in stream:
            # FIX: some streaming chunks carry an empty `choices` list
            # (e.g. trailing usage chunks); indexing [0] unguarded raised
            # IndexError and aborted the stream with a generic error.
            if not chunk.choices:
                continue
            delta = chunk.choices[0].delta.content or ""
            if delta:
                partial.append(delta)
            yield "".join(partial)
    except Exception as e:
        # Friendly error surfacing for rate limits / network hiccups
        err_text = str(e)
        if "Rate limit" in err_text or "429" in err_text:
            yield "We hit a provider rate limit briefly. Please wait a few seconds and try again."
        else:
            yield "Something went wrong while contacting the model. Please try again."
219
+
220
+
221
+ # -----------------
222
+ # UI
223
+ # -----------------
224
# -----------------
# UI
# -----------------
TITLE = "Cybersecurity RPG — Executive Edition"
DESC = (
    "30-minute, fast-paced tabletop simulation. Ask/answer in plain language. No PII/PHI. "
    "Pacing hint: ~one turn per minute; short feedback after each decision."
)

# Extra inputs rendered under the chat box: passcode gate + optional briefing.
_extra_inputs = [
    gr.Textbox(label="Event code", type="password", value=""),
    gr.Textbox(label="Pre-game briefing (optional): company profile, simulated date, roles", lines=4),
]

chat = gr.ChatInterface(
    fn=predict,
    type="messages",
    title=TITLE,
    description=DESC,
    additional_inputs=_extra_inputs,
    # NOTE(review): retry_btn/undo_btn/clear_btn were removed from
    # gr.ChatInterface in Gradio 5.x — confirm this Space pins Gradio 4.x,
    # otherwise these kwargs raise TypeError at startup.
    retry_btn=None,
    undo_btn=None,
    clear_btn=True,
)

# Queue tuning: absorb event-day bursts.
# - default_concurrency_limit=80 lets many requests process simultaneously
# - max_size=300 provides a visible queue during spikes
chat = chat.queue(default_concurrency_limit=80, max_size=300)

if __name__ == "__main__":
    chat.launch()