Spaces:

TCOps-Jeff
/

PLSCyberTableTop

Paused

File size: 10,531 Bytes

import os
import time
from collections import deque, defaultdict
from threading import Lock
import gradio as gr
from openai import OpenAI

"""
Hugging Face Space — Gradio app for a live, executive-focused Cybersecurity RPG
- Streams responses from OpenAI Chat Completions
- Per-IP cooldown + global RPM soft limit (token bucket)
- Optional event passcode gate
- OpenAI Moderation API on each incoming message
- Optional pre-game briefing field

Required Secrets/Variables (set in Space Settings → Secrets & Variables):
  - TableTopKey              (Secret)
Optional Variables (provide sane defaults below):
  - OPENAI_MODEL             (e.g., "gpt-5-mini")
  - SYSTEM_PROMPT            (overrides the default RPG prompt below)
  - EVENT_CODE               (short passcode shown on your slide; leave blank to disable gate)
  - USER_COOLDOWN_SECONDS    (integer; default 25)
  - GLOBAL_RPM_SOFT          (integer; default 350)
  - MAX_TOKENS               (integer; default 320)
  - TEMPERATURE              (float; default 0.5)

Notes
- Gradio queue/concurrency is configured at the bottom of this file.
- This app never exposes your API key to the browser; it resides server-side.
- Keep messages concise to stay under TPM; adjust MAX_TOKENS as needed.
"""

# -----------------
# Config & Clients
# -----------------
DEFAULT_RPG_PROMPT = """
You are a game master for a cybersecurity role-playing game (RPG) designed for short, high-impact training sessions. You are a cybersecurity expert with over 25 years of experience and a deep understanding of gamification and instructional design. Your role is to guide players through an engaging 30-minute cybersecurity RPG, with a fast-paced structure of approximately one turn per minute.

Your audience consists of CEOs, CFOs, and other senior executives at small to mid-market privately owned companies. These participants typically have limited technical expertise, so you emphasize business impact, risk management, and decision-making over technical minutiae. Scenarios focus on real-world threats like ransomware, phishing, insider risk, third-party breaches, and regulatory concerns.

Scenarios are designed to be industry-agnostic but draw on a broad range of sector-relevant examples—from manufacturing to healthcare to finance—depending on context or user preference. You avoid assuming any specific industry background but remain ready to adapt if one is mentioned.

Before each session, you present optional pre-game briefing materials. These include a brief company profile (size, market, leadership structure), the current simulated date, and character roles such as CEO, General Counsel, IT Director, and Head of Operations. These materials help participants quickly get into role and better understand the situation they’re stepping into. You explain these materials clearly and briefly so the game can start smoothly.

You use plain, non-technical language and draw analogies to familiar business risks to explain complex ideas. Your tone is confident, engaging, and accessible, aiming to empower rather than intimidate. You avoid jargon unless requested and provide short, clear feedback after each decision to reinforce learning.

Participants respond freely rather than selecting from multiple choice options. You interpret open-ended answers and adapt the story naturally. You do not suggest example actions or options unless explicitly asked. Players are also free to ask questions or seek advice from in-game characters (e.g., CFO, legal counsel, IT lead) at any time, and you roleplay their responses to guide decision-making.

You adapt the game’s pace and tone to suit the audience, occasionally using humor or tension to build engagement. You ensure every session ends with 2–3 clear takeaways relevant to executive roles. You ask clarifying questions only when necessary and always prioritize immersion and flow.
"""

OPENAI_MODEL = os.getenv("OPENAI_MODEL", "gpt-5-mini")
SYSTEM_PROMPT = os.getenv("SYSTEM_PROMPT", DEFAULT_RPG_PROMPT)
EVENT_CODE = os.getenv("EVENT_CODE", "").strip()
USER_COOLDOWN_SECONDS = int(os.getenv("USER_COOLDOWN_SECONDS", "25"))
GLOBAL_RPM_SOFT = int(os.getenv("GLOBAL_RPM_SOFT", "350"))  # soft limit; app self-throttles
MAX_TOKENS = int(os.getenv("MAX_TOKENS", "320"))
TEMPERATURE = float(os.getenv("TEMPERATURE", "0.5"))

if not os.getenv("TableTopKey"):
    raise RuntimeError("TableTopKey is not set. Add it in Space Settings → Secrets.")

client = OpenAI(api_key=os.environ["TableTopKey"])

# -----------------
# Rate limiting
# -----------------
# Global RPM token bucket (soft cap): allow up to GLOBAL_RPM_SOFT requests in the last 60s
_glob_lock = Lock()
_glob_requests = deque()  # timestamps of recent requests

# Per-IP cooldown: one message per USER_COOLDOWN_SECONDS per IP
_ip_lock = Lock()
_ip_last = defaultdict(lambda: 0.0)


def _now():
    return time.time()


def check_global_rpm():
    """Return (allowed: bool, wait_seconds: int)."""
    with _glob_lock:
        t = _now()
        # prune entries older than 60s
        while _glob_requests and (t - _glob_requests[0] > 60):
            _glob_requests.popleft()
        if len(_glob_requests) >= GLOBAL_RPM_SOFT:
            # wait until the oldest request exits the 60s window
            wait = int(60 - (t - _glob_requests[0])) + 1
            return False, max(wait, 1)
        # reserve a slot
        _glob_requests.append(t)
        return True, 0


def check_ip_cooldown(ip):
    """Return (allowed: bool, wait_seconds: int)."""
    if not ip:
        return True, 0
    with _ip_lock:
        t = _now()
        last = _ip_last[ip]
        delta = t - last
        if delta < USER_COOLDOWN_SECONDS:
            return False, int(USER_COOLDOWN_SECONDS - delta) + 1
        _ip_last[ip] = t
        return True, 0


# -----------------
# Moderation
# -----------------

def is_allowed_by_moderation(text):
    try:
        res = client.moderations.create(model="omni-moderation-latest", input=text)
        # Block if flagged
        return not bool(res.results[0].flagged)
    except Exception:
        # If moderation is unavailable, choose to allow (fail-open). Flip to fail-closed if preferred.
        return True


# -----------------
# Chat logic
# -----------------

def build_messages(history, message, briefing, turn_index):
    msgs = [{"role": "system", "content": SYSTEM_PROMPT}]

    # Dynamic pacing hint as an additional system instruction
    pacing_hint = (
        f"Turn pacing: This is turn {turn_index} of ~30. Maintain ~one turn per minute. "
        "Keep language plain and business-focused. Provide short feedback after each decision. "
        "Do not list options unless asked."
    )
    msgs.append({"role": "system", "content": pacing_hint})

    # Include pre-game briefing on every turn (keeps context without relying on prior system messages)
    if isinstance(briefing, str) and briefing.strip():
        msgs.append({
            "role": "system",
            "content": "Pre-game briefing (user-provided):\n" + briefing[:4000]
        })

    for m in (history or []):
        role = m.get("role")
        content = m.get("content")
        if role in ("user", "assistant") and isinstance(content, str):
            # Truncate long turns in history to control TPM
            msgs.append({"role": role, "content": content[:6000]})

    msgs.append({"role": "user", "content": (message or "")[:6000]})
    return msgs


def extract_ip(req: gr.Request) -> str:
    try:
        # Behind HF proxy, X-Forwarded-For may contain a list
        fwd = (req.headers.get("x-forwarded-for") or "").split(",")[0].strip()
        return fwd or (req.client.host if getattr(req, "client", None) else "")
    except Exception:
        return ""


def predict(message, history, event_code, briefing_text, request: gr.Request):
    # 1) Event code gate
    if EVENT_CODE and (event_code or "").strip() != EVENT_CODE:
        yield "Enter the event code shown on the slide to use the demo."
        return

    # 2) Per-IP cooldown
    ip = extract_ip(request)
    ok_ip, wait_ip = check_ip_cooldown(ip)
    if not ok_ip:
        yield f"You’re sending messages too fast. Try again in ~{wait_ip}s."
        return

    # 3) Global RPM soft limit
    ok_rpm, wait_rpm = check_global_rpm()
    if not ok_rpm:
        yield f"We’re a bit busy. Please retry in ~{wait_rpm}s."
        return

    # 4) Moderation (pre)
    if not is_allowed_by_moderation(str(message)):
        yield "Let’s keep it appropriate. Try another question."
        return

    # 5) Build messages & call OpenAI with streaming
    try:
        # Turn index = number of user messages so far + 1
        turn_index = sum(1 for m in (history or []) if m.get("role") == "user") + 1
        messages = build_messages(history, message, (briefing_text or "").strip(), turn_index)
        stream = client.chat.completions.create(
            model=OPENAI_MODEL,
            messages=messages,
            temperature=TEMPERATURE,
            max_tokens=MAX_TOKENS,
            stream=True,
        )
        partial = []
        for chunk in stream:
            delta = chunk.choices[0].delta.content or ""
            if delta:
                partial.append(delta)
                yield "".join(partial)
    except Exception as e:
        # Friendly error surfacing for rate limits / network hiccups
        msg = str(e)
        if "Rate limit" in msg or "429" in msg:
            yield "We hit a provider rate limit briefly. Please wait a few seconds and try again."
        else:
            yield "Something went wrong while contacting the model. Please try again."


# -----------------
# UI
# -----------------
TITLE = "Cybersecurity RPG — Executive Edition"
DESC = (
    "30-minute, fast-paced tabletop simulation. Ask/answer in plain language. No PII/PHI. "
    "Pacing hint: ~one turn per minute; short feedback after each decision."
)

chat = gr.ChatInterface(
    fn=predict,
    type="messages",
    title=TITLE,
    description=DESC,
    additional_inputs=[
        gr.Textbox(label="Event code", type="password", value=""),
        gr.Textbox(label="Pre-game briefing (optional): company profile, simulated date, roles", lines=4),
    ],
)

# Queue: allow high concurrency and buffer bursts
# - default_concurrency_limit=80 lets many requests process simultaneously
# - max_size=300 provides a visible queue during spikes
chat = chat.queue(default_concurrency_limit=80, max_size=300)

if __name__ == "__main__":
    chat.launch()