jefmon01 commited on
Commit
f29395d
·
1 Parent(s): 285c545

Update space

Browse files
Files changed (1) hide show
  1. app.py +233 -52
app.py CHANGED
@@ -1,70 +1,251 @@
 
 
 
 
1
  import gradio as gr
2
- from huggingface_hub import InferenceClient
3
-
4
-
5
- def respond(
6
- message,
7
- history: list[dict[str, str]],
8
- system_message,
9
- max_tokens,
10
- temperature,
11
- top_p,
12
- hf_token: gr.OAuthToken,
13
- ):
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
14
  """
15
- For more information on `huggingface_hub` Inference API support, please check the docs: https://huggingface.co/docs/huggingface_hub/v0.22.2/en/guides/inference
 
 
 
 
 
 
 
 
 
 
 
 
16
  """
17
- client = InferenceClient(token=hf_token.token, model="openai/gpt-oss-20b")
18
 
19
- messages = [{"role": "system", "content": system_message}]
 
 
 
 
 
 
20
 
21
- messages.extend(history)
 
22
 
23
- messages.append({"role": "user", "content": message})
24
 
25
- response = ""
 
 
 
 
 
26
 
27
- for message in client.chat_completion(
28
- messages,
29
- max_tokens=max_tokens,
30
- stream=True,
31
- temperature=temperature,
32
- top_p=top_p,
33
- ):
34
- choices = message.choices
35
- token = ""
36
- if len(choices) and choices[0].delta.content:
37
- token = choices[0].delta.content
38
 
39
- response += token
40
- yield response
41
 
 
 
42
 
43
- """
44
- For information on how to customize the ChatInterface, peruse the gradio docs: https://www.gradio.app/docs/chatinterface
45
- """
46
- chatbot = gr.ChatInterface(
47
- respond,
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
48
  type="messages",
 
 
49
  additional_inputs=[
50
- gr.Textbox(value="You are a friendly Chatbot.", label="System message"),
51
- gr.Slider(minimum=1, maximum=2048, value=512, step=1, label="Max new tokens"),
52
- gr.Slider(minimum=0.1, maximum=4.0, value=0.7, step=0.1, label="Temperature"),
53
- gr.Slider(
54
- minimum=0.1,
55
- maximum=1.0,
56
- value=0.95,
57
- step=0.05,
58
- label="Top-p (nucleus sampling)",
59
- ),
60
  ],
 
 
 
61
  )
62
 
63
- with gr.Blocks() as demo:
64
- with gr.Sidebar():
65
- gr.LoginButton()
66
- chatbot.render()
67
-
68
 
69
  if __name__ == "__main__":
70
- demo.launch()
 
1
import os
import time
from collections import deque, defaultdict
from threading import Lock
import gradio as gr
from openai import OpenAI

"""
Hugging Face Space — Gradio app for a live, executive-focused Cybersecurity RPG
- Streams responses from OpenAI Chat Completions
- Per-IP cooldown + global RPM soft limit (token bucket)
- Optional event passcode gate
- OpenAI Moderation API on each incoming message
- Optional pre-game briefing field

Required Secrets/Variables (set in Space Settings → Secrets & Variables):
- OPENAI_API_KEY (Secret)
Optional Variables (provide sane defaults below):
- OPENAI_MODEL (e.g., "gpt-5-mini")
- SYSTEM_PROMPT (overrides the default RPG prompt below)
- EVENT_CODE (short passcode shown on your slide; leave blank to disable gate)
- USER_COOLDOWN_SECONDS (integer; default 25)
- GLOBAL_RPM_SOFT (integer; default 350)
- MAX_TOKENS (integer; default 320)
- TEMPERATURE (float; default 0.5)

Notes
- Gradio queue/concurrency is configured at the bottom of this file.
- This app never exposes your API key to the browser; it resides server-side.
- Keep messages concise to stay under TPM; adjust MAX_TOKENS as needed.
"""

# ---------------------------------------------------------------
# Default game-master system prompt (overridable via SYSTEM_PROMPT)
# ---------------------------------------------------------------
DEFAULT_RPG_PROMPT = (
    """
You are a game master for a cybersecurity role-playing game (RPG) designed for short, high-impact training sessions. You are a cybersecurity expert with over 25 years of experience and a deep understanding of gamification and instructional design. Your role is to guide players through an engaging 30-minute cybersecurity RPG, with a fast-paced structure of approximately one turn per minute.

Your audience consists of CEOs, CFOs, and other senior executives at small to mid-market privately owned companies. These participants typically have limited technical expertise, so you emphasize business impact, risk management, and decision-making over technical minutiae. Scenarios focus on real-world threats like ransomware, phishing, insider risk, third-party breaches, and regulatory concerns.

Scenarios are designed to be industry-agnostic but draw on a broad range of sector-relevant examples—from manufacturing to healthcare to finance—depending on context or user preference. You avoid assuming any specific industry background but remain ready to adapt if one is mentioned.

Before each session, you present optional pre-game briefing materials. These include a brief company profile (size, market, leadership structure), the current simulated date, and character roles such as CEO, General Counsel, IT Director, and Head of Operations. These materials help participants quickly get into role and better understand the situation they’re stepping into. You explain these materials clearly and briefly so the game can start smoothly.

You use plain, non-technical language and draw analogies to familiar business risks to explain complex ideas. Your tone is confident, engaging, and accessible, aiming to empower rather than intimidate. You avoid jargon unless requested and provide short, clear feedback after each decision to reinforce learning.

Participants respond freely rather than selecting from multiple choice options. You interpret open-ended answers and adapt the story naturally. You do not suggest example actions or options unless explicitly asked. Players are also free to ask questions or seek advice from in-game characters (e.g., CFO, legal counsel, IT lead) at any time, and you roleplay their responses to guide decision-making.

You adapt the game’s pace and tone to suit the audience, occasionally using humor or tension to build engagement. You ensure every session ends with 2–3 clear takeaways relevant to executive roles. You ask clarifying questions only when necessary and always prioritize immersion and flow.
    """
).strip()

# ---------------------------------------------------------------
# Runtime configuration (environment-driven, with defaults)
# ---------------------------------------------------------------
OPENAI_MODEL = os.getenv("OPENAI_MODEL", "gpt-5-mini")
SYSTEM_PROMPT = os.getenv("SYSTEM_PROMPT", DEFAULT_RPG_PROMPT)
EVENT_CODE = os.getenv("EVENT_CODE", "").strip()  # blank disables the gate
USER_COOLDOWN_SECONDS = int(os.getenv("USER_COOLDOWN_SECONDS", "25"))
GLOBAL_RPM_SOFT = int(os.getenv("GLOBAL_RPM_SOFT", "350"))  # soft limit; app self-throttles
MAX_TOKENS = int(os.getenv("MAX_TOKENS", "320"))
TEMPERATURE = float(os.getenv("TEMPERATURE", "0.5"))

# Fail fast at startup rather than on the first chat request.
if not os.getenv("OPENAI_API_KEY"):
    raise RuntimeError("OPENAI_API_KEY is not set. Add it in Space Settings → Secrets.")

client = OpenAI(api_key=os.environ["OPENAI_API_KEY"])

# ---------------------------------------------------------------
# Rate-limiting state (shared across requests, guarded by locks)
# ---------------------------------------------------------------
# Global RPM token bucket (soft cap): at most GLOBAL_RPM_SOFT requests per 60s.
_glob_lock = Lock()
_glob_requests: deque = deque()  # timestamps of recent requests

# Per-IP cooldown: one message per USER_COOLDOWN_SECONDS per IP.
_ip_lock = Lock()
_ip_last: dict[str, float] = defaultdict(float)  # float() == 0.0 default
 
 
 
 
 
 
 
 
77
 
 
 
78
 
79
def now() -> float:
    """Current wall-clock time in seconds since the epoch (thin, testable wrapper)."""
    return time.time()
81
 
82
+
83
def check_global_rpm() -> tuple[bool, int]:
    """Token-bucket check against the global soft RPM cap.

    Returns (allowed, wait_seconds); wait_seconds is 0 when allowed.
    """
    with _glob_lock:
        current = now()
        # Drop timestamps that have aged out of the rolling 60-second window.
        while _glob_requests and current - _glob_requests[0] > 60:
            _glob_requests.popleft()
        if len(_glob_requests) < GLOBAL_RPM_SOFT:
            # Reserve a slot for this request.
            _glob_requests.append(current)
            return True, 0
        # Saturated: advise waiting until the oldest request leaves the window.
        retry_in = int(60 - (current - _glob_requests[0])) + 1
        return False, max(retry_in, 1)
97
+
98
+
99
def check_ip_cooldown(ip: str) -> tuple[bool, int]:
    """Per-IP throttle: one message per USER_COOLDOWN_SECONDS per IP.

    Returns (allowed, wait_seconds); an empty/unknown IP is always allowed.
    """
    if not ip:
        return True, 0
    with _ip_lock:
        current = now()
        elapsed = current - _ip_last[ip]
        if elapsed >= USER_COOLDOWN_SECONDS:
            _ip_last[ip] = current
            return True, 0
        return False, int(USER_COOLDOWN_SECONDS - elapsed) + 1
111
+
112
+
113
+ # -----------------
114
+ # Moderation
115
+ # -----------------
116
+
117
def is_allowed_by_moderation(text: str) -> bool:
    """Screen *text* with the OpenAI moderation endpoint.

    Returns False only when the API flags the content. Any API failure is
    treated as allowed (fail-open); flip the except branch to return False
    for fail-closed behavior.
    """
    try:
        verdict = client.moderations.create(
            model="omni-moderation-latest", input=text
        )
        return not bool(verdict.results[0].flagged)
    except Exception:
        return True
125
+
126
+
127
+ # -----------------
128
+ # Chat logic
129
+ # -----------------
130
+
131
def build_messages(history: list[dict], message: str, briefing: str | None, turn_index: int) -> list[dict]:
    """Assemble the Chat Completions message list for one turn.

    Layers, in order: the RPG system prompt, a per-turn pacing hint, the
    optional user-supplied briefing (re-sent every turn so context survives
    without relying on prior system messages), the prior user/assistant
    history, and finally the new user message. Long texts are truncated
    (briefing to 4000 chars, each turn to 6000) to control TPM usage.
    """
    msgs: list[dict] = [{"role": "system", "content": SYSTEM_PROMPT}]

    # Dynamic pacing hint as an additional system instruction.
    pacing_hint = (
        f"Turn pacing: This is turn {turn_index} of ~30. Maintain ~one turn per minute. "
        "Keep language plain and business-focused. Provide short feedback after each decision. "
        "Do not list options unless asked."
    )
    msgs.append({"role": "system", "content": pacing_hint})

    # Include the pre-game briefing on every turn.
    if briefing:
        # BUG FIX: this f-string previously contained a raw newline inside a
        # single-quoted literal — a SyntaxError. Use an explicit "\n" escape.
        msgs.append({
            "role": "system",
            "content": f"Pre-game briefing (user-provided):\n{briefing[:4000]}",
        })

    # Replay prior turns, skipping anything malformed and truncating
    # pathological long turns in history to control TPM.
    for m in history or []:
        role = m.get("role")
        content = m.get("content")
        if role in ("user", "assistant") and isinstance(content, str):
            msgs.append({"role": role, "content": content[:6000]})

    msgs.append({"role": "user", "content": message[:6000]})
    return msgs
159
+
160
+
161
def extract_ip(req: gr.Request) -> str:
    """Best-effort client IP extraction for rate limiting.

    Prefers the first hop of X-Forwarded-For (HF Spaces sit behind a proxy),
    falling back to the direct client host. Returns "" when nothing usable
    is available — callers treat "" as "skip per-IP throttling".
    """
    # FIX: guard a missing request explicitly instead of letting
    # `req.headers` raise AttributeError and relying on the broad except.
    if req is None:
        return ""
    try:
        # Behind the HF proxy, X-Forwarded-For may contain a comma-separated list.
        forwarded = (req.headers.get("x-forwarded-for") or "").split(",")[0].strip()
        if forwarded:
            return forwarded
        return req.client.host if req.client else ""
    except Exception:
        return ""
168
+
169
+
170
def predict(message, history, event_code, briefing_text, request: gr.Request):
    """Gradio ChatInterface handler: gate, throttle, moderate, then stream.

    Yields progressively longer strings (Gradio's streaming convention).
    Short-circuits with a user-facing message when the event-code gate,
    per-IP cooldown, global RPM cap, or moderation check rejects the turn.
    """
    # 1) Event code gate (disabled when EVENT_CODE is blank)
    if EVENT_CODE and (event_code or "").strip() != EVENT_CODE:
        yield "Enter the event code shown on the slide to use the demo."
        return

    # 2) Per-IP cooldown
    ip = extract_ip(request)
    ok_ip, wait_ip = check_ip_cooldown(ip)
    if not ok_ip:
        yield f"You’re sending messages too fast. Try again in ~{wait_ip}s."
        return

    # 3) Global RPM soft limit
    ok_rpm, wait_rpm = check_global_rpm()
    if not ok_rpm:
        yield f"We’re a bit busy. Please retry in ~{wait_rpm}s."
        return

    # 4) Moderation (pre)
    if not is_allowed_by_moderation(str(message)):
        yield "Let’s keep it appropriate. Try another question."
        return

    # 5) Build messages & call OpenAI with streaming
    try:
        # Turn index = number of user messages so far + 1
        turn_index = sum(1 for m in (history or []) if m.get("role") == "user") + 1
        messages = build_messages(history, message, (briefing_text or "").strip(), turn_index)
        # NOTE(review): newer model families may reject `max_tokens` and
        # non-default `temperature` in favor of `max_completion_tokens` —
        # confirm against the configured OPENAI_MODEL.
        stream = client.chat.completions.create(
            model=OPENAI_MODEL,
            messages=messages,
            temperature=TEMPERATURE,
            max_tokens=MAX_TOKENS,
            stream=True,
        )
        partial = []
        for chunk in stream:
            # FIX: some streaming chunks carry an empty `choices` list
            # (e.g. trailing usage chunks); indexing [0] unguarded raised
            # IndexError and aborted the stream with a generic error.
            if not chunk.choices:
                continue
            delta = chunk.choices[0].delta.content or ""
            if delta:
                partial.append(delta)
            yield "".join(partial)
    except Exception as e:
        # Friendly error surfacing for rate limits / network hiccups
        err_text = str(e)
        if "Rate limit" in err_text or "429" in err_text:
            yield "We hit a provider rate limit briefly. Please wait a few seconds and try again."
        else:
            yield "Something went wrong while contacting the model. Please try again."
219
+
220
+
221
+ # -----------------
222
+ # UI
223
+ # -----------------
224
# -----------------
# UI
# -----------------
TITLE = "Cybersecurity RPG — Executive Edition"
DESC = (
    "30-minute, fast-paced tabletop simulation. Ask/answer in plain language. No PII/PHI. "
    "Pacing hint: ~one turn per minute; short feedback after each decision."
)

# Extra inputs rendered under the chat box: passcode gate + optional briefing.
_extra_inputs = [
    gr.Textbox(label="Event code", type="password", value=""),
    gr.Textbox(label="Pre-game briefing (optional): company profile, simulated date, roles", lines=4),
]

chat = gr.ChatInterface(
    fn=predict,
    type="messages",
    title=TITLE,
    description=DESC,
    additional_inputs=_extra_inputs,
    # NOTE(review): retry_btn/undo_btn/clear_btn were removed from
    # gr.ChatInterface in Gradio 5.x — confirm this Space pins Gradio 4.x,
    # otherwise these kwargs raise TypeError at startup.
    retry_btn=None,
    undo_btn=None,
    clear_btn=True,
)

# Queue tuning: absorb event-day bursts.
# - default_concurrency_limit=80 lets many requests process simultaneously
# - max_size=300 provides a visible queue during spikes
chat = chat.queue(default_concurrency_limit=80, max_size=300)

if __name__ == "__main__":
    chat.launch()