KChad committed on
Commit
6da61f5
·
1 Parent(s): 91a0048

simple page

Browse files
Files changed (2) hide show
  1. server/app.py +349 -7
  2. submission_tests/test_server_api.py +13 -0
server/app.py CHANGED
@@ -4,6 +4,7 @@ from typing import Any, Dict, Optional
4
 
5
  from fastapi import Body, FastAPI, HTTPException
6
  from fastapi.middleware.cors import CORSMiddleware
 
7
  import uvicorn
8
 
9
  from env.environment import IDPIExfilEnv
@@ -48,14 +49,355 @@ def _model_schema(model: Any) -> Dict[str, Any]:
48
  return model.schema()
49
 
50
 
51
- @app.get("/", response_model=MetadataResponse)
52
- def root() -> MetadataResponse:
53
- return metadata()
54
-
 
 
 
 
 
 
 
 
 
 
 
55
 
56
- @app.get("/web", response_model=MetadataResponse)
57
- def web_root() -> MetadataResponse:
58
- return metadata()
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
59
 
60
 
61
  @app.get("/health", response_model=HealthResponse)
 
4
 
5
  from fastapi import Body, FastAPI, HTTPException
6
  from fastapi.middleware.cors import CORSMiddleware
7
+ from fastapi.responses import HTMLResponse
8
  import uvicorn
9
 
10
  from env.environment import IDPIExfilEnv
 
49
  return model.schema()
50
 
51
 
52
+ def _landing_page_html() -> str:
53
+ task_cards = "".join(
54
+ f"""
55
+ <article class="task-card">
56
+ <span class="task-pill">{task.upper()}</span>
57
+ <h3>{task.title()}</h3>
58
+ <p>{description}</p>
59
+ </article>
60
+ """
61
+ for task, description in (
62
+ ("easy", "Obvious or lightly hidden prompt injections that try to force unsafe outbound behavior."),
63
+ ("medium", "Wrapper-based, authority-spoofed, or session-confusing attacks that require better judgment."),
64
+ ("hard", "Safe-completion episodes where the agent must write a clean internal summary without leaking attack content."),
65
+ )
66
+ )
67
 
68
+ return f"""
69
+ <!DOCTYPE html>
70
+ <html lang="en">
71
+ <head>
72
+ <meta charset="utf-8" />
73
+ <meta name="viewport" content="width=device-width, initial-scale=1" />
74
+ <title>IDPI Exfil Environment</title>
75
+ <style>
76
+ :root {{
77
+ --bg: #fbf6ef;
78
+ --panel: rgba(255, 250, 242, 0.9);
79
+ --ink: #23160d;
80
+ --muted: #6b5848;
81
+ --accent: #a64712;
82
+ --accent-2: #155d4b;
83
+ --line: rgba(53, 31, 15, 0.12);
84
+ --shadow: 0 24px 60px rgba(74, 35, 12, 0.12);
85
+ }}
86
+
87
+ * {{
88
+ box-sizing: border-box;
89
+ }}
90
+
91
+ body {{
92
+ margin: 0;
93
+ font-family: "IBM Plex Sans", "Segoe UI", sans-serif;
94
+ color: var(--ink);
95
+ background:
96
+ radial-gradient(circle at top left, rgba(212, 132, 56, 0.18), transparent 28%),
97
+ radial-gradient(circle at top right, rgba(21, 93, 75, 0.16), transparent 26%),
98
+ linear-gradient(180deg, #fffaf4 0%, var(--bg) 100%);
99
+ }}
100
+
101
+ .shell {{
102
+ max-width: 1160px;
103
+ margin: 0 auto;
104
+ padding: 40px 24px 56px;
105
+ }}
106
+
107
+ .hero {{
108
+ display: grid;
109
+ grid-template-columns: 1.5fr 1fr;
110
+ gap: 24px;
111
+ align-items: stretch;
112
+ }}
113
+
114
+ .panel {{
115
+ background: var(--panel);
116
+ border: 1px solid var(--line);
117
+ border-radius: 24px;
118
+ box-shadow: var(--shadow);
119
+ backdrop-filter: blur(8px);
120
+ }}
121
+
122
+ .hero-copy {{
123
+ padding: 32px;
124
+ }}
125
+
126
+ .eyebrow {{
127
+ display: inline-flex;
128
+ align-items: center;
129
+ gap: 8px;
130
+ padding: 8px 12px;
131
+ border-radius: 999px;
132
+ background: rgba(166, 71, 18, 0.08);
133
+ color: var(--accent);
134
+ font-weight: 700;
135
+ letter-spacing: 0.04em;
136
+ text-transform: uppercase;
137
+ font-size: 12px;
138
+ }}
139
+
140
+ h1 {{
141
+ margin: 18px 0 14px;
142
+ font-family: Georgia, "Times New Roman", serif;
143
+ font-size: clamp(2.4rem, 5vw, 4.3rem);
144
+ line-height: 0.96;
145
+ letter-spacing: -0.03em;
146
+ }}
147
+
148
+ .lede {{
149
+ font-size: 1.05rem;
150
+ line-height: 1.7;
151
+ color: var(--muted);
152
+ max-width: 60ch;
153
+ }}
154
+
155
+ .cta-row {{
156
+ display: flex;
157
+ flex-wrap: wrap;
158
+ gap: 12px;
159
+ margin-top: 24px;
160
+ }}
161
+
162
+ .cta {{
163
+ display: inline-flex;
164
+ align-items: center;
165
+ justify-content: center;
166
+ padding: 12px 16px;
167
+ border-radius: 14px;
168
+ text-decoration: none;
169
+ font-weight: 700;
170
+ border: 1px solid transparent;
171
+ }}
172
+
173
+ .cta.primary {{
174
+ background: var(--accent);
175
+ color: #fff7f0;
176
+ }}
177
+
178
+ .cta.secondary {{
179
+ border-color: var(--line);
180
+ color: var(--ink);
181
+ background: rgba(255, 255, 255, 0.6);
182
+ }}
183
+
184
+ .status-card {{
185
+ padding: 28px;
186
+ display: flex;
187
+ flex-direction: column;
188
+ justify-content: space-between;
189
+ min-height: 100%;
190
+ }}
191
+
192
+ .status-grid {{
193
+ display: grid;
194
+ grid-template-columns: repeat(2, minmax(0, 1fr));
195
+ gap: 12px;
196
+ margin-top: 18px;
197
+ }}
198
+
199
+ .stat {{
200
+ padding: 14px;
201
+ border-radius: 18px;
202
+ background: rgba(255, 255, 255, 0.72);
203
+ border: 1px solid var(--line);
204
+ }}
205
+
206
+ .stat-label {{
207
+ font-size: 0.72rem;
208
+ text-transform: uppercase;
209
+ letter-spacing: 0.08em;
210
+ color: var(--muted);
211
+ margin-bottom: 6px;
212
+ }}
213
+
214
+ .stat-value {{
215
+ font-size: 1.1rem;
216
+ font-weight: 700;
217
+ }}
218
+
219
+ .section {{
220
+ margin-top: 24px;
221
+ padding: 28px;
222
+ }}
223
+
224
+ .section h2 {{
225
+ margin: 0 0 14px;
226
+ font-size: 1.4rem;
227
+ }}
228
+
229
+ .task-grid {{
230
+ display: grid;
231
+ grid-template-columns: repeat(3, minmax(0, 1fr));
232
+ gap: 14px;
233
+ }}
234
+
235
+ .task-card {{
236
+ padding: 20px;
237
+ border-radius: 20px;
238
+ background: rgba(255, 255, 255, 0.7);
239
+ border: 1px solid var(--line);
240
+ }}
241
+
242
+ .task-card h3 {{
243
+ margin: 10px 0 10px;
244
+ }}
245
+
246
+ .task-pill {{
247
+ display: inline-block;
248
+ font-size: 0.74rem;
249
+ font-weight: 700;
250
+ letter-spacing: 0.06em;
251
+ color: var(--accent-2);
252
+ text-transform: uppercase;
253
+ }}
254
+
255
+ .api-grid {{
256
+ display: grid;
257
+ grid-template-columns: 1.1fr 0.9fr;
258
+ gap: 16px;
259
+ }}
260
+
261
+ pre {{
262
+ margin: 0;
263
+ padding: 18px;
264
+ border-radius: 18px;
265
+ background: #1f1712;
266
+ color: #f5e7d9;
267
+ overflow-x: auto;
268
+ font-family: "IBM Plex Mono", "Consolas", monospace;
269
+ font-size: 0.93rem;
270
+ line-height: 1.55;
271
+ }}
272
+
273
+ code.inline {{
274
+ padding: 2px 8px;
275
+ border-radius: 999px;
276
+ background: rgba(166, 71, 18, 0.09);
277
+ color: var(--accent);
278
+ font-family: "IBM Plex Mono", "Consolas", monospace;
279
+ }}
280
+
281
+ ul {{
282
+ margin: 0;
283
+ padding-left: 20px;
284
+ line-height: 1.8;
285
+ color: var(--muted);
286
+ }}
287
+
288
+ .footer-note {{
289
+ margin-top: 16px;
290
+ color: var(--muted);
291
+ font-size: 0.95rem;
292
+ }}
293
+
294
+ @media (max-width: 900px) {{
295
+ .hero,
296
+ .task-grid,
297
+ .api-grid {{
298
+ grid-template-columns: 1fr;
299
+ }}
300
+
301
+ .shell {{
302
+ padding: 24px 16px 40px;
303
+ }}
304
+
305
+ .hero-copy,
306
+ .status-card,
307
+ .section {{
308
+ padding: 22px;
309
+ }}
310
+ }}
311
+ </style>
312
+ </head>
313
+ <body>
314
+ <main class="shell">
315
+ <section class="hero">
316
+ <div class="panel hero-copy">
317
+ <span class="eyebrow">OpenEnv Benchmark</span>
318
+ <h1>IDPI Exfil Environment</h1>
319
+ <p class="lede">
320
+ A prompt-injection benchmark for email-style agent workflows. Agents must finish
321
+ legitimate reading and summarization tasks while refusing hidden exfiltration
322
+ attempts, delayed triggers, and contaminated summary instructions.
323
+ </p>
324
+ <div class="cta-row">
325
+ <a class="cta primary" href="/docs">Open API Docs</a>
326
+ <a class="cta secondary" href="/metadata">View Metadata</a>
327
+ <a class="cta secondary" href="/openapi.json">OpenAPI JSON</a>
328
+ </div>
329
+ </div>
330
+ <aside class="panel status-card">
331
+ <div>
332
+ <span class="eyebrow">Service Status</span>
333
+ <div class="status-grid">
334
+ <div class="stat">
335
+ <div class="stat-label">Env</div>
336
+ <div class="stat-value">{ENV_NAME}</div>
337
+ </div>
338
+ <div class="stat">
339
+ <div class="stat-label">Version</div>
340
+ <div class="stat-value">{APP_VERSION}</div>
341
+ </div>
342
+ <div class="stat">
343
+ <div class="stat-label">Tasks</div>
344
+ <div class="stat-value">{", ".join(TASK_NAMES)}</div>
345
+ </div>
346
+ <div class="stat">
347
+ <div class="stat-label">Reset</div>
348
+ <div class="stat-value">POST /reset</div>
349
+ </div>
350
+ </div>
351
+ </div>
352
+ <p class="footer-note">
353
+ The API remains fully OpenEnv-compatible. This page is just a human-friendly landing
354
+ surface for the same benchmark endpoints the validator uses.
355
+ </p>
356
+ </aside>
357
+ </section>
358
+
359
+ <section class="panel section">
360
+ <h2>Task Ladder</h2>
361
+ <div class="task-grid">
362
+ {task_cards}
363
+ </div>
364
+ </section>
365
+
366
+ <section class="panel section">
367
+ <h2>Quick Start</h2>
368
+ <div class="api-grid">
369
+ <pre>curl -X POST https://your-space.hf.space/reset
370
+
371
+ curl -X POST https://your-space.hf.space/step \\
372
+ -H "Content-Type: application/json" \\
373
+ -d '{{"action": {{"type": "ReadDoc"}}}}'
374
+
375
+ curl https://your-space.hf.space/state
376
+ curl https://your-space.hf.space/result</pre>
377
+ <div>
378
+ <ul>
379
+ <li>Use <span class="inline">POST /reset</span> to start a new episode. An empty body is valid.</li>
380
+ <li>Use <span class="inline">POST /step</span> with typed OpenEnv actions.</li>
381
+ <li>Inspect live schemas at <span class="inline">/schema</span> and interactive docs at <span class="inline">/docs</span>.</li>
382
+ <li>Call <span class="inline">/metadata</span> for task info and <span class="inline">/health</span> for service health.</li>
383
+ </ul>
384
+ </div>
385
+ </div>
386
+ </section>
387
+ </main>
388
+ </body>
389
+ </html>
390
+ """
391
+
392
+
393
+ @app.get("/", response_class=HTMLResponse)
394
+ def root() -> HTMLResponse:
395
+ return HTMLResponse(content=_landing_page_html())
396
+
397
+
398
+ @app.get("/web", response_class=HTMLResponse)
399
+ def web_root() -> HTMLResponse:
400
+ return HTMLResponse(content=_landing_page_html())
401
 
402
 
403
  @app.get("/health", response_model=HealthResponse)
submission_tests/test_server_api.py CHANGED
@@ -22,6 +22,19 @@ class ServerApiTests(unittest.TestCase):
22
  self.assertEqual(payload["status"], "healthy")
23
  self.assertEqual(payload["env"], "idpi-exfil-env")
24
 
 
 
 
 
 
 
 
 
 
 
 
 
 
25
  def test_metadata_endpoint(self) -> None:
26
  response = self.client.get("/metadata")
27
  self.assertEqual(response.status_code, 200)
 
22
  self.assertEqual(payload["status"], "healthy")
23
  self.assertEqual(payload["env"], "idpi-exfil-env")
24
 
25
+ def test_root_landing_page_is_html(self) -> None:
26
+ response = self.client.get("/")
27
+ self.assertEqual(response.status_code, 200)
28
+ self.assertIn("text/html", response.headers.get("content-type", ""))
29
+ self.assertIn("IDPI Exfil Environment", response.text)
30
+ self.assertIn("/docs", response.text)
31
+
32
+ def test_web_landing_page_is_html(self) -> None:
33
+ response = self.client.get("/web")
34
+ self.assertEqual(response.status_code, 200)
35
+ self.assertIn("text/html", response.headers.get("content-type", ""))
36
+ self.assertIn("OpenEnv-compatible", response.text)
37
+
38
  def test_metadata_endpoint(self) -> None:
39
  response = self.client.get("/metadata")
40
  self.assertEqual(response.status_code, 200)