Kaushik Rajan committed on
Commit
ce03581
·
1 Parent(s): db6e385

Feat: Add collapsible intro and percentage-based allocation

Browse files
Files changed (1) hide show
  1. app.py +110 -22
app.py CHANGED
@@ -187,10 +187,42 @@ def create_interface():
187
  game_env = gr.State(BusinessCompetitionEnv())
188
 
189
  gr.Markdown(f"# 🎮 {TITLE}")
190
- gr.Markdown(
191
- "**Demonstrating how complex, multi-turn strategic reasoning emerges from self-play.**\n"
192
- "*This simulation replaces Tic-Tac-Toe with a business competition to better illustrate the practical takeaways from the SPIRAL paper.*"
193
- )
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
194
 
195
  with gr.Row():
196
  with gr.Column(scale=3):
@@ -206,11 +238,19 @@ def create_interface():
206
 
207
  with gr.Group():
208
  player_budget_display = gr.Label(f"Your Budget: ${INITIAL_BUDGET}")
209
- rd_slider = gr.Slider(0, INITIAL_BUDGET, label="R&D Investment", value=333, step=10)
210
- mkt_slider = gr.Slider(0, INITIAL_BUDGET, label="Marketing Investment", value=333, step=10)
211
- sales_slider = gr.Slider(0, INITIAL_BUDGET, label="Sales Investment", value=334, step=10)
212
-
213
- total_allocated_display = gr.Label("Total Allocated: $1000")
 
 
 
 
 
 
 
 
214
 
215
  with gr.Row():
216
  submit_btn = gr.Button("End Quarter", variant="primary")
@@ -236,14 +276,37 @@ def create_interface():
236
  return fig_ms, fig_b, fig_q
237
 
238
  @spaces.GPU
239
- def game_step_and_update(env, rd, mkt, sales):
240
  player_budget = env.player_stats["budget"]
241
- if (rd + mkt + sales) > player_budget:
242
- status_text = f"Error: Allocation (${rd + mkt + sales}) exceeds budget (${player_budget})."
243
- return env, status_text, env.ai_stats, *create_plots(env.history), gr.Label(f"Your Budget: ${player_budget}"), gr.Slider(maximum=player_budget), gr.Slider(maximum=player_budget), gr.Slider(maximum=player_budget)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
244
 
245
- player_alloc = {"rd": rd, "marketing": mkt, "sales": sales}
 
246
  ai_alloc, ai_reasoning = ai_strategy(env.ai_stats, env.player_stats)
 
247
 
248
  env.step(player_alloc, ai_alloc)
249
  state = env.get_state()
@@ -259,11 +322,14 @@ def create_interface():
259
 
260
  new_budget = state["player_stats"]["budget"]
261
 
 
262
  return (state, status_text, ai_reasoning, *plots,
263
  gr.Label(f"Your Budget: ${new_budget}"),
264
  gr.Slider(maximum=new_budget, value=int(new_budget/3)),
265
  gr.Slider(maximum=new_budget, value=int(new_budget/3)),
266
- gr.Slider(maximum=new_budget, value=new_budget - 2 * int(new_budget/3)))
 
 
267
 
268
  def on_new_game():
269
  env = BusinessCompetitionEnv()
@@ -275,20 +341,29 @@ def create_interface():
275
  gr.Slider(maximum=INITIAL_BUDGET, value=333),
276
  gr.Slider(maximum=INITIAL_BUDGET, value=333),
277
  gr.Slider(maximum=INITIAL_BUDGET, value=334),
 
278
  gr.Button(interactive=True)
279
  )
280
 
281
- def update_total_display(rd, mkt, sales):
282
  return gr.Label(f"Total Allocated: ${rd + mkt + sales}")
283
 
 
 
 
 
 
 
284
  # --- Event Handlers ---
285
  submit_btn.click(
286
  fn=game_step_and_update,
287
- inputs=[game_env, rd_slider, mkt_slider, sales_slider],
288
  outputs=[
289
  game_env, status_box, ai_reasoning_box,
290
  plot_market_share, plot_budget, plot_quality,
291
- player_budget_display, rd_slider, mkt_slider, sales_slider
 
 
292
  ]
293
  )
294
 
@@ -298,15 +373,28 @@ def create_interface():
298
  outputs=[
299
  game_env, status_box, ai_reasoning_box,
300
  plot_market_share, plot_budget, plot_quality,
301
- player_budget_display, rd_slider, mkt_slider, sales_slider,
 
 
302
  submit_btn
303
  ]
304
  )
305
 
306
- for slider in [rd_slider, mkt_slider, sales_slider]:
307
- slider.change(fn=update_total_display, inputs=[rd_slider, mkt_slider, sales_slider], outputs=total_allocated_display)
 
 
 
 
 
 
 
 
 
 
 
308
 
309
- demo.load(on_new_game, outputs=[game_env, status_box, ai_reasoning_box, plot_market_share, plot_budget, plot_quality, player_budget_display, rd_slider, mkt_slider, sales_slider, submit_btn])
310
 
311
  return demo
312
 
 
187
  game_env = gr.State(BusinessCompetitionEnv())
188
 
189
  gr.Markdown(f"# 🎮 {TITLE}")
190
+
191
+ with gr.Accordion("ℹ️ What is this app about & How to play", open=False):
192
+ gr.Markdown("""
193
+ ### What is this app about?
194
+
195
+ **For Business Strategists, Product Managers, and Students:**
196
+
197
+ This simulator is a hands-on sandbox for exploring the core trade-offs of business strategy. You are in control of a company competing against a strategic AI. By allocating your budget each quarter, you can directly see the impact of your decisions:
198
+
199
+ - **Short-term vs. Long-term:** Feel the tension between investing in Marketing for immediate market share gains versus investing in R&D for a long-term product advantage.
200
+ - **Resource Management:** Learn how investing in Sales grows your future budget, enabling more significant investments later on.
201
+ - **Competitive Dynamics:** The AI opponent doesn't play a fixed strategy. It analyzes your moves and adapts, forcing you to think multiple turns ahead. This provides an intuitive feel for how competitive landscapes evolve.
202
+
203
+ **For AI/ML Engineers and Data Scientists:**
204
+
205
+ This demo provides a practical look at the principles of advanced AI reasoning described in the SPIRAL research paper. The AI opponent is not just a set of `if/else` rules; it uses a strategy model that mimics the outcomes of self-play reinforcement learning.
206
+
207
+ - **Emergent Strategy:** The AI's decision-making process illustrates how an agent can learn to balance priorities, react to threats, and press advantages—all without being explicitly programmed for each scenario. This is a core concept of self-play.
208
+ - **Multi-Turn Reasoning:** Observe the AI's rationale. It often makes decisions based on future projections (e.g., potential budget shortfalls or quality gaps), showcasing a capacity for long-term planning.
209
+ - **Zero-Sum Dynamics:** The simulation is a zero-sum game for market share, creating the competitive pressure that, according to the SPIRAL paper, is essential for incentivizing robust reasoning.
210
+
211
+ ### How to Use the App
212
+
213
+ 1. **Your Goal:** Achieve a higher market share than the AI by the end of 12 quarters.
214
+ 2. **Choose Your Mode:** Select either "Raw Values" or "Percentages" to allocate your budget.
215
+ 3. **Allocate Budget:** Use the sliders to decide how much of your quarterly budget to invest in three key areas.
216
+ - `R&D`: Improves your product quality, giving you a persistent, long-term edge.
217
+ - `Marketing`: Provides an immediate boost to your market share for the current quarter.
218
+ - `Sales`: Increases your budget for the *next* quarter, fueling future growth.
219
+ 4. **End the Quarter:** Click the "End Quarter" button to submit your decisions.
220
+ 5. **Analyze the Results:**
221
+ - The charts on the left will update to show the new market landscape.
222
+ - The "AI Strategic Reasoning" box will explain the logic behind the AI's counter-move.
223
+ - Your budget for the next quarter will be updated.
224
+ 6. **Adapt and Win:** Continue making decisions for 12 quarters, adapting your strategy to counter the AI and win the market.
225
+ """)
226
 
227
  with gr.Row():
228
  with gr.Column(scale=3):
 
238
 
239
  with gr.Group():
240
  player_budget_display = gr.Label(f"Your Budget: ${INITIAL_BUDGET}")
241
+ allocation_mode_radio = gr.Radio(["Raw Values", "Percentages"], label="Allocation Mode", value="Raw Values")
242
+
243
+ with gr.Group() as raw_values_group:
244
+ rd_slider_raw = gr.Slider(0, INITIAL_BUDGET, label="R&D Investment", value=333, step=10)
245
+ mkt_slider_raw = gr.Slider(0, INITIAL_BUDGET, label="Marketing Investment", value=333, step=10)
246
+ sales_slider_raw = gr.Slider(0, INITIAL_BUDGET, label="Sales Investment", value=334, step=10)
247
+ total_allocated_raw_display = gr.Label("Total Allocated: $1000")
248
+
249
+ with gr.Group(visible=False) as percentage_group:
250
+ rd_slider_pct = gr.Slider(0, 100, label="R&D Allocation (%)", value=33, step=1)
251
+ mkt_slider_pct = gr.Slider(0, 100, label="Marketing Allocation (%)", value=33, step=1)
252
+ sales_slider_pct = gr.Slider(0, 100, label="Sales Allocation (%)", value=34, step=1)
253
+ total_allocated_pct_display = gr.Label("Total Allocated: 100%")
254
 
255
  with gr.Row():
256
  submit_btn = gr.Button("End Quarter", variant="primary")
 
276
  return fig_ms, fig_b, fig_q
277
 
278
  @spaces.GPU
279
+ def game_step_and_update(env, mode, rd_raw, mkt_raw, sales_raw, rd_pct, mkt_pct, sales_pct):
280
  player_budget = env.player_stats["budget"]
281
+
282
+ if mode == "Percentages":
283
+ if rd_pct + mkt_pct + sales_pct != 100:
284
+ status_text = "Error: Percentage allocations must sum to 100%."
285
+ return env, status_text, env.ai_stats.get("last_reasoning", ""), *create_plots(env.history), gr.Label(f"Your Budget: ${player_budget}"), rd_slider_raw, mkt_slider_raw, sales_slider_raw, rd_slider_pct, mkt_slider_pct, sales_slider_pct
286
+
287
+ rd_alloc_val = int(player_budget * rd_pct / 100)
288
+ mkt_alloc_val = int(player_budget * mkt_pct / 100)
289
+ sales_alloc_val = int(player_budget * sales_pct / 100)
290
+
291
+ # Distribute rounding errors
292
+ total = rd_alloc_val + mkt_alloc_val + sales_alloc_val
293
+ sales_alloc_val += player_budget - total
294
+
295
+ else: # Raw Values
296
+ rd_alloc_val, mkt_alloc_val, sales_alloc_val = rd_raw, mkt_raw, sales_raw
297
+
298
+ if (rd_alloc_val + mkt_alloc_val + sales_alloc_val) > player_budget:
299
+ status_text = f"Error: Allocation (${rd_alloc_val + mkt_alloc_val + sales_alloc_val}) exceeds budget (${player_budget})."
300
+ # This part needs to return updates for all sliders to avoid errors
301
+ return (env, status_text, env.ai_stats.get("last_reasoning", ""), *create_plots(env.history),
302
+ gr.Label(f"Your Budget: ${player_budget}"),
303
+ gr.Slider(maximum=player_budget), gr.Slider(maximum=player_budget), gr.Slider(maximum=player_budget),
304
+ rd_slider_pct, mkt_slider_pct, sales_slider_pct)
305
 
306
+
307
+ player_alloc = {"rd": rd_alloc_val, "marketing": mkt_alloc_val, "sales": sales_alloc_val}
308
  ai_alloc, ai_reasoning = ai_strategy(env.ai_stats, env.player_stats)
309
+ env.ai_stats["last_reasoning"] = ai_reasoning # Store reasoning for error case
310
 
311
  env.step(player_alloc, ai_alloc)
312
  state = env.get_state()
 
322
 
323
  new_budget = state["player_stats"]["budget"]
324
 
325
+ # Return updates for all sliders
326
  return (state, status_text, ai_reasoning, *plots,
327
  gr.Label(f"Your Budget: ${new_budget}"),
328
  gr.Slider(maximum=new_budget, value=int(new_budget/3)),
329
  gr.Slider(maximum=new_budget, value=int(new_budget/3)),
330
+ gr.Slider(maximum=new_budget, value=new_budget - 2 * int(new_budget/3)),
331
+ gr.Slider(value=33), gr.Slider(value=33), gr.Slider(value=34)
332
+ )
333
 
334
  def on_new_game():
335
  env = BusinessCompetitionEnv()
 
341
  gr.Slider(maximum=INITIAL_BUDGET, value=333),
342
  gr.Slider(maximum=INITIAL_BUDGET, value=333),
343
  gr.Slider(maximum=INITIAL_BUDGET, value=334),
344
+ gr.Slider(value=33), gr.Slider(value=33), gr.Slider(value=34),
345
  gr.Button(interactive=True)
346
  )
347
 
348
+ def update_total_raw_display(rd, mkt, sales):
349
  return gr.Label(f"Total Allocated: ${rd + mkt + sales}")
350
 
351
+ def update_total_pct_display(rd, mkt, sales):
352
+ return gr.Label(f"Total Allocated: {rd + mkt + sales}%")
353
+
354
+ def toggle_allocation_mode(mode):
355
+ return gr.update(visible=mode == "Raw Values"), gr.update(visible=mode == "Percentages")
356
+
357
  # --- Event Handlers ---
358
  submit_btn.click(
359
  fn=game_step_and_update,
360
+ inputs=[game_env, allocation_mode_radio, rd_slider_raw, mkt_slider_raw, sales_slider_raw, rd_slider_pct, mkt_slider_pct, sales_slider_pct],
361
  outputs=[
362
  game_env, status_box, ai_reasoning_box,
363
  plot_market_share, plot_budget, plot_quality,
364
+ player_budget_display,
365
+ rd_slider_raw, mkt_slider_raw, sales_slider_raw,
366
+ rd_slider_pct, mkt_slider_pct, sales_slider_pct
367
  ]
368
  )
369
 
 
373
  outputs=[
374
  game_env, status_box, ai_reasoning_box,
375
  plot_market_share, plot_budget, plot_quality,
376
+ player_budget_display,
377
+ rd_slider_raw, mkt_slider_raw, sales_slider_raw,
378
+ rd_slider_pct, mkt_slider_pct, sales_slider_pct,
379
  submit_btn
380
  ]
381
  )
382
 
383
+ # Handlers for updating total displays
384
+ for slider in [rd_slider_raw, mkt_slider_raw, sales_slider_raw]:
385
+ slider.change(fn=update_total_raw_display, inputs=[rd_slider_raw, mkt_slider_raw, sales_slider_raw], outputs=total_allocated_raw_display)
386
+
387
+ for slider in [rd_slider_pct, mkt_slider_pct, sales_slider_pct]:
388
+ slider.change(fn=update_total_pct_display, inputs=[rd_slider_pct, mkt_slider_pct, sales_slider_pct], outputs=total_allocated_pct_display)
389
+
390
+ # Handler for toggling allocation modes
391
+ allocation_mode_radio.change(
392
+ fn=toggle_allocation_mode,
393
+ inputs=allocation_mode_radio,
394
+ outputs=[raw_values_group, percentage_group]
395
+ )
396
 
397
+ demo.load(on_new_game, outputs=[game_env, status_box, ai_reasoning_box, plot_market_share, plot_budget, plot_quality, player_budget_display, rd_slider_raw, mkt_slider_raw, sales_slider_raw, rd_slider_pct, mkt_slider_pct, sales_slider_pct, submit_btn])
398
 
399
  return demo
400