""" SPIRAL: Strategic Business Competition Simulator This demo has been updated to more intuitively demonstrate the key concepts from the "Self-Play in Zero-Sum Games Incentivizes Reasoning" (SPIRAL) research paper. Instead of Tic-Tac-Toe, this simulation uses a zero-sum business competition to showcase complex, multi-turn strategic reasoning in a more practical and relatable context. """ import gradio as gr import numpy as np import pandas as pd import plotly.express as px # --- Game Configuration --- INITIAL_BUDGET = 1000 INITIAL_MARKET_SHARE = 50 INITIAL_PRODUCT_QUALITY = 50 NUM_QUARTERS = 12 TITLE = "SPIRAL: Strategic Business Competition" # --- Game Environment --- class BusinessCompetitionEnv: """Manages the state of the strategic business competition.""" def __init__(self): self.reset() def reset(self): """Resets the game to its initial state.""" self.quarter = 0 self.game_over = False self.player_stats = { "budget": INITIAL_BUDGET, "market_share": INITIAL_MARKET_SHARE, "product_quality": INITIAL_PRODUCT_QUALITY, } self.ai_stats = { "budget": INITIAL_BUDGET, "market_share": INITIAL_MARKET_SHARE, "product_quality": INITIAL_PRODUCT_QUALITY, } # History stores the state at the *end* of each quarter self.history = [] self._add_to_history() # Initial state at quarter 0 return self.get_state() def _add_to_history(self): """Adds the current state to the history log.""" self.history.append({ "Quarter": self.quarter, "Player Budget": self.player_stats["budget"], "AI Budget": self.ai_stats["budget"], "Player Market Share": self.player_stats["market_share"], "AI Market Share": self.ai_stats["market_share"], "Player Product Quality": self.player_stats["product_quality"], "AI Product Quality": self.ai_stats["product_quality"], }) def get_state(self): """Returns the complete current state of the game.""" return { "quarter": self.quarter, "player_stats": self.player_stats, "ai_stats": self.ai_stats, "game_over": self.game_over, "history": self.history } def get_winner(self): """Determines the winner at the end of the game.""" if not self.game_over: return None if self.player_stats["market_share"] > self.ai_stats["market_share"]: return "You" elif self.ai_stats["market_share"] > self.player_stats["market_share"]: return "AI" else: return "It's a Draw" def step(self, player_allocation, ai_allocation): """Executes one quarter of the game.""" if self.game_over: return self.get_state() self.quarter += 1 # 1. Update Product Quality from R&D investment self.player_stats["product_quality"] += int(np.sqrt(player_allocation["rd"]) * 1.5) self.ai_stats["product_quality"] += int(np.sqrt(ai_allocation["rd"]) * 1.5) # 2. Calculate market share shift from Marketing and Quality mkt_diff = player_allocation["marketing"] - ai_allocation["marketing"] quality_diff = self.player_stats["product_quality"] - self.ai_stats["product_quality"] # Marketing has a direct but temporary effect, quality has a persistent effect market_share_shift = (mkt_diff / 100.0) + (quality_diff / 50.0) market_share_shift = np.clip(market_share_shift, -7, 7) # Cap shifts per quarter self.player_stats["market_share"] += market_share_shift self.ai_stats["market_share"] -= market_share_shift self.player_stats["market_share"] = np.clip(self.player_stats["market_share"], 0, 100) self.ai_stats["market_share"] = 100 - self.player_stats["market_share"] # 3. Calculate next quarter's budget from Sales investment and market share player_remaining_budget = self.player_stats['budget'] - sum(player_allocation.values()) ai_remaining_budget = self.ai_stats['budget'] - sum(ai_allocation.values()) player_sales_roi = 1.2 + (self.player_stats["market_share"] / 200.0) ai_sales_roi = 1.2 + (self.ai_stats["market_share"] / 200.0) self.player_stats["budget"] = int(player_allocation["sales"] * player_sales_roi + player_remaining_budget) self.ai_stats["budget"] = int(ai_allocation["sales"] * ai_sales_roi + ai_remaining_budget) if self.quarter >= NUM_QUARTERS: self.game_over = True self._add_to_history() return self.get_state() # --- AI Logic --- def ai_strategy(ai_stats, player_stats): """ A heuristic-based AI to simulate a strategic opponent. This mimics the kind of robust strategy that would emerge from self-play, reacting to the opponent and planning for the long term. """ budget = ai_stats["budget"] reasoning = [] # Default balanced strategy allocation = {"rd": 0.33, "marketing": 0.34, "sales": 0.33} # --- Strategic Adjustments based on SPIRAL principles --- # 1. React to quality gap (long-term planning) if ai_stats["product_quality"] < player_stats["product_quality"] - 15: allocation["rd"] += 0.2 allocation["marketing"] -= 0.1 allocation["sales"] -= 0.1 reasoning.append("My analysis indicates a growing product quality gap. I'm increasing R&D investment to innovate and secure a long-term competitive advantage.") # 2. React to market share loss (short-term defense) elif ai_stats["market_share"] < player_stats["market_share"] - 10: allocation["marketing"] += 0.2 allocation["rd"] -= 0.1 allocation["sales"] -= 0.1 reasoning.append("You've recently captured significant market share. I'm launching an aggressive marketing campaign to win back customers and regain my position.") # 3. Exploit a quality advantage (pressing an advantage) if ai_stats["product_quality"] > player_stats["product_quality"] + 20: allocation["marketing"] += 0.15 allocation["rd"] -= 0.15 reasoning.append(f"My product quality ({ai_stats['product_quality']:.0f}) is superior. I will leverage this with a marketing push to translate product leadership into market dominance.") # 4. Manage budget (resource management) if ai_stats["budget"] < player_stats["budget"] * 0.8: allocation["sales"] += 0.15 allocation["rd"] -= 0.15 reasoning.append("My projections show a potential budget shortfall. I am focusing on sales to ensure strong revenue growth for future quarters.") if not reasoning: reasoning.append("I am pursuing a balanced strategy, investing across R&D, Marketing, and Sales to ensure steady, long-term growth and market presence.") # Normalize allocations total_allocation = sum(allocation.values()) final_allocation = {key: int(budget * (val / total_allocation)) for key, val in allocation.items()} # Ensure the sum is exactly the budget diff = budget - sum(final_allocation.values()) final_allocation['sales'] += diff return final_allocation, " ".join(reasoning) # --- Gradio UI --- def create_interface(): """Creates the Gradio web interface for the simulator.""" with gr.Blocks(title=TITLE, theme=gr.themes.Soft()) as demo: game_env = gr.State(BusinessCompetitionEnv()) gr.Markdown(f"# 🎮 {TITLE}") gr.Markdown( "**Demonstrating how complex, multi-turn strategic reasoning emerges from self-play.**\n" "*This simulation replaces Tic-Tac-Toe with a business competition to better illustrate the practical takeaways from the SPIRAL paper.*" ) with gr.Row(): with gr.Column(scale=3): gr.Markdown("### 📈 Market Dashboard") plot_market_share = gr.Plot() with gr.Row(): plot_budget = gr.Plot() plot_quality = gr.Plot() with gr.Column(scale=2): gr.Markdown("### 📊 Your Decisions") status_box = gr.Textbox(f"Quarter 1 of {NUM_QUARTERS}. Your move.", label="Game Status", interactive=False) with gr.Box(): player_budget_display = gr.Label(f"Your Budget: ${INITIAL_BUDGET}") rd_slider = gr.Slider(0, INITIAL_BUDGET, label="R&D Investment", value=333, step=10) mkt_slider = gr.Slider(0, INITIAL_BUDGET, label="Marketing Investment", value=333, step=10) sales_slider = gr.Slider(0, INITIAL_BUDGET, label="Sales Investment", value=334, step=10) total_allocated_display = gr.Label("Total Allocated: $1000") with gr.Row(): submit_btn = gr.Button("End Quarter", variant="primary") new_game_btn = gr.Button("Start New Game") gr.Markdown("### 🧠 AI Strategic Reasoning") ai_reasoning_box = gr.Textbox("", label="AI Decision Rationale", lines=5, interactive=False) gr.Markdown("---") with gr.Accordion("Key Takeaways from the SPIRAL Research Paper", open=False): gr.Markdown(open("spiral_paper_takeaways.md").read()) def create_plots(history): df = pd.DataFrame(history) if df.empty: return None, None, None fig_ms = px.line(df, x="Quarter", y=["Player Market Share", "AI Market Share"], title="Market Share (%)", markers=True, color_discrete_map={"Player Market Share": "#3b82f6", "AI Market Share": "#ef4444"}) fig_ms.update_layout(yaxis_range=[0,100], legend_title_text='') fig_b = px.line(df, x="Quarter", y=["Player Budget", "AI Budget"], title="Budget ($)", markers=True, color_discrete_map={"Player Budget": "#3b82f6", "AI Budget": "#ef4444"}) fig_b.update_layout(legend_title_text='') fig_q = px.line(df, x="Quarter", y=["Player Product Quality", "AI Product Quality"], title="Product Quality Index", markers=True, color_discrete_map={"Player Product Quality": "#3b82f6", "AI Product Quality": "#ef4444"}) fig_q.update_layout(legend_title_text='') return fig_ms, fig_b, fig_q def game_step_and_update(env, rd, mkt, sales): player_budget = env.player_stats["budget"] if (rd + mkt + sales) > player_budget: status_text = f"Error: Allocation (${rd + mkt + sales}) exceeds budget (${player_budget})." return env, status_text, env.ai_stats, *create_plots(env.history), gr.Label(f"Your Budget: ${player_budget}"), gr.Slider(maximum=player_budget), gr.Slider(maximum=player_budget), gr.Slider(maximum=player_budget) player_alloc = {"rd": rd, "marketing": mkt, "sales": sales} ai_alloc, ai_reasoning = ai_strategy(env.ai_stats, env.player_stats) env.step(player_alloc, ai_alloc) state = env.get_state() plots = create_plots(state["history"]) if state["game_over"]: winner = env.get_winner() status_text = f"Game Over! Winner: {winner}. Final market share: You ({state['player_stats']['market_share']:.1f}%) vs AI ({state['ai_stats']['market_share']:.1f}%)." submit_btn.interactive = False else: status_text = f"End of Quarter {state['quarter']}. Your turn." new_budget = state["player_stats"]["budget"] return (state, status_text, ai_reasoning, *plots, gr.Label(f"Your Budget: ${new_budget}"), gr.Slider(maximum=new_budget, value=int(new_budget/3)), gr.Slider(maximum=new_budget, value=int(new_budget/3)), gr.Slider(maximum=new_budget, value=new_budget - 2 * int(new_budget/3))) def on_new_game(): env = BusinessCompetitionEnv() state = env.get_state() plots = create_plots(state["history"]) return ( env, f"Quarter 1 of {NUM_QUARTERS}. Your move.", "", *plots, gr.Label(f"Your Budget: ${INITIAL_BUDGET}"), gr.Slider(maximum=INITIAL_BUDGET, value=333), gr.Slider(maximum=INITIAL_BUDGET, value=333), gr.Slider(maximum=INITIAL_BUDGET, value=334), gr.Button(interactive=True) ) def update_total_display(rd, mkt, sales): return gr.Label(f"Total Allocated: ${rd + mkt + sales}") # --- Event Handlers --- submit_btn.click( fn=game_step_and_update, inputs=[game_env, rd_slider, mkt_slider, sales_slider], outputs=[ game_env, status_box, ai_reasoning_box, plot_market_share, plot_budget, plot_quality, player_budget_display, rd_slider, mkt_slider, sales_slider ] ) new_game_btn.click( fn=on_new_game, inputs=[], outputs=[ game_env, status_box, ai_reasoning_box, plot_market_share, plot_budget, plot_quality, player_budget_display, rd_slider, mkt_slider, sales_slider, submit_btn ] ) for slider in [rd_slider, mkt_slider, sales_slider]: slider.change(fn=update_total_display, inputs=[rd_slider, mkt_slider, sales_slider], outputs=total_allocated_display) demo.load(on_new_game, outputs=[game_env, status_box, ai_reasoning_box, plot_market_share, plot_budget, plot_quality, player_budget_display, rd_slider, mkt_slider, sales_slider, submit_btn]) return demo if __name__ == "__main__": spiral_demo = create_interface() spiral_demo.launch()