Spaces:
Build error
Build error
"""
SPIRAL: Strategic Business Competition Simulator
This demo has been updated to more intuitively demonstrate the key concepts from the
"Self-Play in Zero-Sum Games Incentivizes Reasoning" (SPIRAL) research paper.
Instead of Tic-Tac-Toe, this simulation uses a zero-sum business competition to showcase
complex, multi-turn strategic reasoning in a more practical and relatable context.
"""
| import gradio as gr | |
| import numpy as np | |
| import pandas as pd | |
| import plotly.express as px | |
# --- Game Configuration ---
# Starting values shared by both competitors; the game is symmetric at quarter 0.
INITIAL_BUDGET = 1000
INITIAL_MARKET_SHARE = 50  # percent; the two shares always sum to 100
INITIAL_PRODUCT_QUALITY = 50
# Number of quarters played before the game is declared over.
NUM_QUARTERS = 12
# Used both as the browser tab title and as the page heading.
TITLE = "SPIRAL: Strategic Business Competition"
# --- Game Environment ---
class BusinessCompetitionEnv:
    """Manages the state of the strategic business competition.

    Two competitors (the player and the AI) each hold a budget, a market
    share and a product-quality index. Every call to :meth:`step` plays one
    quarter; the game ends once ``NUM_QUARTERS`` quarters have been played.
    """

    # Keys every allocation dict passed to ``step`` must provide.
    _ALLOCATION_KEYS = ("rd", "marketing", "sales")

    def __init__(self):
        self.reset()

    def reset(self):
        """Reset the game to its initial state and return that state."""
        self.quarter = 0
        self.game_over = False
        self.player_stats = {
            "budget": INITIAL_BUDGET,
            "market_share": INITIAL_MARKET_SHARE,
            "product_quality": INITIAL_PRODUCT_QUALITY,
        }
        self.ai_stats = {
            "budget": INITIAL_BUDGET,
            "market_share": INITIAL_MARKET_SHARE,
            "product_quality": INITIAL_PRODUCT_QUALITY,
        }
        # History stores the state at the *end* of each quarter.
        self.history = []
        self._add_to_history()  # Initial state at quarter 0
        return self.get_state()

    def _add_to_history(self):
        """Append a flat snapshot of the current stats to the history log."""
        self.history.append({
            "Quarter": self.quarter,
            "Player Budget": self.player_stats["budget"],
            "AI Budget": self.ai_stats["budget"],
            "Player Market Share": self.player_stats["market_share"],
            "AI Market Share": self.ai_stats["market_share"],
            "Player Product Quality": self.player_stats["product_quality"],
            "AI Product Quality": self.ai_stats["product_quality"],
        })

    def get_state(self):
        """Return the complete current state of the game.

        The returned dict references the live ``player_stats``, ``ai_stats``
        and ``history`` objects (no copies are made).
        """
        return {
            "quarter": self.quarter,
            "player_stats": self.player_stats,
            "ai_stats": self.ai_stats,
            "game_over": self.game_over,
            "history": self.history,
        }

    def get_winner(self):
        """Return the winner label, or ``None`` while the game is running.

        The winner is whoever holds the larger market share; equal shares
        yield ``"It's a Draw"``.
        """
        if not self.game_over:
            return None
        player_share = self.player_stats["market_share"]
        ai_share = self.ai_stats["market_share"]
        if player_share > ai_share:
            return "You"
        if ai_share > player_share:
            return "AI"
        return "It's a Draw"

    @classmethod
    def _validate_allocation(cls, owner, allocation):
        """Raise ``ValueError`` if any spend in ``allocation`` is negative."""
        for key in cls._ALLOCATION_KEYS:
            if allocation[key] < 0:
                raise ValueError(
                    f"{owner} allocation for '{key}' must be non-negative, "
                    f"got {allocation[key]}."
                )

    def step(self, player_allocation, ai_allocation):
        """Execute one quarter of the game and return the new state.

        Args:
            player_allocation: dict with ``"rd"``, ``"marketing"`` and
                ``"sales"`` spend for the player.
            ai_allocation: dict with the same keys for the AI.

        Returns:
            The state dict from :meth:`get_state`. If the game is already
            over, the state is returned unchanged.

        Raises:
            ValueError: if any spend is negative. The check runs before any
                state is touched, so a rejected call leaves the game intact
                (a negative R&D spend would otherwise fail inside the square
                root after the quarter counter had already advanced).
        """
        if self.game_over:
            return self.get_state()

        self._validate_allocation("Player", player_allocation)
        self._validate_allocation("AI", ai_allocation)

        self.quarter += 1

        # 1. Update Product Quality from R&D investment (diminishing returns).
        self.player_stats["product_quality"] += int(np.sqrt(player_allocation["rd"]) * 1.5)
        self.ai_stats["product_quality"] += int(np.sqrt(ai_allocation["rd"]) * 1.5)

        # 2. Calculate market share shift from Marketing and Quality.
        mkt_diff = player_allocation["marketing"] - ai_allocation["marketing"]
        quality_diff = self.player_stats["product_quality"] - self.ai_stats["product_quality"]
        # Marketing has a direct but temporary effect, quality has a persistent effect.
        market_share_shift = (mkt_diff / 100.0) + (quality_diff / 50.0)
        # Cap shifts per quarter; float() keeps numpy scalars out of the state.
        market_share_shift = float(np.clip(market_share_shift, -7, 7))
        player_share = float(np.clip(self.player_stats["market_share"] + market_share_shift, 0, 100))
        self.player_stats["market_share"] = player_share
        # Zero-sum: the AI always holds whatever the player does not.
        self.ai_stats["market_share"] = 100 - player_share

        # 3. Calculate next quarter's budget from Sales investment and market share.
        player_remaining_budget = self.player_stats["budget"] - sum(player_allocation.values())
        ai_remaining_budget = self.ai_stats["budget"] - sum(ai_allocation.values())
        player_sales_roi = 1.2 + (self.player_stats["market_share"] / 200.0)
        ai_sales_roi = 1.2 + (self.ai_stats["market_share"] / 200.0)
        self.player_stats["budget"] = int(player_allocation["sales"] * player_sales_roi + player_remaining_budget)
        self.ai_stats["budget"] = int(ai_allocation["sales"] * ai_sales_roi + ai_remaining_budget)

        if self.quarter >= NUM_QUARTERS:
            self.game_over = True
        self._add_to_history()
        return self.get_state()
# --- AI Logic ---
def ai_strategy(ai_stats, player_stats):
    """
    A heuristic-based AI to simulate a strategic opponent.
    This mimics the kind of robust strategy that would emerge from self-play,
    reacting to the opponent and planning for the long term.

    Args:
        ai_stats: dict with the AI's ``"budget"``, ``"market_share"`` and
            ``"product_quality"``.
        player_stats: dict with the same keys for the player.

    Returns:
        A ``(allocation, reasoning)`` tuple. ``allocation`` maps ``"rd"``,
        ``"marketing"`` and ``"sales"`` to non-negative integer spends that
        sum exactly to the AI's budget (a negative budget is treated as 0);
        ``reasoning`` is the space-joined explanation of every rule that
        fired.
    """
    # A negative budget cannot be spent; treat it as nothing to allocate.
    budget = max(0, ai_stats["budget"])
    reasoning = []
    # Default balanced strategy
    allocation = {"rd": 0.33, "marketing": 0.34, "sales": 0.33}

    # --- Strategic Adjustments based on SPIRAL principles ---
    # 1. React to quality gap (long-term planning)
    if ai_stats["product_quality"] < player_stats["product_quality"] - 15:
        allocation["rd"] += 0.2
        allocation["marketing"] -= 0.1
        allocation["sales"] -= 0.1
        reasoning.append("My analysis indicates a growing product quality gap. I'm increasing R&D investment to innovate and secure a long-term competitive advantage.")
    # 2. React to market share loss (short-term defense)
    elif ai_stats["market_share"] < player_stats["market_share"] - 10:
        allocation["marketing"] += 0.2
        allocation["rd"] -= 0.1
        allocation["sales"] -= 0.1
        reasoning.append("You've recently captured significant market share. I'm launching an aggressive marketing campaign to win back customers and regain my position.")

    # 3. Exploit a quality advantage (pressing an advantage)
    if ai_stats["product_quality"] > player_stats["product_quality"] + 20:
        allocation["marketing"] += 0.15
        allocation["rd"] -= 0.15
        reasoning.append(f"My product quality ({ai_stats['product_quality']:.0f}) is superior. I will leverage this with a marketing push to translate product leadership into market dominance.")

    # 4. Manage budget (resource management)
    if ai_stats["budget"] < player_stats["budget"] * 0.8:
        allocation["sales"] += 0.15
        allocation["rd"] -= 0.15
        reasoning.append("My projections show a potential budget shortfall. I am focusing on sales to ensure strong revenue growth for future quarters.")

    if not reasoning:
        reasoning.append("I am pursuing a balanced strategy, investing across R&D, Marketing, and Sales to ensure steady, long-term growth and market presence.")

    # Rules 2, 3 and 4 can all fire together and push the R&D weight below
    # zero (0.33 - 0.1 - 0.15 - 0.15); a negative spend is meaningless, so
    # floor every weight at zero before normalising.
    allocation = {key: max(0.0, weight) for key, weight in allocation.items()}

    # Normalize allocations
    total_allocation = sum(allocation.values())
    if total_allocation <= 0:
        # Defensive: not reachable with the weights above, but never divide by zero.
        final_allocation = {key: 0 for key in allocation}
    else:
        final_allocation = {
            key: int(budget * (weight / total_allocation))
            for key, weight in allocation.items()
        }

    # Ensure the sum is exactly the budget (int() truncation leaves a small remainder).
    final_allocation["sales"] += budget - sum(final_allocation.values())
    return final_allocation, " ".join(reasoning)
# --- Gradio UI ---
def _load_takeaways(path="spiral_paper_takeaways.md"):
    """Return the takeaways markdown, or a short notice if the file is unreadable."""
    try:
        with open(path, encoding="utf-8") as handle:
            return handle.read()
    except OSError:
        # A missing companion file should not stop the whole app from building.
        return f"*Key takeaways are unavailable: `{path}` could not be read.*"


def create_interface():
    """Creates the Gradio web interface for the simulator.

    Returns:
        The configured ``gr.Blocks`` app. Each browser session gets its own
        ``BusinessCompetitionEnv`` through ``gr.State``.
    """
    with gr.Blocks(title=TITLE, theme=gr.themes.Soft()) as demo:
        game_env = gr.State(BusinessCompetitionEnv())
        gr.Markdown(f"# 🎮 {TITLE}")
        gr.Markdown(
            "**Demonstrating how complex, multi-turn strategic reasoning emerges from self-play.**\n"
            "*This simulation replaces Tic-Tac-Toe with a business competition to better illustrate the practical takeaways from the SPIRAL paper.*"
        )
        with gr.Row():
            with gr.Column(scale=3):
                gr.Markdown("### 📈 Market Dashboard")
                plot_market_share = gr.Plot()
                with gr.Row():
                    plot_budget = gr.Plot()
                    plot_quality = gr.Plot()
            with gr.Column(scale=2):
                gr.Markdown("### 📊 Your Decisions")
                status_box = gr.Textbox(f"Quarter 1 of {NUM_QUARTERS}. Your move.", label="Game Status", interactive=False)
                # gr.Group instead of gr.Box, which newer Gradio releases no longer provide.
                with gr.Group():
                    player_budget_display = gr.Label(f"Your Budget: ${INITIAL_BUDGET}")
                    rd_slider = gr.Slider(0, INITIAL_BUDGET, label="R&D Investment", value=333, step=10)
                    mkt_slider = gr.Slider(0, INITIAL_BUDGET, label="Marketing Investment", value=333, step=10)
                    sales_slider = gr.Slider(0, INITIAL_BUDGET, label="Sales Investment", value=334, step=10)
                    total_allocated_display = gr.Label("Total Allocated: $1000")
                with gr.Row():
                    submit_btn = gr.Button("End Quarter", variant="primary")
                    new_game_btn = gr.Button("Start New Game")
                gr.Markdown("### 🧠 AI Strategic Reasoning")
                ai_reasoning_box = gr.Textbox("", label="AI Decision Rationale", lines=5, interactive=False)
        gr.Markdown("---")
        with gr.Accordion("Key Takeaways from the SPIRAL Research Paper", open=False):
            gr.Markdown(_load_takeaways())

        def create_plots(history):
            """Build the market-share, budget and quality line charts from the history log."""
            df = pd.DataFrame(history)
            if df.empty:
                return None, None, None
            fig_ms = px.line(df, x="Quarter", y=["Player Market Share", "AI Market Share"], title="Market Share (%)", markers=True, color_discrete_map={"Player Market Share": "#3b82f6", "AI Market Share": "#ef4444"})
            fig_ms.update_layout(yaxis_range=[0, 100], legend_title_text='')
            fig_b = px.line(df, x="Quarter", y=["Player Budget", "AI Budget"], title="Budget ($)", markers=True, color_discrete_map={"Player Budget": "#3b82f6", "AI Budget": "#ef4444"})
            fig_b.update_layout(legend_title_text='')
            fig_q = px.line(df, x="Quarter", y=["Player Product Quality", "AI Product Quality"], title="Product Quality Index", markers=True, color_discrete_map={"Player Product Quality": "#3b82f6", "AI Product Quality": "#ef4444"})
            fig_q.update_layout(legend_title_text='')
            return fig_ms, fig_b, fig_q

        def game_step_and_update(env, rd, mkt, sales):
            """Play one quarter with the slider values and refresh every widget.

            Returns one value per entry of ``game_outputs`` (defined below).
            """
            player_budget = env.player_stats["budget"]
            if (rd + mkt + sales) > player_budget:
                status_text = f"Error: Allocation (${rd + mkt + sales}) exceeds budget (${player_budget})."
                return (
                    env, status_text,
                    gr.update(),  # keep the previous AI rationale on screen
                    *create_plots(env.history),
                    gr.Label(f"Your Budget: ${player_budget}"),
                    gr.Slider(maximum=player_budget),
                    gr.Slider(maximum=player_budget),
                    gr.Slider(maximum=player_budget),
                    gr.update(),  # leave the submit button as it is
                )

            player_alloc = {"rd": rd, "marketing": mkt, "sales": sales}
            ai_alloc, ai_reasoning = ai_strategy(env.ai_stats, env.player_stats)
            env.step(player_alloc, ai_alloc)
            state = env.get_state()
            plots = create_plots(state["history"])
            if state["game_over"]:
                winner = env.get_winner()
                status_text = f"Game Over! Winner: {winner}. Final market share: You ({state['player_stats']['market_share']:.1f}%) vs AI ({state['ai_stats']['market_share']:.1f}%)."
            else:
                status_text = f"End of Quarter {state['quarter']}. Your turn."

            new_budget = state["player_stats"]["budget"]
            third = int(new_budget / 3)
            # Return the env object itself (not the state dict) so the next
            # click still receives a BusinessCompetitionEnv from gr.State.
            return (
                env, status_text, ai_reasoning, *plots,
                gr.Label(f"Your Budget: ${new_budget}"),
                gr.Slider(maximum=new_budget, value=third),
                gr.Slider(maximum=new_budget, value=third),
                gr.Slider(maximum=new_budget, value=new_budget - 2 * third),
                # Disabling must travel through the outputs; assigning to
                # submit_btn.interactive inside a handler changes nothing.
                gr.Button(interactive=not state["game_over"]),
            )

        def on_new_game():
            """Start a fresh game and reset every widget to its initial value."""
            env = BusinessCompetitionEnv()
            state = env.get_state()
            plots = create_plots(state["history"])
            return (
                env, f"Quarter 1 of {NUM_QUARTERS}. Your move.", "", *plots,
                gr.Label(f"Your Budget: ${INITIAL_BUDGET}"),
                gr.Slider(maximum=INITIAL_BUDGET, value=333),
                gr.Slider(maximum=INITIAL_BUDGET, value=333),
                gr.Slider(maximum=INITIAL_BUDGET, value=334),
                gr.Button(interactive=True),
            )

        def update_total_display(rd, mkt, sales):
            """Show the running total of the three sliders."""
            return gr.Label(f"Total Allocated: ${rd + mkt + sales}")

        # --- Event Handlers ---
        # Both handlers update the same widgets, in this order.
        game_outputs = [
            game_env, status_box, ai_reasoning_box,
            plot_market_share, plot_budget, plot_quality,
            player_budget_display, rd_slider, mkt_slider, sales_slider,
            submit_btn,
        ]
        submit_btn.click(
            fn=game_step_and_update,
            inputs=[game_env, rd_slider, mkt_slider, sales_slider],
            outputs=game_outputs,
        )
        new_game_btn.click(fn=on_new_game, inputs=[], outputs=game_outputs)
        for slider in [rd_slider, mkt_slider, sales_slider]:
            slider.change(fn=update_total_display, inputs=[rd_slider, mkt_slider, sales_slider], outputs=total_allocated_display)
        demo.load(on_new_game, outputs=game_outputs)
    return demo
# Build and serve the app only when this file is run as a script.
if __name__ == "__main__":
    spiral_demo = create_interface()
    spiral_demo.launch()