# Kaushik Rajan
# Feat: Replace Tic-Tac-Toe with Strategic Business Competition
# 898b55a
# raw
# history blame
# 14.2 kB
"""
SPIRAL: Strategic Business Competition Simulator
This demo has been updated to more intuitively demonstrate the key concepts from the
"Self-Play in Zero-Sum Games Incentivizes Reasoning" (SPIRAL) research paper.
Instead of Tic-Tac-Toe, this simulation uses a zero-sum business competition to showcase
complex, multi-turn strategic reasoning in a more practical and relatable context.
"""
import gradio as gr
import numpy as np
import pandas as pd
import plotly.express as px
# --- Game Configuration ---
# Starting budget for both the player and the AI; also used as the initial
# upper bound of the three allocation sliders in the UI.
INITIAL_BUDGET = 1000
# Starting market share of each side, in percent (50/50 split; the two
# shares are kept summing to 100 by the environment).
INITIAL_MARKET_SHARE = 50
# Starting product-quality index for both sides.
INITIAL_PRODUCT_QUALITY = 50
# Number of quarters in one game; the environment flags the game as over
# once its quarter counter reaches this value.
NUM_QUARTERS = 12
# Passed as the Blocks `title` and rendered in the page heading.
TITLE = "SPIRAL: Strategic Business Competition"
# --- Game Environment ---
class BusinessCompetitionEnv:
    """Two-sided, zero-sum business competition simulated one quarter at a time.

    Each side (the human player and the AI) owns three stats: ``budget``,
    ``market_share`` (percent; the two shares always sum to 100) and
    ``product_quality``. Every quarter both sides split money between R&D,
    marketing and sales, and :meth:`step` applies both allocations at once.
    The game is flagged over once ``NUM_QUARTERS`` quarters have been played;
    the side holding the larger market share at that point wins.
    """

    # Keys every allocation dict must provide.
    _ALLOCATION_KEYS = ("rd", "marketing", "sales")

    def __init__(self):
        self.reset()

    def reset(self):
        """Reset the game to its initial state and return that state."""
        self.quarter = 0
        self.game_over = False
        self.player_stats = {
            "budget": INITIAL_BUDGET,
            "market_share": INITIAL_MARKET_SHARE,
            "product_quality": INITIAL_PRODUCT_QUALITY,
        }
        self.ai_stats = {
            "budget": INITIAL_BUDGET,
            "market_share": INITIAL_MARKET_SHARE,
            "product_quality": INITIAL_PRODUCT_QUALITY,
        }
        # History stores the state at the *end* of each quarter.
        self.history = []
        self._add_to_history()  # Initial state at quarter 0
        return self.get_state()

    def _add_to_history(self):
        """Append a snapshot of the current quarter's stats to the history log."""
        self.history.append({
            "Quarter": self.quarter,
            "Player Budget": self.player_stats["budget"],
            "AI Budget": self.ai_stats["budget"],
            "Player Market Share": self.player_stats["market_share"],
            "AI Market Share": self.ai_stats["market_share"],
            "Player Product Quality": self.player_stats["product_quality"],
            "AI Product Quality": self.ai_stats["product_quality"],
        })

    def get_state(self):
        """Return the complete current state of the game as a dict.

        The stat dicts and the history list are the environment's own objects
        (not copies), so callers should treat them as read-only.
        """
        return {
            "quarter": self.quarter,
            "player_stats": self.player_stats,
            "ai_stats": self.ai_stats,
            "game_over": self.game_over,
            "history": self.history,
        }

    def get_winner(self):
        """Return ``"You"``, ``"AI"`` or ``"It's a Draw"``; ``None`` while the game is running."""
        if not self.game_over:
            return None
        if self.player_stats["market_share"] > self.ai_stats["market_share"]:
            return "You"
        elif self.ai_stats["market_share"] > self.player_stats["market_share"]:
            return "AI"
        else:
            return "It's a Draw"

    def _check_allocation(self, who, allocation, budget):
        """Raise ``ValueError`` if *allocation* is not a legal way to spend *budget*."""
        for key in self._ALLOCATION_KEYS:
            if allocation[key] < 0:
                raise ValueError(
                    f"{who} allocation for '{key}' must be non-negative, got {allocation[key]}."
                )
        total = sum(allocation.values())
        if total > budget:
            raise ValueError(f"{who} allocation (${total}) exceeds budget (${budget}).")

    def step(self, player_allocation, ai_allocation):
        """Execute one quarter of the game and return the resulting state.

        Args:
            player_allocation: dict with ``"rd"``, ``"marketing"`` and
                ``"sales"`` amounts spent by the player this quarter.
            ai_allocation: same structure, for the AI.

        Returns:
            The state dict from :meth:`get_state`. If the game is already
            over, nothing changes and the current state is returned.

        Raises:
            ValueError: if an amount is negative or an allocation exceeds
                that side's budget. The environment is left untouched.
            KeyError: if a required allocation key is missing.
        """
        if self.game_over:
            return self.get_state()

        # Validate *before* mutating anything so a bad call cannot leave the
        # environment half-updated (e.g. quarter advanced, budgets stale).
        self._check_allocation("Player", player_allocation, self.player_stats["budget"])
        self._check_allocation("AI", ai_allocation, self.ai_stats["budget"])

        self.quarter += 1

        # 1. Update product quality from R&D investment. The square root gives
        #    diminishing returns on large R&D spends.
        self.player_stats["product_quality"] += int(np.sqrt(player_allocation["rd"]) * 1.5)
        self.ai_stats["product_quality"] += int(np.sqrt(ai_allocation["rd"]) * 1.5)

        # 2. Shift market share based on marketing and quality differences.
        mkt_diff = player_allocation["marketing"] - ai_allocation["marketing"]
        quality_diff = self.player_stats["product_quality"] - self.ai_stats["product_quality"]
        # Marketing only counts for the quarter it is spent in, whereas quality
        # accumulates, so a quality lead keeps paying off every quarter.
        market_share_shift = (mkt_diff / 100.0) + (quality_diff / 50.0)
        # Cap shifts per quarter; store plain floats rather than NumPy scalars.
        market_share_shift = float(np.clip(market_share_shift, -7, 7))
        player_share = float(
            np.clip(self.player_stats["market_share"] + market_share_shift, 0, 100)
        )
        self.player_stats["market_share"] = player_share
        # Zero-sum: the AI always holds whatever the player does not.
        self.ai_stats["market_share"] = 100 - player_share

        # 3. Next quarter's budget = unspent money + return on the sales spend,
        #    where the return grows with the side's (updated) market share.
        player_remaining_budget = self.player_stats["budget"] - sum(player_allocation.values())
        ai_remaining_budget = self.ai_stats["budget"] - sum(ai_allocation.values())
        player_sales_roi = 1.2 + (self.player_stats["market_share"] / 200.0)
        ai_sales_roi = 1.2 + (self.ai_stats["market_share"] / 200.0)
        self.player_stats["budget"] = int(
            player_allocation["sales"] * player_sales_roi + player_remaining_budget
        )
        self.ai_stats["budget"] = int(
            ai_allocation["sales"] * ai_sales_roi + ai_remaining_budget
        )

        if self.quarter >= NUM_QUARTERS:
            self.game_over = True
        self._add_to_history()
        return self.get_state()
# --- AI Logic ---
def ai_strategy(ai_stats, player_stats):
    """Choose the AI's budget split for the coming quarter.

    A heuristic opponent: it starts from a balanced split and shifts weight
    between R&D, marketing and sales in reaction to the quality gap, the
    market-share gap and the relative budgets.

    Args:
        ai_stats: dict with the AI's ``"budget"``, ``"market_share"`` and
            ``"product_quality"``.
        player_stats: the same three stats for the human player.

    Returns:
        A ``(allocation, reasoning)`` tuple. ``allocation`` maps ``"rd"``,
        ``"marketing"`` and ``"sales"`` to non-negative integer amounts that
        sum exactly to the AI's budget; ``reasoning`` is a human-readable
        explanation of the adjustments that were applied.
    """
    budget = ai_stats["budget"]
    reasoning = []

    # Default balanced strategy (relative weights, normalised below).
    allocation = {"rd": 0.33, "marketing": 0.34, "sales": 0.33}

    # --- Strategic Adjustments based on SPIRAL principles ---
    # 1. React to quality gap (long-term planning)
    if ai_stats["product_quality"] < player_stats["product_quality"] - 15:
        allocation["rd"] += 0.2
        allocation["marketing"] -= 0.1
        allocation["sales"] -= 0.1
        reasoning.append("My analysis indicates a growing product quality gap. I'm increasing R&D investment to innovate and secure a long-term competitive advantage.")
    # 2. React to market share loss (short-term defense)
    elif ai_stats["market_share"] < player_stats["market_share"] - 10:
        allocation["marketing"] += 0.2
        allocation["rd"] -= 0.1
        allocation["sales"] -= 0.1
        reasoning.append("You've recently captured significant market share. I'm launching an aggressive marketing campaign to win back customers and regain my position.")

    # 3. Exploit a quality advantage (pressing an advantage)
    if ai_stats["product_quality"] > player_stats["product_quality"] + 20:
        allocation["marketing"] += 0.15
        allocation["rd"] -= 0.15
        reasoning.append(f"My product quality ({ai_stats['product_quality']:.0f}) is superior. I will leverage this with a marketing push to translate product leadership into market dominance.")

    # 4. Manage budget (resource management)
    if ai_stats["budget"] < player_stats["budget"] * 0.8:
        allocation["sales"] += 0.15
        allocation["rd"] -= 0.15
        reasoning.append("My projections show a potential budget shortfall. I am focusing on sales to ensure strong revenue growth for future quarters.")

    if not reasoning:
        reasoning.append("I am pursuing a balanced strategy, investing across R&D, Marketing, and Sales to ensure steady, long-term growth and market presence.")

    # Rules 2, 3 and 4 can all fire at once and push the R&D weight below
    # zero (0.33 - 0.10 - 0.15 - 0.15). A negative weight would become a
    # negative spend, so floor every weight at zero before normalising.
    allocation = {key: max(0.0, weight) for key, weight in allocation.items()}

    # Normalize allocations into whole-dollar amounts.
    total_allocation = sum(allocation.values())
    final_allocation = {
        key: int(budget * (weight / total_allocation))
        for key, weight in allocation.items()
    }

    # Truncation can leave a few dollars unassigned; give them to sales so
    # the amounts sum to exactly the budget.
    final_allocation["sales"] += budget - sum(final_allocation.values())
    return final_allocation, " ".join(reasoning)
# --- Gradio UI ---
def create_interface():
    """Build and return the Gradio Blocks app for the simulator.

    The page shows three line charts (market share, budget, product quality),
    three sliders for the player's quarterly allocation, an "End Quarter"
    button that advances the game against the heuristic AI, and a
    "Start New Game" button. One ``BusinessCompetitionEnv`` is kept per
    session in a ``gr.State``.

    Returns:
        The configured ``gr.Blocks`` instance (not yet launched).
    """
    # Read the takeaways document up front with an explicit encoding and a
    # context manager; a missing file should not prevent the UI from starting.
    try:
        with open("spiral_paper_takeaways.md", encoding="utf-8") as takeaways_file:
            takeaways_md = takeaways_file.read()
    except OSError:
        takeaways_md = "*`spiral_paper_takeaways.md` could not be loaded.*"

    with gr.Blocks(title=TITLE, theme=gr.themes.Soft()) as demo:
        game_env = gr.State(BusinessCompetitionEnv())

        gr.Markdown(f"# 🎮 {TITLE}")
        gr.Markdown(
            "**Demonstrating how complex, multi-turn strategic reasoning emerges from self-play.**\n"
            "*This simulation replaces Tic-Tac-Toe with a business competition to better illustrate the practical takeaways from the SPIRAL paper.*"
        )

        with gr.Row():
            with gr.Column(scale=3):
                gr.Markdown("### 📈 Market Dashboard")
                plot_market_share = gr.Plot()
                with gr.Row():
                    plot_budget = gr.Plot()
                    plot_quality = gr.Plot()
            with gr.Column(scale=2):
                gr.Markdown("### 📊 Your Decisions")
                status_box = gr.Textbox(f"Quarter 1 of {NUM_QUARTERS}. Your move.", label="Game Status", interactive=False)
                # gr.Group instead of gr.Box: Box no longer exists in Gradio 4,
                # which is the API generation the handlers below are written for.
                with gr.Group():
                    player_budget_display = gr.Label(f"Your Budget: ${INITIAL_BUDGET}")
                    rd_slider = gr.Slider(0, INITIAL_BUDGET, label="R&D Investment", value=333, step=10)
                    mkt_slider = gr.Slider(0, INITIAL_BUDGET, label="Marketing Investment", value=333, step=10)
                    sales_slider = gr.Slider(0, INITIAL_BUDGET, label="Sales Investment", value=334, step=10)
                    total_allocated_display = gr.Label("Total Allocated: $1000")
                with gr.Row():
                    submit_btn = gr.Button("End Quarter", variant="primary")
                    new_game_btn = gr.Button("Start New Game")
                gr.Markdown("### 🧠 AI Strategic Reasoning")
                ai_reasoning_box = gr.Textbox("", label="AI Decision Rationale", lines=5, interactive=False)

        gr.Markdown("---")
        with gr.Accordion("Key Takeaways from the SPIRAL Research Paper", open=False):
            gr.Markdown(takeaways_md)

        def create_plots(history):
            """Return (market share, budget, quality) figures built from the history log."""
            df = pd.DataFrame(history)
            if df.empty:
                return None, None, None
            fig_ms = px.line(df, x="Quarter", y=["Player Market Share", "AI Market Share"], title="Market Share (%)", markers=True, color_discrete_map={"Player Market Share": "#3b82f6", "AI Market Share": "#ef4444"})
            fig_ms.update_layout(yaxis_range=[0, 100], legend_title_text='')
            fig_b = px.line(df, x="Quarter", y=["Player Budget", "AI Budget"], title="Budget ($)", markers=True, color_discrete_map={"Player Budget": "#3b82f6", "AI Budget": "#ef4444"})
            fig_b.update_layout(legend_title_text='')
            fig_q = px.line(df, x="Quarter", y=["Player Product Quality", "AI Product Quality"], title="Product Quality Index", markers=True, color_discrete_map={"Player Product Quality": "#3b82f6", "AI Product Quality": "#ef4444"})
            fig_q.update_layout(legend_title_text='')
            return fig_ms, fig_b, fig_q

        def game_step_and_update(env, rd, mkt, sales):
            """Play one quarter with the player's allocation and refresh every widget."""
            player_budget = env.player_stats["budget"]
            if (rd + mkt + sales) > player_budget:
                status_text = f"Error: Allocation (${rd + mkt + sales}) exceeds budget (${player_budget})."
                # Nothing was played: leave the AI rationale and the submit
                # button exactly as they are.
                return (
                    env, status_text, gr.update(), *create_plots(env.history),
                    gr.Label(f"Your Budget: ${player_budget}"),
                    gr.Slider(maximum=player_budget),
                    gr.Slider(maximum=player_budget),
                    gr.Slider(maximum=player_budget),
                    gr.update(),
                )

            player_alloc = {"rd": rd, "marketing": mkt, "sales": sales}
            ai_alloc, ai_reasoning = ai_strategy(env.ai_stats, env.player_stats)
            env.step(player_alloc, ai_alloc)
            state = env.get_state()
            plots = create_plots(state["history"])

            if state["game_over"]:
                winner = env.get_winner()
                status_text = f"Game Over! Winner: {winner}. Final market share: You ({state['player_stats']['market_share']:.1f}%) vs AI ({state['ai_stats']['market_share']:.1f}%)."
            else:
                status_text = f"End of Quarter {state['quarter']}. Your turn."

            new_budget = state["player_stats"]["budget"]
            third = int(new_budget / 3)
            # The first output feeds the gr.State, so it must be the env
            # object itself (not the state dict), otherwise the next click
            # receives a dict and fails on `env.player_stats`.
            return (
                env, status_text, ai_reasoning, *plots,
                gr.Label(f"Your Budget: ${new_budget}"),
                gr.Slider(maximum=new_budget, value=third),
                gr.Slider(maximum=new_budget, value=third),
                gr.Slider(maximum=new_budget, value=new_budget - 2 * third),
                # Component properties can only be changed through an output;
                # disable the button once the final quarter has been played.
                gr.Button(interactive=not state["game_over"]),
            )

        def on_new_game():
            """Create a fresh environment and reset every widget to its initial value."""
            env = BusinessCompetitionEnv()
            state = env.get_state()
            plots = create_plots(state["history"])
            return (
                env, f"Quarter 1 of {NUM_QUARTERS}. Your move.", "", *plots,
                gr.Label(f"Your Budget: ${INITIAL_BUDGET}"),
                gr.Slider(maximum=INITIAL_BUDGET, value=333),
                gr.Slider(maximum=INITIAL_BUDGET, value=333),
                gr.Slider(maximum=INITIAL_BUDGET, value=334),
                gr.Button(interactive=True),
            )

        def update_total_display(rd, mkt, sales):
            """Show the running total of the three sliders."""
            return gr.Label(f"Total Allocated: ${rd + mkt + sales}")

        # --- Event Handlers ---
        # All three game events refresh the same widgets, in this order.
        board_outputs = [
            game_env, status_box, ai_reasoning_box,
            plot_market_share, plot_budget, plot_quality,
            player_budget_display, rd_slider, mkt_slider, sales_slider,
            submit_btn,
        ]
        submit_btn.click(
            fn=game_step_and_update,
            inputs=[game_env, rd_slider, mkt_slider, sales_slider],
            outputs=board_outputs,
        )
        new_game_btn.click(fn=on_new_game, inputs=[], outputs=board_outputs)
        for slider in [rd_slider, mkt_slider, sales_slider]:
            slider.change(fn=update_total_display, inputs=[rd_slider, mkt_slider, sales_slider], outputs=total_allocated_display)
        # Start every page load from a fresh game so the charts are populated.
        demo.load(on_new_game, outputs=board_outputs)

    return demo
if __name__ == "__main__":
    # Script entry point: build the Blocks app, then start serving it.
    spiral_demo = create_interface()
    spiral_demo.launch()