Spaces:

kaushikvr06
/

reasoning-simulator

Build error

Kaushik Rajan

Feat: Replace Tic-Tac-Toe with Strategic Business Competition

898b55a 5 months ago

14.2 kB

	"""
	SPIRAL: Strategic Business Competition Simulator

	This demo has been updated to more intuitively demonstrate the key concepts from the
	"Self-Play in Zero-Sum Games Incentivizes Reasoning" (SPIRAL) research paper.

	Instead of Tic-Tac-Toe, this simulation uses a zero-sum business competition to showcase
	complex, multi-turn strategic reasoning in a more practical and relatable context.
	"""

	import gradio as gr
	import numpy as np
	import pandas as pd
	import plotly.express as px

	# --- Game Configuration ---
	# Starting budget handed to both the player and the AI on every reset.
	INITIAL_BUDGET = 1000
	# Starting market share for each side; shares are kept in 0..100 and always sum to 100.
	INITIAL_MARKET_SHARE = 50
	# Starting value of each side's product quality index.
	INITIAL_PRODUCT_QUALITY = 50
	# The game is flagged as over once this many quarters have been played.
	NUM_QUARTERS = 12
	# Used as the browser tab title and as the page heading of the Gradio app.
	TITLE = "SPIRAL: Strategic Business Competition"

	# --- Game Environment ---

	class BusinessCompetitionEnv:
	    """Zero-sum, two-company market simulation (player versus AI).

	    Each side owns a ``budget``, a ``market_share`` (0..100, the two shares
	    always sum to 100) and a ``product_quality`` index. One call to
	    :meth:`step` plays one quarter; the game ends after ``NUM_QUARTERS``
	    quarters and the side with the larger market share wins.
	    """

	    def __init__(self):
	        self.reset()

	    @staticmethod
	    def _initial_stats():
	        """Return a fresh stats dict holding the configured starting values."""
	        return {
	            "budget": INITIAL_BUDGET,
	            "market_share": INITIAL_MARKET_SHARE,
	            "product_quality": INITIAL_PRODUCT_QUALITY,
	        }

	    def reset(self):
	        """Reset the game to quarter 0 and return the initial state."""
	        self.quarter = 0
	        self.game_over = False

	        self.player_stats = self._initial_stats()
	        self.ai_stats = self._initial_stats()

	        # History stores one snapshot per finished quarter; entry 0 is the
	        # starting position so the charts have a point to draw at quarter 0.
	        self.history = []
	        self._add_to_history()

	        return self.get_state()

	    def _add_to_history(self):
	        """Append a snapshot of the current quarter and both sides' stats."""
	        self.history.append({
	            "Quarter": self.quarter,
	            "Player Budget": self.player_stats["budget"],
	            "AI Budget": self.ai_stats["budget"],
	            "Player Market Share": self.player_stats["market_share"],
	            "AI Market Share": self.ai_stats["market_share"],
	            "Player Product Quality": self.player_stats["product_quality"],
	            "AI Product Quality": self.ai_stats["product_quality"],
	        })

	    def get_state(self):
	        """Return the current game state as a dict.

	        The ``player_stats``, ``ai_stats`` and ``history`` entries are the
	        live objects held by the environment, not copies.
	        """
	        return {
	            "quarter": self.quarter,
	            "player_stats": self.player_stats,
	            "ai_stats": self.ai_stats,
	            "game_over": self.game_over,
	            "history": self.history,
	        }

	    def get_winner(self):
	        """Return ``"You"``, ``"AI"`` or ``"It's a Draw"``; ``None`` while the game is running."""
	        if not self.game_over:
	            return None
	        player_share = self.player_stats["market_share"]
	        ai_share = self.ai_stats["market_share"]
	        if player_share > ai_share:
	            return "You"
	        if ai_share > player_share:
	            return "AI"
	        return "It's a Draw"

	    def step(self, player_allocation, ai_allocation):
	        """Play one quarter and return the resulting state.

	        Args:
	            player_allocation: dict with ``"rd"``, ``"marketing"`` and
	                ``"sales"`` spend for the player.
	            ai_allocation: the same three keys for the AI.

	        Negative amounts are treated as 0. Calling this after the game has
	        ended changes nothing and returns the final state.
	        """
	        if self.game_over:
	            return self.get_state()

	        # A negative R&D spend would make np.sqrt return NaN and int(NaN)
	        # raise ValueError, and a negative spend would also inflate the
	        # remaining budget, so floor every amount at zero. New dicts are
	        # built so the caller's arguments are left untouched.
	        player_allocation = {key: max(0, amount) for key, amount in player_allocation.items()}
	        ai_allocation = {key: max(0, amount) for key, amount in ai_allocation.items()}

	        self.quarter += 1

	        # 1. Update Product Quality from R&D investment (diminishing returns via sqrt)
	        self.player_stats["product_quality"] += int(np.sqrt(player_allocation["rd"]) * 1.5)
	        self.ai_stats["product_quality"] += int(np.sqrt(ai_allocation["rd"]) * 1.5)

	        # 2. Calculate market share shift from Marketing and Quality
	        mkt_diff = player_allocation["marketing"] - ai_allocation["marketing"]
	        quality_diff = self.player_stats["product_quality"] - self.ai_stats["product_quality"]

	        # Marketing only counts in the quarter it is spent; quality accumulates,
	        # so a quality lead keeps shifting share every quarter.
	        market_share_shift = (mkt_diff / 100.0) + (quality_diff / 50.0)
	        market_share_shift = np.clip(market_share_shift, -7, 7)  # Cap shifts per quarter

	        # Store plain floats rather than numpy scalars, and derive the AI share
	        # from the player's so the two always sum to exactly 100.
	        player_share = float(np.clip(self.player_stats["market_share"] + market_share_shift, 0, 100))
	        self.player_stats["market_share"] = player_share
	        self.ai_stats["market_share"] = 100 - player_share

	        # 3. Calculate next quarter's budget from Sales investment and market share
	        player_remaining_budget = self.player_stats["budget"] - sum(player_allocation.values())
	        ai_remaining_budget = self.ai_stats["budget"] - sum(ai_allocation.values())

	        player_sales_roi = 1.2 + (self.player_stats["market_share"] / 200.0)
	        ai_sales_roi = 1.2 + (self.ai_stats["market_share"] / 200.0)

	        self.player_stats["budget"] = int(player_allocation["sales"] * player_sales_roi + player_remaining_budget)
	        self.ai_stats["budget"] = int(ai_allocation["sales"] * ai_sales_roi + ai_remaining_budget)

	        if self.quarter >= NUM_QUARTERS:
	            self.game_over = True

	        self._add_to_history()

	        return self.get_state()

	# --- AI Logic ---

	def ai_strategy(ai_stats, player_stats):
	    """Choose the AI's spend for the coming quarter with simple heuristics.

	    Starts from a balanced split and shifts weight between R&D, marketing
	    and sales according to the quality gap, the market share gap and the
	    budget gap between the two sides.

	    Args:
	        ai_stats: dict with the AI's ``"budget"``, ``"market_share"`` and
	            ``"product_quality"``.
	        player_stats: the same three keys for the player.

	    Returns:
	        A ``(allocation, reasoning)`` tuple. ``allocation`` maps ``"rd"``,
	        ``"marketing"`` and ``"sales"`` to non-negative integer amounts that
	        sum exactly to the AI's budget; ``reasoning`` is the explanation text
	        of every rule that fired, joined by spaces.
	    """
	    budget = ai_stats["budget"]
	    reasoning = []

	    # Default balanced strategy (relative weights, normalised below)
	    allocation = {"rd": 0.33, "marketing": 0.34, "sales": 0.33}

	    # --- Strategic Adjustments based on SPIRAL principles ---
	    # 1. React to quality gap (long-term planning)
	    if ai_stats["product_quality"] < player_stats["product_quality"] - 15:
	        allocation["rd"] += 0.2
	        allocation["marketing"] -= 0.1
	        allocation["sales"] -= 0.1
	        reasoning.append("My analysis indicates a growing product quality gap. I'm increasing R&D investment to innovate and secure a long-term competitive advantage.")

	    # 2. React to market share loss (short-term defense)
	    elif ai_stats["market_share"] < player_stats["market_share"] - 10:
	        allocation["marketing"] += 0.2
	        allocation["rd"] -= 0.1
	        allocation["sales"] -= 0.1
	        reasoning.append("You've recently captured significant market share. I'm launching an aggressive marketing campaign to win back customers and regain my position.")

	    # 3. Exploit a quality advantage (pressing an advantage)
	    if ai_stats["product_quality"] > player_stats["product_quality"] + 20:
	        allocation["marketing"] += 0.15
	        allocation["rd"] -= 0.15
	        reasoning.append(f"My product quality ({ai_stats['product_quality']:.0f}) is superior. I will leverage this with a marketing push to translate product leadership into market dominance.")

	    # 4. Manage budget (resource management)
	    if ai_stats["budget"] < player_stats["budget"] * 0.8:
	        allocation["sales"] += 0.15
	        allocation["rd"] -= 0.15
	        reasoning.append("My projections show a potential budget shortfall. I am focusing on sales to ensure strong revenue growth for future quarters.")

	    if not reasoning:
	        reasoning.append("I am pursuing a balanced strategy, investing across R&D, Marketing, and Sales to ensure steady, long-term growth and market presence.")

	    # Rules 2, 3 and 4 can fire together and drive the R&D weight to
	    # 0.33 - 0.10 - 0.15 - 0.15 = -0.07. A negative weight would become a
	    # negative spend, so clamp every weight at zero before normalising.
	    allocation = {key: max(0.0, weight) for key, weight in allocation.items()}

	    # Normalize allocations into whole-dollar amounts
	    total_allocation = sum(allocation.values())
	    final_allocation = {key: int(budget * (val / total_allocation)) for key, val in allocation.items()}

	    # int() truncates, so hand the leftover to sales to spend exactly the budget
	    diff = budget - sum(final_allocation.values())
	    final_allocation['sales'] += diff

	    return final_allocation, " ".join(reasoning)

	# --- Gradio UI ---

	def create_interface():
	    """Build the Gradio Blocks app for the simulator and return it.

	    The app is only constructed here; the caller is responsible for calling
	    ``launch()`` on the returned object.
	    """
	    # Read the takeaways text with a context manager so the file handle is
	    # closed, and fall back to a placeholder so a missing file does not
	    # prevent the whole app from building.
	    try:
	        with open("spiral_paper_takeaways.md", encoding="utf-8") as takeaways_file:
	            takeaways_md = takeaways_file.read()
	    except OSError:
	        takeaways_md = "_The key takeaways file (spiral_paper_takeaways.md) could not be loaded._"

	    # NOTE(review): the nesting of the layout below was reconstructed from a
	    # source whose indentation was lost - confirm it matches the intended UI.
	    with gr.Blocks(title=TITLE, theme=gr.themes.Soft()) as demo:
	        game_env = gr.State(BusinessCompetitionEnv())

	        gr.Markdown(f"# 🎮 {TITLE}")
	        gr.Markdown(
	            "Demonstrating how complex, multi-turn strategic reasoning emerges from self-play.\n"
	            "This simulation replaces Tic-Tac-Toe with a business competition to better illustrate the practical takeaways from the SPIRAL paper."
	        )

	        with gr.Row():
	            with gr.Column(scale=3):
	                gr.Markdown("### 📈 Market Dashboard")
	                plot_market_share = gr.Plot()
	                with gr.Row():
	                    plot_budget = gr.Plot()
	                    plot_quality = gr.Plot()

	            with gr.Column(scale=2):
	                gr.Markdown("### 📊 Your Decisions")
	                status_box = gr.Textbox(f"Quarter 1 of {NUM_QUARTERS}. Your move.", label="Game Status", interactive=False)

	                # gr.Box no longer exists in Gradio 4; gr.Group is the replacement container.
	                with gr.Group():
	                    player_budget_display = gr.Label(f"Your Budget: ${INITIAL_BUDGET}")
	                    rd_slider = gr.Slider(0, INITIAL_BUDGET, label="R&D Investment", value=333, step=10)
	                    mkt_slider = gr.Slider(0, INITIAL_BUDGET, label="Marketing Investment", value=333, step=10)
	                    sales_slider = gr.Slider(0, INITIAL_BUDGET, label="Sales Investment", value=334, step=10)

	                    total_allocated_display = gr.Label("Total Allocated: $1000")

	                with gr.Row():
	                    submit_btn = gr.Button("End Quarter", variant="primary")
	                    new_game_btn = gr.Button("Start New Game")

	                gr.Markdown("### 🧠 AI Strategic Reasoning")
	                ai_reasoning_box = gr.Textbox("", label="AI Decision Rationale", lines=5, interactive=False)

	        gr.Markdown("---")
	        with gr.Accordion("Key Takeaways from the SPIRAL Research Paper", open=False):
	            gr.Markdown(takeaways_md)

	        def create_plots(history):
	            """Return the (market share, budget, quality) line charts for ``history``.

	            Returns three ``None`` values when there is nothing to plot yet.
	            """
	            df = pd.DataFrame(history)
	            if df.empty:
	                return None, None, None

	            def line_chart(metric, title):
	                # Player is always drawn in blue and the AI in red.
	                player_col, ai_col = f"Player {metric}", f"AI {metric}"
	                fig = px.line(
	                    df, x="Quarter", y=[player_col, ai_col], title=title, markers=True,
	                    color_discrete_map={player_col: "#3b82f6", ai_col: "#ef4444"},
	                )
	                fig.update_layout(legend_title_text='')
	                return fig

	            fig_ms = line_chart("Market Share", "Market Share (%)")
	            fig_ms.update_layout(yaxis_range=[0, 100])
	            fig_b = line_chart("Budget", "Budget ($)")
	            fig_q = line_chart("Product Quality", "Product Quality Index")

	            return fig_ms, fig_b, fig_q

	        def game_step_and_update(env, rd, mkt, sales):
	            """Play one quarter with the player's sliders and refresh every widget.

	            Returns one value per entry of ``all_outputs`` below.
	            """
	            player_budget = env.player_stats["budget"]
	            total_spend = rd + mkt + sales
	            if total_spend > player_budget:
	                status_text = f"Error: Allocation (${total_spend}) exceeds budget (${player_budget})."
	                # Nothing was played: keep the AI rationale and the button as they are.
	                return (
	                    env, status_text, gr.update(), *create_plots(env.history),
	                    gr.Label(f"Your Budget: ${player_budget}"),
	                    gr.Slider(maximum=player_budget),
	                    gr.Slider(maximum=player_budget),
	                    gr.Slider(maximum=player_budget),
	                    gr.update(),
	                )

	            player_alloc = {"rd": rd, "marketing": mkt, "sales": sales}
	            ai_alloc, ai_reasoning = ai_strategy(env.ai_stats, env.player_stats)

	            env.step(player_alloc, ai_alloc)
	            state = env.get_state()

	            plots = create_plots(state["history"])

	            if state["game_over"]:
	                winner = env.get_winner()
	                status_text = f"Game Over! Winner: {winner}. Final market share: You ({state['player_stats']['market_share']:.1f}%) vs AI ({state['ai_stats']['market_share']:.1f}%)."
	            else:
	                status_text = f"End of Quarter {state['quarter']}. Your turn."

	            new_budget = state["player_stats"]["budget"]
	            third = int(new_budget / 3)

	            # The first value must be the environment object itself (not the
	            # state dict), because it is fed back in as ``env`` on the next
	            # click. The button is disabled through the outputs: assigning to
	            # ``submit_btn.interactive`` inside a handler has no effect.
	            return (
	                env, status_text, ai_reasoning, *plots,
	                gr.Label(f"Your Budget: ${new_budget}"),
	                gr.Slider(maximum=new_budget, value=third),
	                gr.Slider(maximum=new_budget, value=third),
	                gr.Slider(maximum=new_budget, value=new_budget - 2 * third),
	                gr.Button(interactive=not state["game_over"]),
	            )

	        def on_new_game():
	            """Start a fresh game and reset every widget to its initial value."""
	            env = BusinessCompetitionEnv()
	            state = env.get_state()
	            plots = create_plots(state["history"])
	            return (
	                env, f"Quarter 1 of {NUM_QUARTERS}. Your move.", "", *plots,
	                gr.Label(f"Your Budget: ${INITIAL_BUDGET}"),
	                gr.Slider(maximum=INITIAL_BUDGET, value=333),
	                gr.Slider(maximum=INITIAL_BUDGET, value=333),
	                gr.Slider(maximum=INITIAL_BUDGET, value=334),
	                gr.Button(interactive=True),
	            )

	        def update_total_display(rd, mkt, sales):
	            """Show the sum of the three sliders."""
	            return gr.Label(f"Total Allocated: ${rd + mkt + sales}")

	        # --- Event Handlers ---
	        # Both handlers return values for the same eleven components, in this order.
	        all_outputs = [
	            game_env, status_box, ai_reasoning_box,
	            plot_market_share, plot_budget, plot_quality,
	            player_budget_display, rd_slider, mkt_slider, sales_slider,
	            submit_btn,
	        ]
	        sliders = [rd_slider, mkt_slider, sales_slider]

	        submit_btn.click(
	            fn=game_step_and_update,
	            inputs=[game_env, *sliders],
	            outputs=all_outputs,
	        )

	        new_game_btn.click(fn=on_new_game, inputs=[], outputs=all_outputs)

	        for slider in sliders:
	            slider.change(fn=update_total_display, inputs=sliders, outputs=total_allocated_display)

	        # Draw the initial charts as soon as the page loads.
	        demo.load(on_new_game, outputs=all_outputs)

	    return demo


	if __name__ == "__main__":
	    # Build the app and start the Gradio server only when run as a script,
	    # so importing this module has no side effects.
	    spiral_demo = create_interface()
	    spiral_demo.launch()