# GameTheory-Chat / app.py
# Uploaded by Alogotron via huggingface_hub (commit 10889b4, verified)
import spaces
import gradio as gr
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer, TextIteratorStreamer
from peft import PeftModel
from threading import Thread
# ── Configuration ──────────────────────────────────────────────
# Base checkpoint that both LoRA adapters were fine-tuned from.
BASE_MODEL = "Qwen/Qwen2.5-7B-Instruct"
# Phase 3 adapter: turns real-world scenarios into formal game models.
FORMULATOR_ADAPTER = "Alogotron/GameTheory-Formulator-Model"
# Phase 1 adapter: solves already-formalized game theory problems.
SOLVER_ADAPTER = "Alogotron/GameTheory-Solver"
# System prompt used by the Strategy Chat tab (Formulator model).
FORMULATOR_SYSTEM_PROMPT = (
    "You are a game theory expert. When given a real-world scenario, "
    "formulate it as a formal game theory model. Identify players, "
    "strategies, payoffs, and information structure. Solve the game "
    "and provide real-world interpretation of the results."
)
# System prompt used by the Quick Solve tab (Solver model).
SOLVER_SYSTEM_PROMPT = (
    "You are a game theory solver. Given a formal game theory problem, "
    "solve it step by step. Find all Nash equilibria, dominant strategies, "
    "and optimal solutions. Show your work clearly with mathematical rigor."
)
# ── Global state ───────────────────────────────────────────────
# Populated lazily by _load_adapter(), which must run inside a
# @spaces.GPU call (ZeroGPU allocates the GPU per request).
model = None            # PeftModel (base + active LoRA adapter), or None before first load
tokenizer = None        # AutoTokenizer for BASE_MODEL, or None before first load
current_adapter = None  # "formulator" | "solver" | None — which adapter is active
def _load_adapter(adapter_name: str):
    """Ensure the base model + requested LoRA adapter are active.

    Call only inside a @spaces.GPU context. The base model and tokenizer
    are loaded once and cached in module globals; subsequent switches
    only load/activate the other LoRA adapter in place. (The original
    re-downloaded and re-instantiated the full 7B base model and the
    tokenizer on every adapter switch, transiently doubling GPU memory
    and adding tens of seconds of load time.)

    Args:
        adapter_name: "formulator" selects FORMULATOR_ADAPTER; any other
            value selects SOLVER_ADAPTER (matches the original mapping).
    """
    global model, tokenizer, current_adapter
    if current_adapter == adapter_name:
        return  # requested adapter is already active — nothing to do
    adapter_id = FORMULATOR_ADAPTER if adapter_name == "formulator" else SOLVER_ADAPTER
    if tokenizer is None:
        tokenizer = AutoTokenizer.from_pretrained(BASE_MODEL)
    if model is None:
        # First call: load the base model once and attach the adapter.
        base = AutoModelForCausalLM.from_pretrained(
            BASE_MODEL,
            torch_dtype=torch.bfloat16,
            device_map="auto",
        )
        model = PeftModel.from_pretrained(base, adapter_id, adapter_name=adapter_name)
    else:
        # Later calls: reuse the resident base model; fetch the other
        # adapter only the first time it is requested, then just switch.
        if adapter_name not in model.peft_config:
            model.load_adapter(adapter_id, adapter_name=adapter_name)
        model.set_adapter(adapter_name)
    model.eval()
    current_adapter = adapter_name
# ── Inference functions ────────────────────────────────────────
@spaces.GPU
def chat_respond(message: str, history: list):
    """Streaming chat with the Formulator model.

    Builds a chat-template prompt from the system prompt, the prior
    turns in `history` (Gradio "messages" format: dicts with "role"
    and "content"), and the new user `message`, then streams tokens
    from model.generate running on a worker thread.

    Yields:
        The cumulative response text after each new token (Gradio
        ChatInterface streaming convention).
    """
    _load_adapter("formulator")
    messages = [{"role": "system", "content": FORMULATOR_SYSTEM_PROMPT}]
    for h in history:
        messages.append({"role": h["role"], "content": h["content"]})
    messages.append({"role": "user", "content": message})
    text = tokenizer.apply_chat_template(
        messages, tokenize=False, add_generation_prompt=True
    )
    inputs = tokenizer(text, return_tensors="pt").to(model.device)
    # skip_prompt=True so the echoed prompt is not streamed back.
    streamer = TextIteratorStreamer(
        tokenizer, skip_special_tokens=True, skip_prompt=True
    )
    gen_kwargs = dict(
        **inputs,
        max_new_tokens=2048,
        temperature=0.7,
        top_p=0.9,
        do_sample=True,
        streamer=streamer,
    )
    # generate() blocks, so it runs on a worker thread while we drain
    # the streamer on this one.
    thread = Thread(target=model.generate, kwargs=gen_kwargs)
    thread.start()
    try:
        response = ""
        for token in streamer:
            response += token
            yield response
    finally:
        # Fix: the original never joined the worker, so the generation
        # thread could outlive the request (and keep the GPU busy) if
        # the consumer stopped iterating early.
        thread.join()
@spaces.GPU
def solve_respond(problem_text: str):
    """Single-turn solve with the Solver model.

    Wraps `problem_text` in a chat-template prompt with the solver
    system prompt, generates once (no streaming), and returns only the
    newly generated text (prompt tokens are sliced off before decoding).
    Blank or missing input short-circuits with a hint message.
    """
    if not problem_text or not problem_text.strip():
        return "Please enter a game theory problem to solve."
    _load_adapter("solver")
    conversation = [
        {"role": "system", "content": SOLVER_SYSTEM_PROMPT},
        {"role": "user", "content": problem_text},
    ]
    prompt = tokenizer.apply_chat_template(
        conversation, tokenize=False, add_generation_prompt=True
    )
    encoded = tokenizer(prompt, return_tensors="pt").to(model.device)
    with torch.no_grad():
        generated = model.generate(
            **encoded,
            max_new_tokens=2048,
            temperature=0.7,
            top_p=0.9,
            do_sample=True,
        )
    # Decode only the completion — everything after the prompt tokens.
    prompt_len = encoded["input_ids"].shape[-1]
    return tokenizer.decode(generated[0][prompt_len:], skip_special_tokens=True)
# ── Custom CSS ─────────────────────────────────────────────────
# Injected into gr.Blocks(css=...). Dark GitHub-like palette; classes
# are referenced from the gr.HTML snippets below (.header-banner,
# .asset-grid/.asset-card in ABOUT_MD, .quota-notice in both tabs).
# `footer { display: none }` hides the default Gradio footer.
CSS = """
.header-banner {
text-align: center;
padding: 1.5rem 1rem;
background: linear-gradient(135deg, #0d1117 0%, #112240 50%, #0d3b3b 100%);
border-radius: 12px;
margin-bottom: 1rem;
border: 1px solid #1e3a5f;
}
.header-banner h1 {
color: #58d5ba;
font-size: 2rem;
margin: 0 0 0.3rem 0;
}
.header-banner p {
color: #8899aa;
font-size: 0.95rem;
margin: 0;
}
.contain .tabs .tab-nav button.selected {
border-color: #58d5ba !important;
color: #58d5ba !important;
}
footer { display: none !important; }
.asset-grid {
display: grid;
grid-template-columns: repeat(auto-fit, minmax(260px, 1fr));
gap: 0.75rem;
margin: 1rem 0;
}
.asset-card {
background: #161b22;
border: 1px solid #30363d;
border-radius: 8px;
padding: 0.9rem 1rem;
}
.asset-card h4 { color: #58d5ba; margin: 0 0 0.3rem 0; }
.asset-card p { color: #8b949e; margin: 0; font-size: 0.88rem; }
.asset-card a { color: #58a6ff; text-decoration: none; }
.quota-notice {
background: #1c1c1c;
border: 1px solid #3b3b00;
border-radius: 8px;
padding: 0.7rem 1rem;
margin-top: 0.5rem;
font-size: 0.85rem;
color: #c9a400;
}
"""
# ── Example prompts ────────────────────────────────────────────
# Seed prompts shown under the Strategy Chat box (Formulator model);
# gr.ChatInterface expects each example as a one-element list.
EXAMPLES = [
    ["Two coffee shops are opening on the same street and need to set prices. How should they think about this?"],
    ["I'm bidding on a house in a sealed-bid auction. How should I decide my bid?"],
    ["Three countries share a river and need to decide on pollution controls. What's the game theory perspective?"],
    ["My company is deciding whether to enter a market with one dominant player. Should we?"],
    ["Two political candidates are choosing their platform positions. How does game theory apply?"],
]
# Seed problems for the Quick Solve tab (Solver model) — formal
# statements, unlike the real-world scenarios above.
SOLVER_EXAMPLES = [
    ["Consider a 2-player normal form game with payoff matrix:\nPlayer 1 \\ Player 2: L R\nU (3,1) (0,2)\nD (1,3) (2,1)\nFind all Nash equilibria."],
    ["Three firms compete in Cournot competition. Market demand is P = 100 - Q, where Q = q1 + q2 + q3. Each firm has marginal cost c = 10. Find the Nash equilibrium quantities and profits."],
]
# ── About tab content ──────────────────────────────────────────
# Markdown (with embedded HTML cards styled by the .asset-grid /
# .asset-card CSS above) rendered in the "About" tab.
ABOUT_MD = """
# About GameTheory Chat
**GameTheory Chat** is the interactive demo for the **GameTheory-Bench** project β€” a 3-phase
pipeline that fine-tunes Qwen2.5-7B-Instruct into a game theory specialist.
---
## The Three-Phase Pipeline
| Phase | Model | Method | Result |
|-------|-------|--------|--------|
| **Phase 1 β€” Solver** | GameTheory-Solver | Supervised Fine-Tuning on 2,913 verified problems | 82% to **94% accuracy** |
| **Phase 2 β€” Reasoner** | GameTheory-Reasoner | GRPO reinforcement learning (750 steps) | **+6% reasoning quality** |
| **Phase 3 β€” Formulator** | GameTheory-Formulator-Model | SFT on 1,215 real-world formulation problems | **100% valid formulations** |
---
## HuggingFace Assets
<div class="asset-grid">
<div class="asset-card">
<h4>GameTheory-Bench</h4>
<p>2,913 computationally verified game theory problems across 8 categories</p>
<a href="https://huggingface.co/datasets/Alogotron/GameTheory-Bench" target="_blank">View Dataset</a>
</div>
<div class="asset-card">
<h4>GameTheory-Formulator</h4>
<p>1,215 real-world to game theory formulation problems (6 domains, 33 subtypes)</p>
<a href="https://huggingface.co/datasets/Alogotron/GameTheory-Formulator" target="_blank">View Dataset</a>
</div>
<div class="asset-card">
<h4>GameTheory-Solver</h4>
<p>Phase 1 SFT LoRA β€” accurate solver for formal game theory problems</p>
<a href="https://huggingface.co/Alogotron/GameTheory-Solver" target="_blank">View Model</a>
</div>
<div class="asset-card">
<h4>GameTheory-Reasoner</h4>
<p>Phase 2 GRPO LoRA β€” enhanced reasoning via reinforcement learning</p>
<a href="https://huggingface.co/Alogotron/GameTheory-Reasoner" target="_blank">View Model</a>
</div>
<div class="asset-card">
<h4>GameTheory-Formulator-Model</h4>
<p>Phase 3 SFT LoRA β€” translates real-world scenarios into formal models</p>
<a href="https://huggingface.co/Alogotron/GameTheory-Formulator-Model" target="_blank">View Model</a>
</div>
<div class="asset-card">
<h4>GameTheory-Solver-Demo</h4>
<p>Interactive demo for the Phase 1 Solver model</p>
<a href="https://huggingface.co/spaces/Alogotron/GameTheory-Solver-Demo" target="_blank">Open Space</a>
</div>
<div class="asset-card">
<h4>Game Theory LLM Blog</h4>
<p>Technical deep-dive into the full 3-phase training pipeline</p>
<a href="https://huggingface.co/spaces/Alogotron/game-theory-llm-blog" target="_blank">Read Blog</a>
</div>
</div>
---
## Benchmark Results
| Category | Base Model | After Phase 1 (SFT) | After Phase 2 (GRPO) |
|----------|-----------|---------------------|----------------------|
| 2x2 Normal Form | 78% | 95% | 96% |
| NxM Normal Form | 65% | 89% | 92% |
| Zero-Sum Games | 80% | 96% | 97% |
| Bayesian Games | 52% | 85% | 90% |
| Extensive Form | 58% | 88% | 92% |
| Mechanism Design | 45% | 82% | 88% |
| Cooperative Games | 60% | 90% | 93% |
| Evolutionary Games | 55% | 87% | 91% |
| **Overall** | **62%** | **89%** | **92%** |
---
## Technical Details
- **Base model**: Qwen2.5-7B-Instruct
- **Training**: QLoRA (r=32, alpha=64, 4-bit NF4 quantization)
- **Hardware**: Dual RTX 3090 (training), ZeroGPU A10G (inference)
- **Inference**: bfloat16, streaming generation
---
<p style="text-align:center; color:#555; font-size:0.85rem;">
Built by <a href="https://huggingface.co/Alogotron" style="color:#58d5ba;">Alogotron</a>
| Powered by Qwen2.5 + PEFT + Gradio
</p>
"""
# ── Build UI ───────────────────────────────────────────────────
# Dark GitHub-style palette layered on Gradio's Base theme. Each color
# is set for both the light and dark variants so the Space looks the
# same regardless of the visitor's browser color-scheme preference.
theme = gr.themes.Base(
    primary_hue=gr.themes.colors.teal,
    secondary_hue=gr.themes.colors.cyan,
    neutral_hue=gr.themes.colors.gray,
    font=[gr.themes.GoogleFont("Inter"), "system-ui", "sans-serif"],
).set(
    body_background_fill="#0d1117",
    body_background_fill_dark="#0d1117",
    block_background_fill="#161b22",
    block_background_fill_dark="#161b22",
    block_border_color="#30363d",
    block_border_color_dark="#30363d",
    input_background_fill="#0d1117",
    input_background_fill_dark="#0d1117",
    input_border_color="#30363d",
    input_border_color_dark="#30363d",
    button_primary_background_fill="#238636",
    button_primary_background_fill_dark="#238636",
    button_primary_background_fill_hover="#2ea043",
    button_primary_background_fill_hover_dark="#2ea043",
    button_primary_text_color="#ffffff",
    button_primary_text_color_dark="#ffffff",
)
with gr.Blocks(theme=theme, css=CSS, title="GameTheory Chat") as demo:
    # Header banner (styled by .header-banner in CSS)
    gr.HTML(
        '<div class="header-banner">'
        '<h1>🎯 GameTheory Chat</h1>'
        '<p>AI-Powered Strategic Reasoning &middot; Powered by Qwen2.5-7B + LoRA Fine-Tuning</p>'
        '</div>'
    )
    with gr.Tabs():
        # ── Tab 1: Strategy Chat ──────────────────────────
        # Multi-turn streaming chat backed by chat_respond (Formulator).
        with gr.TabItem("Strategy Chat", id="chat"):
            gr.Markdown(
                "Describe any real-world strategic scenario and the "
                "**Formulator** model will frame it as a game theory "
                "problem, solve it, and interpret the results."
            )
            chat = gr.ChatInterface(
                fn=chat_respond,
                type="messages",  # history passed as list of role/content dicts
                examples=EXAMPLES,
                cache_examples=False,  # caching would burn GPU quota at startup
                chatbot=gr.Chatbot(
                    height=520,
                    show_copy_button=True,
                    placeholder="Describe a strategic scenario...",
                ),
            )
            gr.HTML(
                '<div class="quota-notice">'
                '<strong>GPU Quota Notice:</strong> This Space runs on '
                'ZeroGPU. Free users get ~5 min/day of GPU time; '
                'Pro users get ~25 min/day. First message may take '
                '30-60s while the model loads.'
                '</div>'
            )
        # ── Tab 2: Quick Solve ────────────────────────────
        # Single-turn solve backed by solve_respond (Solver adapter).
        with gr.TabItem("Quick Solve", id="solve"):
            gr.Markdown(
                "Paste a **formal game theory problem** (payoff matrix, "
                "game description, etc.) and the **Solver** model will "
                "find equilibria and optimal strategies."
            )
            with gr.Row():
                # Left column: problem input, solve button, examples.
                with gr.Column(scale=1):
                    solve_input = gr.Textbox(
                        label="Problem Input",
                        placeholder="Paste a payoff matrix or formal game description...",
                        lines=10,
                    )
                    solve_btn = gr.Button("Solve", variant="primary", size="lg")
                    gr.Examples(
                        examples=SOLVER_EXAMPLES,
                        inputs=solve_input,
                        label="Example Problems",
                    )
                # Right column: read-only solution output.
                with gr.Column(scale=1):
                    solve_output = gr.Textbox(
                        label="Solution",
                        lines=18,
                        show_copy_button=True,
                        interactive=False,
                    )
            solve_btn.click(fn=solve_respond, inputs=solve_input, outputs=solve_output)
            gr.HTML(
                '<div class="quota-notice">'
                '<strong>GPU Quota Notice:</strong> Solving uses '
                'GPU time from your ZeroGPU quota. Typical solve '
                'takes 15-45 seconds.'
                '</div>'
            )
        # ── Tab 3: About ──────────────────────────────────
        with gr.TabItem("About", id="about"):
            gr.Markdown(ABOUT_MD)
if __name__ == "__main__":
    # Bound the request queue so a burst of users cannot pile up
    # unbounded work against the limited ZeroGPU allocation.
    demo.queue(max_size=10)
    demo.launch()