# GameTheory-Chat / app.py
# Uploaded by Alogotron via huggingface_hub (commit 10889b4, verified)
import spaces
import gradio as gr
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer, TextIteratorStreamer
from peft import PeftModel
from threading import Thread
# ── Configuration ──────────────────────────────────────────────
# Base checkpoint that both LoRA adapters were fine-tuned from.
BASE_MODEL = "Qwen/Qwen2.5-7B-Instruct"
# Phase 3 adapter: turns real-world scenarios into formal game models.
FORMULATOR_ADAPTER = "Alogotron/GameTheory-Formulator-Model"
# Phase 1 adapter: solves already-formalized game theory problems.
SOLVER_ADAPTER = "Alogotron/GameTheory-Solver"
# System prompt used by the Strategy Chat tab (Formulator model).
FORMULATOR_SYSTEM_PROMPT = (
    "You are a game theory expert. When given a real-world scenario, "
    "formulate it as a formal game theory model. Identify players, "
    "strategies, payoffs, and information structure. Solve the game "
    "and provide real-world interpretation of the results."
)
# System prompt used by the Quick Solve tab (Solver model).
SOLVER_SYSTEM_PROMPT = (
    "You are a game theory solver. Given a formal game theory problem, "
    "solve it step by step. Find all Nash equilibria, dominant strategies, "
    "and optimal solutions. Show your work clearly with mathematical rigor."
)
# ── Global state ───────────────────────────────────────────────
# Populated lazily by _load_adapter(), which must run inside a
# @spaces.GPU call (ZeroGPU allocates the GPU per request).
model = None            # PeftModel (base + active LoRA adapter), or None before first load
tokenizer = None        # AutoTokenizer for BASE_MODEL, or None before first load
current_adapter = None  # "formulator" | "solver" | None — which adapter is active
def _load_adapter(adapter_name: str):
    """Ensure the base model + requested LoRA adapter are active.

    Call only inside a @spaces.GPU context. The base model and tokenizer
    are loaded once and cached in module globals; subsequent switches
    only load/activate the other LoRA adapter in place. (The original
    re-downloaded and re-instantiated the full 7B base model and the
    tokenizer on every adapter switch, transiently doubling GPU memory
    and adding tens of seconds of load time.)

    Args:
        adapter_name: "formulator" selects FORMULATOR_ADAPTER; any other
            value selects SOLVER_ADAPTER (matches the original mapping).
    """
    global model, tokenizer, current_adapter
    if current_adapter == adapter_name:
        return  # requested adapter is already active — nothing to do
    adapter_id = FORMULATOR_ADAPTER if adapter_name == "formulator" else SOLVER_ADAPTER
    if tokenizer is None:
        tokenizer = AutoTokenizer.from_pretrained(BASE_MODEL)
    if model is None:
        # First call: load the base model once and attach the adapter.
        base = AutoModelForCausalLM.from_pretrained(
            BASE_MODEL,
            torch_dtype=torch.bfloat16,
            device_map="auto",
        )
        model = PeftModel.from_pretrained(base, adapter_id, adapter_name=adapter_name)
    else:
        # Later calls: reuse the resident base model; fetch the other
        # adapter only the first time it is requested, then just switch.
        if adapter_name not in model.peft_config:
            model.load_adapter(adapter_id, adapter_name=adapter_name)
        model.set_adapter(adapter_name)
    model.eval()
    current_adapter = adapter_name
# ── Inference functions ────────────────────────────────────────
@spaces.GPU
def chat_respond(message: str, history: list):
    """Streaming chat with the Formulator model.

    Builds a chat-template prompt from the system prompt, the prior
    turns in `history` (Gradio "messages" format: dicts with "role"
    and "content"), and the new user `message`, then streams tokens
    from model.generate running on a worker thread.

    Yields:
        The cumulative response text after each new token (Gradio
        ChatInterface streaming convention).
    """
    _load_adapter("formulator")
    messages = [{"role": "system", "content": FORMULATOR_SYSTEM_PROMPT}]
    for h in history:
        messages.append({"role": h["role"], "content": h["content"]})
    messages.append({"role": "user", "content": message})
    text = tokenizer.apply_chat_template(
        messages, tokenize=False, add_generation_prompt=True
    )
    inputs = tokenizer(text, return_tensors="pt").to(model.device)
    # skip_prompt=True so the echoed prompt is not streamed back.
    streamer = TextIteratorStreamer(
        tokenizer, skip_special_tokens=True, skip_prompt=True
    )
    gen_kwargs = dict(
        **inputs,
        max_new_tokens=2048,
        temperature=0.7,
        top_p=0.9,
        do_sample=True,
        streamer=streamer,
    )
    # generate() blocks, so it runs on a worker thread while we drain
    # the streamer on this one.
    thread = Thread(target=model.generate, kwargs=gen_kwargs)
    thread.start()
    try:
        response = ""
        for token in streamer:
            response += token
            yield response
    finally:
        # Fix: the original never joined the worker, so the generation
        # thread could outlive the request (and keep the GPU busy) if
        # the consumer stopped iterating early.
        thread.join()
@spaces.GPU
def solve_respond(problem_text: str):
    """Single-turn solve with the Solver model.

    Wraps `problem_text` in a chat-template prompt with the solver
    system prompt, generates once (no streaming), and returns only the
    newly generated text (prompt tokens are sliced off before decoding).
    Blank or missing input short-circuits with a hint message.
    """
    if not problem_text or not problem_text.strip():
        return "Please enter a game theory problem to solve."
    _load_adapter("solver")
    conversation = [
        {"role": "system", "content": SOLVER_SYSTEM_PROMPT},
        {"role": "user", "content": problem_text},
    ]
    prompt = tokenizer.apply_chat_template(
        conversation, tokenize=False, add_generation_prompt=True
    )
    encoded = tokenizer(prompt, return_tensors="pt").to(model.device)
    with torch.no_grad():
        generated = model.generate(
            **encoded,
            max_new_tokens=2048,
            temperature=0.7,
            top_p=0.9,
            do_sample=True,
        )
    # Decode only the completion — everything after the prompt tokens.
    prompt_len = encoded["input_ids"].shape[-1]
    return tokenizer.decode(generated[0][prompt_len:], skip_special_tokens=True)
# ── Custom CSS ─────────────────────────────────────────────────
# Injected into gr.Blocks(css=...). Dark GitHub-like palette; classes
# are referenced from the gr.HTML snippets below (.header-banner,
# .asset-grid/.asset-card in ABOUT_MD, .quota-notice in both tabs).
# `footer { display: none }` hides the default Gradio footer.
CSS = """
.header-banner {
text-align: center;
padding: 1.5rem 1rem;
background: linear-gradient(135deg, #0d1117 0%, #112240 50%, #0d3b3b 100%);
border-radius: 12px;
margin-bottom: 1rem;
border: 1px solid #1e3a5f;
}
.header-banner h1 {
color: #58d5ba;
font-size: 2rem;
margin: 0 0 0.3rem 0;
}
.header-banner p {
color: #8899aa;
font-size: 0.95rem;
margin: 0;
}
.contain .tabs .tab-nav button.selected {
border-color: #58d5ba !important;
color: #58d5ba !important;
}
footer { display: none !important; }
.asset-grid {
display: grid;
grid-template-columns: repeat(auto-fit, minmax(260px, 1fr));
gap: 0.75rem;
margin: 1rem 0;
}
.asset-card {
background: #161b22;
border: 1px solid #30363d;
border-radius: 8px;
padding: 0.9rem 1rem;
}
.asset-card h4 { color: #58d5ba; margin: 0 0 0.3rem 0; }
.asset-card p { color: #8b949e; margin: 0; font-size: 0.88rem; }
.asset-card a { color: #58a6ff; text-decoration: none; }
.quota-notice {
background: #1c1c1c;
border: 1px solid #3b3b00;
border-radius: 8px;
padding: 0.7rem 1rem;
margin-top: 0.5rem;
font-size: 0.85rem;
color: #c9a400;
}
"""
# ── Example prompts ────────────────────────────────────────────
# Seed prompts shown under the Strategy Chat box (Formulator model);
# gr.ChatInterface expects each example as a one-element list.
EXAMPLES = [
    ["Two coffee shops are opening on the same street and need to set prices. How should they think about this?"],
    ["I'm bidding on a house in a sealed-bid auction. How should I decide my bid?"],
    ["Three countries share a river and need to decide on pollution controls. What's the game theory perspective?"],
    ["My company is deciding whether to enter a market with one dominant player. Should we?"],
    ["Two political candidates are choosing their platform positions. How does game theory apply?"],
]
# Seed problems for the Quick Solve tab (Solver model) — formal
# statements, unlike the real-world scenarios above.
SOLVER_EXAMPLES = [
    ["Consider a 2-player normal form game with payoff matrix:\nPlayer 1 \\ Player 2: L R\nU (3,1) (0,2)\nD (1,3) (2,1)\nFind all Nash equilibria."],
    ["Three firms compete in Cournot competition. Market demand is P = 100 - Q, where Q = q1 + q2 + q3. Each firm has marginal cost c = 10. Find the Nash equilibrium quantities and profits."],
]
# ── About tab content ──────────────────────────────────────────
# Markdown (with embedded HTML cards styled by the .asset-grid /
# .asset-card CSS above) rendered in the "About" tab.
ABOUT_MD = """
# About GameTheory Chat
**GameTheory Chat** is the interactive demo for the **GameTheory-Bench** project β€” a 3-phase
pipeline that fine-tunes Qwen2.5-7B-Instruct into a game theory specialist.
---
## The Three-Phase Pipeline
| Phase | Model | Method | Result |
|-------|-------|--------|--------|
| **Phase 1 β€” Solver** | GameTheory-Solver | Supervised Fine-Tuning on 2,913 verified problems | 82% to **94% accuracy** |
| **Phase 2 β€” Reasoner** | GameTheory-Reasoner | GRPO reinforcement learning (750 steps) | **+6% reasoning quality** |
| **Phase 3 β€” Formulator** | GameTheory-Formulator-Model | SFT on 1,215 real-world formulation problems | **100% valid formulations** |
---
## HuggingFace Assets
<div class="asset-grid">
<div class="asset-card">
<h4>GameTheory-Bench</h4>
<p>2,913 computationally verified game theory problems across 8 categories</p>
<a href="https://huggingface.co/datasets/Alogotron/GameTheory-Bench" target="_blank">View Dataset</a>
</div>
<div class="asset-card">
<h4>GameTheory-Formulator</h4>
<p>1,215 real-world to game theory formulation problems (6 domains, 33 subtypes)</p>
<a href="https://huggingface.co/datasets/Alogotron/GameTheory-Formulator" target="_blank">View Dataset</a>
</div>
<div class="asset-card">
<h4>GameTheory-Solver</h4>
<p>Phase 1 SFT LoRA β€” accurate solver for formal game theory problems</p>
<a href="https://huggingface.co/Alogotron/GameTheory-Solver" target="_blank">View Model</a>
</div>
<div class="asset-card">
<h4>GameTheory-Reasoner</h4>
<p>Phase 2 GRPO LoRA β€” enhanced reasoning via reinforcement learning</p>
<a href="https://huggingface.co/Alogotron/GameTheory-Reasoner" target="_blank">View Model</a>
</div>
<div class="asset-card">
<h4>GameTheory-Formulator-Model</h4>
<p>Phase 3 SFT LoRA β€” translates real-world scenarios into formal models</p>
<a href="https://huggingface.co/Alogotron/GameTheory-Formulator-Model" target="_blank">View Model</a>
</div>
<div class="asset-card">
<h4>GameTheory-Solver-Demo</h4>
<p>Interactive demo for the Phase 1 Solver model</p>
<a href="https://huggingface.co/spaces/Alogotron/GameTheory-Solver-Demo" target="_blank">Open Space</a>
</div>
<div class="asset-card">
<h4>Game Theory LLM Blog</h4>
<p>Technical deep-dive into the full 3-phase training pipeline</p>
<a href="https://huggingface.co/spaces/Alogotron/game-theory-llm-blog" target="_blank">Read Blog</a>
</div>
</div>
---
## Benchmark Results
| Category | Base Model | After Phase 1 (SFT) | After Phase 2 (GRPO) |
|----------|-----------|---------------------|----------------------|
| 2x2 Normal Form | 78% | 95% | 96% |
| NxM Normal Form | 65% | 89% | 92% |
| Zero-Sum Games | 80% | 96% | 97% |
| Bayesian Games | 52% | 85% | 90% |
| Extensive Form | 58% | 88% | 92% |
| Mechanism Design | 45% | 82% | 88% |
| Cooperative Games | 60% | 90% | 93% |
| Evolutionary Games | 55% | 87% | 91% |
| **Overall** | **62%** | **89%** | **92%** |
---
## Technical Details
- **Base model**: Qwen2.5-7B-Instruct
- **Training**: QLoRA (r=32, alpha=64, 4-bit NF4 quantization)
- **Hardware**: Dual RTX 3090 (training), ZeroGPU A10G (inference)
- **Inference**: bfloat16, streaming generation
---
<p style="text-align:center; color:#555; font-size:0.85rem;">
Built by <a href="https://huggingface.co/Alogotron" style="color:#58d5ba;">Alogotron</a>
| Powered by Qwen2.5 + PEFT + Gradio
</p>
"""
# ── Build UI ───────────────────────────────────────────────────
# Dark GitHub-style palette layered on Gradio's Base theme. Each color
# is set for both the light and dark variants so the Space looks the
# same regardless of the visitor's browser color-scheme preference.
theme = gr.themes.Base(
    primary_hue=gr.themes.colors.teal,
    secondary_hue=gr.themes.colors.cyan,
    neutral_hue=gr.themes.colors.gray,
    font=[gr.themes.GoogleFont("Inter"), "system-ui", "sans-serif"],
).set(
    body_background_fill="#0d1117",
    body_background_fill_dark="#0d1117",
    block_background_fill="#161b22",
    block_background_fill_dark="#161b22",
    block_border_color="#30363d",
    block_border_color_dark="#30363d",
    input_background_fill="#0d1117",
    input_background_fill_dark="#0d1117",
    input_border_color="#30363d",
    input_border_color_dark="#30363d",
    button_primary_background_fill="#238636",
    button_primary_background_fill_dark="#238636",
    button_primary_background_fill_hover="#2ea043",
    button_primary_background_fill_hover_dark="#2ea043",
    button_primary_text_color="#ffffff",
    button_primary_text_color_dark="#ffffff",
)
with gr.Blocks(theme=theme, css=CSS, title="GameTheory Chat") as demo:
    # Header banner (styled by .header-banner in CSS)
    gr.HTML(
        '<div class="header-banner">'
        '<h1>🎯 GameTheory Chat</h1>'
        '<p>AI-Powered Strategic Reasoning &middot; Powered by Qwen2.5-7B + LoRA Fine-Tuning</p>'
        '</div>'
    )
    with gr.Tabs():
        # ── Tab 1: Strategy Chat ──────────────────────────
        # Multi-turn streaming chat backed by chat_respond (Formulator).
        with gr.TabItem("Strategy Chat", id="chat"):
            gr.Markdown(
                "Describe any real-world strategic scenario and the "
                "**Formulator** model will frame it as a game theory "
                "problem, solve it, and interpret the results."
            )
            chat = gr.ChatInterface(
                fn=chat_respond,
                type="messages",  # history passed as list of role/content dicts
                examples=EXAMPLES,
                cache_examples=False,  # caching would burn GPU quota at startup
                chatbot=gr.Chatbot(
                    height=520,
                    show_copy_button=True,
                    placeholder="Describe a strategic scenario...",
                ),
            )
            gr.HTML(
                '<div class="quota-notice">'
                '<strong>GPU Quota Notice:</strong> This Space runs on '
                'ZeroGPU. Free users get ~5 min/day of GPU time; '
                'Pro users get ~25 min/day. First message may take '
                '30-60s while the model loads.'
                '</div>'
            )
        # ── Tab 2: Quick Solve ────────────────────────────
        # Single-turn solve backed by solve_respond (Solver adapter).
        with gr.TabItem("Quick Solve", id="solve"):
            gr.Markdown(
                "Paste a **formal game theory problem** (payoff matrix, "
                "game description, etc.) and the **Solver** model will "
                "find equilibria and optimal strategies."
            )
            with gr.Row():
                # Left column: problem input, solve button, examples.
                with gr.Column(scale=1):
                    solve_input = gr.Textbox(
                        label="Problem Input",
                        placeholder="Paste a payoff matrix or formal game description...",
                        lines=10,
                    )
                    solve_btn = gr.Button("Solve", variant="primary", size="lg")
                    gr.Examples(
                        examples=SOLVER_EXAMPLES,
                        inputs=solve_input,
                        label="Example Problems",
                    )
                # Right column: read-only solution output.
                with gr.Column(scale=1):
                    solve_output = gr.Textbox(
                        label="Solution",
                        lines=18,
                        show_copy_button=True,
                        interactive=False,
                    )
            solve_btn.click(fn=solve_respond, inputs=solve_input, outputs=solve_output)
            gr.HTML(
                '<div class="quota-notice">'
                '<strong>GPU Quota Notice:</strong> Solving uses '
                'GPU time from your ZeroGPU quota. Typical solve '
                'takes 15-45 seconds.'
                '</div>'
            )
        # ── Tab 3: About ──────────────────────────────────
        with gr.TabItem("About", id="about"):
            gr.Markdown(ABOUT_MD)
if __name__ == "__main__":
    # Bound the request queue so a burst of users cannot pile up
    # unbounded work against the limited ZeroGPU allocation.
    demo.queue(max_size=10)
    demo.launch()