import spaces import gradio as gr import torch from transformers import AutoModelForCausalLM, AutoTokenizer, TextIteratorStreamer from peft import PeftModel from threading import Thread # ── Configuration ────────────────────────────────────────────── BASE_MODEL = "Qwen/Qwen2.5-7B-Instruct" FORMULATOR_ADAPTER = "Alogotron/GameTheory-Formulator-Model" SOLVER_ADAPTER = "Alogotron/GameTheory-Solver" FORMULATOR_SYSTEM_PROMPT = ( "You are a game theory expert. When given a real-world scenario, " "formulate it as a formal game theory model. Identify players, " "strategies, payoffs, and information structure. Solve the game " "and provide real-world interpretation of the results." ) SOLVER_SYSTEM_PROMPT = ( "You are a game theory solver. Given a formal game theory problem, " "solve it step by step. Find all Nash equilibria, dominant strategies, " "and optimal solutions. Show your work clearly with mathematical rigor." ) # ── Global state ─────────────────────────────────────────────── model = None tokenizer = None current_adapter = None def _load_adapter(adapter_name: str): """Load base model + LoRA adapter. 
Call only inside @spaces.GPU.""" global model, tokenizer, current_adapter if current_adapter == adapter_name: return tokenizer = AutoTokenizer.from_pretrained(BASE_MODEL) base = AutoModelForCausalLM.from_pretrained( BASE_MODEL, torch_dtype=torch.bfloat16, device_map="auto", ) adapter_id = FORMULATOR_ADAPTER if adapter_name == "formulator" else SOLVER_ADAPTER model = PeftModel.from_pretrained(base, adapter_id) model.eval() current_adapter = adapter_name # ── Inference functions ──────────────────────────────────────── @spaces.GPU def chat_respond(message: str, history: list): """Streaming chat with the Formulator model.""" _load_adapter("formulator") messages = [{"role": "system", "content": FORMULATOR_SYSTEM_PROMPT}] for h in history: messages.append({"role": h["role"], "content": h["content"]}) messages.append({"role": "user", "content": message}) text = tokenizer.apply_chat_template( messages, tokenize=False, add_generation_prompt=True ) inputs = tokenizer(text, return_tensors="pt").to(model.device) streamer = TextIteratorStreamer( tokenizer, skip_special_tokens=True, skip_prompt=True ) gen_kwargs = dict( **inputs, max_new_tokens=2048, temperature=0.7, top_p=0.9, do_sample=True, streamer=streamer, ) thread = Thread(target=model.generate, kwargs=gen_kwargs) thread.start() response = "" for token in streamer: response += token yield response @spaces.GPU def solve_respond(problem_text: str): """Single-turn solve with the Solver model.""" if not problem_text or not problem_text.strip(): return "Please enter a game theory problem to solve." 
_load_adapter("solver") messages = [ {"role": "system", "content": SOLVER_SYSTEM_PROMPT}, {"role": "user", "content": problem_text}, ] text = tokenizer.apply_chat_template( messages, tokenize=False, add_generation_prompt=True ) inputs = tokenizer(text, return_tensors="pt").to(model.device) with torch.no_grad(): outputs = model.generate( **inputs, max_new_tokens=2048, temperature=0.7, top_p=0.9, do_sample=True, ) result = tokenizer.decode( outputs[0][inputs["input_ids"].shape[-1]:], skip_special_tokens=True ) return result # ── Custom CSS ───────────────────────────────────────────────── CSS = """ .header-banner { text-align: center; padding: 1.5rem 1rem; background: linear-gradient(135deg, #0d1117 0%, #112240 50%, #0d3b3b 100%); border-radius: 12px; margin-bottom: 1rem; border: 1px solid #1e3a5f; } .header-banner h1 { color: #58d5ba; font-size: 2rem; margin: 0 0 0.3rem 0; } .header-banner p { color: #8899aa; font-size: 0.95rem; margin: 0; } .contain .tabs .tab-nav button.selected { border-color: #58d5ba !important; color: #58d5ba !important; } footer { display: none !important; } .asset-grid { display: grid; grid-template-columns: repeat(auto-fit, minmax(260px, 1fr)); gap: 0.75rem; margin: 1rem 0; } .asset-card { background: #161b22; border: 1px solid #30363d; border-radius: 8px; padding: 0.9rem 1rem; } .asset-card h4 { color: #58d5ba; margin: 0 0 0.3rem 0; } .asset-card p { color: #8b949e; margin: 0; font-size: 0.88rem; } .asset-card a { color: #58a6ff; text-decoration: none; } .quota-notice { background: #1c1c1c; border: 1px solid #3b3b00; border-radius: 8px; padding: 0.7rem 1rem; margin-top: 0.5rem; font-size: 0.85rem; color: #c9a400; } """ # ── Example prompts ──────────────────────────────────────────── EXAMPLES = [ ["Two coffee shops are opening on the same street and need to set prices. How should they think about this?"], ["I'm bidding on a house in a sealed-bid auction. 
How should I decide my bid?"], ["Three countries share a river and need to decide on pollution controls. What's the game theory perspective?"], ["My company is deciding whether to enter a market with one dominant player. Should we?"], ["Two political candidates are choosing their platform positions. How does game theory apply?"], ] SOLVER_EXAMPLES = [ ["Consider a 2-player normal form game with payoff matrix:\nPlayer 1 \\ Player 2: L R\nU (3,1) (0,2)\nD (1,3) (2,1)\nFind all Nash equilibria."], ["Three firms compete in Cournot competition. Market demand is P = 100 - Q, where Q = q1 + q2 + q3. Each firm has marginal cost c = 10. Find the Nash equilibrium quantities and profits."], ] # ── About tab content ────────────────────────────────────────── ABOUT_MD = """ # About GameTheory Chat **GameTheory Chat** is the interactive demo for the **GameTheory-Bench** project — a 3-phase pipeline that fine-tunes Qwen2.5-7B-Instruct into a game theory specialist. --- ## The Three-Phase Pipeline | Phase | Model | Method | Result | |-------|-------|--------|--------| | **Phase 1 — Solver** | GameTheory-Solver | Supervised Fine-Tuning on 2,913 verified problems | 82% to **94% accuracy** | | **Phase 2 — Reasoner** | GameTheory-Reasoner | GRPO reinforcement learning (750 steps) | **+6% reasoning quality** | | **Phase 3 — Formulator** | GameTheory-Formulator-Model | SFT on 1,215 real-world formulation problems | **100% valid formulations** | --- ## HuggingFace Assets

<!-- NOTE(review): asset-card markup and hrefs below were reconstructed after
     tag-stripping corrupted this section; repo IDs match the adapter constants
     at the top of the file — confirm the dataset/space/blog URLs. -->
<div class="asset-grid">

<div class="asset-card">
<h4>GameTheory-Bench</h4>
<p>2,913 computationally verified game theory problems across 8 categories</p>
<a href="https://huggingface.co/datasets/Alogotron/GameTheory-Bench">View Dataset</a>
</div>

<div class="asset-card">
<h4>GameTheory-Formulator</h4>
<p>1,215 real-world to game theory formulation problems (6 domains, 33 subtypes)</p>
<a href="https://huggingface.co/datasets/Alogotron/GameTheory-Formulator">View Dataset</a>
</div>

<div class="asset-card">
<h4>GameTheory-Solver</h4>
<p>Phase 1 SFT LoRA — accurate solver for formal game theory problems</p>
<a href="https://huggingface.co/Alogotron/GameTheory-Solver">View Model</a>
</div>

<div class="asset-card">
<h4>GameTheory-Reasoner</h4>
<p>Phase 2 GRPO LoRA — enhanced reasoning via reinforcement learning</p>
<a href="https://huggingface.co/Alogotron/GameTheory-Reasoner">View Model</a>
</div>

<div class="asset-card">
<h4>GameTheory-Formulator-Model</h4>
<p>Phase 3 SFT LoRA — translates real-world scenarios into formal models</p>
<a href="https://huggingface.co/Alogotron/GameTheory-Formulator-Model">View Model</a>
</div>

<div class="asset-card">
<h4>GameTheory-Solver-Demo</h4>
<p>Interactive demo for the Phase 1 Solver model</p>
<a href="https://huggingface.co/spaces/Alogotron/GameTheory-Solver-Demo">Open Space</a>
</div>

<div class="asset-card">
<h4>Game Theory LLM Blog</h4>
<p>Technical deep-dive into the full 3-phase training pipeline</p>
<!-- TODO: confirm blog URL -->
<a href="https://huggingface.co/Alogotron">Read Blog</a>
</div>

</div>

---

## Benchmark Results

| Category | Base Model | After Phase 1 (SFT) | After Phase 2 (GRPO) |
|----------|-----------|---------------------|----------------------|
| 2x2 Normal Form | 78% | 95% | 96% |
| NxM Normal Form | 65% | 89% | 92% |
| Zero-Sum Games | 80% | 96% | 97% |
| Bayesian Games | 52% | 85% | 90% |
| Extensive Form | 58% | 88% | 92% |
| Mechanism Design | 45% | 82% | 88% |
| Cooperative Games | 60% | 90% | 93% |
| Evolutionary Games | 55% | 87% | 91% |
| **Overall** | **62%** | **89%** | **92%** |

---

## Technical Details

- **Base model**: Qwen2.5-7B-Instruct
- **Training**: QLoRA (r=32, alpha=64, 4-bit NF4 quantization)
- **Hardware**: Dual RTX 3090 (training), ZeroGPU A10G (inference)
- **Inference**: bfloat16, streaming generation

---

<p align="center">Built by Alogotron | Powered by Qwen2.5 + PEFT + Gradio</p>
""" # ── Build UI ─────────────────────────────────────────────────── theme = gr.themes.Base( primary_hue=gr.themes.colors.teal, secondary_hue=gr.themes.colors.cyan, neutral_hue=gr.themes.colors.gray, font=[gr.themes.GoogleFont("Inter"), "system-ui", "sans-serif"], ).set( body_background_fill="#0d1117", body_background_fill_dark="#0d1117", block_background_fill="#161b22", block_background_fill_dark="#161b22", block_border_color="#30363d", block_border_color_dark="#30363d", input_background_fill="#0d1117", input_background_fill_dark="#0d1117", input_border_color="#30363d", input_border_color_dark="#30363d", button_primary_background_fill="#238636", button_primary_background_fill_dark="#238636", button_primary_background_fill_hover="#2ea043", button_primary_background_fill_hover_dark="#2ea043", button_primary_text_color="#ffffff", button_primary_text_color_dark="#ffffff", ) with gr.Blocks(theme=theme, css=CSS, title="GameTheory Chat") as demo: # Header gr.HTML( '
' '

🎯 GameTheory Chat

' '

AI-Powered Strategic Reasoning · Powered by Qwen2.5-7B + LoRA Fine-Tuning

' '
' ) with gr.Tabs(): # ── Tab 1: Strategy Chat ────────────────────────── with gr.TabItem("Strategy Chat", id="chat"): gr.Markdown( "Describe any real-world strategic scenario and the " "**Formulator** model will frame it as a game theory " "problem, solve it, and interpret the results." ) chat = gr.ChatInterface( fn=chat_respond, type="messages", examples=EXAMPLES, cache_examples=False, chatbot=gr.Chatbot( height=520, show_copy_button=True, placeholder="Describe a strategic scenario...", ), ) gr.HTML( '
' 'GPU Quota Notice: This Space runs on ' 'ZeroGPU. Free users get ~5 min/day of GPU time; ' 'Pro users get ~25 min/day. First message may take ' '30-60s while the model loads.' '
' ) # ── Tab 2: Quick Solve ──────────────────────────── with gr.TabItem("Quick Solve", id="solve"): gr.Markdown( "Paste a **formal game theory problem** (payoff matrix, " "game description, etc.) and the **Solver** model will " "find equilibria and optimal strategies." ) with gr.Row(): with gr.Column(scale=1): solve_input = gr.Textbox( label="Problem Input", placeholder="Paste a payoff matrix or formal game description...", lines=10, ) solve_btn = gr.Button("Solve", variant="primary", size="lg") gr.Examples( examples=SOLVER_EXAMPLES, inputs=solve_input, label="Example Problems", ) with gr.Column(scale=1): solve_output = gr.Textbox( label="Solution", lines=18, show_copy_button=True, interactive=False, ) solve_btn.click(fn=solve_respond, inputs=solve_input, outputs=solve_output) gr.HTML( '
' 'GPU Quota Notice: Solving uses ' 'GPU time from your ZeroGPU quota. Typical solve ' 'takes 15-45 seconds.' '
' ) # ── Tab 3: About ────────────────────────────────── with gr.TabItem("About", id="about"): gr.Markdown(ABOUT_MD) if __name__ == "__main__": demo.queue(max_size=10) demo.launch()