Spaces:
Sleeping
Sleeping
| import gradio as gr | |
| import json | |
| import random | |
| from pathlib import Path | |
| from huggingface_hub import InferenceClient | |
| # --------------------------------------------------------------------------- | |
| # Constants | |
| # --------------------------------------------------------------------------- | |
# Human-readable label for every dataset category key.
# The multiplication sign is the real U+00D7 character; the previous text
# carried a mis-decoded byte sequence in its place.
CATEGORY_DISPLAY = {
    "normal_form_2x2": "2×2 Normal Form Games",
    "normal_form_3x3": "3×3 Normal Form Games",
    "normal_form_3x4": "3×4 Normal Form Games",
    "normal_form_4x4": "4×4 Normal Form Games",
    "zero_sum": "Zero-Sum Games",
    "sequential_game": "Sequential Games",
    "auction_theory": "Auction Theory",
    "bayesian_game": "Bayesian Games",
    "cooperative_game": "Cooperative Games",
    "mechanism_design": "Mechanism Design",
}
# Emoji shown next to each category key in dropdown labels and metadata.
# NOTE(review): these glyphs were reconstructed from mis-decoded UTF-8 text;
# the zero-sum and mechanism-design icons in particular are a best guess and
# should be confirmed against the intended design.
CATEGORY_ICONS = {
    "normal_form_2x2": "🎲",
    "normal_form_3x3": "🎲",
    "normal_form_3x4": "🎲",
    "normal_form_4x4": "🎲",
    "zero_sum": "⚔️",
    "sequential_game": "🌳",
    "auction_theory": "🔨",
    "bayesian_game": "🔮",
    "cooperative_game": "🤝",
    "mechanism_design": "⚙️",
}
# Coloured-circle emoji used as a difficulty badge (green / yellow / red).
# The glyphs replace mis-decoded byte sequences in the previous text.
DIFFICULTY_COLORS = {
    "easy": "🟢",
    "medium": "🟡",
    "hard": "🔴",
}
# System message sent with every request from the "Solve Your Own" tab.
# The dash after "two phases" is a real em dash (U+2014); the previous text
# carried a mis-decoded byte sequence there.
SYSTEM_PROMPT = """You are GameTheory-Reasoner, an expert AI system specialized in game theory analysis. You were trained in two phases — Phase 1 (Solver) used supervised fine-tuning on computationally verified solutions, and Phase 2 (Reasoner) used Group Relative Policy Optimization (GRPO) with verifiable rewards to enhance step-by-step reasoning quality.
For every problem:
1. Think carefully and reason step-by-step through the problem before jumping to conclusions
2. Identify the game type and key components (players, strategies, payoffs, information structure)
3. Apply the appropriate solution concept (Nash Equilibrium, Subgame Perfect Equilibrium, Bayesian Nash Equilibrium, Core, Shapley Value, etc.)
4. Show complete step-by-step mathematical derivation with clear logical transitions between each step
5. Clearly state the final answer
6. Verify your solution by checking all equilibrium conditions are satisfied
Be precise with mathematical notation. Show all work. Format payoff matrices clearly using markdown tables when relevant."""
| # --------------------------------------------------------------------------- | |
| # Load examples | |
| # --------------------------------------------------------------------------- | |
def load_examples():
    """Load the curated example problems shipped next to this module.

    Reads ``examples.json`` from the directory containing this file and
    returns the decoded JSON value.

    The file is opened explicitly as UTF-8: the examples contain non-ASCII
    text, and relying on the platform default encoding makes loading fail
    (or silently corrupt text) on hosts whose locale is not UTF-8.

    Raises:
        FileNotFoundError: if ``examples.json`` is missing.
        json.JSONDecodeError: if the file is not valid JSON.
    """
    p = Path(__file__).parent / "examples.json"
    with p.open("r", encoding="utf-8") as f:
        return json.load(f)
# All examples, loaded once at import time.
EXAMPLES = load_examples()

# Examples grouped by their "category" key; order within a group follows
# the order of EXAMPLES.
BY_CATEGORY = {}
for ex in EXAMPLES:
    cat = ex["category"]
    BY_CATEGORY.setdefault(cat, []).append(ex)
| # --------------------------------------------------------------------------- | |
| # Inference client (lazy init) | |
| # --------------------------------------------------------------------------- | |
| import os | |
# Module-level cache for the inference client; filled on first use.
client = None


def get_client():
    """Return the shared InferenceClient, creating it on the first call.

    The client targets ``Qwen/Qwen2.5-7B-Instruct`` and is given the value of
    the ``HF_TOKEN`` environment variable (``None`` when it is not set).
    """
    global client
    if client is not None:
        return client
    client = InferenceClient(
        model="Qwen/Qwen2.5-7B-Instruct",
        token=os.environ.get("HF_TOKEN"),
    )
    return client
| # --------------------------------------------------------------------------- | |
| # Example browsing functions | |
| # --------------------------------------------------------------------------- | |
def get_category_choices():
    """Build the ``(label, key)`` pairs offered by the category dropdown.

    Categories are listed in ``CATEGORY_DISPLAY`` order; a category is
    included only when ``BY_CATEGORY`` has an entry for it. Each label is
    made of the category icon, its display name and its example count.
    """

    def _label(key):
        icon = CATEGORY_ICONS.get(key, "")
        count = len(BY_CATEGORY[key])
        return f"{icon} {CATEGORY_DISPLAY[key]} ({count} examples)"

    return [(_label(key), key) for key in CATEGORY_DISPLAY if key in BY_CATEGORY]
def get_random_example(category):
    """Return a randomly chosen, formatted example from ``category``.

    Returns the ``(metadata, problem, solution, answer)`` tuple produced by
    ``format_example``, or four empty strings when ``category`` is falsy or
    has no entry in ``BY_CATEGORY``.
    """
    if category and category in BY_CATEGORY:
        return format_example(random.choice(BY_CATEGORY[category]))
    return "", "", "", ""
def get_specific_example(category, idx):
    """Return the formatted example at position ``idx`` within ``category``.

    ``idx`` is coerced to ``int`` (matching ``on_slider_change``) so that a
    float such as ``3.0`` does not raise ``TypeError`` when used as a list
    index, and is then clamped to the valid range for the category.

    Returns the ``(metadata, problem, solution, answer)`` tuple produced by
    ``format_example``, or four empty strings when ``category`` is falsy or
    has no entry in ``BY_CATEGORY``.
    """
    if not category or category not in BY_CATEGORY:
        return "", "", "", ""
    examples = BY_CATEGORY[category]
    idx = max(0, min(int(idx), len(examples) - 1))
    return format_example(examples[idx])
def format_example(ex):
    """Turn one example record into the four Markdown strings the UI shows.

    Args:
        ex: mapping with the keys ``category``, ``difficulty``, ``id``,
            ``problem``, ``solution`` and ``answer``; ``tags`` is optional.

    Returns:
        ``(metadata, problem, solution, answer)`` where ``metadata`` is a
        short Markdown summary (category, difficulty, up to six tags, id)
        and ``answer`` is the answer prefixed with a bold "Answer:" label.

    Raises:
        KeyError: if one of the required keys is missing.
    """
    category = ex["category"]
    difficulty = str(ex["difficulty"])
    # Look the badge up case-insensitively so "Hard" and "hard" both match.
    diff_icon = DIFFICULTY_COLORS.get(difficulty.lower(), "")
    cat_icon = CATEGORY_ICONS.get(category, "")
    # Tolerate a null "tags" field and non-string tag values.
    tags = ", ".join(str(tag) for tag in (ex.get("tags") or [])[:6])
    # NOTE(review): the tag/ID emoji replace mis-decoded byte sequences; the
    # exact glyphs are reconstructed and should be confirmed.
    metadata = (
        f"{cat_icon} **Category:** {CATEGORY_DISPLAY.get(category, category)}\n"
        f"{diff_icon} **Difficulty:** {difficulty.title()}\n"
        f"🏷️ **Tags:** {tags}\n"
        f"🆔 **ID:** `{ex['id']}`"
    )
    problem = ex["problem"]
    solution = ex["solution"]
    answer = f"**Answer:** {ex['answer']}"
    return metadata, problem, solution, answer
def on_category_change(category):
    """Handle a new category selection (also used for the initial page load).

    Picks a random example from the category and returns its four formatted
    Markdown strings followed by a slider update.

    The slider update sets the new maximum AND moves the slider to the index
    of the example being shown. Previously the slider was always reset to 0
    while a random example was displayed, so the slider position and the
    visible example disagreed.

    When ``category`` is falsy or has no entry in ``BY_CATEGORY`` the four
    strings are empty and the slider is collapsed to the single value 0.
    """
    if not category or category not in BY_CATEGORY:
        return "", "", "", "", gr.update(maximum=0, value=0)
    examples = BY_CATEGORY[category]
    idx = random.randrange(len(examples))
    meta, prob, sol, ans = format_example(examples[idx])
    return meta, prob, sol, ans, gr.update(maximum=len(examples) - 1, value=idx)
def on_slider_change(category, idx):
    """Show the example selected by the slider.

    ``idx`` is converted to ``int`` and clamped into the valid index range
    of the category before lookup. Returns the four strings produced by
    ``format_example``, or four empty strings when ``category`` is falsy or
    has no entry in ``BY_CATEGORY``.
    """
    if category and category in BY_CATEGORY:
        pool = BY_CATEGORY[category]
        last = len(pool) - 1
        position = int(idx)
        if position > last:
            position = last
        if position < 0:
            position = 0
        return format_example(pool[position])
    return "", "", "", ""
def on_random_click(category):
    """Jump to a random example in the current category.

    Returns the four formatted strings for the chosen example plus a slider
    update that moves the slider to the chosen index. When ``category`` is
    falsy or has no entry in ``BY_CATEGORY`` the strings are empty and the
    slider is left unchanged.
    """
    if category and category in BY_CATEGORY:
        pool = BY_CATEGORY[category]
        idx = random.randint(0, len(pool) - 1)
        return (*format_example(pool[idx]), gr.update(value=idx))
    return "", "", "", "", gr.update()
| # --------------------------------------------------------------------------- | |
| # Inference function | |
| # --------------------------------------------------------------------------- | |
def solve_problem(problem_text, temperature, max_tokens):
    """Send a user-supplied problem to the chat model and return its answer.

    Args:
        problem_text: the problem statement; ``None`` or whitespace-only
            input is rejected with a warning message instead of a crash.
        temperature: sampling temperature, converted with ``float``.
        max_tokens: response length limit, converted with ``int``.

    Returns:
        Markdown text: the model's reply, or a warning/error message. This
        function never raises; it is the UI boundary, so any failure while
        calling the API is reported to the user as text.
    """
    prompt = (problem_text or "").strip()
    if not prompt:
        return "⚠️ Please enter a game theory problem to solve."
    try:
        response = get_client().chat_completion(
            messages=[
                {"role": "system", "content": SYSTEM_PROMPT},
                {"role": "user", "content": prompt},
            ],
            max_tokens=int(max_tokens),
            temperature=float(temperature),
        )
        content = response.choices[0].message.content
    except Exception as e:  # deliberate catch-all: surface the failure in the UI
        return f"❌ **Error calling inference API:** {e}\n\nPlease try again or check if the HF token is configured correctly."
    # Guard against a missing/empty completion so the output pane is never blank.
    return content or "⚠️ The model returned an empty response. Please try again."
| # --------------------------------------------------------------------------- | |
| # Custom CSS | |
| # --------------------------------------------------------------------------- | |
# Stylesheet passed to gr.Blocks(css=...). The box classes (.problem-box,
# .solution-box, .answer-box, .metadata-box) are attached to Markdown
# components through their elem_classes argument in build_app; the final
# rule hides the page footer.
CSS = """
.main-title {
text-align: center;
margin-bottom: 0.5em;
}
.subtitle {
text-align: center;
color: #666;
margin-bottom: 1.5em;
}
.problem-box {
border-left: 4px solid #4A90D9;
padding-left: 1em;
background: #f8f9ff;
border-radius: 4px;
}
.solution-box {
border-left: 4px solid #27ae60;
padding-left: 1em;
background: #f0fff4;
border-radius: 4px;
}
.answer-box {
background: #fff3e0;
padding: 0.8em;
border-radius: 8px;
border: 1px solid #ffcc80;
}
.metadata-box {
background: #f5f5f5;
padding: 0.8em;
border-radius: 8px;
font-size: 0.9em;
}
footer { display: none !important; }
"""
| # --------------------------------------------------------------------------- | |
| # Build Gradio UI | |
| # --------------------------------------------------------------------------- | |
def build_app():
    """Assemble the Gradio Blocks UI and return it (not yet launched).

    The app has three tabs: an example browser, a free-form solver that
    calls ``solve_problem``, and a static About page.

    Changes compared with the previous version:
      * the initial category falls back to the first available one when
        ``normal_form_2x2`` has no examples, instead of pre-selecting a
        value that is not among the dropdown choices;
      * the slider's initial maximum is derived from the initial category
        rather than hard-coded to 9;
      * the example count in the intro line comes from ``EXAMPLES``;
      * mis-decoded characters in labels and Markdown are repaired.

    NOTE(review): the emoji in labels and the nesting of layout containers
    were reconstructed from a whitespace-stripped, mis-decoded copy of this
    function; confirm both against the intended design.
    """
    category_choices = get_category_choices()
    available = [key for _, key in category_choices]
    preferred = "normal_form_2x2"
    if preferred in available:
        initial_category = preferred
    else:
        initial_category = available[0] if available else None
    initial_max = max(len(BY_CATEGORY.get(initial_category, [])) - 1, 0)

    with gr.Blocks(
        css=CSS,
        title="GameTheory-Solver",
        theme=gr.themes.Soft(
            primary_hue="blue",
            secondary_hue="green",
        ),
    ) as app:
        # Header
        # NOTE(review): the four links below have empty link text and render
        # as nothing; they look like badge links whose image part is missing.
        gr.Markdown(
            """# 🎯 GameTheory-Solver

*An AI system trained to solve game theory problems with rigorous step-by-step reasoning*

[](https://huggingface.co/Alogotron/GameTheory-Reasoner)
[](https://huggingface.co/2reb/GameTheory-Solver)
[](https://huggingface.co/datasets/2reb/GameTheory-Bench)
[](https://huggingface.co/datasets/2reb/GameTheory-Bench)
"""
        )
        with gr.Tabs():
            # =================================================================
            # TAB 1: Browse Examples
            # =================================================================
            with gr.TabItem("📚 Browse Examples", id="browse"):
                gr.Markdown(
                    f"Browse {len(EXAMPLES)} curated problems from the GameTheory-Bench dataset with verified solutions."
                )
                with gr.Row():
                    with gr.Column(scale=2):
                        category_dd = gr.Dropdown(
                            choices=category_choices,
                            label="🎮 Select Category",
                            value=initial_category,
                            interactive=True,
                        )
                    with gr.Column(scale=1):
                        random_btn = gr.Button("🎲 Random Example", variant="primary", size="lg")
                example_slider = gr.Slider(
                    minimum=0,
                    maximum=initial_max,
                    step=1,
                    value=0,
                    label="Example #",
                    interactive=True,
                )
                metadata_md = gr.Markdown(elem_classes=["metadata-box"])
                with gr.Row():
                    with gr.Column():
                        gr.Markdown("### 📋 Problem")
                        problem_md = gr.Markdown(elem_classes=["problem-box"])
                with gr.Row():
                    with gr.Column():
                        gr.Markdown("### ✅ Solution")
                        solution_md = gr.Markdown(elem_classes=["solution-box"])
                        answer_md = gr.Markdown(elem_classes=["answer-box"])
                # Events
                browse_outputs = [metadata_md, problem_md, solution_md, answer_md]
                category_dd.change(
                    fn=on_category_change,
                    inputs=[category_dd],
                    outputs=browse_outputs + [example_slider],
                )
                example_slider.change(
                    fn=on_slider_change,
                    inputs=[category_dd, example_slider],
                    outputs=browse_outputs,
                )
                random_btn.click(
                    fn=on_random_click,
                    inputs=[category_dd],
                    outputs=browse_outputs + [example_slider],
                )
            # =================================================================
            # TAB 2: Solve Your Own
            # =================================================================
            with gr.TabItem("🧠 Solve Your Own", id="solve"):
                gr.Markdown(
                    """Enter any game theory problem and get an AI-generated solution.

*Powered by Qwen2.5-7B-Instruct via the HuggingFace Inference API with the GameTheory-Reasoner system prompt, trained through SFT + GRPO reinforcement learning for enhanced step-by-step reasoning.*"""
                )
                with gr.Row():
                    with gr.Column(scale=3):
                        problem_input = gr.Textbox(
                            label="📝 Enter Your Problem",
                            placeholder="Describe a game theory problem...\n\nExample: Consider a 2-player game where Player 1 chooses Up or Down, Player 2 chooses Left or Right. Payoffs are: (Up,Left)=(3,2), (Up,Right)=(1,4), (Down,Left)=(2,3), (Down,Right)=(4,1). Find all Nash Equilibria.",
                            lines=8,
                            max_lines=20,
                        )
                    with gr.Column(scale=1):
                        temperature_slider = gr.Slider(
                            minimum=0.0,
                            maximum=1.0,
                            value=0.3,
                            step=0.05,
                            label="🌡️ Temperature",
                            info="Lower = more focused",
                        )
                        max_tokens_slider = gr.Slider(
                            minimum=256,
                            maximum=4096,
                            value=2048,
                            step=256,
                            label="📏 Max Tokens",
                            info="Maximum response length",
                        )
                        solve_btn = gr.Button("🚀 Solve", variant="primary", size="lg")
                gr.Markdown("### 💡 Solution")
                solution_output = gr.Markdown(elem_classes=["solution-box"])
                # Example problems
                gr.Markdown("### 📌 Quick Examples")
                gr.Examples(
                    examples=[
                        ["""Consider the following 2x2 game:\n\nPlayer 1 \\ Player 2 | Left | Right\n--- | --- | ---\nUp | (3, 2) | (0, 4)\nDown | (1, 3) | (2, 1)\n\nFind all Nash Equilibria (pure and mixed)."""],
                        ["""Three firms compete in a Cournot oligopoly. Market demand is P = 100 - Q where Q = q1 + q2 + q3. Each firm has marginal cost c = 10 and no fixed costs. Find the Nash Equilibrium quantities and profits."""],
                        ["""Consider a first-price sealed-bid auction with 3 bidders. Each bidder's value is drawn independently from a uniform distribution on [0, 100]. Find the Bayesian Nash Equilibrium bidding strategy."""],
                        ["""Two players play a sequential game. Player 1 moves first choosing L or R. If L, Player 2 chooses A or B with payoffs (L,A)=(2,1) and (L,B)=(0,3). If R, Player 2 chooses C or D with payoffs (R,C)=(1,2) and (R,D)=(3,0). Find the Subgame Perfect Nash Equilibrium using backward induction."""],
                        ["""Consider a cooperative game with 3 players {1,2,3} and characteristic function: v({})=0, v({1})=0, v({2})=0, v({3})=0, v({1,2})=6, v({1,3})=8, v({2,3})=7, v({1,2,3})=12. Compute the Shapley value for each player."""],
                    ],
                    inputs=[problem_input],
                    label="Click to load an example:",
                )
                solve_btn.click(
                    fn=solve_problem,
                    inputs=[problem_input, temperature_slider, max_tokens_slider],
                    outputs=[solution_output],
                )
            # =================================================================
            # TAB 3: About
            # =================================================================
            with gr.TabItem("ℹ️ About", id="about"):
                gr.Markdown(
                    """
## About GameTheory-Reasoner

### What is this?

GameTheory-Reasoner is an AI system trained in **two phases** to solve game theory problems with rigorous mathematical reasoning.
It was trained on the **GameTheory-Bench** dataset — a collection of 2,913 computationally verified game theory problems.

### Training Pipeline

| Phase | Method | Model | Description |
|-------|--------|-------|-------------|
| Base | — | Qwen2.5-7B-Instruct | Pre-trained foundation model |
| Phase 1: **Solver** | Supervised Fine-Tuning (SFT) | [GameTheory-Solver](https://huggingface.co/2reb/GameTheory-Solver) | Fine-tuned on verified solutions with LoRA adapters |
| Phase 2: **Reasoner** | GRPO (RL) | [GameTheory-Reasoner](https://huggingface.co/Alogotron/GameTheory-Reasoner) | Reinforcement learning with verifiable rewards for reasoning quality |

### Benchmark Results: Base → Solver → Reasoner

| Metric | Base (Qwen2.5-7B) | Solver (Phase 1 SFT) | Reasoner (Phase 2 GRPO) |
|--------|:------------------:|:---------------------:|:-----------------------:|
| **Overall Accuracy** | 82% | **94%** | **94%** |
| **Hard Problems** | 66.7% | 94.4% | **94.4%** |
| **Reasoning Quality** | 0.48 | 0.51 | **0.54 (+6%)** |

### Per-Category Breakdown

| Category | Base | Solver | Reasoner |
|----------|:----:|:------:|:--------:|
| 🎲 Normal Form 2×2 | 100% | 100% | 100% |
| 🎲 Normal Form 3×3 | 100% | 100% | 100% |
| 🎲 Normal Form 3×4 | 80% | 80% | 80% |
| 🎲 Normal Form 4×4 | 80% | 80% | 80% |
| ⚔️ Zero-Sum Games | 100% | 100% | 100% |
| 🌳 Sequential Games | 100% | 100% | 100% |
| 🔨 Auction Theory | 80% | 100% | 100% |
| 🔮 Bayesian Games | 0% | 100% | 100% |
| 🤝 Cooperative Games | 80% | 80% | 80% |
| ⚙️ Mechanism Design | 60% | 100% | 100% |

### Phase 2: GRPO with Verifiable Rewards

The Reasoner model was trained using **Group Relative Policy Optimization (GRPO)**, a reinforcement learning method that:

- Generates multiple solution candidates per problem
- Scores each using **verifiable reward functions** (answer correctness, format compliance, reasoning quality)
- Updates the policy to favor higher-quality reasoning chains
- Achieves the same 94% accuracy as the Solver while producing **+6% better reasoning quality** (measured by structured reasoning metrics)

### Supported Problem Types

| Category | Description | Examples |
|----------|------------|----------|
| 🎲 Normal Form Games | Strategic form games with payoff matrices | 2×2, 3×3, 3×4, 4×4 games |
| ⚔️ Zero-Sum Games | Strictly competitive games | Minimax, saddle points, mixed strategies |
| 🌳 Sequential Games | Extensive form games with move order | Backward induction, subgame perfection |
| 🔨 Auction Theory | Bidding and mechanism problems | First/second price, Dutch, English auctions |
| 🔮 Bayesian Games | Incomplete information games | BNE, type spaces, belief updating |
| 🤝 Cooperative Games | Coalition-based games | Shapley value, core, nucleolus |
| ⚙️ Mechanism Design | Incentive design problems | VCG, revelation principle, IC constraints |

### How It Works

- **Browse Examples tab:** Shows pre-loaded problems from the dataset with verified solutions
- **Solve Your Own tab:** Sends your problem to Qwen2.5-7B-Instruct via the HuggingFace Inference API with the GameTheory-Reasoner system prompt

### Links

- 🤗 [Reasoner Model (Phase 2)](https://huggingface.co/Alogotron/GameTheory-Reasoner)
- 🤗 [Solver Model (Phase 1)](https://huggingface.co/2reb/GameTheory-Solver)
- 📊 [Dataset on HuggingFace](https://huggingface.co/datasets/2reb/GameTheory-Bench)
"""
                )
        # Load initial example on start
        app.load(
            fn=on_category_change,
            inputs=[category_dd],
            outputs=browse_outputs + [example_slider],
        )
    return app
| # --------------------------------------------------------------------------- | |
| # Main | |
| # --------------------------------------------------------------------------- | |
# Script entry point: build the UI and start the Gradio server only when this
# file is executed directly, not when it is imported.
if __name__ == "__main__":
    app = build_app()
    app.launch()