Spaces:
Running on Zero
Running on Zero
| import spaces | |
| import gradio as gr | |
| import torch | |
| from transformers import AutoModelForCausalLM, AutoTokenizer, TextIteratorStreamer | |
| from peft import PeftModel | |
| from threading import Thread | |
# ── Configuration ──────────────────────────────────────────────
# Shared base checkpoint; both LoRA adapters below were trained on it.
BASE_MODEL = "Qwen/Qwen2.5-7B-Instruct"
# Phase 3 adapter: translates real-world scenarios into formal games.
FORMULATOR_ADAPTER = "Alogotron/GameTheory-Formulator-Model"
# Phase 1 adapter: solves formal game theory problems.
SOLVER_ADAPTER = "Alogotron/GameTheory-Solver"
# System prompt used by the Strategy Chat tab (Formulator adapter).
FORMULATOR_SYSTEM_PROMPT = (
    "You are a game theory expert. When given a real-world scenario, "
    "formulate it as a formal game theory model. Identify players, "
    "strategies, payoffs, and information structure. Solve the game "
    "and provide real-world interpretation of the results."
)
# System prompt used by the Quick Solve tab (Solver adapter).
SOLVER_SYSTEM_PROMPT = (
    "You are a game theory solver. Given a formal game theory problem, "
    "solve it step by step. Find all Nash equilibria, dominant strategies, "
    "and optimal solutions. Show your work clearly with mathematical rigor."
)
# ── Global state ───────────────────────────────────────────────
# Populated lazily by _load_adapter(); only one adapter is resident at a time.
model = None            # PeftModel (base + active LoRA) or None before first load
tokenizer = None        # AutoTokenizer shared by both adapters
current_adapter = None  # "formulator" | "solver" | None — which adapter is loaded
def _load_adapter(adapter_name: str) -> None:
    """Load the base model with the requested LoRA adapter.

    Must be called from inside a ``@spaces.GPU``-decorated function so the
    weights land on the ZeroGPU device. No-op when the requested adapter is
    already active.

    Args:
        adapter_name: ``"formulator"`` or ``"solver"``; any other value
            falls back to the solver adapter.
    """
    global model, tokenizer, current_adapter
    if current_adapter == adapter_name:
        return
    # Release the previously loaded model before pulling in a second full
    # 7B base model — otherwise switching adapters doubles GPU memory use
    # and OOMs on the ZeroGPU allocation.
    if model is not None:
        del model
        model = None
        if torch.cuda.is_available():
            torch.cuda.empty_cache()
    # The tokenizer is identical for both adapters; load it only once.
    if tokenizer is None:
        tokenizer = AutoTokenizer.from_pretrained(BASE_MODEL)
    base = AutoModelForCausalLM.from_pretrained(
        BASE_MODEL,
        torch_dtype=torch.bfloat16,
        device_map="auto",
    )
    adapter_id = FORMULATOR_ADAPTER if adapter_name == "formulator" else SOLVER_ADAPTER
    model = PeftModel.from_pretrained(base, adapter_id)
    model.eval()
    current_adapter = adapter_name
# ── Inference functions ────────────────────────────────────────
@spaces.GPU  # required on ZeroGPU: _load_adapter must run inside a GPU context
def chat_respond(message: str, history: list):
    """Stream a chat reply from the Formulator model.

    Args:
        message: The latest user message.
        history: Prior turns in Gradio "messages" format
            (list of {"role": ..., "content": ...} dicts).

    Yields:
        The cumulative response text, one chunk at a time, so the
        ChatInterface renders a live stream.
    """
    _load_adapter("formulator")
    messages = [{"role": "system", "content": FORMULATOR_SYSTEM_PROMPT}]
    for h in history:
        messages.append({"role": h["role"], "content": h["content"]})
    messages.append({"role": "user", "content": message})
    text = tokenizer.apply_chat_template(
        messages, tokenize=False, add_generation_prompt=True
    )
    inputs = tokenizer(text, return_tensors="pt").to(model.device)
    # skip_prompt=True keeps the echoed prompt out of the streamed output.
    streamer = TextIteratorStreamer(
        tokenizer, skip_special_tokens=True, skip_prompt=True
    )
    gen_kwargs = dict(
        **inputs,
        max_new_tokens=2048,
        temperature=0.7,
        top_p=0.9,
        do_sample=True,
        streamer=streamer,
    )
    # generate() blocks, so run it in a worker thread and consume the
    # streamer on this (generator) thread.
    thread = Thread(target=model.generate, kwargs=gen_kwargs)
    thread.start()
    response = ""
    for token in streamer:
        response += token
        yield response
    # Streamer is exhausted only after generation finishes; join to reap
    # the worker thread and surface any late errors.
    thread.join()
@spaces.GPU  # required on ZeroGPU: _load_adapter must run inside a GPU context
def solve_respond(problem_text: str) -> str:
    """Solve a formal game theory problem with the Solver model.

    Args:
        problem_text: The problem statement (payoff matrix, game
            description, etc.). Blank input short-circuits with a hint.

    Returns:
        The model's full solution text (prompt tokens stripped).
    """
    if not problem_text or not problem_text.strip():
        return "Please enter a game theory problem to solve."
    _load_adapter("solver")
    messages = [
        {"role": "system", "content": SOLVER_SYSTEM_PROMPT},
        {"role": "user", "content": problem_text},
    ]
    text = tokenizer.apply_chat_template(
        messages, tokenize=False, add_generation_prompt=True
    )
    inputs = tokenizer(text, return_tensors="pt").to(model.device)
    with torch.no_grad():
        outputs = model.generate(
            **inputs,
            max_new_tokens=2048,
            temperature=0.7,
            top_p=0.9,
            do_sample=True,
        )
    # Slice off the prompt so only newly generated tokens are decoded.
    result = tokenizer.decode(
        outputs[0][inputs["input_ids"].shape[-1]:], skip_special_tokens=True
    )
    return result
# ── Custom CSS ─────────────────────────────────────────────────
# Dark GitHub-style palette. Class names (.header-banner, .asset-grid,
# .asset-card, .quota-notice) are referenced by gr.HTML snippets and the
# ABOUT_MD markdown below.
CSS = """
.header-banner {
    text-align: center;
    padding: 1.5rem 1rem;
    background: linear-gradient(135deg, #0d1117 0%, #112240 50%, #0d3b3b 100%);
    border-radius: 12px;
    margin-bottom: 1rem;
    border: 1px solid #1e3a5f;
}
.header-banner h1 {
    color: #58d5ba;
    font-size: 2rem;
    margin: 0 0 0.3rem 0;
}
.header-banner p {
    color: #8899aa;
    font-size: 0.95rem;
    margin: 0;
}
.contain .tabs .tab-nav button.selected {
    border-color: #58d5ba !important;
    color: #58d5ba !important;
}
footer { display: none !important; }
.asset-grid {
    display: grid;
    grid-template-columns: repeat(auto-fit, minmax(260px, 1fr));
    gap: 0.75rem;
    margin: 1rem 0;
}
.asset-card {
    background: #161b22;
    border: 1px solid #30363d;
    border-radius: 8px;
    padding: 0.9rem 1rem;
}
.asset-card h4 { color: #58d5ba; margin: 0 0 0.3rem 0; }
.asset-card p { color: #8b949e; margin: 0; font-size: 0.88rem; }
.asset-card a { color: #58a6ff; text-decoration: none; }
.quota-notice {
    background: #1c1c1c;
    border: 1px solid #3b3b00;
    border-radius: 8px;
    padding: 0.7rem 1rem;
    margin-top: 0.5rem;
    font-size: 0.85rem;
    color: #c9a400;
}
"""
# ── Example prompts ────────────────────────────────────────────
# Clickable examples for the Strategy Chat tab (real-world scenarios).
EXAMPLES = [
    ["Two coffee shops are opening on the same street and need to set prices. How should they think about this?"],
    ["I'm bidding on a house in a sealed-bid auction. How should I decide my bid?"],
    ["Three countries share a river and need to decide on pollution controls. What's the game theory perspective?"],
    ["My company is deciding whether to enter a market with one dominant player. Should we?"],
    ["Two political candidates are choosing their platform positions. How does game theory apply?"],
]
# Clickable examples for the Quick Solve tab (formal problems).
SOLVER_EXAMPLES = [
    ["Consider a 2-player normal form game with payoff matrix:\nPlayer 1 \\ Player 2: L R\nU (3,1) (0,2)\nD (1,3) (2,1)\nFind all Nash equilibria."],
    ["Three firms compete in Cournot competition. Market demand is P = 100 - Q, where Q = q1 + q2 + q3. Each firm has marginal cost c = 10. Find the Nash equilibrium quantities and profits."],
]
# ── About tab content ──────────────────────────────────────────
# Markdown (with embedded HTML cards styled by CSS above) rendered in the
# About tab. Runtime text — do not edit casually.
ABOUT_MD = """
# About GameTheory Chat
**GameTheory Chat** is the interactive demo for the **GameTheory-Bench** project β a 3-phase
pipeline that fine-tunes Qwen2.5-7B-Instruct into a game theory specialist.
---
## The Three-Phase Pipeline
| Phase | Model | Method | Result |
|-------|-------|--------|--------|
| **Phase 1 β Solver** | GameTheory-Solver | Supervised Fine-Tuning on 2,913 verified problems | 82% to **94% accuracy** |
| **Phase 2 β Reasoner** | GameTheory-Reasoner | GRPO reinforcement learning (750 steps) | **+6% reasoning quality** |
| **Phase 3 β Formulator** | GameTheory-Formulator-Model | SFT on 1,215 real-world formulation problems | **100% valid formulations** |
---
## HuggingFace Assets
<div class="asset-grid">
<div class="asset-card">
<h4>GameTheory-Bench</h4>
<p>2,913 computationally verified game theory problems across 8 categories</p>
<a href="https://huggingface.co/datasets/Alogotron/GameTheory-Bench" target="_blank">View Dataset</a>
</div>
<div class="asset-card">
<h4>GameTheory-Formulator</h4>
<p>1,215 real-world to game theory formulation problems (6 domains, 33 subtypes)</p>
<a href="https://huggingface.co/datasets/Alogotron/GameTheory-Formulator" target="_blank">View Dataset</a>
</div>
<div class="asset-card">
<h4>GameTheory-Solver</h4>
<p>Phase 1 SFT LoRA β accurate solver for formal game theory problems</p>
<a href="https://huggingface.co/Alogotron/GameTheory-Solver" target="_blank">View Model</a>
</div>
<div class="asset-card">
<h4>GameTheory-Reasoner</h4>
<p>Phase 2 GRPO LoRA β enhanced reasoning via reinforcement learning</p>
<a href="https://huggingface.co/Alogotron/GameTheory-Reasoner" target="_blank">View Model</a>
</div>
<div class="asset-card">
<h4>GameTheory-Formulator-Model</h4>
<p>Phase 3 SFT LoRA β translates real-world scenarios into formal models</p>
<a href="https://huggingface.co/Alogotron/GameTheory-Formulator-Model" target="_blank">View Model</a>
</div>
<div class="asset-card">
<h4>GameTheory-Solver-Demo</h4>
<p>Interactive demo for the Phase 1 Solver model</p>
<a href="https://huggingface.co/spaces/Alogotron/GameTheory-Solver-Demo" target="_blank">Open Space</a>
</div>
<div class="asset-card">
<h4>Game Theory LLM Blog</h4>
<p>Technical deep-dive into the full 3-phase training pipeline</p>
<a href="https://huggingface.co/spaces/Alogotron/game-theory-llm-blog" target="_blank">Read Blog</a>
</div>
</div>
---
## Benchmark Results
| Category | Base Model | After Phase 1 (SFT) | After Phase 2 (GRPO) |
|----------|-----------|---------------------|----------------------|
| 2x2 Normal Form | 78% | 95% | 96% |
| NxM Normal Form | 65% | 89% | 92% |
| Zero-Sum Games | 80% | 96% | 97% |
| Bayesian Games | 52% | 85% | 90% |
| Extensive Form | 58% | 88% | 92% |
| Mechanism Design | 45% | 82% | 88% |
| Cooperative Games | 60% | 90% | 93% |
| Evolutionary Games | 55% | 87% | 91% |
| **Overall** | **62%** | **89%** | **92%** |
---
## Technical Details
- **Base model**: Qwen2.5-7B-Instruct
- **Training**: QLoRA (r=32, alpha=64, 4-bit NF4 quantization)
- **Hardware**: Dual RTX 3090 (training), ZeroGPU A10G (inference)
- **Inference**: bfloat16, streaming generation
---
<p style="text-align:center; color:#555; font-size:0.85rem;">
Built by <a href="https://huggingface.co/Alogotron" style="color:#58d5ba;">Alogotron</a>
| Powered by Qwen2.5 + PEFT + Gradio
</p>
"""
# ── Build UI ───────────────────────────────────────────────────
# GitHub-dark theme: teal accents on #0d1117/#161b22 backgrounds. Each
# color is set for both light and dark variants so the look is identical
# regardless of the user's browser preference.
theme = gr.themes.Base(
    primary_hue=gr.themes.colors.teal,
    secondary_hue=gr.themes.colors.cyan,
    neutral_hue=gr.themes.colors.gray,
    font=[gr.themes.GoogleFont("Inter"), "system-ui", "sans-serif"],
).set(
    body_background_fill="#0d1117",
    body_background_fill_dark="#0d1117",
    block_background_fill="#161b22",
    block_background_fill_dark="#161b22",
    block_border_color="#30363d",
    block_border_color_dark="#30363d",
    input_background_fill="#0d1117",
    input_background_fill_dark="#0d1117",
    input_border_color="#30363d",
    input_border_color_dark="#30363d",
    button_primary_background_fill="#238636",
    button_primary_background_fill_dark="#238636",
    button_primary_background_fill_hover="#2ea043",
    button_primary_background_fill_hover_dark="#2ea043",
    button_primary_text_color="#ffffff",
    button_primary_text_color_dark="#ffffff",
)
# App layout: header banner + three tabs (streaming chat, single-shot
# solver, static about page). `demo` is launched by the __main__ guard.
with gr.Blocks(theme=theme, css=CSS, title="GameTheory Chat") as demo:
    # Header
    gr.HTML(
        '<div class="header-banner">'
        '<h1>π― GameTheory Chat</h1>'
        '<p>AI-Powered Strategic Reasoning · Powered by Qwen2.5-7B + LoRA Fine-Tuning</p>'
        '</div>'
    )
    with gr.Tabs():
        # ── Tab 1: Strategy Chat ──────────────────────────
        # Multi-turn streaming chat backed by the Formulator adapter.
        with gr.TabItem("Strategy Chat", id="chat"):
            gr.Markdown(
                "Describe any real-world strategic scenario and the "
                "**Formulator** model will frame it as a game theory "
                "problem, solve it, and interpret the results."
            )
            # type="messages" matches the {"role", "content"} dict format
            # that chat_respond expects in `history`.
            chat = gr.ChatInterface(
                fn=chat_respond,
                type="messages",
                examples=EXAMPLES,
                cache_examples=False,  # examples would burn GPU quota if pre-run
                chatbot=gr.Chatbot(
                    height=520,
                    show_copy_button=True,
                    placeholder="Describe a strategic scenario...",
                ),
            )
            gr.HTML(
                '<div class="quota-notice">'
                '<strong>GPU Quota Notice:</strong> This Space runs on '
                'ZeroGPU. Free users get ~5 min/day of GPU time; '
                'Pro users get ~25 min/day. First message may take '
                '30-60s while the model loads.'
                '</div>'
            )
        # ── Tab 2: Quick Solve ────────────────────────────
        # Single-turn problem -> solution, backed by the Solver adapter.
        with gr.TabItem("Quick Solve", id="solve"):
            gr.Markdown(
                "Paste a **formal game theory problem** (payoff matrix, "
                "game description, etc.) and the **Solver** model will "
                "find equilibria and optimal strategies."
            )
            with gr.Row():
                with gr.Column(scale=1):
                    solve_input = gr.Textbox(
                        label="Problem Input",
                        placeholder="Paste a payoff matrix or formal game description...",
                        lines=10,
                    )
                    solve_btn = gr.Button("Solve", variant="primary", size="lg")
                    gr.Examples(
                        examples=SOLVER_EXAMPLES,
                        inputs=solve_input,
                        label="Example Problems",
                    )
                with gr.Column(scale=1):
                    solve_output = gr.Textbox(
                        label="Solution",
                        lines=18,
                        show_copy_button=True,
                        interactive=False,
                    )
            solve_btn.click(fn=solve_respond, inputs=solve_input, outputs=solve_output)
            gr.HTML(
                '<div class="quota-notice">'
                '<strong>GPU Quota Notice:</strong> Solving uses '
                'GPU time from your ZeroGPU quota. Typical solve '
                'takes 15-45 seconds.'
                '</div>'
            )
        # ── Tab 3: About ──────────────────────────────────
        with gr.TabItem("About", id="about"):
            gr.Markdown(ABOUT_MD)
if __name__ == "__main__":
    # Queue bounds concurrent requests so GPU work is serialized fairly.
    demo.queue(max_size=10)
    demo.launch()