import spaces import gradio as gr import torch from transformers import AutoModelForCausalLM, AutoTokenizer, TextIteratorStreamer from peft import PeftModel from threading import Thread # ── Configuration ────────────────────────────────────────────── BASE_MODEL = "Qwen/Qwen2.5-7B-Instruct" FORMULATOR_ADAPTER = "Alogotron/GameTheory-Formulator-Model" SOLVER_ADAPTER = "Alogotron/GameTheory-Solver" FORMULATOR_SYSTEM_PROMPT = ( "You are a game theory expert. When given a real-world scenario, " "formulate it as a formal game theory model. Identify players, " "strategies, payoffs, and information structure. Solve the game " "and provide real-world interpretation of the results." ) SOLVER_SYSTEM_PROMPT = ( "You are a game theory solver. Given a formal game theory problem, " "solve it step by step. Find all Nash equilibria, dominant strategies, " "and optimal solutions. Show your work clearly with mathematical rigor." ) # ── Global state ─────────────────────────────────────────────── model = None tokenizer = None current_adapter = None def _load_adapter(adapter_name: str): """Load base model + LoRA adapter. 
Call only inside @spaces.GPU.""" global model, tokenizer, current_adapter if current_adapter == adapter_name: return tokenizer = AutoTokenizer.from_pretrained(BASE_MODEL) base = AutoModelForCausalLM.from_pretrained( BASE_MODEL, torch_dtype=torch.bfloat16, device_map="auto", ) adapter_id = FORMULATOR_ADAPTER if adapter_name == "formulator" else SOLVER_ADAPTER model = PeftModel.from_pretrained(base, adapter_id) model.eval() current_adapter = adapter_name # ── Inference functions ──────────────────────────────────────── @spaces.GPU def chat_respond(message: str, history: list): """Streaming chat with the Formulator model.""" _load_adapter("formulator") messages = [{"role": "system", "content": FORMULATOR_SYSTEM_PROMPT}] for h in history: messages.append({"role": h["role"], "content": h["content"]}) messages.append({"role": "user", "content": message}) text = tokenizer.apply_chat_template( messages, tokenize=False, add_generation_prompt=True ) inputs = tokenizer(text, return_tensors="pt").to(model.device) streamer = TextIteratorStreamer( tokenizer, skip_special_tokens=True, skip_prompt=True ) gen_kwargs = dict( **inputs, max_new_tokens=2048, temperature=0.7, top_p=0.9, do_sample=True, streamer=streamer, ) thread = Thread(target=model.generate, kwargs=gen_kwargs) thread.start() response = "" for token in streamer: response += token yield response @spaces.GPU def solve_respond(problem_text: str): """Single-turn solve with the Solver model.""" if not problem_text or not problem_text.strip(): return "Please enter a game theory problem to solve." 
_load_adapter("solver") messages = [ {"role": "system", "content": SOLVER_SYSTEM_PROMPT}, {"role": "user", "content": problem_text}, ] text = tokenizer.apply_chat_template( messages, tokenize=False, add_generation_prompt=True ) inputs = tokenizer(text, return_tensors="pt").to(model.device) with torch.no_grad(): outputs = model.generate( **inputs, max_new_tokens=2048, temperature=0.7, top_p=0.9, do_sample=True, ) result = tokenizer.decode( outputs[0][inputs["input_ids"].shape[-1]:], skip_special_tokens=True ) return result # ── Custom CSS ───────────────────────────────────────────────── CSS = """ .header-banner { text-align: center; padding: 1.5rem 1rem; background: linear-gradient(135deg, #0d1117 0%, #112240 50%, #0d3b3b 100%); border-radius: 12px; margin-bottom: 1rem; border: 1px solid #1e3a5f; } .header-banner h1 { color: #58d5ba; font-size: 2rem; margin: 0 0 0.3rem 0; } .header-banner p { color: #8899aa; font-size: 0.95rem; margin: 0; } .contain .tabs .tab-nav button.selected { border-color: #58d5ba !important; color: #58d5ba !important; } footer { display: none !important; } .asset-grid { display: grid; grid-template-columns: repeat(auto-fit, minmax(260px, 1fr)); gap: 0.75rem; margin: 1rem 0; } .asset-card { background: #161b22; border: 1px solid #30363d; border-radius: 8px; padding: 0.9rem 1rem; } .asset-card h4 { color: #58d5ba; margin: 0 0 0.3rem 0; } .asset-card p { color: #8b949e; margin: 0; font-size: 0.88rem; } .asset-card a { color: #58a6ff; text-decoration: none; } .quota-notice { background: #1c1c1c; border: 1px solid #3b3b00; border-radius: 8px; padding: 0.7rem 1rem; margin-top: 0.5rem; font-size: 0.85rem; color: #c9a400; } """ # ── Example prompts ──────────────────────────────────────────── EXAMPLES = [ ["Two coffee shops are opening on the same street and need to set prices. How should they think about this?"], ["I'm bidding on a house in a sealed-bid auction. 
How should I decide my bid?"], ["Three countries share a river and need to decide on pollution controls. What's the game theory perspective?"], ["My company is deciding whether to enter a market with one dominant player. Should we?"], ["Two political candidates are choosing their platform positions. How does game theory apply?"], ] SOLVER_EXAMPLES = [ ["Consider a 2-player normal form game with payoff matrix:\nPlayer 1 \\ Player 2: L R\nU (3,1) (0,2)\nD (1,3) (2,1)\nFind all Nash equilibria."], ["Three firms compete in Cournot competition. Market demand is P = 100 - Q, where Q = q1 + q2 + q3. Each firm has marginal cost c = 10. Find the Nash equilibrium quantities and profits."], ] # ── About tab content ────────────────────────────────────────── ABOUT_MD = """ # About GameTheory Chat **GameTheory Chat** is the interactive demo for the **GameTheory-Bench** project — a 3-phase pipeline that fine-tunes Qwen2.5-7B-Instruct into a game theory specialist. --- ## The Three-Phase Pipeline | Phase | Model | Method | Result | |-------|-------|--------|--------| | **Phase 1 — Solver** | GameTheory-Solver | Supervised Fine-Tuning on 2,913 verified problems | 82% to **94% accuracy** | | **Phase 2 — Reasoner** | GameTheory-Reasoner | GRPO reinforcement learning (750 steps) | **+6% reasoning quality** | | **Phase 3 — Formulator** | GameTheory-Formulator-Model | SFT on 1,215 real-world formulation problems | **100% valid formulations** | --- ## HuggingFace Assets

<!-- NOTE(review): asset-card markup and hrefs below were reconstructed after
     tag-stripping corrupted this section; repo IDs match the adapter constants
     at the top of the file — confirm the dataset/space/blog URLs. -->
<div class="asset-grid">

<div class="asset-card">
<h4>GameTheory-Bench</h4>
<p>2,913 computationally verified game theory problems across 8 categories</p>
<a href="https://huggingface.co/datasets/Alogotron/GameTheory-Bench">View Dataset</a>
</div>

<div class="asset-card">
<h4>GameTheory-Formulator</h4>
<p>1,215 real-world to game theory formulation problems (6 domains, 33 subtypes)</p>
<a href="https://huggingface.co/datasets/Alogotron/GameTheory-Formulator">View Dataset</a>
</div>

<div class="asset-card">
<h4>GameTheory-Solver</h4>
<p>Phase 1 SFT LoRA — accurate solver for formal game theory problems</p>
<a href="https://huggingface.co/Alogotron/GameTheory-Solver">View Model</a>
</div>

<div class="asset-card">
<h4>GameTheory-Reasoner</h4>
<p>Phase 2 GRPO LoRA — enhanced reasoning via reinforcement learning</p>
<a href="https://huggingface.co/Alogotron/GameTheory-Reasoner">View Model</a>
</div>

<div class="asset-card">
<h4>GameTheory-Formulator-Model</h4>
<p>Phase 3 SFT LoRA — translates real-world scenarios into formal models</p>
<a href="https://huggingface.co/Alogotron/GameTheory-Formulator-Model">View Model</a>
</div>

<div class="asset-card">
<h4>GameTheory-Solver-Demo</h4>
<p>Interactive demo for the Phase 1 Solver model</p>
<a href="https://huggingface.co/spaces/Alogotron/GameTheory-Solver-Demo">Open Space</a>
</div>

<div class="asset-card">
<h4>Game Theory LLM Blog</h4>
<p>Technical deep-dive into the full 3-phase training pipeline</p>
<!-- TODO: confirm blog URL -->
<a href="https://huggingface.co/Alogotron">Read Blog</a>
</div>

</div>

---

## Benchmark Results

| Category | Base Model | After Phase 1 (SFT) | After Phase 2 (GRPO) |
|----------|-----------|---------------------|----------------------|
| 2x2 Normal Form | 78% | 95% | 96% |
| NxM Normal Form | 65% | 89% | 92% |
| Zero-Sum Games | 80% | 96% | 97% |
| Bayesian Games | 52% | 85% | 90% |
| Extensive Form | 58% | 88% | 92% |
| Mechanism Design | 45% | 82% | 88% |
| Cooperative Games | 60% | 90% | 93% |
| Evolutionary Games | 55% | 87% | 91% |
| **Overall** | **62%** | **89%** | **92%** |

---

## Technical Details

- **Base model**: Qwen2.5-7B-Instruct
- **Training**: QLoRA (r=32, alpha=64, 4-bit NF4 quantization)
- **Hardware**: Dual RTX 3090 (training), ZeroGPU A10G (inference)
- **Inference**: bfloat16, streaming generation

---

<p align="center">Built by Alogotron | Powered by Qwen2.5 + PEFT + Gradio</p>
""" # ── Build UI ─────────────────────────────────────────────────── theme = gr.themes.Base( primary_hue=gr.themes.colors.teal, secondary_hue=gr.themes.colors.cyan, neutral_hue=gr.themes.colors.gray, font=[gr.themes.GoogleFont("Inter"), "system-ui", "sans-serif"], ).set( body_background_fill="#0d1117", body_background_fill_dark="#0d1117", block_background_fill="#161b22", block_background_fill_dark="#161b22", block_border_color="#30363d", block_border_color_dark="#30363d", input_background_fill="#0d1117", input_background_fill_dark="#0d1117", input_border_color="#30363d", input_border_color_dark="#30363d", button_primary_background_fill="#238636", button_primary_background_fill_dark="#238636", button_primary_background_fill_hover="#2ea043", button_primary_background_fill_hover_dark="#2ea043", button_primary_text_color="#ffffff", button_primary_text_color_dark="#ffffff", ) with gr.Blocks(theme=theme, css=CSS, title="GameTheory Chat") as demo: # Header gr.HTML( '
' '

🎯 GameTheory Chat

' '

AI-Powered Strategic Reasoning · Powered by Qwen2.5-7B + LoRA Fine-Tuning

' '
' ) with gr.Tabs(): # ── Tab 1: Strategy Chat ────────────────────────── with gr.TabItem("Strategy Chat", id="chat"): gr.Markdown( "Describe any real-world strategic scenario and the " "**Formulator** model will frame it as a game theory " "problem, solve it, and interpret the results." ) chat = gr.ChatInterface( fn=chat_respond, type="messages", examples=EXAMPLES, cache_examples=False, chatbot=gr.Chatbot( height=520, show_copy_button=True, placeholder="Describe a strategic scenario...", ), ) gr.HTML( '
' 'GPU Quota Notice: This Space runs on ' 'ZeroGPU. Free users get ~5 min/day of GPU time; ' 'Pro users get ~25 min/day. First message may take ' '30-60s while the model loads.' '
' ) # ── Tab 2: Quick Solve ──────────────────────────── with gr.TabItem("Quick Solve", id="solve"): gr.Markdown( "Paste a **formal game theory problem** (payoff matrix, " "game description, etc.) and the **Solver** model will " "find equilibria and optimal strategies." ) with gr.Row(): with gr.Column(scale=1): solve_input = gr.Textbox( label="Problem Input", placeholder="Paste a payoff matrix or formal game description...", lines=10, ) solve_btn = gr.Button("Solve", variant="primary", size="lg") gr.Examples( examples=SOLVER_EXAMPLES, inputs=solve_input, label="Example Problems", ) with gr.Column(scale=1): solve_output = gr.Textbox( label="Solution", lines=18, show_copy_button=True, interactive=False, ) solve_btn.click(fn=solve_respond, inputs=solve_input, outputs=solve_output) gr.HTML( '
' 'GPU Quota Notice: Solving uses ' 'GPU time from your ZeroGPU quota. Typical solve ' 'takes 15-45 seconds.' '
' ) # ── Tab 3: About ────────────────────────────────── with gr.TabItem("About", id="about"): gr.Markdown(ABOUT_MD) if __name__ == "__main__": demo.queue(max_size=10) demo.launch()