import gradio as gr
import numpy as np
from typing import List, Tuple, Dict, Any
import random
import torch
from transformers import AutoTokenizer, AutoModelForCausalLM
import json

class SolitaireEnvironment:
    """Simplified Solitaire state used by the demo.

    Cards are plain integers 1-13 (four copies of each rank); suits and
    face-up/face-down status are not tracked.
    """

    def __init__(self):
        self.reset()

    def reset(self):
        """Shuffle a fresh 52-card deck and deal a new game."""
        self.deck = list(range(1, 14)) * 4  # ranks 1-13, once per suit
        random.shuffle(self.deck)
        self.foundation = [[] for _ in range(4)]  # four foundation piles
        self.tableau = [[] for _ in range(7)]  # seven tableau piles
        self.deal_cards()

    def deal_cards(self):
        """Deal 1, 2, ..., 7 cards from the front of the deck to the tableau.

        Dealt cards are removed from ``self.deck``; whatever is left is the
        remaining stock.
        """
        for i in range(7):
            self.tableau[i] = self.deck[:i + 1]
            self.deck = self.deck[i + 1:]

    def get_valid_moves(self):
        """Return up to five candidate moves as human-readable strings.

        This is a demonstration-level move generator: the top card of every
        non-empty tableau pile is offered as a foundation move without
        checking whether the foundation could actually accept it.
        Tableau-to-tableau moves are not generated.
        """
        moves = [
            f"Move {pile[-1]} to foundation"
            for pile in self.tableau
            if pile
        ]
        return moves[:5]  # limit to 5 moves for simplicity

class SolitaireRLTrainer:
    """Bundles the game environment and tokenizer for the simulated RL loop.

    No model weights are loaded or updated by this class; ``train_step`` is a
    placeholder that only reports the reward it was given.
    """

    def __init__(self):
        self.env = SolitaireEnvironment()
        # Checkpoint whose tokenizer is loaded below.
        self.model_name = "mistralai/Mistral-7B-v0.1"
        self.tokenizer = AutoTokenizer.from_pretrained(self.model_name)
        if self.tokenizer.pad_token is None:
            # Fall back to the EOS token when the tokenizer defines no pad token.
            self.tokenizer.pad_token = self.tokenizer.eos_token

    def get_game_state(self):
        """Return a snapshot dict of the environment.

        The ``tableau`` and ``foundation`` values are the environment's own
        lists (not copies); ``remaining_deck`` is the number of undealt cards.
        """
        return {
            "tableau": self.env.tableau,
            "foundation": self.env.foundation,
            "remaining_deck": len(self.env.deck),
        }

    def train_step(self, state_description: str, action: str, reward: float):
        """Report a completed (simulated) training step for the given reward.

        ``state_description`` and ``action`` are accepted but not used; a
        real implementation would update the model weights here.
        """
        return f"Training step completed. Reward: {reward}"

    def get_reward(self, action: str):
        """Return 1.0 when the action text mentions "foundation", else 0.0."""
        if "foundation" in action:
            return 1.0
        return 0.0

class MistralSolitaireAgent:
    """Agent wrapper that owns a trainer and a list of past moves."""

    def __init__(self):
        self.trainer = SolitaireRLTrainer()
        self.game_history = []

    def take_action(self, action: str):
        """Simulate taking ``action`` and return its reward.

        Any action whose text contains "move" (case-insensitive) receives a
        random reward drawn uniformly from [0, 1]. Every other action gets
        0.0, so the method always returns a float instead of failing on an
        unassigned reward.
        """
        if "move" in action.lower():
            return random.uniform(0, 1)
        return 0.0

def train_mistral_solitaire(num_episodes: int, learning_rate: float):
    """Simulate RL training and return one progress record per episode.

    No model is loaded or updated: each record carries a synthetic reward
    that grows by 0.1 per episode and the completion percentage so far.
    ``learning_rate`` is accepted for interface compatibility and is
    currently unused.

    Returns a list of dicts with the keys ``episode``, ``reward`` and
    ``progress``; the list is empty when ``num_episodes`` is zero or negative.
    """
    # UI sliders may deliver the count as a float; range() needs an int.
    total_episodes = int(num_episodes)
    return [
        {
            "episode": episode,
            "reward": episode * 0.1,
            "progress": (episode + 1) / total_episodes * 100,
        }
        for episode in range(total_episodes)
    ]

def play_solitaire_game(state_description: str, action: str):
    """Score a move in the (simulated) Solitaire game.

    No real game state is modified. The reward depends only on keywords in
    ``action`` (matched case-insensitively): 0.8 for "foundation", 0.5 for
    "tableau", 0.2 otherwise. ``state_description`` is accepted but not used.

    Returns a dict with ``action_taken``, ``reward``, ``new_state`` (a
    descriptive string) and ``is_valid`` (always True in this simulation).
    """
    # Tolerate a missing action so an empty UI field does not raise.
    normalized = (action or "").lower()

    if "foundation" in normalized:
        reward = 0.8
    elif "tableau" in normalized:
        reward = 0.5
    else:
        reward = 0.2

    return {
        "action_taken": action,
        "reward": reward,
        "new_state": f"Game state after {action}",
        "is_valid": True,
    }

def format_game_state(state: Dict) -> str:
    """Format a Solitaire game state as Markdown for display.

    Only the ``tableau`` entry of ``state`` is rendered. Each pile becomes
    one bullet showing its last three cards (the end of the list), or
    "Empty" for a pile with no cards. A missing ``tableau`` key yields just
    the headings.
    """
    lines = [
        "## Current Solitaire Game State\n\n",
        "### Tableau Piles\n",
    ]

    for number, pile in enumerate(state.get("tableau", []), start=1):
        pile_str = " | ".join(str(card) for card in pile[-3:]) if pile else "Empty"
        lines.append(f"- Pile {number}: {pile_str}\n")

    return "".join(lines)

def create_solitaire_ui():
    """Build and return the Gradio Blocks app for the Solitaire RL demo.

    Layout, top to bottom: title and attribution, a "Training Interface" tab
    wired to ``train_mistral_solitaire``, a "Game Play" tab wired to
    ``play_solitaire_game``, an "Analysis" tab with a move-history box, an
    "Advanced Options" accordion, and a footer note. The exploration-rate
    slider and move-history box are displayed but not connected to any
    handler.
    """
    with gr.Blocks() as app:
        # Header and attribution
        gr.Markdown("# 🎮 Mistral 3B Solitaire RL Trainer")
        gr.Markdown("Train Mistral 3B to play Solitaire using Reinforcement Learning")

        with gr.Row(), gr.Column(scale=1):
            gr.Markdown("### 🏗️ Built with [anycoder](https://huggingface.co/spaces/akhaliq/anycoder)")

        # Training tab: two hyperparameter sliders feeding the trainer
        with gr.Tab("Training Interface"):
            with gr.Row():
                episode_slider = gr.Slider(
                    minimum=10,
                    maximum=1000,
                    step=10,
                    value=100,
                    label="Number of Training Episodes",
                    info="More episodes = better training but longer wait",
                )
                lr_slider = gr.Slider(
                    minimum=0.001,
                    maximum=0.1,
                    step=0.001,
                    value=0.01,
                    label="Learning Rate",
                )

            start_button = gr.Button("Start Training", variant="primary")
            progress_view = gr.JSON(label="Training Progress")

            start_button.click(
                fn=train_mistral_solitaire,
                inputs=[episode_slider, lr_slider],
                outputs=[progress_view],
                api_visibility="public",
            )

        # Game tab: free-text state and action, result shown as JSON
        with gr.Tab("Game Play"):
            with gr.Row():
                state_box = gr.Textbox(
                    value="A♠ 2♠ 3♠ | K♥ | Q♦ | J♣",
                    lines=3,
                    label="Current Game State",
                )

            with gr.Row():
                action_box = gr.Textbox(
                    placeholder="e.g., Move A♠ to foundation, Draw from deck",
                    label="Action to Take",
                )

            move_button = gr.Button("Execute Move", variant="secondary")
            result_view = gr.JSON(label="Game Result")

            move_button.click(
                fn=play_solitaire_game,
                inputs=[state_box, action_box],
                outputs=[result_view],
                api_visibility="public",
            )

        # Analysis tab: display-only history box
        with gr.Tab("Analysis"):
            with gr.Row():
                gr.Textbox(lines=4, label="Move History")

        # Collapsed by default; the slider is not wired to any callback
        with gr.Accordion("Advanced Options", open=False):
            gr.Slider(
                minimum=0.01,
                maximum=1.0,
                step=0.01,
                value=0.1,
                label="Exploration Rate",
                info="Higher exploration = more experimentation",
            )

        gr.Markdown("---\n*This demo simulates training a language model to play Solitaire*")

    return app

if __name__ == "__main__":
    # Build the interface and serve it with a customized Soft theme.
    # NOTE(review): passing theme= and footer_links= to launch() depends on
    # the installed Gradio release accepting them there — confirm the
    # pinned version.
    demo = create_solitaire_ui()
    demo.launch(
        theme=gr.themes.Soft(
            primary_hue="blue",
            secondary_hue="indigo",
            neutral_hue="slate",
            font=gr.themes.GoogleFont("Inter"),
            text_size="lg",
            spacing_size="lg",
            radius_size="md",
        ).set(
            button_primary_background_fill="*primary_600",
            button_primary_background_fill_hover="*primary_700",
        ),
        footer_links=[
            {
                "label": "Built with anycoder",
                "url": "https://huggingface.co/spaces/akhaliq/anycoder",
            },
        ],
    )