Spaces:
Runtime error
Runtime error
| import gradio as gr | |
| import numpy as np | |
| from typing import List, Tuple, Dict, Any | |
| import random | |
| import torch | |
| from transformers import AutoTokenizer, AutoModelForCausalLM | |
| import json | |
class SolitaireEnvironment:
    """Minimal, suit-less Solitaire state used by the demo.

    Cards are plain integer ranks 1-13, each appearing four times. Suits and
    colours are not modelled, so move legality is only approximated.

    Attributes are (re)created by :meth:`reset`:
        deck: undealt cards, in shuffled order.
        foundation: four (initially empty) foundation piles.
        tableau: seven tableau piles; pile ``i`` is dealt ``i + 1`` cards.
    """

    def __init__(self):
        self.reset()

    def reset(self):
        """Shuffle a fresh 52-card deck, clear every pile and deal the tableau."""
        self.deck = list(range(1, 14)) * 4  # ranks 1-13, four copies each
        random.shuffle(self.deck)
        self.foundation = [[] for _ in range(4)]
        self.tableau = [[] for _ in range(7)]
        self.deal_cards()

    def deal_cards(self):
        """Deal 1, 2, ..., 7 cards from the front of the deck to the tableau."""
        for pile_idx in range(len(self.tableau)):
            count = pile_idx + 1
            self.tableau[pile_idx] = self.deck[:count]
            del self.deck[:count]

    def get_valid_moves(self):
        """Return up to five candidate moves as human-readable strings.

        This is a simplified demonstration: the top card of every non-empty
        tableau pile is offered as a foundation move without checking whether
        the foundation could actually accept it.
        """
        moves = [
            f"Move {pile[-1]} to foundation" for pile in self.tableau if pile
        ]
        # NOTE: tableau-to-tableau moves are not generated. The original scan
        # for them had no body, so it never contributed any move.
        return moves[:5]  # limit to 5 moves for simplicity
class SolitaireRLTrainer:
    """Placeholder RL trainer that pairs a Solitaire environment with a tokenizer.

    No model weights are loaded or updated here; ``train_step`` only reports
    the reward it was given.
    """

    def __init__(self):
        self.env = SolitaireEnvironment()
        # NOTE: this checkpoint name refers to the 7B Mistral base model; only
        # its tokenizer is fetched here, not the model weights.
        self.model_name = "mistralai/Mistral-7B-v0.1"
        self.tokenizer = AutoTokenizer.from_pretrained(self.model_name)
        # Fall back to the EOS token when the tokenizer defines no pad token.
        if self.tokenizer.pad_token is None:
            self.tokenizer.pad_token = self.tokenizer.eos_token

    def get_game_state(self):
        """Return a snapshot dict of the environment.

        The ``tableau`` and ``foundation`` values are the environment's own
        lists (not copies); ``remaining_deck`` is the number of undealt cards.
        """
        return {
            "tableau": self.env.tableau,
            "foundation": self.env.foundation,
            "remaining_deck": len(self.env.deck),
        }

    def train_step(self, state_description: str, action: str, reward: float):
        """Report a (simulated) training step; no weights are updated."""
        # In a real implementation, this would update the model weights.
        return f"Training step completed. Reward: {reward}"

    def get_reward(self, action: str):
        """Return 1.0 when the action text mentions "foundation", else 0.0."""
        return 1.0 if "foundation" in action else 0.0
class MistralSolitaireAgent:
    """Demo agent that owns a trainer and scores actions with a random reward."""

    def __init__(self):
        self.trainer = SolitaireRLTrainer()
        self.game_history = []

    def take_action(self, action: str):
        """Simulate executing ``action`` and return its reward.

        Actions whose text contains "move" (case-insensitive) receive a random
        reward drawn uniformly from [0, 1]. Any other action, including a
        non-string value, yields 0.0; previously that path left the reward
        unassigned.
        """
        if isinstance(action, str) and "move" in action.lower():
            return random.uniform(0, 1)
        return 0.0
def train_mistral_solitaire(num_episodes: int, learning_rate: float):
    """Simulate training Mistral to play Solitaire and report per-episode progress.

    Args:
        num_episodes: Number of episodes to simulate. Coerced with ``int()``
            so a float coming from a UI slider is accepted.
        learning_rate: Accepted for interface compatibility; the simulation
            does not use it.

    Returns:
        A list with one dict per episode holding ``episode`` (0-based index),
        ``reward`` (``episode * 0.1``) and ``progress`` (percentage complete).
        Empty when ``num_episodes`` is zero or negative.
    """
    # No agent is constructed here: the loop is purely simulated, and building
    # one would fetch a tokenizer that is never used.
    total = int(num_episodes)
    return [
        {
            "episode": episode,
            "reward": episode * 0.1,
            "progress": (episode + 1) / total * 100,
        }
        for episode in range(total)
    ]
def play_solitaire_game(state_description: str, action: str):
    """Score a requested Solitaire move and return a result summary.

    This is a simulation: no game state is modified, and
    ``state_description`` is accepted only for interface compatibility.

    Args:
        state_description: Text description of the current game state (unused).
        action: Free-text action, e.g. ``"Move 5 to foundation"``.

    Returns:
        A dict with ``action_taken`` (the action as given), ``reward``,
        ``new_state`` (a descriptive string) and ``is_valid`` (always True).
    """
    # Match keywords case-insensitively so "Foundation" scores like "foundation".
    lowered = action.lower()
    if "foundation" in lowered:
        reward = 0.8
    elif "tableau" in lowered:
        reward = 0.5
    else:
        reward = 0.2

    return {
        "action_taken": action,
        "reward": reward,
        "new_state": f"Game state after {action}",
        "is_valid": True,
    }
def format_game_state(state: Dict) -> str:
    """Format a Solitaire game state as Markdown for display.

    Only the ``"tableau"`` entry of ``state`` is rendered. Each pile shows at
    most its last three cards joined by `` | ``, or ``Empty`` for an empty
    pile. A missing ``"tableau"`` key yields just the headings.
    """
    parts = ["## Current Solitaire Game State\n\n", "### Tableau Piles\n"]
    for number, pile in enumerate(state.get("tableau", []), start=1):
        pile_str = " | ".join(str(card) for card in pile[-3:]) if pile else "Empty"
        # The pile summary was previously computed but never added to the output.
        parts.append(f"- Pile {number}: {pile_str}\n")
    parts.append("\n")
    return "".join(parts)
def create_solitaire_ui():
    """Create the main Gradio interface for the Solitaire RL project.

    Builds a ``gr.Blocks`` app with three tabs (training, game play and
    analysis) plus an "Advanced Options" accordion, wires the two buttons to
    ``train_mistral_solitaire`` and ``play_solitaire_game``, and returns the
    un-launched ``Blocks`` object.
    """
    # NOTE(review): the block nesting below was reconstructed from a paste
    # that lost its indentation; confirm the layout matches the intended UI.
    with gr.Blocks() as demo:
        gr.Markdown("# 🎮 Mistral 3B Solitaire RL Trainer")
        gr.Markdown("Train Mistral 3B to play Solitaire using Reinforcement Learning")

        with gr.Row():
            with gr.Column(scale=1):
                gr.Markdown("### 🏗️ Built with [anycoder](https://huggingface.co/spaces/akhaliq/anycoder)")

        with gr.Tab("Training Interface"):
            with gr.Row():
                episodes = gr.Slider(
                    label="Number of Training Episodes",
                    minimum=10,
                    maximum=1000,
                    value=100,
                    step=10,
                    info="More episodes = better training but longer wait"
                )
                learning_rate = gr.Slider(
                    label="Learning Rate",
                    minimum=0.001,
                    maximum=0.1,
                    value=0.01,
                    step=0.001,
                )
            train_btn = gr.Button("Start Training", variant="primary")
            training_output = gr.JSON(label="Training Progress")
            train_btn.click(
                fn=train_mistral_solitaire,
                inputs=[episodes, learning_rate],
                outputs=[training_output],
                api_visibility="public"
            )

        with gr.Tab("Game Play"):
            with gr.Row():
                game_state = gr.Textbox(
                    label="Current Game State",
                    value="A♠ 2♠ 3♠ | K♥ | Q♦ | J♣",
                    lines=3
                )
            with gr.Row():
                action_input = gr.Textbox(
                    label="Action to Take",
                    placeholder="e.g., Move A♠ to foundation, Draw from deck"
                )
            play_btn = gr.Button("Execute Move", variant="secondary")
            game_result = gr.JSON(label="Game Result")
            play_btn.click(
                fn=play_solitaire_game,
                inputs=[game_state, action_input],
                outputs=[game_result],
                api_visibility="public"
            )

        with gr.Tab("Analysis"):
            with gr.Row():
                # Displayed only; no event handler reads or writes this box.
                move_history = gr.Textbox(
                    label="Move History",
                    lines=4
                )

        with gr.Accordion("Advanced Options", open=False):
            # Displayed only; not passed to any handler.
            exploration_rate = gr.Slider(
                label="Exploration Rate",
                minimum=0.01,
                maximum=1.0,
                value=0.1,
                step=0.01,
                info="Higher exploration = more experimentation"
            )

        gr.Markdown("---\n*This demo simulates training a language model to play Solitaire*")

    return demo
if __name__ == "__main__":
    # Script entry point: build the UI and start the Gradio server.
    demo = create_solitaire_ui()
    # NOTE(review): passing `theme` and `footer_links` to launch() depends on
    # the installed Gradio release accepting them there; confirm against the
    # pinned version.
    demo.launch(
        theme=gr.themes.Soft(
            primary_hue="blue",
            secondary_hue="indigo",
            neutral_hue="slate",
            font=gr.themes.GoogleFont("Inter"),
            text_size="lg",
            spacing_size="lg",
            radius_size="md"
        ).set(
            button_primary_background_fill="*primary_600",
            button_primary_background_fill_hover="*primary_700"
        ),
        # The footer link dict was previously missing its closing brace.
        footer_links=[{"label": "Built with anycoder", "url": "https://huggingface.co/spaces/akhaliq/anycoder"}]
    )