Spaces:
Runtime error
Runtime error
| import gradio as gr | |
| import numpy as np | |
| from typing import List, Tuple, Dict, Any | |
| import random | |
class SolitaireEnvironment:
    """Minimal solitaire-style game state used by the demo.

    Cards are plain integer ranks 1-13, four copies of each; no suit
    information is stored. The state is the undealt ``deck``, four
    ``foundation`` piles and seven ``tableau`` piles.
    """

    def __init__(self):
        self.reset()

    def reset(self):
        """Build and shuffle a fresh 52-card deck, clear the piles and deal."""
        # Initialize a solitaire game state
        self.deck = list(range(1, 14)) * 4  # 1-13, four copies of each rank
        random.shuffle(self.deck)
        self.foundation = [[], [], [], []]  # Four foundation piles
        self.tableau = [[] for _ in range(7)]  # Seven tableau piles
        self.deal_cards()

    def deal_cards(self):
        """Deal 1, 2, ..., 7 cards from the front of the deck to the tableau.

        Pile ``i`` receives ``i + 1`` cards; the dealt cards are removed
        from ``self.deck``.
        """
        for i in range(7):
            count = i + 1
            self.tableau[i] = self.deck[:count]
            self.deck = self.deck[count:]

    def get_valid_moves(self):
        """Return at most five move descriptions.

        Simplified for demonstration: the last card of every non-empty
        tableau pile is offered as a foundation move, in pile order,
        without checking whether the move would be legal.
        """
        moves = [
            f"Move {pile[-1]} to foundation" for pile in self.tableau if pile
        ]
        return moves[:5]  # Limit to 5 moves for simplicity
class SolitaireRLTrainer:
    """Holds a ``SolitaireEnvironment`` and exposes placeholder training hooks."""

    def __init__(self):
        self.env = SolitaireEnvironment()

    def get_game_state(self):
        """Return the current game state as a dict.

        Keys: ``"tableau"`` and ``"foundation"`` (the environment's own
        pile lists, not copies) and ``"remaining_deck"`` (number of cards
        still in the deck).
        """
        return {
            "tableau": self.env.tableau,
            "foundation": self.env.foundation,
            "remaining_deck": len(self.env.deck),
        }

    def train_step(self, state_description: str, action: str, reward: float):
        """Return a status message for one simulated training step.

        ``state_description`` and ``action`` are accepted but not used;
        only ``reward`` appears in the returned message.
        """
        # In a real implementation, this would update the model weights
        return f"Training step completed. Reward: {reward}"
class MistralSolitaireAgent:
    """Agent stub that owns a trainer and a (currently unused) move history."""

    def __init__(self):
        self.trainer = SolitaireRLTrainer()
        self.game_history = []

    def take_action(self, action: str):
        """Simulate taking ``action`` and return its reward.

        The reward is drawn uniformly at random from [0, 1]; ``action``
        itself does not influence the result.
        """
        # Simulate game action and calculate reward. The original wrapped
        # this in a `try:` with no handler, which is a syntax error; nothing
        # here can raise, so no exception handling is needed.
        reward = random.uniform(0, 1)
        return reward
def train_mistral_solitaire(num_episodes: int, learning_rate: float):
    """Train Mistral model to play Solitaire using reinforcement learning.

    This is a simulation: no model is trained and ``learning_rate`` is
    not used. One record is produced per episode.

    Args:
        num_episodes: Number of episodes to simulate. Truncated to an
            integer so a float value from a UI slider is accepted.
        learning_rate: Accepted for interface compatibility; unused.

    Returns:
        A list of dicts with keys ``"episode"`` (0-based index),
        ``"reward"`` (``episode * 0.1``) and ``"progress"`` (percentage
        of episodes completed). Empty when ``num_episodes`` is 0 or less.
    """
    # range() rejects floats, so normalise the count once up front.
    total = int(num_episodes)
    # Simulate training progress
    return [
        {
            "episode": episode,
            "reward": episode * 0.1,
            "progress": (episode + 1) / total * 100,
        }
        for episode in range(total)
    ]
def play_solitaire_game(state_description: str, action: str):
    """Execute a move in the Solitaire game (simulated).

    The reward depends only on keywords found in ``action``:
    0.8 if it mentions "foundation", otherwise 0.5 if it mentions
    "tableau", otherwise 0.2. ``state_description`` is not used.

    Args:
        state_description: Free-text description of the current state.
        action: Free-text description of the move to take.

    Returns:
        A dict with ``"action_taken"`` (the action as given),
        ``"reward"`` and ``"new_state"`` (a descriptive string).
    """
    # The action is typed by a user, so match keywords case-insensitively
    # and treat a missing action as empty text instead of raising.
    normalized = (action or "").lower()

    # Simulate game action
    if "foundation" in normalized:
        reward = 0.8
    elif "tableau" in normalized:
        reward = 0.5
    else:
        reward = 0.2

    return {
        "action_taken": action,
        "reward": reward,
        "new_state": f"Game state after {action}",
    }
def format_game_state(state: Dict) -> str:
    """Format the current Solitaire game state for display.

    Args:
        state: Mapping that may contain a ``"tableau"`` entry holding a
            list of piles (each pile a list of cards).

    Returns:
        A Markdown string with a heading and one line for each of the
        seven tableau piles. A non-empty pile shows its last three
        cards; an empty or missing pile is shown as "Empty".
    """
    # A missing or None tableau is treated as having no piles at all.
    tableau = state.get("tableau") or []

    lines = ["## Current Solitaire Game State\n\n", "### Tableau Piles\n"]
    for i in range(7):
        # Tolerate a tableau with fewer than seven piles.
        pile = tableau[i] if i < len(tableau) else []
        if pile:
            lines.append(f"Pile {i+1}: {pile[-3:]} \n")
        else:
            lines.append(f"Pile {i+1}: Empty\n")
    return "".join(lines)
def create_solitaire_ui():
    """Create the main Gradio interface for the Solitaire RL project.

    Builds a ``gr.Blocks`` app with three tabs:

    * "Training Interface" - sliders for episode count and learning
      rate; the button calls ``train_mistral_solitaire`` and shows the
      result as JSON.
    * "Game Play" - text inputs for state and action; the button calls
      ``play_solitaire_game`` and shows the result as JSON.
    * "Analysis" - a move-history textbox and an exploration-rate slider
      that are displayed but not wired to any handler.

    Returns:
        The constructed ``gr.Blocks`` instance (not yet launched).
    """
    # NOTE(review): the layout nesting below was reconstructed from the
    # order of the components; confirm it against the intended design.
    # NOTE(review): `api_visibility` is assumed to be supported by the
    # installed Gradio version - confirm against the pinned release.
    with gr.Blocks() as demo:
        gr.Markdown("# 🎮 Mistral Solitaire RL Trainer")
        gr.Markdown("Train Mistral to play Solitaire using Reinforcement Learning")

        with gr.Tab("Training Interface"):
            with gr.Row():
                episodes = gr.Slider(
                    label="Number of Training Episodes",
                    minimum=10,
                    maximum=1000,
                    value=100,
                    step=10,
                )
            with gr.Row():
                learning_rate = gr.Slider(
                    label="Learning Rate",
                    minimum=0.001,
                    maximum=0.1,
                    value=0.01,
                    step=0.001,
                )
            train_btn = gr.Button("Start Training", variant="primary")
            training_output = gr.JSON(label="Training Progress")
            train_btn.click(
                fn=train_mistral_solitaire,
                inputs=[episodes, learning_rate],
                outputs=[training_output],
                api_visibility="public",
            )

        with gr.Tab("Game Play"):
            with gr.Row():
                game_state_input = gr.Textbox(
                    label="Current Game State",
                    lines=3,
                    placeholder="Describe current game state...",
                )
                action_input = gr.Textbox(
                    label="Action to Take",
                    placeholder="e.g., Move A♠ to foundation, Draw from deck",
                )
            play_btn = gr.Button("Execute Move", variant="secondary")
            game_result = gr.JSON(label="Game Result")
            play_btn.click(
                fn=play_solitaire_game,
                inputs=[game_state_input, action_input],
                outputs=[game_result],
                api_visibility="public",
            )

        with gr.Tab("Analysis"):
            with gr.Row():
                move_history = gr.Textbox(
                    label="Move History",
                    lines=4,
                )
            with gr.Accordion("Advanced Options", open=False):
                exploration_rate = gr.Slider(
                    label="Exploration Rate",
                    minimum=0.01,
                    maximum=1.0,
                    value=0.1,
                    step=0.01,
                )

        gr.Markdown("---\n*This demo simulates training a language model to play Solitaire*")

    return demo
# Script entry point: build the UI and start the Gradio server.
# NOTE(review): passing `theme` and `footer_links` to launch() is assumed
# to be supported by the installed Gradio version - confirm against the
# pinned release.
if __name__ == "__main__":
    demo = create_solitaire_ui()
    demo.launch(
        theme=gr.themes.Soft(
            primary_hue="blue",
            secondary_hue="indigo",
            neutral_hue="slate",
            font=gr.themes.GoogleFont("Inter"),
            text_size="lg",
            spacing_size="lg",
            radius_size="md",
        ),
        # The original left this list and dict unclosed, a syntax error.
        footer_links=[
            {
                "label": "Built with anycoder",
                "url": "https://huggingface.co/spaces/akhaliq/anycoder",
            }
        ],
    )