# XnOwO's picture
# Update app.py from anycoder
# 9bb4e39 verified
import gradio as gr
import numpy as np
from typing import List, Tuple, Dict, Any
import random
class SolitaireEnvironment:
    """Minimal Solitaire game state used by the demo.

    Cards are plain integers 1-13 (four copies of each); suits are not
    tracked. ``reset`` populates three attributes:

    * ``deck`` - cards left over after dealing.
    * ``foundation`` - four foundation piles, initially empty.
    * ``tableau`` - seven piles holding 1, 2, ..., 7 cards respectively.
    """

    def __init__(self):
        self.reset()

    def reset(self):
        """Shuffle a fresh 52-card deck and deal a new game."""
        # 1-13 repeated four times, one run per (untracked) suit.
        self.deck = list(range(1, 14)) * 4
        random.shuffle(self.deck)
        self.foundation = [[], [], [], []]
        self.tableau = [[] for _ in range(7)]
        self.deal_cards()

    def deal_cards(self):
        """Deal ``i + 1`` cards from the front of the deck onto pile ``i``."""
        for i in range(7):
            count = i + 1
            self.tableau[i] = self.deck[:count]
            # Drop the cards just dealt so the next pile starts after them.
            self.deck = self.deck[count:]

    def get_valid_moves(self):
        """Return at most five move descriptions.

        Simplified for demonstration: every non-empty tableau pile yields a
        "move top card to foundation" entry, in pile order, without checking
        whether the move is legal.
        """
        moves = [
            f"Move {pile[-1]} to foundation"
            for pile in self.tableau
            if pile
        ]
        return moves[:5]  # Limit to 5 moves for simplicity
class SolitaireRLTrainer:
    """Placeholder reinforcement-learning trainer that owns one game environment."""

    def __init__(self):
        self.env = SolitaireEnvironment()

    def get_game_state(self):
        """Return the environment's current state as a dict.

        The ``tableau`` and ``foundation`` values are the environment's own
        lists (not copies); ``remaining_deck`` is the number of cards left
        in the deck.
        """
        return {
            "tableau": self.env.tableau,
            "foundation": self.env.foundation,
            "remaining_deck": len(self.env.deck),
        }

    def train_step(self, state_description: str, action: str, reward: float):
        """Report a completed training step.

        ``state_description`` and ``action`` are accepted but not used; only
        ``reward`` appears in the returned message.
        """
        # In a real implementation, this would update the model weights
        return f"Training step completed. Reward: {reward}"
class MistralSolitaireAgent:
    """Demo agent that holds a trainer and a (currently unused) move history."""

    def __init__(self):
        self.trainer = SolitaireRLTrainer()
        self.game_history = []

    def take_action(self, action: str):
        """Simulate taking ``action`` and return its reward.

        The reward is a random float drawn uniformly from [0, 1]; ``action``
        itself does not influence it.
        """
        # Simulate game action and calculate reward.
        # The original wrapped this in a ``try`` with no handler; the call
        # cannot fail with constant bounds, so no exception handling is needed.
        return random.uniform(0, 1)
def train_mistral_solitaire(num_episodes: int, learning_rate: float) -> List[Dict[str, Any]]:
    """Simulate training Mistral to play Solitaire using reinforcement learning.

    No model is trained: each episode gets a synthetic reward of
    ``episode * 0.1`` and a completion percentage.

    Args:
        num_episodes: Number of episodes to simulate. Converted with
            ``int()`` because a UI slider may deliver the value as a float.
        learning_rate: Accepted for interface compatibility; not used by
            the simulation.

    Returns:
        One dict per episode with keys ``"episode"``, ``"reward"`` and
        ``"progress"`` (percentage, 0-100). Empty when ``num_episodes`` is
        zero or negative.
    """
    total = int(num_episodes)
    # Simulate training progress. No agent is constructed here because the
    # synthetic metrics below never consult one.
    return [
        {
            "episode": episode,
            "reward": episode * 0.1,
            "progress": (episode + 1) / total * 100,
        }
        for episode in range(total)
    ]
def play_solitaire_game(state_description: str, action: str):
    """Execute a (simulated) move in the Solitaire game.

    The reward depends only on keywords found in ``action``, matched
    case-insensitively: ``"foundation"`` scores 0.8, otherwise
    ``"tableau"`` scores 0.5, otherwise 0.2.

    Args:
        state_description: Free-text description of the game state; not
            used by the simulation.
        action: Free-text description of the move. ``None`` or an empty
            string is treated as containing no keyword.

    Returns:
        A dict with ``"action_taken"`` (the action as given), ``"reward"``
        and ``"new_state"`` (a descriptive string).
    """
    # Normalise only for matching; the caller's original text is echoed back.
    normalized = (action or "").lower()

    # Simulate game action
    if "foundation" in normalized:
        reward = 0.8
    elif "tableau" in normalized:
        reward = 0.5
    else:
        reward = 0.2

    return {
        "action_taken": action,
        "reward": reward,
        "new_state": f"Game state after {action}",
    }
def format_game_state(state: Dict) -> str:
    """Format the current Solitaire game state for display.

    Produces a Markdown heading followed by one line per tableau pile
    (always seven lines). A non-empty pile shows its last three cards;
    an empty or missing pile shows ``Empty``.

    Args:
        state: Mapping that may contain a ``"tableau"`` list of piles.
            A missing or ``None`` tableau, or one with fewer than seven
            piles, is padded with empty piles.

    Returns:
        The formatted Markdown text.
    """
    tableau = state.get("tableau") or []

    parts = [
        "## Current Solitaire Game State\n\n",
        # Tableau piles
        "### Tableau Piles\n",
    ]
    for i in range(7):
        # Guard the index so a short tableau does not raise IndexError.
        pile = tableau[i] if i < len(tableau) else []
        if pile:
            parts.append(f"Pile {i+1}: {pile[-3:]} \n")
        else:
            parts.append(f"Pile {i+1}: Empty\n")
    return "".join(parts)
def create_solitaire_ui():
    """Create the main Gradio interface for the Solitaire RL project.

    Builds a ``gr.Blocks`` app with three tabs (training, game play,
    analysis) plus an "Advanced Options" accordion, wires the two buttons
    to ``train_mistral_solitaire`` and ``play_solitaire_game``, and returns
    the un-launched app.

    NOTE(review): the original indentation was lost, so the container
    nesting below (which components share a row, and where the accordion
    sits) is a reconstruction - confirm against the intended layout.
    """
    with gr.Blocks() as demo:
        gr.Markdown("# 🎮 Mistral Solitaire RL Trainer")
        gr.Markdown("Train Mistral to play Solitaire using Reinforcement Learning")

        with gr.Tab("Training Interface"):
            with gr.Row():
                episodes = gr.Slider(
                    label="Number of Training Episodes",
                    minimum=10,
                    maximum=1000,
                    value=100,
                    step=10,
                )
            with gr.Row():
                learning_rate = gr.Slider(
                    label="Learning Rate",
                    minimum=0.001,
                    maximum=0.1,
                    value=0.01,
                    step=0.001,
                )
            train_btn = gr.Button("Start Training", variant="primary")
            training_output = gr.JSON(label="Training Progress")
            # Run the simulated training and show the per-episode records.
            train_btn.click(
                fn=train_mistral_solitaire,
                inputs=[episodes, learning_rate],
                outputs=[training_output],
                api_visibility="public",
            )

        with gr.Tab("Game Play"):
            with gr.Row():
                game_state_input = gr.Textbox(
                    label="Current Game State",
                    lines=3,
                    placeholder="Describe current game state...",
                )
                action_input = gr.Textbox(
                    label="Action to Take",
                    placeholder="e.g., Move A♠ to foundation, Draw from deck",
                )
            play_btn = gr.Button("Execute Move", variant="secondary")
            game_result = gr.JSON(label="Game Result")
            # Score the typed action and show the resulting dict.
            play_btn.click(
                fn=play_solitaire_game,
                inputs=[game_state_input, action_input],
                outputs=[game_result],
                api_visibility="public",
            )

        with gr.Tab("Analysis"):
            with gr.Row():
                # Display-only for now: no event handler reads or writes it.
                move_history = gr.Textbox(
                    label="Move History",
                    lines=4,
                )

        with gr.Accordion("Advanced Options", open=False):
            # Not yet connected to any handler.
            exploration_rate = gr.Slider(
                label="Exploration Rate",
                minimum=0.01,
                maximum=1.0,
                value=0.1,
                step=0.01,
            )

        gr.Markdown("---\n*This demo simulates training a language model to play Solitaire*")

    return demo
# Script entry point: build the UI and serve it with the Soft theme.
if __name__ == "__main__":
    demo = create_solitaire_ui()
    demo.launch(
        theme=gr.themes.Soft(
            primary_hue="blue",
            secondary_hue="indigo",
            neutral_hue="slate",
            font=gr.themes.GoogleFont("Inter"),
            text_size="lg",
            spacing_size="lg",
            radius_size="md",
        ),
        # The original left this list and its dict unclosed (syntax error).
        footer_links=[
            {
                "label": "Built with anycoder",
                "url": "https://huggingface.co/spaces/akhaliq/anycoder",
            }
        ],
    )