XnOwO committed on
Commit
a9a0a3c
·
verified ·
1 Parent(s): ae12549

Update app.py from anycoder

Browse files
Files changed (1) hide show
  1. app.py +26 -41
app.py CHANGED
@@ -2,9 +2,6 @@ import gradio as gr
2
  import numpy as np
3
  from typing import List, Tuple, Dict, Any
4
  import random
5
- import torch
6
- from transformers import AutoTokenizer, AutoModelForCausalLM
7
- import json
8
 
9
  class SolitaireEnvironment:
10
  def __init__(self):
@@ -22,7 +19,7 @@ class SolitaireEnvironment:
22
  # Deal cards to tableau (Solitaire rules)
23
  for i in range(7):
24
  self.tableau[i] = self.deck[:i+1]
25
- self.deck = self.deck[i+1:]
26
 
27
  def get_valid_moves(self):
28
  # Simplified valid moves for demonstration
@@ -42,11 +39,7 @@ class SolitaireEnvironment:
42
  class SolitaireRLTrainer:
43
  def __init__(self):
44
  self.env = SolitaireEnvironment()
45
- self.model_name = "mistralai/Mistral-7B-v0.1" # Using a smaller model for demo
46
- self.tokenizer = AutoTokenizer.from_pretrained(self.model_name)
47
- if self.tokenizer.pad_token is None:
48
- self.tokenizer.pad_token = self.tokenizer.eos_token
49
-
50
  def get_game_state(self):
51
  return {
52
  "tableau": self.env.tableau,
@@ -71,9 +64,8 @@ class MistralSolitaireAgent:
71
  def take_action(self, action: str):
72
  try:
73
  # Simulate game action and calculate reward
74
- if "move" in action.lower():
75
  reward = random.uniform(0, 1)
76
- return reward
77
 
78
  def train_mistral_solitaire(num_episodes: int, learning_rate: float):
79
  """Train Mistral model to play Solitaire using reinforcement learning"""
@@ -91,9 +83,9 @@ def train_mistral_solitaire(num_episodes: int, learning_rate: float):
91
 
92
  def play_solitaire_game(state_description: str, action: str):
93
  """Execute a move in the Solitaire game"""
94
- # In a real implementation, this would modify the actual game state
95
  game_state = {
96
- "tableau": [[random.randint(1, 13) for _ in range(random.randint(1, 5)] for _ in range(7)]
97
 
98
  # Calculate reward based on action quality
99
  if "foundation" in action:
@@ -116,9 +108,12 @@ def format_game_state(state: Dict) -> str:
116
 
117
  # Tableau piles
118
  formatted += "### Tableau Piles\n"
119
- for i, pile in enumerate(state.get("tableau", [])):
120
- pile_str = " | ".join(str(card) for card in pile[-3:]]) if pile else "Empty"
121
- formatted += "\n"
 
 
 
122
 
123
  return formatted
124
 
@@ -126,13 +121,9 @@ def create_solitaire_ui():
126
  """Create the main Gradio interface for the Solitaire RL project"""
127
 
128
  with gr.Blocks() as demo:
129
- gr.Markdown("# 🎮 Mistral 3B Solitaire RL Trainer")
130
- gr.Markdown("Train Mistral 3B to play Solitaire using Reinforcement Learning")
131
 
132
- with gr.Row():
133
- with gr.Column(scale=1):
134
- gr.Markdown("### 🏗️ Built with [anycoder](https://huggingface.co/spaces/akhaliq/anycoder)")
135
-
136
  with gr.Tab("Training Interface"):
137
  with gr.Row():
138
  episodes = gr.Slider(
@@ -140,16 +131,15 @@ def create_solitaire_ui():
140
  minimum=10,
141
  maximum=1000,
142
  value=100,
143
- step=10,
144
- info="More episodes = better training but longer wait"
145
  )
146
  learning_rate = gr.Slider(
147
  label="Learning Rate",
148
  minimum=0.001,
149
  maximum=0.1,
150
  value=0.01,
151
- step=0.001,
152
- )
153
 
154
  train_btn = gr.Button("Start Training", variant="primary")
155
  training_output = gr.JSON(label="Training Progress")
@@ -165,16 +155,15 @@ def create_solitaire_ui():
165
  with gr.Row():
166
  game_state = gr.Textbox(
167
  label="Current Game State",
168
- value="A♠ 2♠ 3♠ | K♥ | Q♦ | J♣",
169
- lines=3
170
  )
171
 
172
  with gr.Row():
173
  action_input = gr.Textbox(
174
  label="Action to Take",
175
  placeholder="e.g., Move A♠ to foundation, Draw from deck"
176
- )
177
-
178
  play_btn = gr.Button("Execute Move", variant="secondary")
179
  game_result = gr.JSON(label="Game Result")
180
 
@@ -193,15 +182,14 @@ def create_solitaire_ui():
193
  )
194
 
195
  with gr.Accordion("Advanced Options", open=False):
196
- exploration_rate = gr.Slider(
197
- label="Exploration Rate",
198
- minimum=0.01,
199
  maximum=1.0,
200
  value=0.1,
201
- step=0.01,
202
- info="Higher exploration = more experimentation"
203
- )
204
-
205
  gr.Markdown("---\n*This demo simulates training a language model to play Solitaire*")
206
 
207
  return demo
@@ -217,9 +205,6 @@ if __name__ == "__main__":
217
  text_size="lg",
218
  spacing_size="lg",
219
  radius_size="md"
220
- ).set(
221
- button_primary_background_fill="*primary_600",
222
- button_primary_background_fill_hover="*primary_700"
223
  ),
224
- footer_links=[{"label": "Built with anycoder", "url": "https://huggingface.co/spaces/akhaliq/anycoder"]
225
  )
 
2
  import numpy as np
3
  from typing import List, Tuple, Dict, Any
4
  import random
 
 
 
5
 
6
  class SolitaireEnvironment:
7
  def __init__(self):
 
19
  # Deal cards to tableau (Solitaire rules)
20
  for i in range(7):
21
  self.tableau[i] = self.deck[:i+1]
22
+ self.deck = self.deck[i+1:]
23
 
24
  def get_valid_moves(self):
25
  # Simplified valid moves for demonstration
 
39
  class SolitaireRLTrainer:
40
  def __init__(self):
41
  self.env = SolitaireEnvironment()
42
+
 
 
 
 
43
  def get_game_state(self):
44
  return {
45
  "tableau": self.env.tableau,
 
64
  def take_action(self, action: str):
65
  try:
66
  # Simulate game action and calculate reward
 
67
  reward = random.uniform(0, 1)
68
+ return reward
69
 
70
  def train_mistral_solitaire(num_episodes: int, learning_rate: float):
71
  """Train Mistral model to play Solitaire using reinforcement learning"""
 
83
 
84
  def play_solitaire_game(state_description: str, action: str):
85
  """Execute a move in the Solitaire game"""
86
+ # Simulate game action
87
  game_state = {
88
+ "tableau": [[random.randint(1, 13) for _ in range(i+1)] for i in range(7)]
89
 
90
  # Calculate reward based on action quality
91
  if "foundation" in action:
 
108
 
109
  # Tableau piles
110
  formatted += "### Tableau Piles\n"
111
+ for i in range(7):
112
+ pile = state.get("tableau", [[]] * 7))[i]
113
+ if pile:
114
+ formatted += f"Pile {i+1}: {pile[-3:]} \n"
115
+ else:
116
+ formatted += f"Pile {i+1}: Empty\n"
117
 
118
  return formatted
119
 
 
121
  """Create the main Gradio interface for the Solitaire RL project"""
122
 
123
  with gr.Blocks() as demo:
124
+ gr.Markdown("# 🎮 Mistral Solitaire RL Trainer")
125
+ gr.Markdown("Train Mistral to play Solitaire using Reinforcement Learning")
126
 
 
 
 
 
127
  with gr.Tab("Training Interface"):
128
  with gr.Row():
129
  episodes = gr.Slider(
 
131
  minimum=10,
132
  maximum=1000,
133
  value=100,
134
+ step=10
 
135
  )
136
  learning_rate = gr.Slider(
137
  label="Learning Rate",
138
  minimum=0.001,
139
  maximum=0.1,
140
  value=0.01,
141
+ step=0.001
142
+ )
143
 
144
  train_btn = gr.Button("Start Training", variant="primary")
145
  training_output = gr.JSON(label="Training Progress")
 
155
  with gr.Row():
156
  game_state = gr.Textbox(
157
  label="Current Game State",
158
+ lines=3
 
159
  )
160
 
161
  with gr.Row():
162
  action_input = gr.Textbox(
163
  label="Action to Take",
164
  placeholder="e.g., Move A♠ to foundation, Draw from deck"
165
+ )
166
+
167
  play_btn = gr.Button("Execute Move", variant="secondary")
168
  game_result = gr.JSON(label="Game Result")
169
 
 
182
  )
183
 
184
  with gr.Accordion("Advanced Options", open=False):
185
+ exploration_rate = gr.Slider(
186
+ label="Exploration Rate",
187
+ minimum=0.01,
188
  maximum=1.0,
189
  value=0.1,
190
+ step=0.01
191
+ )
192
+
 
193
  gr.Markdown("---\n*This demo simulates training a language model to play Solitaire*")
194
 
195
  return demo
 
205
  text_size="lg",
206
  spacing_size="lg",
207
  radius_size="md"
 
 
 
208
  ),
209
+ footer_links=[{"label": "Built with anycoder", "url": "https://huggingface.co/spaces/akhaliq/anycoder"
210
  )