mrhaseenullah committed on
Commit
d436fd9
·
verified ·
1 Parent(s): 5c34f36

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +139 -3
app.py CHANGED
@@ -2,7 +2,145 @@ import random
2
  import numpy as np
3
  import gradio as gr
4
 
5
- [Previous TicTacToe, MinimaxPlayer, and QLearningPlayer classes remain exactly the same]
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
6
 
7
  # Global game instance
8
  game = TicTacToe()
@@ -27,7 +165,6 @@ def make_move(evt: gr.SelectData, game_mode, difficulty):
27
  """Handle player moves and AI responses"""
28
  row, col = evt.index
29
  position = row * 3 + col
30
- status = ""
31
 
32
  # Human move
33
  if game.board[position] == ' ':
@@ -86,7 +223,6 @@ def create_gui():
86
  value="medium"
87
  )
88
 
89
- # Fixed DataFrame initialization
90
  board = gr.DataFrame(
91
  render_board(),
92
  headers=[""]*3, # Empty headers for 3 columns
 
2
  import numpy as np
3
  import gradio as gr
4
 
5
class TicTacToe:
    """Minimal 3x3 tic-tac-toe board with 'X'/'O' turn tracking.

    Cells are stored row-major in a flat list of nine characters,
    each ' ', 'X', or 'O'.
    """

    def __init__(self):
        self.board = [' '] * 9        # cells 0-8, row-major
        self.current_player = 'X'     # 'X' always opens

    def display_board(self):
        """Print the board to stdout as a 3-row grid with separators."""
        print("\n")
        rows = [self.board[start:start + 3] for start in (0, 3, 6)]
        for row_index, row in enumerate(rows):
            print(" | ".join(row))
            if row_index < 2:
                print("---------")
        print("\n")

    def make_move(self, position):
        """Place the current player's mark at *position* (0-8).

        Returns True on success, False if the cell is already occupied.
        Does not switch turns; callers use switch_player() for that.
        """
        if self.board[position] != ' ':
            return False
        self.board[position] = self.current_player
        return True

    def switch_player(self):
        """Toggle whose turn it is ('X' <-> 'O')."""
        self.current_player = 'X' if self.current_player == 'O' else 'O'

    def check_winner(self):
        """Return 'X' or 'O' if that mark completes a line, else None."""
        lines = (
            (0, 1, 2), (3, 4, 5), (6, 7, 8),   # rows
            (0, 3, 6), (1, 4, 7), (2, 5, 8),   # columns
            (0, 4, 8), (2, 4, 6),              # diagonals
        )
        for a, b, c in lines:
            mark = self.board[a]
            if mark != ' ' and mark == self.board[b] == self.board[c]:
                return mark
        return None

    def is_draw(self):
        """Return True when no empty cell remains on the board."""
        return self.board.count(' ') == 0

    def reset_board(self):
        """Restore the empty opening position with 'X' to move."""
        self.board = [' '] * 9
        self.current_player = 'X'
44
+
45
class MinimaxPlayer:
    """Perfect-play tic-tac-toe agent using minimax with alpha-beta pruning.

    Expects a game object exposing `board` (flat list of 9 cells),
    `check_winner()`, and `is_draw()` — as provided by TicTacToe.
    """

    def __init__(self, symbol):
        self.symbol = symbol  # 'X' or 'O': the mark this agent plays

    def minimax(self, game, is_maximizing, alpha=-float('inf'), beta=float('inf')):
        """Return the minimax value of the current board for this agent.

        Values: +1 this agent wins, -1 opponent wins, 0 draw.
        `alpha`/`beta` default to the full window, so existing
        two-argument callers get the exact minimax value unchanged;
        the pruning only skips branches that cannot affect the result.
        Explores moves by mutating `game.board` in place and undoing
        each move before returning.
        """
        opponent = 'O' if self.symbol == 'X' else 'X'
        winner = game.check_winner()
        if winner == self.symbol:
            return 1
        if winner == opponent:
            return -1
        if game.is_draw():
            return 0

        if is_maximizing:
            best_score = -float('inf')
            for i in range(9):
                if game.board[i] == ' ':
                    game.board[i] = self.symbol
                    score = self.minimax(game, False, alpha, beta)
                    game.board[i] = ' '
                    best_score = max(best_score, score)
                    alpha = max(alpha, best_score)
                    if alpha >= beta:
                        break  # opponent already has a better option elsewhere
            return best_score
        else:
            best_score = float('inf')
            for i in range(9):
                if game.board[i] == ' ':
                    game.board[i] = opponent
                    score = self.minimax(game, True, alpha, beta)
                    game.board[i] = ' '
                    best_score = min(best_score, score)
                    beta = min(beta, best_score)
                    if alpha >= beta:
                        break  # this line is already worse for the opponent
            return best_score

    def get_move(self, game):
        """Return the index (0-8) of the best empty cell for this agent.

        Ties are broken by the lowest index (strict `>` keeps the first
        best move found). Returns None only if the board is full.
        """
        best_score = -float('inf')
        best_move = None
        for i in range(9):
            if game.board[i] == ' ':
                game.board[i] = self.symbol
                # Full alpha-beta window per root move -> exact score.
                score = self.minimax(game, False)
                game.board[i] = ' '
                if score > best_score:
                    best_score = score
                    best_move = i
        return best_move
89
+
90
class QLearningPlayer:
    """Tabular Q-learning tic-tac-toe agent trained by self-play.

    State keys are the 9-character board string; each state maps to a
    numpy array of 9 action values. NOTE(review): train() makes moves
    for BOTH sides with this agent's policy and scores every move from
    this agent's perspective — crude self-play, kept as designed.
    """

    def __init__(self, symbol, learning_rate=0.1, discount_factor=0.9, exploration_rate=1.0):
        self.symbol = symbol                    # 'X' or 'O'
        self.q_table = {}                       # state string -> np.zeros(9) of action values
        self.learning_rate = learning_rate      # alpha: step size of each update
        self.discount_factor = discount_factor  # gamma: weight of future reward
        self.exploration_rate = exploration_rate  # epsilon: probability of a random move

    def get_state(self, game):
        """Serialize the board into a hashable state key."""
        return ''.join(game.board)

    def choose_action(self, game):
        """Return a LEGAL move: random with prob epsilon, else greedy.

        Bug fix: the greedy branch previously took np.argmax over all
        nine cells, which could select an occupied square; in train()
        the failed move left the board unchanged and the same action
        repeated forever whenever exploration_rate < 1. Greedy choice
        is now restricted to empty cells.
        """
        valid_moves = [i for i in range(9) if game.board[i] == ' ']
        if random.random() < self.exploration_rate:
            return random.choice(valid_moves)
        state = self.get_state(game)
        if state not in self.q_table:
            self.q_table[state] = np.zeros(9)
        q_values = self.q_table[state]
        # max() with a key breaks ties by lowest index, matching argmax.
        return max(valid_moves, key=lambda i: q_values[i])

    def update_q_table(self, state, action, reward, next_state):
        """Standard Q-learning update toward reward + gamma * max Q(next)."""
        if state not in self.q_table:
            self.q_table[state] = np.zeros(9)
        if next_state not in self.q_table:
            self.q_table[next_state] = np.zeros(9)
        self.q_table[state][action] += self.learning_rate * (
            reward + self.discount_factor * np.max(self.q_table[next_state]) - self.q_table[state][action]
        )

    def train(self, episodes):
        """Run *episodes* self-play games, updating the Q-table each move.

        Rewards: +1 when this agent's symbol wins, -1 when the other
        symbol wins, 0.5 on a draw, 0 for non-terminal moves.
        """
        for _ in range(episodes):
            game = TicTacToe()
            state = self.get_state(game)
            while True:
                action = self.choose_action(game)
                # Always legal now that choose_action masks occupied cells.
                game.make_move(action)
                next_state = self.get_state(game)
                winner = game.check_winner()
                if winner == self.symbol:
                    self.update_q_table(state, action, 1, next_state)
                    break
                elif winner:
                    self.update_q_table(state, action, -1, next_state)
                    break
                elif game.is_draw():
                    self.update_q_table(state, action, 0.5, next_state)
                    break
                else:
                    self.update_q_table(state, action, 0, next_state)
                    game.switch_player()
                    state = next_state
144
 
145
  # Global game instance
146
  game = TicTacToe()
 
165
  """Handle player moves and AI responses"""
166
  row, col = evt.index
167
  position = row * 3 + col
 
168
 
169
  # Human move
170
  if game.board[position] == ' ':
 
223
  value="medium"
224
  )
225
 
 
226
  board = gr.DataFrame(
227
  render_board(),
228
  headers=[""]*3, # Empty headers for 3 columns