Update neochessppo.py
neochessppo.py · CHANGED · +9 -7
@@ -1,3 +1,5 @@
+import os
+os.system("mv NeoChess/san_moves.txt /usr/local/python/3.12.1/lib/python3.12/site-packages/torchrl/envs/custom/")
 import torchrl
 import torch
 import chess
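The new `os.system("mv ...")` call pins an absolute interpreter path (`/usr/local/python/3.12.1/...`), so it only works in that one environment. A minimal sketch of a more portable variant, assuming the intent is simply to place `san_moves.txt` where torchrl's custom chess env can find it (the destination is derived from the installed package instead of being hardcoded):

```python
import os
import shutil

import torchrl.envs.custom

# Resolve torchrl's custom-envs directory from the installed package
# rather than hardcoding /usr/local/python/3.12.1/....
dest_dir = os.path.dirname(torchrl.envs.custom.__file__)
src = "NeoChess/san_moves.txt"

# copy2 keeps the source tree intact; skip silently if the file is absent.
if os.path.exists(src):
    shutil.copy2(src, os.path.join(dest_dir, "san_moves.txt"))
```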
@@ -145,9 +147,9 @@ with set_gym_backend("gymnasium"):
 
 policy = Policy().to(device)
 value = Value().to(device)
-valweight = torch.load("NeoChess/
+valweight = torch.load("NeoChess-Community/chessy_modelt-1.pth",map_location=device,weights_only=False)
 value.load_state_dict(valweight)
-polweight = torch.load("NeoChess/chessy_policy.pth")
+polweight = torch.load("NeoChess-Community/chessy_policy.pth",map_location=device,weights_only=False)
 policy.load_state_dict(polweight)
 
 def sample_masked_action(logits, mask):
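Both new `torch.load` calls pass `weights_only=False`, which runs the full pickle machinery and can execute arbitrary code from an untrusted checkpoint file. If these `.pth` files hold plain `state_dict`s (tensors only), the stricter mode should work and is safer; a sketch against the same paths:

```python
# weights_only=True uses a restricted unpickler that only rebuilds tensors
# and primitive containers, so a plain state_dict loads fine.
valweight = torch.load("NeoChess-Community/chessy_modelt-1.pth",
                       map_location=device, weights_only=True)
value.load_state_dict(valweight)

polweight = torch.load("NeoChess-Community/chessy_policy.pth",
                       map_location=device, weights_only=True)
policy.load_state_dict(polweight)
```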
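`sample_masked_action` itself is untouched by this commit and its body is not in the diff. For orientation, the usual way such a helper is written (a sketch of the standard masked-sampling technique, not necessarily this repo's implementation) is to push illegal moves to `-inf` before sampling:

```python
import torch

def sample_masked_action(logits, mask):
    # mask: boolean tensor, True where the move is legal. Setting illegal
    # logits to -inf gives those moves zero probability under softmax.
    masked_logits = logits.masked_fill(~mask, float("-inf"))
    dist = torch.distributions.Categorical(logits=masked_logits)
    action = dist.sample()
    return action, dist.log_prob(action)
```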
@@ -253,9 +255,10 @@ print(actor(obs))
 rollout = env.rollout(3)
 
 from torchrl.record.loggers import generate_exp_name, get_logger
-def train_ppo_chess(chess_env, num_iterations=1, frames_per_batch=100,
-                    num_epochs=
+def train_ppo_chess(chess_env, num_iterations=1, frames_per_batch=1000,
+                    num_epochs=100, lr=3e-4, gamma=0.99, lmbda=0.95,
                     clip_epsilon=0.2, device="cpu"):
+    global actor_module, value_module, loss_module
     """
     Main PPO training loop for Chess
 
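The filled-in defaults (`gamma=0.99`, `lmbda=0.95`, `clip_epsilon=0.2`, `lr=3e-4`) are the conventional PPO/GAE settings. The function body is outside this diff, but in torchrl these values typically wire into the advantage estimator and the clipped objective roughly as follows (a sketch against torchrl's public API, reusing `actor_module`/`value_module` from this file):

```python
from torchrl.objectives import ClipPPOLoss
from torchrl.objectives.value import GAE

# GAE: gamma discounts future rewards, lmbda trades bias for variance
# in the advantage estimate.
advantage_module = GAE(gamma=0.99, lmbda=0.95,
                       value_network=value_module, average_gae=True)

# Clipped surrogate objective: policy ratios outside
# [1 - clip_epsilon, 1 + clip_epsilon] stop contributing gradient.
loss_module = ClipPPOLoss(actor_network=actor_module,
                          critic_network=value_module,
                          clip_epsilon=0.2)
```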
@@ -275,7 +278,6 @@ def train_ppo_chess(chess_env, num_iterations=1, frames_per_batch=100,
     env = chess_env
     # Create actor and value modules
     actor_module = actor
-    global actor_module, value_module, loss_module
 
     collector = SyncDataCollector(
         env,
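Dropping the `global` statement here is the real fix: Python rejects a `global` declaration that appears after an assignment to the same name in the same scope (`SyntaxError: name 'actor_module' is assigned to before global declaration`), which is why the commit moves it to the top of the function instead. The `SyncDataCollector` arguments are not shown in the diff; a typical construction using the signature's parameters would look like this (a sketch, not the file's actual call):

```python
from torchrl.collectors import SyncDataCollector

# Runs the policy in the env and yields batches of frames_per_batch
# transitions until total_frames have been collected.
collector = SyncDataCollector(
    env,
    actor_module,
    frames_per_batch=1000,
    total_frames=1000 * 1,  # frames_per_batch * num_iterations
    device="cpu",
)
```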
@@ -414,5 +416,5 @@ def train_ppo_chess(chess_env, num_iterations=1, frames_per_batch=100,
     print("\nTraining completed!")
 
 train_ppo_chess(env)
-torch.save(value.state_dict(),"chessy_model.pth")
-torch.save(policy.state_dict(),"chessy_policy.pth")
+torch.save(value.state_dict(),"NeoChess-Community/chessy_model.pth")
+torch.save(policy.state_dict(),"NeoChess-Community/chessy_policy.pth")
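`torch.save` raises `FileNotFoundError` when the target directory is missing, so the new `NeoChess-Community/` destination assumes that folder already exists. A cheap guard (a sketch):

```python
import os

os.makedirs("NeoChess-Community", exist_ok=True)  # no-op if already present
torch.save(value.state_dict(), "NeoChess-Community/chessy_model.pth")
torch.save(policy.state_dict(), "NeoChess-Community/chessy_policy.pth")
```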