Update neochessppo.py
neochessppo.py  CHANGED  +8 -46
@@ -1,19 +1,3 @@
-# -*- coding: utf-8 -*-
-"""NeoChessPPO.ipynb
-
-Automatically generated by Colab.
-
-Original file is located at
-    https://colab.research.google.com/drive/1wfdi_MmS5cEnvU_IIomlNObjzoGodqIC
-"""
-
-!git clone https://huggingface.co/sigmoidneuron123/NeoChess
-
-!pip install torchrl
-!pip install tensordict
-!pip install gymnasium
-!pip install chess
-
 import torchrl
 import torch
 import chess
@@ -49,8 +33,6 @@ from torchrl.envs.libs.gym import set_gym_backend, GymWrapper
 from torchrl.envs import GymEnv
 from tensordict import TensorDict
 
-!git clone https://huggingface.co/sigmoidneuron123/NeoChess
-
 device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
 
 def board_to_tensor(board):
@@ -160,34 +142,13 @@ with set_gym_backend("gymnasium"):
         include_fen=True,
         include_san=False,
     )
-obs = env.reset()
-
-!mv san_moves.txt /usr/local/lib/python3.11/dist-packages/torchrl/envs/custom/
-
-!pip show torchrl gymnasium
-
-obs = env.reset()
-
-for _ in range(10):
-    legal_moves = obs["action_mask"].nonzero(as_tuple=False).squeeze(-1)
-    action = legal_moves[0]  # example: pick first legal move
-    td_action = TensorDict({"action": action}, batch_size=obs.batch_size)
-
-    obs = env.step(td_action)  # obs is the nested TensorDict
-
-    # Use the next observation for the next step:
-    obs = obs.get("next")  # move to next state
-
-    board = chess.Board(obs["fen"])
-    print(board)
-
-obs = env.reset()
-obs
 
 policy = Policy().to(device)
 value = Value().to(device)
 valweight = torch.load("NeoChess/chessy_model.pth",map_location=device)
 value.load_state_dict(valweight)
+polweight = torch.load("NeoChess/chessy_policy.pth")
+policy.load_state_dict(polweight)
 
 def sample_masked_action(logits, mask):
     masked_logits = logits.clone()
@@ -282,11 +243,10 @@ actor = ProbabilisticActor(
     distribution_class=masked_categorical_factory,
     return_log_prob=True,
 )
-
-
+#test
 obs = env.reset()
 print(obs)
-print(policy_module(obs)
+print(policy_module(obs))
 print(value_module(obs))
 print(actor(obs))
 
@@ -315,6 +275,7 @@ def train_ppo_chess(chess_env, num_iterations=1, frames_per_batch=100,
     env = chess_env
     # Create actor and value modules
     actor_module = actor
+    global actor_module, value_module, loss_module
 
     collector = SyncDataCollector(
         env,
@@ -451,6 +412,7 @@ def train_ppo_chess(chess_env, num_iterations=1, frames_per_batch=100,
     replay_buffer.empty()
 
     print("\nTraining completed!")
-    return actor_module, value_module, loss_module
 
-train_ppo_chess(env)
+train_ppo_chess(env)
+torch.save(value.state_dict(),"chessy_model.pth")
+torch.save(policy.state_dict(),"chessy_policy.pth")
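The smoke-test loop deleted in the third hunk is still the clearest record of TorchRL's stepping convention: env.step(td) returns a TensorDict whose "next" entry holds the resulting state, so the loop advances with obs.get("next") before reading the new "fen" and "action_mask". A condensed, runnable sketch of that deleted block, assuming the ChessEnv instance built earlier with include_fen=True:

import chess
from tensordict import TensorDict

obs = env.reset()  # env: the ChessEnv constructed earlier in the file
for _ in range(10):
    # Indices of the legal moves in the environment's boolean action mask.
    legal_moves = obs["action_mask"].nonzero(as_tuple=False).squeeze(-1)
    action = legal_moves[0]  # smoke test only: always play the first legal move
    td_action = TensorDict({"action": action}, batch_size=obs.batch_size)
    out = env.step(td_action)  # outcome is written under out["next"]
    obs = out.get("next")      # advance to the new state
    print(chess.Board(obs["fen"]))  # render the position from its FEN string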
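sample_masked_action(logits, mask) survives the commit; only its first line, masked_logits = logits.clone(), appears here as context. A typical completion of that pattern, offered as a sketch rather than a transcript of the file, fills illegal entries with -inf so the categorical distribution assigns them zero probability:

import torch

def sample_masked_action(logits, mask):
    # Clone so the caller's logits tensor is left untouched.
    masked_logits = logits.clone()
    # Illegal moves (mask == 0) get -inf; softmax then assigns them probability 0.
    masked_logits[~mask.bool()] = float("-inf")
    dist = torch.distributions.Categorical(logits=masked_logits)
    return dist.sample()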