# NautilusTrainer / train.py
# Nautilus AI
# Deploy: Trainer to Root (Retry)
# Commit: c5c085b
import ray
from ray import tune
from ray.rllib.algorithms.ppo import PPOConfig
# from envs.nautilus_env import NautilusExecutionEnv # Not used in this script
import os
def train():
    """Run a short PPO health-check training loop on CartPole-v1.

    Initializes Ray, builds a PPO algorithm with a small torch MLP
    (two 64-unit hidden layers), trains for 10 iterations, saves a
    checkpoint every 5th iteration, and always shuts Ray down on exit.
    Intended as a smoke test before swapping in the real
    NautilusExecutionEnv.
    """
    # 1. Init Ray (ignore_reinit_error lets this run inside an
    # already-initialized Ray session without raising).
    ray.init(ignore_reinit_error=True)

    # 2. Register Environment (using a standard Gym env for the health check).
    env_name = "CartPole-v1"

    # 3. Configure Algorithm.
    config = (
        PPOConfig()
        .environment(env_name)
        .framework("torch")
        .rollouts(num_rollout_workers=0)  # 0 for local test, CPU count for Prod
        .training(model={"fcnet_hiddens": [64, 64]})
        .resources(num_gpus=0)  # Set to 1 if using GPU Space
    )

    # 4. Run Training.
    print("Starting Training...")
    algo = config.build()
    try:
        for i in range(10):  # 10 iterations for the test run
            result = algo.train()
            # NOTE(review): newer RLlib versions nest this metric under
            # result["env_runners"]["episode_reward_mean"] — confirm the
            # pinned RLlib version before relying on this key.
            print(f"Iter: {i}, Reward: {result['episode_reward_mean']}")
            # Save a checkpoint on iterations 0 and 5.
            if i % 5 == 0:
                checkpoint_dir = algo.save(f"./checkpoints/iter_{i}")
                print(f"Checkpoint saved at {checkpoint_dir}")

        # 5. Export to ONNX (crucial for Nautilus).
        print("Exporting to ONNX...")
        # algo.export_policy_model("./models", onnx=<opset_version>)
        # (Exact signature varies by RLlib version; the original comment
        # referenced a nonexistent `export_model_model` method.)
    finally:
        # Always release the algorithm's actors and the Ray session,
        # even if a training iteration raises.
        algo.stop()
        ray.shutdown()
if __name__ == "__main__":
train()