Hugging Face Spaces status: Sleeping
| import ray | |
| from ray import tune | |
| from ray.rllib.algorithms.ppo import PPOConfig | |
| # from envs.nautilus_env import NautilusExecutionEnv # Not used in this script | |
| import os | |
def train():
    """Run a short PPO health-check training loop on CartPole-v1.

    Initializes Ray, trains PPO for 10 iterations on a standard Gym task,
    checkpoints periodically (and on the final iteration), then shuts Ray
    down. Intended as a smoke test before swapping in the real Nautilus
    execution environment.

    Raises:
        Whatever Ray/RLlib raise during init, build, or training; Ray is
        always shut down via try/finally even on failure.
    """
    # 1. Init Ray (ignore_reinit_error lets this run inside an existing session)
    ray.init(ignore_reinit_error=True)
    try:
        # 2. Register Environment (Using Standard Gym for Health Check)
        env_name = "CartPole-v1"

        # 3. Configure Algorithm
        config = (
            PPOConfig()
            .environment(env_name)
            .framework("torch")
            .rollouts(num_rollout_workers=0)  # 0 for local test, CPU count for Prod
            .training(model={"fcnet_hiddens": [64, 64]})
            .resources(num_gpus=0)  # Set to 1 if using GPU Space
        )

        # 4. Run Training
        print("Starting Training...")
        algo = config.build()
        num_iters = 10  # 10 Iterations for test
        for i in range(num_iters):
            result = algo.train()
            # .get() instead of [..]: newer RLlib versions nest this metric
            # under a different key — TODO confirm against the pinned version.
            print(f"Iter: {i}, Reward: {result.get('episode_reward_mean')}")
            # Save Checkpoint periodically AND on the final iteration; the
            # bare `i % 5 == 0` check saved iters 0 and 5 but never the
            # fully trained model from the last iteration.
            if i % 5 == 0 or i == num_iters - 1:
                checkpoint_dir = algo.save(f"./checkpoints/iter_{i}")
                print(f"Checkpoint saved at {checkpoint_dir}")

        # 5. Export to ONNX (Crucial for Nautilus)
        print("Exporting to ONNX...")
        # onnx_path = algo.export_policy_model("./models", onnx=11)
        # (Simplified, implementation detail varies by RLlib version;
        #  the previous sketch referenced a nonexistent `export_model_model`.)
    finally:
        # Always release Ray resources, even if build/training raises.
        ray.shutdown()
| if __name__ == "__main__": | |
| train() | |