diff --git a/.gitattributes b/.gitattributes index a00bf6ffa23a23893c46e7f0ed3bce4b2af83dea..43964235f38ab112f6a905a80ef83f0a9b8794f9 100644 --- a/.gitattributes +++ b/.gitattributes @@ -35,3 +35,4 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text *tfevents* filter=lfs diff=lfs merge=lfs -text factor_sweeps/flappy/observation_stride/train/factor_sweep:flappy:observation_stride:fixed_l2:fs4:obs30:stride1:seed10/episode_metrics.jsonl filter=lfs diff=lfs merge=lfs -text factor_sweeps/flappy/observation_stride/train/factor_sweep:flappy:observation_stride:fixed_l2:fs4:obs30:stride1:seed13/episode_metrics.jsonl filter=lfs diff=lfs merge=lfs -text +factor_sweeps/flappy/context_window/train/factor_sweep:flappy:frame_stack:fixed_l2:fs4:obs30:stride1:seed12/episode_metrics.jsonl filter=lfs diff=lfs merge=lfs -text diff --git a/factor_sweeps/flappy/context_window/train/factor_sweep:flappy:frame_stack:fixed_l2:fs4:obs30:stride1:seed12/checkpoint_p0/best_000048832_25034752_reward_2442.527.pth b/factor_sweeps/flappy/context_window/train/factor_sweep:flappy:frame_stack:fixed_l2:fs4:obs30:stride1:seed12/checkpoint_p0/best_000048832_25034752_reward_2442.527.pth new file mode 100644 index 0000000000000000000000000000000000000000..177b1707a2fa0fe96827220699419a15ed38724c --- /dev/null +++ b/factor_sweeps/flappy/context_window/train/factor_sweep:flappy:frame_stack:fixed_l2:fs4:obs30:stride1:seed12/checkpoint_p0/best_000048832_25034752_reward_2442.527.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3609abcb5c8d04e6975ae65a0a6e30118192f0678cadaeda73c7144a3ff48144 +size 21797945 diff --git a/factor_sweeps/flappy/context_window/train/factor_sweep:flappy:frame_stack:fixed_l2:fs4:obs30:stride1:seed12/checkpoint_p0/checkpoint_000024112_12353536.pth b/factor_sweeps/flappy/context_window/train/factor_sweep:flappy:frame_stack:fixed_l2:fs4:obs30:stride1:seed12/checkpoint_p0/checkpoint_000024112_12353536.pth new file mode 100644 index 0000000000000000000000000000000000000000..2898c439fc06fefb8cbd1f4ec19e1b56a4d3382c --- /dev/null +++ b/factor_sweeps/flappy/context_window/train/factor_sweep:flappy:frame_stack:fixed_l2:fs4:obs30:stride1:seed12/checkpoint_p0/checkpoint_000024112_12353536.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4bd5f1b26cb9761c40c6aafafd5f3c45c03b5207778aeb51c3419494ef8d0c6b +size 21798305 diff --git a/factor_sweeps/flappy/context_window/train/factor_sweep:flappy:frame_stack:fixed_l2:fs4:obs30:stride1:seed12/checkpoint_p0/checkpoint_000048832_25034752.pth b/factor_sweeps/flappy/context_window/train/factor_sweep:flappy:frame_stack:fixed_l2:fs4:obs30:stride1:seed12/checkpoint_p0/checkpoint_000048832_25034752.pth new file mode 100644 index 0000000000000000000000000000000000000000..29ed71a2a06bc12ec06e09a17c279fdbecf21326 --- /dev/null +++ b/factor_sweeps/flappy/context_window/train/factor_sweep:flappy:frame_stack:fixed_l2:fs4:obs30:stride1:seed12/checkpoint_p0/checkpoint_000048832_25034752.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:58038189b7e968c9837e1acdb30966fcdff05b5fe5189011bc729c7579f94f4a +size 21798305 diff --git a/factor_sweeps/flappy/context_window/train/factor_sweep:flappy:frame_stack:fixed_l2:fs4:obs30:stride1:seed12/config.json b/factor_sweeps/flappy/context_window/train/factor_sweep:flappy:frame_stack:fixed_l2:fs4:obs30:stride1:seed12/config.json new file mode 100644 index 0000000000000000000000000000000000000000..7cf3a3921612d7aed31d740788c548497b355eb3 --- /dev/null +++ b/factor_sweeps/flappy/context_window/train/factor_sweep:flappy:frame_stack:fixed_l2:fs4:obs30:stride1:seed12/config.json @@ -0,0 +1,266 @@ +{ + "help": false, + "algo": "APPO", + "env": "latency_flappy", + "experiment": "flappy_frame_stack_fixed_l2_fs4_seed12", + "train_dir": "results/checkpoints_factor_sweeps/flappy/context_window", + "restart_behavior": "resume", + "device": "gpu", + "seed": 12, + "num_policies": 1, + "async_rl": true, + "serial_mode": false, + "batched_sampling": true, + "num_batches_to_accumulate": 2, + "worker_num_splits": 1, + "policy_workers_per_policy": 1, + "max_policy_lag": 400, + "num_workers": 2, + "num_envs_per_worker": 1, + "batch_size": 4096, + "num_batches_per_epoch": 8, + "num_epochs": 8, + "rollout": 128, + "recurrence": 1, + "shuffle_minibatches": false, + "gamma": 0.99, + "reward_scale": 1.0, + "reward_clip": 1000.0, + "value_bootstrap": false, + "normalize_returns": true, + "exploration_loss_coeff": 0.003, + "value_loss_coeff": 0.5, + "kl_loss_coeff": 0.0, + "exploration_loss": "entropy", + "gae_lambda": 0.95, + "ppo_clip_ratio": 0.1, + "ppo_clip_value": 0.2, + "with_vtrace": false, + "vtrace_rho": 1.0, + "vtrace_c": 1.0, + "optimizer": "adam", + "adam_eps": 1e-05, + "adam_beta1": 0.9, + "adam_beta2": 0.999, + "max_grad_norm": 0.5, + "learning_rate": 0.00025, + "lr_schedule": "linear_decay", + "lr_schedule_kl_threshold": 0.008, + "lr_adaptive_min": 1e-06, + "lr_adaptive_max": 0.01, + "obs_subtract_mean": 0.0, + "obs_scale": 255.0, + "normalize_input": true, + "normalize_input_keys": null, + "decorrelate_experience_max_seconds": 0, + "decorrelate_envs_on_one_worker": true, + "actor_worker_gpus": [ + 0 + ], + "set_workers_cpu_affinity": true, + "force_envs_single_thread": false, + "default_niceness": 0, + "log_to_file": true, + "experiment_summaries_interval": 1, + "flush_summaries_interval": 30, + "stats_avg": 100, + "summaries_use_frameskip": true, + "heartbeat_interval": 20, + "heartbeat_reporting_interval": 180, + "train_for_env_steps": 25000000, + "train_for_seconds": 10000000000, + "save_every_sec": 600, + "keep_checkpoints": 5, + "load_checkpoint_kind": "latest", + "save_milestones_sec": -1, + "save_best_every_sec": 5, + "save_best_metric": "reward", + "save_best_after": 100000, + "benchmark": false, + "encoder_mlp_layers": [ + 512, + 512 + ], + "encoder_conv_architecture": "convnet_atari", + "encoder_conv_mlp_layers": [ + 512 + ], + "use_rnn": false, + "rnn_size": 512, + "rnn_type": "gru", + "rnn_num_layers": 1, + "decoder_mlp_layers": [], + "nonlinearity": "elu", + "policy_initialization": "orthogonal", + "policy_init_gain": 1.0, + "actor_critic_share_weights": true, + "adaptive_stddev": true, + "continuous_tanh_scale": 0.0, + "initial_stddev": 1.0, + "use_env_info_cache": false, + "env_gpu_actions": true, + "env_gpu_observations": true, + "env_frameskip": 1, + "env_framestack": 1, + "pixel_format": "CHW", + "use_record_episode_statistics": false, + "with_wandb": true, + "wandb_user": null, + "wandb_project": "latency-sensitive-bench", + "wandb_group": "flappy-fs4-fixed_l2", + "wandb_job_type": "sample_factory", + "wandb_tags": [ + "factor_sweep", + "flappy", + "frame_stack", + "fixed", + "fixed_l2", + "fs4", + "seed12" + ], + "with_pbt": false, + "pbt_mix_policies_in_one_env": true, + "pbt_period_env_steps": 5000000, + "pbt_start_mutation": 20000000, + "pbt_replace_fraction": 0.3, + "pbt_mutation_rate": 0.15, + "pbt_replace_reward_gap": 0.1, + "pbt_replace_reward_gap_absolute": 1e-06, + "pbt_optimize_gamma": false, + "pbt_target_objective": "true_objective", + "pbt_perturb_min": 1.1, + "pbt_perturb_max": 1.5, + "gym_id": "FlappyBird-v0", + "env_fps": 30.0, + "obs_fps": 30.0, + "use_lidar": false, + "normalize_obs": true, + "audio_on": false, + "screen_size": "", + "obs_resize": "84,84", + "use_gpu_render": true, + "simulator": "gpu", + "gpu_render_device": "auto", + "gpu_render_batch_size": 128, + "gpu_render_profile": false, + "gpu_render_profile_interval": 200, + "pipe_gap": 100, + "bird_color": "yellow", + "pipe_color": "green", + "background": "day", + "score_limit": -1, + "frame_stack": 4, + "debug": false, + "debug_timelimit_diagnostics": false, + "max_episode_steps": 0, + "mode": "train", + "latency_type": "fixed", + "fixed_latency_ms": 66.66666666666667, + "mean_latency_ms": null, + "std_latency_ms": null, + "min_latency_ms": null, + "max_latency_ms": null, + "latency_seed": null, + "add_latency_info": false, + "max_pending_actions": null, + "hold_policy": "one_frame_then_noop", + "ordering_policy": "latest_ready", + "eval_episodes": 100, + "eval_parallel_envs": 100, + "eval_latency_raw_frame_values": "0,1,2,3,4,5", + "eval_max_steps": 3600, + "eval_deterministic": true, + "eval_raw_reward": false, + "episode_metrics_path": "results/checkpoints_factor_sweeps/flappy/context_window/flappy_frame_stack_fixed_l2_fs4_seed12/episode_metrics.jsonl", + "command_line": "--mode train --algo APPO --env latency_flappy --experiment flappy_frame_stack_fixed_l2_fs4_seed12 --train_dir results/checkpoints_factor_sweeps/flappy/context_window --restart_behavior resume --device gpu --actor_worker_gpus 0 --env_gpu_observations True --env_gpu_actions True --gpu-render-batch-size 128 --seed 12 --episode_metrics_path results/checkpoints_factor_sweeps/flappy/context_window/flappy_frame_stack_fixed_l2_fs4_seed12/episode_metrics.jsonl --train_for_env_steps 25000000 --num_workers 2 --num_envs_per_worker 1 --num_policies 1 --batch_size 4096 --rollout 128 --recurrence 1 --num_epochs 8 --num_batches_per_epoch 8 --worker_num_splits 1 --max_policy_lag 400 --learning_rate 0.00025 --gamma 0.99 --gae_lambda 0.95 --ppo_clip_ratio 0.1 --ppo_clip_value 0.2 --value_loss_coeff 0.5 --max_grad_norm 0.5 --save_every_sec 600 --keep_checkpoints 5 --stats_avg 100 --experiment_summaries_interval 1 --batched_sampling True --async_rl True --use_rnn False --normalize_returns True --normalize_input True --latency-type fixed --fixed-latency-ms 66.66666666666667 --add-latency-info False --eval-episodes 100 --eval-parallel-envs 100 --eval-max-steps 3600 --eval-deterministic True --with_wandb True --wandb_project latency-sensitive-bench --wandb_group flappy-fs4-fixed_l2 --wandb_job_type sample_factory --wandb_tags factor_sweep flappy frame_stack fixed fixed_l2 fs4 seed12 --gym_id FlappyBird-v0 --env-fps 30 --obs-fps 30.0 --use_lidar False --normalize_obs True --audio_on False --obs_resize 84,84 --use-gpu-render True --simulator gpu --gpu-render-device auto --gpu-render-profile False --gpu-render-profile-interval 200 --pipe_gap 100 --bird_color yellow --pipe_color green --background day --frame_stack 4 --debug False --debug-timelimit-diagnostics False --hold-policy one_frame_then_noop --ordering-policy latest_ready", + "cli_args": { + "algo": "APPO", + "env": "latency_flappy", + "experiment": "flappy_frame_stack_fixed_l2_fs4_seed12", + "train_dir": "results/checkpoints_factor_sweeps/flappy/context_window", + "restart_behavior": "resume", + "device": "gpu", + "seed": 12, + "num_policies": 1, + "async_rl": true, + "batched_sampling": true, + "worker_num_splits": 1, + "max_policy_lag": 400, + "num_workers": 2, + "num_envs_per_worker": 1, + "batch_size": 4096, + "num_batches_per_epoch": 8, + "num_epochs": 8, + "rollout": 128, + "recurrence": 1, + "gamma": 0.99, + "normalize_returns": true, + "value_loss_coeff": 0.5, + "gae_lambda": 0.95, + "ppo_clip_ratio": 0.1, + "ppo_clip_value": 0.2, + "max_grad_norm": 0.5, + "learning_rate": 0.00025, + "normalize_input": true, + "actor_worker_gpus": [ + 0 + ], + "experiment_summaries_interval": 1, + "stats_avg": 100, + "train_for_env_steps": 25000000, + "save_every_sec": 600, + "keep_checkpoints": 5, + "use_rnn": false, + "env_gpu_actions": true, + "env_gpu_observations": true, + "with_wandb": true, + "wandb_project": "latency-sensitive-bench", + "wandb_group": "flappy-fs4-fixed_l2", + "wandb_job_type": "sample_factory", + "wandb_tags": [ + "factor_sweep", + "flappy", + "frame_stack", + "fixed", + "fixed_l2", + "fs4", + "seed12" + ], + "gym_id": "FlappyBird-v0", + "env_fps": 30.0, + "obs_fps": 30.0, + "use_lidar": false, + "normalize_obs": true, + "audio_on": false, + "obs_resize": "84,84", + "use_gpu_render": true, + "simulator": "gpu", + "gpu_render_device": "auto", + "gpu_render_batch_size": 128, + "gpu_render_profile": false, + "gpu_render_profile_interval": 200, + "pipe_gap": 100, + "bird_color": "yellow", + "pipe_color": "green", + "background": "day", + "frame_stack": 4, + "debug": false, + "debug_timelimit_diagnostics": false, + "mode": "train", + "latency_type": "fixed", + "fixed_latency_ms": 66.66666666666667, + "add_latency_info": false, + "hold_policy": "one_frame_then_noop", + "ordering_policy": "latest_ready", + "eval_episodes": 100, + "eval_parallel_envs": 100, + "eval_max_steps": 3600, + "eval_deterministic": true, + "episode_metrics_path": "results/checkpoints_factor_sweeps/flappy/context_window/flappy_frame_stack_fixed_l2_fs4_seed12/episode_metrics.jsonl" + }, + "git_hash": "284fe8ace24f0e8a40c03c5b559969abd7caeb29", + "git_repo_name": "git@github.com:ZihanWang314/latency-sensitive-bench.git", + "eval_env_frameskip": 1, + "output_dir": "outputs/factor_sweeps/flappy/context_window/train/frame_stack/fixed_l2/fs4/seed_12", + "wandb_unique_id": "flappy-fs4-fixed_l2-s12" +} \ No newline at end of file diff --git a/factor_sweeps/flappy/context_window/train/factor_sweep:flappy:frame_stack:fixed_l2:fs4:obs30:stride1:seed12/episode_metrics.jsonl b/factor_sweeps/flappy/context_window/train/factor_sweep:flappy:frame_stack:fixed_l2:fs4:obs30:stride1:seed12/episode_metrics.jsonl new file mode 100644 index 0000000000000000000000000000000000000000..dc214cc2fc2762f085145efdaeab9872ae1e362e --- /dev/null +++ b/factor_sweeps/flappy/context_window/train/factor_sweep:flappy:frame_stack:fixed_l2:fs4:obs30:stride1:seed12/episode_metrics.jsonl @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:eab58f11ae205de2e89072c0a9a08dc701287f5b6def25be97d0f1ffc45237ce +size 23239166 diff --git a/factor_sweeps/flappy/context_window/train/factor_sweep:flappy:frame_stack:fixed_l2:fs4:obs30:stride1:seed12/git.diff b/factor_sweeps/flappy/context_window/train/factor_sweep:flappy:frame_stack:fixed_l2:fs4:obs30:stride1:seed12/git.diff new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/factor_sweeps/flappy/context_window/train/factor_sweep:flappy:frame_stack:fixed_l2:fs4:obs30:stride1:seed12/sf_log.txt b/factor_sweeps/flappy/context_window/train/factor_sweep:flappy:frame_stack:fixed_l2:fs4:obs30:stride1:seed12/sf_log.txt new file mode 100644 index 0000000000000000000000000000000000000000..076c97b5a1a6fd3f970881b0d417b344b811490a --- /dev/null +++ b/factor_sweeps/flappy/context_window/train/factor_sweep:flappy:frame_stack:fixed_l2:fs4:obs30:stride1:seed12/sf_log.txt @@ -0,0 +1,5435 @@ +[2026-06-02 16:23:16,298][235960] Saving configuration to results/checkpoints_factor_sweeps/flappy/context_window/flappy_frame_stack_fixed_l2_fs4_seed12/config.json... +[2026-06-02 16:23:16,365][235960] Using GPUs [0] for process 0 (actually maps to GPUs [4]) +[2026-06-02 16:23:16,365][235960] Rollout worker 0 uses device cuda:0 +[2026-06-02 16:23:16,365][235960] Using GPUs [0] for process 1 (actually maps to GPUs [4]) +[2026-06-02 16:23:16,365][235960] Rollout worker 1 uses device cuda:0 +[2026-06-02 16:23:19,343][235960] Using GPUs [0] for process 0 (actually maps to GPUs [4]) +[2026-06-02 16:23:19,344][235960] InferenceWorker_p0-w0: min num requests: 1 +[2026-06-02 16:23:19,348][235960] Using GPUs [0] for process 0 (actually maps to GPUs [4]) +[2026-06-02 16:23:19,353][235960] Using GPUs [0] for process 1 (actually maps to GPUs [4]) +[2026-06-02 16:23:19,354][235960] Starting all processes... +[2026-06-02 16:23:19,354][235960] Starting process learner_proc0 +[2026-06-02 16:23:20,716][235960] Starting all processes... +[2026-06-02 16:23:20,719][235960] Starting process inference_proc0-0 +[2026-06-02 16:23:20,719][235960] Starting process rollout_proc0 +[2026-06-02 16:23:20,719][235960] Starting process rollout_proc1 +[2026-06-02 16:23:21,752][242748] Using GPUs [0] for process 0 (actually maps to GPUs [4]) +[2026-06-02 16:23:21,752][242748] Set environment var CUDA_VISIBLE_DEVICES to '4' (GPU indices [0]) for learning process 0 +[2026-06-02 16:23:21,752][242748] Num visible devices: 1 +[2026-06-02 16:23:21,753][242748] Setting fixed seed 12 +[2026-06-02 16:23:21,755][242748] Using GPUs [0] for process 0 (actually maps to GPUs [4]) +[2026-06-02 16:23:21,755][242748] Initializing actor-critic model on device cuda:0 +[2026-06-02 16:23:21,755][242748] RunningMeanStd input shape: (12, 84, 84) +[2026-06-02 16:23:21,763][242748] RunningMeanStd input shape: (1,) +[2026-06-02 16:23:21,771][242748] ConvEncoder: input_channels=12 +[2026-06-02 16:23:21,833][242748] Conv encoder output size: 512 +[2026-06-02 16:23:21,834][242748] Created Actor Critic model with architecture: +[2026-06-02 16:23:21,835][242748] ActorCriticSharedWeights( + (obs_normalizer): ObservationNormalizer( + (running_mean_std): RunningMeanStdDictInPlace( + (running_mean_std): ModuleDict( + (obs): RunningMeanStdInPlace() + ) + ) + ) + (returns_normalizer): RecursiveScriptModule(original_name=RunningMeanStdInPlace) + (encoder): MultiInputEncoder( + (encoders): ModuleDict( + (obs): ConvEncoder( + (enc): RecursiveScriptModule( + original_name=ConvEncoderImpl + (conv_head): RecursiveScriptModule( + original_name=Sequential + (0): RecursiveScriptModule(original_name=Conv2d) + (1): RecursiveScriptModule(original_name=ELU) + (2): RecursiveScriptModule(original_name=Conv2d) + (3): RecursiveScriptModule(original_name=ELU) + (4): RecursiveScriptModule(original_name=Conv2d) + (5): RecursiveScriptModule(original_name=ELU) + ) + (mlp_layers): RecursiveScriptModule( + original_name=Sequential + (0): RecursiveScriptModule(original_name=Linear) + (1): RecursiveScriptModule(original_name=ELU) + ) + ) + ) + ) + ) + (core): ModelCoreIdentity() + (decoder): MlpDecoder( + (mlp): Identity() + ) + (critic_linear): Linear(in_features=512, out_features=1, bias=True) + (action_parameterization): ActionParameterizationDefault( + (distribution_linear): Linear(in_features=512, out_features=2, bias=True) + ) +) +[2026-06-02 16:23:21,844][242748] Using optimizer +[2026-06-02 16:23:22,855][242748] No checkpoints found +[2026-06-02 16:23:22,855][242748] Did not load from checkpoint, starting from scratch! +[2026-06-02 16:23:22,856][242748] Initialized policy 0 weights for model version 0 +[2026-06-02 16:23:22,866][242748] LearnerWorker_p0 finished initialization! +[2026-06-02 16:23:22,867][242748] Using GPUs [0] for process 0 (actually maps to GPUs [4]) +[2026-06-02 16:23:24,811][235960] Fps is (10 sec: nan, 60 sec: nan, 300 sec: nan). Total num frames: 0. Throughput: 0: nan. Samples: 0. Policy #0 lag: (min: -1.0, avg: -1.0, max: -1.0) +[2026-06-02 16:23:26,363][243562] Using GPUs [0] for process 0 (actually maps to GPUs [4]) +[2026-06-02 16:23:26,363][243562] Set environment var CUDA_VISIBLE_DEVICES to '4' (GPU indices [0]) for inference process 0 +[2026-06-02 16:23:26,364][243562] Num visible devices: 1 +[2026-06-02 16:23:26,429][243562] RunningMeanStd input shape: (12, 84, 84) +[2026-06-02 16:23:26,466][243562] RunningMeanStd input shape: (1,) +[2026-06-02 16:23:26,491][243562] ConvEncoder: input_channels=12 +[2026-06-02 16:23:26,563][243562] Conv encoder output size: 512 +[2026-06-02 16:23:26,586][235960] Inference worker 0-0 is ready! +[2026-06-02 16:23:26,587][235960] All inference workers are ready! Signal rollout workers to start! +[2026-06-02 16:23:26,689][243563] Worker 0 uses CPU cores [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63, 64, 65, 66, 67, 68, 69, 70, 71, 72, 73, 74, 75, 76, 77, 78, 79, 80, 81, 82, 83, 84, 85, 86, 87, 88, 89, 90, 91, 92, 93, 94, 95, 96, 97, 98, 99, 100, 101, 102, 103, 104, 105, 106, 107, 108, 109, 110, 111, 112, 113, 114, 115, 116, 117, 118, 119, 120, 121, 122, 123, 124, 125, 126, 127, 128, 129, 130, 131, 132, 133, 134, 135, 136, 137, 138, 139, 140, 141, 142, 143, 144, 145, 146, 147, 148, 149, 150, 151, 152, 153, 154, 155, 156, 157, 158, 159, 160, 161, 162, 163, 164, 165, 166, 167, 168, 169, 170, 171, 172, 173, 174, 175, 176, 177, 178, 179, 180, 181, 182, 183, 184, 185, 186, 187, 188, 189, 190, 191] +[2026-06-02 16:23:26,689][243563] Using GPUs [0] for process 0 (actually maps to GPUs [4]) +[2026-06-02 16:23:26,690][243563] Set environment var CUDA_VISIBLE_DEVICES to '4' (GPU indices [0]) for actor process 0 +[2026-06-02 16:23:26,690][243563] Num visible devices: 1 +[2026-06-02 16:23:26,691][243563] EnvRunner 0-0 uses policy 0 +[2026-06-02 16:23:26,731][243564] Worker 1 uses CPU cores [192, 193, 194, 195, 196, 197, 198, 199, 200, 201, 202, 203, 204, 205, 206, 207, 208, 209, 210, 211, 212, 213, 214, 215, 216, 217, 218, 219, 220, 221, 222, 223, 224, 225, 226, 227, 228, 229, 230, 231, 232, 233, 234, 235, 236, 237, 238, 239, 240, 241, 242, 243, 244, 245, 246, 247, 248, 249, 250, 251, 252, 253, 254, 255, 256, 257, 258, 259, 260, 261, 262, 263, 264, 265, 266, 267, 268, 269, 270, 271, 272, 273, 274, 275, 276, 277, 278, 279, 280, 281, 282, 283, 284, 285, 286, 287, 288, 289, 290, 291, 292, 293, 294, 295, 296, 297, 298, 299, 300, 301, 302, 303, 304, 305, 306, 307, 308, 309, 310, 311, 312, 313, 314, 315, 316, 317, 318, 319, 320, 321, 322, 323, 324, 325, 326, 327, 328, 329, 330, 331, 332, 333, 334, 335, 336, 337, 338, 339, 340, 341, 342, 343, 344, 345, 346, 347, 348, 349, 350, 351, 352, 353, 354, 355, 356, 357, 358, 359, 360, 361, 362, 363, 364, 365, 366, 367, 368, 369, 370, 371, 372, 373, 374, 375, 376, 377, 378, 379, 380, 381, 382, 383] +[2026-06-02 16:23:26,732][243564] Using GPUs [0] for process 1 (actually maps to GPUs [4]) +[2026-06-02 16:23:26,732][243564] Set environment var CUDA_VISIBLE_DEVICES to '4' (GPU indices [0]) for actor process 1 +[2026-06-02 16:23:26,732][243564] Num visible devices: 1 +[2026-06-02 16:23:26,735][243564] EnvRunner 1-0 uses policy 0 +[2026-06-02 16:23:29,338][242748] Signal inference workers to stop experience collection... +[2026-06-02 16:23:29,342][243562] InferenceWorker_p0-w0: stopping experience collection +[2026-06-02 16:23:29,812][235960] Fps is (10 sec: 0.0, 60 sec: 0.0, 300 sec: 0.0). Total num frames: 0. Throughput: 0: 13182.7. Samples: 65920. Policy #0 lag: (min: -1.0, avg: -1.0, max: -1.0) +[2026-06-02 16:23:29,813][235960] Avg episode reward: [(0, '-7.167')] +[2026-06-02 16:23:30,776][242748] Signal inference workers to resume experience collection... +[2026-06-02 16:23:30,776][243562] InferenceWorker_p0-w0: resuming experience collection +[2026-06-02 16:23:31,159][243562] Updated weights for policy 0, policy_version 81 (0.0047) +[2026-06-02 16:23:31,345][243562] Updated weights for policy 0, policy_version 91 (0.0005) +[2026-06-02 16:23:31,650][243562] Updated weights for policy 0, policy_version 110 (0.0008) +[2026-06-02 16:23:31,831][243562] Updated weights for policy 0, policy_version 120 (0.0009) +[2026-06-02 16:23:32,373][243562] Updated weights for policy 0, policy_version 130 (0.0009) +[2026-06-02 16:23:32,597][243562] Updated weights for policy 0, policy_version 145 (0.0009) +[2026-06-02 16:23:32,773][243562] Updated weights for policy 0, policy_version 155 (0.0009) +[2026-06-02 16:23:32,952][243562] Updated weights for policy 0, policy_version 165 (0.0009) +[2026-06-02 16:23:33,115][243562] Updated weights for policy 0, policy_version 175 (0.0013) +[2026-06-02 16:23:33,281][243562] Updated weights for policy 0, policy_version 185 (0.0009) +[2026-06-02 16:23:33,822][243562] Updated weights for policy 0, policy_version 195 (0.0007) +[2026-06-02 16:23:33,981][243562] Updated weights for policy 0, policy_version 205 (0.0005) +[2026-06-02 16:23:34,142][243562] Updated weights for policy 0, policy_version 215 (0.0008) +[2026-06-02 16:23:34,312][243562] Updated weights for policy 0, policy_version 225 (0.0009) +[2026-06-02 16:23:34,492][243562] Updated weights for policy 0, policy_version 235 (0.0009) +[2026-06-02 16:23:34,651][243562] Updated weights for policy 0, policy_version 245 (0.0008) +[2026-06-02 16:23:34,812][235960] Fps is (10 sec: 9830.2, 60 sec: 9830.2, 300 sec: 9830.2). Total num frames: 98304. Throughput: 0: 10495.8. Samples: 104960. Policy #0 lag: (min: 53.0, avg: 53.0, max: 53.0) +[2026-06-02 16:23:34,814][235960] Avg episode reward: [(0, '-6.375')] +[2026-06-02 16:23:34,821][243562] Updated weights for policy 0, policy_version 255 (0.0007) +[2026-06-02 16:23:35,275][243562] Updated weights for policy 0, policy_version 265 (0.0008) +[2026-06-02 16:23:35,444][243562] Updated weights for policy 0, policy_version 275 (0.0008) +[2026-06-02 16:23:35,609][243562] Updated weights for policy 0, policy_version 285 (0.0008) +[2026-06-02 16:23:35,786][243562] Updated weights for policy 0, policy_version 295 (0.0006) +[2026-06-02 16:23:35,952][243562] Updated weights for policy 0, policy_version 305 (0.0006) +[2026-06-02 16:23:36,131][243562] Updated weights for policy 0, policy_version 315 (0.0009) +[2026-06-02 16:23:36,634][243562] Updated weights for policy 0, policy_version 325 (0.0013) +[2026-06-02 16:23:36,802][243562] Updated weights for policy 0, policy_version 335 (0.0008) +[2026-06-02 16:23:36,962][243562] Updated weights for policy 0, policy_version 345 (0.0008) +[2026-06-02 16:23:37,136][243562] Updated weights for policy 0, policy_version 355 (0.0008) +[2026-06-02 16:23:37,302][243562] Updated weights for policy 0, policy_version 365 (0.0009) +[2026-06-02 16:23:37,478][243562] Updated weights for policy 0, policy_version 375 (0.0008) +[2026-06-02 16:23:37,937][243562] Updated weights for policy 0, policy_version 385 (0.0009) +[2026-06-02 16:23:38,094][243562] Updated weights for policy 0, policy_version 395 (0.0008) +[2026-06-02 16:23:38,261][243562] Updated weights for policy 0, policy_version 405 (0.0008) +[2026-06-02 16:23:38,433][243562] Updated weights for policy 0, policy_version 415 (0.0008) +[2026-06-02 16:23:38,602][243562] Updated weights for policy 0, policy_version 425 (0.0009) +[2026-06-02 16:23:38,777][243562] Updated weights for policy 0, policy_version 435 (0.0009) +[2026-06-02 16:23:38,942][243562] Updated weights for policy 0, policy_version 445 (0.0008) +[2026-06-02 16:23:39,332][235960] Heartbeat connected on Batcher_0 +[2026-06-02 16:23:39,349][235960] Heartbeat connected on RolloutWorker_w0 +[2026-06-02 16:23:39,356][235960] Heartbeat connected on InferenceWorker_p0-w0 +[2026-06-02 16:23:39,406][235960] Heartbeat connected on RolloutWorker_w1 +[2026-06-02 16:23:39,427][243562] Updated weights for policy 0, policy_version 455 (0.0009) +[2026-06-02 16:23:39,584][243562] Updated weights for policy 0, policy_version 465 (0.0008) +[2026-06-02 16:23:39,759][243562] Updated weights for policy 0, policy_version 475 (0.0009) +[2026-06-02 16:23:39,811][235960] Fps is (10 sec: 22938.2, 60 sec: 15291.5, 300 sec: 15291.5). Total num frames: 229376. Throughput: 0: 16460.6. Samples: 246912. Policy #0 lag: (min: 63.0, avg: 88.5, max: 127.0) +[2026-06-02 16:23:39,813][235960] Avg episode reward: [(0, '-4.720')] +[2026-06-02 16:23:39,934][243562] Updated weights for policy 0, policy_version 485 (0.0009) +[2026-06-02 16:23:40,107][243562] Updated weights for policy 0, policy_version 495 (0.0009) +[2026-06-02 16:23:40,269][243562] Updated weights for policy 0, policy_version 505 (0.0009) +[2026-06-02 16:23:40,387][242748] Saving new best policy, reward=-4.720! +[2026-06-02 16:23:40,412][235960] Heartbeat connected on LearnerWorker_p0 +[2026-06-02 16:23:40,778][243562] Updated weights for policy 0, policy_version 515 (0.0009) +[2026-06-02 16:23:40,937][243562] Updated weights for policy 0, policy_version 525 (0.0005) +[2026-06-02 16:23:41,116][243562] Updated weights for policy 0, policy_version 535 (0.0004) +[2026-06-02 16:23:41,283][243562] Updated weights for policy 0, policy_version 545 (0.0004) +[2026-06-02 16:23:41,457][243562] Updated weights for policy 0, policy_version 555 (0.0006) +[2026-06-02 16:23:41,624][243562] Updated weights for policy 0, policy_version 565 (0.0008) +[2026-06-02 16:23:41,800][243562] Updated weights for policy 0, policy_version 575 (0.0008) +[2026-06-02 16:23:42,261][243562] Updated weights for policy 0, policy_version 585 (0.0009) +[2026-06-02 16:23:42,418][243562] Updated weights for policy 0, policy_version 595 (0.0009) +[2026-06-02 16:23:42,600][243562] Updated weights for policy 0, policy_version 605 (0.0009) +[2026-06-02 16:23:42,757][243562] Updated weights for policy 0, policy_version 615 (0.0009) +[2026-06-02 16:23:42,926][243562] Updated weights for policy 0, policy_version 625 (0.0010) +[2026-06-02 16:23:43,092][243562] Updated weights for policy 0, policy_version 635 (0.0010) +[2026-06-02 16:23:43,684][243562] Updated weights for policy 0, policy_version 645 (0.0008) +[2026-06-02 16:23:43,842][243562] Updated weights for policy 0, policy_version 655 (0.0008) +[2026-06-02 16:23:44,009][243562] Updated weights for policy 0, policy_version 665 (0.0009) +[2026-06-02 16:23:44,180][243562] Updated weights for policy 0, policy_version 675 (0.0008) +[2026-06-02 16:23:44,375][243562] Updated weights for policy 0, policy_version 687 (0.0008) +[2026-06-02 16:23:44,531][243562] Updated weights for policy 0, policy_version 697 (0.0008) +[2026-06-02 16:23:44,812][235960] Fps is (10 sec: 26213.1, 60 sec: 18021.7, 300 sec: 18021.7). Total num frames: 360448. Throughput: 0: 19090.5. Samples: 381824. Policy #0 lag: (min: 25.0, avg: 63.6, max: 89.0) +[2026-06-02 16:23:44,820][235960] Avg episode reward: [(0, '3.565')] +[2026-06-02 16:23:44,830][242748] Saving new best policy, reward=3.565! +[2026-06-02 16:23:45,133][243562] Updated weights for policy 0, policy_version 707 (0.0008) +[2026-06-02 16:23:45,315][243562] Updated weights for policy 0, policy_version 719 (0.0008) +[2026-06-02 16:23:45,485][243562] Updated weights for policy 0, policy_version 730 (0.0009) +[2026-06-02 16:23:45,643][243562] Updated weights for policy 0, policy_version 740 (0.0008) +[2026-06-02 16:23:45,822][243562] Updated weights for policy 0, policy_version 750 (0.0008) +[2026-06-02 16:23:45,993][243562] Updated weights for policy 0, policy_version 760 (0.0008) +[2026-06-02 16:23:46,642][243562] Updated weights for policy 0, policy_version 772 (0.0008) +[2026-06-02 16:23:46,814][243562] Updated weights for policy 0, policy_version 782 (0.0008) +[2026-06-02 16:23:47,024][243562] Updated weights for policy 0, policy_version 795 (0.0008) +[2026-06-02 16:23:47,232][243562] Updated weights for policy 0, policy_version 808 (0.0010) +[2026-06-02 16:23:47,395][243562] Updated weights for policy 0, policy_version 819 (0.0009) +[2026-06-02 16:23:47,558][243562] Updated weights for policy 0, policy_version 829 (0.0008) +[2026-06-02 16:23:48,218][243562] Updated weights for policy 0, policy_version 842 (0.0008) +[2026-06-02 16:23:48,366][243562] Updated weights for policy 0, policy_version 852 (0.0009) +[2026-06-02 16:23:48,528][243562] Updated weights for policy 0, policy_version 862 (0.0009) +[2026-06-02 16:23:48,693][243562] Updated weights for policy 0, policy_version 872 (0.0009) +[2026-06-02 16:23:48,883][243562] Updated weights for policy 0, policy_version 882 (0.0008) +[2026-06-02 16:23:49,067][243562] Updated weights for policy 0, policy_version 894 (0.0008) +[2026-06-02 16:23:49,767][243562] Updated weights for policy 0, policy_version 904 (0.0008) +[2026-06-02 16:23:49,811][235960] Fps is (10 sec: 22937.5, 60 sec: 18349.9, 300 sec: 18349.9). Total num frames: 458752. Throughput: 0: 17960.8. Samples: 449024. Policy #0 lag: (min: 13.0, avg: 31.5, max: 77.0) +[2026-06-02 16:23:49,820][235960] Avg episode reward: [(0, '3.973')] +[2026-06-02 16:23:49,942][243562] Updated weights for policy 0, policy_version 915 (0.0008) +[2026-06-02 16:23:50,088][243562] Updated weights for policy 0, policy_version 925 (0.0008) +[2026-06-02 16:23:50,272][243562] Updated weights for policy 0, policy_version 936 (0.0008) +[2026-06-02 16:23:50,431][243562] Updated weights for policy 0, policy_version 946 (0.0009) +[2026-06-02 16:23:50,607][243562] Updated weights for policy 0, policy_version 956 (0.0008) +[2026-06-02 16:23:50,670][242748] Saving new best policy, reward=3.973! +[2026-06-02 16:23:51,174][243562] Updated weights for policy 0, policy_version 966 (0.0008) +[2026-06-02 16:23:51,354][243562] Updated weights for policy 0, policy_version 978 (0.0008) +[2026-06-02 16:23:51,514][243562] Updated weights for policy 0, policy_version 988 (0.0008) +[2026-06-02 16:23:51,673][243562] Updated weights for policy 0, policy_version 998 (0.0008) +[2026-06-02 16:23:51,853][243562] Updated weights for policy 0, policy_version 1008 (0.0008) +[2026-06-02 16:23:52,036][243562] Updated weights for policy 0, policy_version 1019 (0.0008) +[2026-06-02 16:23:52,750][243562] Updated weights for policy 0, policy_version 1029 (0.0008) +[2026-06-02 16:23:52,927][243562] Updated weights for policy 0, policy_version 1040 (0.0008) +[2026-06-02 16:23:53,075][243562] Updated weights for policy 0, policy_version 1050 (0.0008) +[2026-06-02 16:23:53,226][243562] Updated weights for policy 0, policy_version 1060 (0.0008) +[2026-06-02 16:23:53,387][243562] Updated weights for policy 0, policy_version 1070 (0.0008) +[2026-06-02 16:23:53,554][243562] Updated weights for policy 0, policy_version 1080 (0.0008) +[2026-06-02 16:23:54,234][243562] Updated weights for policy 0, policy_version 1090 (0.0009) +[2026-06-02 16:23:54,411][243562] Updated weights for policy 0, policy_version 1102 (0.0008) +[2026-06-02 16:23:54,583][243562] Updated weights for policy 0, policy_version 1112 (0.0008) +[2026-06-02 16:23:54,748][243562] Updated weights for policy 0, policy_version 1122 (0.0008) +[2026-06-02 16:23:54,812][235960] Fps is (10 sec: 19661.1, 60 sec: 18568.2, 300 sec: 18568.2). Total num frames: 557056. Throughput: 0: 19289.2. Samples: 578688. Policy #0 lag: (min: 63.0, avg: 84.9, max: 127.0) +[2026-06-02 16:23:54,814][235960] Avg episode reward: [(0, '4.042')] +[2026-06-02 16:23:54,909][243562] Updated weights for policy 0, policy_version 1132 (0.0009) +[2026-06-02 16:23:55,069][243562] Updated weights for policy 0, policy_version 1142 (0.0010) +[2026-06-02 16:23:55,213][242748] Saving new best policy, reward=4.042! +[2026-06-02 16:23:55,216][243562] Updated weights for policy 0, policy_version 1152 (0.0008) +[2026-06-02 16:23:55,933][243562] Updated weights for policy 0, policy_version 1162 (0.0010) +[2026-06-02 16:23:56,121][243562] Updated weights for policy 0, policy_version 1174 (0.0008) +[2026-06-02 16:23:56,281][243562] Updated weights for policy 0, policy_version 1185 (0.0009) +[2026-06-02 16:23:56,454][243562] Updated weights for policy 0, policy_version 1195 (0.0009) +[2026-06-02 16:23:56,609][243562] Updated weights for policy 0, policy_version 1205 (0.0009) +[2026-06-02 16:23:56,784][243562] Updated weights for policy 0, policy_version 1215 (0.0009) +[2026-06-02 16:23:57,580][243562] Updated weights for policy 0, policy_version 1227 (0.0009) +[2026-06-02 16:23:57,730][243562] Updated weights for policy 0, policy_version 1237 (0.0009) +[2026-06-02 16:23:57,911][243562] Updated weights for policy 0, policy_version 1248 (0.0009) +[2026-06-02 16:23:58,069][243562] Updated weights for policy 0, policy_version 1258 (0.0008) +[2026-06-02 16:23:58,238][243562] Updated weights for policy 0, policy_version 1269 (0.0008) +[2026-06-02 16:23:58,397][243562] Updated weights for policy 0, policy_version 1279 (0.0008) +[2026-06-02 16:23:59,184][243562] Updated weights for policy 0, policy_version 1289 (0.0008) +[2026-06-02 16:23:59,347][243562] Updated weights for policy 0, policy_version 1299 (0.0008) +[2026-06-02 16:23:59,508][243562] Updated weights for policy 0, policy_version 1309 (0.0009) +[2026-06-02 16:23:59,664][243562] Updated weights for policy 0, policy_version 1319 (0.0008) +[2026-06-02 16:23:59,811][235960] Fps is (10 sec: 19661.4, 60 sec: 18724.6, 300 sec: 18724.6). Total num frames: 655360. Throughput: 0: 20183.8. Samples: 706432. Policy #0 lag: (min: 63.0, avg: 77.4, max: 127.0) +[2026-06-02 16:23:59,812][235960] Avg episode reward: [(0, '4.028')] +[2026-06-02 16:23:59,839][243562] Updated weights for policy 0, policy_version 1330 (0.0008) +[2026-06-02 16:23:59,996][243562] Updated weights for policy 0, policy_version 1340 (0.0008) +[2026-06-02 16:24:00,798][243562] Updated weights for policy 0, policy_version 1351 (0.0008) +[2026-06-02 16:24:00,959][243562] Updated weights for policy 0, policy_version 1362 (0.0008) +[2026-06-02 16:24:01,127][243562] Updated weights for policy 0, policy_version 1372 (0.0008) +[2026-06-02 16:24:01,274][243562] Updated weights for policy 0, policy_version 1382 (0.0008) +[2026-06-02 16:24:01,464][243562] Updated weights for policy 0, policy_version 1393 (0.0008) +[2026-06-02 16:24:01,621][243562] Updated weights for policy 0, policy_version 1403 (0.0008) +[2026-06-02 16:24:02,374][243562] Updated weights for policy 0, policy_version 1413 (0.0008) +[2026-06-02 16:24:02,530][243562] Updated weights for policy 0, policy_version 1423 (0.0008) +[2026-06-02 16:24:02,720][243562] Updated weights for policy 0, policy_version 1435 (0.0008) +[2026-06-02 16:24:02,901][243562] Updated weights for policy 0, policy_version 1446 (0.0008) +[2026-06-02 16:24:03,081][243562] Updated weights for policy 0, policy_version 1457 (0.0008) +[2026-06-02 16:24:03,234][243562] Updated weights for policy 0, policy_version 1467 (0.0008) +[2026-06-02 16:24:04,045][243562] Updated weights for policy 0, policy_version 1478 (0.0006) +[2026-06-02 16:24:04,209][243562] Updated weights for policy 0, policy_version 1488 (0.0005) +[2026-06-02 16:24:04,371][243562] Updated weights for policy 0, policy_version 1499 (0.0005) +[2026-06-02 16:24:04,535][243562] Updated weights for policy 0, policy_version 1509 (0.0008) +[2026-06-02 16:24:04,689][243562] Updated weights for policy 0, policy_version 1519 (0.0008) +[2026-06-02 16:24:04,811][235960] Fps is (10 sec: 19662.3, 60 sec: 18841.7, 300 sec: 18841.7). Total num frames: 753664. Throughput: 0: 19241.7. Samples: 769664. Policy #0 lag: (min: 63.0, avg: 76.7, max: 127.0) +[2026-06-02 16:24:04,812][235960] Avg episode reward: [(0, '4.403')] +[2026-06-02 16:24:04,870][243562] Updated weights for policy 0, policy_version 1530 (0.0010) +[2026-06-02 16:24:04,956][242748] Saving new best policy, reward=4.403! +[2026-06-02 16:24:05,660][243562] Updated weights for policy 0, policy_version 1540 (0.0008) +[2026-06-02 16:24:05,828][243562] Updated weights for policy 0, policy_version 1550 (0.0008) +[2026-06-02 16:24:06,013][243562] Updated weights for policy 0, policy_version 1562 (0.0009) +[2026-06-02 16:24:06,164][243562] Updated weights for policy 0, policy_version 1572 (0.0008) +[2026-06-02 16:24:06,339][243562] Updated weights for policy 0, policy_version 1583 (0.0008) +[2026-06-02 16:24:06,516][243562] Updated weights for policy 0, policy_version 1594 (0.0008) +[2026-06-02 16:24:07,336][243562] Updated weights for policy 0, policy_version 1604 (0.0008) +[2026-06-02 16:24:07,489][243562] Updated weights for policy 0, policy_version 1614 (0.0010) +[2026-06-02 16:24:07,652][243562] Updated weights for policy 0, policy_version 1625 (0.0008) +[2026-06-02 16:24:07,817][243562] Updated weights for policy 0, policy_version 1636 (0.0008) +[2026-06-02 16:24:07,997][243562] Updated weights for policy 0, policy_version 1647 (0.0009) +[2026-06-02 16:24:08,172][243562] Updated weights for policy 0, policy_version 1658 (0.0008) +[2026-06-02 16:24:08,977][243562] Updated weights for policy 0, policy_version 1668 (0.0009) +[2026-06-02 16:24:09,173][243562] Updated weights for policy 0, policy_version 1681 (0.0009) +[2026-06-02 16:24:09,334][243562] Updated weights for policy 0, policy_version 1692 (0.0008) +[2026-06-02 16:24:09,495][243562] Updated weights for policy 0, policy_version 1703 (0.0008) +[2026-06-02 16:24:09,657][243562] Updated weights for policy 0, policy_version 1713 (0.0008) +[2026-06-02 16:24:09,811][235960] Fps is (10 sec: 19660.6, 60 sec: 18932.6, 300 sec: 18932.6). Total num frames: 851968. Throughput: 0: 19723.4. Samples: 887552. Policy #0 lag: (min: 63.0, avg: 76.6, max: 127.0) +[2026-06-02 16:24:09,813][235960] Avg episode reward: [(0, '4.686')] +[2026-06-02 16:24:09,826][243562] Updated weights for policy 0, policy_version 1723 (0.0008) +[2026-06-02 16:24:09,890][242748] Saving new best policy, reward=4.686! +[2026-06-02 16:24:10,634][243562] Updated weights for policy 0, policy_version 1733 (0.0008) +[2026-06-02 16:24:10,808][243562] Updated weights for policy 0, policy_version 1744 (0.0008) +[2026-06-02 16:24:10,964][243562] Updated weights for policy 0, policy_version 1754 (0.0008) +[2026-06-02 16:24:11,147][243562] Updated weights for policy 0, policy_version 1765 (0.0009) +[2026-06-02 16:24:11,300][243562] Updated weights for policy 0, policy_version 1775 (0.0008) +[2026-06-02 16:24:11,452][243562] Updated weights for policy 0, policy_version 1785 (0.0008) +[2026-06-02 16:24:12,272][243562] Updated weights for policy 0, policy_version 1796 (0.0009) +[2026-06-02 16:24:12,438][243562] Updated weights for policy 0, policy_version 1807 (0.0008) +[2026-06-02 16:24:12,612][243562] Updated weights for policy 0, policy_version 1818 (0.0008) +[2026-06-02 16:24:12,808][243562] Updated weights for policy 0, policy_version 1830 (0.0008) +[2026-06-02 16:24:12,969][243562] Updated weights for policy 0, policy_version 1841 (0.0008) +[2026-06-02 16:24:13,128][243562] Updated weights for policy 0, policy_version 1851 (0.0008) +[2026-06-02 16:24:13,924][243562] Updated weights for policy 0, policy_version 1862 (0.0009) +[2026-06-02 16:24:14,115][243562] Updated weights for policy 0, policy_version 1874 (0.0009) +[2026-06-02 16:24:14,290][243562] Updated weights for policy 0, policy_version 1885 (0.0008) +[2026-06-02 16:24:14,488][243562] Updated weights for policy 0, policy_version 1897 (0.0008) +[2026-06-02 16:24:14,653][243562] Updated weights for policy 0, policy_version 1908 (0.0008) +[2026-06-02 16:24:14,811][235960] Fps is (10 sec: 19660.6, 60 sec: 19005.5, 300 sec: 19005.5). Total num frames: 950272. Throughput: 0: 20787.5. Samples: 1001344. Policy #0 lag: (min: 19.0, avg: 72.2, max: 85.0) +[2026-06-02 16:24:14,812][235960] Avg episode reward: [(0, '5.077')] +[2026-06-02 16:24:14,835][243562] Updated weights for policy 0, policy_version 1919 (0.0008) +[2026-06-02 16:24:14,844][242748] Saving new best policy, reward=5.077! +[2026-06-02 16:24:15,648][243562] Updated weights for policy 0, policy_version 1929 (0.0004) +[2026-06-02 16:24:15,820][243562] Updated weights for policy 0, policy_version 1940 (0.0006) +[2026-06-02 16:24:15,986][243562] Updated weights for policy 0, policy_version 1950 (0.0008) +[2026-06-02 16:24:16,172][243562] Updated weights for policy 0, policy_version 1962 (0.0008) +[2026-06-02 16:24:16,333][243562] Updated weights for policy 0, policy_version 1972 (0.0008) +[2026-06-02 16:24:16,523][243562] Updated weights for policy 0, policy_version 1984 (0.0008) +[2026-06-02 16:24:17,326][243562] Updated weights for policy 0, policy_version 1995 (0.0008) +[2026-06-02 16:24:17,490][243562] Updated weights for policy 0, policy_version 2005 (0.0009) +[2026-06-02 16:24:17,678][243562] Updated weights for policy 0, policy_version 2017 (0.0008) +[2026-06-02 16:24:17,843][243562] Updated weights for policy 0, policy_version 2027 (0.0009) +[2026-06-02 16:24:18,019][243562] Updated weights for policy 0, policy_version 2038 (0.0009) +[2026-06-02 16:24:18,166][243562] Updated weights for policy 0, policy_version 2048 (0.0009) +[2026-06-02 16:24:19,006][243562] Updated weights for policy 0, policy_version 2060 (0.0009) +[2026-06-02 16:24:19,179][243562] Updated weights for policy 0, policy_version 2071 (0.0008) +[2026-06-02 16:24:19,369][243562] Updated weights for policy 0, policy_version 2083 (0.0009) +[2026-06-02 16:24:19,534][243562] Updated weights for policy 0, policy_version 2094 (0.0008) +[2026-06-02 16:24:19,737][243562] Updated weights for policy 0, policy_version 2106 (0.0008) +[2026-06-02 16:24:19,811][235960] Fps is (10 sec: 19660.9, 60 sec: 19065.0, 300 sec: 19065.0). Total num frames: 1048576. Throughput: 0: 21296.5. Samples: 1063296. Policy #0 lag: (min: 41.0, avg: 89.1, max: 105.0) +[2026-06-02 16:24:19,813][235960] Avg episode reward: [(0, '5.050')] +[2026-06-02 16:24:20,566][243562] Updated weights for policy 0, policy_version 2116 (0.0008) +[2026-06-02 16:24:20,736][243562] Updated weights for policy 0, policy_version 2128 (0.0008) +[2026-06-02 16:24:20,893][243562] Updated weights for policy 0, policy_version 2138 (0.0008) +[2026-06-02 16:24:21,066][243562] Updated weights for policy 0, policy_version 2149 (0.0008) +[2026-06-02 16:24:21,232][243562] Updated weights for policy 0, policy_version 2159 (0.0010) +[2026-06-02 16:24:21,397][243562] Updated weights for policy 0, policy_version 2169 (0.0008) +[2026-06-02 16:24:22,237][243562] Updated weights for policy 0, policy_version 2180 (0.0009) +[2026-06-02 16:24:22,393][243562] Updated weights for policy 0, policy_version 2191 (0.0009) +[2026-06-02 16:24:22,552][243562] Updated weights for policy 0, policy_version 2201 (0.0008) +[2026-06-02 16:24:22,711][243562] Updated weights for policy 0, policy_version 2211 (0.0009) +[2026-06-02 16:24:22,880][243562] Updated weights for policy 0, policy_version 2222 (0.0008) +[2026-06-02 16:24:23,053][243562] Updated weights for policy 0, policy_version 2233 (0.0010) +[2026-06-02 16:24:23,883][243562] Updated weights for policy 0, policy_version 2243 (0.0010) +[2026-06-02 16:24:24,051][243562] Updated weights for policy 0, policy_version 2254 (0.0008) +[2026-06-02 16:24:24,204][243562] Updated weights for policy 0, policy_version 2264 (0.0008) +[2026-06-02 16:24:24,367][243562] Updated weights for policy 0, policy_version 2274 (0.0008) +[2026-06-02 16:24:24,547][243562] Updated weights for policy 0, policy_version 2286 (0.0008) +[2026-06-02 16:24:24,747][243562] Updated weights for policy 0, policy_version 2298 (0.0008) +[2026-06-02 16:24:24,811][235960] Fps is (10 sec: 19660.4, 60 sec: 19114.6, 300 sec: 19114.6). Total num frames: 1146880. Throughput: 0: 20815.7. Samples: 1183616. Policy #0 lag: (min: 46.0, avg: 58.9, max: 110.0) +[2026-06-02 16:24:24,813][235960] Avg episode reward: [(0, '5.837')] +[2026-06-02 16:24:24,835][242748] Saving new best policy, reward=5.837! +[2026-06-02 16:24:25,578][243562] Updated weights for policy 0, policy_version 2308 (0.0009) +[2026-06-02 16:24:25,724][243562] Updated weights for policy 0, policy_version 2318 (0.0008) +[2026-06-02 16:24:25,867][243562] Updated weights for policy 0, policy_version 2328 (0.0008) +[2026-06-02 16:24:26,051][243562] Updated weights for policy 0, policy_version 2339 (0.0008) +[2026-06-02 16:24:26,238][243562] Updated weights for policy 0, policy_version 2351 (0.0008) +[2026-06-02 16:24:26,417][243562] Updated weights for policy 0, policy_version 2362 (0.0008) +[2026-06-02 16:24:27,267][243562] Updated weights for policy 0, policy_version 2374 (0.0009) +[2026-06-02 16:24:27,437][243562] Updated weights for policy 0, policy_version 2385 (0.0008) +[2026-06-02 16:24:27,607][243562] Updated weights for policy 0, policy_version 2396 (0.0008) +[2026-06-02 16:24:27,814][243562] Updated weights for policy 0, policy_version 2409 (0.0008) +[2026-06-02 16:24:27,978][243562] Updated weights for policy 0, policy_version 2419 (0.0008) +[2026-06-02 16:24:28,177][243562] Updated weights for policy 0, policy_version 2431 (0.0008) +[2026-06-02 16:24:28,996][243562] Updated weights for policy 0, policy_version 2441 (0.0009) +[2026-06-02 16:24:29,149][243562] Updated weights for policy 0, policy_version 2451 (0.0008) +[2026-06-02 16:24:29,332][243562] Updated weights for policy 0, policy_version 2463 (0.0008) +[2026-06-02 16:24:29,511][243562] Updated weights for policy 0, policy_version 2474 (0.0007) +[2026-06-02 16:24:29,664][243562] Updated weights for policy 0, policy_version 2484 (0.0005) +[2026-06-02 16:24:29,811][235960] Fps is (10 sec: 19660.9, 60 sec: 20753.3, 300 sec: 19156.7). Total num frames: 1245184. Throughput: 0: 20309.7. Samples: 1295744. Policy #0 lag: (min: 37.0, avg: 49.8, max: 101.0) +[2026-06-02 16:24:29,813][235960] Avg episode reward: [(0, '6.890')] +[2026-06-02 16:24:29,829][243562] Updated weights for policy 0, policy_version 2494 (0.0005) +[2026-06-02 16:24:29,854][242748] Saving new best policy, reward=6.890! +[2026-06-02 16:24:30,640][243562] Updated weights for policy 0, policy_version 2505 (0.0008) +[2026-06-02 16:24:30,827][243562] Updated weights for policy 0, policy_version 2517 (0.0009) +[2026-06-02 16:24:30,986][243562] Updated weights for policy 0, policy_version 2527 (0.0009) +[2026-06-02 16:24:31,180][243562] Updated weights for policy 0, policy_version 2539 (0.0009) +[2026-06-02 16:24:31,365][243562] Updated weights for policy 0, policy_version 2551 (0.0008) +[2026-06-02 16:24:32,197][243562] Updated weights for policy 0, policy_version 2563 (0.0009) +[2026-06-02 16:24:32,369][243562] Updated weights for policy 0, policy_version 2574 (0.0009) +[2026-06-02 16:24:32,525][243562] Updated weights for policy 0, policy_version 2584 (0.0009) +[2026-06-02 16:24:32,702][243562] Updated weights for policy 0, policy_version 2595 (0.0008) +[2026-06-02 16:24:32,886][243562] Updated weights for policy 0, policy_version 2606 (0.0009) +[2026-06-02 16:24:33,069][243562] Updated weights for policy 0, policy_version 2617 (0.0009) +[2026-06-02 16:24:33,851][243562] Updated weights for policy 0, policy_version 2628 (0.0009) +[2026-06-02 16:24:34,034][243562] Updated weights for policy 0, policy_version 2639 (0.0008) +[2026-06-02 16:24:34,225][243562] Updated weights for policy 0, policy_version 2651 (0.0009) +[2026-06-02 16:24:34,398][243562] Updated weights for policy 0, policy_version 2662 (0.0008) +[2026-06-02 16:24:34,593][243562] Updated weights for policy 0, policy_version 2674 (0.0009) +[2026-06-02 16:24:34,751][243562] Updated weights for policy 0, policy_version 2684 (0.0008) +[2026-06-02 16:24:34,811][235960] Fps is (10 sec: 22937.8, 60 sec: 21299.3, 300 sec: 19660.8). Total num frames: 1376256. Throughput: 0: 20201.3. Samples: 1358080. Policy #0 lag: (min: 3.0, avg: 48.6, max: 67.0) +[2026-06-02 16:24:34,812][235960] Avg episode reward: [(0, '7.563')] +[2026-06-02 16:24:34,818][242748] Saving new best policy, reward=7.563! +[2026-06-02 16:24:35,557][243562] Updated weights for policy 0, policy_version 2694 (0.0010) +[2026-06-02 16:24:35,724][243562] Updated weights for policy 0, policy_version 2705 (0.0009) +[2026-06-02 16:24:35,893][243562] Updated weights for policy 0, policy_version 2716 (0.0009) +[2026-06-02 16:24:36,063][243562] Updated weights for policy 0, policy_version 2726 (0.0008) +[2026-06-02 16:24:36,254][243562] Updated weights for policy 0, policy_version 2738 (0.0008) +[2026-06-02 16:24:36,419][243562] Updated weights for policy 0, policy_version 2748 (0.0008) +[2026-06-02 16:24:37,203][243562] Updated weights for policy 0, policy_version 2758 (0.0008) +[2026-06-02 16:24:37,396][243562] Updated weights for policy 0, policy_version 2771 (0.0010) +[2026-06-02 16:24:37,568][243562] Updated weights for policy 0, policy_version 2782 (0.0009) +[2026-06-02 16:24:37,783][243562] Updated weights for policy 0, policy_version 2795 (0.0008) +[2026-06-02 16:24:37,948][243562] Updated weights for policy 0, policy_version 2805 (0.0008) +[2026-06-02 16:24:38,113][243562] Updated weights for policy 0, policy_version 2816 (0.0010) +[2026-06-02 16:24:38,948][243562] Updated weights for policy 0, policy_version 2827 (0.0009) +[2026-06-02 16:24:39,160][243562] Updated weights for policy 0, policy_version 2841 (0.0008) +[2026-06-02 16:24:39,327][243562] Updated weights for policy 0, policy_version 2851 (0.0008) +[2026-06-02 16:24:39,488][243562] Updated weights for policy 0, policy_version 2861 (0.0008) +[2026-06-02 16:24:39,659][243562] Updated weights for policy 0, policy_version 2872 (0.0008) +[2026-06-02 16:24:39,811][235960] Fps is (10 sec: 22937.4, 60 sec: 20753.1, 300 sec: 19660.8). Total num frames: 1474560. Throughput: 0: 20047.9. Samples: 1480832. Policy #0 lag: (min: 33.0, avg: 77.7, max: 97.0) +[2026-06-02 16:24:39,813][235960] Avg episode reward: [(0, '8.585')] +[2026-06-02 16:24:39,819][242748] Saving new best policy, reward=8.585! +[2026-06-02 16:24:40,477][243562] Updated weights for policy 0, policy_version 2883 (0.0007) +[2026-06-02 16:24:40,643][243562] Updated weights for policy 0, policy_version 2894 (0.0004) +[2026-06-02 16:24:40,848][243562] Updated weights for policy 0, policy_version 2907 (0.0008) +[2026-06-02 16:24:41,032][243562] Updated weights for policy 0, policy_version 2918 (0.0008) +[2026-06-02 16:24:41,202][243562] Updated weights for policy 0, policy_version 2929 (0.0009) +[2026-06-02 16:24:41,373][243562] Updated weights for policy 0, policy_version 2940 (0.0008) +[2026-06-02 16:24:42,210][243562] Updated weights for policy 0, policy_version 2951 (0.0008) +[2026-06-02 16:24:42,402][243562] Updated weights for policy 0, policy_version 2963 (0.0009) +[2026-06-02 16:24:42,569][243562] Updated weights for policy 0, policy_version 2974 (0.0008) +[2026-06-02 16:24:42,732][243562] Updated weights for policy 0, policy_version 2984 (0.0008) +[2026-06-02 16:24:42,893][243562] Updated weights for policy 0, policy_version 2994 (0.0008) +[2026-06-02 16:24:43,087][243562] Updated weights for policy 0, policy_version 3006 (0.0009) +[2026-06-02 16:24:43,877][243562] Updated weights for policy 0, policy_version 3017 (0.0005) +[2026-06-02 16:24:44,051][243562] Updated weights for policy 0, policy_version 3028 (0.0007) +[2026-06-02 16:24:44,255][243562] Updated weights for policy 0, policy_version 3041 (0.0009) +[2026-06-02 16:24:44,443][243562] Updated weights for policy 0, policy_version 3052 (0.0008) +[2026-06-02 16:24:44,606][243562] Updated weights for policy 0, policy_version 3063 (0.0008) +[2026-06-02 16:24:44,811][235960] Fps is (10 sec: 19661.1, 60 sec: 20207.2, 300 sec: 19660.8). Total num frames: 1572864. Throughput: 0: 19675.0. Samples: 1591808. Policy #0 lag: (min: 28.0, avg: 40.3, max: 92.0) +[2026-06-02 16:24:44,812][235960] Avg episode reward: [(0, '9.738')] +[2026-06-02 16:24:44,817][242748] Saving new best policy, reward=9.738! +[2026-06-02 16:24:45,413][243562] Updated weights for policy 0, policy_version 3075 (0.0008) +[2026-06-02 16:24:45,587][243562] Updated weights for policy 0, policy_version 3086 (0.0008) +[2026-06-02 16:24:45,759][243562] Updated weights for policy 0, policy_version 3097 (0.0009) +[2026-06-02 16:24:45,965][243562] Updated weights for policy 0, policy_version 3110 (0.0008) +[2026-06-02 16:24:46,163][243562] Updated weights for policy 0, policy_version 3122 (0.0008) +[2026-06-02 16:24:46,335][243562] Updated weights for policy 0, policy_version 3133 (0.0008) +[2026-06-02 16:24:47,127][243562] Updated weights for policy 0, policy_version 3144 (0.0008) +[2026-06-02 16:24:47,306][243562] Updated weights for policy 0, policy_version 3155 (0.0008) +[2026-06-02 16:24:47,478][243562] Updated weights for policy 0, policy_version 3166 (0.0008) +[2026-06-02 16:24:47,671][243562] Updated weights for policy 0, policy_version 3178 (0.0008) +[2026-06-02 16:24:47,826][243562] Updated weights for policy 0, policy_version 3188 (0.0008) +[2026-06-02 16:24:47,985][243562] Updated weights for policy 0, policy_version 3198 (0.0008) +[2026-06-02 16:24:48,777][243562] Updated weights for policy 0, policy_version 3208 (0.0009) +[2026-06-02 16:24:48,954][243562] Updated weights for policy 0, policy_version 3220 (0.0009) +[2026-06-02 16:24:49,161][243562] Updated weights for policy 0, policy_version 3233 (0.0008) +[2026-06-02 16:24:49,357][243562] Updated weights for policy 0, policy_version 3245 (0.0008) +[2026-06-02 16:24:49,542][243562] Updated weights for policy 0, policy_version 3257 (0.0009) +[2026-06-02 16:24:49,811][235960] Fps is (10 sec: 19660.9, 60 sec: 20207.0, 300 sec: 19660.8). Total num frames: 1671168. Throughput: 0: 19629.4. Samples: 1652992. Policy #0 lag: (min: 59.0, avg: 103.2, max: 123.0) +[2026-06-02 16:24:49,813][235960] Avg episode reward: [(0, '12.443')] +[2026-06-02 16:24:49,818][242748] Saving new best policy, reward=12.443! +[2026-06-02 16:24:50,383][243562] Updated weights for policy 0, policy_version 3269 (0.0009) +[2026-06-02 16:24:50,535][243562] Updated weights for policy 0, policy_version 3279 (0.0009) +[2026-06-02 16:24:50,707][243562] Updated weights for policy 0, policy_version 3290 (0.0009) +[2026-06-02 16:24:50,891][243562] Updated weights for policy 0, policy_version 3301 (0.0008) +[2026-06-02 16:24:51,074][243562] Updated weights for policy 0, policy_version 3313 (0.0009) +[2026-06-02 16:24:51,267][243562] Updated weights for policy 0, policy_version 3325 (0.0008) +[2026-06-02 16:24:52,038][243562] Updated weights for policy 0, policy_version 3335 (0.0009) +[2026-06-02 16:24:52,191][243562] Updated weights for policy 0, policy_version 3345 (0.0008) +[2026-06-02 16:24:52,345][243562] Updated weights for policy 0, policy_version 3355 (0.0008) +[2026-06-02 16:24:52,572][243562] Updated weights for policy 0, policy_version 3369 (0.0008) +[2026-06-02 16:24:52,792][243562] Updated weights for policy 0, policy_version 3383 (0.0008) +[2026-06-02 16:24:53,610][243562] Updated weights for policy 0, policy_version 3393 (0.0008) +[2026-06-02 16:24:53,765][243562] Updated weights for policy 0, policy_version 3403 (0.0008) +[2026-06-02 16:24:53,935][243562] Updated weights for policy 0, policy_version 3414 (0.0008) +[2026-06-02 16:24:54,095][243562] Updated weights for policy 0, policy_version 3424 (0.0008) +[2026-06-02 16:24:54,272][243562] Updated weights for policy 0, policy_version 3435 (0.0008) +[2026-06-02 16:24:54,432][243562] Updated weights for policy 0, policy_version 3445 (0.0008) +[2026-06-02 16:24:54,592][243562] Updated weights for policy 0, policy_version 3455 (0.0008) +[2026-06-02 16:24:54,811][235960] Fps is (10 sec: 19660.5, 60 sec: 20207.1, 300 sec: 19660.8). Total num frames: 1769472. Throughput: 0: 19786.0. Samples: 1777920. Policy #0 lag: (min: 63.0, avg: 75.3, max: 127.0) +[2026-06-02 16:24:54,812][235960] Avg episode reward: [(0, '13.343')] +[2026-06-02 16:24:54,819][242748] Saving new best policy, reward=13.343! +[2026-06-02 16:24:55,385][243562] Updated weights for policy 0, policy_version 3465 (0.0008) +[2026-06-02 16:24:55,576][243562] Updated weights for policy 0, policy_version 3477 (0.0008) +[2026-06-02 16:24:55,729][243562] Updated weights for policy 0, policy_version 3487 (0.0008) +[2026-06-02 16:24:55,882][243562] Updated weights for policy 0, policy_version 3497 (0.0008) +[2026-06-02 16:24:56,048][243562] Updated weights for policy 0, policy_version 3507 (0.0008) +[2026-06-02 16:24:56,227][243562] Updated weights for policy 0, policy_version 3518 (0.0008) +[2026-06-02 16:24:57,000][243562] Updated weights for policy 0, policy_version 3530 (0.0008) +[2026-06-02 16:24:57,187][243562] Updated weights for policy 0, policy_version 3542 (0.0009) +[2026-06-02 16:24:57,361][243562] Updated weights for policy 0, policy_version 3553 (0.0008) +[2026-06-02 16:24:57,543][243562] Updated weights for policy 0, policy_version 3564 (0.0008) +[2026-06-02 16:24:57,694][243562] Updated weights for policy 0, policy_version 3574 (0.0008) +[2026-06-02 16:24:58,507][243562] Updated weights for policy 0, policy_version 3585 (0.0008) +[2026-06-02 16:24:58,663][243562] Updated weights for policy 0, policy_version 3596 (0.0008) +[2026-06-02 16:24:58,826][243562] Updated weights for policy 0, policy_version 3606 (0.0009) +[2026-06-02 16:24:58,983][243562] Updated weights for policy 0, policy_version 3616 (0.0008) +[2026-06-02 16:24:59,138][243562] Updated weights for policy 0, policy_version 3626 (0.0008) +[2026-06-02 16:24:59,331][243562] Updated weights for policy 0, policy_version 3638 (0.0008) +[2026-06-02 16:24:59,489][243562] Updated weights for policy 0, policy_version 3648 (0.0008) +[2026-06-02 16:24:59,811][235960] Fps is (10 sec: 19661.0, 60 sec: 20207.0, 300 sec: 19660.8). Total num frames: 1867776. Throughput: 0: 19774.6. Samples: 1891200. Policy #0 lag: (min: 63.0, avg: 75.7, max: 127.0) +[2026-06-02 16:24:59,812][235960] Avg episode reward: [(0, '16.999')] +[2026-06-02 16:24:59,817][242748] Saving new best policy, reward=16.999! +[2026-06-02 16:25:00,283][243562] Updated weights for policy 0, policy_version 3660 (0.0009) +[2026-06-02 16:25:00,459][243562] Updated weights for policy 0, policy_version 3671 (0.0009) +[2026-06-02 16:25:00,627][243562] Updated weights for policy 0, policy_version 3682 (0.0008) +[2026-06-02 16:25:00,801][243562] Updated weights for policy 0, policy_version 3693 (0.0008) +[2026-06-02 16:25:00,959][243562] Updated weights for policy 0, policy_version 3703 (0.0009) +[2026-06-02 16:25:01,754][243562] Updated weights for policy 0, policy_version 3714 (0.0009) +[2026-06-02 16:25:01,901][243562] Updated weights for policy 0, policy_version 3724 (0.0008) +[2026-06-02 16:25:02,062][243562] Updated weights for policy 0, policy_version 3734 (0.0008) +[2026-06-02 16:25:02,222][243562] Updated weights for policy 0, policy_version 3744 (0.0008) +[2026-06-02 16:25:02,390][243562] Updated weights for policy 0, policy_version 3754 (0.0008) +[2026-06-02 16:25:02,550][243562] Updated weights for policy 0, policy_version 3764 (0.0008) +[2026-06-02 16:25:02,735][243562] Updated weights for policy 0, policy_version 3775 (0.0008) +[2026-06-02 16:25:03,497][243562] Updated weights for policy 0, policy_version 3785 (0.0008) +[2026-06-02 16:25:03,654][243562] Updated weights for policy 0, policy_version 3795 (0.0008) +[2026-06-02 16:25:03,810][243562] Updated weights for policy 0, policy_version 3805 (0.0010) +[2026-06-02 16:25:03,988][243562] Updated weights for policy 0, policy_version 3816 (0.0008) +[2026-06-02 16:25:04,147][243562] Updated weights for policy 0, policy_version 3826 (0.0008) +[2026-06-02 16:25:04,351][243562] Updated weights for policy 0, policy_version 3839 (0.0009) +[2026-06-02 16:25:04,811][235960] Fps is (10 sec: 19660.9, 60 sec: 20206.9, 300 sec: 19660.8). Total num frames: 1966080. Throughput: 0: 19726.2. Samples: 1950976. Policy #0 lag: (min: 63.0, avg: 76.3, max: 127.0) +[2026-06-02 16:25:04,813][235960] Avg episode reward: [(0, '17.182')] +[2026-06-02 16:25:04,818][242748] Saving new best policy, reward=17.182! +[2026-06-02 16:25:05,133][243562] Updated weights for policy 0, policy_version 3849 (0.0009) +[2026-06-02 16:25:05,304][243562] Updated weights for policy 0, policy_version 3860 (0.0008) +[2026-06-02 16:25:05,513][243562] Updated weights for policy 0, policy_version 3873 (0.0009) +[2026-06-02 16:25:05,684][243562] Updated weights for policy 0, policy_version 3884 (0.0009) +[2026-06-02 16:25:05,849][243562] Updated weights for policy 0, policy_version 3894 (0.0009) +[2026-06-02 16:25:06,007][243562] Updated weights for policy 0, policy_version 3904 (0.0008) +[2026-06-02 16:25:06,790][243562] Updated weights for policy 0, policy_version 3915 (0.0009) +[2026-06-02 16:25:06,978][243562] Updated weights for policy 0, policy_version 3927 (0.0009) +[2026-06-02 16:25:07,139][243562] Updated weights for policy 0, policy_version 3937 (0.0009) +[2026-06-02 16:25:07,299][243562] Updated weights for policy 0, policy_version 3947 (0.0008) +[2026-06-02 16:25:07,463][243562] Updated weights for policy 0, policy_version 3957 (0.0008) +[2026-06-02 16:25:07,628][243562] Updated weights for policy 0, policy_version 3967 (0.0008) +[2026-06-02 16:25:08,361][243562] Updated weights for policy 0, policy_version 3978 (0.0009) +[2026-06-02 16:25:08,538][243562] Updated weights for policy 0, policy_version 3989 (0.0008) +[2026-06-02 16:25:08,701][243562] Updated weights for policy 0, policy_version 3999 (0.0009) +[2026-06-02 16:25:08,853][243562] Updated weights for policy 0, policy_version 4009 (0.0008) +[2026-06-02 16:25:09,045][243562] Updated weights for policy 0, policy_version 4021 (0.0009) +[2026-06-02 16:25:09,223][243562] Updated weights for policy 0, policy_version 4032 (0.0008) +[2026-06-02 16:25:09,811][235960] Fps is (10 sec: 19660.7, 60 sec: 20207.0, 300 sec: 19660.8). Total num frames: 2064384. Throughput: 0: 19874.2. Samples: 2077952. Policy #0 lag: (min: 63.0, avg: 76.3, max: 127.0) +[2026-06-02 16:25:09,812][235960] Avg episode reward: [(0, '26.609')] +[2026-06-02 16:25:10,010][243562] Updated weights for policy 0, policy_version 4043 (0.0009) +[2026-06-02 16:25:10,180][243562] Updated weights for policy 0, policy_version 4053 (0.0008) +[2026-06-02 16:25:10,356][243562] Updated weights for policy 0, policy_version 4065 (0.0009) +[2026-06-02 16:25:10,519][243562] Updated weights for policy 0, policy_version 4075 (0.0008) +[2026-06-02 16:25:10,681][243562] Updated weights for policy 0, policy_version 4085 (0.0008) +[2026-06-02 16:25:10,853][243562] Updated weights for policy 0, policy_version 4095 (0.0009) +[2026-06-02 16:25:10,862][242748] Saving new best policy, reward=26.609! +[2026-06-02 16:25:11,559][243562] Updated weights for policy 0, policy_version 4105 (0.0009) +[2026-06-02 16:25:11,728][243562] Updated weights for policy 0, policy_version 4116 (0.0008) +[2026-06-02 16:25:11,882][243562] Updated weights for policy 0, policy_version 4126 (0.0008) +[2026-06-02 16:25:12,065][243562] Updated weights for policy 0, policy_version 4137 (0.0009) +[2026-06-02 16:25:12,253][243562] Updated weights for policy 0, policy_version 4149 (0.0008) +[2026-06-02 16:25:12,417][243562] Updated weights for policy 0, policy_version 4159 (0.0008) +[2026-06-02 16:25:13,113][243562] Updated weights for policy 0, policy_version 4169 (0.0008) +[2026-06-02 16:25:13,283][243562] Updated weights for policy 0, policy_version 4180 (0.0009) +[2026-06-02 16:25:13,438][243562] Updated weights for policy 0, policy_version 4190 (0.0008) +[2026-06-02 16:25:13,598][243562] Updated weights for policy 0, policy_version 4200 (0.0008) +[2026-06-02 16:25:13,763][243562] Updated weights for policy 0, policy_version 4210 (0.0009) +[2026-06-02 16:25:13,927][243562] Updated weights for policy 0, policy_version 4220 (0.0009) +[2026-06-02 16:25:14,684][243562] Updated weights for policy 0, policy_version 4230 (0.0009) +[2026-06-02 16:25:14,811][235960] Fps is (10 sec: 19661.0, 60 sec: 20207.0, 300 sec: 19660.8). Total num frames: 2162688. Throughput: 0: 20226.9. Samples: 2205952. Policy #0 lag: (min: 18.0, avg: 63.7, max: 82.0) +[2026-06-02 16:25:14,812][235960] Avg episode reward: [(0, '28.313')] +[2026-06-02 16:25:14,849][243562] Updated weights for policy 0, policy_version 4241 (0.0009) +[2026-06-02 16:25:15,022][243562] Updated weights for policy 0, policy_version 4252 (0.0009) +[2026-06-02 16:25:15,208][243562] Updated weights for policy 0, policy_version 4263 (0.0009) +[2026-06-02 16:25:15,413][243562] Updated weights for policy 0, policy_version 4276 (0.0009) +[2026-06-02 16:25:15,570][243562] Updated weights for policy 0, policy_version 4286 (0.0008) +[2026-06-02 16:25:15,601][242748] Saving new best policy, reward=28.313! +[2026-06-02 16:25:16,283][243562] Updated weights for policy 0, policy_version 4297 (0.0009) +[2026-06-02 16:25:16,461][243562] Updated weights for policy 0, policy_version 4308 (0.0008) +[2026-06-02 16:25:16,632][243562] Updated weights for policy 0, policy_version 4319 (0.0008) +[2026-06-02 16:25:16,795][243562] Updated weights for policy 0, policy_version 4329 (0.0008) +[2026-06-02 16:25:16,960][243562] Updated weights for policy 0, policy_version 4339 (0.0008) +[2026-06-02 16:25:17,111][243562] Updated weights for policy 0, policy_version 4349 (0.0008) +[2026-06-02 16:25:17,870][243562] Updated weights for policy 0, policy_version 4360 (0.0008) +[2026-06-02 16:25:18,032][243562] Updated weights for policy 0, policy_version 4370 (0.0008) +[2026-06-02 16:25:18,245][243562] Updated weights for policy 0, policy_version 4383 (0.0008) +[2026-06-02 16:25:18,400][243562] Updated weights for policy 0, policy_version 4393 (0.0009) +[2026-06-02 16:25:18,561][243562] Updated weights for policy 0, policy_version 4403 (0.0008) +[2026-06-02 16:25:18,741][243562] Updated weights for policy 0, policy_version 4414 (0.0008) +[2026-06-02 16:25:19,491][243562] Updated weights for policy 0, policy_version 4426 (0.0008) +[2026-06-02 16:25:19,674][243562] Updated weights for policy 0, policy_version 4437 (0.0005) +[2026-06-02 16:25:19,811][235960] Fps is (10 sec: 19660.8, 60 sec: 20207.0, 300 sec: 19660.8). Total num frames: 2260992. Throughput: 0: 20224.1. Samples: 2268160. Policy #0 lag: (min: 63.0, avg: 75.0, max: 127.0) +[2026-06-02 16:25:19,812][235960] Avg episode reward: [(0, '33.006')] +[2026-06-02 16:25:19,851][243562] Updated weights for policy 0, policy_version 4448 (0.0005) +[2026-06-02 16:25:20,016][243562] Updated weights for policy 0, policy_version 4458 (0.0005) +[2026-06-02 16:25:20,168][243562] Updated weights for policy 0, policy_version 4468 (0.0005) +[2026-06-02 16:25:20,361][242748] Saving new best policy, reward=33.006! +[2026-06-02 16:25:20,363][243562] Updated weights for policy 0, policy_version 4480 (0.0005) +[2026-06-02 16:25:21,096][243562] Updated weights for policy 0, policy_version 4492 (0.0009) +[2026-06-02 16:25:21,271][243562] Updated weights for policy 0, policy_version 4503 (0.0010) +[2026-06-02 16:25:21,441][243562] Updated weights for policy 0, policy_version 4513 (0.0008) +[2026-06-02 16:25:21,619][243562] Updated weights for policy 0, policy_version 4524 (0.0008) +[2026-06-02 16:25:21,774][243562] Updated weights for policy 0, policy_version 4534 (0.0008) +[2026-06-02 16:25:22,515][243562] Updated weights for policy 0, policy_version 4545 (0.0005) +[2026-06-02 16:25:22,661][243562] Updated weights for policy 0, policy_version 4555 (0.0005) +[2026-06-02 16:25:22,826][243562] Updated weights for policy 0, policy_version 4565 (0.0005) +[2026-06-02 16:25:22,994][243562] Updated weights for policy 0, policy_version 4576 (0.0005) +[2026-06-02 16:25:23,179][243562] Updated weights for policy 0, policy_version 4587 (0.0005) +[2026-06-02 16:25:23,342][243562] Updated weights for policy 0, policy_version 4597 (0.0005) +[2026-06-02 16:25:23,496][243562] Updated weights for policy 0, policy_version 4607 (0.0008) +[2026-06-02 16:25:24,204][243562] Updated weights for policy 0, policy_version 4617 (0.0006) +[2026-06-02 16:25:24,361][243562] Updated weights for policy 0, policy_version 4627 (0.0005) +[2026-06-02 16:25:24,527][243562] Updated weights for policy 0, policy_version 4637 (0.0004) +[2026-06-02 16:25:24,692][243562] Updated weights for policy 0, policy_version 4647 (0.0004) +[2026-06-02 16:25:24,811][235960] Fps is (10 sec: 19660.6, 60 sec: 20207.0, 300 sec: 19660.8). Total num frames: 2359296. Throughput: 0: 20096.0. Samples: 2385152. Policy #0 lag: (min: 31.0, avg: 45.7, max: 95.0) +[2026-06-02 16:25:24,812][235960] Avg episode reward: [(0, '37.706')] +[2026-06-02 16:25:24,866][243562] Updated weights for policy 0, policy_version 4658 (0.0004) +[2026-06-02 16:25:25,027][243562] Updated weights for policy 0, policy_version 4668 (0.0004) +[2026-06-02 16:25:25,092][242748] Saving new best policy, reward=37.706! +[2026-06-02 16:25:25,701][243562] Updated weights for policy 0, policy_version 4678 (0.0007) +[2026-06-02 16:25:25,873][243562] Updated weights for policy 0, policy_version 4689 (0.0008) +[2026-06-02 16:25:26,051][243562] Updated weights for policy 0, policy_version 4700 (0.0008) +[2026-06-02 16:25:26,216][243562] Updated weights for policy 0, policy_version 4710 (0.0008) +[2026-06-02 16:25:26,392][243562] Updated weights for policy 0, policy_version 4721 (0.0009) +[2026-06-02 16:25:26,565][243562] Updated weights for policy 0, policy_version 4732 (0.0008) +[2026-06-02 16:25:27,255][243562] Updated weights for policy 0, policy_version 4743 (0.0008) +[2026-06-02 16:25:27,431][243562] Updated weights for policy 0, policy_version 4754 (0.0008) +[2026-06-02 16:25:27,589][243562] Updated weights for policy 0, policy_version 4764 (0.0008) +[2026-06-02 16:25:27,747][243562] Updated weights for policy 0, policy_version 4774 (0.0007) +[2026-06-02 16:25:27,909][243562] Updated weights for policy 0, policy_version 4784 (0.0005) +[2026-06-02 16:25:28,104][243562] Updated weights for policy 0, policy_version 4796 (0.0005) +[2026-06-02 16:25:28,782][243562] Updated weights for policy 0, policy_version 4806 (0.0004) +[2026-06-02 16:25:28,951][243562] Updated weights for policy 0, policy_version 4817 (0.0004) +[2026-06-02 16:25:29,143][243562] Updated weights for policy 0, policy_version 4829 (0.0004) +[2026-06-02 16:25:29,324][243562] Updated weights for policy 0, policy_version 4840 (0.0004) +[2026-06-02 16:25:29,496][243562] Updated weights for policy 0, policy_version 4850 (0.0004) +[2026-06-02 16:25:29,657][243562] Updated weights for policy 0, policy_version 4860 (0.0004) +[2026-06-02 16:25:29,811][235960] Fps is (10 sec: 22937.6, 60 sec: 20753.1, 300 sec: 19923.0). Total num frames: 2490368. Throughput: 0: 20431.6. Samples: 2511232. Policy #0 lag: (min: 63.0, avg: 77.8, max: 127.0) +[2026-06-02 16:25:29,812][235960] Avg episode reward: [(0, '48.087')] +[2026-06-02 16:25:29,817][242748] Saving new best policy, reward=48.087! +[2026-06-02 16:25:30,339][243562] Updated weights for policy 0, policy_version 4871 (0.0007) +[2026-06-02 16:25:30,498][243562] Updated weights for policy 0, policy_version 4881 (0.0008) +[2026-06-02 16:25:30,674][243562] Updated weights for policy 0, policy_version 4892 (0.0009) +[2026-06-02 16:25:30,835][243562] Updated weights for policy 0, policy_version 4902 (0.0009) +[2026-06-02 16:25:30,998][242748] Early stopping after 6 epochs (48 sgd steps), loss delta 0.0000006 +[2026-06-02 16:25:31,002][243562] Updated weights for policy 0, policy_version 4912 (0.0008) +[2026-06-02 16:25:31,743][243562] Updated weights for policy 0, policy_version 4922 (0.0009) +[2026-06-02 16:25:31,919][243562] Updated weights for policy 0, policy_version 4933 (0.0008) +[2026-06-02 16:25:32,093][243562] Updated weights for policy 0, policy_version 4944 (0.0008) +[2026-06-02 16:25:32,259][243562] Updated weights for policy 0, policy_version 4954 (0.0008) +[2026-06-02 16:25:32,430][243562] Updated weights for policy 0, policy_version 4964 (0.0008) +[2026-06-02 16:25:33,171][243562] Updated weights for policy 0, policy_version 4977 (0.0009) +[2026-06-02 16:25:33,348][243562] Updated weights for policy 0, policy_version 4989 (0.0009) +[2026-06-02 16:25:33,509][243562] Updated weights for policy 0, policy_version 4999 (0.0009) +[2026-06-02 16:25:33,667][243562] Updated weights for policy 0, policy_version 5009 (0.0008) +[2026-06-02 16:25:33,837][243562] Updated weights for policy 0, policy_version 5019 (0.0009) +[2026-06-02 16:25:34,006][243562] Updated weights for policy 0, policy_version 5030 (0.0009) +[2026-06-02 16:25:34,172][243562] Updated weights for policy 0, policy_version 5040 (0.0008) +[2026-06-02 16:25:34,811][235960] Fps is (10 sec: 22937.8, 60 sec: 20207.0, 300 sec: 19912.9). Total num frames: 2588672. Throughput: 0: 20582.4. Samples: 2579200. Policy #0 lag: (min: 62.0, avg: 77.3, max: 126.0) +[2026-06-02 16:25:34,812][235960] Avg episode reward: [(0, '50.001')] +[2026-06-02 16:25:34,860][243562] Updated weights for policy 0, policy_version 5050 (0.0008) +[2026-06-02 16:25:35,037][243562] Updated weights for policy 0, policy_version 5061 (0.0009) +[2026-06-02 16:25:35,229][243562] Updated weights for policy 0, policy_version 5073 (0.0009) +[2026-06-02 16:25:35,387][243562] Updated weights for policy 0, policy_version 5083 (0.0008) +[2026-06-02 16:25:35,556][243562] Updated weights for policy 0, policy_version 5093 (0.0008) +[2026-06-02 16:25:35,714][243562] Updated weights for policy 0, policy_version 5103 (0.0008) +[2026-06-02 16:25:35,727][242748] Saving new best policy, reward=50.001! +[2026-06-02 16:25:36,413][243562] Updated weights for policy 0, policy_version 5114 (0.0009) +[2026-06-02 16:25:36,570][243562] Updated weights for policy 0, policy_version 5124 (0.0008) +[2026-06-02 16:25:36,728][243562] Updated weights for policy 0, policy_version 5134 (0.0008) +[2026-06-02 16:25:36,891][243562] Updated weights for policy 0, policy_version 5144 (0.0008) +[2026-06-02 16:25:37,054][243562] Updated weights for policy 0, policy_version 5154 (0.0008) +[2026-06-02 16:25:37,234][243562] Updated weights for policy 0, policy_version 5165 (0.0008) +[2026-06-02 16:25:37,900][243562] Updated weights for policy 0, policy_version 5176 (0.0008) +[2026-06-02 16:25:38,062][243562] Updated weights for policy 0, policy_version 5186 (0.0008) +[2026-06-02 16:25:38,228][243562] Updated weights for policy 0, policy_version 5196 (0.0008) +[2026-06-02 16:25:38,392][243562] Updated weights for policy 0, policy_version 5206 (0.0008) +[2026-06-02 16:25:38,571][243562] Updated weights for policy 0, policy_version 5217 (0.0008) +[2026-06-02 16:25:38,740][243562] Updated weights for policy 0, policy_version 5227 (0.0008) +[2026-06-02 16:25:39,445][243562] Updated weights for policy 0, policy_version 5239 (0.0008) +[2026-06-02 16:25:39,607][243562] Updated weights for policy 0, policy_version 5249 (0.0008) +[2026-06-02 16:25:39,766][243562] Updated weights for policy 0, policy_version 5259 (0.0008) +[2026-06-02 16:25:39,811][235960] Fps is (10 sec: 19660.8, 60 sec: 20207.0, 300 sec: 19903.5). Total num frames: 2686976. Throughput: 0: 20665.0. Samples: 2707840. Policy #0 lag: (min: 62.0, avg: 77.3, max: 126.0) +[2026-06-02 16:25:39,812][235960] Avg episode reward: [(0, '64.691')] +[2026-06-02 16:25:39,930][243562] Updated weights for policy 0, policy_version 5269 (0.0008) +[2026-06-02 16:25:40,093][243562] Updated weights for policy 0, policy_version 5279 (0.0008) +[2026-06-02 16:25:40,262][243562] Updated weights for policy 0, policy_version 5289 (0.0008) +[2026-06-02 16:25:40,367][242748] Saving new best policy, reward=64.691! +[2026-06-02 16:25:40,965][243562] Updated weights for policy 0, policy_version 5300 (0.0008) +[2026-06-02 16:25:41,127][243562] Updated weights for policy 0, policy_version 5310 (0.0008) +[2026-06-02 16:25:41,291][243562] Updated weights for policy 0, policy_version 5320 (0.0008) +[2026-06-02 16:25:41,450][243562] Updated weights for policy 0, policy_version 5330 (0.0008) +[2026-06-02 16:25:41,610][243562] Updated weights for policy 0, policy_version 5340 (0.0008) +[2026-06-02 16:25:41,812][243562] Updated weights for policy 0, policy_version 5352 (0.0009) +[2026-06-02 16:25:42,485][243562] Updated weights for policy 0, policy_version 5362 (0.0009) +[2026-06-02 16:25:42,637][243562] Updated weights for policy 0, policy_version 5372 (0.0008) +[2026-06-02 16:25:42,800][243562] Updated weights for policy 0, policy_version 5382 (0.0008) +[2026-06-02 16:25:42,962][243562] Updated weights for policy 0, policy_version 5392 (0.0009) +[2026-06-02 16:25:43,160][243562] Updated weights for policy 0, policy_version 5404 (0.0009) +[2026-06-02 16:25:43,352][243562] Updated weights for policy 0, policy_version 5416 (0.0009) +[2026-06-02 16:25:44,041][243562] Updated weights for policy 0, policy_version 5427 (0.0009) +[2026-06-02 16:25:44,191][243562] Updated weights for policy 0, policy_version 5437 (0.0009) +[2026-06-02 16:25:44,350][243562] Updated weights for policy 0, policy_version 5447 (0.0008) +[2026-06-02 16:25:44,509][243562] Updated weights for policy 0, policy_version 5457 (0.0009) +[2026-06-02 16:25:44,681][243562] Updated weights for policy 0, policy_version 5467 (0.0009) +[2026-06-02 16:25:44,811][235960] Fps is (10 sec: 19660.7, 60 sec: 20206.9, 300 sec: 19894.9). Total num frames: 2785280. Throughput: 0: 21000.5. Samples: 2836224. Policy #0 lag: (min: 51.0, avg: 102.4, max: 122.0) +[2026-06-02 16:25:44,812][235960] Avg episode reward: [(0, '65.042')] +[2026-06-02 16:25:44,855][243562] Updated weights for policy 0, policy_version 5478 (0.0008) +[2026-06-02 16:25:45,011][242748] Saving new best policy, reward=65.042! +[2026-06-02 16:25:45,013][243562] Updated weights for policy 0, policy_version 5488 (0.0008) +[2026-06-02 16:25:45,683][243562] Updated weights for policy 0, policy_version 5498 (0.0008) +[2026-06-02 16:25:45,842][243562] Updated weights for policy 0, policy_version 5508 (0.0008) +[2026-06-02 16:25:46,005][243562] Updated weights for policy 0, policy_version 5518 (0.0008) +[2026-06-02 16:25:46,173][243562] Updated weights for policy 0, policy_version 5528 (0.0008) +[2026-06-02 16:25:46,344][243562] Updated weights for policy 0, policy_version 5538 (0.0008) +[2026-06-02 16:25:46,507][243562] Updated weights for policy 0, policy_version 5548 (0.0009) +[2026-06-02 16:25:47,167][243562] Updated weights for policy 0, policy_version 5558 (0.0009) +[2026-06-02 16:25:47,322][243562] Updated weights for policy 0, policy_version 5568 (0.0008) +[2026-06-02 16:25:47,485][243562] Updated weights for policy 0, policy_version 5578 (0.0008) +[2026-06-02 16:25:47,679][243562] Updated weights for policy 0, policy_version 5590 (0.0008) +[2026-06-02 16:25:47,842][243562] Updated weights for policy 0, policy_version 5600 (0.0009) +[2026-06-02 16:25:47,997][243562] Updated weights for policy 0, policy_version 5610 (0.0008) +[2026-06-02 16:25:48,694][243562] Updated weights for policy 0, policy_version 5620 (0.0008) +[2026-06-02 16:25:48,883][243562] Updated weights for policy 0, policy_version 5632 (0.0008) +[2026-06-02 16:25:49,062][243562] Updated weights for policy 0, policy_version 5643 (0.0008) +[2026-06-02 16:25:49,235][243562] Updated weights for policy 0, policy_version 5654 (0.0008) +[2026-06-02 16:25:49,404][243562] Updated weights for policy 0, policy_version 5664 (0.0009) +[2026-06-02 16:25:49,566][243562] Updated weights for policy 0, policy_version 5674 (0.0008) +[2026-06-02 16:25:49,811][235960] Fps is (10 sec: 22937.6, 60 sec: 20753.1, 300 sec: 20112.8). Total num frames: 2916352. Throughput: 0: 21103.0. Samples: 2900608. Policy #0 lag: (min: 42.0, avg: 92.5, max: 104.0) +[2026-06-02 16:25:49,812][235960] Avg episode reward: [(0, '88.330')] +[2026-06-02 16:25:49,817][242748] Saving new best policy, reward=88.330! +[2026-06-02 16:25:50,281][243562] Updated weights for policy 0, policy_version 5684 (0.0009) +[2026-06-02 16:25:50,440][243562] Updated weights for policy 0, policy_version 5694 (0.0008) +[2026-06-02 16:25:50,614][243562] Updated weights for policy 0, policy_version 5705 (0.0008) +[2026-06-02 16:25:50,769][243562] Updated weights for policy 0, policy_version 5715 (0.0008) +[2026-06-02 16:25:50,940][243562] Updated weights for policy 0, policy_version 5725 (0.0007) +[2026-06-02 16:25:51,099][243562] Updated weights for policy 0, policy_version 5735 (0.0005) +[2026-06-02 16:25:51,776][243562] Updated weights for policy 0, policy_version 5745 (0.0006) +[2026-06-02 16:25:51,935][243562] Updated weights for policy 0, policy_version 5755 (0.0008) +[2026-06-02 16:25:52,129][243562] Updated weights for policy 0, policy_version 5767 (0.0008) +[2026-06-02 16:25:52,315][243562] Updated weights for policy 0, policy_version 5779 (0.0008) +[2026-06-02 16:25:52,478][243562] Updated weights for policy 0, policy_version 5789 (0.0008) +[2026-06-02 16:25:52,649][243562] Updated weights for policy 0, policy_version 5799 (0.0009) +[2026-06-02 16:25:53,327][243562] Updated weights for policy 0, policy_version 5809 (0.0008) +[2026-06-02 16:25:53,484][243562] Updated weights for policy 0, policy_version 5819 (0.0008) +[2026-06-02 16:25:53,661][243562] Updated weights for policy 0, policy_version 5830 (0.0008) +[2026-06-02 16:25:53,822][243562] Updated weights for policy 0, policy_version 5840 (0.0008) +[2026-06-02 16:25:53,980][243562] Updated weights for policy 0, policy_version 5850 (0.0008) +[2026-06-02 16:25:54,153][243562] Updated weights for policy 0, policy_version 5860 (0.0008) +[2026-06-02 16:25:54,314][243562] Updated weights for policy 0, policy_version 5870 (0.0009) +[2026-06-02 16:25:54,811][235960] Fps is (10 sec: 22937.6, 60 sec: 20753.1, 300 sec: 20097.7). Total num frames: 3014656. Throughput: 0: 21145.6. Samples: 3029504. Policy #0 lag: (min: 63.0, avg: 78.9, max: 127.0) +[2026-06-02 16:25:54,812][235960] Avg episode reward: [(0, '84.763')] +[2026-06-02 16:25:54,989][243562] Updated weights for policy 0, policy_version 5880 (0.0009) +[2026-06-02 16:25:55,148][243562] Updated weights for policy 0, policy_version 5890 (0.0008) +[2026-06-02 16:25:55,309][243562] Updated weights for policy 0, policy_version 5900 (0.0008) +[2026-06-02 16:25:55,475][243562] Updated weights for policy 0, policy_version 5910 (0.0008) +[2026-06-02 16:25:55,630][243562] Updated weights for policy 0, policy_version 5920 (0.0008) +[2026-06-02 16:25:55,804][243562] Updated weights for policy 0, policy_version 5930 (0.0008) +[2026-06-02 16:25:56,441][243562] Updated weights for policy 0, policy_version 5940 (0.0008) +[2026-06-02 16:25:56,598][243562] Updated weights for policy 0, policy_version 5950 (0.0009) +[2026-06-02 16:25:56,759][243562] Updated weights for policy 0, policy_version 5960 (0.0008) +[2026-06-02 16:25:56,940][243562] Updated weights for policy 0, policy_version 5971 (0.0008) +[2026-06-02 16:25:57,113][243562] Updated weights for policy 0, policy_version 5982 (0.0008) +[2026-06-02 16:25:57,290][243562] Updated weights for policy 0, policy_version 5992 (0.0008) +[2026-06-02 16:25:57,955][243562] Updated weights for policy 0, policy_version 6002 (0.0009) +[2026-06-02 16:25:58,116][243562] Updated weights for policy 0, policy_version 6012 (0.0009) +[2026-06-02 16:25:58,275][243562] Updated weights for policy 0, policy_version 6022 (0.0008) +[2026-06-02 16:25:58,438][243562] Updated weights for policy 0, policy_version 6032 (0.0008) +[2026-06-02 16:25:58,598][243562] Updated weights for policy 0, policy_version 6042 (0.0008) +[2026-06-02 16:25:58,762][243562] Updated weights for policy 0, policy_version 6052 (0.0008) +[2026-06-02 16:25:58,933][243562] Updated weights for policy 0, policy_version 6063 (0.0008) +[2026-06-02 16:25:59,601][243562] Updated weights for policy 0, policy_version 6074 (0.0009) +[2026-06-02 16:25:59,769][243562] Updated weights for policy 0, policy_version 6084 (0.0008) +[2026-06-02 16:25:59,811][235960] Fps is (10 sec: 19660.8, 60 sec: 20753.1, 300 sec: 20083.6). Total num frames: 3112960. Throughput: 0: 21137.1. Samples: 3157120. Policy #0 lag: (min: 63.0, avg: 78.9, max: 127.0) +[2026-06-02 16:25:59,812][235960] Avg episode reward: [(0, '87.928')] +[2026-06-02 16:25:59,933][243562] Updated weights for policy 0, policy_version 6094 (0.0008) +[2026-06-02 16:26:00,128][243562] Updated weights for policy 0, policy_version 6106 (0.0009) +[2026-06-02 16:26:00,308][243562] Updated weights for policy 0, policy_version 6117 (0.0009) +[2026-06-02 16:26:00,472][243562] Updated weights for policy 0, policy_version 6127 (0.0010) +[2026-06-02 16:26:01,154][243562] Updated weights for policy 0, policy_version 6137 (0.0012) +[2026-06-02 16:26:01,323][243562] Updated weights for policy 0, policy_version 6148 (0.0009) +[2026-06-02 16:26:01,487][243562] Updated weights for policy 0, policy_version 6158 (0.0009) +[2026-06-02 16:26:01,645][243562] Updated weights for policy 0, policy_version 6168 (0.0008) +[2026-06-02 16:26:01,811][243562] Updated weights for policy 0, policy_version 6178 (0.0008) +[2026-06-02 16:26:02,011][243562] Updated weights for policy 0, policy_version 6190 (0.0009) +[2026-06-02 16:26:02,703][243562] Updated weights for policy 0, policy_version 6201 (0.0009) +[2026-06-02 16:26:02,866][243562] Updated weights for policy 0, policy_version 6211 (0.0009) +[2026-06-02 16:26:03,030][243562] Updated weights for policy 0, policy_version 6221 (0.0011) +[2026-06-02 16:26:03,192][243562] Updated weights for policy 0, policy_version 6231 (0.0009) +[2026-06-02 16:26:03,371][243562] Updated weights for policy 0, policy_version 6242 (0.0008) +[2026-06-02 16:26:03,538][243562] Updated weights for policy 0, policy_version 6252 (0.0008) +[2026-06-02 16:26:04,224][243562] Updated weights for policy 0, policy_version 6263 (0.0010) +[2026-06-02 16:26:04,398][243562] Updated weights for policy 0, policy_version 6274 (0.0009) +[2026-06-02 16:26:04,559][243562] Updated weights for policy 0, policy_version 6284 (0.0009) +[2026-06-02 16:26:04,732][243562] Updated weights for policy 0, policy_version 6295 (0.0008) +[2026-06-02 16:26:04,811][235960] Fps is (10 sec: 19660.9, 60 sec: 20753.1, 300 sec: 20070.4). Total num frames: 3211264. Throughput: 0: 21202.5. Samples: 3222272. Policy #0 lag: (min: 3.0, avg: 53.0, max: 67.0) +[2026-06-02 16:26:04,812][235960] Avg episode reward: [(0, '95.404')] +[2026-06-02 16:26:04,905][243562] Updated weights for policy 0, policy_version 6305 (0.0008) +[2026-06-02 16:26:05,077][243562] Updated weights for policy 0, policy_version 6316 (0.0008) +[2026-06-02 16:26:05,140][242748] Saving new best policy, reward=95.404! +[2026-06-02 16:26:05,759][243562] Updated weights for policy 0, policy_version 6326 (0.0008) +[2026-06-02 16:26:05,929][243562] Updated weights for policy 0, policy_version 6337 (0.0008) +[2026-06-02 16:26:06,094][243562] Updated weights for policy 0, policy_version 6347 (0.0008) +[2026-06-02 16:26:06,257][243562] Updated weights for policy 0, policy_version 6357 (0.0008) +[2026-06-02 16:26:06,428][243562] Updated weights for policy 0, policy_version 6368 (0.0008) +[2026-06-02 16:26:06,613][243562] Updated weights for policy 0, policy_version 6379 (0.0008) +[2026-06-02 16:26:07,296][243562] Updated weights for policy 0, policy_version 6389 (0.0008) +[2026-06-02 16:26:07,463][243562] Updated weights for policy 0, policy_version 6400 (0.0010) +[2026-06-02 16:26:07,644][243562] Updated weights for policy 0, policy_version 6411 (0.0010) +[2026-06-02 16:26:07,806][243562] Updated weights for policy 0, policy_version 6421 (0.0009) +[2026-06-02 16:26:07,961][243562] Updated weights for policy 0, policy_version 6431 (0.0008) +[2026-06-02 16:26:08,124][243562] Updated weights for policy 0, policy_version 6441 (0.0008) +[2026-06-02 16:26:08,809][243562] Updated weights for policy 0, policy_version 6451 (0.0009) +[2026-06-02 16:26:08,965][243562] Updated weights for policy 0, policy_version 6461 (0.0009) +[2026-06-02 16:26:09,159][243562] Updated weights for policy 0, policy_version 6473 (0.0009) +[2026-06-02 16:26:09,321][243562] Updated weights for policy 0, policy_version 6483 (0.0009) +[2026-06-02 16:26:09,488][243562] Updated weights for policy 0, policy_version 6493 (0.0008) +[2026-06-02 16:26:09,645][243562] Updated weights for policy 0, policy_version 6503 (0.0009) +[2026-06-02 16:26:09,811][235960] Fps is (10 sec: 22937.6, 60 sec: 21299.2, 300 sec: 20256.6). Total num frames: 3342336. Throughput: 0: 21438.6. Samples: 3349888. Policy #0 lag: (min: 63.0, avg: 77.4, max: 127.0) +[2026-06-02 16:26:09,812][235960] Avg episode reward: [(0, '95.196')] +[2026-06-02 16:26:10,355][243562] Updated weights for policy 0, policy_version 6515 (0.0009) +[2026-06-02 16:26:10,511][243562] Updated weights for policy 0, policy_version 6525 (0.0009) +[2026-06-02 16:26:10,678][243562] Updated weights for policy 0, policy_version 6535 (0.0009) +[2026-06-02 16:26:10,843][243562] Updated weights for policy 0, policy_version 6545 (0.0009) +[2026-06-02 16:26:11,009][243562] Updated weights for policy 0, policy_version 6555 (0.0009) +[2026-06-02 16:26:11,205][243562] Updated weights for policy 0, policy_version 6567 (0.0009) +[2026-06-02 16:26:11,880][243562] Updated weights for policy 0, policy_version 6577 (0.0009) +[2026-06-02 16:26:12,032][243562] Updated weights for policy 0, policy_version 6587 (0.0009) +[2026-06-02 16:26:12,187][243562] Updated weights for policy 0, policy_version 6597 (0.0008) +[2026-06-02 16:26:12,343][243562] Updated weights for policy 0, policy_version 6607 (0.0010) +[2026-06-02 16:26:12,510][243562] Updated weights for policy 0, policy_version 6617 (0.0007) +[2026-06-02 16:26:12,670][243562] Updated weights for policy 0, policy_version 6627 (0.0005) +[2026-06-02 16:26:12,856][243562] Updated weights for policy 0, policy_version 6638 (0.0005) +[2026-06-02 16:26:13,543][243562] Updated weights for policy 0, policy_version 6648 (0.0009) +[2026-06-02 16:26:13,690][243562] Updated weights for policy 0, policy_version 6658 (0.0008) +[2026-06-02 16:26:13,862][243562] Updated weights for policy 0, policy_version 6668 (0.0008) +[2026-06-02 16:26:14,019][243562] Updated weights for policy 0, policy_version 6678 (0.0009) +[2026-06-02 16:26:14,201][243562] Updated weights for policy 0, policy_version 6689 (0.0009) +[2026-06-02 16:26:14,372][243562] Updated weights for policy 0, policy_version 6699 (0.0008) +[2026-06-02 16:26:14,811][235960] Fps is (10 sec: 22937.7, 60 sec: 21299.2, 300 sec: 20239.1). Total num frames: 3440640. Throughput: 0: 21378.9. Samples: 3473280. Policy #0 lag: (min: 63.0, avg: 76.4, max: 127.0) +[2026-06-02 16:26:14,812][235960] Avg episode reward: [(0, '98.591')] +[2026-06-02 16:26:15,017][243562] Updated weights for policy 0, policy_version 6709 (0.0009) +[2026-06-02 16:26:15,187][243562] Updated weights for policy 0, policy_version 6720 (0.0009) +[2026-06-02 16:26:15,366][243562] Updated weights for policy 0, policy_version 6731 (0.0008) +[2026-06-02 16:26:15,528][243562] Updated weights for policy 0, policy_version 6741 (0.0008) +[2026-06-02 16:26:15,689][243562] Updated weights for policy 0, policy_version 6751 (0.0008) +[2026-06-02 16:26:15,875][243562] Updated weights for policy 0, policy_version 6762 (0.0009) +[2026-06-02 16:26:15,959][242748] Saving new best policy, reward=98.591! +[2026-06-02 16:26:16,553][243562] Updated weights for policy 0, policy_version 6773 (0.0009) +[2026-06-02 16:26:16,748][243562] Updated weights for policy 0, policy_version 6786 (0.0009) +[2026-06-02 16:26:16,920][243562] Updated weights for policy 0, policy_version 6796 (0.0008) +[2026-06-02 16:26:17,077][243562] Updated weights for policy 0, policy_version 6806 (0.0008) +[2026-06-02 16:26:17,262][243562] Updated weights for policy 0, policy_version 6817 (0.0009) +[2026-06-02 16:26:17,422][243562] Updated weights for policy 0, policy_version 6827 (0.0009) +[2026-06-02 16:26:18,087][243562] Updated weights for policy 0, policy_version 6837 (0.0009) +[2026-06-02 16:26:18,247][243562] Updated weights for policy 0, policy_version 6847 (0.0008) +[2026-06-02 16:26:18,438][243562] Updated weights for policy 0, policy_version 6859 (0.0008) +[2026-06-02 16:26:18,601][243562] Updated weights for policy 0, policy_version 6869 (0.0008) +[2026-06-02 16:26:18,772][243562] Updated weights for policy 0, policy_version 6879 (0.0009) +[2026-06-02 16:26:18,942][243562] Updated weights for policy 0, policy_version 6889 (0.0009) +[2026-06-02 16:26:19,673][243562] Updated weights for policy 0, policy_version 6901 (0.0008) +[2026-06-02 16:26:19,811][235960] Fps is (10 sec: 19660.8, 60 sec: 21299.2, 300 sec: 20222.5). Total num frames: 3538944. Throughput: 0: 21245.1. Samples: 3535232. Policy #0 lag: (min: 63.0, avg: 76.4, max: 127.0) +[2026-06-02 16:26:19,813][235960] Avg episode reward: [(0, '123.040')] +[2026-06-02 16:26:19,858][243562] Updated weights for policy 0, policy_version 6913 (0.0009) +[2026-06-02 16:26:20,028][243562] Updated weights for policy 0, policy_version 6923 (0.0008) +[2026-06-02 16:26:20,189][243562] Updated weights for policy 0, policy_version 6933 (0.0008) +[2026-06-02 16:26:20,365][243562] Updated weights for policy 0, policy_version 6944 (0.0008) +[2026-06-02 16:26:20,532][243562] Updated weights for policy 0, policy_version 6954 (0.0009) +[2026-06-02 16:26:20,622][242748] Saving new best policy, reward=123.040! +[2026-06-02 16:26:21,209][243562] Updated weights for policy 0, policy_version 6965 (0.0009) +[2026-06-02 16:26:21,360][243562] Updated weights for policy 0, policy_version 6975 (0.0008) +[2026-06-02 16:26:21,522][243562] Updated weights for policy 0, policy_version 6985 (0.0008) +[2026-06-02 16:26:21,689][243562] Updated weights for policy 0, policy_version 6995 (0.0008) +[2026-06-02 16:26:21,840][243562] Updated weights for policy 0, policy_version 7005 (0.0009) +[2026-06-02 16:26:22,027][243562] Updated weights for policy 0, policy_version 7016 (0.0008) +[2026-06-02 16:26:22,701][243562] Updated weights for policy 0, policy_version 7026 (0.0009) +[2026-06-02 16:26:22,865][243562] Updated weights for policy 0, policy_version 7037 (0.0008) +[2026-06-02 16:26:23,047][243562] Updated weights for policy 0, policy_version 7048 (0.0008) +[2026-06-02 16:26:23,210][243562] Updated weights for policy 0, policy_version 7058 (0.0008) +[2026-06-02 16:26:23,388][243562] Updated weights for policy 0, policy_version 7069 (0.0009) +[2026-06-02 16:26:23,571][243562] Updated weights for policy 0, policy_version 7080 (0.0009) +[2026-06-02 16:26:24,293][243562] Updated weights for policy 0, policy_version 7092 (0.0009) +[2026-06-02 16:26:24,454][243562] Updated weights for policy 0, policy_version 7102 (0.0010) +[2026-06-02 16:26:24,624][243562] Updated weights for policy 0, policy_version 7113 (0.0009) +[2026-06-02 16:26:24,780][243562] Updated weights for policy 0, policy_version 7123 (0.0009) +[2026-06-02 16:26:24,811][235960] Fps is (10 sec: 19660.7, 60 sec: 21299.2, 300 sec: 20206.9). Total num frames: 3637248. Throughput: 0: 21222.4. Samples: 3662848. Policy #0 lag: (min: 40.0, avg: 65.8, max: 85.0) +[2026-06-02 16:26:24,812][235960] Avg episode reward: [(0, '131.462')] +[2026-06-02 16:26:24,951][243562] Updated weights for policy 0, policy_version 7133 (0.0009) +[2026-06-02 16:26:25,113][243562] Updated weights for policy 0, policy_version 7143 (0.0008) +[2026-06-02 16:26:25,254][242748] Saving new best policy, reward=131.462! +[2026-06-02 16:26:25,780][243562] Updated weights for policy 0, policy_version 7153 (0.0009) +[2026-06-02 16:26:25,926][243562] Updated weights for policy 0, policy_version 7163 (0.0009) +[2026-06-02 16:26:26,089][243562] Updated weights for policy 0, policy_version 7173 (0.0004) +[2026-06-02 16:26:26,272][243562] Updated weights for policy 0, policy_version 7184 (0.0007) +[2026-06-02 16:26:26,440][243562] Updated weights for policy 0, policy_version 7195 (0.0010) +[2026-06-02 16:26:26,612][243562] Updated weights for policy 0, policy_version 7205 (0.0008) +[2026-06-02 16:26:26,789][243562] Updated weights for policy 0, policy_version 7216 (0.0009) +[2026-06-02 16:26:27,479][243562] Updated weights for policy 0, policy_version 7227 (0.0009) +[2026-06-02 16:26:27,636][243562] Updated weights for policy 0, policy_version 7237 (0.0009) +[2026-06-02 16:26:27,837][243562] Updated weights for policy 0, policy_version 7249 (0.0009) +[2026-06-02 16:26:28,001][243562] Updated weights for policy 0, policy_version 7259 (0.0009) +[2026-06-02 16:26:28,157][243562] Updated weights for policy 0, policy_version 7269 (0.0010) +[2026-06-02 16:26:28,324][243562] Updated weights for policy 0, policy_version 7279 (0.0009) +[2026-06-02 16:26:28,987][243562] Updated weights for policy 0, policy_version 7290 (0.0010) +[2026-06-02 16:26:29,148][243562] Updated weights for policy 0, policy_version 7300 (0.0008) +[2026-06-02 16:26:29,305][243562] Updated weights for policy 0, policy_version 7310 (0.0009) +[2026-06-02 16:26:29,461][243562] Updated weights for policy 0, policy_version 7320 (0.0008) +[2026-06-02 16:26:29,640][243562] Updated weights for policy 0, policy_version 7331 (0.0009) +[2026-06-02 16:26:29,811][235960] Fps is (10 sec: 19660.9, 60 sec: 20753.1, 300 sec: 20192.2). Total num frames: 3735552. Throughput: 0: 21250.9. Samples: 3792512. Policy #0 lag: (min: 40.0, avg: 65.8, max: 85.0) +[2026-06-02 16:26:29,812][235960] Avg episode reward: [(0, '143.496')] +[2026-06-02 16:26:29,813][243562] Updated weights for policy 0, policy_version 7341 (0.0008) +[2026-06-02 16:26:29,857][242748] Saving new best policy, reward=143.496! +[2026-06-02 16:26:30,495][243562] Updated weights for policy 0, policy_version 7351 (0.0008) +[2026-06-02 16:26:30,663][243562] Updated weights for policy 0, policy_version 7361 (0.0009) +[2026-06-02 16:26:30,831][243562] Updated weights for policy 0, policy_version 7371 (0.0010) +[2026-06-02 16:26:30,982][243562] Updated weights for policy 0, policy_version 7381 (0.0008) +[2026-06-02 16:26:31,165][243562] Updated weights for policy 0, policy_version 7392 (0.0008) +[2026-06-02 16:26:31,324][243562] Updated weights for policy 0, policy_version 7402 (0.0008) +[2026-06-02 16:26:32,010][243562] Updated weights for policy 0, policy_version 7413 (0.0009) +[2026-06-02 16:26:32,165][243562] Updated weights for policy 0, policy_version 7423 (0.0009) +[2026-06-02 16:26:32,322][243562] Updated weights for policy 0, policy_version 7433 (0.0008) +[2026-06-02 16:26:32,501][243562] Updated weights for policy 0, policy_version 7444 (0.0009) +[2026-06-02 16:26:32,668][243562] Updated weights for policy 0, policy_version 7454 (0.0009) +[2026-06-02 16:26:32,824][243562] Updated weights for policy 0, policy_version 7464 (0.0009) +[2026-06-02 16:26:33,502][243562] Updated weights for policy 0, policy_version 7474 (0.0008) +[2026-06-02 16:26:33,671][243562] Updated weights for policy 0, policy_version 7485 (0.0009) +[2026-06-02 16:26:33,849][243562] Updated weights for policy 0, policy_version 7496 (0.0008) +[2026-06-02 16:26:34,010][243562] Updated weights for policy 0, policy_version 7506 (0.0008) +[2026-06-02 16:26:34,188][243562] Updated weights for policy 0, policy_version 7517 (0.0008) +[2026-06-02 16:26:34,345][243562] Updated weights for policy 0, policy_version 7527 (0.0008) +[2026-06-02 16:26:34,811][235960] Fps is (10 sec: 22937.7, 60 sec: 21299.2, 300 sec: 20350.7). Total num frames: 3866624. Throughput: 0: 21265.1. Samples: 3857536. Policy #0 lag: (min: 63.0, avg: 77.4, max: 127.0) +[2026-06-02 16:26:34,812][235960] Avg episode reward: [(0, '154.265')] +[2026-06-02 16:26:34,817][242748] Saving new best policy, reward=154.265! +[2026-06-02 16:26:35,029][243562] Updated weights for policy 0, policy_version 7537 (0.0009) +[2026-06-02 16:26:35,187][243562] Updated weights for policy 0, policy_version 7547 (0.0009) +[2026-06-02 16:26:35,340][243562] Updated weights for policy 0, policy_version 7557 (0.0008) +[2026-06-02 16:26:35,509][243562] Updated weights for policy 0, policy_version 7567 (0.0008) +[2026-06-02 16:26:35,690][243562] Updated weights for policy 0, policy_version 7578 (0.0008) +[2026-06-02 16:26:35,866][243562] Updated weights for policy 0, policy_version 7589 (0.0008) +[2026-06-02 16:26:36,036][243562] Updated weights for policy 0, policy_version 7599 (0.0008) +[2026-06-02 16:26:36,665][243562] Updated weights for policy 0, policy_version 7609 (0.0009) +[2026-06-02 16:26:36,842][243562] Updated weights for policy 0, policy_version 7619 (0.0008) +[2026-06-02 16:26:37,032][243562] Updated weights for policy 0, policy_version 7631 (0.0008) +[2026-06-02 16:26:37,195][243562] Updated weights for policy 0, policy_version 7641 (0.0008) +[2026-06-02 16:26:37,358][243562] Updated weights for policy 0, policy_version 7651 (0.0009) +[2026-06-02 16:26:37,522][243562] Updated weights for policy 0, policy_version 7661 (0.0008) +[2026-06-02 16:26:38,189][243562] Updated weights for policy 0, policy_version 7671 (0.0008) +[2026-06-02 16:26:38,346][243562] Updated weights for policy 0, policy_version 7681 (0.0008) +[2026-06-02 16:26:38,509][243562] Updated weights for policy 0, policy_version 7691 (0.0008) +[2026-06-02 16:26:38,678][243562] Updated weights for policy 0, policy_version 7701 (0.0009) +[2026-06-02 16:26:38,841][243562] Updated weights for policy 0, policy_version 7711 (0.0008) +[2026-06-02 16:26:39,008][243562] Updated weights for policy 0, policy_version 7721 (0.0008) +[2026-06-02 16:26:39,664][243562] Updated weights for policy 0, policy_version 7731 (0.0009) +[2026-06-02 16:26:39,811][235960] Fps is (10 sec: 22937.0, 60 sec: 21299.1, 300 sec: 20333.0). Total num frames: 3964928. Throughput: 0: 21307.6. Samples: 3988352. Policy #0 lag: (min: 14.0, avg: 29.8, max: 78.0) +[2026-06-02 16:26:39,813][235960] Avg episode reward: [(0, '153.767')] +[2026-06-02 16:26:39,827][243562] Updated weights for policy 0, policy_version 7741 (0.0009) +[2026-06-02 16:26:39,981][243562] Updated weights for policy 0, policy_version 7751 (0.0008) +[2026-06-02 16:26:40,144][243562] Updated weights for policy 0, policy_version 7761 (0.0008) +[2026-06-02 16:26:40,304][243562] Updated weights for policy 0, policy_version 7771 (0.0008) +[2026-06-02 16:26:40,488][243562] Updated weights for policy 0, policy_version 7782 (0.0008) +[2026-06-02 16:26:40,649][243562] Updated weights for policy 0, policy_version 7792 (0.0008) +[2026-06-02 16:26:41,317][243562] Updated weights for policy 0, policy_version 7803 (0.0009) +[2026-06-02 16:26:41,480][243562] Updated weights for policy 0, policy_version 7813 (0.0008) +[2026-06-02 16:26:41,642][243562] Updated weights for policy 0, policy_version 7823 (0.0008) +[2026-06-02 16:26:41,805][243562] Updated weights for policy 0, policy_version 7833 (0.0008) +[2026-06-02 16:26:41,992][243562] Updated weights for policy 0, policy_version 7844 (0.0008) +[2026-06-02 16:26:42,179][243562] Updated weights for policy 0, policy_version 7856 (0.0008) +[2026-06-02 16:26:42,887][243562] Updated weights for policy 0, policy_version 7868 (0.0009) +[2026-06-02 16:26:43,065][243562] Updated weights for policy 0, policy_version 7879 (0.0008) +[2026-06-02 16:26:43,225][243562] Updated weights for policy 0, policy_version 7889 (0.0008) +[2026-06-02 16:26:43,390][243562] Updated weights for policy 0, policy_version 7899 (0.0008) +[2026-06-02 16:26:43,569][243562] Updated weights for policy 0, policy_version 7910 (0.0009) +[2026-06-02 16:26:43,729][243562] Updated weights for policy 0, policy_version 7920 (0.0011) +[2026-06-02 16:26:44,390][243562] Updated weights for policy 0, policy_version 7930 (0.0009) +[2026-06-02 16:26:44,552][243562] Updated weights for policy 0, policy_version 7940 (0.0009) +[2026-06-02 16:26:44,707][243562] Updated weights for policy 0, policy_version 7950 (0.0008) +[2026-06-02 16:26:44,811][235960] Fps is (10 sec: 19660.7, 60 sec: 21299.2, 300 sec: 20316.2). Total num frames: 4063232. Throughput: 0: 21350.4. Samples: 4117888. Policy #0 lag: (min: 14.0, avg: 29.8, max: 78.0) +[2026-06-02 16:26:44,812][235960] Avg episode reward: [(0, '157.132')] +[2026-06-02 16:26:44,877][243562] Updated weights for policy 0, policy_version 7960 (0.0008) +[2026-06-02 16:26:45,030][243562] Updated weights for policy 0, policy_version 7970 (0.0008) +[2026-06-02 16:26:45,198][243562] Updated weights for policy 0, policy_version 7980 (0.0009) +[2026-06-02 16:26:45,266][242748] Saving new best policy, reward=157.132! +[2026-06-02 16:26:45,858][243562] Updated weights for policy 0, policy_version 7990 (0.0008) +[2026-06-02 16:26:46,031][243562] Updated weights for policy 0, policy_version 8001 (0.0008) +[2026-06-02 16:26:46,214][243562] Updated weights for policy 0, policy_version 8012 (0.0008) +[2026-06-02 16:26:46,377][243562] Updated weights for policy 0, policy_version 8022 (0.0009) +[2026-06-02 16:26:46,544][243562] Updated weights for policy 0, policy_version 8032 (0.0009) +[2026-06-02 16:26:46,708][243562] Updated weights for policy 0, policy_version 8042 (0.0009) +[2026-06-02 16:26:47,389][243562] Updated weights for policy 0, policy_version 8054 (0.0010) +[2026-06-02 16:26:47,542][243562] Updated weights for policy 0, policy_version 8064 (0.0008) +[2026-06-02 16:26:47,714][243562] Updated weights for policy 0, policy_version 8075 (0.0008) +[2026-06-02 16:26:47,885][243562] Updated weights for policy 0, policy_version 8085 (0.0008) +[2026-06-02 16:26:48,050][243562] Updated weights for policy 0, policy_version 8095 (0.0008) +[2026-06-02 16:26:48,214][243562] Updated weights for policy 0, policy_version 8105 (0.0008) +[2026-06-02 16:26:48,891][243562] Updated weights for policy 0, policy_version 8116 (0.0008) +[2026-06-02 16:26:49,072][243562] Updated weights for policy 0, policy_version 8127 (0.0009) +[2026-06-02 16:26:49,233][243562] Updated weights for policy 0, policy_version 8137 (0.0008) +[2026-06-02 16:26:49,400][243562] Updated weights for policy 0, policy_version 8147 (0.0009) +[2026-06-02 16:26:49,565][243562] Updated weights for policy 0, policy_version 8157 (0.0008) +[2026-06-02 16:26:49,728][243562] Updated weights for policy 0, policy_version 8167 (0.0008) +[2026-06-02 16:26:49,811][235960] Fps is (10 sec: 19661.3, 60 sec: 20753.1, 300 sec: 20300.2). Total num frames: 4161536. Throughput: 0: 21333.3. Samples: 4182272. Policy #0 lag: (min: 63.0, avg: 77.8, max: 127.0) +[2026-06-02 16:26:49,812][235960] Avg episode reward: [(0, '160.653')] +[2026-06-02 16:26:49,873][242748] Saving new best policy, reward=160.653! +[2026-06-02 16:26:50,392][243562] Updated weights for policy 0, policy_version 8177 (0.0009) +[2026-06-02 16:26:50,546][243562] Updated weights for policy 0, policy_version 8187 (0.0008) +[2026-06-02 16:26:50,701][243562] Updated weights for policy 0, policy_version 8197 (0.0008) +[2026-06-02 16:26:50,868][243562] Updated weights for policy 0, policy_version 8207 (0.0009) +[2026-06-02 16:26:51,025][243562] Updated weights for policy 0, policy_version 8217 (0.0008) +[2026-06-02 16:26:51,194][243562] Updated weights for policy 0, policy_version 8227 (0.0008) +[2026-06-02 16:26:51,354][243562] Updated weights for policy 0, policy_version 8237 (0.0008) +[2026-06-02 16:26:52,036][243562] Updated weights for policy 0, policy_version 8248 (0.0008) +[2026-06-02 16:26:52,202][243562] Updated weights for policy 0, policy_version 8258 (0.0008) +[2026-06-02 16:26:52,366][243562] Updated weights for policy 0, policy_version 8268 (0.0008) +[2026-06-02 16:26:52,558][243562] Updated weights for policy 0, policy_version 8280 (0.0008) +[2026-06-02 16:26:52,724][243562] Updated weights for policy 0, policy_version 8290 (0.0008) +[2026-06-02 16:26:52,887][243562] Updated weights for policy 0, policy_version 8300 (0.0009) +[2026-06-02 16:26:53,534][243562] Updated weights for policy 0, policy_version 8310 (0.0008) +[2026-06-02 16:26:53,698][243562] Updated weights for policy 0, policy_version 8320 (0.0008) +[2026-06-02 16:26:53,854][243562] Updated weights for policy 0, policy_version 8330 (0.0008) +[2026-06-02 16:26:54,031][243562] Updated weights for policy 0, policy_version 8341 (0.0008) +[2026-06-02 16:26:54,196][243562] Updated weights for policy 0, policy_version 8351 (0.0008) +[2026-06-02 16:26:54,359][243562] Updated weights for policy 0, policy_version 8361 (0.0008) +[2026-06-02 16:26:54,811][235960] Fps is (10 sec: 22937.6, 60 sec: 21299.2, 300 sec: 20441.0). Total num frames: 4292608. Throughput: 0: 21376.0. Samples: 4311808. Policy #0 lag: (min: 63.0, avg: 78.0, max: 127.0) +[2026-06-02 16:26:54,812][235960] Avg episode reward: [(0, '195.453')] +[2026-06-02 16:26:54,817][242748] Saving new best policy, reward=195.453! +[2026-06-02 16:26:55,040][243562] Updated weights for policy 0, policy_version 8371 (0.0010) +[2026-06-02 16:26:55,210][243562] Updated weights for policy 0, policy_version 8382 (0.0009) +[2026-06-02 16:26:55,402][243562] Updated weights for policy 0, policy_version 8394 (0.0008) +[2026-06-02 16:26:55,567][243562] Updated weights for policy 0, policy_version 8404 (0.0008) +[2026-06-02 16:26:55,737][243562] Updated weights for policy 0, policy_version 8414 (0.0008) +[2026-06-02 16:26:55,902][243562] Updated weights for policy 0, policy_version 8424 (0.0008) +[2026-06-02 16:26:56,567][243562] Updated weights for policy 0, policy_version 8435 (0.0008) +[2026-06-02 16:26:56,715][243562] Updated weights for policy 0, policy_version 8445 (0.0008) +[2026-06-02 16:26:56,909][243562] Updated weights for policy 0, policy_version 8456 (0.0009) +[2026-06-02 16:26:57,061][243562] Updated weights for policy 0, policy_version 8466 (0.0008) +[2026-06-02 16:26:57,219][243562] Updated weights for policy 0, policy_version 8476 (0.0008) +[2026-06-02 16:26:57,389][243562] Updated weights for policy 0, policy_version 8486 (0.0009) +[2026-06-02 16:26:57,552][243562] Updated weights for policy 0, policy_version 8496 (0.0008) +[2026-06-02 16:26:58,199][243562] Updated weights for policy 0, policy_version 8506 (0.0008) +[2026-06-02 16:26:58,363][243562] Updated weights for policy 0, policy_version 8517 (0.0008) +[2026-06-02 16:26:58,553][243562] Updated weights for policy 0, policy_version 8528 (0.0008) +[2026-06-02 16:26:58,711][243562] Updated weights for policy 0, policy_version 8538 (0.0008) +[2026-06-02 16:26:58,886][243562] Updated weights for policy 0, policy_version 8548 (0.0009) +[2026-06-02 16:26:59,050][243562] Updated weights for policy 0, policy_version 8558 (0.0008) +[2026-06-02 16:26:59,741][243562] Updated weights for policy 0, policy_version 8570 (0.0009) +[2026-06-02 16:26:59,811][235960] Fps is (10 sec: 22937.5, 60 sec: 21299.2, 300 sec: 20422.9). Total num frames: 4390912. Throughput: 0: 21506.8. Samples: 4441088. Policy #0 lag: (min: 63.0, avg: 78.0, max: 127.0) +[2026-06-02 16:26:59,812][235960] Avg episode reward: [(0, '214.526')] +[2026-06-02 16:26:59,899][243562] Updated weights for policy 0, policy_version 8580 (0.0008) +[2026-06-02 16:27:00,058][243562] Updated weights for policy 0, policy_version 8590 (0.0008) +[2026-06-02 16:27:00,230][243562] Updated weights for policy 0, policy_version 8600 (0.0008) +[2026-06-02 16:27:00,386][243562] Updated weights for policy 0, policy_version 8610 (0.0008) +[2026-06-02 16:27:00,554][243562] Updated weights for policy 0, policy_version 8620 (0.0008) +[2026-06-02 16:27:00,609][242748] Saving new best policy, reward=214.526! +[2026-06-02 16:27:01,218][243562] Updated weights for policy 0, policy_version 8631 (0.0009) +[2026-06-02 16:27:01,377][243562] Updated weights for policy 0, policy_version 8641 (0.0008) +[2026-06-02 16:27:01,535][243562] Updated weights for policy 0, policy_version 8651 (0.0008) +[2026-06-02 16:27:01,701][243562] Updated weights for policy 0, policy_version 8661 (0.0009) +[2026-06-02 16:27:01,859][243562] Updated weights for policy 0, policy_version 8671 (0.0008) +[2026-06-02 16:27:02,027][243562] Updated weights for policy 0, policy_version 8681 (0.0010) +[2026-06-02 16:27:02,702][243562] Updated weights for policy 0, policy_version 8691 (0.0008) +[2026-06-02 16:27:02,858][243562] Updated weights for policy 0, policy_version 8701 (0.0008) +[2026-06-02 16:27:03,036][243562] Updated weights for policy 0, policy_version 8712 (0.0008) +[2026-06-02 16:27:03,215][243562] Updated weights for policy 0, policy_version 8723 (0.0008) +[2026-06-02 16:27:03,376][243562] Updated weights for policy 0, policy_version 8733 (0.0008) +[2026-06-02 16:27:03,573][243562] Updated weights for policy 0, policy_version 8745 (0.0009) +[2026-06-02 16:27:04,239][243562] Updated weights for policy 0, policy_version 8755 (0.0009) +[2026-06-02 16:27:04,393][243562] Updated weights for policy 0, policy_version 8765 (0.0008) +[2026-06-02 16:27:04,560][243562] Updated weights for policy 0, policy_version 8775 (0.0008) +[2026-06-02 16:27:04,719][243562] Updated weights for policy 0, policy_version 8785 (0.0008) +[2026-06-02 16:27:04,811][235960] Fps is (10 sec: 19660.8, 60 sec: 21299.2, 300 sec: 20405.5). Total num frames: 4489216. Throughput: 0: 21575.1. Samples: 4506112. Policy #0 lag: (min: 127.0, avg: 141.2, max: 191.0) +[2026-06-02 16:27:04,812][235960] Avg episode reward: [(0, '228.620')] +[2026-06-02 16:27:04,902][243562] Updated weights for policy 0, policy_version 8796 (0.0009) +[2026-06-02 16:27:05,068][243562] Updated weights for policy 0, policy_version 8806 (0.0009) +[2026-06-02 16:27:05,223][242748] Saving new best policy, reward=228.620! +[2026-06-02 16:27:05,225][243562] Updated weights for policy 0, policy_version 8816 (0.0009) +[2026-06-02 16:27:05,877][243562] Updated weights for policy 0, policy_version 8826 (0.0008) +[2026-06-02 16:27:06,038][243562] Updated weights for policy 0, policy_version 8836 (0.0008) +[2026-06-02 16:27:06,216][243562] Updated weights for policy 0, policy_version 8847 (0.0008) +[2026-06-02 16:27:06,371][243562] Updated weights for policy 0, policy_version 8857 (0.0008) +[2026-06-02 16:27:06,555][243562] Updated weights for policy 0, policy_version 8868 (0.0008) +[2026-06-02 16:27:06,722][243562] Updated weights for policy 0, policy_version 8878 (0.0008) +[2026-06-02 16:27:07,396][243562] Updated weights for policy 0, policy_version 8889 (0.0009) +[2026-06-02 16:27:07,555][243562] Updated weights for policy 0, policy_version 8899 (0.0008) +[2026-06-02 16:27:07,713][243562] Updated weights for policy 0, policy_version 8909 (0.0007) +[2026-06-02 16:27:07,874][243562] Updated weights for policy 0, policy_version 8919 (0.0008) +[2026-06-02 16:27:08,053][243562] Updated weights for policy 0, policy_version 8930 (0.0008) +[2026-06-02 16:27:08,224][243562] Updated weights for policy 0, policy_version 8940 (0.0009) +[2026-06-02 16:27:08,862][243562] Updated weights for policy 0, policy_version 8950 (0.0009) +[2026-06-02 16:27:09,018][243562] Updated weights for policy 0, policy_version 8960 (0.0008) +[2026-06-02 16:27:09,190][243562] Updated weights for policy 0, policy_version 8971 (0.0010) +[2026-06-02 16:27:09,356][243562] Updated weights for policy 0, policy_version 8981 (0.0008) +[2026-06-02 16:27:09,523][243562] Updated weights for policy 0, policy_version 8991 (0.0008) +[2026-06-02 16:27:09,695][243562] Updated weights for policy 0, policy_version 9002 (0.0008) +[2026-06-02 16:27:09,811][235960] Fps is (10 sec: 22937.6, 60 sec: 21299.2, 300 sec: 20534.6). Total num frames: 4620288. Throughput: 0: 21566.6. Samples: 4633344. Policy #0 lag: (min: 127.0, avg: 141.2, max: 191.0) +[2026-06-02 16:27:09,812][235960] Avg episode reward: [(0, '209.070')] +[2026-06-02 16:27:10,365][243562] Updated weights for policy 0, policy_version 9013 (0.0008) +[2026-06-02 16:27:10,520][243562] Updated weights for policy 0, policy_version 9023 (0.0008) +[2026-06-02 16:27:10,683][243562] Updated weights for policy 0, policy_version 9033 (0.0008) +[2026-06-02 16:27:10,840][243562] Updated weights for policy 0, policy_version 9043 (0.0008) +[2026-06-02 16:27:11,022][243562] Updated weights for policy 0, policy_version 9054 (0.0008) +[2026-06-02 16:27:11,188][243562] Updated weights for policy 0, policy_version 9064 (0.0008) +[2026-06-02 16:27:11,841][243562] Updated weights for policy 0, policy_version 9074 (0.0008) +[2026-06-02 16:27:12,025][243562] Updated weights for policy 0, policy_version 9086 (0.0009) +[2026-06-02 16:27:12,189][243562] Updated weights for policy 0, policy_version 9096 (0.0009) +[2026-06-02 16:27:12,356][243562] Updated weights for policy 0, policy_version 9106 (0.0008) +[2026-06-02 16:27:12,531][243562] Updated weights for policy 0, policy_version 9117 (0.0008) +[2026-06-02 16:27:12,700][243562] Updated weights for policy 0, policy_version 9127 (0.0008) +[2026-06-02 16:27:13,372][243562] Updated weights for policy 0, policy_version 9138 (0.0009) +[2026-06-02 16:27:13,518][243562] Updated weights for policy 0, policy_version 9148 (0.0008) +[2026-06-02 16:27:13,698][243562] Updated weights for policy 0, policy_version 9159 (0.0008) +[2026-06-02 16:27:13,858][243562] Updated weights for policy 0, policy_version 9169 (0.0008) +[2026-06-02 16:27:14,029][243562] Updated weights for policy 0, policy_version 9179 (0.0008) +[2026-06-02 16:27:14,196][243562] Updated weights for policy 0, policy_version 9189 (0.0008) +[2026-06-02 16:27:14,363][243562] Updated weights for policy 0, policy_version 9199 (0.0008) +[2026-06-02 16:27:14,811][235960] Fps is (10 sec: 22937.6, 60 sec: 21299.2, 300 sec: 20515.6). Total num frames: 4718592. Throughput: 0: 21441.4. Samples: 4757376. Policy #0 lag: (min: 63.0, avg: 77.8, max: 127.0) +[2026-06-02 16:27:14,812][235960] Avg episode reward: [(0, '203.746')] +[2026-06-02 16:27:15,024][243562] Updated weights for policy 0, policy_version 9209 (0.0009) +[2026-06-02 16:27:15,182][243562] Updated weights for policy 0, policy_version 9219 (0.0008) +[2026-06-02 16:27:15,359][243562] Updated weights for policy 0, policy_version 9230 (0.0009) +[2026-06-02 16:27:15,541][243562] Updated weights for policy 0, policy_version 9241 (0.0008) +[2026-06-02 16:27:15,703][243562] Updated weights for policy 0, policy_version 9251 (0.0008) +[2026-06-02 16:27:15,866][243562] Updated weights for policy 0, policy_version 9261 (0.0007) +[2026-06-02 16:27:16,526][243562] Updated weights for policy 0, policy_version 9271 (0.0008) +[2026-06-02 16:27:16,686][243562] Updated weights for policy 0, policy_version 9281 (0.0009) +[2026-06-02 16:27:16,850][243562] Updated weights for policy 0, policy_version 9291 (0.0008) +[2026-06-02 16:27:17,010][243562] Updated weights for policy 0, policy_version 9301 (0.0008) +[2026-06-02 16:27:17,170][243562] Updated weights for policy 0, policy_version 9311 (0.0008) +[2026-06-02 16:27:17,355][243562] Updated weights for policy 0, policy_version 9322 (0.0009) +[2026-06-02 16:27:18,043][243562] Updated weights for policy 0, policy_version 9333 (0.0008) +[2026-06-02 16:27:18,201][243562] Updated weights for policy 0, policy_version 9343 (0.0009) +[2026-06-02 16:27:18,378][243562] Updated weights for policy 0, policy_version 9354 (0.0008) +[2026-06-02 16:27:18,536][243562] Updated weights for policy 0, policy_version 9364 (0.0008) +[2026-06-02 16:27:18,700][243562] Updated weights for policy 0, policy_version 9374 (0.0009) +[2026-06-02 16:27:18,860][243562] Updated weights for policy 0, policy_version 9384 (0.0009) +[2026-06-02 16:27:19,554][243562] Updated weights for policy 0, policy_version 9394 (0.0009) +[2026-06-02 16:27:19,704][243562] Updated weights for policy 0, policy_version 9404 (0.0008) +[2026-06-02 16:27:19,811][235960] Fps is (10 sec: 19660.8, 60 sec: 21299.2, 300 sec: 20497.4). Total num frames: 4816896. Throughput: 0: 21387.3. Samples: 4819968. Policy #0 lag: (min: 63.0, avg: 77.8, max: 127.0) +[2026-06-02 16:27:19,812][235960] Avg episode reward: [(0, '205.432')] +[2026-06-02 16:27:19,885][243562] Updated weights for policy 0, policy_version 9415 (0.0008) +[2026-06-02 16:27:20,047][243562] Updated weights for policy 0, policy_version 9425 (0.0008) +[2026-06-02 16:27:20,216][243562] Updated weights for policy 0, policy_version 9435 (0.0009) +[2026-06-02 16:27:20,380][243562] Updated weights for policy 0, policy_version 9445 (0.0009) +[2026-06-02 16:27:20,539][243562] Updated weights for policy 0, policy_version 9455 (0.0008) +[2026-06-02 16:27:21,200][243562] Updated weights for policy 0, policy_version 9465 (0.0008) +[2026-06-02 16:27:21,359][243562] Updated weights for policy 0, policy_version 9475 (0.0009) +[2026-06-02 16:27:21,526][243562] Updated weights for policy 0, policy_version 9485 (0.0008) +[2026-06-02 16:27:21,683][243562] Updated weights for policy 0, policy_version 9495 (0.0009) +[2026-06-02 16:27:21,853][243562] Updated weights for policy 0, policy_version 9505 (0.0009) +[2026-06-02 16:27:22,021][243562] Updated weights for policy 0, policy_version 9515 (0.0008) +[2026-06-02 16:27:22,658][243562] Updated weights for policy 0, policy_version 9525 (0.0010) +[2026-06-02 16:27:22,831][243562] Updated weights for policy 0, policy_version 9536 (0.0008) +[2026-06-02 16:27:22,992][243562] Updated weights for policy 0, policy_version 9546 (0.0009) +[2026-06-02 16:27:23,154][243562] Updated weights for policy 0, policy_version 9556 (0.0008) +[2026-06-02 16:27:23,331][243562] Updated weights for policy 0, policy_version 9567 (0.0009) +[2026-06-02 16:27:23,489][243562] Updated weights for policy 0, policy_version 9577 (0.0008) +[2026-06-02 16:27:24,161][243562] Updated weights for policy 0, policy_version 9588 (0.0009) +[2026-06-02 16:27:24,323][243562] Updated weights for policy 0, policy_version 9598 (0.0008) +[2026-06-02 16:27:24,478][243562] Updated weights for policy 0, policy_version 9608 (0.0009) +[2026-06-02 16:27:24,642][243562] Updated weights for policy 0, policy_version 9618 (0.0008) +[2026-06-02 16:27:24,811][235960] Fps is (10 sec: 19660.9, 60 sec: 21299.2, 300 sec: 20480.0). Total num frames: 4915200. Throughput: 0: 21265.2. Samples: 4945280. Policy #0 lag: (min: 121.0, avg: 133.2, max: 183.0) +[2026-06-02 16:27:24,812][235960] Avg episode reward: [(0, '215.578')] +[2026-06-02 16:27:24,815][243562] Updated weights for policy 0, policy_version 9628 (0.0008) +[2026-06-02 16:27:24,995][243562] Updated weights for policy 0, policy_version 9639 (0.0010) +[2026-06-02 16:27:25,667][243562] Updated weights for policy 0, policy_version 9649 (0.0009) +[2026-06-02 16:27:25,824][243562] Updated weights for policy 0, policy_version 9659 (0.0008) +[2026-06-02 16:27:25,991][243562] Updated weights for policy 0, policy_version 9669 (0.0008) +[2026-06-02 16:27:26,150][243562] Updated weights for policy 0, policy_version 9679 (0.0008) +[2026-06-02 16:27:26,311][243562] Updated weights for policy 0, policy_version 9689 (0.0008) +[2026-06-02 16:27:26,496][243562] Updated weights for policy 0, policy_version 9700 (0.0009) +[2026-06-02 16:27:26,659][243562] Updated weights for policy 0, policy_version 9710 (0.0008) +[2026-06-02 16:27:27,323][243562] Updated weights for policy 0, policy_version 9720 (0.0008) +[2026-06-02 16:27:27,514][243562] Updated weights for policy 0, policy_version 9732 (0.0008) +[2026-06-02 16:27:27,681][243562] Updated weights for policy 0, policy_version 9742 (0.0008) +[2026-06-02 16:27:27,844][243562] Updated weights for policy 0, policy_version 9752 (0.0008) +[2026-06-02 16:27:28,005][243562] Updated weights for policy 0, policy_version 9762 (0.0010) +[2026-06-02 16:27:28,178][243562] Updated weights for policy 0, policy_version 9773 (0.0008) +[2026-06-02 16:27:28,843][243562] Updated weights for policy 0, policy_version 9784 (0.0008) +[2026-06-02 16:27:29,008][243562] Updated weights for policy 0, policy_version 9794 (0.0008) +[2026-06-02 16:27:29,170][243562] Updated weights for policy 0, policy_version 9804 (0.0008) +[2026-06-02 16:27:29,335][243562] Updated weights for policy 0, policy_version 9814 (0.0008) +[2026-06-02 16:27:29,513][243562] Updated weights for policy 0, policy_version 9825 (0.0008) +[2026-06-02 16:27:29,681][243562] Updated weights for policy 0, policy_version 9835 (0.0009) +[2026-06-02 16:27:29,811][235960] Fps is (10 sec: 22937.6, 60 sec: 21845.3, 300 sec: 20597.0). Total num frames: 5046272. Throughput: 0: 21259.4. Samples: 5074560. Policy #0 lag: (min: 121.0, avg: 133.2, max: 183.0) +[2026-06-02 16:27:29,812][235960] Avg episode reward: [(0, '234.315')] +[2026-06-02 16:27:29,822][242748] Saving new best policy, reward=234.315! +[2026-06-02 16:27:30,331][243562] Updated weights for policy 0, policy_version 9845 (0.0008) +[2026-06-02 16:27:30,485][243562] Updated weights for policy 0, policy_version 9855 (0.0008) +[2026-06-02 16:27:30,664][243562] Updated weights for policy 0, policy_version 9866 (0.0009) +[2026-06-02 16:27:30,825][243562] Updated weights for policy 0, policy_version 9876 (0.0008) +[2026-06-02 16:27:31,000][243562] Updated weights for policy 0, policy_version 9886 (0.0008) +[2026-06-02 16:27:31,176][243562] Updated weights for policy 0, policy_version 9897 (0.0008) +[2026-06-02 16:27:31,805][243562] Updated weights for policy 0, policy_version 9907 (0.0008) +[2026-06-02 16:27:31,956][243562] Updated weights for policy 0, policy_version 9917 (0.0008) +[2026-06-02 16:27:32,110][243562] Updated weights for policy 0, policy_version 9927 (0.0008) +[2026-06-02 16:27:32,303][243562] Updated weights for policy 0, policy_version 9938 (0.0009) +[2026-06-02 16:27:32,467][243562] Updated weights for policy 0, policy_version 9948 (0.0008) +[2026-06-02 16:27:32,635][243562] Updated weights for policy 0, policy_version 9958 (0.0009) +[2026-06-02 16:27:32,790][243562] Updated weights for policy 0, policy_version 9968 (0.0008) +[2026-06-02 16:27:33,439][243562] Updated weights for policy 0, policy_version 9978 (0.0008) +[2026-06-02 16:27:33,624][243562] Updated weights for policy 0, policy_version 9989 (0.0009) +[2026-06-02 16:27:33,802][243562] Updated weights for policy 0, policy_version 10000 (0.0008) +[2026-06-02 16:27:33,974][243562] Updated weights for policy 0, policy_version 10011 (0.0008) +[2026-06-02 16:27:34,158][243562] Updated weights for policy 0, policy_version 10022 (0.0008) +[2026-06-02 16:27:34,319][243562] Updated weights for policy 0, policy_version 10032 (0.0008) +[2026-06-02 16:27:34,811][235960] Fps is (10 sec: 22937.5, 60 sec: 21299.2, 300 sec: 20578.3). Total num frames: 5144576. Throughput: 0: 21242.3. Samples: 5138176. Policy #0 lag: (min: 3.0, avg: 18.3, max: 67.0) +[2026-06-02 16:27:34,812][235960] Avg episode reward: [(0, '222.506')] +[2026-06-02 16:27:35,001][243562] Updated weights for policy 0, policy_version 10043 (0.0009) +[2026-06-02 16:27:35,167][243562] Updated weights for policy 0, policy_version 10053 (0.0009) +[2026-06-02 16:27:35,332][243562] Updated weights for policy 0, policy_version 10063 (0.0008) +[2026-06-02 16:27:35,489][243562] Updated weights for policy 0, policy_version 10073 (0.0008) +[2026-06-02 16:27:35,660][243562] Updated weights for policy 0, policy_version 10083 (0.0008) +[2026-06-02 16:27:35,828][243562] Updated weights for policy 0, policy_version 10093 (0.0008) +[2026-06-02 16:27:36,479][243562] Updated weights for policy 0, policy_version 10104 (0.0008) +[2026-06-02 16:27:36,629][243562] Updated weights for policy 0, policy_version 10114 (0.0008) +[2026-06-02 16:27:36,800][243562] Updated weights for policy 0, policy_version 10124 (0.0008) +[2026-06-02 16:27:36,956][243562] Updated weights for policy 0, policy_version 10134 (0.0008) +[2026-06-02 16:27:37,140][243562] Updated weights for policy 0, policy_version 10145 (0.0008) +[2026-06-02 16:27:37,303][243562] Updated weights for policy 0, policy_version 10155 (0.0008) +[2026-06-02 16:27:37,958][243562] Updated weights for policy 0, policy_version 10165 (0.0008) +[2026-06-02 16:27:38,113][243562] Updated weights for policy 0, policy_version 10175 (0.0009) +[2026-06-02 16:27:38,286][243562] Updated weights for policy 0, policy_version 10186 (0.0008) +[2026-06-02 16:27:38,464][243562] Updated weights for policy 0, policy_version 10197 (0.0008) +[2026-06-02 16:27:38,628][243562] Updated weights for policy 0, policy_version 10207 (0.0008) +[2026-06-02 16:27:38,794][243562] Updated weights for policy 0, policy_version 10217 (0.0009) +[2026-06-02 16:27:39,440][243562] Updated weights for policy 0, policy_version 10227 (0.0008) +[2026-06-02 16:27:39,591][243562] Updated weights for policy 0, policy_version 10237 (0.0008) +[2026-06-02 16:27:39,772][243562] Updated weights for policy 0, policy_version 10248 (0.0009) +[2026-06-02 16:27:39,811][235960] Fps is (10 sec: 19660.7, 60 sec: 21299.2, 300 sec: 20560.3). Total num frames: 5242880. Throughput: 0: 21219.5. Samples: 5266688. Policy #0 lag: (min: 3.0, avg: 18.3, max: 67.0) +[2026-06-02 16:27:39,812][235960] Avg episode reward: [(0, '234.088')] +[2026-06-02 16:27:39,929][243562] Updated weights for policy 0, policy_version 10258 (0.0008) +[2026-06-02 16:27:40,097][243562] Updated weights for policy 0, policy_version 10268 (0.0009) +[2026-06-02 16:27:40,280][243562] Updated weights for policy 0, policy_version 10279 (0.0008) +[2026-06-02 16:27:40,929][243562] Updated weights for policy 0, policy_version 10289 (0.0008) +[2026-06-02 16:27:41,088][243562] Updated weights for policy 0, policy_version 10299 (0.0008) +[2026-06-02 16:27:41,245][243562] Updated weights for policy 0, policy_version 10309 (0.0006) +[2026-06-02 16:27:41,423][243562] Updated weights for policy 0, policy_version 10320 (0.0006) +[2026-06-02 16:27:41,595][243562] Updated weights for policy 0, policy_version 10330 (0.0007) +[2026-06-02 16:27:41,749][243562] Updated weights for policy 0, policy_version 10340 (0.0008) +[2026-06-02 16:27:41,916][243562] Updated weights for policy 0, policy_version 10350 (0.0008) +[2026-06-02 16:27:42,557][243562] Updated weights for policy 0, policy_version 10360 (0.0008) +[2026-06-02 16:27:42,716][243562] Updated weights for policy 0, policy_version 10370 (0.0008) +[2026-06-02 16:27:42,879][243562] Updated weights for policy 0, policy_version 10380 (0.0008) +[2026-06-02 16:27:43,043][243562] Updated weights for policy 0, policy_version 10390 (0.0008) +[2026-06-02 16:27:43,215][243562] Updated weights for policy 0, policy_version 10400 (0.0008) +[2026-06-02 16:27:43,391][243562] Updated weights for policy 0, policy_version 10411 (0.0007) +[2026-06-02 16:27:44,070][243562] Updated weights for policy 0, policy_version 10423 (0.0005) +[2026-06-02 16:27:44,234][243562] Updated weights for policy 0, policy_version 10433 (0.0005) +[2026-06-02 16:27:44,398][243562] Updated weights for policy 0, policy_version 10443 (0.0005) +[2026-06-02 16:27:44,560][243562] Updated weights for policy 0, policy_version 10453 (0.0005) +[2026-06-02 16:27:44,750][243562] Updated weights for policy 0, policy_version 10464 (0.0004) +[2026-06-02 16:27:44,811][235960] Fps is (10 sec: 19660.8, 60 sec: 21299.2, 300 sec: 20543.0). Total num frames: 5341184. Throughput: 0: 21199.7. Samples: 5395072. Policy #0 lag: (min: 63.0, avg: 77.5, max: 127.0) +[2026-06-02 16:27:44,812][235960] Avg episode reward: [(0, '235.888')] +[2026-06-02 16:27:44,917][243562] Updated weights for policy 0, policy_version 10474 (0.0005) +[2026-06-02 16:27:45,004][242748] Saving new best policy, reward=235.888! +[2026-06-02 16:27:45,547][243562] Updated weights for policy 0, policy_version 10484 (0.0008) +[2026-06-02 16:27:45,707][243562] Updated weights for policy 0, policy_version 10494 (0.0008) +[2026-06-02 16:27:45,897][243562] Updated weights for policy 0, policy_version 10506 (0.0008) +[2026-06-02 16:27:46,059][243562] Updated weights for policy 0, policy_version 10516 (0.0008) +[2026-06-02 16:27:46,224][243562] Updated weights for policy 0, policy_version 10526 (0.0008) +[2026-06-02 16:27:46,387][243562] Updated weights for policy 0, policy_version 10536 (0.0008) +[2026-06-02 16:27:47,058][243562] Updated weights for policy 0, policy_version 10547 (0.0009) +[2026-06-02 16:27:47,215][243562] Updated weights for policy 0, policy_version 10557 (0.0008) +[2026-06-02 16:27:47,388][243562] Updated weights for policy 0, policy_version 10568 (0.0008) +[2026-06-02 16:27:47,554][243562] Updated weights for policy 0, policy_version 10578 (0.0008) +[2026-06-02 16:27:47,723][243562] Updated weights for policy 0, policy_version 10588 (0.0008) +[2026-06-02 16:27:47,907][243562] Updated weights for policy 0, policy_version 10599 (0.0008) +[2026-06-02 16:27:48,556][243562] Updated weights for policy 0, policy_version 10609 (0.0008) +[2026-06-02 16:27:48,711][243562] Updated weights for policy 0, policy_version 10619 (0.0008) +[2026-06-02 16:27:48,870][243562] Updated weights for policy 0, policy_version 10629 (0.0009) +[2026-06-02 16:27:49,034][243562] Updated weights for policy 0, policy_version 10639 (0.0008) +[2026-06-02 16:27:49,213][243562] Updated weights for policy 0, policy_version 10650 (0.0008) +[2026-06-02 16:27:49,401][243562] Updated weights for policy 0, policy_version 10661 (0.0008) +[2026-06-02 16:27:49,566][243562] Updated weights for policy 0, policy_version 10671 (0.0008) +[2026-06-02 16:27:49,811][235960] Fps is (10 sec: 22937.9, 60 sec: 21845.3, 300 sec: 20650.0). Total num frames: 5472256. Throughput: 0: 21185.5. Samples: 5459456. Policy #0 lag: (min: 63.0, avg: 77.5, max: 127.0) +[2026-06-02 16:27:49,812][235960] Avg episode reward: [(0, '264.998')] +[2026-06-02 16:27:49,815][242748] Saving new best policy, reward=264.998! +[2026-06-02 16:27:50,225][243562] Updated weights for policy 0, policy_version 10681 (0.0006) +[2026-06-02 16:27:50,392][243562] Updated weights for policy 0, policy_version 10691 (0.0008) +[2026-06-02 16:27:50,573][243562] Updated weights for policy 0, policy_version 10702 (0.0008) +[2026-06-02 16:27:50,742][243562] Updated weights for policy 0, policy_version 10712 (0.0008) +[2026-06-02 16:27:50,907][243562] Updated weights for policy 0, policy_version 10722 (0.0008) +[2026-06-02 16:27:51,074][243562] Updated weights for policy 0, policy_version 10732 (0.0008) +[2026-06-02 16:27:51,696][243562] Updated weights for policy 0, policy_version 10742 (0.0008) +[2026-06-02 16:27:51,865][243562] Updated weights for policy 0, policy_version 10752 (0.0008) +[2026-06-02 16:27:52,020][243562] Updated weights for policy 0, policy_version 10762 (0.0008) +[2026-06-02 16:27:52,195][243562] Updated weights for policy 0, policy_version 10773 (0.0008) +[2026-06-02 16:27:52,364][243562] Updated weights for policy 0, policy_version 10783 (0.0008) +[2026-06-02 16:27:52,527][243562] Updated weights for policy 0, policy_version 10793 (0.0009) +[2026-06-02 16:27:53,198][243562] Updated weights for policy 0, policy_version 10804 (0.0009) +[2026-06-02 16:27:53,350][243562] Updated weights for policy 0, policy_version 10814 (0.0008) +[2026-06-02 16:27:53,514][243562] Updated weights for policy 0, policy_version 10824 (0.0008) +[2026-06-02 16:27:53,673][243562] Updated weights for policy 0, policy_version 10834 (0.0008) +[2026-06-02 16:27:53,848][243562] Updated weights for policy 0, policy_version 10845 (0.0008) +[2026-06-02 16:27:54,036][243562] Updated weights for policy 0, policy_version 10856 (0.0008) +[2026-06-02 16:27:54,696][243562] Updated weights for policy 0, policy_version 10866 (0.0009) +[2026-06-02 16:27:54,811][235960] Fps is (10 sec: 22937.7, 60 sec: 21299.2, 300 sec: 20631.7). Total num frames: 5570560. Throughput: 0: 21233.8. Samples: 5588864. Policy #0 lag: (min: 46.0, avg: 84.9, max: 109.0) +[2026-06-02 16:27:54,812][235960] Avg episode reward: [(0, '288.362')] +[2026-06-02 16:27:54,838][243562] Updated weights for policy 0, policy_version 10876 (0.0008) +[2026-06-02 16:27:55,002][243562] Updated weights for policy 0, policy_version 10886 (0.0008) +[2026-06-02 16:27:55,181][243562] Updated weights for policy 0, policy_version 10897 (0.0008) +[2026-06-02 16:27:55,351][243562] Updated weights for policy 0, policy_version 10907 (0.0008) +[2026-06-02 16:27:55,514][243562] Updated weights for policy 0, policy_version 10917 (0.0009) +[2026-06-02 16:27:55,684][243562] Updated weights for policy 0, policy_version 10927 (0.0008) +[2026-06-02 16:27:55,691][242748] Saving new best policy, reward=288.362! +[2026-06-02 16:27:56,340][243562] Updated weights for policy 0, policy_version 10937 (0.0008) +[2026-06-02 16:27:56,516][243562] Updated weights for policy 0, policy_version 10948 (0.0008) +[2026-06-02 16:27:56,690][243562] Updated weights for policy 0, policy_version 10959 (0.0008) +[2026-06-02 16:27:56,875][243562] Updated weights for policy 0, policy_version 10970 (0.0008) +[2026-06-02 16:27:57,043][243562] Updated weights for policy 0, policy_version 10980 (0.0008) +[2026-06-02 16:27:57,219][243562] Updated weights for policy 0, policy_version 10991 (0.0009) +[2026-06-02 16:27:57,862][243562] Updated weights for policy 0, policy_version 11001 (0.0008) +[2026-06-02 16:27:58,021][243562] Updated weights for policy 0, policy_version 11011 (0.0008) +[2026-06-02 16:27:58,201][243562] Updated weights for policy 0, policy_version 11022 (0.0008) +[2026-06-02 16:27:58,366][243562] Updated weights for policy 0, policy_version 11032 (0.0008) +[2026-06-02 16:27:58,524][243562] Updated weights for policy 0, policy_version 11042 (0.0008) +[2026-06-02 16:27:58,697][243562] Updated weights for policy 0, policy_version 11052 (0.0008) +[2026-06-02 16:27:59,366][243562] Updated weights for policy 0, policy_version 11063 (0.0010) +[2026-06-02 16:27:59,527][243562] Updated weights for policy 0, policy_version 11073 (0.0008) +[2026-06-02 16:27:59,687][243562] Updated weights for policy 0, policy_version 11083 (0.0008) +[2026-06-02 16:27:59,811][235960] Fps is (10 sec: 19660.6, 60 sec: 21299.2, 300 sec: 20614.1). Total num frames: 5668864. Throughput: 0: 21358.9. Samples: 5718528. Policy #0 lag: (min: 46.0, avg: 84.9, max: 109.0) +[2026-06-02 16:27:59,812][235960] Avg episode reward: [(0, '299.978')] +[2026-06-02 16:27:59,854][243562] Updated weights for policy 0, policy_version 11093 (0.0009) +[2026-06-02 16:28:00,011][243562] Updated weights for policy 0, policy_version 11103 (0.0008) +[2026-06-02 16:28:00,174][243562] Updated weights for policy 0, policy_version 11113 (0.0008) +[2026-06-02 16:28:00,286][242748] Saving new best policy, reward=299.978! +[2026-06-02 16:28:00,827][243562] Updated weights for policy 0, policy_version 11123 (0.0009) +[2026-06-02 16:28:00,984][243562] Updated weights for policy 0, policy_version 11133 (0.0009) +[2026-06-02 16:28:01,143][243562] Updated weights for policy 0, policy_version 11143 (0.0008) +[2026-06-02 16:28:01,305][243562] Updated weights for policy 0, policy_version 11153 (0.0008) +[2026-06-02 16:28:01,481][243562] Updated weights for policy 0, policy_version 11164 (0.0008) +[2026-06-02 16:28:01,652][243562] Updated weights for policy 0, policy_version 11174 (0.0009) +[2026-06-02 16:28:01,813][243562] Updated weights for policy 0, policy_version 11184 (0.0008) +[2026-06-02 16:28:02,447][243562] Updated weights for policy 0, policy_version 11194 (0.0008) +[2026-06-02 16:28:02,629][243562] Updated weights for policy 0, policy_version 11205 (0.0008) +[2026-06-02 16:28:02,808][243562] Updated weights for policy 0, policy_version 11216 (0.0009) +[2026-06-02 16:28:02,973][243562] Updated weights for policy 0, policy_version 11226 (0.0008) +[2026-06-02 16:28:03,151][243562] Updated weights for policy 0, policy_version 11237 (0.0008) +[2026-06-02 16:28:03,318][243562] Updated weights for policy 0, policy_version 11247 (0.0010) +[2026-06-02 16:28:03,979][243562] Updated weights for policy 0, policy_version 11257 (0.0008) +[2026-06-02 16:28:04,157][243562] Updated weights for policy 0, policy_version 11268 (0.0009) +[2026-06-02 16:28:04,326][243562] Updated weights for policy 0, policy_version 11278 (0.0009) +[2026-06-02 16:28:04,491][243562] Updated weights for policy 0, policy_version 11288 (0.0009) +[2026-06-02 16:28:04,655][243562] Updated weights for policy 0, policy_version 11298 (0.0009) +[2026-06-02 16:28:04,811][235960] Fps is (10 sec: 19660.7, 60 sec: 21299.2, 300 sec: 20597.0). Total num frames: 5767168. Throughput: 0: 21390.2. Samples: 5782528. Policy #0 lag: (min: 78.0, avg: 117.2, max: 144.0) +[2026-06-02 16:28:04,812][235960] Avg episode reward: [(0, '304.202')] +[2026-06-02 16:28:04,852][243562] Updated weights for policy 0, policy_version 11310 (0.0009) +[2026-06-02 16:28:04,873][242748] Saving new best policy, reward=304.202! +[2026-06-02 16:28:05,480][243562] Updated weights for policy 0, policy_version 11320 (0.0008) +[2026-06-02 16:28:05,641][243562] Updated weights for policy 0, policy_version 11330 (0.0009) +[2026-06-02 16:28:05,804][243562] Updated weights for policy 0, policy_version 11340 (0.0009) +[2026-06-02 16:28:05,980][243562] Updated weights for policy 0, policy_version 11351 (0.0008) +[2026-06-02 16:28:06,144][243562] Updated weights for policy 0, policy_version 11361 (0.0007) +[2026-06-02 16:28:06,315][243562] Updated weights for policy 0, policy_version 11372 (0.0008) +[2026-06-02 16:28:06,999][243562] Updated weights for policy 0, policy_version 11382 (0.0008) +[2026-06-02 16:28:07,156][243562] Updated weights for policy 0, policy_version 11392 (0.0008) +[2026-06-02 16:28:07,349][243562] Updated weights for policy 0, policy_version 11404 (0.0008) +[2026-06-02 16:28:07,511][243562] Updated weights for policy 0, policy_version 11414 (0.0008) +[2026-06-02 16:28:07,677][243562] Updated weights for policy 0, policy_version 11424 (0.0008) +[2026-06-02 16:28:07,863][243562] Updated weights for policy 0, policy_version 11435 (0.0008) +[2026-06-02 16:28:08,551][243562] Updated weights for policy 0, policy_version 11448 (0.0008) +[2026-06-02 16:28:08,720][243562] Updated weights for policy 0, policy_version 11459 (0.0008) +[2026-06-02 16:28:08,887][243562] Updated weights for policy 0, policy_version 11469 (0.0008) +[2026-06-02 16:28:09,053][243562] Updated weights for policy 0, policy_version 11479 (0.0008) +[2026-06-02 16:28:09,231][243562] Updated weights for policy 0, policy_version 11490 (0.0008) +[2026-06-02 16:28:09,411][243562] Updated weights for policy 0, policy_version 11501 (0.0008) +[2026-06-02 16:28:09,811][235960] Fps is (10 sec: 22937.7, 60 sec: 21299.2, 300 sec: 20695.6). Total num frames: 5898240. Throughput: 0: 21472.7. Samples: 5911552. Policy #0 lag: (min: 78.0, avg: 117.2, max: 144.0) +[2026-06-02 16:28:09,812][235960] Avg episode reward: [(0, '307.977')] +[2026-06-02 16:28:09,817][242748] Saving new best policy, reward=307.977! +[2026-06-02 16:28:10,106][243562] Updated weights for policy 0, policy_version 11511 (0.0008) +[2026-06-02 16:28:10,257][243562] Updated weights for policy 0, policy_version 11521 (0.0008) +[2026-06-02 16:28:10,433][243562] Updated weights for policy 0, policy_version 11532 (0.0008) +[2026-06-02 16:28:10,605][243562] Updated weights for policy 0, policy_version 11542 (0.0008) +[2026-06-02 16:28:10,766][243562] Updated weights for policy 0, policy_version 11552 (0.0008) +[2026-06-02 16:28:10,939][243562] Updated weights for policy 0, policy_version 11562 (0.0009) +[2026-06-02 16:28:11,554][243562] Updated weights for policy 0, policy_version 11572 (0.0008) +[2026-06-02 16:28:11,729][243562] Updated weights for policy 0, policy_version 11583 (0.0008) +[2026-06-02 16:28:11,886][243562] Updated weights for policy 0, policy_version 11593 (0.0008) +[2026-06-02 16:28:12,055][243562] Updated weights for policy 0, policy_version 11603 (0.0008) +[2026-06-02 16:28:12,218][243562] Updated weights for policy 0, policy_version 11613 (0.0008) +[2026-06-02 16:28:12,375][243562] Updated weights for policy 0, policy_version 11623 (0.0008) +[2026-06-02 16:28:13,076][243562] Updated weights for policy 0, policy_version 11635 (0.0008) +[2026-06-02 16:28:13,228][243562] Updated weights for policy 0, policy_version 11645 (0.0008) +[2026-06-02 16:28:13,392][243562] Updated weights for policy 0, policy_version 11655 (0.0008) +[2026-06-02 16:28:13,547][243562] Updated weights for policy 0, policy_version 11665 (0.0008) +[2026-06-02 16:28:13,720][243562] Updated weights for policy 0, policy_version 11675 (0.0007) +[2026-06-02 16:28:13,884][243562] Updated weights for policy 0, policy_version 11685 (0.0005) +[2026-06-02 16:28:14,052][243562] Updated weights for policy 0, policy_version 11695 (0.0004) +[2026-06-02 16:28:14,690][243562] Updated weights for policy 0, policy_version 11705 (0.0008) +[2026-06-02 16:28:14,811][235960] Fps is (10 sec: 22937.8, 60 sec: 21299.2, 300 sec: 20677.7). Total num frames: 5996544. Throughput: 0: 21498.4. Samples: 6041984. Policy #0 lag: (min: 63.0, avg: 77.8, max: 127.0) +[2026-06-02 16:28:14,812][235960] Avg episode reward: [(0, '316.297')] +[2026-06-02 16:28:14,844][243562] Updated weights for policy 0, policy_version 11715 (0.0008) +[2026-06-02 16:28:15,011][243562] Updated weights for policy 0, policy_version 11725 (0.0009) +[2026-06-02 16:28:15,195][243562] Updated weights for policy 0, policy_version 11736 (0.0009) +[2026-06-02 16:28:15,358][243562] Updated weights for policy 0, policy_version 11746 (0.0009) +[2026-06-02 16:28:15,541][243562] Updated weights for policy 0, policy_version 11757 (0.0009) +[2026-06-02 16:28:15,578][242748] Saving new best policy, reward=316.297! +[2026-06-02 16:28:16,201][243562] Updated weights for policy 0, policy_version 11767 (0.0009) +[2026-06-02 16:28:16,353][243562] Updated weights for policy 0, policy_version 11777 (0.0008) +[2026-06-02 16:28:16,526][243562] Updated weights for policy 0, policy_version 11787 (0.0008) +[2026-06-02 16:28:16,698][243562] Updated weights for policy 0, policy_version 11798 (0.0009) +[2026-06-02 16:28:16,863][243562] Updated weights for policy 0, policy_version 11808 (0.0009) +[2026-06-02 16:28:17,019][243562] Updated weights for policy 0, policy_version 11818 (0.0008) +[2026-06-02 16:28:17,695][243562] Updated weights for policy 0, policy_version 11829 (0.0008) +[2026-06-02 16:28:17,853][243562] Updated weights for policy 0, policy_version 11839 (0.0008) +[2026-06-02 16:28:18,027][243562] Updated weights for policy 0, policy_version 11850 (0.0008) +[2026-06-02 16:28:18,208][243562] Updated weights for policy 0, policy_version 11861 (0.0009) +[2026-06-02 16:28:18,373][243562] Updated weights for policy 0, policy_version 11871 (0.0009) +[2026-06-02 16:28:18,532][243562] Updated weights for policy 0, policy_version 11881 (0.0008) +[2026-06-02 16:28:19,209][243562] Updated weights for policy 0, policy_version 11892 (0.0009) +[2026-06-02 16:28:19,376][243562] Updated weights for policy 0, policy_version 11903 (0.0008) +[2026-06-02 16:28:19,537][243562] Updated weights for policy 0, policy_version 11913 (0.0008) +[2026-06-02 16:28:19,704][243562] Updated weights for policy 0, policy_version 11923 (0.0009) +[2026-06-02 16:28:19,811][235960] Fps is (10 sec: 19660.8, 60 sec: 21299.2, 300 sec: 20660.5). Total num frames: 6094848. Throughput: 0: 21529.6. Samples: 6107008. Policy #0 lag: (min: 63.0, avg: 77.8, max: 127.0) +[2026-06-02 16:28:19,812][235960] Avg episode reward: [(0, '314.348')] +[2026-06-02 16:28:19,861][243562] Updated weights for policy 0, policy_version 11933 (0.0008) +[2026-06-02 16:28:20,033][243562] Updated weights for policy 0, policy_version 11943 (0.0009) +[2026-06-02 16:28:20,708][243562] Updated weights for policy 0, policy_version 11953 (0.0008) +[2026-06-02 16:28:20,852][243562] Updated weights for policy 0, policy_version 11963 (0.0008) +[2026-06-02 16:28:21,016][243562] Updated weights for policy 0, policy_version 11973 (0.0009) +[2026-06-02 16:28:21,202][243562] Updated weights for policy 0, policy_version 11985 (0.0008) +[2026-06-02 16:28:21,371][243562] Updated weights for policy 0, policy_version 11995 (0.0009) +[2026-06-02 16:28:21,533][243562] Updated weights for policy 0, policy_version 12005 (0.0008) +[2026-06-02 16:28:21,696][243562] Updated weights for policy 0, policy_version 12015 (0.0009) +[2026-06-02 16:28:22,325][243562] Updated weights for policy 0, policy_version 12025 (0.0009) +[2026-06-02 16:28:22,490][243562] Updated weights for policy 0, policy_version 12035 (0.0009) +[2026-06-02 16:28:22,642][243562] Updated weights for policy 0, policy_version 12045 (0.0008) +[2026-06-02 16:28:22,832][243562] Updated weights for policy 0, policy_version 12056 (0.0009) +[2026-06-02 16:28:22,989][243562] Updated weights for policy 0, policy_version 12066 (0.0008) +[2026-06-02 16:28:23,160][243562] Updated weights for policy 0, policy_version 12076 (0.0009) +[2026-06-02 16:28:23,793][243562] Updated weights for policy 0, policy_version 12086 (0.0009) +[2026-06-02 16:28:23,952][243562] Updated weights for policy 0, policy_version 12096 (0.0009) +[2026-06-02 16:28:24,105][243562] Updated weights for policy 0, policy_version 12106 (0.0008) +[2026-06-02 16:28:24,275][243562] Updated weights for policy 0, policy_version 12116 (0.0008) +[2026-06-02 16:28:24,436][243562] Updated weights for policy 0, policy_version 12126 (0.0007) +[2026-06-02 16:28:24,608][243562] Updated weights for policy 0, policy_version 12136 (0.0006) +[2026-06-02 16:28:24,811][235960] Fps is (10 sec: 22937.4, 60 sec: 21845.3, 300 sec: 21104.9). Total num frames: 6225920. Throughput: 0: 21538.2. Samples: 6235904. Policy #0 lag: (min: 63.0, avg: 77.8, max: 127.0) +[2026-06-02 16:28:24,812][235960] Avg episode reward: [(0, '297.881')] +[2026-06-02 16:28:25,249][243562] Updated weights for policy 0, policy_version 12146 (0.0006) +[2026-06-02 16:28:25,407][243562] Updated weights for policy 0, policy_version 12156 (0.0005) +[2026-06-02 16:28:25,586][243562] Updated weights for policy 0, policy_version 12167 (0.0008) +[2026-06-02 16:28:25,745][243562] Updated weights for policy 0, policy_version 12177 (0.0004) +[2026-06-02 16:28:25,916][243562] Updated weights for policy 0, policy_version 12187 (0.0008) +[2026-06-02 16:28:26,097][243562] Updated weights for policy 0, policy_version 12198 (0.0008) +[2026-06-02 16:28:26,254][243562] Updated weights for policy 0, policy_version 12208 (0.0009) +[2026-06-02 16:28:26,896][243562] Updated weights for policy 0, policy_version 12218 (0.0008) +[2026-06-02 16:28:27,056][243562] Updated weights for policy 0, policy_version 12228 (0.0009) +[2026-06-02 16:28:27,235][243562] Updated weights for policy 0, policy_version 12239 (0.0008) +[2026-06-02 16:28:27,401][243562] Updated weights for policy 0, policy_version 12249 (0.0008) +[2026-06-02 16:28:27,579][243562] Updated weights for policy 0, policy_version 12260 (0.0008) +[2026-06-02 16:28:27,750][243562] Updated weights for policy 0, policy_version 12270 (0.0008) +[2026-06-02 16:28:28,410][243562] Updated weights for policy 0, policy_version 12281 (0.0009) +[2026-06-02 16:28:28,571][243562] Updated weights for policy 0, policy_version 12291 (0.0008) +[2026-06-02 16:28:28,735][243562] Updated weights for policy 0, policy_version 12301 (0.0008) +[2026-06-02 16:28:28,930][243562] Updated weights for policy 0, policy_version 12313 (0.0009) +[2026-06-02 16:28:29,098][243562] Updated weights for policy 0, policy_version 12323 (0.0008) +[2026-06-02 16:28:29,256][243562] Updated weights for policy 0, policy_version 12333 (0.0008) +[2026-06-02 16:28:29,811][235960] Fps is (10 sec: 22937.5, 60 sec: 21299.2, 300 sec: 21104.8). Total num frames: 6324224. Throughput: 0: 21489.8. Samples: 6362112. Policy #0 lag: (min: 63.0, avg: 77.8, max: 127.0) +[2026-06-02 16:28:29,812][235960] Avg episode reward: [(0, '318.608')] +[2026-06-02 16:28:29,913][243562] Updated weights for policy 0, policy_version 12343 (0.0009) +[2026-06-02 16:28:30,097][243562] Updated weights for policy 0, policy_version 12355 (0.0008) +[2026-06-02 16:28:30,265][243562] Updated weights for policy 0, policy_version 12365 (0.0009) +[2026-06-02 16:28:30,431][243562] Updated weights for policy 0, policy_version 12375 (0.0008) +[2026-06-02 16:28:30,593][243562] Updated weights for policy 0, policy_version 12385 (0.0008) +[2026-06-02 16:28:30,771][243562] Updated weights for policy 0, policy_version 12396 (0.0008) +[2026-06-02 16:28:30,836][242748] Saving new best policy, reward=318.608! +[2026-06-02 16:28:31,441][243562] Updated weights for policy 0, policy_version 12406 (0.0009) +[2026-06-02 16:28:31,618][243562] Updated weights for policy 0, policy_version 12417 (0.0008) +[2026-06-02 16:28:31,778][243562] Updated weights for policy 0, policy_version 12427 (0.0008) +[2026-06-02 16:28:31,940][243562] Updated weights for policy 0, policy_version 12437 (0.0008) +[2026-06-02 16:28:32,103][243562] Updated weights for policy 0, policy_version 12447 (0.0008) +[2026-06-02 16:28:32,271][243562] Updated weights for policy 0, policy_version 12457 (0.0008) +[2026-06-02 16:28:32,925][243562] Updated weights for policy 0, policy_version 12467 (0.0008) +[2026-06-02 16:28:33,081][243562] Updated weights for policy 0, policy_version 12477 (0.0008) +[2026-06-02 16:28:33,245][243562] Updated weights for policy 0, policy_version 12487 (0.0008) +[2026-06-02 16:28:33,406][243562] Updated weights for policy 0, policy_version 12497 (0.0008) +[2026-06-02 16:28:33,570][243562] Updated weights for policy 0, policy_version 12507 (0.0008) +[2026-06-02 16:28:33,733][243562] Updated weights for policy 0, policy_version 12517 (0.0008) +[2026-06-02 16:28:33,893][243562] Updated weights for policy 0, policy_version 12527 (0.0008) +[2026-06-02 16:28:34,554][243562] Updated weights for policy 0, policy_version 12537 (0.0008) +[2026-06-02 16:28:34,731][243562] Updated weights for policy 0, policy_version 12548 (0.0009) +[2026-06-02 16:28:34,811][235960] Fps is (10 sec: 19660.9, 60 sec: 21299.2, 300 sec: 20993.8). Total num frames: 6422528. Throughput: 0: 21415.8. Samples: 6423168. Policy #0 lag: (min: 63.0, avg: 78.2, max: 127.0) +[2026-06-02 16:28:34,812][235960] Avg episode reward: [(0, '332.390')] +[2026-06-02 16:28:34,891][243562] Updated weights for policy 0, policy_version 12558 (0.0009) +[2026-06-02 16:28:35,055][243562] Updated weights for policy 0, policy_version 12568 (0.0008) +[2026-06-02 16:28:35,219][243562] Updated weights for policy 0, policy_version 12578 (0.0008) +[2026-06-02 16:28:35,386][243562] Updated weights for policy 0, policy_version 12588 (0.0009) +[2026-06-02 16:28:35,442][242748] Saving new best policy, reward=332.390! +[2026-06-02 16:28:36,046][243562] Updated weights for policy 0, policy_version 12598 (0.0009) +[2026-06-02 16:28:36,195][243562] Updated weights for policy 0, policy_version 12608 (0.0008) +[2026-06-02 16:28:36,364][243562] Updated weights for policy 0, policy_version 12618 (0.0008) +[2026-06-02 16:28:36,522][243562] Updated weights for policy 0, policy_version 12628 (0.0009) +[2026-06-02 16:28:36,689][243562] Updated weights for policy 0, policy_version 12638 (0.0008) +[2026-06-02 16:28:36,849][243562] Updated weights for policy 0, policy_version 12648 (0.0009) +[2026-06-02 16:28:37,518][243562] Updated weights for policy 0, policy_version 12658 (0.0008) +[2026-06-02 16:28:37,665][243562] Updated weights for policy 0, policy_version 12668 (0.0008) +[2026-06-02 16:28:37,841][243562] Updated weights for policy 0, policy_version 12679 (0.0008) +[2026-06-02 16:28:38,005][243562] Updated weights for policy 0, policy_version 12689 (0.0008) +[2026-06-02 16:28:38,162][243562] Updated weights for policy 0, policy_version 12699 (0.0008) +[2026-06-02 16:28:38,328][243562] Updated weights for policy 0, policy_version 12709 (0.0008) +[2026-06-02 16:28:38,499][243562] Updated weights for policy 0, policy_version 12719 (0.0008) +[2026-06-02 16:28:39,159][243562] Updated weights for policy 0, policy_version 12729 (0.0008) +[2026-06-02 16:28:39,315][243562] Updated weights for policy 0, policy_version 12739 (0.0008) +[2026-06-02 16:28:39,497][243562] Updated weights for policy 0, policy_version 12750 (0.0009) +[2026-06-02 16:28:39,657][243562] Updated weights for policy 0, policy_version 12760 (0.0008) +[2026-06-02 16:28:39,811][235960] Fps is (10 sec: 19660.7, 60 sec: 21299.2, 300 sec: 20882.7). Total num frames: 6520832. Throughput: 0: 21319.1. Samples: 6548224. Policy #0 lag: (min: 63.0, avg: 78.2, max: 127.0) +[2026-06-02 16:28:39,812][235960] Avg episode reward: [(0, '360.934')] +[2026-06-02 16:28:39,837][243562] Updated weights for policy 0, policy_version 12771 (0.0008) +[2026-06-02 16:28:40,008][243562] Updated weights for policy 0, policy_version 12781 (0.0008) +[2026-06-02 16:28:40,047][242748] Saving new best policy, reward=360.934! +[2026-06-02 16:28:40,658][243562] Updated weights for policy 0, policy_version 12792 (0.0009) +[2026-06-02 16:28:40,814][243562] Updated weights for policy 0, policy_version 12802 (0.0009) +[2026-06-02 16:28:40,996][243562] Updated weights for policy 0, policy_version 12813 (0.0008) +[2026-06-02 16:28:41,159][243562] Updated weights for policy 0, policy_version 12823 (0.0008) +[2026-06-02 16:28:41,331][243562] Updated weights for policy 0, policy_version 12834 (0.0008) +[2026-06-02 16:28:41,500][243562] Updated weights for policy 0, policy_version 12844 (0.0008) +[2026-06-02 16:28:42,170][243562] Updated weights for policy 0, policy_version 12854 (0.0009) +[2026-06-02 16:28:42,343][243562] Updated weights for policy 0, policy_version 12865 (0.0008) +[2026-06-02 16:28:42,524][243562] Updated weights for policy 0, policy_version 12876 (0.0008) +[2026-06-02 16:28:42,688][243562] Updated weights for policy 0, policy_version 12886 (0.0008) +[2026-06-02 16:28:42,853][243562] Updated weights for policy 0, policy_version 12896 (0.0008) +[2026-06-02 16:28:43,016][243562] Updated weights for policy 0, policy_version 12906 (0.0008) +[2026-06-02 16:28:43,668][243562] Updated weights for policy 0, policy_version 12916 (0.0008) +[2026-06-02 16:28:43,820][243562] Updated weights for policy 0, policy_version 12926 (0.0008) +[2026-06-02 16:28:43,976][243562] Updated weights for policy 0, policy_version 12936 (0.0008) +[2026-06-02 16:28:44,143][243562] Updated weights for policy 0, policy_version 12946 (0.0009) +[2026-06-02 16:28:44,317][243562] Updated weights for policy 0, policy_version 12957 (0.0008) +[2026-06-02 16:28:44,488][243562] Updated weights for policy 0, policy_version 12967 (0.0009) +[2026-06-02 16:28:44,811][235960] Fps is (10 sec: 22937.4, 60 sec: 21845.3, 300 sec: 20993.8). Total num frames: 6651904. Throughput: 0: 21245.1. Samples: 6674560. Policy #0 lag: (min: 63.0, avg: 78.1, max: 127.0) +[2026-06-02 16:28:44,812][235960] Avg episode reward: [(0, '417.849')] +[2026-06-02 16:28:44,818][242748] Saving new best policy, reward=417.849! +[2026-06-02 16:28:45,135][243562] Updated weights for policy 0, policy_version 12977 (0.0009) +[2026-06-02 16:28:45,292][243562] Updated weights for policy 0, policy_version 12988 (0.0009) +[2026-06-02 16:28:45,463][243562] Updated weights for policy 0, policy_version 12998 (0.0008) +[2026-06-02 16:28:45,638][243562] Updated weights for policy 0, policy_version 13009 (0.0009) +[2026-06-02 16:28:45,820][243562] Updated weights for policy 0, policy_version 13020 (0.0009) +[2026-06-02 16:28:45,976][243562] Updated weights for policy 0, policy_version 13030 (0.0009) +[2026-06-02 16:28:46,652][243562] Updated weights for policy 0, policy_version 13041 (0.0009) +[2026-06-02 16:28:46,793][243562] Updated weights for policy 0, policy_version 13051 (0.0009) +[2026-06-02 16:28:46,953][243562] Updated weights for policy 0, policy_version 13061 (0.0008) +[2026-06-02 16:28:47,123][243562] Updated weights for policy 0, policy_version 13071 (0.0009) +[2026-06-02 16:28:47,297][243562] Updated weights for policy 0, policy_version 13082 (0.0008) +[2026-06-02 16:28:47,473][243562] Updated weights for policy 0, policy_version 13093 (0.0008) +[2026-06-02 16:28:47,646][243562] Updated weights for policy 0, policy_version 13103 (0.0008) +[2026-06-02 16:28:48,319][243562] Updated weights for policy 0, policy_version 13113 (0.0010) +[2026-06-02 16:28:48,492][243562] Updated weights for policy 0, policy_version 13124 (0.0008) +[2026-06-02 16:28:48,672][243562] Updated weights for policy 0, policy_version 13135 (0.0008) +[2026-06-02 16:28:48,868][243562] Updated weights for policy 0, policy_version 13147 (0.0008) +[2026-06-02 16:28:49,034][243562] Updated weights for policy 0, policy_version 13157 (0.0008) +[2026-06-02 16:28:49,208][243562] Updated weights for policy 0, policy_version 13168 (0.0008) +[2026-06-02 16:28:49,811][235960] Fps is (10 sec: 22937.8, 60 sec: 21299.2, 300 sec: 20993.8). Total num frames: 6750208. Throughput: 0: 21239.5. Samples: 6738304. Policy #0 lag: (min: 63.0, avg: 78.1, max: 127.0) +[2026-06-02 16:28:49,812][235960] Avg episode reward: [(0, '435.520')] +[2026-06-02 16:28:49,873][243562] Updated weights for policy 0, policy_version 13178 (0.0009) +[2026-06-02 16:28:50,042][243562] Updated weights for policy 0, policy_version 13189 (0.0007) +[2026-06-02 16:28:50,204][243562] Updated weights for policy 0, policy_version 13199 (0.0005) +[2026-06-02 16:28:50,384][243562] Updated weights for policy 0, policy_version 13210 (0.0004) +[2026-06-02 16:28:50,544][243562] Updated weights for policy 0, policy_version 13220 (0.0005) +[2026-06-02 16:28:50,708][243562] Updated weights for policy 0, policy_version 13230 (0.0005) +[2026-06-02 16:28:50,731][242748] Saving new best policy, reward=435.520! +[2026-06-02 16:28:51,360][243562] Updated weights for policy 0, policy_version 13240 (0.0004) +[2026-06-02 16:28:51,517][243562] Updated weights for policy 0, policy_version 13250 (0.0005) +[2026-06-02 16:28:51,678][243562] Updated weights for policy 0, policy_version 13260 (0.0008) +[2026-06-02 16:28:51,855][243562] Updated weights for policy 0, policy_version 13271 (0.0008) +[2026-06-02 16:28:52,038][243562] Updated weights for policy 0, policy_version 13282 (0.0008) +[2026-06-02 16:28:52,206][243562] Updated weights for policy 0, policy_version 13293 (0.0008) +[2026-06-02 16:28:52,864][243562] Updated weights for policy 0, policy_version 13304 (0.0008) +[2026-06-02 16:28:53,024][243562] Updated weights for policy 0, policy_version 13314 (0.0008) +[2026-06-02 16:28:53,188][243562] Updated weights for policy 0, policy_version 13324 (0.0008) +[2026-06-02 16:28:53,342][243562] Updated weights for policy 0, policy_version 13334 (0.0008) +[2026-06-02 16:28:53,509][243562] Updated weights for policy 0, policy_version 13344 (0.0009) +[2026-06-02 16:28:53,687][243562] Updated weights for policy 0, policy_version 13355 (0.0008) +[2026-06-02 16:28:54,372][243562] Updated weights for policy 0, policy_version 13365 (0.0009) +[2026-06-02 16:28:54,541][243562] Updated weights for policy 0, policy_version 13376 (0.0008) +[2026-06-02 16:28:54,701][243562] Updated weights for policy 0, policy_version 13386 (0.0008) +[2026-06-02 16:28:54,811][235960] Fps is (10 sec: 19660.8, 60 sec: 21299.2, 300 sec: 20993.7). Total num frames: 6848512. Throughput: 0: 21230.9. Samples: 6866944. Policy #0 lag: (min: 63.0, avg: 78.1, max: 127.0) +[2026-06-02 16:28:54,812][235960] Avg episode reward: [(0, '438.660')] +[2026-06-02 16:28:54,867][243562] Updated weights for policy 0, policy_version 13396 (0.0008) +[2026-06-02 16:28:55,036][243562] Updated weights for policy 0, policy_version 13406 (0.0008) +[2026-06-02 16:28:55,226][243562] Updated weights for policy 0, policy_version 13417 (0.0008) +[2026-06-02 16:28:55,331][242748] Saving new best policy, reward=438.660! +[2026-06-02 16:28:55,868][243562] Updated weights for policy 0, policy_version 13427 (0.0009) +[2026-06-02 16:28:56,032][243562] Updated weights for policy 0, policy_version 13437 (0.0008) +[2026-06-02 16:28:56,193][243562] Updated weights for policy 0, policy_version 13447 (0.0008) +[2026-06-02 16:28:56,360][243562] Updated weights for policy 0, policy_version 13457 (0.0008) +[2026-06-02 16:28:56,522][243562] Updated weights for policy 0, policy_version 13467 (0.0008) +[2026-06-02 16:28:56,708][243562] Updated weights for policy 0, policy_version 13478 (0.0008) +[2026-06-02 16:28:56,864][243562] Updated weights for policy 0, policy_version 13488 (0.0008) +[2026-06-02 16:28:57,522][243562] Updated weights for policy 0, policy_version 13498 (0.0008) +[2026-06-02 16:28:57,698][243562] Updated weights for policy 0, policy_version 13509 (0.0008) +[2026-06-02 16:28:57,860][243562] Updated weights for policy 0, policy_version 13519 (0.0009) +[2026-06-02 16:28:58,023][243562] Updated weights for policy 0, policy_version 13529 (0.0009) +[2026-06-02 16:28:58,205][243562] Updated weights for policy 0, policy_version 13540 (0.0008) +[2026-06-02 16:28:58,366][243562] Updated weights for policy 0, policy_version 13550 (0.0008) +[2026-06-02 16:28:59,011][243562] Updated weights for policy 0, policy_version 13560 (0.0008) +[2026-06-02 16:28:59,170][243562] Updated weights for policy 0, policy_version 13570 (0.0008) +[2026-06-02 16:28:59,343][243562] Updated weights for policy 0, policy_version 13580 (0.0008) +[2026-06-02 16:28:59,531][243562] Updated weights for policy 0, policy_version 13592 (0.0008) +[2026-06-02 16:28:59,704][243562] Updated weights for policy 0, policy_version 13602 (0.0008) +[2026-06-02 16:28:59,811][235960] Fps is (10 sec: 19660.7, 60 sec: 21299.2, 300 sec: 20993.7). Total num frames: 6946816. Throughput: 0: 21233.7. Samples: 6997504. Policy #0 lag: (min: 51.0, avg: 92.9, max: 115.0) +[2026-06-02 16:28:59,812][235960] Avg episode reward: [(0, '429.285')] +[2026-06-02 16:28:59,862][243562] Updated weights for policy 0, policy_version 13612 (0.0008) +[2026-06-02 16:29:00,492][243562] Updated weights for policy 0, policy_version 13622 (0.0008) +[2026-06-02 16:29:00,665][243562] Updated weights for policy 0, policy_version 13633 (0.0009) +[2026-06-02 16:29:00,836][243562] Updated weights for policy 0, policy_version 13644 (0.0006) +[2026-06-02 16:29:01,008][243562] Updated weights for policy 0, policy_version 13654 (0.0004) +[2026-06-02 16:29:01,201][243562] Updated weights for policy 0, policy_version 13666 (0.0004) +[2026-06-02 16:29:01,372][243562] Updated weights for policy 0, policy_version 13676 (0.0004) +[2026-06-02 16:29:02,005][243562] Updated weights for policy 0, policy_version 13686 (0.0004) +[2026-06-02 16:29:02,165][243562] Updated weights for policy 0, policy_version 13696 (0.0007) +[2026-06-02 16:29:02,324][243562] Updated weights for policy 0, policy_version 13706 (0.0008) +[2026-06-02 16:29:02,498][243562] Updated weights for policy 0, policy_version 13717 (0.0008) +[2026-06-02 16:29:02,672][243562] Updated weights for policy 0, policy_version 13727 (0.0008) +[2026-06-02 16:29:02,829][243562] Updated weights for policy 0, policy_version 13737 (0.0008) +[2026-06-02 16:29:03,520][243562] Updated weights for policy 0, policy_version 13748 (0.0008) +[2026-06-02 16:29:03,669][243562] Updated weights for policy 0, policy_version 13758 (0.0008) +[2026-06-02 16:29:03,824][243562] Updated weights for policy 0, policy_version 13768 (0.0008) +[2026-06-02 16:29:04,024][243562] Updated weights for policy 0, policy_version 13780 (0.0008) +[2026-06-02 16:29:04,207][243562] Updated weights for policy 0, policy_version 13791 (0.0008) +[2026-06-02 16:29:04,374][243562] Updated weights for policy 0, policy_version 13801 (0.0008) +[2026-06-02 16:29:04,811][235960] Fps is (10 sec: 22937.5, 60 sec: 21845.3, 300 sec: 21104.8). Total num frames: 7077888. Throughput: 0: 21211.0. Samples: 7061504. Policy #0 lag: (min: 51.0, avg: 92.9, max: 115.0) +[2026-06-02 16:29:04,812][235960] Avg episode reward: [(0, '393.301')] +[2026-06-02 16:29:05,014][243562] Updated weights for policy 0, policy_version 13811 (0.0009) +[2026-06-02 16:29:05,188][243562] Updated weights for policy 0, policy_version 13822 (0.0009) +[2026-06-02 16:29:05,369][243562] Updated weights for policy 0, policy_version 13833 (0.0008) +[2026-06-02 16:29:05,536][243562] Updated weights for policy 0, policy_version 13843 (0.0009) +[2026-06-02 16:29:05,698][243562] Updated weights for policy 0, policy_version 13853 (0.0009) +[2026-06-02 16:29:05,861][243562] Updated weights for policy 0, policy_version 13863 (0.0008) +[2026-06-02 16:29:06,542][243562] Updated weights for policy 0, policy_version 13875 (0.0009) +[2026-06-02 16:29:06,701][243562] Updated weights for policy 0, policy_version 13885 (0.0008) +[2026-06-02 16:29:06,866][243562] Updated weights for policy 0, policy_version 13895 (0.0008) +[2026-06-02 16:29:07,025][243562] Updated weights for policy 0, policy_version 13905 (0.0008) +[2026-06-02 16:29:07,187][243562] Updated weights for policy 0, policy_version 13915 (0.0008) +[2026-06-02 16:29:07,346][243562] Updated weights for policy 0, policy_version 13925 (0.0009) +[2026-06-02 16:29:07,531][243562] Updated weights for policy 0, policy_version 13936 (0.0008) +[2026-06-02 16:29:08,190][243562] Updated weights for policy 0, policy_version 13946 (0.0009) +[2026-06-02 16:29:08,351][243562] Updated weights for policy 0, policy_version 13956 (0.0009) +[2026-06-02 16:29:08,523][243562] Updated weights for policy 0, policy_version 13967 (0.0008) +[2026-06-02 16:29:08,690][243562] Updated weights for policy 0, policy_version 13977 (0.0009) +[2026-06-02 16:29:08,863][243562] Updated weights for policy 0, policy_version 13987 (0.0009) +[2026-06-02 16:29:09,024][243562] Updated weights for policy 0, policy_version 13997 (0.0008) +[2026-06-02 16:29:09,665][243562] Updated weights for policy 0, policy_version 14008 (0.0009) +[2026-06-02 16:29:09,811][235960] Fps is (10 sec: 22937.4, 60 sec: 21299.2, 300 sec: 21104.8). Total num frames: 7176192. Throughput: 0: 21205.3. Samples: 7190144. Policy #0 lag: (min: 45.0, avg: 59.9, max: 109.0) +[2026-06-02 16:29:09,812][235960] Avg episode reward: [(0, '386.254')] +[2026-06-02 16:29:09,828][243562] Updated weights for policy 0, policy_version 14018 (0.0008) +[2026-06-02 16:29:09,987][243562] Updated weights for policy 0, policy_version 14028 (0.0008) +[2026-06-02 16:29:10,150][243562] Updated weights for policy 0, policy_version 14038 (0.0008) +[2026-06-02 16:29:10,313][243562] Updated weights for policy 0, policy_version 14048 (0.0008) +[2026-06-02 16:29:10,482][243562] Updated weights for policy 0, policy_version 14058 (0.0008) +[2026-06-02 16:29:11,138][243562] Updated weights for policy 0, policy_version 14068 (0.0008) +[2026-06-02 16:29:11,309][243562] Updated weights for policy 0, policy_version 14079 (0.0008) +[2026-06-02 16:29:11,466][243562] Updated weights for policy 0, policy_version 14089 (0.0008) +[2026-06-02 16:29:11,644][243562] Updated weights for policy 0, policy_version 14100 (0.0009) +[2026-06-02 16:29:11,832][243562] Updated weights for policy 0, policy_version 14111 (0.0008) +[2026-06-02 16:29:11,998][243562] Updated weights for policy 0, policy_version 14121 (0.0008) +[2026-06-02 16:29:12,660][243562] Updated weights for policy 0, policy_version 14132 (0.0009) +[2026-06-02 16:29:12,818][243562] Updated weights for policy 0, policy_version 14142 (0.0008) +[2026-06-02 16:29:12,977][243562] Updated weights for policy 0, policy_version 14152 (0.0008) +[2026-06-02 16:29:13,140][243562] Updated weights for policy 0, policy_version 14162 (0.0008) +[2026-06-02 16:29:13,309][243562] Updated weights for policy 0, policy_version 14172 (0.0008) +[2026-06-02 16:29:13,459][243562] Updated weights for policy 0, policy_version 14182 (0.0008) +[2026-06-02 16:29:13,627][243562] Updated weights for policy 0, policy_version 14192 (0.0008) +[2026-06-02 16:29:14,305][243562] Updated weights for policy 0, policy_version 14203 (0.0006) +[2026-06-02 16:29:14,473][243562] Updated weights for policy 0, policy_version 14213 (0.0008) +[2026-06-02 16:29:14,625][243562] Updated weights for policy 0, policy_version 14223 (0.0008) +[2026-06-02 16:29:14,790][243562] Updated weights for policy 0, policy_version 14233 (0.0008) +[2026-06-02 16:29:14,811][235960] Fps is (10 sec: 19660.9, 60 sec: 21299.2, 300 sec: 21104.8). Total num frames: 7274496. Throughput: 0: 21276.4. Samples: 7319552. Policy #0 lag: (min: 45.0, avg: 59.9, max: 109.0) +[2026-06-02 16:29:14,812][235960] Avg episode reward: [(0, '406.011')] +[2026-06-02 16:29:14,952][243562] Updated weights for policy 0, policy_version 14243 (0.0008) +[2026-06-02 16:29:15,119][243562] Updated weights for policy 0, policy_version 14253 (0.0009) +[2026-06-02 16:29:15,768][243562] Updated weights for policy 0, policy_version 14263 (0.0009) +[2026-06-02 16:29:15,943][243562] Updated weights for policy 0, policy_version 14274 (0.0008) +[2026-06-02 16:29:16,104][243562] Updated weights for policy 0, policy_version 14284 (0.0009) +[2026-06-02 16:29:16,262][243562] Updated weights for policy 0, policy_version 14294 (0.0009) +[2026-06-02 16:29:16,434][243562] Updated weights for policy 0, policy_version 14305 (0.0009) +[2026-06-02 16:29:16,605][243562] Updated weights for policy 0, policy_version 14315 (0.0008) +[2026-06-02 16:29:17,278][243562] Updated weights for policy 0, policy_version 14326 (0.0008) +[2026-06-02 16:29:17,437][243562] Updated weights for policy 0, policy_version 14336 (0.0008) +[2026-06-02 16:29:17,595][243562] Updated weights for policy 0, policy_version 14346 (0.0008) +[2026-06-02 16:29:17,778][243562] Updated weights for policy 0, policy_version 14357 (0.0009) +[2026-06-02 16:29:17,949][243562] Updated weights for policy 0, policy_version 14367 (0.0010) +[2026-06-02 16:29:18,114][243562] Updated weights for policy 0, policy_version 14377 (0.0009) +[2026-06-02 16:29:18,759][243562] Updated weights for policy 0, policy_version 14387 (0.0009) +[2026-06-02 16:29:18,947][243562] Updated weights for policy 0, policy_version 14399 (0.0008) +[2026-06-02 16:29:19,110][243562] Updated weights for policy 0, policy_version 14409 (0.0009) +[2026-06-02 16:29:19,277][243562] Updated weights for policy 0, policy_version 14419 (0.0009) +[2026-06-02 16:29:19,435][243562] Updated weights for policy 0, policy_version 14429 (0.0008) +[2026-06-02 16:29:19,606][243562] Updated weights for policy 0, policy_version 14439 (0.0009) +[2026-06-02 16:29:19,811][235960] Fps is (10 sec: 22937.7, 60 sec: 21845.3, 300 sec: 21215.9). Total num frames: 7405568. Throughput: 0: 21344.7. Samples: 7383680. Policy #0 lag: (min: 35.0, avg: 67.3, max: 99.0) +[2026-06-02 16:29:19,812][235960] Avg episode reward: [(0, '396.686')] +[2026-06-02 16:29:20,275][243562] Updated weights for policy 0, policy_version 14449 (0.0008) +[2026-06-02 16:29:20,431][243562] Updated weights for policy 0, policy_version 14459 (0.0009) +[2026-06-02 16:29:20,582][243562] Updated weights for policy 0, policy_version 14469 (0.0009) +[2026-06-02 16:29:20,749][243562] Updated weights for policy 0, policy_version 14479 (0.0008) +[2026-06-02 16:29:20,911][243562] Updated weights for policy 0, policy_version 14489 (0.0008) +[2026-06-02 16:29:21,073][243562] Updated weights for policy 0, policy_version 14499 (0.0008) +[2026-06-02 16:29:21,242][243562] Updated weights for policy 0, policy_version 14509 (0.0009) +[2026-06-02 16:29:21,885][243562] Updated weights for policy 0, policy_version 14519 (0.0009) +[2026-06-02 16:29:22,047][243562] Updated weights for policy 0, policy_version 14529 (0.0010) +[2026-06-02 16:29:22,210][243562] Updated weights for policy 0, policy_version 14539 (0.0008) +[2026-06-02 16:29:22,370][243562] Updated weights for policy 0, policy_version 14549 (0.0008) +[2026-06-02 16:29:22,538][243562] Updated weights for policy 0, policy_version 14559 (0.0009) +[2026-06-02 16:29:22,697][243562] Updated weights for policy 0, policy_version 14569 (0.0008) +[2026-06-02 16:29:23,347][243562] Updated weights for policy 0, policy_version 14579 (0.0008) +[2026-06-02 16:29:23,497][243562] Updated weights for policy 0, policy_version 14589 (0.0008) +[2026-06-02 16:29:23,673][243562] Updated weights for policy 0, policy_version 14600 (0.0008) +[2026-06-02 16:29:23,843][243562] Updated weights for policy 0, policy_version 14610 (0.0008) +[2026-06-02 16:29:24,027][243562] Updated weights for policy 0, policy_version 14621 (0.0008) +[2026-06-02 16:29:24,189][243562] Updated weights for policy 0, policy_version 14631 (0.0009) +[2026-06-02 16:29:24,811][235960] Fps is (10 sec: 22937.6, 60 sec: 21299.2, 300 sec: 21215.9). Total num frames: 7503872. Throughput: 0: 21435.8. Samples: 7512832. Policy #0 lag: (min: 35.0, avg: 67.3, max: 99.0) +[2026-06-02 16:29:24,812][235960] Avg episode reward: [(0, '432.449')] +[2026-06-02 16:29:24,844][243562] Updated weights for policy 0, policy_version 14641 (0.0008) +[2026-06-02 16:29:25,004][243562] Updated weights for policy 0, policy_version 14651 (0.0010) +[2026-06-02 16:29:25,158][243562] Updated weights for policy 0, policy_version 14661 (0.0008) +[2026-06-02 16:29:25,354][243562] Updated weights for policy 0, policy_version 14673 (0.0008) +[2026-06-02 16:29:25,534][243562] Updated weights for policy 0, policy_version 14684 (0.0008) +[2026-06-02 16:29:25,707][243562] Updated weights for policy 0, policy_version 14695 (0.0008) +[2026-06-02 16:29:26,395][243562] Updated weights for policy 0, policy_version 14706 (0.0008) +[2026-06-02 16:29:26,564][243562] Updated weights for policy 0, policy_version 14717 (0.0005) +[2026-06-02 16:29:26,730][243562] Updated weights for policy 0, policy_version 14727 (0.0005) +[2026-06-02 16:29:26,894][243562] Updated weights for policy 0, policy_version 14737 (0.0005) +[2026-06-02 16:29:27,055][243562] Updated weights for policy 0, policy_version 14747 (0.0005) +[2026-06-02 16:29:27,236][243562] Updated weights for policy 0, policy_version 14758 (0.0005) +[2026-06-02 16:29:27,401][243562] Updated weights for policy 0, policy_version 14768 (0.0005) +[2026-06-02 16:29:28,029][243562] Updated weights for policy 0, policy_version 14778 (0.0004) +[2026-06-02 16:29:28,214][243562] Updated weights for policy 0, policy_version 14789 (0.0005) +[2026-06-02 16:29:28,377][243562] Updated weights for policy 0, policy_version 14799 (0.0005) +[2026-06-02 16:29:28,542][243562] Updated weights for policy 0, policy_version 14809 (0.0005) +[2026-06-02 16:29:28,709][243562] Updated weights for policy 0, policy_version 14819 (0.0005) +[2026-06-02 16:29:28,878][243562] Updated weights for policy 0, policy_version 14829 (0.0005) +[2026-06-02 16:29:29,515][243562] Updated weights for policy 0, policy_version 14839 (0.0005) +[2026-06-02 16:29:29,679][243562] Updated weights for policy 0, policy_version 14849 (0.0005) +[2026-06-02 16:29:29,811][235960] Fps is (10 sec: 19660.9, 60 sec: 21299.2, 300 sec: 21104.8). Total num frames: 7602176. Throughput: 0: 21504.0. Samples: 7642240. Policy #0 lag: (min: 35.0, avg: 67.3, max: 99.0) +[2026-06-02 16:29:29,812][235960] Avg episode reward: [(0, '427.122')] +[2026-06-02 16:29:29,834][243562] Updated weights for policy 0, policy_version 14859 (0.0005) +[2026-06-02 16:29:30,010][243562] Updated weights for policy 0, policy_version 14869 (0.0005) +[2026-06-02 16:29:30,171][243562] Updated weights for policy 0, policy_version 14879 (0.0005) +[2026-06-02 16:29:30,351][243562] Updated weights for policy 0, policy_version 14890 (0.0005) +[2026-06-02 16:29:30,995][243562] Updated weights for policy 0, policy_version 14900 (0.0005) +[2026-06-02 16:29:31,151][243562] Updated weights for policy 0, policy_version 14910 (0.0005) +[2026-06-02 16:29:31,315][243562] Updated weights for policy 0, policy_version 14920 (0.0005) +[2026-06-02 16:29:31,477][243562] Updated weights for policy 0, policy_version 14930 (0.0005) +[2026-06-02 16:29:31,642][243562] Updated weights for policy 0, policy_version 14940 (0.0005) +[2026-06-02 16:29:31,800][243562] Updated weights for policy 0, policy_version 14950 (0.0005) +[2026-06-02 16:29:32,469][243562] Updated weights for policy 0, policy_version 14961 (0.0006) +[2026-06-02 16:29:32,671][243562] Updated weights for policy 0, policy_version 14974 (0.0008) +[2026-06-02 16:29:32,828][243562] Updated weights for policy 0, policy_version 14984 (0.0009) +[2026-06-02 16:29:32,996][243562] Updated weights for policy 0, policy_version 14994 (0.0010) +[2026-06-02 16:29:33,162][243562] Updated weights for policy 0, policy_version 15004 (0.0012) +[2026-06-02 16:29:33,322][243562] Updated weights for policy 0, policy_version 15014 (0.0013) +[2026-06-02 16:29:34,013][243562] Updated weights for policy 0, policy_version 15025 (0.0011) +[2026-06-02 16:29:34,162][243562] Updated weights for policy 0, policy_version 15035 (0.0009) +[2026-06-02 16:29:34,321][243562] Updated weights for policy 0, policy_version 15045 (0.0008) +[2026-06-02 16:29:34,488][243562] Updated weights for policy 0, policy_version 15055 (0.0008) +[2026-06-02 16:29:34,652][243562] Updated weights for policy 0, policy_version 15065 (0.0009) +[2026-06-02 16:29:34,811][235960] Fps is (10 sec: 19660.9, 60 sec: 21299.2, 300 sec: 21104.8). Total num frames: 7700480. Throughput: 0: 21526.7. Samples: 7707008. Policy #0 lag: (min: 63.0, avg: 78.6, max: 127.0) +[2026-06-02 16:29:34,812][235960] Avg episode reward: [(0, '404.635')] +[2026-06-02 16:29:34,835][243562] Updated weights for policy 0, policy_version 15076 (0.0009) +[2026-06-02 16:29:35,023][243562] Updated weights for policy 0, policy_version 15088 (0.0009) +[2026-06-02 16:29:35,691][243562] Updated weights for policy 0, policy_version 15098 (0.0008) +[2026-06-02 16:29:35,858][243562] Updated weights for policy 0, policy_version 15108 (0.0010) +[2026-06-02 16:29:36,033][243562] Updated weights for policy 0, policy_version 15119 (0.0008) +[2026-06-02 16:29:36,198][243562] Updated weights for policy 0, policy_version 15129 (0.0008) +[2026-06-02 16:29:36,362][243562] Updated weights for policy 0, policy_version 15139 (0.0008) +[2026-06-02 16:29:36,530][243562] Updated weights for policy 0, policy_version 15149 (0.0008) +[2026-06-02 16:29:37,165][243562] Updated weights for policy 0, policy_version 15159 (0.0010) +[2026-06-02 16:29:37,357][243562] Updated weights for policy 0, policy_version 15171 (0.0009) +[2026-06-02 16:29:37,541][243562] Updated weights for policy 0, policy_version 15182 (0.0011) +[2026-06-02 16:29:37,700][243562] Updated weights for policy 0, policy_version 15192 (0.0010) +[2026-06-02 16:29:37,867][243562] Updated weights for policy 0, policy_version 15202 (0.0010) +[2026-06-02 16:29:38,034][243562] Updated weights for policy 0, policy_version 15212 (0.0010) +[2026-06-02 16:29:38,670][243562] Updated weights for policy 0, policy_version 15222 (0.0008) +[2026-06-02 16:29:38,848][243562] Updated weights for policy 0, policy_version 15233 (0.0008) +[2026-06-02 16:29:39,013][243562] Updated weights for policy 0, policy_version 15243 (0.0008) +[2026-06-02 16:29:39,176][243562] Updated weights for policy 0, policy_version 15253 (0.0008) +[2026-06-02 16:29:39,330][243562] Updated weights for policy 0, policy_version 15263 (0.0009) +[2026-06-02 16:29:39,519][243562] Updated weights for policy 0, policy_version 15274 (0.0010) +[2026-06-02 16:29:39,811][235960] Fps is (10 sec: 22937.5, 60 sec: 21845.3, 300 sec: 21215.9). Total num frames: 7831552. Throughput: 0: 21549.5. Samples: 7836672. Policy #0 lag: (min: 63.0, avg: 78.6, max: 127.0) +[2026-06-02 16:29:39,812][235960] Avg episode reward: [(0, '416.602')] +[2026-06-02 16:29:40,166][243562] Updated weights for policy 0, policy_version 15284 (0.0008) +[2026-06-02 16:29:40,329][243562] Updated weights for policy 0, policy_version 15294 (0.0008) +[2026-06-02 16:29:40,507][243562] Updated weights for policy 0, policy_version 15305 (0.0009) +[2026-06-02 16:29:40,723][243562] Updated weights for policy 0, policy_version 15318 (0.0009) +[2026-06-02 16:29:40,901][243562] Updated weights for policy 0, policy_version 15329 (0.0009) +[2026-06-02 16:29:41,063][243562] Updated weights for policy 0, policy_version 15339 (0.0009) +[2026-06-02 16:29:41,694][243562] Updated weights for policy 0, policy_version 15349 (0.0009) +[2026-06-02 16:29:41,849][243562] Updated weights for policy 0, policy_version 15359 (0.0008) +[2026-06-02 16:29:42,008][243562] Updated weights for policy 0, policy_version 15369 (0.0009) +[2026-06-02 16:29:42,174][243562] Updated weights for policy 0, policy_version 15379 (0.0009) +[2026-06-02 16:29:42,330][243562] Updated weights for policy 0, policy_version 15389 (0.0008) +[2026-06-02 16:29:42,512][243562] Updated weights for policy 0, policy_version 15400 (0.0009) +[2026-06-02 16:29:43,183][243562] Updated weights for policy 0, policy_version 15411 (0.0008) +[2026-06-02 16:29:43,375][243562] Updated weights for policy 0, policy_version 15423 (0.0009) +[2026-06-02 16:29:43,542][243562] Updated weights for policy 0, policy_version 15433 (0.0009) +[2026-06-02 16:29:43,702][243562] Updated weights for policy 0, policy_version 15443 (0.0009) +[2026-06-02 16:29:43,863][243562] Updated weights for policy 0, policy_version 15453 (0.0008) +[2026-06-02 16:29:44,043][243562] Updated weights for policy 0, policy_version 15464 (0.0008) +[2026-06-02 16:29:44,719][243562] Updated weights for policy 0, policy_version 15475 (0.0009) +[2026-06-02 16:29:44,811][235960] Fps is (10 sec: 22937.6, 60 sec: 21299.2, 300 sec: 21215.9). Total num frames: 7929856. Throughput: 0: 21518.2. Samples: 7965824. Policy #0 lag: (min: 18.0, avg: 44.1, max: 82.0) +[2026-06-02 16:29:44,812][235960] Avg episode reward: [(0, '433.525')] +[2026-06-02 16:29:44,874][243562] Updated weights for policy 0, policy_version 15485 (0.0008) +[2026-06-02 16:29:45,058][243562] Updated weights for policy 0, policy_version 15497 (0.0008) +[2026-06-02 16:29:45,230][243562] Updated weights for policy 0, policy_version 15507 (0.0008) +[2026-06-02 16:29:45,404][243562] Updated weights for policy 0, policy_version 15518 (0.0009) +[2026-06-02 16:29:45,569][243562] Updated weights for policy 0, policy_version 15528 (0.0008) +[2026-06-02 16:29:46,248][243562] Updated weights for policy 0, policy_version 15539 (0.0009) +[2026-06-02 16:29:46,427][243562] Updated weights for policy 0, policy_version 15550 (0.0009) +[2026-06-02 16:29:46,582][243562] Updated weights for policy 0, policy_version 15560 (0.0008) +[2026-06-02 16:29:46,754][243562] Updated weights for policy 0, policy_version 15570 (0.0008) +[2026-06-02 16:29:46,912][243562] Updated weights for policy 0, policy_version 15580 (0.0008) +[2026-06-02 16:29:47,082][243562] Updated weights for policy 0, policy_version 15590 (0.0008) +[2026-06-02 16:29:47,238][243562] Updated weights for policy 0, policy_version 15600 (0.0009) +[2026-06-02 16:29:47,891][243562] Updated weights for policy 0, policy_version 15611 (0.0009) +[2026-06-02 16:29:48,056][243562] Updated weights for policy 0, policy_version 15621 (0.0008) +[2026-06-02 16:29:48,215][243562] Updated weights for policy 0, policy_version 15631 (0.0008) +[2026-06-02 16:29:48,378][243562] Updated weights for policy 0, policy_version 15641 (0.0008) +[2026-06-02 16:29:48,553][243562] Updated weights for policy 0, policy_version 15652 (0.0009) +[2026-06-02 16:29:48,739][243562] Updated weights for policy 0, policy_version 15663 (0.0009) +[2026-06-02 16:29:49,383][243562] Updated weights for policy 0, policy_version 15673 (0.0009) +[2026-06-02 16:29:49,538][243562] Updated weights for policy 0, policy_version 15683 (0.0008) +[2026-06-02 16:29:49,718][243562] Updated weights for policy 0, policy_version 15694 (0.0008) +[2026-06-02 16:29:49,811][235960] Fps is (10 sec: 19660.5, 60 sec: 21299.1, 300 sec: 21215.9). Total num frames: 8028160. Throughput: 0: 21515.3. Samples: 8029696. Policy #0 lag: (min: 18.0, avg: 44.1, max: 82.0) +[2026-06-02 16:29:49,812][235960] Avg episode reward: [(0, '477.044')] +[2026-06-02 16:29:49,916][243562] Updated weights for policy 0, policy_version 15706 (0.0009) +[2026-06-02 16:29:50,077][243562] Updated weights for policy 0, policy_version 15716 (0.0008) +[2026-06-02 16:29:50,238][243562] Updated weights for policy 0, policy_version 15726 (0.0009) +[2026-06-02 16:29:50,265][242748] Saving new best policy, reward=477.044! +[2026-06-02 16:29:50,901][243562] Updated weights for policy 0, policy_version 15736 (0.0009) +[2026-06-02 16:29:51,077][243562] Updated weights for policy 0, policy_version 15747 (0.0008) +[2026-06-02 16:29:51,266][243562] Updated weights for policy 0, policy_version 15759 (0.0009) +[2026-06-02 16:29:51,426][243562] Updated weights for policy 0, policy_version 15769 (0.0009) +[2026-06-02 16:29:51,626][243562] Updated weights for policy 0, policy_version 15781 (0.0009) +[2026-06-02 16:29:51,809][243562] Updated weights for policy 0, policy_version 15792 (0.0008) +[2026-06-02 16:29:52,453][243562] Updated weights for policy 0, policy_version 15802 (0.0009) +[2026-06-02 16:29:52,604][243562] Updated weights for policy 0, policy_version 15812 (0.0008) +[2026-06-02 16:29:52,776][243562] Updated weights for policy 0, policy_version 15822 (0.0008) +[2026-06-02 16:29:52,938][243562] Updated weights for policy 0, policy_version 15832 (0.0008) +[2026-06-02 16:29:53,137][243562] Updated weights for policy 0, policy_version 15844 (0.0008) +[2026-06-02 16:29:53,298][243562] Updated weights for policy 0, policy_version 15854 (0.0008) +[2026-06-02 16:29:53,962][243562] Updated weights for policy 0, policy_version 15865 (0.0008) +[2026-06-02 16:29:54,118][243562] Updated weights for policy 0, policy_version 15875 (0.0008) +[2026-06-02 16:29:54,310][243562] Updated weights for policy 0, policy_version 15887 (0.0008) +[2026-06-02 16:29:54,493][243562] Updated weights for policy 0, policy_version 15898 (0.0008) +[2026-06-02 16:29:54,652][243562] Updated weights for policy 0, policy_version 15908 (0.0009) +[2026-06-02 16:29:54,811][235960] Fps is (10 sec: 19660.8, 60 sec: 21299.2, 300 sec: 21215.9). Total num frames: 8126464. Throughput: 0: 21432.9. Samples: 8154624. Policy #0 lag: (min: 18.0, avg: 44.1, max: 82.0) +[2026-06-02 16:29:54,812][235960] Avg episode reward: [(0, '471.327')] +[2026-06-02 16:29:54,826][243562] Updated weights for policy 0, policy_version 15918 (0.0008) +[2026-06-02 16:29:55,466][243562] Updated weights for policy 0, policy_version 15928 (0.0008) +[2026-06-02 16:29:55,630][243562] Updated weights for policy 0, policy_version 15938 (0.0008) +[2026-06-02 16:29:55,796][243562] Updated weights for policy 0, policy_version 15948 (0.0008) +[2026-06-02 16:29:55,957][243562] Updated weights for policy 0, policy_version 15958 (0.0009) +[2026-06-02 16:29:56,126][243562] Updated weights for policy 0, policy_version 15968 (0.0008) +[2026-06-02 16:29:56,281][243562] Updated weights for policy 0, policy_version 15978 (0.0008) +[2026-06-02 16:29:56,939][243562] Updated weights for policy 0, policy_version 15988 (0.0008) +[2026-06-02 16:29:57,093][243562] Updated weights for policy 0, policy_version 15998 (0.0008) +[2026-06-02 16:29:57,273][243562] Updated weights for policy 0, policy_version 16009 (0.0008) +[2026-06-02 16:29:57,429][243562] Updated weights for policy 0, policy_version 16019 (0.0008) +[2026-06-02 16:29:57,615][243562] Updated weights for policy 0, policy_version 16030 (0.0009) +[2026-06-02 16:29:57,783][243562] Updated weights for policy 0, policy_version 16040 (0.0008) +[2026-06-02 16:29:58,448][243562] Updated weights for policy 0, policy_version 16050 (0.0009) +[2026-06-02 16:29:58,600][243562] Updated weights for policy 0, policy_version 16060 (0.0008) +[2026-06-02 16:29:58,777][243562] Updated weights for policy 0, policy_version 16071 (0.0008) +[2026-06-02 16:29:58,933][243562] Updated weights for policy 0, policy_version 16081 (0.0008) +[2026-06-02 16:29:59,111][243562] Updated weights for policy 0, policy_version 16092 (0.0008) +[2026-06-02 16:29:59,277][243562] Updated weights for policy 0, policy_version 16102 (0.0008) +[2026-06-02 16:29:59,435][243562] Updated weights for policy 0, policy_version 16112 (0.0008) +[2026-06-02 16:29:59,811][235960] Fps is (10 sec: 22938.0, 60 sec: 21845.3, 300 sec: 21327.0). Total num frames: 8257536. Throughput: 0: 21350.4. Samples: 8280320. Policy #0 lag: (min: 14.0, avg: 28.9, max: 78.0) +[2026-06-02 16:29:59,812][235960] Avg episode reward: [(0, '478.071')] +[2026-06-02 16:30:00,076][243562] Updated weights for policy 0, policy_version 16122 (0.0008) +[2026-06-02 16:30:00,243][243562] Updated weights for policy 0, policy_version 16133 (0.0008) +[2026-06-02 16:30:00,417][243562] Updated weights for policy 0, policy_version 16143 (0.0008) +[2026-06-02 16:30:00,579][243562] Updated weights for policy 0, policy_version 16153 (0.0008) +[2026-06-02 16:30:00,762][243562] Updated weights for policy 0, policy_version 16164 (0.0009) +[2026-06-02 16:30:00,925][243562] Updated weights for policy 0, policy_version 16174 (0.0008) +[2026-06-02 16:30:00,950][242748] Saving new best policy, reward=478.071! +[2026-06-02 16:30:01,603][243562] Updated weights for policy 0, policy_version 16185 (0.0009) +[2026-06-02 16:30:01,757][243562] Updated weights for policy 0, policy_version 16195 (0.0008) +[2026-06-02 16:30:01,950][243562] Updated weights for policy 0, policy_version 16207 (0.0008) +[2026-06-02 16:30:02,114][243562] Updated weights for policy 0, policy_version 16217 (0.0008) +[2026-06-02 16:30:02,277][243562] Updated weights for policy 0, policy_version 16227 (0.0008) +[2026-06-02 16:30:02,435][243562] Updated weights for policy 0, policy_version 16237 (0.0008) +[2026-06-02 16:30:03,092][243562] Updated weights for policy 0, policy_version 16247 (0.0006) +[2026-06-02 16:30:03,258][243562] Updated weights for policy 0, policy_version 16257 (0.0008) +[2026-06-02 16:30:03,412][243562] Updated weights for policy 0, policy_version 16267 (0.0008) +[2026-06-02 16:30:03,609][243562] Updated weights for policy 0, policy_version 16279 (0.0009) +[2026-06-02 16:30:03,777][243562] Updated weights for policy 0, policy_version 16289 (0.0008) +[2026-06-02 16:30:03,941][243562] Updated weights for policy 0, policy_version 16299 (0.0009) +[2026-06-02 16:30:04,602][243562] Updated weights for policy 0, policy_version 16309 (0.0008) +[2026-06-02 16:30:04,772][243562] Updated weights for policy 0, policy_version 16320 (0.0008) +[2026-06-02 16:30:04,811][235960] Fps is (10 sec: 22937.6, 60 sec: 21299.2, 300 sec: 21327.0). Total num frames: 8355840. Throughput: 0: 21313.5. Samples: 8342784. Policy #0 lag: (min: 14.0, avg: 28.9, max: 78.0) +[2026-06-02 16:30:04,812][235960] Avg episode reward: [(0, '469.278')] +[2026-06-02 16:30:04,947][243562] Updated weights for policy 0, policy_version 16331 (0.0008) +[2026-06-02 16:30:05,138][243562] Updated weights for policy 0, policy_version 16342 (0.0008) +[2026-06-02 16:30:05,321][243562] Updated weights for policy 0, policy_version 16353 (0.0009) +[2026-06-02 16:30:05,485][243562] Updated weights for policy 0, policy_version 16363 (0.0008) +[2026-06-02 16:30:06,125][243562] Updated weights for policy 0, policy_version 16373 (0.0010) +[2026-06-02 16:30:06,282][243562] Updated weights for policy 0, policy_version 16383 (0.0008) +[2026-06-02 16:30:06,447][243562] Updated weights for policy 0, policy_version 16393 (0.0008) +[2026-06-02 16:30:06,600][243562] Updated weights for policy 0, policy_version 16403 (0.0008) +[2026-06-02 16:30:06,773][243562] Updated weights for policy 0, policy_version 16413 (0.0008) +[2026-06-02 16:30:06,932][243562] Updated weights for policy 0, policy_version 16423 (0.0009) +[2026-06-02 16:30:07,593][243562] Updated weights for policy 0, policy_version 16433 (0.0008) +[2026-06-02 16:30:07,747][243562] Updated weights for policy 0, policy_version 16443 (0.0008) +[2026-06-02 16:30:07,918][243562] Updated weights for policy 0, policy_version 16454 (0.0008) +[2026-06-02 16:30:08,098][243562] Updated weights for policy 0, policy_version 16465 (0.0008) +[2026-06-02 16:30:08,265][243562] Updated weights for policy 0, policy_version 16475 (0.0008) +[2026-06-02 16:30:08,444][243562] Updated weights for policy 0, policy_version 16486 (0.0008) +[2026-06-02 16:30:08,604][243562] Updated weights for policy 0, policy_version 16496 (0.0008) +[2026-06-02 16:30:09,259][243562] Updated weights for policy 0, policy_version 16506 (0.0004) +[2026-06-02 16:30:09,427][243562] Updated weights for policy 0, policy_version 16516 (0.0004) +[2026-06-02 16:30:09,604][243562] Updated weights for policy 0, policy_version 16527 (0.0004) +[2026-06-02 16:30:09,780][243562] Updated weights for policy 0, policy_version 16538 (0.0004) +[2026-06-02 16:30:09,811][235960] Fps is (10 sec: 19660.8, 60 sec: 21299.2, 300 sec: 21327.0). Total num frames: 8454144. Throughput: 0: 21239.5. Samples: 8468608. Policy #0 lag: (min: 63.0, avg: 78.5, max: 127.0) +[2026-06-02 16:30:09,812][235960] Avg episode reward: [(0, '454.630')] +[2026-06-02 16:30:09,945][243562] Updated weights for policy 0, policy_version 16548 (0.0004) +[2026-06-02 16:30:10,107][243562] Updated weights for policy 0, policy_version 16558 (0.0004) +[2026-06-02 16:30:10,757][243562] Updated weights for policy 0, policy_version 16570 (0.0007) +[2026-06-02 16:30:10,937][243562] Updated weights for policy 0, policy_version 16581 (0.0008) +[2026-06-02 16:30:11,106][243562] Updated weights for policy 0, policy_version 16591 (0.0009) +[2026-06-02 16:30:11,271][243562] Updated weights for policy 0, policy_version 16601 (0.0009) +[2026-06-02 16:30:11,433][243562] Updated weights for policy 0, policy_version 16611 (0.0008) +[2026-06-02 16:30:11,595][243562] Updated weights for policy 0, policy_version 16621 (0.0008) +[2026-06-02 16:30:12,263][243562] Updated weights for policy 0, policy_version 16632 (0.0008) +[2026-06-02 16:30:12,421][243562] Updated weights for policy 0, policy_version 16642 (0.0008) +[2026-06-02 16:30:12,597][243562] Updated weights for policy 0, policy_version 16653 (0.0008) +[2026-06-02 16:30:12,789][243562] Updated weights for policy 0, policy_version 16664 (0.0008) +[2026-06-02 16:30:12,948][243562] Updated weights for policy 0, policy_version 16674 (0.0008) +[2026-06-02 16:30:13,125][243562] Updated weights for policy 0, policy_version 16685 (0.0008) +[2026-06-02 16:30:13,821][243562] Updated weights for policy 0, policy_version 16697 (0.0008) +[2026-06-02 16:30:13,983][243562] Updated weights for policy 0, policy_version 16707 (0.0004) +[2026-06-02 16:30:14,142][243562] Updated weights for policy 0, policy_version 16717 (0.0006) +[2026-06-02 16:30:14,311][243562] Updated weights for policy 0, policy_version 16727 (0.0008) +[2026-06-02 16:30:14,481][243562] Updated weights for policy 0, policy_version 16737 (0.0009) +[2026-06-02 16:30:14,654][243562] Updated weights for policy 0, policy_version 16748 (0.0009) +[2026-06-02 16:30:14,811][235960] Fps is (10 sec: 22937.7, 60 sec: 21845.4, 300 sec: 21438.1). Total num frames: 8585216. Throughput: 0: 21276.5. Samples: 8599680. Policy #0 lag: (min: 63.0, avg: 78.5, max: 127.0) +[2026-06-02 16:30:14,812][235960] Avg episode reward: [(0, '474.969')] +[2026-06-02 16:30:15,287][243562] Updated weights for policy 0, policy_version 16758 (0.0009) +[2026-06-02 16:30:15,467][243562] Updated weights for policy 0, policy_version 16769 (0.0009) +[2026-06-02 16:30:15,631][243562] Updated weights for policy 0, policy_version 16779 (0.0008) +[2026-06-02 16:30:15,797][243562] Updated weights for policy 0, policy_version 16789 (0.0009) +[2026-06-02 16:30:15,971][243562] Updated weights for policy 0, policy_version 16800 (0.0008) +[2026-06-02 16:30:16,137][243562] Updated weights for policy 0, policy_version 16810 (0.0008) +[2026-06-02 16:30:16,807][243562] Updated weights for policy 0, policy_version 16820 (0.0009) +[2026-06-02 16:30:16,966][243562] Updated weights for policy 0, policy_version 16830 (0.0009) +[2026-06-02 16:30:17,143][243562] Updated weights for policy 0, policy_version 16841 (0.0009) +[2026-06-02 16:30:17,306][243562] Updated weights for policy 0, policy_version 16851 (0.0009) +[2026-06-02 16:30:17,474][243562] Updated weights for policy 0, policy_version 16862 (0.0008) +[2026-06-02 16:30:17,646][243562] Updated weights for policy 0, policy_version 16872 (0.0009) +[2026-06-02 16:30:18,295][243562] Updated weights for policy 0, policy_version 16882 (0.0009) +[2026-06-02 16:30:18,446][243562] Updated weights for policy 0, policy_version 16892 (0.0009) +[2026-06-02 16:30:18,633][243562] Updated weights for policy 0, policy_version 16904 (0.0008) +[2026-06-02 16:30:18,795][243562] Updated weights for policy 0, policy_version 16914 (0.0008) +[2026-06-02 16:30:18,983][243562] Updated weights for policy 0, policy_version 16925 (0.0008) +[2026-06-02 16:30:19,146][243562] Updated weights for policy 0, policy_version 16935 (0.0009) +[2026-06-02 16:30:19,811][235960] Fps is (10 sec: 22937.6, 60 sec: 21299.2, 300 sec: 21438.0). Total num frames: 8683520. Throughput: 0: 21276.4. Samples: 8664448. Policy #0 lag: (min: 63.0, avg: 78.5, max: 127.0) +[2026-06-02 16:30:19,812][235960] Avg episode reward: [(0, '480.559')] +[2026-06-02 16:30:19,835][243562] Updated weights for policy 0, policy_version 16946 (0.0008) +[2026-06-02 16:30:20,003][243562] Updated weights for policy 0, policy_version 16957 (0.0008) +[2026-06-02 16:30:20,160][243562] Updated weights for policy 0, policy_version 16967 (0.0008) +[2026-06-02 16:30:20,330][243562] Updated weights for policy 0, policy_version 16977 (0.0008) +[2026-06-02 16:30:20,491][243562] Updated weights for policy 0, policy_version 16987 (0.0008) +[2026-06-02 16:30:20,653][243562] Updated weights for policy 0, policy_version 16997 (0.0009) +[2026-06-02 16:30:20,825][242748] Saving new best policy, reward=480.559! +[2026-06-02 16:30:20,827][243562] Updated weights for policy 0, policy_version 17008 (0.0008) +[2026-06-02 16:30:21,474][243562] Updated weights for policy 0, policy_version 17018 (0.0008) +[2026-06-02 16:30:21,640][243562] Updated weights for policy 0, policy_version 17028 (0.0008) +[2026-06-02 16:30:21,817][243562] Updated weights for policy 0, policy_version 17039 (0.0009) +[2026-06-02 16:30:21,995][243562] Updated weights for policy 0, policy_version 17050 (0.0008) +[2026-06-02 16:30:22,160][243562] Updated weights for policy 0, policy_version 17060 (0.0008) +[2026-06-02 16:30:22,328][243562] Updated weights for policy 0, policy_version 17070 (0.0009) +[2026-06-02 16:30:22,987][243562] Updated weights for policy 0, policy_version 17081 (0.0008) +[2026-06-02 16:30:23,167][243562] Updated weights for policy 0, policy_version 17092 (0.0009) +[2026-06-02 16:30:23,345][243562] Updated weights for policy 0, policy_version 17103 (0.0008) +[2026-06-02 16:30:23,507][243562] Updated weights for policy 0, policy_version 17113 (0.0008) +[2026-06-02 16:30:23,679][243562] Updated weights for policy 0, policy_version 17123 (0.0008) +[2026-06-02 16:30:23,859][243562] Updated weights for policy 0, policy_version 17134 (0.0008) +[2026-06-02 16:30:24,532][243562] Updated weights for policy 0, policy_version 17145 (0.0008) +[2026-06-02 16:30:24,698][243562] Updated weights for policy 0, policy_version 17155 (0.0009) +[2026-06-02 16:30:24,811][235960] Fps is (10 sec: 19660.7, 60 sec: 21299.2, 300 sec: 21327.0). Total num frames: 8781824. Throughput: 0: 21267.9. Samples: 8793728. Policy #0 lag: (min: 63.0, avg: 78.4, max: 127.0) +[2026-06-02 16:30:24,812][235960] Avg episode reward: [(0, '477.362')] +[2026-06-02 16:30:24,860][243562] Updated weights for policy 0, policy_version 17165 (0.0008) +[2026-06-02 16:30:25,025][243562] Updated weights for policy 0, policy_version 17175 (0.0008) +[2026-06-02 16:30:25,191][243562] Updated weights for policy 0, policy_version 17185 (0.0008) +[2026-06-02 16:30:25,367][243562] Updated weights for policy 0, policy_version 17196 (0.0008) +[2026-06-02 16:30:26,004][243562] Updated weights for policy 0, policy_version 17206 (0.0005) +[2026-06-02 16:30:26,158][243562] Updated weights for policy 0, policy_version 17216 (0.0004) +[2026-06-02 16:30:26,325][243562] Updated weights for policy 0, policy_version 17226 (0.0004) +[2026-06-02 16:30:26,509][243562] Updated weights for policy 0, policy_version 17237 (0.0008) +[2026-06-02 16:30:26,693][243562] Updated weights for policy 0, policy_version 17248 (0.0009) +[2026-06-02 16:30:26,857][243562] Updated weights for policy 0, policy_version 17258 (0.0009) +[2026-06-02 16:30:27,517][243562] Updated weights for policy 0, policy_version 17269 (0.0009) +[2026-06-02 16:30:27,677][243562] Updated weights for policy 0, policy_version 17279 (0.0008) +[2026-06-02 16:30:27,843][243562] Updated weights for policy 0, policy_version 17289 (0.0009) +[2026-06-02 16:30:28,009][243562] Updated weights for policy 0, policy_version 17299 (0.0009) +[2026-06-02 16:30:28,197][243562] Updated weights for policy 0, policy_version 17310 (0.0009) +[2026-06-02 16:30:28,353][243562] Updated weights for policy 0, policy_version 17320 (0.0008) +[2026-06-02 16:30:28,998][243562] Updated weights for policy 0, policy_version 17330 (0.0008) +[2026-06-02 16:30:29,154][243562] Updated weights for policy 0, policy_version 17340 (0.0006) +[2026-06-02 16:30:29,315][243562] Updated weights for policy 0, policy_version 17350 (0.0005) +[2026-06-02 16:30:29,479][243562] Updated weights for policy 0, policy_version 17360 (0.0005) +[2026-06-02 16:30:29,666][243562] Updated weights for policy 0, policy_version 17372 (0.0005) +[2026-06-02 16:30:29,811][235960] Fps is (10 sec: 19660.9, 60 sec: 21299.2, 300 sec: 21327.0). Total num frames: 8880128. Throughput: 0: 21287.8. Samples: 8923776. Policy #0 lag: (min: 63.0, avg: 78.4, max: 127.0) +[2026-06-02 16:30:29,813][235960] Avg episode reward: [(0, '500.061')] +[2026-06-02 16:30:29,835][243562] Updated weights for policy 0, policy_version 17382 (0.0005) +[2026-06-02 16:30:29,994][242748] Saving new best policy, reward=500.061! +[2026-06-02 16:30:30,497][243562] Updated weights for policy 0, policy_version 17393 (0.0005) +[2026-06-02 16:30:30,654][243562] Updated weights for policy 0, policy_version 17403 (0.0008) +[2026-06-02 16:30:30,815][243562] Updated weights for policy 0, policy_version 17413 (0.0008) +[2026-06-02 16:30:30,973][243562] Updated weights for policy 0, policy_version 17423 (0.0008) +[2026-06-02 16:30:31,142][243562] Updated weights for policy 0, policy_version 17433 (0.0008) +[2026-06-02 16:30:31,315][243562] Updated weights for policy 0, policy_version 17444 (0.0008) +[2026-06-02 16:30:31,505][243562] Updated weights for policy 0, policy_version 17455 (0.0008) +[2026-06-02 16:30:32,152][243562] Updated weights for policy 0, policy_version 17465 (0.0008) +[2026-06-02 16:30:32,333][243562] Updated weights for policy 0, policy_version 17476 (0.0008) +[2026-06-02 16:30:32,492][243562] Updated weights for policy 0, policy_version 17486 (0.0008) +[2026-06-02 16:30:32,662][243562] Updated weights for policy 0, policy_version 17497 (0.0008) +[2026-06-02 16:30:32,863][243562] Updated weights for policy 0, policy_version 17509 (0.0009) +[2026-06-02 16:30:33,044][243562] Updated weights for policy 0, policy_version 17520 (0.0009) +[2026-06-02 16:30:33,691][243562] Updated weights for policy 0, policy_version 17530 (0.0008) +[2026-06-02 16:30:33,854][243562] Updated weights for policy 0, policy_version 17540 (0.0009) +[2026-06-02 16:30:34,017][243562] Updated weights for policy 0, policy_version 17550 (0.0007) +[2026-06-02 16:30:34,190][243562] Updated weights for policy 0, policy_version 17561 (0.0009) +[2026-06-02 16:30:34,366][243562] Updated weights for policy 0, policy_version 17571 (0.0009) +[2026-06-02 16:30:34,532][243562] Updated weights for policy 0, policy_version 17581 (0.0009) +[2026-06-02 16:30:34,811][235960] Fps is (10 sec: 22937.7, 60 sec: 21845.3, 300 sec: 21438.0). Total num frames: 9011200. Throughput: 0: 21316.4. Samples: 8988928. Policy #0 lag: (min: 63.0, avg: 78.4, max: 127.0) +[2026-06-02 16:30:34,812][235960] Avg episode reward: [(0, '562.024')] +[2026-06-02 16:30:34,816][242748] Saving new best policy, reward=562.024! +[2026-06-02 16:30:35,180][243562] Updated weights for policy 0, policy_version 17591 (0.0008) +[2026-06-02 16:30:35,369][243562] Updated weights for policy 0, policy_version 17603 (0.0009) +[2026-06-02 16:30:35,542][243562] Updated weights for policy 0, policy_version 17613 (0.0008) +[2026-06-02 16:30:35,699][243562] Updated weights for policy 0, policy_version 17623 (0.0008) +[2026-06-02 16:30:35,885][243562] Updated weights for policy 0, policy_version 17634 (0.0008) +[2026-06-02 16:30:36,042][243562] Updated weights for policy 0, policy_version 17644 (0.0008) +[2026-06-02 16:30:36,696][243562] Updated weights for policy 0, policy_version 17655 (0.0009) +[2026-06-02 16:30:36,860][243562] Updated weights for policy 0, policy_version 17665 (0.0008) +[2026-06-02 16:30:37,027][243562] Updated weights for policy 0, policy_version 17675 (0.0008) +[2026-06-02 16:30:37,203][243562] Updated weights for policy 0, policy_version 17686 (0.0008) +[2026-06-02 16:30:37,377][243562] Updated weights for policy 0, policy_version 17697 (0.0008) +[2026-06-02 16:30:37,545][243562] Updated weights for policy 0, policy_version 17707 (0.0008) +[2026-06-02 16:30:38,193][243562] Updated weights for policy 0, policy_version 17717 (0.0008) +[2026-06-02 16:30:38,370][243562] Updated weights for policy 0, policy_version 17728 (0.0008) +[2026-06-02 16:30:38,532][243562] Updated weights for policy 0, policy_version 17738 (0.0008) +[2026-06-02 16:30:38,691][243562] Updated weights for policy 0, policy_version 17748 (0.0008) +[2026-06-02 16:30:38,869][243562] Updated weights for policy 0, policy_version 17758 (0.0008) +[2026-06-02 16:30:39,042][243562] Updated weights for policy 0, policy_version 17769 (0.0008) +[2026-06-02 16:30:39,710][243562] Updated weights for policy 0, policy_version 17780 (0.0009) +[2026-06-02 16:30:39,811][235960] Fps is (10 sec: 22937.6, 60 sec: 21299.2, 300 sec: 21438.1). Total num frames: 9109504. Throughput: 0: 21410.1. Samples: 9118080. Policy #0 lag: (min: 63.0, avg: 78.1, max: 127.0) +[2026-06-02 16:30:39,812][235960] Avg episode reward: [(0, '545.057')] +[2026-06-02 16:30:39,863][243562] Updated weights for policy 0, policy_version 17790 (0.0008) +[2026-06-02 16:30:40,019][243562] Updated weights for policy 0, policy_version 17800 (0.0008) +[2026-06-02 16:30:40,217][243562] Updated weights for policy 0, policy_version 17812 (0.0008) +[2026-06-02 16:30:40,402][243562] Updated weights for policy 0, policy_version 17824 (0.0008) +[2026-06-02 16:30:40,578][243562] Updated weights for policy 0, policy_version 17834 (0.0008) +[2026-06-02 16:30:41,238][243562] Updated weights for policy 0, policy_version 17844 (0.0008) +[2026-06-02 16:30:41,397][243562] Updated weights for policy 0, policy_version 17854 (0.0008) +[2026-06-02 16:30:41,558][243562] Updated weights for policy 0, policy_version 17864 (0.0008) +[2026-06-02 16:30:41,711][243562] Updated weights for policy 0, policy_version 17874 (0.0008) +[2026-06-02 16:30:41,875][243562] Updated weights for policy 0, policy_version 17884 (0.0008) +[2026-06-02 16:30:42,040][243562] Updated weights for policy 0, policy_version 17894 (0.0009) +[2026-06-02 16:30:42,203][243562] Updated weights for policy 0, policy_version 17904 (0.0008) +[2026-06-02 16:30:42,847][243562] Updated weights for policy 0, policy_version 17914 (0.0008) +[2026-06-02 16:30:43,023][243562] Updated weights for policy 0, policy_version 17925 (0.0008) +[2026-06-02 16:30:43,192][243562] Updated weights for policy 0, policy_version 17936 (0.0009) +[2026-06-02 16:30:43,358][243562] Updated weights for policy 0, policy_version 17946 (0.0008) +[2026-06-02 16:30:43,518][243562] Updated weights for policy 0, policy_version 17956 (0.0008) +[2026-06-02 16:30:43,705][243562] Updated weights for policy 0, policy_version 17968 (0.0008) +[2026-06-02 16:30:44,381][243562] Updated weights for policy 0, policy_version 17978 (0.0009) +[2026-06-02 16:30:44,531][243562] Updated weights for policy 0, policy_version 17988 (0.0009) +[2026-06-02 16:30:44,709][243562] Updated weights for policy 0, policy_version 17999 (0.0009) +[2026-06-02 16:30:44,811][235960] Fps is (10 sec: 19660.8, 60 sec: 21299.2, 300 sec: 21327.0). Total num frames: 9207808. Throughput: 0: 21504.0. Samples: 9248000. Policy #0 lag: (min: 63.0, avg: 78.1, max: 127.0) +[2026-06-02 16:30:44,812][235960] Avg episode reward: [(0, '531.206')] +[2026-06-02 16:30:44,863][243562] Updated weights for policy 0, policy_version 18009 (0.0008) +[2026-06-02 16:30:45,037][243562] Updated weights for policy 0, policy_version 18020 (0.0008) +[2026-06-02 16:30:45,228][243562] Updated weights for policy 0, policy_version 18032 (0.0009) +[2026-06-02 16:30:45,902][243562] Updated weights for policy 0, policy_version 18042 (0.0008) +[2026-06-02 16:30:46,106][243562] Updated weights for policy 0, policy_version 18055 (0.0009) +[2026-06-02 16:30:46,296][243562] Updated weights for policy 0, policy_version 18067 (0.0009) +[2026-06-02 16:30:46,455][243562] Updated weights for policy 0, policy_version 18077 (0.0008) +[2026-06-02 16:30:46,644][243562] Updated weights for policy 0, policy_version 18089 (0.0008) +[2026-06-02 16:30:47,359][243562] Updated weights for policy 0, policy_version 18101 (0.0009) +[2026-06-02 16:30:47,532][243562] Updated weights for policy 0, policy_version 18112 (0.0008) +[2026-06-02 16:30:47,689][243562] Updated weights for policy 0, policy_version 18122 (0.0008) +[2026-06-02 16:30:47,857][243562] Updated weights for policy 0, policy_version 18132 (0.0008) +[2026-06-02 16:30:48,011][243562] Updated weights for policy 0, policy_version 18142 (0.0008) +[2026-06-02 16:30:48,179][243562] Updated weights for policy 0, policy_version 18152 (0.0008) +[2026-06-02 16:30:48,831][243562] Updated weights for policy 0, policy_version 18162 (0.0009) +[2026-06-02 16:30:48,980][243562] Updated weights for policy 0, policy_version 18172 (0.0008) +[2026-06-02 16:30:49,146][243562] Updated weights for policy 0, policy_version 18182 (0.0008) +[2026-06-02 16:30:49,306][243562] Updated weights for policy 0, policy_version 18192 (0.0008) +[2026-06-02 16:30:49,470][243562] Updated weights for policy 0, policy_version 18202 (0.0008) +[2026-06-02 16:30:49,637][243562] Updated weights for policy 0, policy_version 18212 (0.0008) +[2026-06-02 16:30:49,811][235960] Fps is (10 sec: 19660.8, 60 sec: 21299.3, 300 sec: 21327.0). Total num frames: 9306112. Throughput: 0: 21572.3. Samples: 9313536. Policy #0 lag: (min: 63.0, avg: 78.1, max: 127.0) +[2026-06-02 16:30:49,812][235960] Avg episode reward: [(0, '561.569')] +[2026-06-02 16:30:49,816][243562] Updated weights for policy 0, policy_version 18223 (0.0008) +[2026-06-02 16:30:50,483][243562] Updated weights for policy 0, policy_version 18234 (0.0008) +[2026-06-02 16:30:50,644][243562] Updated weights for policy 0, policy_version 18244 (0.0008) +[2026-06-02 16:30:50,806][243562] Updated weights for policy 0, policy_version 18254 (0.0008) +[2026-06-02 16:30:50,969][243562] Updated weights for policy 0, policy_version 18264 (0.0008) +[2026-06-02 16:30:51,138][243562] Updated weights for policy 0, policy_version 18274 (0.0008) +[2026-06-02 16:30:51,304][243562] Updated weights for policy 0, policy_version 18284 (0.0009) +[2026-06-02 16:30:51,970][243562] Updated weights for policy 0, policy_version 18295 (0.0009) +[2026-06-02 16:30:52,133][243562] Updated weights for policy 0, policy_version 18305 (0.0008) +[2026-06-02 16:30:52,295][243562] Updated weights for policy 0, policy_version 18315 (0.0009) +[2026-06-02 16:30:52,457][243562] Updated weights for policy 0, policy_version 18325 (0.0008) +[2026-06-02 16:30:52,618][243562] Updated weights for policy 0, policy_version 18335 (0.0008) +[2026-06-02 16:30:52,788][243562] Updated weights for policy 0, policy_version 18345 (0.0009) +[2026-06-02 16:30:53,421][243562] Updated weights for policy 0, policy_version 18355 (0.0010) +[2026-06-02 16:30:53,569][243562] Updated weights for policy 0, policy_version 18365 (0.0009) +[2026-06-02 16:30:53,728][243562] Updated weights for policy 0, policy_version 18375 (0.0008) +[2026-06-02 16:30:53,899][243562] Updated weights for policy 0, policy_version 18385 (0.0008) +[2026-06-02 16:30:54,056][243562] Updated weights for policy 0, policy_version 18395 (0.0008) +[2026-06-02 16:30:54,213][243562] Updated weights for policy 0, policy_version 18405 (0.0008) +[2026-06-02 16:30:54,811][235960] Fps is (10 sec: 22937.4, 60 sec: 21845.3, 300 sec: 21438.0). Total num frames: 9437184. Throughput: 0: 21640.5. Samples: 9442432. Policy #0 lag: (min: 63.0, avg: 78.7, max: 127.0) +[2026-06-02 16:30:54,813][235960] Avg episode reward: [(0, '580.970')] +[2026-06-02 16:30:54,925][243562] Updated weights for policy 0, policy_version 18418 (0.0009) +[2026-06-02 16:30:55,080][243562] Updated weights for policy 0, policy_version 18428 (0.0008) +[2026-06-02 16:30:55,238][243562] Updated weights for policy 0, policy_version 18438 (0.0008) +[2026-06-02 16:30:55,411][243562] Updated weights for policy 0, policy_version 18448 (0.0008) +[2026-06-02 16:30:55,594][243562] Updated weights for policy 0, policy_version 18459 (0.0008) +[2026-06-02 16:30:55,773][243562] Updated weights for policy 0, policy_version 18470 (0.0008) +[2026-06-02 16:30:55,924][242748] Saving new best policy, reward=580.970! +[2026-06-02 16:30:55,927][243562] Updated weights for policy 0, policy_version 18480 (0.0008) +[2026-06-02 16:30:56,594][243562] Updated weights for policy 0, policy_version 18490 (0.0009) +[2026-06-02 16:30:56,751][243562] Updated weights for policy 0, policy_version 18500 (0.0009) +[2026-06-02 16:30:56,918][243562] Updated weights for policy 0, policy_version 18510 (0.0008) +[2026-06-02 16:30:57,078][243562] Updated weights for policy 0, policy_version 18520 (0.0008) +[2026-06-02 16:30:57,247][243562] Updated weights for policy 0, policy_version 18530 (0.0009) +[2026-06-02 16:30:57,410][243562] Updated weights for policy 0, policy_version 18540 (0.0008) +[2026-06-02 16:30:58,064][243562] Updated weights for policy 0, policy_version 18551 (0.0008) +[2026-06-02 16:30:58,259][243562] Updated weights for policy 0, policy_version 18563 (0.0009) +[2026-06-02 16:30:58,422][243562] Updated weights for policy 0, policy_version 18573 (0.0009) +[2026-06-02 16:30:58,593][243562] Updated weights for policy 0, policy_version 18584 (0.0008) +[2026-06-02 16:30:58,764][243562] Updated weights for policy 0, policy_version 18594 (0.0008) +[2026-06-02 16:30:58,944][243562] Updated weights for policy 0, policy_version 18605 (0.0008) +[2026-06-02 16:30:59,582][243562] Updated weights for policy 0, policy_version 18615 (0.0008) +[2026-06-02 16:30:59,748][243562] Updated weights for policy 0, policy_version 18625 (0.0008) +[2026-06-02 16:30:59,811][235960] Fps is (10 sec: 22937.6, 60 sec: 21299.2, 300 sec: 21438.0). Total num frames: 9535488. Throughput: 0: 21612.1. Samples: 9572224. Policy #0 lag: (min: 63.0, avg: 78.7, max: 127.0) +[2026-06-02 16:30:59,812][235960] Avg episode reward: [(0, '582.745')] +[2026-06-02 16:30:59,921][243562] Updated weights for policy 0, policy_version 18636 (0.0008) +[2026-06-02 16:31:00,081][243562] Updated weights for policy 0, policy_version 18646 (0.0009) +[2026-06-02 16:31:00,239][243562] Updated weights for policy 0, policy_version 18656 (0.0008) +[2026-06-02 16:31:00,402][243562] Updated weights for policy 0, policy_version 18666 (0.0009) +[2026-06-02 16:31:00,500][242748] Saving new best policy, reward=582.745! +[2026-06-02 16:31:01,050][243562] Updated weights for policy 0, policy_version 18676 (0.0008) +[2026-06-02 16:31:01,206][243562] Updated weights for policy 0, policy_version 18686 (0.0008) +[2026-06-02 16:31:01,383][243562] Updated weights for policy 0, policy_version 18697 (0.0008) +[2026-06-02 16:31:01,565][243562] Updated weights for policy 0, policy_version 18708 (0.0008) +[2026-06-02 16:31:01,728][243562] Updated weights for policy 0, policy_version 18718 (0.0008) +[2026-06-02 16:31:01,911][243562] Updated weights for policy 0, policy_version 18729 (0.0008) +[2026-06-02 16:31:02,565][243562] Updated weights for policy 0, policy_version 18739 (0.0008) +[2026-06-02 16:31:02,719][243562] Updated weights for policy 0, policy_version 18749 (0.0008) +[2026-06-02 16:31:02,873][243562] Updated weights for policy 0, policy_version 18759 (0.0008) +[2026-06-02 16:31:03,046][243562] Updated weights for policy 0, policy_version 18769 (0.0008) +[2026-06-02 16:31:03,206][243562] Updated weights for policy 0, policy_version 18779 (0.0008) +[2026-06-02 16:31:03,381][243562] Updated weights for policy 0, policy_version 18790 (0.0009) +[2026-06-02 16:31:03,540][243562] Updated weights for policy 0, policy_version 18800 (0.0009) +[2026-06-02 16:31:04,228][243562] Updated weights for policy 0, policy_version 18810 (0.0008) +[2026-06-02 16:31:04,400][243562] Updated weights for policy 0, policy_version 18821 (0.0008) +[2026-06-02 16:31:04,555][243562] Updated weights for policy 0, policy_version 18831 (0.0008) +[2026-06-02 16:31:04,738][243562] Updated weights for policy 0, policy_version 18842 (0.0008) +[2026-06-02 16:31:04,811][235960] Fps is (10 sec: 19661.0, 60 sec: 21299.2, 300 sec: 21327.0). Total num frames: 9633792. Throughput: 0: 21606.4. Samples: 9636736. Policy #0 lag: (min: 63.0, avg: 78.7, max: 127.0) +[2026-06-02 16:31:04,812][235960] Avg episode reward: [(0, '593.818')] +[2026-06-02 16:31:04,915][243562] Updated weights for policy 0, policy_version 18853 (0.0008) +[2026-06-02 16:31:05,083][243562] Updated weights for policy 0, policy_version 18863 (0.0008) +[2026-06-02 16:31:05,097][242748] Saving new best policy, reward=593.818! +[2026-06-02 16:31:05,716][243562] Updated weights for policy 0, policy_version 18873 (0.0008) +[2026-06-02 16:31:05,895][243562] Updated weights for policy 0, policy_version 18884 (0.0008) +[2026-06-02 16:31:06,079][243562] Updated weights for policy 0, policy_version 18895 (0.0008) +[2026-06-02 16:31:06,260][243562] Updated weights for policy 0, policy_version 18906 (0.0008) +[2026-06-02 16:31:06,426][243562] Updated weights for policy 0, policy_version 18916 (0.0008) +[2026-06-02 16:31:06,605][243562] Updated weights for policy 0, policy_version 18927 (0.0009) +[2026-06-02 16:31:07,282][243562] Updated weights for policy 0, policy_version 18938 (0.0009) +[2026-06-02 16:31:07,438][243562] Updated weights for policy 0, policy_version 18948 (0.0008) +[2026-06-02 16:31:07,601][243562] Updated weights for policy 0, policy_version 18958 (0.0008) +[2026-06-02 16:31:07,767][243562] Updated weights for policy 0, policy_version 18968 (0.0008) +[2026-06-02 16:31:07,944][243562] Updated weights for policy 0, policy_version 18979 (0.0008) +[2026-06-02 16:31:08,106][243562] Updated weights for policy 0, policy_version 18989 (0.0008) +[2026-06-02 16:31:08,771][243562] Updated weights for policy 0, policy_version 19000 (0.0008) +[2026-06-02 16:31:08,932][243562] Updated weights for policy 0, policy_version 19010 (0.0008) +[2026-06-02 16:31:09,096][243562] Updated weights for policy 0, policy_version 19020 (0.0008) +[2026-06-02 16:31:09,260][243562] Updated weights for policy 0, policy_version 19030 (0.0008) +[2026-06-02 16:31:09,437][243562] Updated weights for policy 0, policy_version 19041 (0.0009) +[2026-06-02 16:31:09,605][243562] Updated weights for policy 0, policy_version 19051 (0.0009) +[2026-06-02 16:31:09,811][235960] Fps is (10 sec: 22937.6, 60 sec: 21845.3, 300 sec: 21438.0). Total num frames: 9764864. Throughput: 0: 21626.3. Samples: 9766912. Policy #0 lag: (min: 63.0, avg: 76.4, max: 127.0) +[2026-06-02 16:31:09,812][235960] Avg episode reward: [(0, '626.558')] +[2026-06-02 16:31:09,817][242748] Saving new best policy, reward=626.558! +[2026-06-02 16:31:10,242][243562] Updated weights for policy 0, policy_version 19061 (0.0008) +[2026-06-02 16:31:10,412][243562] Updated weights for policy 0, policy_version 19072 (0.0010) +[2026-06-02 16:31:10,581][243562] Updated weights for policy 0, policy_version 19082 (0.0012) +[2026-06-02 16:31:10,751][243562] Updated weights for policy 0, policy_version 19093 (0.0008) +[2026-06-02 16:31:10,940][243562] Updated weights for policy 0, policy_version 19104 (0.0008) +[2026-06-02 16:31:11,116][243562] Updated weights for policy 0, policy_version 19115 (0.0009) +[2026-06-02 16:31:11,774][243562] Updated weights for policy 0, policy_version 19125 (0.0008) +[2026-06-02 16:31:11,929][243562] Updated weights for policy 0, policy_version 19135 (0.0009) +[2026-06-02 16:31:12,110][243562] Updated weights for policy 0, policy_version 19146 (0.0008) +[2026-06-02 16:31:12,271][243562] Updated weights for policy 0, policy_version 19156 (0.0008) +[2026-06-02 16:31:12,437][243562] Updated weights for policy 0, policy_version 19166 (0.0008) +[2026-06-02 16:31:12,619][243562] Updated weights for policy 0, policy_version 19177 (0.0009) +[2026-06-02 16:31:13,262][243562] Updated weights for policy 0, policy_version 19187 (0.0008) +[2026-06-02 16:31:13,449][243562] Updated weights for policy 0, policy_version 19199 (0.0008) +[2026-06-02 16:31:13,611][243562] Updated weights for policy 0, policy_version 19209 (0.0008) +[2026-06-02 16:31:13,773][243562] Updated weights for policy 0, policy_version 19219 (0.0008) +[2026-06-02 16:31:13,935][243562] Updated weights for policy 0, policy_version 19229 (0.0008) +[2026-06-02 16:31:14,134][243562] Updated weights for policy 0, policy_version 19241 (0.0009) +[2026-06-02 16:31:14,786][243562] Updated weights for policy 0, policy_version 19251 (0.0009) +[2026-06-02 16:31:14,811][235960] Fps is (10 sec: 22937.6, 60 sec: 21299.2, 300 sec: 21438.0). Total num frames: 9863168. Throughput: 0: 21612.1. Samples: 9896320. Policy #0 lag: (min: 63.0, avg: 76.4, max: 127.0) +[2026-06-02 16:31:14,812][235960] Avg episode reward: [(0, '655.801')] +[2026-06-02 16:31:14,946][243562] Updated weights for policy 0, policy_version 19261 (0.0008) +[2026-06-02 16:31:15,102][243562] Updated weights for policy 0, policy_version 19271 (0.0008) +[2026-06-02 16:31:15,293][243562] Updated weights for policy 0, policy_version 19283 (0.0008) +[2026-06-02 16:31:15,476][243562] Updated weights for policy 0, policy_version 19294 (0.0008) +[2026-06-02 16:31:15,643][243562] Updated weights for policy 0, policy_version 19304 (0.0009) +[2026-06-02 16:31:15,769][242748] Saving new best policy, reward=655.801! +[2026-06-02 16:31:16,291][243562] Updated weights for policy 0, policy_version 19314 (0.0009) +[2026-06-02 16:31:16,445][243562] Updated weights for policy 0, policy_version 19324 (0.0008) +[2026-06-02 16:31:16,621][243562] Updated weights for policy 0, policy_version 19335 (0.0008) +[2026-06-02 16:31:16,787][243562] Updated weights for policy 0, policy_version 19345 (0.0008) +[2026-06-02 16:31:16,946][243562] Updated weights for policy 0, policy_version 19355 (0.0009) +[2026-06-02 16:31:17,114][243562] Updated weights for policy 0, policy_version 19365 (0.0008) +[2026-06-02 16:31:17,276][243562] Updated weights for policy 0, policy_version 19375 (0.0009) +[2026-06-02 16:31:17,937][243562] Updated weights for policy 0, policy_version 19386 (0.0008) +[2026-06-02 16:31:18,097][243562] Updated weights for policy 0, policy_version 19396 (0.0008) +[2026-06-02 16:31:18,263][243562] Updated weights for policy 0, policy_version 19406 (0.0009) +[2026-06-02 16:31:18,422][243562] Updated weights for policy 0, policy_version 19416 (0.0008) +[2026-06-02 16:31:18,586][243562] Updated weights for policy 0, policy_version 19426 (0.0008) +[2026-06-02 16:31:18,747][243562] Updated weights for policy 0, policy_version 19436 (0.0008) +[2026-06-02 16:31:19,417][243562] Updated weights for policy 0, policy_version 19446 (0.0008) +[2026-06-02 16:31:19,598][243562] Updated weights for policy 0, policy_version 19457 (0.0008) +[2026-06-02 16:31:19,778][243562] Updated weights for policy 0, policy_version 19468 (0.0008) +[2026-06-02 16:31:19,811][235960] Fps is (10 sec: 19660.9, 60 sec: 21299.2, 300 sec: 21438.0). Total num frames: 9961472. Throughput: 0: 21552.4. Samples: 9958784. Policy #0 lag: (min: 63.0, avg: 76.4, max: 127.0) +[2026-06-02 16:31:19,812][235960] Avg episode reward: [(0, '685.603')] +[2026-06-02 16:31:19,945][243562] Updated weights for policy 0, policy_version 19478 (0.0008) +[2026-06-02 16:31:20,110][243562] Updated weights for policy 0, policy_version 19488 (0.0008) +[2026-06-02 16:31:20,304][243562] Updated weights for policy 0, policy_version 19500 (0.0009) +[2026-06-02 16:31:20,370][242748] Saving new best policy, reward=685.603! +[2026-06-02 16:31:20,950][243562] Updated weights for policy 0, policy_version 19510 (0.0008) +[2026-06-02 16:31:21,165][243562] Updated weights for policy 0, policy_version 19523 (0.0009) +[2026-06-02 16:31:21,324][243562] Updated weights for policy 0, policy_version 19533 (0.0008) +[2026-06-02 16:31:21,519][243562] Updated weights for policy 0, policy_version 19545 (0.0008) +[2026-06-02 16:31:21,683][243562] Updated weights for policy 0, policy_version 19555 (0.0008) +[2026-06-02 16:31:21,847][243562] Updated weights for policy 0, policy_version 19565 (0.0008) +[2026-06-02 16:31:22,493][243562] Updated weights for policy 0, policy_version 19575 (0.0009) +[2026-06-02 16:31:22,650][243562] Updated weights for policy 0, policy_version 19585 (0.0009) +[2026-06-02 16:31:22,834][243562] Updated weights for policy 0, policy_version 19596 (0.0008) +[2026-06-02 16:31:23,013][243562] Updated weights for policy 0, policy_version 19607 (0.0008) +[2026-06-02 16:31:23,173][243562] Updated weights for policy 0, policy_version 19617 (0.0008) +[2026-06-02 16:31:23,336][243562] Updated weights for policy 0, policy_version 19627 (0.0008) +[2026-06-02 16:31:23,967][243562] Updated weights for policy 0, policy_version 19637 (0.0008) +[2026-06-02 16:31:24,119][243562] Updated weights for policy 0, policy_version 19647 (0.0008) +[2026-06-02 16:31:24,300][243562] Updated weights for policy 0, policy_version 19658 (0.0009) +[2026-06-02 16:31:24,455][243562] Updated weights for policy 0, policy_version 19668 (0.0008) +[2026-06-02 16:31:24,627][243562] Updated weights for policy 0, policy_version 19678 (0.0008) +[2026-06-02 16:31:24,783][243562] Updated weights for policy 0, policy_version 19688 (0.0008) +[2026-06-02 16:31:24,811][235960] Fps is (10 sec: 19660.7, 60 sec: 21299.2, 300 sec: 21438.0). Total num frames: 10059776. Throughput: 0: 21455.6. Samples: 10083584. Policy #0 lag: (min: 62.0, avg: 76.0, max: 126.0) +[2026-06-02 16:31:24,812][235960] Avg episode reward: [(0, '701.350')] +[2026-06-02 16:31:24,913][242748] Saving new best policy, reward=701.350! +[2026-06-02 16:31:25,457][243562] Updated weights for policy 0, policy_version 19698 (0.0008) +[2026-06-02 16:31:25,617][243562] Updated weights for policy 0, policy_version 19709 (0.0008) +[2026-06-02 16:31:25,781][243562] Updated weights for policy 0, policy_version 19719 (0.0009) +[2026-06-02 16:31:25,948][243562] Updated weights for policy 0, policy_version 19729 (0.0008) +[2026-06-02 16:31:26,113][243562] Updated weights for policy 0, policy_version 19739 (0.0008) +[2026-06-02 16:31:26,278][243562] Updated weights for policy 0, policy_version 19749 (0.0008) +[2026-06-02 16:31:26,445][243562] Updated weights for policy 0, policy_version 19759 (0.0008) +[2026-06-02 16:31:27,094][243562] Updated weights for policy 0, policy_version 19769 (0.0009) +[2026-06-02 16:31:27,266][243562] Updated weights for policy 0, policy_version 19780 (0.0008) +[2026-06-02 16:31:27,440][243562] Updated weights for policy 0, policy_version 19791 (0.0009) +[2026-06-02 16:31:27,609][243562] Updated weights for policy 0, policy_version 19801 (0.0009) +[2026-06-02 16:31:27,777][243562] Updated weights for policy 0, policy_version 19811 (0.0009) +[2026-06-02 16:31:27,933][243562] Updated weights for policy 0, policy_version 19821 (0.0008) +[2026-06-02 16:31:28,585][243562] Updated weights for policy 0, policy_version 19831 (0.0009) +[2026-06-02 16:31:28,757][243562] Updated weights for policy 0, policy_version 19842 (0.0010) +[2026-06-02 16:31:28,941][243562] Updated weights for policy 0, policy_version 19853 (0.0008) +[2026-06-02 16:31:29,100][243562] Updated weights for policy 0, policy_version 19863 (0.0008) +[2026-06-02 16:31:29,270][243562] Updated weights for policy 0, policy_version 19873 (0.0008) +[2026-06-02 16:31:29,454][243562] Updated weights for policy 0, policy_version 19884 (0.0008) +[2026-06-02 16:31:29,811][235960] Fps is (10 sec: 22937.6, 60 sec: 21845.3, 300 sec: 21438.0). Total num frames: 10190848. Throughput: 0: 21364.6. Samples: 10209408. Policy #0 lag: (min: 62.0, avg: 76.0, max: 126.0) +[2026-06-02 16:31:29,812][235960] Avg episode reward: [(0, '751.017')] +[2026-06-02 16:31:29,816][242748] Saving new best policy, reward=751.017! +[2026-06-02 16:31:30,102][243562] Updated weights for policy 0, policy_version 19895 (0.0009) +[2026-06-02 16:31:30,271][243562] Updated weights for policy 0, policy_version 19905 (0.0008) +[2026-06-02 16:31:30,428][243562] Updated weights for policy 0, policy_version 19915 (0.0009) +[2026-06-02 16:31:30,604][243562] Updated weights for policy 0, policy_version 19926 (0.0008) +[2026-06-02 16:31:30,767][243562] Updated weights for policy 0, policy_version 19936 (0.0008) +[2026-06-02 16:31:30,937][243562] Updated weights for policy 0, policy_version 19946 (0.0008) +[2026-06-02 16:31:31,603][243562] Updated weights for policy 0, policy_version 19957 (0.0009) +[2026-06-02 16:31:31,772][243562] Updated weights for policy 0, policy_version 19968 (0.0008) +[2026-06-02 16:31:31,933][243562] Updated weights for policy 0, policy_version 19978 (0.0008) +[2026-06-02 16:31:32,105][243562] Updated weights for policy 0, policy_version 19988 (0.0008) +[2026-06-02 16:31:32,261][243562] Updated weights for policy 0, policy_version 19998 (0.0008) +[2026-06-02 16:31:32,444][243562] Updated weights for policy 0, policy_version 20009 (0.0009) +[2026-06-02 16:31:33,117][243562] Updated weights for policy 0, policy_version 20020 (0.0009) +[2026-06-02 16:31:33,267][243562] Updated weights for policy 0, policy_version 20030 (0.0009) +[2026-06-02 16:31:33,438][243562] Updated weights for policy 0, policy_version 20040 (0.0009) +[2026-06-02 16:31:33,597][243562] Updated weights for policy 0, policy_version 20050 (0.0009) +[2026-06-02 16:31:33,778][243562] Updated weights for policy 0, policy_version 20061 (0.0009) +[2026-06-02 16:31:33,973][243562] Updated weights for policy 0, policy_version 20073 (0.0008) +[2026-06-02 16:31:34,646][243562] Updated weights for policy 0, policy_version 20084 (0.0009) +[2026-06-02 16:31:34,811][235960] Fps is (10 sec: 22937.6, 60 sec: 21299.2, 300 sec: 21438.1). Total num frames: 10289152. Throughput: 0: 21307.7. Samples: 10272384. Policy #0 lag: (min: 62.0, avg: 76.0, max: 126.0) +[2026-06-02 16:31:34,812][235960] Avg episode reward: [(0, '748.799')] +[2026-06-02 16:31:34,820][243562] Updated weights for policy 0, policy_version 20095 (0.0008) +[2026-06-02 16:31:34,979][243562] Updated weights for policy 0, policy_version 20105 (0.0008) +[2026-06-02 16:31:35,151][243562] Updated weights for policy 0, policy_version 20116 (0.0008) +[2026-06-02 16:31:35,325][243562] Updated weights for policy 0, policy_version 20126 (0.0008) +[2026-06-02 16:31:35,487][243562] Updated weights for policy 0, policy_version 20136 (0.0008) +[2026-06-02 16:31:36,178][243562] Updated weights for policy 0, policy_version 20148 (0.0008) +[2026-06-02 16:31:36,350][243562] Updated weights for policy 0, policy_version 20159 (0.0008) +[2026-06-02 16:31:36,513][243562] Updated weights for policy 0, policy_version 20169 (0.0008) +[2026-06-02 16:31:36,693][243562] Updated weights for policy 0, policy_version 20180 (0.0008) +[2026-06-02 16:31:36,874][243562] Updated weights for policy 0, policy_version 20191 (0.0008) +[2026-06-02 16:31:37,055][243562] Updated weights for policy 0, policy_version 20202 (0.0008) +[2026-06-02 16:31:37,724][243562] Updated weights for policy 0, policy_version 20213 (0.0008) +[2026-06-02 16:31:37,877][243562] Updated weights for policy 0, policy_version 20223 (0.0008) +[2026-06-02 16:31:38,043][243562] Updated weights for policy 0, policy_version 20233 (0.0008) +[2026-06-02 16:31:38,208][243562] Updated weights for policy 0, policy_version 20243 (0.0008) +[2026-06-02 16:31:38,370][243562] Updated weights for policy 0, policy_version 20253 (0.0008) +[2026-06-02 16:31:38,531][243562] Updated weights for policy 0, policy_version 20263 (0.0008) +[2026-06-02 16:31:39,202][243562] Updated weights for policy 0, policy_version 20274 (0.0008) +[2026-06-02 16:31:39,354][243562] Updated weights for policy 0, policy_version 20284 (0.0008) +[2026-06-02 16:31:39,509][243562] Updated weights for policy 0, policy_version 20294 (0.0008) +[2026-06-02 16:31:39,673][243562] Updated weights for policy 0, policy_version 20304 (0.0008) +[2026-06-02 16:31:39,811][235960] Fps is (10 sec: 19660.8, 60 sec: 21299.2, 300 sec: 21438.0). Total num frames: 10387456. Throughput: 0: 21304.9. Samples: 10401152. Policy #0 lag: (min: 63.0, avg: 78.1, max: 127.0) +[2026-06-02 16:31:39,812][235960] Avg episode reward: [(0, '768.983')] +[2026-06-02 16:31:39,841][243562] Updated weights for policy 0, policy_version 20314 (0.0009) +[2026-06-02 16:31:40,031][243562] Updated weights for policy 0, policy_version 20326 (0.0008) +[2026-06-02 16:31:40,193][242748] Saving new best policy, reward=768.983! +[2026-06-02 16:31:40,196][243562] Updated weights for policy 0, policy_version 20336 (0.0009) +[2026-06-02 16:31:40,890][243562] Updated weights for policy 0, policy_version 20348 (0.0009) +[2026-06-02 16:31:41,042][243562] Updated weights for policy 0, policy_version 20358 (0.0009) +[2026-06-02 16:31:41,200][243562] Updated weights for policy 0, policy_version 20368 (0.0008) +[2026-06-02 16:31:41,369][243562] Updated weights for policy 0, policy_version 20378 (0.0008) +[2026-06-02 16:31:41,529][243562] Updated weights for policy 0, policy_version 20388 (0.0008) +[2026-06-02 16:31:41,709][243562] Updated weights for policy 0, policy_version 20399 (0.0009) +[2026-06-02 16:31:42,377][243562] Updated weights for policy 0, policy_version 20409 (0.0008) +[2026-06-02 16:31:42,530][243562] Updated weights for policy 0, policy_version 20419 (0.0009) +[2026-06-02 16:31:42,719][243562] Updated weights for policy 0, policy_version 20430 (0.0008) +[2026-06-02 16:31:42,881][243562] Updated weights for policy 0, policy_version 20440 (0.0008) +[2026-06-02 16:31:43,036][243562] Updated weights for policy 0, policy_version 20450 (0.0008) +[2026-06-02 16:31:43,197][243562] Updated weights for policy 0, policy_version 20460 (0.0008) +[2026-06-02 16:31:43,851][243562] Updated weights for policy 0, policy_version 20470 (0.0008) +[2026-06-02 16:31:44,022][243562] Updated weights for policy 0, policy_version 20481 (0.0008) +[2026-06-02 16:31:44,182][243562] Updated weights for policy 0, policy_version 20491 (0.0008) +[2026-06-02 16:31:44,350][243562] Updated weights for policy 0, policy_version 20501 (0.0008) +[2026-06-02 16:31:44,511][243562] Updated weights for policy 0, policy_version 20511 (0.0008) +[2026-06-02 16:31:44,708][243562] Updated weights for policy 0, policy_version 20523 (0.0008) +[2026-06-02 16:31:44,811][235960] Fps is (10 sec: 22937.7, 60 sec: 21845.3, 300 sec: 21549.1). Total num frames: 10518528. Throughput: 0: 21310.6. Samples: 10531200. Policy #0 lag: (min: 63.0, avg: 78.1, max: 127.0) +[2026-06-02 16:31:44,812][235960] Avg episode reward: [(0, '775.015')] +[2026-06-02 16:31:44,816][242748] Saving new best policy, reward=775.015! +[2026-06-02 16:31:45,371][243562] Updated weights for policy 0, policy_version 20533 (0.0008) +[2026-06-02 16:31:45,559][243562] Updated weights for policy 0, policy_version 20545 (0.0008) +[2026-06-02 16:31:45,722][243562] Updated weights for policy 0, policy_version 20555 (0.0008) +[2026-06-02 16:31:45,879][243562] Updated weights for policy 0, policy_version 20565 (0.0008) +[2026-06-02 16:31:46,051][243562] Updated weights for policy 0, policy_version 20575 (0.0009) +[2026-06-02 16:31:46,212][243562] Updated weights for policy 0, policy_version 20585 (0.0008) +[2026-06-02 16:31:46,859][243562] Updated weights for policy 0, policy_version 20595 (0.0008) +[2026-06-02 16:31:47,019][243562] Updated weights for policy 0, policy_version 20605 (0.0008) +[2026-06-02 16:31:47,193][243562] Updated weights for policy 0, policy_version 20616 (0.0009) +[2026-06-02 16:31:47,373][243562] Updated weights for policy 0, policy_version 20627 (0.0008) +[2026-06-02 16:31:47,530][243562] Updated weights for policy 0, policy_version 20637 (0.0008) +[2026-06-02 16:31:47,698][243562] Updated weights for policy 0, policy_version 20647 (0.0008) +[2026-06-02 16:31:48,370][243562] Updated weights for policy 0, policy_version 20658 (0.0008) +[2026-06-02 16:31:48,519][243562] Updated weights for policy 0, policy_version 20668 (0.0008) +[2026-06-02 16:31:48,701][243562] Updated weights for policy 0, policy_version 20679 (0.0008) +[2026-06-02 16:31:48,866][243562] Updated weights for policy 0, policy_version 20689 (0.0009) +[2026-06-02 16:31:49,031][243562] Updated weights for policy 0, policy_version 20699 (0.0008) +[2026-06-02 16:31:49,195][243562] Updated weights for policy 0, policy_version 20709 (0.0008) +[2026-06-02 16:31:49,357][243562] Updated weights for policy 0, policy_version 20719 (0.0008) +[2026-06-02 16:31:49,811][235960] Fps is (10 sec: 22937.6, 60 sec: 21845.3, 300 sec: 21438.0). Total num frames: 10616832. Throughput: 0: 21319.1. Samples: 10596096. Policy #0 lag: (min: 63.0, avg: 78.1, max: 127.0) +[2026-06-02 16:31:49,812][235960] Avg episode reward: [(0, '801.350')] +[2026-06-02 16:31:50,064][243562] Updated weights for policy 0, policy_version 20729 (0.0008) +[2026-06-02 16:31:50,213][243562] Updated weights for policy 0, policy_version 20739 (0.0008) +[2026-06-02 16:31:50,400][243562] Updated weights for policy 0, policy_version 20750 (0.0008) +[2026-06-02 16:31:50,568][243562] Updated weights for policy 0, policy_version 20760 (0.0008) +[2026-06-02 16:31:50,745][243562] Updated weights for policy 0, policy_version 20771 (0.0009) +[2026-06-02 16:31:50,928][243562] Updated weights for policy 0, policy_version 20782 (0.0010) +[2026-06-02 16:31:50,949][242748] Saving new best policy, reward=801.350! +[2026-06-02 16:31:51,582][243562] Updated weights for policy 0, policy_version 20793 (0.0010) +[2026-06-02 16:31:51,740][243562] Updated weights for policy 0, policy_version 20803 (0.0009) +[2026-06-02 16:31:51,902][243562] Updated weights for policy 0, policy_version 20813 (0.0009) +[2026-06-02 16:31:52,091][243562] Updated weights for policy 0, policy_version 20825 (0.0009) +[2026-06-02 16:31:52,264][243562] Updated weights for policy 0, policy_version 20835 (0.0008) +[2026-06-02 16:31:52,423][243562] Updated weights for policy 0, policy_version 20845 (0.0008) +[2026-06-02 16:31:53,092][243562] Updated weights for policy 0, policy_version 20856 (0.0009) +[2026-06-02 16:31:53,252][243562] Updated weights for policy 0, policy_version 20866 (0.0008) +[2026-06-02 16:31:53,405][243562] Updated weights for policy 0, policy_version 20876 (0.0010) +[2026-06-02 16:31:53,578][243562] Updated weights for policy 0, policy_version 20886 (0.0008) +[2026-06-02 16:31:53,738][243562] Updated weights for policy 0, policy_version 20896 (0.0008) +[2026-06-02 16:31:53,906][243562] Updated weights for policy 0, policy_version 20906 (0.0008) +[2026-06-02 16:31:54,575][243562] Updated weights for policy 0, policy_version 20917 (0.0009) +[2026-06-02 16:31:54,757][243562] Updated weights for policy 0, policy_version 20928 (0.0008) +[2026-06-02 16:31:54,811][235960] Fps is (10 sec: 19660.7, 60 sec: 21299.2, 300 sec: 21438.0). Total num frames: 10715136. Throughput: 0: 21339.0. Samples: 10727168. Policy #0 lag: (min: 28.0, avg: 42.2, max: 92.0) +[2026-06-02 16:31:54,812][235960] Avg episode reward: [(0, '832.192')] +[2026-06-02 16:31:54,915][243562] Updated weights for policy 0, policy_version 20938 (0.0008) +[2026-06-02 16:31:55,080][243562] Updated weights for policy 0, policy_version 20948 (0.0008) +[2026-06-02 16:31:55,281][243562] Updated weights for policy 0, policy_version 20960 (0.0008) +[2026-06-02 16:31:55,442][243562] Updated weights for policy 0, policy_version 20970 (0.0008) +[2026-06-02 16:31:55,532][242748] Saving new best policy, reward=832.192! +[2026-06-02 16:31:56,089][243562] Updated weights for policy 0, policy_version 20981 (0.0009) +[2026-06-02 16:31:56,238][243562] Updated weights for policy 0, policy_version 20991 (0.0008) +[2026-06-02 16:31:56,425][243562] Updated weights for policy 0, policy_version 21002 (0.0008) +[2026-06-02 16:31:56,601][243562] Updated weights for policy 0, policy_version 21013 (0.0008) +[2026-06-02 16:31:56,776][243562] Updated weights for policy 0, policy_version 21023 (0.0008) +[2026-06-02 16:31:56,936][243562] Updated weights for policy 0, policy_version 21033 (0.0008) +[2026-06-02 16:31:57,600][243562] Updated weights for policy 0, policy_version 21043 (0.0008) +[2026-06-02 16:31:57,758][243562] Updated weights for policy 0, policy_version 21053 (0.0008) +[2026-06-02 16:31:57,913][243562] Updated weights for policy 0, policy_version 21063 (0.0008) +[2026-06-02 16:31:58,089][243562] Updated weights for policy 0, policy_version 21074 (0.0008) +[2026-06-02 16:31:58,256][243562] Updated weights for policy 0, policy_version 21084 (0.0008) +[2026-06-02 16:31:58,415][243562] Updated weights for policy 0, policy_version 21094 (0.0008) +[2026-06-02 16:31:59,117][243562] Updated weights for policy 0, policy_version 21105 (0.0009) +[2026-06-02 16:31:59,262][243562] Updated weights for policy 0, policy_version 21115 (0.0008) +[2026-06-02 16:31:59,424][243562] Updated weights for policy 0, policy_version 21125 (0.0008) +[2026-06-02 16:31:59,584][243562] Updated weights for policy 0, policy_version 21135 (0.0008) +[2026-06-02 16:31:59,772][243562] Updated weights for policy 0, policy_version 21146 (0.0008) +[2026-06-02 16:31:59,811][235960] Fps is (10 sec: 19660.7, 60 sec: 21299.2, 300 sec: 21438.0). Total num frames: 10813440. Throughput: 0: 21358.9. Samples: 10857472. Policy #0 lag: (min: 28.0, avg: 42.2, max: 92.0) +[2026-06-02 16:31:59,812][235960] Avg episode reward: [(0, '795.860')] +[2026-06-02 16:31:59,939][243562] Updated weights for policy 0, policy_version 21156 (0.0008) +[2026-06-02 16:32:00,101][243562] Updated weights for policy 0, policy_version 21166 (0.0008) +[2026-06-02 16:32:00,756][243562] Updated weights for policy 0, policy_version 21176 (0.0008) +[2026-06-02 16:32:00,913][243562] Updated weights for policy 0, policy_version 21186 (0.0008) +[2026-06-02 16:32:01,082][243562] Updated weights for policy 0, policy_version 21196 (0.0008) +[2026-06-02 16:32:01,245][243562] Updated weights for policy 0, policy_version 21206 (0.0008) +[2026-06-02 16:32:01,418][243562] Updated weights for policy 0, policy_version 21217 (0.0008) +[2026-06-02 16:32:01,592][243562] Updated weights for policy 0, policy_version 21227 (0.0008) +[2026-06-02 16:32:02,235][243562] Updated weights for policy 0, policy_version 21237 (0.0008) +[2026-06-02 16:32:02,392][243562] Updated weights for policy 0, policy_version 21247 (0.0008) +[2026-06-02 16:32:02,558][243562] Updated weights for policy 0, policy_version 21257 (0.0008) +[2026-06-02 16:32:02,738][243562] Updated weights for policy 0, policy_version 21268 (0.0008) +[2026-06-02 16:32:02,919][243562] Updated weights for policy 0, policy_version 21279 (0.0008) +[2026-06-02 16:32:03,082][243562] Updated weights for policy 0, policy_version 21289 (0.0008) +[2026-06-02 16:32:03,742][243562] Updated weights for policy 0, policy_version 21300 (0.0008) +[2026-06-02 16:32:03,910][243562] Updated weights for policy 0, policy_version 21311 (0.0008) +[2026-06-02 16:32:04,066][243562] Updated weights for policy 0, policy_version 21321 (0.0009) +[2026-06-02 16:32:04,230][243562] Updated weights for policy 0, policy_version 21331 (0.0008) +[2026-06-02 16:32:04,402][243562] Updated weights for policy 0, policy_version 21341 (0.0008) +[2026-06-02 16:32:04,563][243562] Updated weights for policy 0, policy_version 21351 (0.0008) +[2026-06-02 16:32:04,811][235960] Fps is (10 sec: 22937.6, 60 sec: 21845.3, 300 sec: 21438.0). Total num frames: 10944512. Throughput: 0: 21421.5. Samples: 10922752. Policy #0 lag: (min: 28.0, avg: 42.2, max: 92.0) +[2026-06-02 16:32:04,812][235960] Avg episode reward: [(0, '816.106')] +[2026-06-02 16:32:05,248][243562] Updated weights for policy 0, policy_version 21361 (0.0009) +[2026-06-02 16:32:05,400][243562] Updated weights for policy 0, policy_version 21371 (0.0008) +[2026-06-02 16:32:05,557][243562] Updated weights for policy 0, policy_version 21381 (0.0008) +[2026-06-02 16:32:05,719][243562] Updated weights for policy 0, policy_version 21391 (0.0008) +[2026-06-02 16:32:05,906][243562] Updated weights for policy 0, policy_version 21402 (0.0008) +[2026-06-02 16:32:06,066][243562] Updated weights for policy 0, policy_version 21412 (0.0008) +[2026-06-02 16:32:06,228][243562] Updated weights for policy 0, policy_version 21422 (0.0008) +[2026-06-02 16:32:06,903][243562] Updated weights for policy 0, policy_version 21432 (0.0009) +[2026-06-02 16:32:07,057][243562] Updated weights for policy 0, policy_version 21442 (0.0008) +[2026-06-02 16:32:07,224][243562] Updated weights for policy 0, policy_version 21452 (0.0008) +[2026-06-02 16:32:07,387][243562] Updated weights for policy 0, policy_version 21462 (0.0008) +[2026-06-02 16:32:07,551][243562] Updated weights for policy 0, policy_version 21472 (0.0008) +[2026-06-02 16:32:07,733][243562] Updated weights for policy 0, policy_version 21483 (0.0008) +[2026-06-02 16:32:08,372][243562] Updated weights for policy 0, policy_version 21493 (0.0008) +[2026-06-02 16:32:08,536][243562] Updated weights for policy 0, policy_version 21503 (0.0008) +[2026-06-02 16:32:08,699][243562] Updated weights for policy 0, policy_version 21513 (0.0008) +[2026-06-02 16:32:08,859][243562] Updated weights for policy 0, policy_version 21523 (0.0008) +[2026-06-02 16:32:09,019][243562] Updated weights for policy 0, policy_version 21533 (0.0008) +[2026-06-02 16:32:09,205][243562] Updated weights for policy 0, policy_version 21544 (0.0008) +[2026-06-02 16:32:09,811][235960] Fps is (10 sec: 22937.5, 60 sec: 21299.2, 300 sec: 21438.0). Total num frames: 11042816. Throughput: 0: 21540.9. Samples: 11052928. Policy #0 lag: (min: 57.0, avg: 86.6, max: 121.0) +[2026-06-02 16:32:09,812][235960] Avg episode reward: [(0, '800.812')] +[2026-06-02 16:32:09,850][243562] Updated weights for policy 0, policy_version 21554 (0.0008) +[2026-06-02 16:32:10,010][243562] Updated weights for policy 0, policy_version 21564 (0.0009) +[2026-06-02 16:32:10,159][243562] Updated weights for policy 0, policy_version 21574 (0.0008) +[2026-06-02 16:32:10,324][243562] Updated weights for policy 0, policy_version 21584 (0.0008) +[2026-06-02 16:32:10,499][243562] Updated weights for policy 0, policy_version 21594 (0.0008) +[2026-06-02 16:32:10,676][243562] Updated weights for policy 0, policy_version 21605 (0.0008) +[2026-06-02 16:32:10,843][243562] Updated weights for policy 0, policy_version 21615 (0.0008) +[2026-06-02 16:32:11,484][243562] Updated weights for policy 0, policy_version 21625 (0.0009) +[2026-06-02 16:32:11,677][243562] Updated weights for policy 0, policy_version 21637 (0.0009) +[2026-06-02 16:32:11,843][243562] Updated weights for policy 0, policy_version 21647 (0.0008) +[2026-06-02 16:32:12,021][243562] Updated weights for policy 0, policy_version 21658 (0.0008) +[2026-06-02 16:32:12,203][243562] Updated weights for policy 0, policy_version 21669 (0.0008) +[2026-06-02 16:32:12,379][243562] Updated weights for policy 0, policy_version 21680 (0.0008) +[2026-06-02 16:32:13,065][243562] Updated weights for policy 0, policy_version 21691 (0.0010) +[2026-06-02 16:32:13,236][243562] Updated weights for policy 0, policy_version 21702 (0.0008) +[2026-06-02 16:32:13,437][243562] Updated weights for policy 0, policy_version 21714 (0.0008) +[2026-06-02 16:32:13,615][243562] Updated weights for policy 0, policy_version 21725 (0.0008) +[2026-06-02 16:32:13,775][243562] Updated weights for policy 0, policy_version 21735 (0.0008) +[2026-06-02 16:32:14,443][243562] Updated weights for policy 0, policy_version 21745 (0.0008) +[2026-06-02 16:32:14,594][243562] Updated weights for policy 0, policy_version 21755 (0.0008) +[2026-06-02 16:32:14,755][243562] Updated weights for policy 0, policy_version 21765 (0.0008) +[2026-06-02 16:32:14,811][235960] Fps is (10 sec: 19660.8, 60 sec: 21299.2, 300 sec: 21438.0). Total num frames: 11141120. Throughput: 0: 21626.3. Samples: 11182592. Policy #0 lag: (min: 57.0, avg: 86.6, max: 121.0) +[2026-06-02 16:32:14,812][235960] Avg episode reward: [(0, '807.701')] +[2026-06-02 16:32:14,926][243562] Updated weights for policy 0, policy_version 21776 (0.0009) +[2026-06-02 16:32:15,097][243562] Updated weights for policy 0, policy_version 21786 (0.0008) +[2026-06-02 16:32:15,263][243562] Updated weights for policy 0, policy_version 21796 (0.0008) +[2026-06-02 16:32:15,443][243562] Updated weights for policy 0, policy_version 21807 (0.0008) +[2026-06-02 16:32:16,092][243562] Updated weights for policy 0, policy_version 21817 (0.0005) +[2026-06-02 16:32:16,258][243562] Updated weights for policy 0, policy_version 21827 (0.0004) +[2026-06-02 16:32:16,415][243562] Updated weights for policy 0, policy_version 21837 (0.0004) +[2026-06-02 16:32:16,585][243562] Updated weights for policy 0, policy_version 21847 (0.0008) +[2026-06-02 16:32:16,739][243562] Updated weights for policy 0, policy_version 21857 (0.0010) +[2026-06-02 16:32:16,905][243562] Updated weights for policy 0, policy_version 21867 (0.0007) +[2026-06-02 16:32:17,562][243562] Updated weights for policy 0, policy_version 21877 (0.0009) +[2026-06-02 16:32:17,721][243562] Updated weights for policy 0, policy_version 21887 (0.0008) +[2026-06-02 16:32:17,870][243562] Updated weights for policy 0, policy_version 21897 (0.0008) +[2026-06-02 16:32:18,036][243562] Updated weights for policy 0, policy_version 21907 (0.0008) +[2026-06-02 16:32:18,207][243562] Updated weights for policy 0, policy_version 21917 (0.0008) +[2026-06-02 16:32:18,367][243562] Updated weights for policy 0, policy_version 21927 (0.0008) +[2026-06-02 16:32:19,029][243562] Updated weights for policy 0, policy_version 21937 (0.0009) +[2026-06-02 16:32:19,179][243562] Updated weights for policy 0, policy_version 21947 (0.0008) +[2026-06-02 16:32:19,332][243562] Updated weights for policy 0, policy_version 21957 (0.0008) +[2026-06-02 16:32:19,504][243562] Updated weights for policy 0, policy_version 21967 (0.0008) +[2026-06-02 16:32:19,667][243562] Updated weights for policy 0, policy_version 21977 (0.0008) +[2026-06-02 16:32:19,811][235960] Fps is (10 sec: 19660.9, 60 sec: 21299.2, 300 sec: 21438.0). Total num frames: 11239424. Throughput: 0: 21669.0. Samples: 11247488. Policy #0 lag: (min: 57.0, avg: 86.6, max: 121.0) +[2026-06-02 16:32:19,812][235960] Avg episode reward: [(0, '807.855')] +[2026-06-02 16:32:19,826][243562] Updated weights for policy 0, policy_version 21987 (0.0008) +[2026-06-02 16:32:19,993][243562] Updated weights for policy 0, policy_version 21997 (0.0008) +[2026-06-02 16:32:20,667][243562] Updated weights for policy 0, policy_version 22007 (0.0009) +[2026-06-02 16:32:20,826][243562] Updated weights for policy 0, policy_version 22017 (0.0008) +[2026-06-02 16:32:20,983][243562] Updated weights for policy 0, policy_version 22027 (0.0008) +[2026-06-02 16:32:21,148][243562] Updated weights for policy 0, policy_version 22037 (0.0008) +[2026-06-02 16:32:21,322][243562] Updated weights for policy 0, policy_version 22048 (0.0009) +[2026-06-02 16:32:21,484][243562] Updated weights for policy 0, policy_version 22058 (0.0008) +[2026-06-02 16:32:22,154][243562] Updated weights for policy 0, policy_version 22069 (0.0008) +[2026-06-02 16:32:22,341][243562] Updated weights for policy 0, policy_version 22081 (0.0008) +[2026-06-02 16:32:22,496][243562] Updated weights for policy 0, policy_version 22091 (0.0008) +[2026-06-02 16:32:22,651][243562] Updated weights for policy 0, policy_version 22101 (0.0008) +[2026-06-02 16:32:22,813][243562] Updated weights for policy 0, policy_version 22111 (0.0008) +[2026-06-02 16:32:22,971][243562] Updated weights for policy 0, policy_version 22121 (0.0009) +[2026-06-02 16:32:23,652][243562] Updated weights for policy 0, policy_version 22131 (0.0009) +[2026-06-02 16:32:23,815][243562] Updated weights for policy 0, policy_version 22142 (0.0009) +[2026-06-02 16:32:24,033][243562] Updated weights for policy 0, policy_version 22156 (0.0009) +[2026-06-02 16:32:24,206][243562] Updated weights for policy 0, policy_version 22167 (0.0009) +[2026-06-02 16:32:24,387][243562] Updated weights for policy 0, policy_version 22178 (0.0009) +[2026-06-02 16:32:24,578][243562] Updated weights for policy 0, policy_version 22190 (0.0009) +[2026-06-02 16:32:24,811][235960] Fps is (10 sec: 22937.6, 60 sec: 21845.3, 300 sec: 21438.0). Total num frames: 11370496. Throughput: 0: 21694.6. Samples: 11377408. Policy #0 lag: (min: 63.0, avg: 78.3, max: 127.0) +[2026-06-02 16:32:24,812][235960] Avg episode reward: [(0, '809.431')] +[2026-06-02 16:32:25,243][243562] Updated weights for policy 0, policy_version 22200 (0.0009) +[2026-06-02 16:32:25,406][243562] Updated weights for policy 0, policy_version 22210 (0.0009) +[2026-06-02 16:32:25,575][243562] Updated weights for policy 0, policy_version 22221 (0.0009) +[2026-06-02 16:32:25,783][243562] Updated weights for policy 0, policy_version 22234 (0.0009) +[2026-06-02 16:32:25,950][243562] Updated weights for policy 0, policy_version 22244 (0.0008) +[2026-06-02 16:32:26,121][243562] Updated weights for policy 0, policy_version 22255 (0.0009) +[2026-06-02 16:32:26,793][243562] Updated weights for policy 0, policy_version 22265 (0.0009) +[2026-06-02 16:32:26,956][243562] Updated weights for policy 0, policy_version 22276 (0.0009) +[2026-06-02 16:32:27,123][243562] Updated weights for policy 0, policy_version 22286 (0.0008) +[2026-06-02 16:32:27,312][243562] Updated weights for policy 0, policy_version 22298 (0.0009) +[2026-06-02 16:32:27,508][243562] Updated weights for policy 0, policy_version 22310 (0.0008) +[2026-06-02 16:32:27,663][243562] Updated weights for policy 0, policy_version 22320 (0.0008) +[2026-06-02 16:32:28,366][243562] Updated weights for policy 0, policy_version 22330 (0.0009) +[2026-06-02 16:32:28,546][243562] Updated weights for policy 0, policy_version 22342 (0.0008) +[2026-06-02 16:32:28,726][243562] Updated weights for policy 0, policy_version 22353 (0.0008) +[2026-06-02 16:32:28,879][243562] Updated weights for policy 0, policy_version 22363 (0.0008) +[2026-06-02 16:32:29,053][243562] Updated weights for policy 0, policy_version 22373 (0.0008) +[2026-06-02 16:32:29,220][243562] Updated weights for policy 0, policy_version 22384 (0.0008) +[2026-06-02 16:32:29,811][235960] Fps is (10 sec: 22937.6, 60 sec: 21299.2, 300 sec: 21438.0). Total num frames: 11468800. Throughput: 0: 21708.8. Samples: 11508096. Policy #0 lag: (min: 63.0, avg: 78.3, max: 127.0) +[2026-06-02 16:32:29,812][235960] Avg episode reward: [(0, '871.869')] +[2026-06-02 16:32:29,889][243562] Updated weights for policy 0, policy_version 22394 (0.0009) +[2026-06-02 16:32:30,038][243562] Updated weights for policy 0, policy_version 22404 (0.0008) +[2026-06-02 16:32:30,214][243562] Updated weights for policy 0, policy_version 22415 (0.0009) +[2026-06-02 16:32:30,378][243562] Updated weights for policy 0, policy_version 22425 (0.0008) +[2026-06-02 16:32:30,551][243562] Updated weights for policy 0, policy_version 22436 (0.0008) +[2026-06-02 16:32:30,712][243562] Updated weights for policy 0, policy_version 22446 (0.0008) +[2026-06-02 16:32:30,737][242748] Saving new best policy, reward=871.869! +[2026-06-02 16:32:31,386][243562] Updated weights for policy 0, policy_version 22457 (0.0008) +[2026-06-02 16:32:31,550][243562] Updated weights for policy 0, policy_version 22468 (0.0008) +[2026-06-02 16:32:31,734][243562] Updated weights for policy 0, policy_version 22479 (0.0009) +[2026-06-02 16:32:31,888][243562] Updated weights for policy 0, policy_version 22489 (0.0008) +[2026-06-02 16:32:32,054][243562] Updated weights for policy 0, policy_version 22499 (0.0008) +[2026-06-02 16:32:32,224][243562] Updated weights for policy 0, policy_version 22510 (0.0008) +[2026-06-02 16:32:32,899][243562] Updated weights for policy 0, policy_version 22520 (0.0008) +[2026-06-02 16:32:33,048][243562] Updated weights for policy 0, policy_version 22530 (0.0008) +[2026-06-02 16:32:33,206][243562] Updated weights for policy 0, policy_version 22540 (0.0008) +[2026-06-02 16:32:33,389][243562] Updated weights for policy 0, policy_version 22551 (0.0008) +[2026-06-02 16:32:33,550][243562] Updated weights for policy 0, policy_version 22561 (0.0008) +[2026-06-02 16:32:33,736][243562] Updated weights for policy 0, policy_version 22573 (0.0008) +[2026-06-02 16:32:34,410][243562] Updated weights for policy 0, policy_version 22584 (0.0005) +[2026-06-02 16:32:34,556][243562] Updated weights for policy 0, policy_version 22594 (0.0004) +[2026-06-02 16:32:34,736][243562] Updated weights for policy 0, policy_version 22605 (0.0004) +[2026-06-02 16:32:34,811][235960] Fps is (10 sec: 19660.7, 60 sec: 21299.2, 300 sec: 21438.0). Total num frames: 11567104. Throughput: 0: 21657.6. Samples: 11570688. Policy #0 lag: (min: 63.0, avg: 78.3, max: 127.0) +[2026-06-02 16:32:34,812][235960] Avg episode reward: [(0, '896.552')] +[2026-06-02 16:32:34,926][243562] Updated weights for policy 0, policy_version 22617 (0.0004) +[2026-06-02 16:32:35,087][243562] Updated weights for policy 0, policy_version 22627 (0.0004) +[2026-06-02 16:32:35,253][243562] Updated weights for policy 0, policy_version 22637 (0.0004) +[2026-06-02 16:32:35,293][242748] Saving new best policy, reward=896.552! +[2026-06-02 16:32:35,971][243562] Updated weights for policy 0, policy_version 22651 (0.0008) +[2026-06-02 16:32:36,138][243562] Updated weights for policy 0, policy_version 22662 (0.0008) +[2026-06-02 16:32:36,317][243562] Updated weights for policy 0, policy_version 22673 (0.0008) +[2026-06-02 16:32:36,497][243562] Updated weights for policy 0, policy_version 22684 (0.0009) +[2026-06-02 16:32:36,677][243562] Updated weights for policy 0, policy_version 22695 (0.0008) +[2026-06-02 16:32:37,353][243562] Updated weights for policy 0, policy_version 22705 (0.0009) +[2026-06-02 16:32:37,529][243562] Updated weights for policy 0, policy_version 22717 (0.0008) +[2026-06-02 16:32:37,713][243562] Updated weights for policy 0, policy_version 22728 (0.0009) +[2026-06-02 16:32:37,887][243562] Updated weights for policy 0, policy_version 22739 (0.0008) +[2026-06-02 16:32:38,057][243562] Updated weights for policy 0, policy_version 22749 (0.0008) +[2026-06-02 16:32:38,217][243562] Updated weights for policy 0, policy_version 22759 (0.0008) +[2026-06-02 16:32:38,882][243562] Updated weights for policy 0, policy_version 22769 (0.0008) +[2026-06-02 16:32:39,054][243562] Updated weights for policy 0, policy_version 22780 (0.0009) +[2026-06-02 16:32:39,224][243562] Updated weights for policy 0, policy_version 22790 (0.0008) +[2026-06-02 16:32:39,380][243562] Updated weights for policy 0, policy_version 22800 (0.0008) +[2026-06-02 16:32:39,543][243562] Updated weights for policy 0, policy_version 22810 (0.0008) +[2026-06-02 16:32:39,714][243562] Updated weights for policy 0, policy_version 22820 (0.0008) +[2026-06-02 16:32:39,811][235960] Fps is (10 sec: 19660.8, 60 sec: 21299.2, 300 sec: 21438.0). Total num frames: 11665408. Throughput: 0: 21558.0. Samples: 11697280. Policy #0 lag: (min: 63.0, avg: 78.3, max: 127.0) +[2026-06-02 16:32:39,812][235960] Avg episode reward: [(0, '908.109')] +[2026-06-02 16:32:39,878][243562] Updated weights for policy 0, policy_version 22830 (0.0008) +[2026-06-02 16:32:39,906][242748] Saving new best policy, reward=908.109! +[2026-06-02 16:32:40,517][243562] Updated weights for policy 0, policy_version 22840 (0.0008) +[2026-06-02 16:32:40,676][243562] Updated weights for policy 0, policy_version 22850 (0.0009) +[2026-06-02 16:32:40,836][243562] Updated weights for policy 0, policy_version 22860 (0.0008) +[2026-06-02 16:32:41,004][243562] Updated weights for policy 0, policy_version 22870 (0.0009) +[2026-06-02 16:32:41,174][243562] Updated weights for policy 0, policy_version 22880 (0.0008) +[2026-06-02 16:32:41,335][243562] Updated weights for policy 0, policy_version 22890 (0.0008) +[2026-06-02 16:32:41,978][243562] Updated weights for policy 0, policy_version 22900 (0.0009) +[2026-06-02 16:32:42,151][243562] Updated weights for policy 0, policy_version 22911 (0.0008) +[2026-06-02 16:32:42,314][243562] Updated weights for policy 0, policy_version 22921 (0.0008) +[2026-06-02 16:32:42,472][243562] Updated weights for policy 0, policy_version 22931 (0.0008) +[2026-06-02 16:32:42,639][243562] Updated weights for policy 0, policy_version 22941 (0.0008) +[2026-06-02 16:32:42,809][243562] Updated weights for policy 0, policy_version 22951 (0.0009) +[2026-06-02 16:32:43,504][243562] Updated weights for policy 0, policy_version 22963 (0.0009) +[2026-06-02 16:32:43,658][243562] Updated weights for policy 0, policy_version 22973 (0.0008) +[2026-06-02 16:32:43,837][243562] Updated weights for policy 0, policy_version 22984 (0.0008) +[2026-06-02 16:32:44,015][243562] Updated weights for policy 0, policy_version 22995 (0.0008) +[2026-06-02 16:32:44,175][243562] Updated weights for policy 0, policy_version 23005 (0.0008) +[2026-06-02 16:32:44,342][243562] Updated weights for policy 0, policy_version 23015 (0.0008) +[2026-06-02 16:32:44,811][235960] Fps is (10 sec: 22937.7, 60 sec: 21299.2, 300 sec: 21438.0). Total num frames: 11796480. Throughput: 0: 21455.7. Samples: 11822976. Policy #0 lag: (min: 42.0, avg: 82.5, max: 106.0) +[2026-06-02 16:32:44,812][235960] Avg episode reward: [(0, '937.345')] +[2026-06-02 16:32:44,817][242748] Saving new best policy, reward=937.345! +[2026-06-02 16:32:45,018][243562] Updated weights for policy 0, policy_version 23025 (0.0008) +[2026-06-02 16:32:45,169][243562] Updated weights for policy 0, policy_version 23035 (0.0008) +[2026-06-02 16:32:45,330][243562] Updated weights for policy 0, policy_version 23045 (0.0008) +[2026-06-02 16:32:45,485][243562] Updated weights for policy 0, policy_version 23055 (0.0008) +[2026-06-02 16:32:45,653][243562] Updated weights for policy 0, policy_version 23065 (0.0009) +[2026-06-02 16:32:45,833][243562] Updated weights for policy 0, policy_version 23076 (0.0010) +[2026-06-02 16:32:46,025][243562] Updated weights for policy 0, policy_version 23088 (0.0009) +[2026-06-02 16:32:46,680][243562] Updated weights for policy 0, policy_version 23098 (0.0008) +[2026-06-02 16:32:46,856][243562] Updated weights for policy 0, policy_version 23109 (0.0008) +[2026-06-02 16:32:47,041][243562] Updated weights for policy 0, policy_version 23120 (0.0009) +[2026-06-02 16:32:47,201][243562] Updated weights for policy 0, policy_version 23130 (0.0008) +[2026-06-02 16:32:47,367][243562] Updated weights for policy 0, policy_version 23140 (0.0008) +[2026-06-02 16:32:47,546][243562] Updated weights for policy 0, policy_version 23151 (0.0008) +[2026-06-02 16:32:48,190][243562] Updated weights for policy 0, policy_version 23161 (0.0008) +[2026-06-02 16:32:48,355][243562] Updated weights for policy 0, policy_version 23171 (0.0008) +[2026-06-02 16:32:48,520][243562] Updated weights for policy 0, policy_version 23181 (0.0008) +[2026-06-02 16:32:48,680][243562] Updated weights for policy 0, policy_version 23191 (0.0008) +[2026-06-02 16:32:48,845][243562] Updated weights for policy 0, policy_version 23201 (0.0008) +[2026-06-02 16:32:49,043][243562] Updated weights for policy 0, policy_version 23213 (0.0008) +[2026-06-02 16:32:49,704][243562] Updated weights for policy 0, policy_version 23223 (0.0008) +[2026-06-02 16:32:49,811][235960] Fps is (10 sec: 22937.6, 60 sec: 21299.2, 300 sec: 21438.0). Total num frames: 11894784. Throughput: 0: 21407.3. Samples: 11886080. Policy #0 lag: (min: 42.0, avg: 82.5, max: 106.0) +[2026-06-02 16:32:49,812][235960] Avg episode reward: [(0, '951.482')] +[2026-06-02 16:32:49,861][243562] Updated weights for policy 0, policy_version 23233 (0.0008) +[2026-06-02 16:32:50,072][243562] Updated weights for policy 0, policy_version 23246 (0.0008) +[2026-06-02 16:32:50,238][243562] Updated weights for policy 0, policy_version 23256 (0.0008) +[2026-06-02 16:32:50,403][243562] Updated weights for policy 0, policy_version 23266 (0.0007) +[2026-06-02 16:32:50,562][243562] Updated weights for policy 0, policy_version 23276 (0.0008) +[2026-06-02 16:32:50,631][242748] Saving new best policy, reward=951.482! +[2026-06-02 16:32:51,223][243562] Updated weights for policy 0, policy_version 23286 (0.0009) +[2026-06-02 16:32:51,382][243562] Updated weights for policy 0, policy_version 23296 (0.0008) +[2026-06-02 16:32:51,542][243562] Updated weights for policy 0, policy_version 23306 (0.0008) +[2026-06-02 16:32:51,706][243562] Updated weights for policy 0, policy_version 23316 (0.0008) +[2026-06-02 16:32:51,867][243562] Updated weights for policy 0, policy_version 23326 (0.0008) +[2026-06-02 16:32:52,056][243562] Updated weights for policy 0, policy_version 23337 (0.0008) +[2026-06-02 16:32:52,730][243562] Updated weights for policy 0, policy_version 23349 (0.0008) +[2026-06-02 16:32:52,885][243562] Updated weights for policy 0, policy_version 23359 (0.0008) +[2026-06-02 16:32:53,047][243562] Updated weights for policy 0, policy_version 23369 (0.0008) +[2026-06-02 16:32:53,222][243562] Updated weights for policy 0, policy_version 23380 (0.0008) +[2026-06-02 16:32:53,427][243562] Updated weights for policy 0, policy_version 23392 (0.0009) +[2026-06-02 16:32:53,592][243562] Updated weights for policy 0, policy_version 23402 (0.0008) +[2026-06-02 16:32:54,254][243562] Updated weights for policy 0, policy_version 23412 (0.0009) +[2026-06-02 16:32:54,413][243562] Updated weights for policy 0, policy_version 23422 (0.0008) +[2026-06-02 16:32:54,576][243562] Updated weights for policy 0, policy_version 23432 (0.0007) +[2026-06-02 16:32:54,776][243562] Updated weights for policy 0, policy_version 23444 (0.0009) +[2026-06-02 16:32:54,811][235960] Fps is (10 sec: 19660.9, 60 sec: 21299.2, 300 sec: 21438.0). Total num frames: 11993088. Throughput: 0: 21367.5. Samples: 12014464. Policy #0 lag: (min: 42.0, avg: 82.5, max: 106.0) +[2026-06-02 16:32:54,812][235960] Avg episode reward: [(0, '954.920')] +[2026-06-02 16:32:54,960][243562] Updated weights for policy 0, policy_version 23455 (0.0009) +[2026-06-02 16:32:55,117][243562] Updated weights for policy 0, policy_version 23465 (0.0008) +[2026-06-02 16:32:55,227][242748] Saving new best policy, reward=954.920! +[2026-06-02 16:32:55,784][243562] Updated weights for policy 0, policy_version 23475 (0.0009) +[2026-06-02 16:32:55,950][243562] Updated weights for policy 0, policy_version 23486 (0.0009) +[2026-06-02 16:32:56,119][243562] Updated weights for policy 0, policy_version 23496 (0.0008) +[2026-06-02 16:32:56,281][243562] Updated weights for policy 0, policy_version 23506 (0.0008) +[2026-06-02 16:32:56,451][243562] Updated weights for policy 0, policy_version 23516 (0.0008) +[2026-06-02 16:32:56,616][243562] Updated weights for policy 0, policy_version 23526 (0.0008) +[2026-06-02 16:32:56,775][243562] Updated weights for policy 0, policy_version 23536 (0.0008) +[2026-06-02 16:32:57,461][243562] Updated weights for policy 0, policy_version 23548 (0.0008) +[2026-06-02 16:32:57,622][243562] Updated weights for policy 0, policy_version 23558 (0.0008) +[2026-06-02 16:32:57,782][243562] Updated weights for policy 0, policy_version 23568 (0.0008) +[2026-06-02 16:32:57,937][243562] Updated weights for policy 0, policy_version 23578 (0.0008) +[2026-06-02 16:32:58,128][243562] Updated weights for policy 0, policy_version 23589 (0.0008) +[2026-06-02 16:32:58,817][243562] Updated weights for policy 0, policy_version 23601 (0.0008) +[2026-06-02 16:32:58,977][243562] Updated weights for policy 0, policy_version 23611 (0.0009) +[2026-06-02 16:32:59,135][243562] Updated weights for policy 0, policy_version 23621 (0.0009) +[2026-06-02 16:32:59,296][243562] Updated weights for policy 0, policy_version 23631 (0.0008) +[2026-06-02 16:32:59,465][243562] Updated weights for policy 0, policy_version 23641 (0.0009) +[2026-06-02 16:32:59,648][243562] Updated weights for policy 0, policy_version 23652 (0.0009) +[2026-06-02 16:32:59,811][235960] Fps is (10 sec: 19660.7, 60 sec: 21299.2, 300 sec: 21438.0). Total num frames: 12091392. Throughput: 0: 21393.1. Samples: 12145280. Policy #0 lag: (min: 38.0, avg: 52.7, max: 102.0) +[2026-06-02 16:32:59,812][235960] Avg episode reward: [(0, '975.314')] +[2026-06-02 16:32:59,813][243562] Updated weights for policy 0, policy_version 23662 (0.0009) +[2026-06-02 16:32:59,838][242748] Saving new best policy, reward=975.314! +[2026-06-02 16:33:00,460][243562] Updated weights for policy 0, policy_version 23672 (0.0009) +[2026-06-02 16:33:00,615][243562] Updated weights for policy 0, policy_version 23682 (0.0008) +[2026-06-02 16:33:00,783][243562] Updated weights for policy 0, policy_version 23692 (0.0009) +[2026-06-02 16:33:00,958][243562] Updated weights for policy 0, policy_version 23703 (0.0009) +[2026-06-02 16:33:01,129][243562] Updated weights for policy 0, policy_version 23713 (0.0008) +[2026-06-02 16:33:01,294][243562] Updated weights for policy 0, policy_version 23723 (0.0009) +[2026-06-02 16:33:01,948][243562] Updated weights for policy 0, policy_version 23734 (0.0008) +[2026-06-02 16:33:02,130][243562] Updated weights for policy 0, policy_version 23745 (0.0008) +[2026-06-02 16:33:02,287][243562] Updated weights for policy 0, policy_version 23755 (0.0008) +[2026-06-02 16:33:02,456][243562] Updated weights for policy 0, policy_version 23765 (0.0008) +[2026-06-02 16:33:02,633][243562] Updated weights for policy 0, policy_version 23776 (0.0008) +[2026-06-02 16:33:02,793][243562] Updated weights for policy 0, policy_version 23786 (0.0008) +[2026-06-02 16:33:03,473][243562] Updated weights for policy 0, policy_version 23797 (0.0008) +[2026-06-02 16:33:03,629][243562] Updated weights for policy 0, policy_version 23807 (0.0008) +[2026-06-02 16:33:03,797][243562] Updated weights for policy 0, policy_version 23817 (0.0008) +[2026-06-02 16:33:03,977][243562] Updated weights for policy 0, policy_version 23828 (0.0008) +[2026-06-02 16:33:04,143][243562] Updated weights for policy 0, policy_version 23838 (0.0008) +[2026-06-02 16:33:04,311][243562] Updated weights for policy 0, policy_version 23848 (0.0008) +[2026-06-02 16:33:04,811][235960] Fps is (10 sec: 22937.6, 60 sec: 21299.2, 300 sec: 21438.0). Total num frames: 12222464. Throughput: 0: 21395.9. Samples: 12210304. Policy #0 lag: (min: 38.0, avg: 52.7, max: 102.0) +[2026-06-02 16:33:04,812][235960] Avg episode reward: [(0, '1011.564')] +[2026-06-02 16:33:04,968][243562] Updated weights for policy 0, policy_version 23858 (0.0008) +[2026-06-02 16:33:05,138][243562] Updated weights for policy 0, policy_version 23869 (0.0008) +[2026-06-02 16:33:05,296][243562] Updated weights for policy 0, policy_version 23879 (0.0008) +[2026-06-02 16:33:05,470][243562] Updated weights for policy 0, policy_version 23890 (0.0008) +[2026-06-02 16:33:05,662][243562] Updated weights for policy 0, policy_version 23901 (0.0008) +[2026-06-02 16:33:05,827][243562] Updated weights for policy 0, policy_version 23911 (0.0008) +[2026-06-02 16:33:05,963][242748] Saving new best policy, reward=1011.564! +[2026-06-02 16:33:06,485][243562] Updated weights for policy 0, policy_version 23921 (0.0009) +[2026-06-02 16:33:06,631][243562] Updated weights for policy 0, policy_version 23931 (0.0008) +[2026-06-02 16:33:06,794][243562] Updated weights for policy 0, policy_version 23941 (0.0008) +[2026-06-02 16:33:06,956][243562] Updated weights for policy 0, policy_version 23951 (0.0008) +[2026-06-02 16:33:07,116][243562] Updated weights for policy 0, policy_version 23961 (0.0008) +[2026-06-02 16:33:07,313][243562] Updated weights for policy 0, policy_version 23973 (0.0008) +[2026-06-02 16:33:07,483][243562] Updated weights for policy 0, policy_version 23983 (0.0008) +[2026-06-02 16:33:08,125][243562] Updated weights for policy 0, policy_version 23994 (0.0008) +[2026-06-02 16:33:08,295][243562] Updated weights for policy 0, policy_version 24004 (0.0008) +[2026-06-02 16:33:08,470][243562] Updated weights for policy 0, policy_version 24015 (0.0008) +[2026-06-02 16:33:08,636][243562] Updated weights for policy 0, policy_version 24025 (0.0008) +[2026-06-02 16:33:08,803][243562] Updated weights for policy 0, policy_version 24035 (0.0008) +[2026-06-02 16:33:08,968][243562] Updated weights for policy 0, policy_version 24045 (0.0008) +[2026-06-02 16:33:09,617][243562] Updated weights for policy 0, policy_version 24055 (0.0009) +[2026-06-02 16:33:09,792][243562] Updated weights for policy 0, policy_version 24066 (0.0008) +[2026-06-02 16:33:09,811][235960] Fps is (10 sec: 22937.5, 60 sec: 21299.2, 300 sec: 21438.0). Total num frames: 12320768. Throughput: 0: 21404.4. Samples: 12340608. Policy #0 lag: (min: 38.0, avg: 52.7, max: 102.0) +[2026-06-02 16:33:09,812][235960] Avg episode reward: [(0, '1068.821')] +[2026-06-02 16:33:09,958][243562] Updated weights for policy 0, policy_version 24076 (0.0009) +[2026-06-02 16:33:10,121][243562] Updated weights for policy 0, policy_version 24086 (0.0008) +[2026-06-02 16:33:10,300][243562] Updated weights for policy 0, policy_version 24097 (0.0008) +[2026-06-02 16:33:10,464][243562] Updated weights for policy 0, policy_version 24107 (0.0008) +[2026-06-02 16:33:10,537][242748] Early stopping after 8 epochs (64 sgd steps), loss delta 0.0000007 +[2026-06-02 16:33:10,537][242748] Saving results/checkpoints_factor_sweeps/flappy/context_window/flappy_frame_stack_fixed_l2_fs4_seed12/checkpoint_p0/checkpoint_000024112_12353536.pth... +[2026-06-02 16:33:10,554][242748] Saving new best policy, reward=1068.821! +[2026-06-02 16:33:11,154][243562] Updated weights for policy 0, policy_version 24119 (0.0009) +[2026-06-02 16:33:11,310][243562] Updated weights for policy 0, policy_version 24129 (0.0008) +[2026-06-02 16:33:11,469][243562] Updated weights for policy 0, policy_version 24139 (0.0009) +[2026-06-02 16:33:11,655][243562] Updated weights for policy 0, policy_version 24150 (0.0008) +[2026-06-02 16:33:11,836][243562] Updated weights for policy 0, policy_version 24161 (0.0008) +[2026-06-02 16:33:11,987][243562] Updated weights for policy 0, policy_version 24171 (0.0008) +[2026-06-02 16:33:12,684][243562] Updated weights for policy 0, policy_version 24182 (0.0008) +[2026-06-02 16:33:12,841][243562] Updated weights for policy 0, policy_version 24192 (0.0008) +[2026-06-02 16:33:13,012][243562] Updated weights for policy 0, policy_version 24202 (0.0008) +[2026-06-02 16:33:13,166][243562] Updated weights for policy 0, policy_version 24212 (0.0008) +[2026-06-02 16:33:13,348][243562] Updated weights for policy 0, policy_version 24223 (0.0008) +[2026-06-02 16:33:13,512][243562] Updated weights for policy 0, policy_version 24233 (0.0008) +[2026-06-02 16:33:14,159][243562] Updated weights for policy 0, policy_version 24243 (0.0008) +[2026-06-02 16:33:14,311][243562] Updated weights for policy 0, policy_version 24253 (0.0009) +[2026-06-02 16:33:14,476][243562] Updated weights for policy 0, policy_version 24263 (0.0008) +[2026-06-02 16:33:14,657][243562] Updated weights for policy 0, policy_version 24274 (0.0008) +[2026-06-02 16:33:14,811][235960] Fps is (10 sec: 19660.8, 60 sec: 21299.2, 300 sec: 21438.0). Total num frames: 12419072. Throughput: 0: 21387.4. Samples: 12470528. Policy #0 lag: (min: 18.0, avg: 58.1, max: 81.0) +[2026-06-02 16:33:14,812][235960] Avg episode reward: [(0, '1106.165')] +[2026-06-02 16:33:14,837][243562] Updated weights for policy 0, policy_version 24285 (0.0009) +[2026-06-02 16:33:15,009][243562] Updated weights for policy 0, policy_version 24295 (0.0009) +[2026-06-02 16:33:15,150][242748] Saving new best policy, reward=1106.165! +[2026-06-02 16:33:15,682][243562] Updated weights for policy 0, policy_version 24305 (0.0009) +[2026-06-02 16:33:15,827][243562] Updated weights for policy 0, policy_version 24315 (0.0008) +[2026-06-02 16:33:15,992][243562] Updated weights for policy 0, policy_version 24325 (0.0008) +[2026-06-02 16:33:16,157][243562] Updated weights for policy 0, policy_version 24335 (0.0008) +[2026-06-02 16:33:16,316][243562] Updated weights for policy 0, policy_version 24345 (0.0009) +[2026-06-02 16:33:16,483][243562] Updated weights for policy 0, policy_version 24355 (0.0008) +[2026-06-02 16:33:16,669][243562] Updated weights for policy 0, policy_version 24366 (0.0009) +[2026-06-02 16:33:17,315][243562] Updated weights for policy 0, policy_version 24376 (0.0008) +[2026-06-02 16:33:17,499][243562] Updated weights for policy 0, policy_version 24387 (0.0009) +[2026-06-02 16:33:17,662][243562] Updated weights for policy 0, policy_version 24397 (0.0008) +[2026-06-02 16:33:17,823][243562] Updated weights for policy 0, policy_version 24407 (0.0008) +[2026-06-02 16:33:17,985][243562] Updated weights for policy 0, policy_version 24417 (0.0008) +[2026-06-02 16:33:18,150][243562] Updated weights for policy 0, policy_version 24427 (0.0008) +[2026-06-02 16:33:18,801][243562] Updated weights for policy 0, policy_version 24437 (0.0008) +[2026-06-02 16:33:18,952][243562] Updated weights for policy 0, policy_version 24447 (0.0008) +[2026-06-02 16:33:19,124][243562] Updated weights for policy 0, policy_version 24458 (0.0008) +[2026-06-02 16:33:19,288][243562] Updated weights for policy 0, policy_version 24468 (0.0008) +[2026-06-02 16:33:19,460][243562] Updated weights for policy 0, policy_version 24479 (0.0008) +[2026-06-02 16:33:19,637][243562] Updated weights for policy 0, policy_version 24489 (0.0008) +[2026-06-02 16:33:19,811][235960] Fps is (10 sec: 22937.6, 60 sec: 21845.3, 300 sec: 21438.0). Total num frames: 12550144. Throughput: 0: 21458.5. Samples: 12536320. Policy #0 lag: (min: 18.0, avg: 58.1, max: 81.0) +[2026-06-02 16:33:19,813][235960] Avg episode reward: [(0, '1104.966')] +[2026-06-02 16:33:20,327][243562] Updated weights for policy 0, policy_version 24500 (0.0008) +[2026-06-02 16:33:20,486][243562] Updated weights for policy 0, policy_version 24510 (0.0008) +[2026-06-02 16:33:20,646][243562] Updated weights for policy 0, policy_version 24520 (0.0008) +[2026-06-02 16:33:20,811][243562] Updated weights for policy 0, policy_version 24530 (0.0009) +[2026-06-02 16:33:20,975][243562] Updated weights for policy 0, policy_version 24540 (0.0008) +[2026-06-02 16:33:21,139][243562] Updated weights for policy 0, policy_version 24550 (0.0008) +[2026-06-02 16:33:21,298][243562] Updated weights for policy 0, policy_version 24560 (0.0008) +[2026-06-02 16:33:21,950][243562] Updated weights for policy 0, policy_version 24571 (0.0008) +[2026-06-02 16:33:22,136][243562] Updated weights for policy 0, policy_version 24582 (0.0008) +[2026-06-02 16:33:22,326][243562] Updated weights for policy 0, policy_version 24594 (0.0009) +[2026-06-02 16:33:22,512][243562] Updated weights for policy 0, policy_version 24605 (0.0008) +[2026-06-02 16:33:22,676][243562] Updated weights for policy 0, policy_version 24615 (0.0008) +[2026-06-02 16:33:23,344][243562] Updated weights for policy 0, policy_version 24626 (0.0008) +[2026-06-02 16:33:23,497][243562] Updated weights for policy 0, policy_version 24636 (0.0008) +[2026-06-02 16:33:23,663][243562] Updated weights for policy 0, policy_version 24646 (0.0008) +[2026-06-02 16:33:23,830][243562] Updated weights for policy 0, policy_version 24656 (0.0008) +[2026-06-02 16:33:24,020][243562] Updated weights for policy 0, policy_version 24668 (0.0008) +[2026-06-02 16:33:24,187][243562] Updated weights for policy 0, policy_version 24678 (0.0008) +[2026-06-02 16:33:24,811][235960] Fps is (10 sec: 22937.7, 60 sec: 21299.2, 300 sec: 21438.0). Total num frames: 12648448. Throughput: 0: 21526.8. Samples: 12665984. Policy #0 lag: (min: 18.0, avg: 58.1, max: 81.0) +[2026-06-02 16:33:24,812][235960] Avg episode reward: [(0, '1083.555')] +[2026-06-02 16:33:24,864][243562] Updated weights for policy 0, policy_version 24690 (0.0008) +[2026-06-02 16:33:25,022][243562] Updated weights for policy 0, policy_version 24700 (0.0008) +[2026-06-02 16:33:25,198][243562] Updated weights for policy 0, policy_version 24711 (0.0008) +[2026-06-02 16:33:25,381][243562] Updated weights for policy 0, policy_version 24722 (0.0008) +[2026-06-02 16:33:25,544][243562] Updated weights for policy 0, policy_version 24732 (0.0008) +[2026-06-02 16:33:25,712][243562] Updated weights for policy 0, policy_version 24742 (0.0008) +[2026-06-02 16:33:26,388][243562] Updated weights for policy 0, policy_version 24753 (0.0008) +[2026-06-02 16:33:26,546][243562] Updated weights for policy 0, policy_version 24763 (0.0008) +[2026-06-02 16:33:26,735][243562] Updated weights for policy 0, policy_version 24775 (0.0009) +[2026-06-02 16:33:26,900][243562] Updated weights for policy 0, policy_version 24785 (0.0008) +[2026-06-02 16:33:27,064][243562] Updated weights for policy 0, policy_version 24795 (0.0008) +[2026-06-02 16:33:27,240][243562] Updated weights for policy 0, policy_version 24806 (0.0008) +[2026-06-02 16:33:27,914][243562] Updated weights for policy 0, policy_version 24817 (0.0009) +[2026-06-02 16:33:28,072][243562] Updated weights for policy 0, policy_version 24827 (0.0009) +[2026-06-02 16:33:28,229][243562] Updated weights for policy 0, policy_version 24837 (0.0009) +[2026-06-02 16:33:28,395][243562] Updated weights for policy 0, policy_version 24847 (0.0009) +[2026-06-02 16:33:28,557][243562] Updated weights for policy 0, policy_version 24857 (0.0009) +[2026-06-02 16:33:28,736][243562] Updated weights for policy 0, policy_version 24868 (0.0008) +[2026-06-02 16:33:28,914][243562] Updated weights for policy 0, policy_version 24879 (0.0008) +[2026-06-02 16:33:29,595][243562] Updated weights for policy 0, policy_version 24891 (0.0008) +[2026-06-02 16:33:29,756][243562] Updated weights for policy 0, policy_version 24901 (0.0009) +[2026-06-02 16:33:29,811][235960] Fps is (10 sec: 19660.9, 60 sec: 21299.2, 300 sec: 21438.0). Total num frames: 12746752. Throughput: 0: 21626.3. Samples: 12796160. Policy #0 lag: (min: 18.0, avg: 58.1, max: 81.0) +[2026-06-02 16:33:29,812][235960] Avg episode reward: [(0, '1087.344')] +[2026-06-02 16:33:29,919][243562] Updated weights for policy 0, policy_version 24911 (0.0008) +[2026-06-02 16:33:30,078][243562] Updated weights for policy 0, policy_version 24921 (0.0008) +[2026-06-02 16:33:30,284][243562] Updated weights for policy 0, policy_version 24933 (0.0009) +[2026-06-02 16:33:30,447][243562] Updated weights for policy 0, policy_version 24943 (0.0008) +[2026-06-02 16:33:31,083][243562] Updated weights for policy 0, policy_version 24953 (0.0009) +[2026-06-02 16:33:31,257][243562] Updated weights for policy 0, policy_version 24963 (0.0008) +[2026-06-02 16:33:31,408][243562] Updated weights for policy 0, policy_version 24973 (0.0008) +[2026-06-02 16:33:31,576][243562] Updated weights for policy 0, policy_version 24983 (0.0009) +[2026-06-02 16:33:31,741][243562] Updated weights for policy 0, policy_version 24993 (0.0008) +[2026-06-02 16:33:31,900][243562] Updated weights for policy 0, policy_version 25003 (0.0008) +[2026-06-02 16:33:32,562][243562] Updated weights for policy 0, policy_version 25013 (0.0009) +[2026-06-02 16:33:32,727][243562] Updated weights for policy 0, policy_version 25023 (0.0009) +[2026-06-02 16:33:32,898][243562] Updated weights for policy 0, policy_version 25034 (0.0008) +[2026-06-02 16:33:33,068][243562] Updated weights for policy 0, policy_version 25044 (0.0008) +[2026-06-02 16:33:33,245][243562] Updated weights for policy 0, policy_version 25055 (0.0008) +[2026-06-02 16:33:33,440][243562] Updated weights for policy 0, policy_version 25067 (0.0008) +[2026-06-02 16:33:34,100][243562] Updated weights for policy 0, policy_version 25078 (0.0009) +[2026-06-02 16:33:34,264][243562] Updated weights for policy 0, policy_version 25088 (0.0008) +[2026-06-02 16:33:34,424][243562] Updated weights for policy 0, policy_version 25098 (0.0008) +[2026-06-02 16:33:34,580][243562] Updated weights for policy 0, policy_version 25108 (0.0008) +[2026-06-02 16:33:34,765][243562] Updated weights for policy 0, policy_version 25119 (0.0008) +[2026-06-02 16:33:34,811][235960] Fps is (10 sec: 19660.8, 60 sec: 21299.2, 300 sec: 21438.1). Total num frames: 12845056. Throughput: 0: 21669.0. Samples: 12861184. Policy #0 lag: (min: 85.0, avg: 114.2, max: 148.0) +[2026-06-02 16:33:34,812][235960] Avg episode reward: [(0, '1065.435')] +[2026-06-02 16:33:34,932][243562] Updated weights for policy 0, policy_version 25129 (0.0009) +[2026-06-02 16:33:35,581][243562] Updated weights for policy 0, policy_version 25140 (0.0008) +[2026-06-02 16:33:35,740][243562] Updated weights for policy 0, policy_version 25150 (0.0009) +[2026-06-02 16:33:35,898][243562] Updated weights for policy 0, policy_version 25160 (0.0008) +[2026-06-02 16:33:36,060][243562] Updated weights for policy 0, policy_version 25170 (0.0006) +[2026-06-02 16:33:36,225][243562] Updated weights for policy 0, policy_version 25180 (0.0004) +[2026-06-02 16:33:36,382][243562] Updated weights for policy 0, policy_version 25190 (0.0004) +[2026-06-02 16:33:36,550][243562] Updated weights for policy 0, policy_version 25200 (0.0004) +[2026-06-02 16:33:37,226][243562] Updated weights for policy 0, policy_version 25211 (0.0009) +[2026-06-02 16:33:37,395][243562] Updated weights for policy 0, policy_version 25222 (0.0008) +[2026-06-02 16:33:37,569][243562] Updated weights for policy 0, policy_version 25232 (0.0008) +[2026-06-02 16:33:37,730][243562] Updated weights for policy 0, policy_version 25242 (0.0008) +[2026-06-02 16:33:37,918][243562] Updated weights for policy 0, policy_version 25253 (0.0008) +[2026-06-02 16:33:38,594][243562] Updated weights for policy 0, policy_version 25265 (0.0010) +[2026-06-02 16:33:38,750][243562] Updated weights for policy 0, policy_version 25275 (0.0008) +[2026-06-02 16:33:38,905][243562] Updated weights for policy 0, policy_version 25285 (0.0008) +[2026-06-02 16:33:39,088][243562] Updated weights for policy 0, policy_version 25296 (0.0009) +[2026-06-02 16:33:39,256][243562] Updated weights for policy 0, policy_version 25306 (0.0008) +[2026-06-02 16:33:39,417][243562] Updated weights for policy 0, policy_version 25316 (0.0008) +[2026-06-02 16:33:39,587][243562] Updated weights for policy 0, policy_version 25326 (0.0008) +[2026-06-02 16:33:39,811][235960] Fps is (10 sec: 22937.7, 60 sec: 21845.4, 300 sec: 21438.1). Total num frames: 12976128. Throughput: 0: 21703.1. Samples: 12991104. Policy #0 lag: (min: 85.0, avg: 114.2, max: 148.0) +[2026-06-02 16:33:39,812][235960] Avg episode reward: [(0, '1071.641')] +[2026-06-02 16:33:40,254][243562] Updated weights for policy 0, policy_version 25337 (0.0008) +[2026-06-02 16:33:40,418][243562] Updated weights for policy 0, policy_version 25347 (0.0008) +[2026-06-02 16:33:40,593][243562] Updated weights for policy 0, policy_version 25358 (0.0009) +[2026-06-02 16:33:40,759][243562] Updated weights for policy 0, policy_version 25368 (0.0007) +[2026-06-02 16:33:40,926][243562] Updated weights for policy 0, policy_version 25378 (0.0009) +[2026-06-02 16:33:41,082][243562] Updated weights for policy 0, policy_version 25388 (0.0009) +[2026-06-02 16:33:41,749][243562] Updated weights for policy 0, policy_version 25399 (0.0009) +[2026-06-02 16:33:41,940][243562] Updated weights for policy 0, policy_version 25411 (0.0010) +[2026-06-02 16:33:42,105][243562] Updated weights for policy 0, policy_version 25421 (0.0008) +[2026-06-02 16:33:42,267][243562] Updated weights for policy 0, policy_version 25431 (0.0009) +[2026-06-02 16:33:42,435][243562] Updated weights for policy 0, policy_version 25441 (0.0008) +[2026-06-02 16:33:42,598][243562] Updated weights for policy 0, policy_version 25451 (0.0009) +[2026-06-02 16:33:43,266][243562] Updated weights for policy 0, policy_version 25461 (0.0009) +[2026-06-02 16:33:43,423][243562] Updated weights for policy 0, policy_version 25471 (0.0008) +[2026-06-02 16:33:43,586][243562] Updated weights for policy 0, policy_version 25481 (0.0008) +[2026-06-02 16:33:43,745][243562] Updated weights for policy 0, policy_version 25491 (0.0008) +[2026-06-02 16:33:43,909][243562] Updated weights for policy 0, policy_version 25501 (0.0008) +[2026-06-02 16:33:44,071][243562] Updated weights for policy 0, policy_version 25511 (0.0008) +[2026-06-02 16:33:44,725][243562] Updated weights for policy 0, policy_version 25521 (0.0008) +[2026-06-02 16:33:44,811][235960] Fps is (10 sec: 22937.6, 60 sec: 21299.2, 300 sec: 21438.0). Total num frames: 13074432. Throughput: 0: 21703.2. Samples: 13121920. Policy #0 lag: (min: 85.0, avg: 114.2, max: 148.0) +[2026-06-02 16:33:44,812][235960] Avg episode reward: [(0, '1079.290')] +[2026-06-02 16:33:44,885][243562] Updated weights for policy 0, policy_version 25531 (0.0008) +[2026-06-02 16:33:45,048][243562] Updated weights for policy 0, policy_version 25541 (0.0008) +[2026-06-02 16:33:45,225][243562] Updated weights for policy 0, policy_version 25552 (0.0008) +[2026-06-02 16:33:45,429][243562] Updated weights for policy 0, policy_version 25564 (0.0008) +[2026-06-02 16:33:45,593][243562] Updated weights for policy 0, policy_version 25574 (0.0008) +[2026-06-02 16:33:45,749][243562] Updated weights for policy 0, policy_version 25584 (0.0009) +[2026-06-02 16:33:46,392][243562] Updated weights for policy 0, policy_version 25594 (0.0008) +[2026-06-02 16:33:46,556][243562] Updated weights for policy 0, policy_version 25604 (0.0008) +[2026-06-02 16:33:46,732][243562] Updated weights for policy 0, policy_version 25615 (0.0008) +[2026-06-02 16:33:46,893][243562] Updated weights for policy 0, policy_version 25625 (0.0008) +[2026-06-02 16:33:47,053][243562] Updated weights for policy 0, policy_version 25635 (0.0008) +[2026-06-02 16:33:47,223][243562] Updated weights for policy 0, policy_version 25645 (0.0008) +[2026-06-02 16:33:47,874][243562] Updated weights for policy 0, policy_version 25655 (0.0008) +[2026-06-02 16:33:48,030][243562] Updated weights for policy 0, policy_version 25665 (0.0008) +[2026-06-02 16:33:48,204][243562] Updated weights for policy 0, policy_version 25676 (0.0008) +[2026-06-02 16:33:48,365][243562] Updated weights for policy 0, policy_version 25686 (0.0008) +[2026-06-02 16:33:48,548][243562] Updated weights for policy 0, policy_version 25697 (0.0008) +[2026-06-02 16:33:48,735][243562] Updated weights for policy 0, policy_version 25708 (0.0008) +[2026-06-02 16:33:49,390][243562] Updated weights for policy 0, policy_version 25718 (0.0008) +[2026-06-02 16:33:49,546][243562] Updated weights for policy 0, policy_version 25728 (0.0008) +[2026-06-02 16:33:49,711][243562] Updated weights for policy 0, policy_version 25738 (0.0008) +[2026-06-02 16:33:49,811][235960] Fps is (10 sec: 19660.7, 60 sec: 21299.2, 300 sec: 21438.0). Total num frames: 13172736. Throughput: 0: 21651.9. Samples: 13184640. Policy #0 lag: (min: 6.0, avg: 21.0, max: 70.0) +[2026-06-02 16:33:49,812][235960] Avg episode reward: [(0, '1106.117')] +[2026-06-02 16:33:49,905][243562] Updated weights for policy 0, policy_version 25750 (0.0008) +[2026-06-02 16:33:50,075][243562] Updated weights for policy 0, policy_version 25760 (0.0009) +[2026-06-02 16:33:50,267][243562] Updated weights for policy 0, policy_version 25772 (0.0008) +[2026-06-02 16:33:50,935][243562] Updated weights for policy 0, policy_version 25782 (0.0008) +[2026-06-02 16:33:51,110][243562] Updated weights for policy 0, policy_version 25793 (0.0008) +[2026-06-02 16:33:51,276][243562] Updated weights for policy 0, policy_version 25803 (0.0009) +[2026-06-02 16:33:51,437][243562] Updated weights for policy 0, policy_version 25813 (0.0009) +[2026-06-02 16:33:51,608][243562] Updated weights for policy 0, policy_version 25823 (0.0009) +[2026-06-02 16:33:51,787][243562] Updated weights for policy 0, policy_version 25834 (0.0009) +[2026-06-02 16:33:52,432][243562] Updated weights for policy 0, policy_version 25844 (0.0009) +[2026-06-02 16:33:52,606][243562] Updated weights for policy 0, policy_version 25855 (0.0008) +[2026-06-02 16:33:52,770][243562] Updated weights for policy 0, policy_version 25865 (0.0008) +[2026-06-02 16:33:52,934][243562] Updated weights for policy 0, policy_version 25875 (0.0008) +[2026-06-02 16:33:53,089][243562] Updated weights for policy 0, policy_version 25885 (0.0009) +[2026-06-02 16:33:53,261][243562] Updated weights for policy 0, policy_version 25895 (0.0008) +[2026-06-02 16:33:53,945][243562] Updated weights for policy 0, policy_version 25907 (0.0009) +[2026-06-02 16:33:54,099][243562] Updated weights for policy 0, policy_version 25917 (0.0008) +[2026-06-02 16:33:54,262][243562] Updated weights for policy 0, policy_version 25927 (0.0008) +[2026-06-02 16:33:54,435][243562] Updated weights for policy 0, policy_version 25938 (0.0009) +[2026-06-02 16:33:54,597][243562] Updated weights for policy 0, policy_version 25948 (0.0008) +[2026-06-02 16:33:54,767][243562] Updated weights for policy 0, policy_version 25958 (0.0008) +[2026-06-02 16:33:54,811][235960] Fps is (10 sec: 19660.7, 60 sec: 21299.2, 300 sec: 21438.0). Total num frames: 13271040. Throughput: 0: 21521.1. Samples: 13309056. Policy #0 lag: (min: 6.0, avg: 21.0, max: 70.0) +[2026-06-02 16:33:54,812][235960] Avg episode reward: [(0, '1149.937')] +[2026-06-02 16:33:54,931][242748] Saving new best policy, reward=1149.937! +[2026-06-02 16:33:54,935][243562] Updated weights for policy 0, policy_version 25968 (0.0008) +[2026-06-02 16:33:55,598][243562] Updated weights for policy 0, policy_version 25979 (0.0008) +[2026-06-02 16:33:55,790][243562] Updated weights for policy 0, policy_version 25991 (0.0009) +[2026-06-02 16:33:55,963][243562] Updated weights for policy 0, policy_version 26001 (0.0008) +[2026-06-02 16:33:56,126][243562] Updated weights for policy 0, policy_version 26011 (0.0008) +[2026-06-02 16:33:56,301][243562] Updated weights for policy 0, policy_version 26022 (0.0008) +[2026-06-02 16:33:56,469][243562] Updated weights for policy 0, policy_version 26032 (0.0008) +[2026-06-02 16:33:57,109][243562] Updated weights for policy 0, policy_version 26042 (0.0009) +[2026-06-02 16:33:57,272][243562] Updated weights for policy 0, policy_version 26052 (0.0009) +[2026-06-02 16:33:57,431][243562] Updated weights for policy 0, policy_version 26062 (0.0008) +[2026-06-02 16:33:57,616][243562] Updated weights for policy 0, policy_version 26073 (0.0009) +[2026-06-02 16:33:57,777][243562] Updated weights for policy 0, policy_version 26083 (0.0008) +[2026-06-02 16:33:57,946][243562] Updated weights for policy 0, policy_version 26093 (0.0008) +[2026-06-02 16:33:58,583][243562] Updated weights for policy 0, policy_version 26103 (0.0008) +[2026-06-02 16:33:58,750][243562] Updated weights for policy 0, policy_version 26114 (0.0008) +[2026-06-02 16:33:58,910][243562] Updated weights for policy 0, policy_version 26124 (0.0009) +[2026-06-02 16:33:59,115][243562] Updated weights for policy 0, policy_version 26136 (0.0009) +[2026-06-02 16:33:59,274][243562] Updated weights for policy 0, policy_version 26146 (0.0008) +[2026-06-02 16:33:59,454][243562] Updated weights for policy 0, policy_version 26157 (0.0009) +[2026-06-02 16:33:59,811][235960] Fps is (10 sec: 22937.7, 60 sec: 21845.4, 300 sec: 21438.1). Total num frames: 13402112. Throughput: 0: 21424.4. Samples: 13434624. Policy #0 lag: (min: 6.0, avg: 21.0, max: 70.0) +[2026-06-02 16:33:59,812][235960] Avg episode reward: [(0, '1106.971')] +[2026-06-02 16:34:00,105][243562] Updated weights for policy 0, policy_version 26167 (0.0009) +[2026-06-02 16:34:00,266][243562] Updated weights for policy 0, policy_version 26177 (0.0009) +[2026-06-02 16:34:00,447][243562] Updated weights for policy 0, policy_version 26188 (0.0009) +[2026-06-02 16:34:00,617][243562] Updated weights for policy 0, policy_version 26198 (0.0009) +[2026-06-02 16:34:00,791][243562] Updated weights for policy 0, policy_version 26209 (0.0009) +[2026-06-02 16:34:00,963][243562] Updated weights for policy 0, policy_version 26219 (0.0008) +[2026-06-02 16:34:01,603][243562] Updated weights for policy 0, policy_version 26229 (0.0008) +[2026-06-02 16:34:01,761][243562] Updated weights for policy 0, policy_version 26239 (0.0009) +[2026-06-02 16:34:01,920][243562] Updated weights for policy 0, policy_version 26249 (0.0008) +[2026-06-02 16:34:02,085][243562] Updated weights for policy 0, policy_version 26259 (0.0008) +[2026-06-02 16:34:02,250][243562] Updated weights for policy 0, policy_version 26269 (0.0008) +[2026-06-02 16:34:02,428][243562] Updated weights for policy 0, policy_version 26280 (0.0008) +[2026-06-02 16:34:03,097][243562] Updated weights for policy 0, policy_version 26290 (0.0008) +[2026-06-02 16:34:03,269][243562] Updated weights for policy 0, policy_version 26301 (0.0008) +[2026-06-02 16:34:03,444][243562] Updated weights for policy 0, policy_version 26312 (0.0008) +[2026-06-02 16:34:03,608][243562] Updated weights for policy 0, policy_version 26322 (0.0008) +[2026-06-02 16:34:03,788][243562] Updated weights for policy 0, policy_version 26333 (0.0009) +[2026-06-02 16:34:03,950][243562] Updated weights for policy 0, policy_version 26343 (0.0008) +[2026-06-02 16:34:04,623][243562] Updated weights for policy 0, policy_version 26354 (0.0008) +[2026-06-02 16:34:04,775][243562] Updated weights for policy 0, policy_version 26364 (0.0008) +[2026-06-02 16:34:04,811][235960] Fps is (10 sec: 22937.7, 60 sec: 21299.2, 300 sec: 21438.1). Total num frames: 13500416. Throughput: 0: 21361.8. Samples: 13497600. Policy #0 lag: (min: 6.0, avg: 21.0, max: 70.0) +[2026-06-02 16:34:04,812][235960] Avg episode reward: [(0, '1083.098')] +[2026-06-02 16:34:04,962][243562] Updated weights for policy 0, policy_version 26375 (0.0008) +[2026-06-02 16:34:05,117][243562] Updated weights for policy 0, policy_version 26385 (0.0008) +[2026-06-02 16:34:05,294][243562] Updated weights for policy 0, policy_version 26396 (0.0006) +[2026-06-02 16:34:05,467][243562] Updated weights for policy 0, policy_version 26406 (0.0004) +[2026-06-02 16:34:05,628][243562] Updated weights for policy 0, policy_version 26416 (0.0005) +[2026-06-02 16:34:06,297][243562] Updated weights for policy 0, policy_version 26428 (0.0009) +[2026-06-02 16:34:06,486][243562] Updated weights for policy 0, policy_version 26440 (0.0008) +[2026-06-02 16:34:06,650][243562] Updated weights for policy 0, policy_version 26450 (0.0008) +[2026-06-02 16:34:06,823][243562] Updated weights for policy 0, policy_version 26460 (0.0009) +[2026-06-02 16:34:06,987][243562] Updated weights for policy 0, policy_version 26470 (0.0008) +[2026-06-02 16:34:07,146][243562] Updated weights for policy 0, policy_version 26480 (0.0008) +[2026-06-02 16:34:07,807][243562] Updated weights for policy 0, policy_version 26490 (0.0008) +[2026-06-02 16:34:07,969][243562] Updated weights for policy 0, policy_version 26500 (0.0008) +[2026-06-02 16:34:08,130][243562] Updated weights for policy 0, policy_version 26510 (0.0008) +[2026-06-02 16:34:08,297][243562] Updated weights for policy 0, policy_version 26520 (0.0008) +[2026-06-02 16:34:08,474][243562] Updated weights for policy 0, policy_version 26531 (0.0008) +[2026-06-02 16:34:08,649][243562] Updated weights for policy 0, policy_version 26541 (0.0009) +[2026-06-02 16:34:09,290][243562] Updated weights for policy 0, policy_version 26551 (0.0009) +[2026-06-02 16:34:09,469][243562] Updated weights for policy 0, policy_version 26562 (0.0009) +[2026-06-02 16:34:09,637][243562] Updated weights for policy 0, policy_version 26573 (0.0008) +[2026-06-02 16:34:09,810][243562] Updated weights for policy 0, policy_version 26583 (0.0008) +[2026-06-02 16:34:09,811][235960] Fps is (10 sec: 19660.6, 60 sec: 21299.2, 300 sec: 21438.0). Total num frames: 13598720. Throughput: 0: 21370.3. Samples: 13627648. Policy #0 lag: (min: 36.0, avg: 50.9, max: 100.0) +[2026-06-02 16:34:09,813][235960] Avg episode reward: [(0, '1129.582')] +[2026-06-02 16:34:09,978][243562] Updated weights for policy 0, policy_version 26593 (0.0008) +[2026-06-02 16:34:10,161][243562] Updated weights for policy 0, policy_version 26604 (0.0009) +[2026-06-02 16:34:10,795][243562] Updated weights for policy 0, policy_version 26614 (0.0009) +[2026-06-02 16:34:10,961][243562] Updated weights for policy 0, policy_version 26625 (0.0008) +[2026-06-02 16:34:11,131][243562] Updated weights for policy 0, policy_version 26635 (0.0009) +[2026-06-02 16:34:11,294][243562] Updated weights for policy 0, policy_version 26645 (0.0009) +[2026-06-02 16:34:11,479][243562] Updated weights for policy 0, policy_version 26656 (0.0008) +[2026-06-02 16:34:11,633][243562] Updated weights for policy 0, policy_version 26666 (0.0008) +[2026-06-02 16:34:12,303][243562] Updated weights for policy 0, policy_version 26676 (0.0009) +[2026-06-02 16:34:12,467][243562] Updated weights for policy 0, policy_version 26686 (0.0010) +[2026-06-02 16:34:12,628][243562] Updated weights for policy 0, policy_version 26696 (0.0009) +[2026-06-02 16:34:12,782][243562] Updated weights for policy 0, policy_version 26706 (0.0008) +[2026-06-02 16:34:12,950][243562] Updated weights for policy 0, policy_version 26716 (0.0008) +[2026-06-02 16:34:13,118][243562] Updated weights for policy 0, policy_version 26726 (0.0008) +[2026-06-02 16:34:13,279][243562] Updated weights for policy 0, policy_version 26736 (0.0008) +[2026-06-02 16:34:13,941][243562] Updated weights for policy 0, policy_version 26747 (0.0009) +[2026-06-02 16:34:14,099][243562] Updated weights for policy 0, policy_version 26757 (0.0008) +[2026-06-02 16:34:14,266][243562] Updated weights for policy 0, policy_version 26767 (0.0009) +[2026-06-02 16:34:14,425][243562] Updated weights for policy 0, policy_version 26777 (0.0009) +[2026-06-02 16:34:14,599][243562] Updated weights for policy 0, policy_version 26787 (0.0008) +[2026-06-02 16:34:14,759][243562] Updated weights for policy 0, policy_version 26797 (0.0008) +[2026-06-02 16:34:14,811][235960] Fps is (10 sec: 22937.7, 60 sec: 21845.4, 300 sec: 21438.1). Total num frames: 13729792. Throughput: 0: 21370.4. Samples: 13757824. Policy #0 lag: (min: 36.0, avg: 50.9, max: 100.0) +[2026-06-02 16:34:14,812][235960] Avg episode reward: [(0, '1153.976')] +[2026-06-02 16:34:14,817][242748] Saving new best policy, reward=1153.976! +[2026-06-02 16:34:15,405][243562] Updated weights for policy 0, policy_version 26807 (0.0008) +[2026-06-02 16:34:15,571][243562] Updated weights for policy 0, policy_version 26817 (0.0009) +[2026-06-02 16:34:15,732][243562] Updated weights for policy 0, policy_version 26827 (0.0008) +[2026-06-02 16:34:15,896][243562] Updated weights for policy 0, policy_version 26837 (0.0009) +[2026-06-02 16:34:16,061][243562] Updated weights for policy 0, policy_version 26847 (0.0008) +[2026-06-02 16:34:16,232][243562] Updated weights for policy 0, policy_version 26857 (0.0008) +[2026-06-02 16:34:16,874][243562] Updated weights for policy 0, policy_version 26867 (0.0008) +[2026-06-02 16:34:17,028][243562] Updated weights for policy 0, policy_version 26877 (0.0009) +[2026-06-02 16:34:17,191][243562] Updated weights for policy 0, policy_version 26887 (0.0008) +[2026-06-02 16:34:17,371][243562] Updated weights for policy 0, policy_version 26898 (0.0009) +[2026-06-02 16:34:17,556][243562] Updated weights for policy 0, policy_version 26909 (0.0008) +[2026-06-02 16:34:17,723][243562] Updated weights for policy 0, policy_version 26919 (0.0008) +[2026-06-02 16:34:18,393][243562] Updated weights for policy 0, policy_version 26930 (0.0008) +[2026-06-02 16:34:18,563][243562] Updated weights for policy 0, policy_version 26941 (0.0008) +[2026-06-02 16:34:18,724][243562] Updated weights for policy 0, policy_version 26951 (0.0008) +[2026-06-02 16:34:18,894][243562] Updated weights for policy 0, policy_version 26961 (0.0008) +[2026-06-02 16:34:19,057][243562] Updated weights for policy 0, policy_version 26971 (0.0008) +[2026-06-02 16:34:19,224][243562] Updated weights for policy 0, policy_version 26981 (0.0008) +[2026-06-02 16:34:19,388][243562] Updated weights for policy 0, policy_version 26991 (0.0010) +[2026-06-02 16:34:19,811][235960] Fps is (10 sec: 22937.5, 60 sec: 21299.2, 300 sec: 21438.0). Total num frames: 13828096. Throughput: 0: 21373.1. Samples: 13822976. Policy #0 lag: (min: 36.0, avg: 50.9, max: 100.0) +[2026-06-02 16:34:19,813][235960] Avg episode reward: [(0, '1165.364')] +[2026-06-02 16:34:20,045][243562] Updated weights for policy 0, policy_version 27001 (0.0008) +[2026-06-02 16:34:20,197][243562] Updated weights for policy 0, policy_version 27011 (0.0008) +[2026-06-02 16:34:20,363][243562] Updated weights for policy 0, policy_version 27021 (0.0008) +[2026-06-02 16:34:20,520][243562] Updated weights for policy 0, policy_version 27031 (0.0008) +[2026-06-02 16:34:20,695][243562] Updated weights for policy 0, policy_version 27041 (0.0008) +[2026-06-02 16:34:20,870][243562] Updated weights for policy 0, policy_version 27052 (0.0008) +[2026-06-02 16:34:20,931][242748] Saving new best policy, reward=1165.364! +[2026-06-02 16:34:21,507][243562] Updated weights for policy 0, policy_version 27062 (0.0008) +[2026-06-02 16:34:21,678][243562] Updated weights for policy 0, policy_version 27073 (0.0008) +[2026-06-02 16:34:21,835][243562] Updated weights for policy 0, policy_version 27083 (0.0008) +[2026-06-02 16:34:22,014][243562] Updated weights for policy 0, policy_version 27093 (0.0008) +[2026-06-02 16:34:22,169][243562] Updated weights for policy 0, policy_version 27103 (0.0008) +[2026-06-02 16:34:22,341][243562] Updated weights for policy 0, policy_version 27113 (0.0008) +[2026-06-02 16:34:22,995][243562] Updated weights for policy 0, policy_version 27124 (0.0009) +[2026-06-02 16:34:23,154][243562] Updated weights for policy 0, policy_version 27134 (0.0010) +[2026-06-02 16:34:23,318][243562] Updated weights for policy 0, policy_version 27144 (0.0010) +[2026-06-02 16:34:23,499][243562] Updated weights for policy 0, policy_version 27155 (0.0009) +[2026-06-02 16:34:23,696][243562] Updated weights for policy 0, policy_version 27167 (0.0008) +[2026-06-02 16:34:23,860][243562] Updated weights for policy 0, policy_version 27177 (0.0008) +[2026-06-02 16:34:24,517][243562] Updated weights for policy 0, policy_version 27187 (0.0008) +[2026-06-02 16:34:24,664][243562] Updated weights for policy 0, policy_version 27197 (0.0008) +[2026-06-02 16:34:24,811][235960] Fps is (10 sec: 19660.8, 60 sec: 21299.2, 300 sec: 21438.1). Total num frames: 13926400. Throughput: 0: 21361.8. Samples: 13952384. Policy #0 lag: (min: 36.0, avg: 50.9, max: 100.0) +[2026-06-02 16:34:24,812][235960] Avg episode reward: [(0, '1125.136')] +[2026-06-02 16:34:24,829][243562] Updated weights for policy 0, policy_version 27207 (0.0008) +[2026-06-02 16:34:25,010][243562] Updated weights for policy 0, policy_version 27218 (0.0008) +[2026-06-02 16:34:25,176][243562] Updated weights for policy 0, policy_version 27228 (0.0008) +[2026-06-02 16:34:25,337][243562] Updated weights for policy 0, policy_version 27238 (0.0008) +[2026-06-02 16:34:25,511][243562] Updated weights for policy 0, policy_version 27248 (0.0008) +[2026-06-02 16:34:26,157][243562] Updated weights for policy 0, policy_version 27260 (0.0009) +[2026-06-02 16:34:26,322][243562] Updated weights for policy 0, policy_version 27270 (0.0010) +[2026-06-02 16:34:26,486][243562] Updated weights for policy 0, policy_version 27280 (0.0008) +[2026-06-02 16:34:26,683][243562] Updated weights for policy 0, policy_version 27292 (0.0008) +[2026-06-02 16:34:26,853][243562] Updated weights for policy 0, policy_version 27302 (0.0008) +[2026-06-02 16:34:27,520][243562] Updated weights for policy 0, policy_version 27313 (0.0009) +[2026-06-02 16:34:27,678][243562] Updated weights for policy 0, policy_version 27323 (0.0010) +[2026-06-02 16:34:27,858][243562] Updated weights for policy 0, policy_version 27334 (0.0010) +[2026-06-02 16:34:28,018][243562] Updated weights for policy 0, policy_version 27344 (0.0009) +[2026-06-02 16:34:28,205][243562] Updated weights for policy 0, policy_version 27356 (0.0008) +[2026-06-02 16:34:28,380][243562] Updated weights for policy 0, policy_version 27366 (0.0008) +[2026-06-02 16:34:29,062][243562] Updated weights for policy 0, policy_version 27377 (0.0009) +[2026-06-02 16:34:29,234][243562] Updated weights for policy 0, policy_version 27388 (0.0008) +[2026-06-02 16:34:29,406][243562] Updated weights for policy 0, policy_version 27399 (0.0008) +[2026-06-02 16:34:29,574][243562] Updated weights for policy 0, policy_version 27409 (0.0010) +[2026-06-02 16:34:29,740][243562] Updated weights for policy 0, policy_version 27419 (0.0008) +[2026-06-02 16:34:29,811][235960] Fps is (10 sec: 19660.6, 60 sec: 21299.1, 300 sec: 21438.0). Total num frames: 14024704. Throughput: 0: 21319.0. Samples: 14081280. Policy #0 lag: (min: 63.0, avg: 78.8, max: 127.0) +[2026-06-02 16:34:29,812][235960] Avg episode reward: [(0, '1139.080')] +[2026-06-02 16:34:29,905][243562] Updated weights for policy 0, policy_version 27429 (0.0008) +[2026-06-02 16:34:30,577][243562] Updated weights for policy 0, policy_version 27441 (0.0010) +[2026-06-02 16:34:30,721][243562] Updated weights for policy 0, policy_version 27451 (0.0010) +[2026-06-02 16:34:30,878][243562] Updated weights for policy 0, policy_version 27461 (0.0008) +[2026-06-02 16:34:31,051][243562] Updated weights for policy 0, policy_version 27471 (0.0008) +[2026-06-02 16:34:31,238][243562] Updated weights for policy 0, policy_version 27482 (0.0008) +[2026-06-02 16:34:31,431][243562] Updated weights for policy 0, policy_version 27494 (0.0008) +[2026-06-02 16:34:32,104][243562] Updated weights for policy 0, policy_version 27505 (0.0008) +[2026-06-02 16:34:32,275][243562] Updated weights for policy 0, policy_version 27516 (0.0008) +[2026-06-02 16:34:32,455][243562] Updated weights for policy 0, policy_version 27527 (0.0008) +[2026-06-02 16:34:32,631][243562] Updated weights for policy 0, policy_version 27538 (0.0008) +[2026-06-02 16:34:32,798][243562] Updated weights for policy 0, policy_version 27548 (0.0008) +[2026-06-02 16:34:32,965][243562] Updated weights for policy 0, policy_version 27558 (0.0008) +[2026-06-02 16:34:33,122][243562] Updated weights for policy 0, policy_version 27568 (0.0008) +[2026-06-02 16:34:33,768][243562] Updated weights for policy 0, policy_version 27578 (0.0008) +[2026-06-02 16:34:33,921][243562] Updated weights for policy 0, policy_version 27588 (0.0008) +[2026-06-02 16:34:34,094][243562] Updated weights for policy 0, policy_version 27598 (0.0009) +[2026-06-02 16:34:34,259][243562] Updated weights for policy 0, policy_version 27608 (0.0009) +[2026-06-02 16:34:34,419][243562] Updated weights for policy 0, policy_version 27618 (0.0009) +[2026-06-02 16:34:34,586][243562] Updated weights for policy 0, policy_version 27628 (0.0009) +[2026-06-02 16:34:34,811][235960] Fps is (10 sec: 22937.5, 60 sec: 21845.3, 300 sec: 21438.1). Total num frames: 14155776. Throughput: 0: 21361.8. Samples: 14145920. Policy #0 lag: (min: 63.0, avg: 78.8, max: 127.0) +[2026-06-02 16:34:34,812][235960] Avg episode reward: [(0, '1127.560')] +[2026-06-02 16:34:35,257][243562] Updated weights for policy 0, policy_version 27638 (0.0006) +[2026-06-02 16:34:35,410][243562] Updated weights for policy 0, policy_version 27648 (0.0004) +[2026-06-02 16:34:35,595][243562] Updated weights for policy 0, policy_version 27659 (0.0007) +[2026-06-02 16:34:35,755][243562] Updated weights for policy 0, policy_version 27669 (0.0008) +[2026-06-02 16:34:35,937][243562] Updated weights for policy 0, policy_version 27680 (0.0009) +[2026-06-02 16:34:36,105][243562] Updated weights for policy 0, policy_version 27690 (0.0008) +[2026-06-02 16:34:36,743][243562] Updated weights for policy 0, policy_version 27700 (0.0009) +[2026-06-02 16:34:36,894][243562] Updated weights for policy 0, policy_version 27710 (0.0009) +[2026-06-02 16:34:37,097][243562] Updated weights for policy 0, policy_version 27722 (0.0008) +[2026-06-02 16:34:37,253][243562] Updated weights for policy 0, policy_version 27732 (0.0008) +[2026-06-02 16:34:37,426][243562] Updated weights for policy 0, policy_version 27742 (0.0008) +[2026-06-02 16:34:37,597][243562] Updated weights for policy 0, policy_version 27752 (0.0008) +[2026-06-02 16:34:38,253][243562] Updated weights for policy 0, policy_version 27763 (0.0009) +[2026-06-02 16:34:38,434][243562] Updated weights for policy 0, policy_version 27774 (0.0009) +[2026-06-02 16:34:38,596][243562] Updated weights for policy 0, policy_version 27784 (0.0008) +[2026-06-02 16:34:38,763][243562] Updated weights for policy 0, policy_version 27794 (0.0008) +[2026-06-02 16:34:38,917][243562] Updated weights for policy 0, policy_version 27804 (0.0008) +[2026-06-02 16:34:39,104][243562] Updated weights for policy 0, policy_version 27815 (0.0008) +[2026-06-02 16:34:39,776][243562] Updated weights for policy 0, policy_version 27826 (0.0009) +[2026-06-02 16:34:39,811][235960] Fps is (10 sec: 22937.5, 60 sec: 21299.1, 300 sec: 21438.0). Total num frames: 14254080. Throughput: 0: 21469.7. Samples: 14275200. Policy #0 lag: (min: 63.0, avg: 78.8, max: 127.0) +[2026-06-02 16:34:39,812][235960] Avg episode reward: [(0, '1104.906')] +[2026-06-02 16:34:39,934][243562] Updated weights for policy 0, policy_version 27836 (0.0008) +[2026-06-02 16:34:40,094][243562] Updated weights for policy 0, policy_version 27846 (0.0009) +[2026-06-02 16:34:40,270][243562] Updated weights for policy 0, policy_version 27857 (0.0009) +[2026-06-02 16:34:40,435][243562] Updated weights for policy 0, policy_version 27867 (0.0008) +[2026-06-02 16:34:40,591][243562] Updated weights for policy 0, policy_version 27877 (0.0009) +[2026-06-02 16:34:40,764][243562] Updated weights for policy 0, policy_version 27887 (0.0008) +[2026-06-02 16:34:41,404][243562] Updated weights for policy 0, policy_version 27897 (0.0009) +[2026-06-02 16:34:41,555][243562] Updated weights for policy 0, policy_version 27907 (0.0009) +[2026-06-02 16:34:41,725][243562] Updated weights for policy 0, policy_version 27917 (0.0009) +[2026-06-02 16:34:41,889][243562] Updated weights for policy 0, policy_version 27927 (0.0008) +[2026-06-02 16:34:42,055][243562] Updated weights for policy 0, policy_version 27937 (0.0008) +[2026-06-02 16:34:42,217][243562] Updated weights for policy 0, policy_version 27947 (0.0008) +[2026-06-02 16:34:42,915][243562] Updated weights for policy 0, policy_version 27959 (0.0009) +[2026-06-02 16:34:43,075][243562] Updated weights for policy 0, policy_version 27969 (0.0008) +[2026-06-02 16:34:43,239][243562] Updated weights for policy 0, policy_version 27979 (0.0009) +[2026-06-02 16:34:43,438][243562] Updated weights for policy 0, policy_version 27991 (0.0009) +[2026-06-02 16:34:43,614][243562] Updated weights for policy 0, policy_version 28002 (0.0008) +[2026-06-02 16:34:43,776][243562] Updated weights for policy 0, policy_version 28012 (0.0008) +[2026-06-02 16:34:44,414][243562] Updated weights for policy 0, policy_version 28022 (0.0008) +[2026-06-02 16:34:44,563][243562] Updated weights for policy 0, policy_version 28032 (0.0008) +[2026-06-02 16:34:44,732][243562] Updated weights for policy 0, policy_version 28042 (0.0009) +[2026-06-02 16:34:44,811][235960] Fps is (10 sec: 19660.8, 60 sec: 21299.2, 300 sec: 21438.1). Total num frames: 14352384. Throughput: 0: 21566.6. Samples: 14405120. Policy #0 lag: (min: 63.0, avg: 78.8, max: 127.0) +[2026-06-02 16:34:44,812][235960] Avg episode reward: [(0, '1130.703')] +[2026-06-02 16:34:44,910][243562] Updated weights for policy 0, policy_version 28053 (0.0008) +[2026-06-02 16:34:45,077][243562] Updated weights for policy 0, policy_version 28063 (0.0008) +[2026-06-02 16:34:45,246][243562] Updated weights for policy 0, policy_version 28073 (0.0008) +[2026-06-02 16:34:45,924][243562] Updated weights for policy 0, policy_version 28085 (0.0009) +[2026-06-02 16:34:46,101][243562] Updated weights for policy 0, policy_version 28096 (0.0008) +[2026-06-02 16:34:46,266][243562] Updated weights for policy 0, policy_version 28106 (0.0009) +[2026-06-02 16:34:46,432][243562] Updated weights for policy 0, policy_version 28116 (0.0009) +[2026-06-02 16:34:46,594][243562] Updated weights for policy 0, policy_version 28126 (0.0009) +[2026-06-02 16:34:46,754][243562] Updated weights for policy 0, policy_version 28136 (0.0009) +[2026-06-02 16:34:47,399][243562] Updated weights for policy 0, policy_version 28146 (0.0009) +[2026-06-02 16:34:47,557][243562] Updated weights for policy 0, policy_version 28156 (0.0009) +[2026-06-02 16:34:47,721][243562] Updated weights for policy 0, policy_version 28166 (0.0009) +[2026-06-02 16:34:47,905][243562] Updated weights for policy 0, policy_version 28177 (0.0009) +[2026-06-02 16:34:48,063][243562] Updated weights for policy 0, policy_version 28187 (0.0008) +[2026-06-02 16:34:48,224][243562] Updated weights for policy 0, policy_version 28197 (0.0008) +[2026-06-02 16:34:48,389][243562] Updated weights for policy 0, policy_version 28207 (0.0009) +[2026-06-02 16:34:49,045][243562] Updated weights for policy 0, policy_version 28217 (0.0006) +[2026-06-02 16:34:49,202][243562] Updated weights for policy 0, policy_version 28227 (0.0008) +[2026-06-02 16:34:49,386][243562] Updated weights for policy 0, policy_version 28238 (0.0008) +[2026-06-02 16:34:49,545][243562] Updated weights for policy 0, policy_version 28248 (0.0008) +[2026-06-02 16:34:49,715][243562] Updated weights for policy 0, policy_version 28258 (0.0008) +[2026-06-02 16:34:49,811][235960] Fps is (10 sec: 19661.6, 60 sec: 21299.2, 300 sec: 21438.1). Total num frames: 14450688. Throughput: 0: 21597.9. Samples: 14469504. Policy #0 lag: (min: 116.0, avg: 130.3, max: 174.0) +[2026-06-02 16:34:49,812][235960] Avg episode reward: [(0, '1131.278')] +[2026-06-02 16:34:49,880][243562] Updated weights for policy 0, policy_version 28268 (0.0008) +[2026-06-02 16:34:50,554][243562] Updated weights for policy 0, policy_version 28279 (0.0009) +[2026-06-02 16:34:50,713][243562] Updated weights for policy 0, policy_version 28289 (0.0009) +[2026-06-02 16:34:50,896][243562] Updated weights for policy 0, policy_version 28301 (0.0008) +[2026-06-02 16:34:51,065][243562] Updated weights for policy 0, policy_version 28311 (0.0008) +[2026-06-02 16:34:51,228][243562] Updated weights for policy 0, policy_version 28321 (0.0008) +[2026-06-02 16:34:51,411][243562] Updated weights for policy 0, policy_version 28332 (0.0008) +[2026-06-02 16:34:52,049][243562] Updated weights for policy 0, policy_version 28342 (0.0009) +[2026-06-02 16:34:52,216][243562] Updated weights for policy 0, policy_version 28352 (0.0009) +[2026-06-02 16:34:52,375][243562] Updated weights for policy 0, policy_version 28362 (0.0009) +[2026-06-02 16:34:52,535][243562] Updated weights for policy 0, policy_version 28372 (0.0008) +[2026-06-02 16:34:52,698][243562] Updated weights for policy 0, policy_version 28382 (0.0008) +[2026-06-02 16:34:52,880][243562] Updated weights for policy 0, policy_version 28393 (0.0009) +[2026-06-02 16:34:53,575][243562] Updated weights for policy 0, policy_version 28405 (0.0009) +[2026-06-02 16:34:53,734][243562] Updated weights for policy 0, policy_version 28415 (0.0009) +[2026-06-02 16:34:53,896][243562] Updated weights for policy 0, policy_version 28425 (0.0008) +[2026-06-02 16:34:54,078][243562] Updated weights for policy 0, policy_version 28436 (0.0008) +[2026-06-02 16:34:54,267][243562] Updated weights for policy 0, policy_version 28448 (0.0008) +[2026-06-02 16:34:54,435][243562] Updated weights for policy 0, policy_version 28458 (0.0008) +[2026-06-02 16:34:54,811][235960] Fps is (10 sec: 22937.6, 60 sec: 21845.3, 300 sec: 21438.0). Total num frames: 14581760. Throughput: 0: 21600.8. Samples: 14599680. Policy #0 lag: (min: 116.0, avg: 130.3, max: 174.0) +[2026-06-02 16:34:54,812][235960] Avg episode reward: [(0, '1147.704')] +[2026-06-02 16:34:55,106][243562] Updated weights for policy 0, policy_version 28468 (0.0008) +[2026-06-02 16:34:55,265][243562] Updated weights for policy 0, policy_version 28478 (0.0009) +[2026-06-02 16:34:55,425][243562] Updated weights for policy 0, policy_version 28488 (0.0008) +[2026-06-02 16:34:55,588][243562] Updated weights for policy 0, policy_version 28498 (0.0008) +[2026-06-02 16:34:55,752][243562] Updated weights for policy 0, policy_version 28508 (0.0008) +[2026-06-02 16:34:55,917][243562] Updated weights for policy 0, policy_version 28518 (0.0008) +[2026-06-02 16:34:56,074][243562] Updated weights for policy 0, policy_version 28528 (0.0008) +[2026-06-02 16:34:56,720][243562] Updated weights for policy 0, policy_version 28539 (0.0009) +[2026-06-02 16:34:56,891][243562] Updated weights for policy 0, policy_version 28550 (0.0008) +[2026-06-02 16:34:57,061][243562] Updated weights for policy 0, policy_version 28560 (0.0008) +[2026-06-02 16:34:57,227][243562] Updated weights for policy 0, policy_version 28570 (0.0008) +[2026-06-02 16:34:57,389][243562] Updated weights for policy 0, policy_version 28580 (0.0008) +[2026-06-02 16:34:57,571][243562] Updated weights for policy 0, policy_version 28591 (0.0008) +[2026-06-02 16:34:58,240][243562] Updated weights for policy 0, policy_version 28601 (0.0008) +[2026-06-02 16:34:58,408][243562] Updated weights for policy 0, policy_version 28612 (0.0008) +[2026-06-02 16:34:58,575][243562] Updated weights for policy 0, policy_version 28622 (0.0009) +[2026-06-02 16:34:58,739][243562] Updated weights for policy 0, policy_version 28632 (0.0008) +[2026-06-02 16:34:58,897][243562] Updated weights for policy 0, policy_version 28642 (0.0009) +[2026-06-02 16:34:59,067][243562] Updated weights for policy 0, policy_version 28652 (0.0008) +[2026-06-02 16:34:59,700][243562] Updated weights for policy 0, policy_version 28662 (0.0009) +[2026-06-02 16:34:59,811][235960] Fps is (10 sec: 22937.0, 60 sec: 21299.1, 300 sec: 21438.0). Total num frames: 14680064. Throughput: 0: 21586.4. Samples: 14729216. Policy #0 lag: (min: 116.0, avg: 130.3, max: 174.0) +[2026-06-02 16:34:59,812][235960] Avg episode reward: [(0, '1117.737')] +[2026-06-02 16:34:59,889][243562] Updated weights for policy 0, policy_version 28674 (0.0008) +[2026-06-02 16:35:00,070][243562] Updated weights for policy 0, policy_version 28685 (0.0008) +[2026-06-02 16:35:00,231][243562] Updated weights for policy 0, policy_version 28695 (0.0008) +[2026-06-02 16:35:00,402][243562] Updated weights for policy 0, policy_version 28705 (0.0009) +[2026-06-02 16:35:00,564][243562] Updated weights for policy 0, policy_version 28715 (0.0008) +[2026-06-02 16:35:01,205][243562] Updated weights for policy 0, policy_version 28725 (0.0008) +[2026-06-02 16:35:01,385][243562] Updated weights for policy 0, policy_version 28736 (0.0008) +[2026-06-02 16:35:01,551][243562] Updated weights for policy 0, policy_version 28746 (0.0009) +[2026-06-02 16:35:01,712][243562] Updated weights for policy 0, policy_version 28756 (0.0008) +[2026-06-02 16:35:01,872][243562] Updated weights for policy 0, policy_version 28766 (0.0009) +[2026-06-02 16:35:02,052][243562] Updated weights for policy 0, policy_version 28777 (0.0009) +[2026-06-02 16:35:02,707][243562] Updated weights for policy 0, policy_version 28787 (0.0008) +[2026-06-02 16:35:02,885][243562] Updated weights for policy 0, policy_version 28798 (0.0008) +[2026-06-02 16:35:03,058][243562] Updated weights for policy 0, policy_version 28809 (0.0010) +[2026-06-02 16:35:03,240][243562] Updated weights for policy 0, policy_version 28820 (0.0008) +[2026-06-02 16:35:03,407][243562] Updated weights for policy 0, policy_version 28830 (0.0009) +[2026-06-02 16:35:03,588][243562] Updated weights for policy 0, policy_version 28841 (0.0008) +[2026-06-02 16:35:04,230][243562] Updated weights for policy 0, policy_version 28851 (0.0008) +[2026-06-02 16:35:04,400][243562] Updated weights for policy 0, policy_version 28862 (0.0009) +[2026-06-02 16:35:04,578][243562] Updated weights for policy 0, policy_version 28873 (0.0009) +[2026-06-02 16:35:04,745][243562] Updated weights for policy 0, policy_version 28883 (0.0009) +[2026-06-02 16:35:04,811][235960] Fps is (10 sec: 19660.8, 60 sec: 21299.2, 300 sec: 21438.0). Total num frames: 14778368. Throughput: 0: 21589.4. Samples: 14794496. Policy #0 lag: (min: 116.0, avg: 130.3, max: 174.0) +[2026-06-02 16:35:04,812][235960] Avg episode reward: [(0, '1087.459')] +[2026-06-02 16:35:04,912][243562] Updated weights for policy 0, policy_version 28893 (0.0008) +[2026-06-02 16:35:05,086][243562] Updated weights for policy 0, policy_version 28904 (0.0009) +[2026-06-02 16:35:05,757][243562] Updated weights for policy 0, policy_version 28914 (0.0009) +[2026-06-02 16:35:05,906][243562] Updated weights for policy 0, policy_version 28924 (0.0009) +[2026-06-02 16:35:06,057][243562] Updated weights for policy 0, policy_version 28934 (0.0009) +[2026-06-02 16:35:06,223][243562] Updated weights for policy 0, policy_version 28944 (0.0009) +[2026-06-02 16:35:06,404][243562] Updated weights for policy 0, policy_version 28955 (0.0010) +[2026-06-02 16:35:06,571][243562] Updated weights for policy 0, policy_version 28965 (0.0008) +[2026-06-02 16:35:06,734][243562] Updated weights for policy 0, policy_version 28975 (0.0007) +[2026-06-02 16:35:07,392][243562] Updated weights for policy 0, policy_version 28986 (0.0008) +[2026-06-02 16:35:07,568][243562] Updated weights for policy 0, policy_version 28997 (0.0008) +[2026-06-02 16:35:07,727][243562] Updated weights for policy 0, policy_version 29007 (0.0008) +[2026-06-02 16:35:07,888][243562] Updated weights for policy 0, policy_version 29017 (0.0008) +[2026-06-02 16:35:08,060][243562] Updated weights for policy 0, policy_version 29028 (0.0009) +[2026-06-02 16:35:08,240][243562] Updated weights for policy 0, policy_version 29039 (0.0008) +[2026-06-02 16:35:08,911][243562] Updated weights for policy 0, policy_version 29050 (0.0008) +[2026-06-02 16:35:09,063][243562] Updated weights for policy 0, policy_version 29060 (0.0008) +[2026-06-02 16:35:09,234][243562] Updated weights for policy 0, policy_version 29070 (0.0008) +[2026-06-02 16:35:09,395][243562] Updated weights for policy 0, policy_version 29080 (0.0008) +[2026-06-02 16:35:09,559][243562] Updated weights for policy 0, policy_version 29090 (0.0009) +[2026-06-02 16:35:09,722][243562] Updated weights for policy 0, policy_version 29100 (0.0008) +[2026-06-02 16:35:09,811][235960] Fps is (10 sec: 22937.6, 60 sec: 21845.3, 300 sec: 21438.0). Total num frames: 14909440. Throughput: 0: 21575.0. Samples: 14923264. Policy #0 lag: (min: 63.0, avg: 78.8, max: 127.0) +[2026-06-02 16:35:09,812][235960] Avg episode reward: [(0, '1131.744')] +[2026-06-02 16:35:10,389][243562] Updated weights for policy 0, policy_version 29110 (0.0009) +[2026-06-02 16:35:10,545][243562] Updated weights for policy 0, policy_version 29120 (0.0008) +[2026-06-02 16:35:10,723][243562] Updated weights for policy 0, policy_version 29131 (0.0008) +[2026-06-02 16:35:10,885][243562] Updated weights for policy 0, policy_version 29141 (0.0008) +[2026-06-02 16:35:11,051][243562] Updated weights for policy 0, policy_version 29151 (0.0008) +[2026-06-02 16:35:11,217][243562] Updated weights for policy 0, policy_version 29161 (0.0008) +[2026-06-02 16:35:11,836][243562] Updated weights for policy 0, policy_version 29171 (0.0009) +[2026-06-02 16:35:12,019][243562] Updated weights for policy 0, policy_version 29183 (0.0009) +[2026-06-02 16:35:12,197][243562] Updated weights for policy 0, policy_version 29194 (0.0009) +[2026-06-02 16:35:12,368][243562] Updated weights for policy 0, policy_version 29204 (0.0009) +[2026-06-02 16:35:12,535][243562] Updated weights for policy 0, policy_version 29214 (0.0009) +[2026-06-02 16:35:12,694][243562] Updated weights for policy 0, policy_version 29224 (0.0009) +[2026-06-02 16:35:13,370][243562] Updated weights for policy 0, policy_version 29234 (0.0010) +[2026-06-02 16:35:13,527][243562] Updated weights for policy 0, policy_version 29244 (0.0009) +[2026-06-02 16:35:13,687][243562] Updated weights for policy 0, policy_version 29254 (0.0009) +[2026-06-02 16:35:13,866][243562] Updated weights for policy 0, policy_version 29265 (0.0009) +[2026-06-02 16:35:14,037][243562] Updated weights for policy 0, policy_version 29276 (0.0009) +[2026-06-02 16:35:14,199][243562] Updated weights for policy 0, policy_version 29286 (0.0009) +[2026-06-02 16:35:14,363][243562] Updated weights for policy 0, policy_version 29296 (0.0009) +[2026-06-02 16:35:14,811][235960] Fps is (10 sec: 22937.6, 60 sec: 21299.2, 300 sec: 21438.0). Total num frames: 15007744. Throughput: 0: 21495.6. Samples: 15048576. Policy #0 lag: (min: 63.0, avg: 78.8, max: 127.0) +[2026-06-02 16:35:14,812][235960] Avg episode reward: [(0, '1128.468')] +[2026-06-02 16:35:15,029][243562] Updated weights for policy 0, policy_version 29306 (0.0009) +[2026-06-02 16:35:15,196][243562] Updated weights for policy 0, policy_version 29317 (0.0008) +[2026-06-02 16:35:15,359][243562] Updated weights for policy 0, policy_version 29327 (0.0008) +[2026-06-02 16:35:15,521][243562] Updated weights for policy 0, policy_version 29337 (0.0008) +[2026-06-02 16:35:15,689][243562] Updated weights for policy 0, policy_version 29347 (0.0008) +[2026-06-02 16:35:15,849][243562] Updated weights for policy 0, policy_version 29357 (0.0008) +[2026-06-02 16:35:16,511][243562] Updated weights for policy 0, policy_version 29367 (0.0008) +[2026-06-02 16:35:16,672][243562] Updated weights for policy 0, policy_version 29377 (0.0008) +[2026-06-02 16:35:16,828][243562] Updated weights for policy 0, policy_version 29387 (0.0008) +[2026-06-02 16:35:17,013][243562] Updated weights for policy 0, policy_version 29398 (0.0008) +[2026-06-02 16:35:17,178][243562] Updated weights for policy 0, policy_version 29408 (0.0008) +[2026-06-02 16:35:17,346][243562] Updated weights for policy 0, policy_version 29418 (0.0008) +[2026-06-02 16:35:17,991][243562] Updated weights for policy 0, policy_version 29428 (0.0009) +[2026-06-02 16:35:18,161][243562] Updated weights for policy 0, policy_version 29439 (0.0008) +[2026-06-02 16:35:18,322][243562] Updated weights for policy 0, policy_version 29449 (0.0008) +[2026-06-02 16:35:18,480][243562] Updated weights for policy 0, policy_version 29459 (0.0008) +[2026-06-02 16:35:18,648][243562] Updated weights for policy 0, policy_version 29469 (0.0009) +[2026-06-02 16:35:18,806][243562] Updated weights for policy 0, policy_version 29479 (0.0009) +[2026-06-02 16:35:19,478][243562] Updated weights for policy 0, policy_version 29490 (0.0009) +[2026-06-02 16:35:19,669][243562] Updated weights for policy 0, policy_version 29502 (0.0009) +[2026-06-02 16:35:19,811][235960] Fps is (10 sec: 19660.8, 60 sec: 21299.2, 300 sec: 21438.0). Total num frames: 15106048. Throughput: 0: 21458.4. Samples: 15111552. Policy #0 lag: (min: 63.0, avg: 78.8, max: 127.0) +[2026-06-02 16:35:19,812][235960] Avg episode reward: [(0, '1161.334')] +[2026-06-02 16:35:19,826][243562] Updated weights for policy 0, policy_version 29512 (0.0008) +[2026-06-02 16:35:19,991][243562] Updated weights for policy 0, policy_version 29522 (0.0008) +[2026-06-02 16:35:20,154][243562] Updated weights for policy 0, policy_version 29532 (0.0008) +[2026-06-02 16:35:20,335][243562] Updated weights for policy 0, policy_version 29543 (0.0009) +[2026-06-02 16:35:21,012][243562] Updated weights for policy 0, policy_version 29553 (0.0009) +[2026-06-02 16:35:21,165][243562] Updated weights for policy 0, policy_version 29563 (0.0009) +[2026-06-02 16:35:21,325][243562] Updated weights for policy 0, policy_version 29573 (0.0009) +[2026-06-02 16:35:21,478][243562] Updated weights for policy 0, policy_version 29583 (0.0008) +[2026-06-02 16:35:21,644][243562] Updated weights for policy 0, policy_version 29593 (0.0009) +[2026-06-02 16:35:21,822][243562] Updated weights for policy 0, policy_version 29604 (0.0007) +[2026-06-02 16:35:21,988][243562] Updated weights for policy 0, policy_version 29614 (0.0008) +[2026-06-02 16:35:22,631][243562] Updated weights for policy 0, policy_version 29625 (0.0009) +[2026-06-02 16:35:22,810][243562] Updated weights for policy 0, policy_version 29637 (0.0009) +[2026-06-02 16:35:22,970][243562] Updated weights for policy 0, policy_version 29647 (0.0008) +[2026-06-02 16:35:23,131][243562] Updated weights for policy 0, policy_version 29657 (0.0008) +[2026-06-02 16:35:23,301][243562] Updated weights for policy 0, policy_version 29667 (0.0008) +[2026-06-02 16:35:23,477][243562] Updated weights for policy 0, policy_version 29678 (0.0008) +[2026-06-02 16:35:24,165][243562] Updated weights for policy 0, policy_version 29689 (0.0009) +[2026-06-02 16:35:24,334][243562] Updated weights for policy 0, policy_version 29700 (0.0008) +[2026-06-02 16:35:24,505][243562] Updated weights for policy 0, policy_version 29710 (0.0009) +[2026-06-02 16:35:24,674][243562] Updated weights for policy 0, policy_version 29721 (0.0008) +[2026-06-02 16:35:24,811][235960] Fps is (10 sec: 19660.8, 60 sec: 21299.2, 300 sec: 21438.0). Total num frames: 15204352. Throughput: 0: 21387.5. Samples: 15237632. Policy #0 lag: (min: 63.0, avg: 78.8, max: 127.0) +[2026-06-02 16:35:24,812][235960] Avg episode reward: [(0, '1163.633')] +[2026-06-02 16:35:24,833][243562] Updated weights for policy 0, policy_version 29731 (0.0008) +[2026-06-02 16:35:25,016][243562] Updated weights for policy 0, policy_version 29743 (0.0009) +[2026-06-02 16:35:25,675][243562] Updated weights for policy 0, policy_version 29753 (0.0009) +[2026-06-02 16:35:25,862][243562] Updated weights for policy 0, policy_version 29765 (0.0009) +[2026-06-02 16:35:26,034][243562] Updated weights for policy 0, policy_version 29776 (0.0009) +[2026-06-02 16:35:26,204][243562] Updated weights for policy 0, policy_version 29787 (0.0008) +[2026-06-02 16:35:26,385][243562] Updated weights for policy 0, policy_version 29798 (0.0008) +[2026-06-02 16:35:27,071][243562] Updated weights for policy 0, policy_version 29809 (0.0009) +[2026-06-02 16:35:27,249][243562] Updated weights for policy 0, policy_version 29821 (0.0009) +[2026-06-02 16:35:27,405][243562] Updated weights for policy 0, policy_version 29831 (0.0009) +[2026-06-02 16:35:27,567][243562] Updated weights for policy 0, policy_version 29841 (0.0009) +[2026-06-02 16:35:27,737][243562] Updated weights for policy 0, policy_version 29852 (0.0009) +[2026-06-02 16:35:27,903][243562] Updated weights for policy 0, policy_version 29862 (0.0008) +[2026-06-02 16:35:28,058][243562] Updated weights for policy 0, policy_version 29872 (0.0009) +[2026-06-02 16:35:28,732][243562] Updated weights for policy 0, policy_version 29883 (0.0009) +[2026-06-02 16:35:28,880][243562] Updated weights for policy 0, policy_version 29893 (0.0009) +[2026-06-02 16:35:29,039][243562] Updated weights for policy 0, policy_version 29903 (0.0009) +[2026-06-02 16:35:29,220][243562] Updated weights for policy 0, policy_version 29914 (0.0009) +[2026-06-02 16:35:29,392][243562] Updated weights for policy 0, policy_version 29925 (0.0009) +[2026-06-02 16:35:29,564][243562] Updated weights for policy 0, policy_version 29936 (0.0009) +[2026-06-02 16:35:29,811][235960] Fps is (10 sec: 22937.4, 60 sec: 21845.3, 300 sec: 21438.0). Total num frames: 15335424. Throughput: 0: 21304.8. Samples: 15363840. Policy #0 lag: (min: 63.0, avg: 77.3, max: 127.0) +[2026-06-02 16:35:29,812][235960] Avg episode reward: [(0, '1140.511')] +[2026-06-02 16:35:30,243][243562] Updated weights for policy 0, policy_version 29946 (0.0009) +[2026-06-02 16:35:30,397][243562] Updated weights for policy 0, policy_version 29956 (0.0008) +[2026-06-02 16:35:30,575][243562] Updated weights for policy 0, policy_version 29967 (0.0009) +[2026-06-02 16:35:30,747][243562] Updated weights for policy 0, policy_version 29978 (0.0009) +[2026-06-02 16:35:30,954][243562] Updated weights for policy 0, policy_version 29991 (0.0009) +[2026-06-02 16:35:31,632][243562] Updated weights for policy 0, policy_version 30001 (0.0009) +[2026-06-02 16:35:31,776][243562] Updated weights for policy 0, policy_version 30011 (0.0008) +[2026-06-02 16:35:31,930][243562] Updated weights for policy 0, policy_version 30021 (0.0009) +[2026-06-02 16:35:32,124][243562] Updated weights for policy 0, policy_version 30033 (0.0009) +[2026-06-02 16:35:32,279][243562] Updated weights for policy 0, policy_version 30043 (0.0009) +[2026-06-02 16:35:32,465][243562] Updated weights for policy 0, policy_version 30054 (0.0008) +[2026-06-02 16:35:32,617][243562] Updated weights for policy 0, policy_version 30064 (0.0009) +[2026-06-02 16:35:33,311][243562] Updated weights for policy 0, policy_version 30076 (0.0007) +[2026-06-02 16:35:33,481][243562] Updated weights for policy 0, policy_version 30087 (0.0008) +[2026-06-02 16:35:33,641][243562] Updated weights for policy 0, policy_version 30097 (0.0008) +[2026-06-02 16:35:33,799][243562] Updated weights for policy 0, policy_version 30107 (0.0008) +[2026-06-02 16:35:33,984][243562] Updated weights for policy 0, policy_version 30118 (0.0008) +[2026-06-02 16:35:34,681][243562] Updated weights for policy 0, policy_version 30129 (0.0008) +[2026-06-02 16:35:34,811][235960] Fps is (10 sec: 22937.6, 60 sec: 21299.2, 300 sec: 21438.0). Total num frames: 15433728. Throughput: 0: 21304.8. Samples: 15428224. Policy #0 lag: (min: 63.0, avg: 77.3, max: 127.0) +[2026-06-02 16:35:34,812][235960] Avg episode reward: [(0, '1130.344')] +[2026-06-02 16:35:34,839][243562] Updated weights for policy 0, policy_version 30139 (0.0008) +[2026-06-02 16:35:34,996][243562] Updated weights for policy 0, policy_version 30150 (0.0008) +[2026-06-02 16:35:35,179][243562] Updated weights for policy 0, policy_version 30161 (0.0008) +[2026-06-02 16:35:35,357][243562] Updated weights for policy 0, policy_version 30172 (0.0008) +[2026-06-02 16:35:35,526][243562] Updated weights for policy 0, policy_version 30183 (0.0009) +[2026-06-02 16:35:36,202][243562] Updated weights for policy 0, policy_version 30193 (0.0008) +[2026-06-02 16:35:36,352][243562] Updated weights for policy 0, policy_version 30203 (0.0009) +[2026-06-02 16:35:36,539][243562] Updated weights for policy 0, policy_version 30215 (0.0009) +[2026-06-02 16:35:36,702][243562] Updated weights for policy 0, policy_version 30225 (0.0008) +[2026-06-02 16:35:36,871][243562] Updated weights for policy 0, policy_version 30236 (0.0009) +[2026-06-02 16:35:37,035][243562] Updated weights for policy 0, policy_version 30246 (0.0009) +[2026-06-02 16:35:37,188][243562] Updated weights for policy 0, policy_version 30256 (0.0009) +[2026-06-02 16:35:37,842][243562] Updated weights for policy 0, policy_version 30266 (0.0009) +[2026-06-02 16:35:38,011][243562] Updated weights for policy 0, policy_version 30277 (0.0008) +[2026-06-02 16:35:38,190][243562] Updated weights for policy 0, policy_version 30288 (0.0009) +[2026-06-02 16:35:38,346][243562] Updated weights for policy 0, policy_version 30298 (0.0009) +[2026-06-02 16:35:38,540][243562] Updated weights for policy 0, policy_version 30310 (0.0009) +[2026-06-02 16:35:38,694][243562] Updated weights for policy 0, policy_version 30320 (0.0009) +[2026-06-02 16:35:39,376][243562] Updated weights for policy 0, policy_version 30331 (0.0009) +[2026-06-02 16:35:39,534][243562] Updated weights for policy 0, policy_version 30341 (0.0009) +[2026-06-02 16:35:39,692][243562] Updated weights for policy 0, policy_version 30351 (0.0009) +[2026-06-02 16:35:39,811][235960] Fps is (10 sec: 19660.9, 60 sec: 21299.2, 300 sec: 21438.0). Total num frames: 15532032. Throughput: 0: 21284.9. Samples: 15557504. Policy #0 lag: (min: 63.0, avg: 77.3, max: 127.0) +[2026-06-02 16:35:39,812][235960] Avg episode reward: [(0, '1147.652')] +[2026-06-02 16:35:39,853][243562] Updated weights for policy 0, policy_version 30361 (0.0009) +[2026-06-02 16:35:40,068][243562] Updated weights for policy 0, policy_version 30375 (0.0009) +[2026-06-02 16:35:40,762][243562] Updated weights for policy 0, policy_version 30385 (0.0008) +[2026-06-02 16:35:40,927][243562] Updated weights for policy 0, policy_version 30396 (0.0008) +[2026-06-02 16:35:41,097][243562] Updated weights for policy 0, policy_version 30407 (0.0010) +[2026-06-02 16:35:41,282][243562] Updated weights for policy 0, policy_version 30418 (0.0008) +[2026-06-02 16:35:41,444][243562] Updated weights for policy 0, policy_version 30428 (0.0009) +[2026-06-02 16:35:41,622][243562] Updated weights for policy 0, policy_version 30439 (0.0008) +[2026-06-02 16:35:42,272][243562] Updated weights for policy 0, policy_version 30449 (0.0007) +[2026-06-02 16:35:42,446][243562] Updated weights for policy 0, policy_version 30460 (0.0009) +[2026-06-02 16:35:42,627][243562] Updated weights for policy 0, policy_version 30471 (0.0009) +[2026-06-02 16:35:42,790][243562] Updated weights for policy 0, policy_version 30481 (0.0008) +[2026-06-02 16:35:42,955][243562] Updated weights for policy 0, policy_version 30491 (0.0008) +[2026-06-02 16:35:43,138][243562] Updated weights for policy 0, policy_version 30502 (0.0009) +[2026-06-02 16:35:43,295][243562] Updated weights for policy 0, policy_version 30512 (0.0008) +[2026-06-02 16:35:43,941][243562] Updated weights for policy 0, policy_version 30522 (0.0009) +[2026-06-02 16:35:44,104][243562] Updated weights for policy 0, policy_version 30532 (0.0008) +[2026-06-02 16:35:44,288][243562] Updated weights for policy 0, policy_version 30543 (0.0009) +[2026-06-02 16:35:44,462][243562] Updated weights for policy 0, policy_version 30554 (0.0008) +[2026-06-02 16:35:44,645][243562] Updated weights for policy 0, policy_version 30565 (0.0008) +[2026-06-02 16:35:44,811][235960] Fps is (10 sec: 19660.9, 60 sec: 21299.2, 300 sec: 21438.1). Total num frames: 15630336. Throughput: 0: 21307.8. Samples: 15688064. Policy #0 lag: (min: 63.0, avg: 77.3, max: 127.0) +[2026-06-02 16:35:44,811][235960] Avg episode reward: [(0, '1215.444')] +[2026-06-02 16:35:44,815][243562] Updated weights for policy 0, policy_version 30575 (0.0008) +[2026-06-02 16:35:44,823][242748] Saving new best policy, reward=1215.444! +[2026-06-02 16:35:45,438][243562] Updated weights for policy 0, policy_version 30585 (0.0009) +[2026-06-02 16:35:45,621][243562] Updated weights for policy 0, policy_version 30596 (0.0008) +[2026-06-02 16:35:45,790][243562] Updated weights for policy 0, policy_version 30606 (0.0009) +[2026-06-02 16:35:45,952][243562] Updated weights for policy 0, policy_version 30616 (0.0008) +[2026-06-02 16:35:46,124][243562] Updated weights for policy 0, policy_version 30627 (0.0009) +[2026-06-02 16:35:46,291][243562] Updated weights for policy 0, policy_version 30637 (0.0008) +[2026-06-02 16:35:46,953][243562] Updated weights for policy 0, policy_version 30648 (0.0009) +[2026-06-02 16:35:47,116][243562] Updated weights for policy 0, policy_version 30658 (0.0008) +[2026-06-02 16:35:47,280][243562] Updated weights for policy 0, policy_version 30668 (0.0008) +[2026-06-02 16:35:47,441][243562] Updated weights for policy 0, policy_version 30678 (0.0008) +[2026-06-02 16:35:47,622][243562] Updated weights for policy 0, policy_version 30689 (0.0008) +[2026-06-02 16:35:47,807][243562] Updated weights for policy 0, policy_version 30700 (0.0008) +[2026-06-02 16:35:48,450][243562] Updated weights for policy 0, policy_version 30710 (0.0008) +[2026-06-02 16:35:48,609][243562] Updated weights for policy 0, policy_version 30720 (0.0008) +[2026-06-02 16:35:48,766][243562] Updated weights for policy 0, policy_version 30730 (0.0008) +[2026-06-02 16:35:48,926][243562] Updated weights for policy 0, policy_version 30740 (0.0008) +[2026-06-02 16:35:49,099][243562] Updated weights for policy 0, policy_version 30750 (0.0009) +[2026-06-02 16:35:49,277][243562] Updated weights for policy 0, policy_version 30761 (0.0008) +[2026-06-02 16:35:49,811][235960] Fps is (10 sec: 22938.0, 60 sec: 21845.3, 300 sec: 21438.1). Total num frames: 15761408. Throughput: 0: 21287.8. Samples: 15752448. Policy #0 lag: (min: 63.0, avg: 78.8, max: 127.0) +[2026-06-02 16:35:49,813][235960] Avg episode reward: [(0, '1259.166')] +[2026-06-02 16:35:49,937][243562] Updated weights for policy 0, policy_version 30772 (0.0008) +[2026-06-02 16:35:50,112][243562] Updated weights for policy 0, policy_version 30783 (0.0008) +[2026-06-02 16:35:50,276][243562] Updated weights for policy 0, policy_version 30793 (0.0009) +[2026-06-02 16:35:50,443][243562] Updated weights for policy 0, policy_version 30803 (0.0008) +[2026-06-02 16:35:50,620][243562] Updated weights for policy 0, policy_version 30814 (0.0009) +[2026-06-02 16:35:50,786][243562] Updated weights for policy 0, policy_version 30824 (0.0009) +[2026-06-02 16:35:50,911][242748] Saving new best policy, reward=1259.166! +[2026-06-02 16:35:51,433][243562] Updated weights for policy 0, policy_version 30835 (0.0009) +[2026-06-02 16:35:51,596][243562] Updated weights for policy 0, policy_version 30845 (0.0008) +[2026-06-02 16:35:51,773][243562] Updated weights for policy 0, policy_version 30856 (0.0008) +[2026-06-02 16:35:51,953][243562] Updated weights for policy 0, policy_version 30867 (0.0008) +[2026-06-02 16:35:52,132][243562] Updated weights for policy 0, policy_version 30878 (0.0008) +[2026-06-02 16:35:52,302][243562] Updated weights for policy 0, policy_version 30888 (0.0008) +[2026-06-02 16:35:52,940][243562] Updated weights for policy 0, policy_version 30898 (0.0008) +[2026-06-02 16:35:53,104][243562] Updated weights for policy 0, policy_version 30909 (0.0008) +[2026-06-02 16:35:53,281][243562] Updated weights for policy 0, policy_version 30920 (0.0008) +[2026-06-02 16:35:53,448][243562] Updated weights for policy 0, policy_version 30930 (0.0008) +[2026-06-02 16:35:53,614][243562] Updated weights for policy 0, policy_version 30940 (0.0008) +[2026-06-02 16:35:53,780][243562] Updated weights for policy 0, policy_version 30950 (0.0009) +[2026-06-02 16:35:53,937][243562] Updated weights for policy 0, policy_version 30960 (0.0008) +[2026-06-02 16:35:54,587][243562] Updated weights for policy 0, policy_version 30970 (0.0009) +[2026-06-02 16:35:54,741][243562] Updated weights for policy 0, policy_version 30980 (0.0009) +[2026-06-02 16:35:54,811][235960] Fps is (10 sec: 22937.4, 60 sec: 21299.2, 300 sec: 21438.0). Total num frames: 15859712. Throughput: 0: 21293.6. Samples: 15881472. Policy #0 lag: (min: 63.0, avg: 78.8, max: 127.0) +[2026-06-02 16:35:54,812][235960] Avg episode reward: [(0, '1250.018')] +[2026-06-02 16:35:54,914][243562] Updated weights for policy 0, policy_version 30990 (0.0008) +[2026-06-02 16:35:55,082][243562] Updated weights for policy 0, policy_version 31000 (0.0008) +[2026-06-02 16:35:55,247][243562] Updated weights for policy 0, policy_version 31010 (0.0008) +[2026-06-02 16:35:55,412][243562] Updated weights for policy 0, policy_version 31020 (0.0009) +[2026-06-02 16:35:56,091][243562] Updated weights for policy 0, policy_version 31032 (0.0009) +[2026-06-02 16:35:56,251][243562] Updated weights for policy 0, policy_version 31042 (0.0008) +[2026-06-02 16:35:56,410][243562] Updated weights for policy 0, policy_version 31052 (0.0008) +[2026-06-02 16:35:56,593][243562] Updated weights for policy 0, policy_version 31063 (0.0009) +[2026-06-02 16:35:56,753][243562] Updated weights for policy 0, policy_version 31073 (0.0008) +[2026-06-02 16:35:56,934][243562] Updated weights for policy 0, policy_version 31084 (0.0006) +[2026-06-02 16:35:57,561][243562] Updated weights for policy 0, policy_version 31094 (0.0004) +[2026-06-02 16:35:57,738][243562] Updated weights for policy 0, policy_version 31105 (0.0004) +[2026-06-02 16:35:57,896][243562] Updated weights for policy 0, policy_version 31115 (0.0005) +[2026-06-02 16:35:58,082][243562] Updated weights for policy 0, policy_version 31126 (0.0010) +[2026-06-02 16:35:58,254][243562] Updated weights for policy 0, policy_version 31136 (0.0010) +[2026-06-02 16:35:58,415][243562] Updated weights for policy 0, policy_version 31146 (0.0008) +[2026-06-02 16:35:59,045][243562] Updated weights for policy 0, policy_version 31156 (0.0009) +[2026-06-02 16:35:59,200][243562] Updated weights for policy 0, policy_version 31166 (0.0008) +[2026-06-02 16:35:59,376][243562] Updated weights for policy 0, policy_version 31177 (0.0008) +[2026-06-02 16:35:59,546][243562] Updated weights for policy 0, policy_version 31187 (0.0008) +[2026-06-02 16:35:59,709][243562] Updated weights for policy 0, policy_version 31197 (0.0008) +[2026-06-02 16:35:59,811][235960] Fps is (10 sec: 19660.9, 60 sec: 21299.3, 300 sec: 21438.0). Total num frames: 15958016. Throughput: 0: 21387.4. Samples: 16011008. Policy #0 lag: (min: 63.0, avg: 78.8, max: 127.0) +[2026-06-02 16:35:59,812][235960] Avg episode reward: [(0, '1245.505')] +[2026-06-02 16:35:59,873][243562] Updated weights for policy 0, policy_version 31207 (0.0008) +[2026-06-02 16:36:00,512][243562] Updated weights for policy 0, policy_version 31217 (0.0009) +[2026-06-02 16:36:00,689][243562] Updated weights for policy 0, policy_version 31228 (0.0009) +[2026-06-02 16:36:00,846][243562] Updated weights for policy 0, policy_version 31238 (0.0008) +[2026-06-02 16:36:01,013][243562] Updated weights for policy 0, policy_version 31248 (0.0008) +[2026-06-02 16:36:01,175][243562] Updated weights for policy 0, policy_version 31258 (0.0008) +[2026-06-02 16:36:01,361][243562] Updated weights for policy 0, policy_version 31269 (0.0008) +[2026-06-02 16:36:01,528][243562] Updated weights for policy 0, policy_version 31279 (0.0008) +[2026-06-02 16:36:02,177][243562] Updated weights for policy 0, policy_version 31289 (0.0009) +[2026-06-02 16:36:02,329][243562] Updated weights for policy 0, policy_version 31299 (0.0008) +[2026-06-02 16:36:02,516][243562] Updated weights for policy 0, policy_version 31310 (0.0008) +[2026-06-02 16:36:02,689][243562] Updated weights for policy 0, policy_version 31321 (0.0008) +[2026-06-02 16:36:02,878][243562] Updated weights for policy 0, policy_version 31332 (0.0009) +[2026-06-02 16:36:03,036][243562] Updated weights for policy 0, policy_version 31342 (0.0008) +[2026-06-02 16:36:03,687][243562] Updated weights for policy 0, policy_version 31353 (0.0008) +[2026-06-02 16:36:03,853][243562] Updated weights for policy 0, policy_version 31363 (0.0008) +[2026-06-02 16:36:04,033][243562] Updated weights for policy 0, policy_version 31374 (0.0009) +[2026-06-02 16:36:04,198][243562] Updated weights for policy 0, policy_version 31384 (0.0009) +[2026-06-02 16:36:04,365][243562] Updated weights for policy 0, policy_version 31394 (0.0008) +[2026-06-02 16:36:04,524][243562] Updated weights for policy 0, policy_version 31404 (0.0008) +[2026-06-02 16:36:04,811][235960] Fps is (10 sec: 22937.6, 60 sec: 21845.3, 300 sec: 21438.1). Total num frames: 16089088. Throughput: 0: 21430.1. Samples: 16075904. Policy #0 lag: (min: 63.0, avg: 78.8, max: 127.0) +[2026-06-02 16:36:04,812][235960] Avg episode reward: [(0, '1258.726')] +[2026-06-02 16:36:05,182][243562] Updated weights for policy 0, policy_version 31415 (0.0008) +[2026-06-02 16:36:05,359][243562] Updated weights for policy 0, policy_version 31426 (0.0009) +[2026-06-02 16:36:05,516][243562] Updated weights for policy 0, policy_version 31436 (0.0007) +[2026-06-02 16:36:05,681][243562] Updated weights for policy 0, policy_version 31446 (0.0008) +[2026-06-02 16:36:05,846][243562] Updated weights for policy 0, policy_version 31456 (0.0009) +[2026-06-02 16:36:06,009][243562] Updated weights for policy 0, policy_version 31466 (0.0008) +[2026-06-02 16:36:06,652][243562] Updated weights for policy 0, policy_version 31476 (0.0008) +[2026-06-02 16:36:06,827][243562] Updated weights for policy 0, policy_version 31487 (0.0008) +[2026-06-02 16:36:06,979][243562] Updated weights for policy 0, policy_version 31497 (0.0008) +[2026-06-02 16:36:07,151][243562] Updated weights for policy 0, policy_version 31507 (0.0009) +[2026-06-02 16:36:07,316][243562] Updated weights for policy 0, policy_version 31517 (0.0009) +[2026-06-02 16:36:07,511][243562] Updated weights for policy 0, policy_version 31529 (0.0008) +[2026-06-02 16:36:08,167][243562] Updated weights for policy 0, policy_version 31540 (0.0009) +[2026-06-02 16:36:08,341][243562] Updated weights for policy 0, policy_version 31551 (0.0008) +[2026-06-02 16:36:08,495][243562] Updated weights for policy 0, policy_version 31561 (0.0009) +[2026-06-02 16:36:08,667][243562] Updated weights for policy 0, policy_version 31571 (0.0008) +[2026-06-02 16:36:08,827][243562] Updated weights for policy 0, policy_version 31581 (0.0008) +[2026-06-02 16:36:08,981][243562] Updated weights for policy 0, policy_version 31591 (0.0009) +[2026-06-02 16:36:09,649][243562] Updated weights for policy 0, policy_version 31601 (0.0008) +[2026-06-02 16:36:09,811][235960] Fps is (10 sec: 22937.6, 60 sec: 21299.3, 300 sec: 21438.0). Total num frames: 16187392. Throughput: 0: 21484.1. Samples: 16204416. Policy #0 lag: (min: 63.0, avg: 78.8, max: 127.0) +[2026-06-02 16:36:09,812][235960] Avg episode reward: [(0, '1269.479')] +[2026-06-02 16:36:09,827][243562] Updated weights for policy 0, policy_version 31612 (0.0009) +[2026-06-02 16:36:09,997][243562] Updated weights for policy 0, policy_version 31623 (0.0008) +[2026-06-02 16:36:10,168][243562] Updated weights for policy 0, policy_version 31633 (0.0008) +[2026-06-02 16:36:10,323][243562] Updated weights for policy 0, policy_version 31643 (0.0008) +[2026-06-02 16:36:10,492][243562] Updated weights for policy 0, policy_version 31653 (0.0009) +[2026-06-02 16:36:10,659][243562] Updated weights for policy 0, policy_version 31663 (0.0008) +[2026-06-02 16:36:10,668][242748] Saving new best policy, reward=1269.479! +[2026-06-02 16:36:11,311][243562] Updated weights for policy 0, policy_version 31673 (0.0009) +[2026-06-02 16:36:11,487][243562] Updated weights for policy 0, policy_version 31684 (0.0008) +[2026-06-02 16:36:11,654][243562] Updated weights for policy 0, policy_version 31694 (0.0008) +[2026-06-02 16:36:11,818][243562] Updated weights for policy 0, policy_version 31704 (0.0008) +[2026-06-02 16:36:11,983][243562] Updated weights for policy 0, policy_version 31714 (0.0008) +[2026-06-02 16:36:12,147][243562] Updated weights for policy 0, policy_version 31724 (0.0008) +[2026-06-02 16:36:12,765][243562] Updated weights for policy 0, policy_version 31734 (0.0008) +[2026-06-02 16:36:12,931][243562] Updated weights for policy 0, policy_version 31744 (0.0008) +[2026-06-02 16:36:13,095][243562] Updated weights for policy 0, policy_version 31754 (0.0008) +[2026-06-02 16:36:13,262][243562] Updated weights for policy 0, policy_version 31764 (0.0008) +[2026-06-02 16:36:13,442][243562] Updated weights for policy 0, policy_version 31775 (0.0009) +[2026-06-02 16:36:13,609][243562] Updated weights for policy 0, policy_version 31785 (0.0008) +[2026-06-02 16:36:14,259][243562] Updated weights for policy 0, policy_version 31795 (0.0008) +[2026-06-02 16:36:14,425][243562] Updated weights for policy 0, policy_version 31805 (0.0009) +[2026-06-02 16:36:14,584][243562] Updated weights for policy 0, policy_version 31815 (0.0008) +[2026-06-02 16:36:14,748][243562] Updated weights for policy 0, policy_version 31825 (0.0008) +[2026-06-02 16:36:14,811][235960] Fps is (10 sec: 19660.8, 60 sec: 21299.2, 300 sec: 21438.0). Total num frames: 16285696. Throughput: 0: 21561.0. Samples: 16334080. Policy #0 lag: (min: 13.0, avg: 62.6, max: 83.0) +[2026-06-02 16:36:14,812][235960] Avg episode reward: [(0, '1311.392')] +[2026-06-02 16:36:14,913][243562] Updated weights for policy 0, policy_version 31835 (0.0008) +[2026-06-02 16:36:15,082][243562] Updated weights for policy 0, policy_version 31845 (0.0008) +[2026-06-02 16:36:15,241][243562] Updated weights for policy 0, policy_version 31855 (0.0008) +[2026-06-02 16:36:15,253][242748] Saving new best policy, reward=1311.392! +[2026-06-02 16:36:15,902][243562] Updated weights for policy 0, policy_version 31866 (0.0008) +[2026-06-02 16:36:16,062][243562] Updated weights for policy 0, policy_version 31876 (0.0008) +[2026-06-02 16:36:16,228][243562] Updated weights for policy 0, policy_version 31886 (0.0008) +[2026-06-02 16:36:16,389][243562] Updated weights for policy 0, policy_version 31896 (0.0008) +[2026-06-02 16:36:16,574][243562] Updated weights for policy 0, policy_version 31907 (0.0008) +[2026-06-02 16:36:16,739][243562] Updated weights for policy 0, policy_version 31917 (0.0008) +[2026-06-02 16:36:17,369][243562] Updated weights for policy 0, policy_version 31927 (0.0008) +[2026-06-02 16:36:17,534][243562] Updated weights for policy 0, policy_version 31937 (0.0008) +[2026-06-02 16:36:17,699][243562] Updated weights for policy 0, policy_version 31947 (0.0008) +[2026-06-02 16:36:17,888][243562] Updated weights for policy 0, policy_version 31959 (0.0010) +[2026-06-02 16:36:18,055][243562] Updated weights for policy 0, policy_version 31969 (0.0008) +[2026-06-02 16:36:18,221][243562] Updated weights for policy 0, policy_version 31979 (0.0008) +[2026-06-02 16:36:18,881][243562] Updated weights for policy 0, policy_version 31990 (0.0008) +[2026-06-02 16:36:19,039][243562] Updated weights for policy 0, policy_version 32000 (0.0008) +[2026-06-02 16:36:19,195][243562] Updated weights for policy 0, policy_version 32010 (0.0008) +[2026-06-02 16:36:19,383][243562] Updated weights for policy 0, policy_version 32021 (0.0008) +[2026-06-02 16:36:19,547][243562] Updated weights for policy 0, policy_version 32031 (0.0008) +[2026-06-02 16:36:19,711][243562] Updated weights for policy 0, policy_version 32041 (0.0008) +[2026-06-02 16:36:19,811][235960] Fps is (10 sec: 19660.7, 60 sec: 21299.3, 300 sec: 21438.0). Total num frames: 16384000. Throughput: 0: 21566.6. Samples: 16398720. Policy #0 lag: (min: 13.0, avg: 62.6, max: 83.0) +[2026-06-02 16:36:19,812][235960] Avg episode reward: [(0, '1323.586')] +[2026-06-02 16:36:19,819][242748] Saving new best policy, reward=1323.586! +[2026-06-02 16:36:20,359][243562] Updated weights for policy 0, policy_version 32051 (0.0009) +[2026-06-02 16:36:20,528][243562] Updated weights for policy 0, policy_version 32062 (0.0008) +[2026-06-02 16:36:20,685][243562] Updated weights for policy 0, policy_version 32072 (0.0008) +[2026-06-02 16:36:20,850][243562] Updated weights for policy 0, policy_version 32082 (0.0008) +[2026-06-02 16:36:21,019][243562] Updated weights for policy 0, policy_version 32092 (0.0009) +[2026-06-02 16:36:21,184][243562] Updated weights for policy 0, policy_version 32102 (0.0008) +[2026-06-02 16:36:21,340][243562] Updated weights for policy 0, policy_version 32112 (0.0008) +[2026-06-02 16:36:21,976][243562] Updated weights for policy 0, policy_version 32122 (0.0009) +[2026-06-02 16:36:22,130][243562] Updated weights for policy 0, policy_version 32132 (0.0009) +[2026-06-02 16:36:22,293][243562] Updated weights for policy 0, policy_version 32142 (0.0009) +[2026-06-02 16:36:22,488][243562] Updated weights for policy 0, policy_version 32154 (0.0009) +[2026-06-02 16:36:22,649][243562] Updated weights for policy 0, policy_version 32164 (0.0008) +[2026-06-02 16:36:22,818][243562] Updated weights for policy 0, policy_version 32174 (0.0009) +[2026-06-02 16:36:23,459][243562] Updated weights for policy 0, policy_version 32184 (0.0009) +[2026-06-02 16:36:23,621][243562] Updated weights for policy 0, policy_version 32194 (0.0008) +[2026-06-02 16:36:23,796][243562] Updated weights for policy 0, policy_version 32205 (0.0008) +[2026-06-02 16:36:23,960][243562] Updated weights for policy 0, policy_version 32215 (0.0008) +[2026-06-02 16:36:24,121][243562] Updated weights for policy 0, policy_version 32225 (0.0009) +[2026-06-02 16:36:24,302][243562] Updated weights for policy 0, policy_version 32236 (0.0008) +[2026-06-02 16:36:24,811][235960] Fps is (10 sec: 22937.6, 60 sec: 21845.3, 300 sec: 21438.0). Total num frames: 16515072. Throughput: 0: 21535.4. Samples: 16526592. Policy #0 lag: (min: 13.0, avg: 62.6, max: 83.0) +[2026-06-02 16:36:24,812][235960] Avg episode reward: [(0, '1415.957')] +[2026-06-02 16:36:24,951][243562] Updated weights for policy 0, policy_version 32246 (0.0009) +[2026-06-02 16:36:25,123][243562] Updated weights for policy 0, policy_version 32257 (0.0008) +[2026-06-02 16:36:25,284][243562] Updated weights for policy 0, policy_version 32267 (0.0008) +[2026-06-02 16:36:25,479][243562] Updated weights for policy 0, policy_version 32279 (0.0008) +[2026-06-02 16:36:25,645][243562] Updated weights for policy 0, policy_version 32289 (0.0008) +[2026-06-02 16:36:25,826][243562] Updated weights for policy 0, policy_version 32300 (0.0008) +[2026-06-02 16:36:25,881][242748] Saving new best policy, reward=1415.957! +[2026-06-02 16:36:26,473][243562] Updated weights for policy 0, policy_version 32310 (0.0009) +[2026-06-02 16:36:26,630][243562] Updated weights for policy 0, policy_version 32320 (0.0009) +[2026-06-02 16:36:26,794][243562] Updated weights for policy 0, policy_version 32330 (0.0008) +[2026-06-02 16:36:26,974][243562] Updated weights for policy 0, policy_version 32341 (0.0009) +[2026-06-02 16:36:27,153][243562] Updated weights for policy 0, policy_version 32352 (0.0009) +[2026-06-02 16:36:27,319][243562] Updated weights for policy 0, policy_version 32362 (0.0008) +[2026-06-02 16:36:27,966][243562] Updated weights for policy 0, policy_version 32372 (0.0009) +[2026-06-02 16:36:28,132][243562] Updated weights for policy 0, policy_version 32383 (0.0008) +[2026-06-02 16:36:28,296][243562] Updated weights for policy 0, policy_version 32393 (0.0010) +[2026-06-02 16:36:28,456][243562] Updated weights for policy 0, policy_version 32403 (0.0008) +[2026-06-02 16:36:28,621][243562] Updated weights for policy 0, policy_version 32413 (0.0008) +[2026-06-02 16:36:28,790][243562] Updated weights for policy 0, policy_version 32423 (0.0008) +[2026-06-02 16:36:29,450][243562] Updated weights for policy 0, policy_version 32433 (0.0009) +[2026-06-02 16:36:29,626][243562] Updated weights for policy 0, policy_version 32444 (0.0008) +[2026-06-02 16:36:29,786][243562] Updated weights for policy 0, policy_version 32454 (0.0009) +[2026-06-02 16:36:29,811][235960] Fps is (10 sec: 22937.6, 60 sec: 21299.3, 300 sec: 21438.0). Total num frames: 16613376. Throughput: 0: 21509.6. Samples: 16656000. Policy #0 lag: (min: 13.0, avg: 62.6, max: 83.0) +[2026-06-02 16:36:29,812][235960] Avg episode reward: [(0, '1431.968')] +[2026-06-02 16:36:29,965][243562] Updated weights for policy 0, policy_version 32465 (0.0008) +[2026-06-02 16:36:30,132][243562] Updated weights for policy 0, policy_version 32475 (0.0008) +[2026-06-02 16:36:30,300][243562] Updated weights for policy 0, policy_version 32485 (0.0009) +[2026-06-02 16:36:30,461][243562] Updated weights for policy 0, policy_version 32495 (0.0008) +[2026-06-02 16:36:30,467][242748] Saving new best policy, reward=1431.968! +[2026-06-02 16:36:31,099][243562] Updated weights for policy 0, policy_version 32505 (0.0009) +[2026-06-02 16:36:31,257][243562] Updated weights for policy 0, policy_version 32515 (0.0009) +[2026-06-02 16:36:31,471][243562] Updated weights for policy 0, policy_version 32528 (0.0008) +[2026-06-02 16:36:31,635][243562] Updated weights for policy 0, policy_version 32538 (0.0008) +[2026-06-02 16:36:31,796][243562] Updated weights for policy 0, policy_version 32548 (0.0009) +[2026-06-02 16:36:31,970][243562] Updated weights for policy 0, policy_version 32558 (0.0008) +[2026-06-02 16:36:32,620][243562] Updated weights for policy 0, policy_version 32569 (0.0007) +[2026-06-02 16:36:32,777][243562] Updated weights for policy 0, policy_version 32579 (0.0008) +[2026-06-02 16:36:32,938][243562] Updated weights for policy 0, policy_version 32589 (0.0009) +[2026-06-02 16:36:33,100][243562] Updated weights for policy 0, policy_version 32599 (0.0009) +[2026-06-02 16:36:33,267][243562] Updated weights for policy 0, policy_version 32609 (0.0008) +[2026-06-02 16:36:33,423][243562] Updated weights for policy 0, policy_version 32619 (0.0008) +[2026-06-02 16:36:34,071][243562] Updated weights for policy 0, policy_version 32629 (0.0008) +[2026-06-02 16:36:34,245][243562] Updated weights for policy 0, policy_version 32640 (0.0008) +[2026-06-02 16:36:34,402][243562] Updated weights for policy 0, policy_version 32650 (0.0008) +[2026-06-02 16:36:34,567][243562] Updated weights for policy 0, policy_version 32660 (0.0008) +[2026-06-02 16:36:34,736][243562] Updated weights for policy 0, policy_version 32670 (0.0009) +[2026-06-02 16:36:34,811][235960] Fps is (10 sec: 19660.8, 60 sec: 21299.2, 300 sec: 21438.0). Total num frames: 16711680. Throughput: 0: 21509.7. Samples: 16720384. Policy #0 lag: (min: 63.0, avg: 78.0, max: 127.0) +[2026-06-02 16:36:34,812][235960] Avg episode reward: [(0, '1479.823')] +[2026-06-02 16:36:34,911][243562] Updated weights for policy 0, policy_version 32681 (0.0008) +[2026-06-02 16:36:35,020][242748] Saving new best policy, reward=1479.823! +[2026-06-02 16:36:35,563][243562] Updated weights for policy 0, policy_version 32691 (0.0008) +[2026-06-02 16:36:35,724][243562] Updated weights for policy 0, policy_version 32701 (0.0008) +[2026-06-02 16:36:35,900][243562] Updated weights for policy 0, policy_version 32712 (0.0009) +[2026-06-02 16:36:36,064][243562] Updated weights for policy 0, policy_version 32722 (0.0008) +[2026-06-02 16:36:36,239][243562] Updated weights for policy 0, policy_version 32733 (0.0008) +[2026-06-02 16:36:36,426][243562] Updated weights for policy 0, policy_version 32744 (0.0008) +[2026-06-02 16:36:37,076][243562] Updated weights for policy 0, policy_version 32754 (0.0008) +[2026-06-02 16:36:37,233][243562] Updated weights for policy 0, policy_version 32764 (0.0008) +[2026-06-02 16:36:37,390][243562] Updated weights for policy 0, policy_version 32774 (0.0008) +[2026-06-02 16:36:37,554][243562] Updated weights for policy 0, policy_version 32784 (0.0008) +[2026-06-02 16:36:37,714][243562] Updated weights for policy 0, policy_version 32794 (0.0008) +[2026-06-02 16:36:37,883][243562] Updated weights for policy 0, policy_version 32804 (0.0008) +[2026-06-02 16:36:38,079][243562] Updated weights for policy 0, policy_version 32816 (0.0008) +[2026-06-02 16:36:38,734][243562] Updated weights for policy 0, policy_version 32827 (0.0008) +[2026-06-02 16:36:38,903][243562] Updated weights for policy 0, policy_version 32837 (0.0008) +[2026-06-02 16:36:39,058][243562] Updated weights for policy 0, policy_version 32847 (0.0008) +[2026-06-02 16:36:39,230][243562] Updated weights for policy 0, policy_version 32857 (0.0008) +[2026-06-02 16:36:39,387][243562] Updated weights for policy 0, policy_version 32867 (0.0008) +[2026-06-02 16:36:39,555][243562] Updated weights for policy 0, policy_version 32877 (0.0008) +[2026-06-02 16:36:39,811][235960] Fps is (10 sec: 22937.7, 60 sec: 21845.4, 300 sec: 21438.0). Total num frames: 16842752. Throughput: 0: 21489.8. Samples: 16848512. Policy #0 lag: (min: 63.0, avg: 78.0, max: 127.0) +[2026-06-02 16:36:39,812][235960] Avg episode reward: [(0, '1467.515')] +[2026-06-02 16:36:40,218][243562] Updated weights for policy 0, policy_version 32887 (0.0008) +[2026-06-02 16:36:40,378][243562] Updated weights for policy 0, policy_version 32897 (0.0008) +[2026-06-02 16:36:40,538][243562] Updated weights for policy 0, policy_version 32907 (0.0008) +[2026-06-02 16:36:40,705][243562] Updated weights for policy 0, policy_version 32917 (0.0008) +[2026-06-02 16:36:40,871][243562] Updated weights for policy 0, policy_version 32927 (0.0009) +[2026-06-02 16:36:41,045][243562] Updated weights for policy 0, policy_version 32938 (0.0008) +[2026-06-02 16:36:41,681][243562] Updated weights for policy 0, policy_version 32948 (0.0008) +[2026-06-02 16:36:41,859][243562] Updated weights for policy 0, policy_version 32959 (0.0008) +[2026-06-02 16:36:42,021][243562] Updated weights for policy 0, policy_version 32969 (0.0009) +[2026-06-02 16:36:42,187][243562] Updated weights for policy 0, policy_version 32979 (0.0008) +[2026-06-02 16:36:42,360][243562] Updated weights for policy 0, policy_version 32990 (0.0008) +[2026-06-02 16:36:42,529][243562] Updated weights for policy 0, policy_version 33000 (0.0008) +[2026-06-02 16:36:43,182][243562] Updated weights for policy 0, policy_version 33011 (0.0008) +[2026-06-02 16:36:43,335][243562] Updated weights for policy 0, policy_version 33021 (0.0008) +[2026-06-02 16:36:43,518][243562] Updated weights for policy 0, policy_version 33032 (0.0008) +[2026-06-02 16:36:43,673][243562] Updated weights for policy 0, policy_version 33042 (0.0008) +[2026-06-02 16:36:43,842][243562] Updated weights for policy 0, policy_version 33052 (0.0008) +[2026-06-02 16:36:44,007][243562] Updated weights for policy 0, policy_version 33062 (0.0008) +[2026-06-02 16:36:44,687][243562] Updated weights for policy 0, policy_version 33074 (0.0008) +[2026-06-02 16:36:44,811][235960] Fps is (10 sec: 22937.6, 60 sec: 21845.3, 300 sec: 21438.0). Total num frames: 16941056. Throughput: 0: 21486.9. Samples: 16977920. Policy #0 lag: (min: 63.0, avg: 78.0, max: 127.0) +[2026-06-02 16:36:44,812][235960] Avg episode reward: [(0, '1564.624')] +[2026-06-02 16:36:44,842][243562] Updated weights for policy 0, policy_version 33084 (0.0009) +[2026-06-02 16:36:45,001][243562] Updated weights for policy 0, policy_version 33094 (0.0008) +[2026-06-02 16:36:45,162][243562] Updated weights for policy 0, policy_version 33104 (0.0008) +[2026-06-02 16:36:45,330][243562] Updated weights for policy 0, policy_version 33114 (0.0009) +[2026-06-02 16:36:45,510][243562] Updated weights for policy 0, policy_version 33125 (0.0008) +[2026-06-02 16:36:45,674][243562] Updated weights for policy 0, policy_version 33135 (0.0008) +[2026-06-02 16:36:45,683][242748] Saving new best policy, reward=1564.624! +[2026-06-02 16:36:46,319][243562] Updated weights for policy 0, policy_version 33145 (0.0009) +[2026-06-02 16:36:46,493][243562] Updated weights for policy 0, policy_version 33156 (0.0009) +[2026-06-02 16:36:46,658][243562] Updated weights for policy 0, policy_version 33166 (0.0009) +[2026-06-02 16:36:46,823][243562] Updated weights for policy 0, policy_version 33176 (0.0008) +[2026-06-02 16:36:46,987][243562] Updated weights for policy 0, policy_version 33186 (0.0008) +[2026-06-02 16:36:47,166][243562] Updated weights for policy 0, policy_version 33197 (0.0008) +[2026-06-02 16:36:47,808][243562] Updated weights for policy 0, policy_version 33207 (0.0009) +[2026-06-02 16:36:47,959][243562] Updated weights for policy 0, policy_version 33217 (0.0009) +[2026-06-02 16:36:48,136][243562] Updated weights for policy 0, policy_version 33227 (0.0008) +[2026-06-02 16:36:48,297][243562] Updated weights for policy 0, policy_version 33237 (0.0008) +[2026-06-02 16:36:48,466][243562] Updated weights for policy 0, policy_version 33248 (0.0009) +[2026-06-02 16:36:48,639][243562] Updated weights for policy 0, policy_version 33258 (0.0009) +[2026-06-02 16:36:49,274][243562] Updated weights for policy 0, policy_version 33268 (0.0009) +[2026-06-02 16:36:49,436][243562] Updated weights for policy 0, policy_version 33278 (0.0008) +[2026-06-02 16:36:49,594][243562] Updated weights for policy 0, policy_version 33288 (0.0008) +[2026-06-02 16:36:49,762][243562] Updated weights for policy 0, policy_version 33298 (0.0008) +[2026-06-02 16:36:49,811][235960] Fps is (10 sec: 19660.7, 60 sec: 21299.2, 300 sec: 21438.0). Total num frames: 17039360. Throughput: 0: 21495.4. Samples: 17043200. Policy #0 lag: (min: 63.0, avg: 78.0, max: 127.0) +[2026-06-02 16:36:49,812][235960] Avg episode reward: [(0, '1570.956')] +[2026-06-02 16:36:49,943][243562] Updated weights for policy 0, policy_version 33309 (0.0009) +[2026-06-02 16:36:50,119][243562] Updated weights for policy 0, policy_version 33320 (0.0009) +[2026-06-02 16:36:50,247][242748] Saving new best policy, reward=1570.956! +[2026-06-02 16:36:50,782][243562] Updated weights for policy 0, policy_version 33330 (0.0009) +[2026-06-02 16:36:50,938][243562] Updated weights for policy 0, policy_version 33340 (0.0008) +[2026-06-02 16:36:51,098][243562] Updated weights for policy 0, policy_version 33350 (0.0008) +[2026-06-02 16:36:51,280][243562] Updated weights for policy 0, policy_version 33361 (0.0010) +[2026-06-02 16:36:51,444][243562] Updated weights for policy 0, policy_version 33371 (0.0008) +[2026-06-02 16:36:51,609][243562] Updated weights for policy 0, policy_version 33381 (0.0009) +[2026-06-02 16:36:51,785][243562] Updated weights for policy 0, policy_version 33392 (0.0008) +[2026-06-02 16:36:52,412][243562] Updated weights for policy 0, policy_version 33402 (0.0009) +[2026-06-02 16:36:52,585][243562] Updated weights for policy 0, policy_version 33413 (0.0008) +[2026-06-02 16:36:52,772][243562] Updated weights for policy 0, policy_version 33424 (0.0008) +[2026-06-02 16:36:52,936][243562] Updated weights for policy 0, policy_version 33434 (0.0008) +[2026-06-02 16:36:53,118][243562] Updated weights for policy 0, policy_version 33445 (0.0009) +[2026-06-02 16:36:53,290][243562] Updated weights for policy 0, policy_version 33456 (0.0009) +[2026-06-02 16:36:53,952][243562] Updated weights for policy 0, policy_version 33466 (0.0009) +[2026-06-02 16:36:54,105][243562] Updated weights for policy 0, policy_version 33476 (0.0008) +[2026-06-02 16:36:54,277][243562] Updated weights for policy 0, policy_version 33486 (0.0008) +[2026-06-02 16:36:54,442][243562] Updated weights for policy 0, policy_version 33496 (0.0008) +[2026-06-02 16:36:54,607][243562] Updated weights for policy 0, policy_version 33506 (0.0008) +[2026-06-02 16:36:54,767][243562] Updated weights for policy 0, policy_version 33516 (0.0008) +[2026-06-02 16:36:54,811][235960] Fps is (10 sec: 19660.8, 60 sec: 21299.2, 300 sec: 21438.1). Total num frames: 17137664. Throughput: 0: 21452.8. Samples: 17169792. Policy #0 lag: (min: 63.0, avg: 78.0, max: 127.0) +[2026-06-02 16:36:54,812][235960] Avg episode reward: [(0, '1601.510')] +[2026-06-02 16:36:54,841][242748] Saving new best policy, reward=1601.510! +[2026-06-02 16:36:55,415][243562] Updated weights for policy 0, policy_version 33526 (0.0008) +[2026-06-02 16:36:55,570][243562] Updated weights for policy 0, policy_version 33536 (0.0008) +[2026-06-02 16:36:55,739][243562] Updated weights for policy 0, policy_version 33546 (0.0008) +[2026-06-02 16:36:55,902][243562] Updated weights for policy 0, policy_version 33556 (0.0008) +[2026-06-02 16:36:56,091][243562] Updated weights for policy 0, policy_version 33567 (0.0008) +[2026-06-02 16:36:56,254][243562] Updated weights for policy 0, policy_version 33577 (0.0008) +[2026-06-02 16:36:56,891][243562] Updated weights for policy 0, policy_version 33587 (0.0009) +[2026-06-02 16:36:57,043][243562] Updated weights for policy 0, policy_version 33597 (0.0009) +[2026-06-02 16:36:57,204][243562] Updated weights for policy 0, policy_version 33607 (0.0008) +[2026-06-02 16:36:57,372][243562] Updated weights for policy 0, policy_version 33617 (0.0008) +[2026-06-02 16:36:57,564][243562] Updated weights for policy 0, policy_version 33629 (0.0008) +[2026-06-02 16:36:57,726][243562] Updated weights for policy 0, policy_version 33639 (0.0008) +[2026-06-02 16:36:58,394][243562] Updated weights for policy 0, policy_version 33649 (0.0010) +[2026-06-02 16:36:58,539][243562] Updated weights for policy 0, policy_version 33659 (0.0008) +[2026-06-02 16:36:58,721][243562] Updated weights for policy 0, policy_version 33670 (0.0008) +[2026-06-02 16:36:58,878][243562] Updated weights for policy 0, policy_version 33680 (0.0009) +[2026-06-02 16:36:59,049][243562] Updated weights for policy 0, policy_version 33690 (0.0008) +[2026-06-02 16:36:59,209][243562] Updated weights for policy 0, policy_version 33700 (0.0008) +[2026-06-02 16:36:59,374][243562] Updated weights for policy 0, policy_version 33710 (0.0008) +[2026-06-02 16:36:59,811][235960] Fps is (10 sec: 22937.8, 60 sec: 21845.3, 300 sec: 21438.1). Total num frames: 17268736. Throughput: 0: 21353.2. Samples: 17294976. Policy #0 lag: (min: 63.0, avg: 78.1, max: 127.0) +[2026-06-02 16:36:59,812][235960] Avg episode reward: [(0, '1683.391')] +[2026-06-02 16:37:00,026][243562] Updated weights for policy 0, policy_version 33720 (0.0009) +[2026-06-02 16:37:00,191][243562] Updated weights for policy 0, policy_version 33730 (0.0008) +[2026-06-02 16:37:00,353][243562] Updated weights for policy 0, policy_version 33740 (0.0008) +[2026-06-02 16:37:00,532][243562] Updated weights for policy 0, policy_version 33751 (0.0008) +[2026-06-02 16:37:00,691][243562] Updated weights for policy 0, policy_version 33761 (0.0008) +[2026-06-02 16:37:00,854][243562] Updated weights for policy 0, policy_version 33771 (0.0008) +[2026-06-02 16:37:00,934][242748] Saving new best policy, reward=1683.391! +[2026-06-02 16:37:01,501][243562] Updated weights for policy 0, policy_version 33781 (0.0009) +[2026-06-02 16:37:01,674][243562] Updated weights for policy 0, policy_version 33792 (0.0008) +[2026-06-02 16:37:01,834][243562] Updated weights for policy 0, policy_version 33802 (0.0008) +[2026-06-02 16:37:01,993][243562] Updated weights for policy 0, policy_version 33812 (0.0008) +[2026-06-02 16:37:02,165][243562] Updated weights for policy 0, policy_version 33822 (0.0009) +[2026-06-02 16:37:02,344][243562] Updated weights for policy 0, policy_version 33833 (0.0009) +[2026-06-02 16:37:03,009][243562] Updated weights for policy 0, policy_version 33843 (0.0008) +[2026-06-02 16:37:03,162][243562] Updated weights for policy 0, policy_version 33853 (0.0008) +[2026-06-02 16:37:03,328][243562] Updated weights for policy 0, policy_version 33863 (0.0008) +[2026-06-02 16:37:03,491][243562] Updated weights for policy 0, policy_version 33873 (0.0008) +[2026-06-02 16:37:03,668][243562] Updated weights for policy 0, policy_version 33884 (0.0009) +[2026-06-02 16:37:03,826][243562] Updated weights for policy 0, policy_version 33894 (0.0008) +[2026-06-02 16:37:03,993][243562] Updated weights for policy 0, policy_version 33904 (0.0008) +[2026-06-02 16:37:04,654][243562] Updated weights for policy 0, policy_version 33915 (0.0008) +[2026-06-02 16:37:04,811][235960] Fps is (10 sec: 22937.6, 60 sec: 21299.2, 300 sec: 21438.1). Total num frames: 17367040. Throughput: 0: 21307.7. Samples: 17357568. Policy #0 lag: (min: 63.0, avg: 78.1, max: 127.0) +[2026-06-02 16:37:04,812][235960] Avg episode reward: [(0, '1689.130')] +[2026-06-02 16:37:04,817][243562] Updated weights for policy 0, policy_version 33925 (0.0008) +[2026-06-02 16:37:04,980][243562] Updated weights for policy 0, policy_version 33935 (0.0008) +[2026-06-02 16:37:05,170][243562] Updated weights for policy 0, policy_version 33946 (0.0009) +[2026-06-02 16:37:05,331][243562] Updated weights for policy 0, policy_version 33956 (0.0008) +[2026-06-02 16:37:05,507][243562] Updated weights for policy 0, policy_version 33967 (0.0008) +[2026-06-02 16:37:05,517][242748] Saving new best policy, reward=1689.130! +[2026-06-02 16:37:06,158][243562] Updated weights for policy 0, policy_version 33977 (0.0009) +[2026-06-02 16:37:06,332][243562] Updated weights for policy 0, policy_version 33988 (0.0008) +[2026-06-02 16:37:06,500][243562] Updated weights for policy 0, policy_version 33998 (0.0008) +[2026-06-02 16:37:06,680][243562] Updated weights for policy 0, policy_version 34009 (0.0008) +[2026-06-02 16:37:06,842][243562] Updated weights for policy 0, policy_version 34019 (0.0008) +[2026-06-02 16:37:07,014][243562] Updated weights for policy 0, policy_version 34029 (0.0009) +[2026-06-02 16:37:07,652][243562] Updated weights for policy 0, policy_version 34039 (0.0009) +[2026-06-02 16:37:07,818][243562] Updated weights for policy 0, policy_version 34049 (0.0008) +[2026-06-02 16:37:07,994][243562] Updated weights for policy 0, policy_version 34060 (0.0008) +[2026-06-02 16:37:08,154][243562] Updated weights for policy 0, policy_version 34070 (0.0008) +[2026-06-02 16:37:08,318][243562] Updated weights for policy 0, policy_version 34080 (0.0008) +[2026-06-02 16:37:08,506][243562] Updated weights for policy 0, policy_version 34091 (0.0008) +[2026-06-02 16:37:09,184][243562] Updated weights for policy 0, policy_version 34103 (0.0008) +[2026-06-02 16:37:09,339][243562] Updated weights for policy 0, policy_version 34113 (0.0008) +[2026-06-02 16:37:09,497][243562] Updated weights for policy 0, policy_version 34123 (0.0008) +[2026-06-02 16:37:09,659][243562] Updated weights for policy 0, policy_version 34133 (0.0008) +[2026-06-02 16:37:09,811][235960] Fps is (10 sec: 19660.7, 60 sec: 21299.2, 300 sec: 21438.0). Total num frames: 17465344. Throughput: 0: 21248.0. Samples: 17482752. Policy #0 lag: (min: 63.0, avg: 78.1, max: 127.0) +[2026-06-02 16:37:09,812][235960] Avg episode reward: [(0, '1643.652')] +[2026-06-02 16:37:09,823][243562] Updated weights for policy 0, policy_version 34143 (0.0008) +[2026-06-02 16:37:09,989][243562] Updated weights for policy 0, policy_version 34153 (0.0008) +[2026-06-02 16:37:10,678][243562] Updated weights for policy 0, policy_version 34165 (0.0009) +[2026-06-02 16:37:10,863][243562] Updated weights for policy 0, policy_version 34177 (0.0008) +[2026-06-02 16:37:11,048][243562] Updated weights for policy 0, policy_version 34188 (0.0008) +[2026-06-02 16:37:11,229][243562] Updated weights for policy 0, policy_version 34199 (0.0008) +[2026-06-02 16:37:11,394][243562] Updated weights for policy 0, policy_version 34209 (0.0009) +[2026-06-02 16:37:11,561][243562] Updated weights for policy 0, policy_version 34219 (0.0009) +[2026-06-02 16:37:12,217][243562] Updated weights for policy 0, policy_version 34230 (0.0009) +[2026-06-02 16:37:12,391][243562] Updated weights for policy 0, policy_version 34241 (0.0009) +[2026-06-02 16:37:12,553][243562] Updated weights for policy 0, policy_version 34251 (0.0008) +[2026-06-02 16:37:12,718][243562] Updated weights for policy 0, policy_version 34261 (0.0008) +[2026-06-02 16:37:12,884][243562] Updated weights for policy 0, policy_version 34271 (0.0008) +[2026-06-02 16:37:13,070][243562] Updated weights for policy 0, policy_version 34282 (0.0009) +[2026-06-02 16:37:13,712][243562] Updated weights for policy 0, policy_version 34292 (0.0009) +[2026-06-02 16:37:13,870][243562] Updated weights for policy 0, policy_version 34302 (0.0008) +[2026-06-02 16:37:14,048][243562] Updated weights for policy 0, policy_version 34313 (0.0011) +[2026-06-02 16:37:14,208][243562] Updated weights for policy 0, policy_version 34323 (0.0010) +[2026-06-02 16:37:14,369][243562] Updated weights for policy 0, policy_version 34333 (0.0010) +[2026-06-02 16:37:14,542][243562] Updated weights for policy 0, policy_version 34343 (0.0010) +[2026-06-02 16:37:14,811][235960] Fps is (10 sec: 22937.6, 60 sec: 21845.3, 300 sec: 21549.1). Total num frames: 17596416. Throughput: 0: 21230.9. Samples: 17611392. Policy #0 lag: (min: 63.0, avg: 78.1, max: 127.0) +[2026-06-02 16:37:14,812][235960] Avg episode reward: [(0, '1645.910')] +[2026-06-02 16:37:15,185][243562] Updated weights for policy 0, policy_version 34353 (0.0011) +[2026-06-02 16:37:15,369][243562] Updated weights for policy 0, policy_version 34365 (0.0009) +[2026-06-02 16:37:15,526][243562] Updated weights for policy 0, policy_version 34375 (0.0008) +[2026-06-02 16:37:15,715][243562] Updated weights for policy 0, policy_version 34386 (0.0008) +[2026-06-02 16:37:15,888][243562] Updated weights for policy 0, policy_version 34397 (0.0008) +[2026-06-02 16:37:16,055][243562] Updated weights for policy 0, policy_version 34407 (0.0008) +[2026-06-02 16:37:16,721][243562] Updated weights for policy 0, policy_version 34417 (0.0008) +[2026-06-02 16:37:16,870][243562] Updated weights for policy 0, policy_version 34427 (0.0008) +[2026-06-02 16:37:17,062][243562] Updated weights for policy 0, policy_version 34439 (0.0008) +[2026-06-02 16:37:17,245][243562] Updated weights for policy 0, policy_version 34450 (0.0008) +[2026-06-02 16:37:17,409][243562] Updated weights for policy 0, policy_version 34460 (0.0008) +[2026-06-02 16:37:17,579][243562] Updated weights for policy 0, policy_version 34470 (0.0008) +[2026-06-02 16:37:18,249][243562] Updated weights for policy 0, policy_version 34481 (0.0009) +[2026-06-02 16:37:18,407][243562] Updated weights for policy 0, policy_version 34491 (0.0008) +[2026-06-02 16:37:18,570][243562] Updated weights for policy 0, policy_version 34501 (0.0009) +[2026-06-02 16:37:18,734][243562] Updated weights for policy 0, policy_version 34511 (0.0008) +[2026-06-02 16:37:18,892][243562] Updated weights for policy 0, policy_version 34521 (0.0008) +[2026-06-02 16:37:19,054][243562] Updated weights for policy 0, policy_version 34531 (0.0008) +[2026-06-02 16:37:19,223][243562] Updated weights for policy 0, policy_version 34541 (0.0009) +[2026-06-02 16:37:19,811][235960] Fps is (10 sec: 22937.6, 60 sec: 21845.3, 300 sec: 21438.0). Total num frames: 17694720. Throughput: 0: 21242.3. Samples: 17676288. Policy #0 lag: (min: 63.0, avg: 78.2, max: 127.0) +[2026-06-02 16:37:19,812][235960] Avg episode reward: [(0, '1684.227')] +[2026-06-02 16:37:19,882][243562] Updated weights for policy 0, policy_version 34551 (0.0008) +[2026-06-02 16:37:20,043][243562] Updated weights for policy 0, policy_version 34561 (0.0008) +[2026-06-02 16:37:20,208][243562] Updated weights for policy 0, policy_version 34571 (0.0009) +[2026-06-02 16:37:20,381][243562] Updated weights for policy 0, policy_version 34582 (0.0009) +[2026-06-02 16:37:20,544][243562] Updated weights for policy 0, policy_version 34592 (0.0008) +[2026-06-02 16:37:20,732][243562] Updated weights for policy 0, policy_version 34603 (0.0008) +[2026-06-02 16:37:21,366][243562] Updated weights for policy 0, policy_version 34613 (0.0009) +[2026-06-02 16:37:21,533][243562] Updated weights for policy 0, policy_version 34624 (0.0009) +[2026-06-02 16:37:21,690][243562] Updated weights for policy 0, policy_version 34634 (0.0008) +[2026-06-02 16:37:21,859][243562] Updated weights for policy 0, policy_version 34644 (0.0009) +[2026-06-02 16:37:22,022][243562] Updated weights for policy 0, policy_version 34654 (0.0008) +[2026-06-02 16:37:22,184][243562] Updated weights for policy 0, policy_version 34664 (0.0008) +[2026-06-02 16:37:22,843][243562] Updated weights for policy 0, policy_version 34674 (0.0009) +[2026-06-02 16:37:22,998][243562] Updated weights for policy 0, policy_version 34684 (0.0008) +[2026-06-02 16:37:23,152][243562] Updated weights for policy 0, policy_version 34694 (0.0008) +[2026-06-02 16:37:23,322][243562] Updated weights for policy 0, policy_version 34704 (0.0008) +[2026-06-02 16:37:23,482][243562] Updated weights for policy 0, policy_version 34714 (0.0008) +[2026-06-02 16:37:23,650][243562] Updated weights for policy 0, policy_version 34724 (0.0008) +[2026-06-02 16:37:23,813][243562] Updated weights for policy 0, policy_version 34734 (0.0008) +[2026-06-02 16:37:24,487][243562] Updated weights for policy 0, policy_version 34746 (0.0008) +[2026-06-02 16:37:24,644][243562] Updated weights for policy 0, policy_version 34756 (0.0009) +[2026-06-02 16:37:24,811][235960] Fps is (10 sec: 19660.7, 60 sec: 21299.2, 300 sec: 21438.0). Total num frames: 17793024. Throughput: 0: 21265.1. Samples: 17805440. Policy #0 lag: (min: 63.0, avg: 78.2, max: 127.0) +[2026-06-02 16:37:24,812][235960] Avg episode reward: [(0, '1660.310')] +[2026-06-02 16:37:24,824][243562] Updated weights for policy 0, policy_version 34768 (0.0009) +[2026-06-02 16:37:25,007][243562] Updated weights for policy 0, policy_version 34779 (0.0009) +[2026-06-02 16:37:25,199][243562] Updated weights for policy 0, policy_version 34791 (0.0009) +[2026-06-02 16:37:25,857][243562] Updated weights for policy 0, policy_version 34801 (0.0009) +[2026-06-02 16:37:26,020][243562] Updated weights for policy 0, policy_version 34812 (0.0009) +[2026-06-02 16:37:26,194][243562] Updated weights for policy 0, policy_version 34823 (0.0008) +[2026-06-02 16:37:26,362][243562] Updated weights for policy 0, policy_version 34834 (0.0009) +[2026-06-02 16:37:26,529][243562] Updated weights for policy 0, policy_version 34844 (0.0009) +[2026-06-02 16:37:26,705][243562] Updated weights for policy 0, policy_version 34855 (0.0009) +[2026-06-02 16:37:27,396][243562] Updated weights for policy 0, policy_version 34865 (0.0009) +[2026-06-02 16:37:27,557][243562] Updated weights for policy 0, policy_version 34876 (0.0009) +[2026-06-02 16:37:27,713][243562] Updated weights for policy 0, policy_version 34886 (0.0009) +[2026-06-02 16:37:27,872][243562] Updated weights for policy 0, policy_version 34896 (0.0008) +[2026-06-02 16:37:28,046][243562] Updated weights for policy 0, policy_version 34907 (0.0008) +[2026-06-02 16:37:28,238][243562] Updated weights for policy 0, policy_version 34919 (0.0009) +[2026-06-02 16:37:28,989][243562] Updated weights for policy 0, policy_version 34930 (0.0008) +[2026-06-02 16:37:29,154][243562] Updated weights for policy 0, policy_version 34941 (0.0008) +[2026-06-02 16:37:29,322][243562] Updated weights for policy 0, policy_version 34952 (0.0008) +[2026-06-02 16:37:29,488][243562] Updated weights for policy 0, policy_version 34962 (0.0008) +[2026-06-02 16:37:29,678][243562] Updated weights for policy 0, policy_version 34974 (0.0009) +[2026-06-02 16:37:29,811][235960] Fps is (10 sec: 19660.9, 60 sec: 21299.2, 300 sec: 21438.1). Total num frames: 17891328. Throughput: 0: 21228.1. Samples: 17933184. Policy #0 lag: (min: 63.0, avg: 78.2, max: 127.0) +[2026-06-02 16:37:29,812][235960] Avg episode reward: [(0, '1672.190')] +[2026-06-02 16:37:29,841][243562] Updated weights for policy 0, policy_version 34984 (0.0008) +[2026-06-02 16:37:30,521][243562] Updated weights for policy 0, policy_version 34995 (0.0009) +[2026-06-02 16:37:30,707][243562] Updated weights for policy 0, policy_version 35007 (0.0008) +[2026-06-02 16:37:30,895][243562] Updated weights for policy 0, policy_version 35018 (0.0008) +[2026-06-02 16:37:31,050][243562] Updated weights for policy 0, policy_version 35028 (0.0008) +[2026-06-02 16:37:31,237][243562] Updated weights for policy 0, policy_version 35039 (0.0009) +[2026-06-02 16:37:31,427][243562] Updated weights for policy 0, policy_version 35050 (0.0008) +[2026-06-02 16:37:32,082][243562] Updated weights for policy 0, policy_version 35061 (0.0008) +[2026-06-02 16:37:32,238][243562] Updated weights for policy 0, policy_version 35071 (0.0008) +[2026-06-02 16:37:32,418][243562] Updated weights for policy 0, policy_version 35082 (0.0008) +[2026-06-02 16:37:32,585][243562] Updated weights for policy 0, policy_version 35092 (0.0009) +[2026-06-02 16:37:32,786][243562] Updated weights for policy 0, policy_version 35104 (0.0008) +[2026-06-02 16:37:32,951][243562] Updated weights for policy 0, policy_version 35114 (0.0008) +[2026-06-02 16:37:33,585][243562] Updated weights for policy 0, policy_version 35124 (0.0009) +[2026-06-02 16:37:33,764][243562] Updated weights for policy 0, policy_version 35135 (0.0008) +[2026-06-02 16:37:33,926][243562] Updated weights for policy 0, policy_version 35145 (0.0009) +[2026-06-02 16:37:34,106][243562] Updated weights for policy 0, policy_version 35156 (0.0008) +[2026-06-02 16:37:34,273][243562] Updated weights for policy 0, policy_version 35166 (0.0008) +[2026-06-02 16:37:34,440][243562] Updated weights for policy 0, policy_version 35176 (0.0008) +[2026-06-02 16:37:34,811][235960] Fps is (10 sec: 22937.7, 60 sec: 21845.3, 300 sec: 21549.1). Total num frames: 18022400. Throughput: 0: 21236.7. Samples: 17998848. Policy #0 lag: (min: 63.0, avg: 78.2, max: 127.0) +[2026-06-02 16:37:34,812][235960] Avg episode reward: [(0, '1688.513')] +[2026-06-02 16:37:35,103][243562] Updated weights for policy 0, policy_version 35188 (0.0008) +[2026-06-02 16:37:35,266][243562] Updated weights for policy 0, policy_version 35198 (0.0005) +[2026-06-02 16:37:35,426][243562] Updated weights for policy 0, policy_version 35208 (0.0008) +[2026-06-02 16:37:35,588][243562] Updated weights for policy 0, policy_version 35218 (0.0008) +[2026-06-02 16:37:35,749][243562] Updated weights for policy 0, policy_version 35228 (0.0008) +[2026-06-02 16:37:35,930][243562] Updated weights for policy 0, policy_version 35239 (0.0009) +[2026-06-02 16:37:36,597][243562] Updated weights for policy 0, policy_version 35249 (0.0009) +[2026-06-02 16:37:36,757][243562] Updated weights for policy 0, policy_version 35259 (0.0008) +[2026-06-02 16:37:36,916][243562] Updated weights for policy 0, policy_version 35269 (0.0008) +[2026-06-02 16:37:37,079][243562] Updated weights for policy 0, policy_version 35279 (0.0008) +[2026-06-02 16:37:37,257][243562] Updated weights for policy 0, policy_version 35290 (0.0008) +[2026-06-02 16:37:37,423][243562] Updated weights for policy 0, policy_version 35300 (0.0010) +[2026-06-02 16:37:37,590][243562] Updated weights for policy 0, policy_version 35310 (0.0009) +[2026-06-02 16:37:38,241][243562] Updated weights for policy 0, policy_version 35321 (0.0008) +[2026-06-02 16:37:38,393][243562] Updated weights for policy 0, policy_version 35331 (0.0008) +[2026-06-02 16:37:38,577][243562] Updated weights for policy 0, policy_version 35342 (0.0009) +[2026-06-02 16:37:38,739][243562] Updated weights for policy 0, policy_version 35352 (0.0008) +[2026-06-02 16:37:38,909][243562] Updated weights for policy 0, policy_version 35362 (0.0008) +[2026-06-02 16:37:39,075][243562] Updated weights for policy 0, policy_version 35372 (0.0008) +[2026-06-02 16:37:39,734][243562] Updated weights for policy 0, policy_version 35383 (0.0006) +[2026-06-02 16:37:39,811][235960] Fps is (10 sec: 22937.7, 60 sec: 21299.2, 300 sec: 21438.1). Total num frames: 18120704. Throughput: 0: 21299.2. Samples: 18128256. Policy #0 lag: (min: 63.0, avg: 78.2, max: 127.0) +[2026-06-02 16:37:39,812][235960] Avg episode reward: [(0, '1731.919')] +[2026-06-02 16:37:39,887][243562] Updated weights for policy 0, policy_version 35393 (0.0007) +[2026-06-02 16:37:40,070][243562] Updated weights for policy 0, policy_version 35404 (0.0009) +[2026-06-02 16:37:40,227][243562] Updated weights for policy 0, policy_version 35414 (0.0010) +[2026-06-02 16:37:40,387][243562] Updated weights for policy 0, policy_version 35424 (0.0009) +[2026-06-02 16:37:40,561][243562] Updated weights for policy 0, policy_version 35434 (0.0010) +[2026-06-02 16:37:40,651][242748] Saving new best policy, reward=1731.919! +[2026-06-02 16:37:41,232][243562] Updated weights for policy 0, policy_version 35445 (0.0010) +[2026-06-02 16:37:41,388][243562] Updated weights for policy 0, policy_version 35455 (0.0008) +[2026-06-02 16:37:41,558][243562] Updated weights for policy 0, policy_version 35466 (0.0009) +[2026-06-02 16:37:41,729][243562] Updated weights for policy 0, policy_version 35476 (0.0008) +[2026-06-02 16:37:41,896][243562] Updated weights for policy 0, policy_version 35486 (0.0008) +[2026-06-02 16:37:42,075][243562] Updated weights for policy 0, policy_version 35497 (0.0008) +[2026-06-02 16:37:42,748][243562] Updated weights for policy 0, policy_version 35508 (0.0009) +[2026-06-02 16:37:42,923][243562] Updated weights for policy 0, policy_version 35519 (0.0008) +[2026-06-02 16:37:43,089][243562] Updated weights for policy 0, policy_version 35529 (0.0008) +[2026-06-02 16:37:43,255][243562] Updated weights for policy 0, policy_version 35539 (0.0008) +[2026-06-02 16:37:43,418][243562] Updated weights for policy 0, policy_version 35549 (0.0009) +[2026-06-02 16:37:43,586][243562] Updated weights for policy 0, policy_version 35559 (0.0008) +[2026-06-02 16:37:44,267][243562] Updated weights for policy 0, policy_version 35571 (0.0009) +[2026-06-02 16:37:44,423][243562] Updated weights for policy 0, policy_version 35581 (0.0008) +[2026-06-02 16:37:44,601][243562] Updated weights for policy 0, policy_version 35592 (0.0008) +[2026-06-02 16:37:44,775][243562] Updated weights for policy 0, policy_version 35602 (0.0008) +[2026-06-02 16:37:44,811][235960] Fps is (10 sec: 19660.8, 60 sec: 21299.2, 300 sec: 21438.1). Total num frames: 18219008. Throughput: 0: 21407.3. Samples: 18258304. Policy #0 lag: (min: 63.0, avg: 77.4, max: 127.0) +[2026-06-02 16:37:44,812][235960] Avg episode reward: [(0, '1703.433')] +[2026-06-02 16:37:44,935][243562] Updated weights for policy 0, policy_version 35612 (0.0009) +[2026-06-02 16:37:45,099][243562] Updated weights for policy 0, policy_version 35622 (0.0008) +[2026-06-02 16:37:45,262][243562] Updated weights for policy 0, policy_version 35632 (0.0008) +[2026-06-02 16:37:45,899][243562] Updated weights for policy 0, policy_version 35642 (0.0008) +[2026-06-02 16:37:46,063][243562] Updated weights for policy 0, policy_version 35652 (0.0009) +[2026-06-02 16:37:46,244][243562] Updated weights for policy 0, policy_version 35663 (0.0008) +[2026-06-02 16:37:46,411][243562] Updated weights for policy 0, policy_version 35673 (0.0008) +[2026-06-02 16:37:46,605][243562] Updated weights for policy 0, policy_version 35685 (0.0009) +[2026-06-02 16:37:46,774][243562] Updated weights for policy 0, policy_version 35695 (0.0008) +[2026-06-02 16:37:47,431][243562] Updated weights for policy 0, policy_version 35705 (0.0009) +[2026-06-02 16:37:47,588][243562] Updated weights for policy 0, policy_version 35715 (0.0008) +[2026-06-02 16:37:47,750][243562] Updated weights for policy 0, policy_version 35725 (0.0008) +[2026-06-02 16:37:47,917][243562] Updated weights for policy 0, policy_version 35735 (0.0008) +[2026-06-02 16:37:48,083][243562] Updated weights for policy 0, policy_version 35745 (0.0008) +[2026-06-02 16:37:48,250][243562] Updated weights for policy 0, policy_version 35755 (0.0008) +[2026-06-02 16:37:48,904][243562] Updated weights for policy 0, policy_version 35766 (0.0009) +[2026-06-02 16:37:49,058][243562] Updated weights for policy 0, policy_version 35776 (0.0009) +[2026-06-02 16:37:49,219][243562] Updated weights for policy 0, policy_version 35786 (0.0007) +[2026-06-02 16:37:49,383][243562] Updated weights for policy 0, policy_version 35796 (0.0008) +[2026-06-02 16:37:49,546][243562] Updated weights for policy 0, policy_version 35806 (0.0009) +[2026-06-02 16:37:49,744][243562] Updated weights for policy 0, policy_version 35818 (0.0009) +[2026-06-02 16:37:49,811][235960] Fps is (10 sec: 19660.6, 60 sec: 21299.2, 300 sec: 21438.0). Total num frames: 18317312. Throughput: 0: 21469.9. Samples: 18323712. Policy #0 lag: (min: 63.0, avg: 77.4, max: 127.0) +[2026-06-02 16:37:49,812][235960] Avg episode reward: [(0, '1648.258')] +[2026-06-02 16:37:50,421][243562] Updated weights for policy 0, policy_version 35828 (0.0009) +[2026-06-02 16:37:50,575][243562] Updated weights for policy 0, policy_version 35838 (0.0008) +[2026-06-02 16:37:50,738][243562] Updated weights for policy 0, policy_version 35848 (0.0008) +[2026-06-02 16:37:50,904][243562] Updated weights for policy 0, policy_version 35858 (0.0009) +[2026-06-02 16:37:51,071][243562] Updated weights for policy 0, policy_version 35868 (0.0008) +[2026-06-02 16:37:51,238][243562] Updated weights for policy 0, policy_version 35878 (0.0008) +[2026-06-02 16:37:51,399][243562] Updated weights for policy 0, policy_version 35888 (0.0008) +[2026-06-02 16:37:52,044][243562] Updated weights for policy 0, policy_version 35898 (0.0009) +[2026-06-02 16:37:52,200][243562] Updated weights for policy 0, policy_version 35908 (0.0008) +[2026-06-02 16:37:52,385][243562] Updated weights for policy 0, policy_version 35919 (0.0008) +[2026-06-02 16:37:52,549][243562] Updated weights for policy 0, policy_version 35929 (0.0008) +[2026-06-02 16:37:52,726][243562] Updated weights for policy 0, policy_version 35939 (0.0009) +[2026-06-02 16:37:52,885][243562] Updated weights for policy 0, policy_version 35949 (0.0008) +[2026-06-02 16:37:53,529][243562] Updated weights for policy 0, policy_version 35959 (0.0008) +[2026-06-02 16:37:53,689][243562] Updated weights for policy 0, policy_version 35969 (0.0008) +[2026-06-02 16:37:53,850][243562] Updated weights for policy 0, policy_version 35979 (0.0008) +[2026-06-02 16:37:54,014][243562] Updated weights for policy 0, policy_version 35989 (0.0009) +[2026-06-02 16:37:54,179][243562] Updated weights for policy 0, policy_version 35999 (0.0008) +[2026-06-02 16:37:54,363][243562] Updated weights for policy 0, policy_version 36010 (0.0008) +[2026-06-02 16:37:54,811][235960] Fps is (10 sec: 22937.5, 60 sec: 21845.3, 300 sec: 21549.1). Total num frames: 18448384. Throughput: 0: 21572.3. Samples: 18453504. Policy #0 lag: (min: 63.0, avg: 77.4, max: 127.0) +[2026-06-02 16:37:54,812][235960] Avg episode reward: [(0, '1608.980')] +[2026-06-02 16:37:55,027][243562] Updated weights for policy 0, policy_version 36021 (0.0008) +[2026-06-02 16:37:55,194][243562] Updated weights for policy 0, policy_version 36031 (0.0008) +[2026-06-02 16:37:55,356][243562] Updated weights for policy 0, policy_version 36041 (0.0008) +[2026-06-02 16:37:55,539][243562] Updated weights for policy 0, policy_version 36052 (0.0009) +[2026-06-02 16:37:55,698][243562] Updated weights for policy 0, policy_version 36062 (0.0008) +[2026-06-02 16:37:55,879][243562] Updated weights for policy 0, policy_version 36073 (0.0009) +[2026-06-02 16:37:56,535][243562] Updated weights for policy 0, policy_version 36083 (0.0008) +[2026-06-02 16:37:56,690][243562] Updated weights for policy 0, policy_version 36093 (0.0008) +[2026-06-02 16:37:56,846][243562] Updated weights for policy 0, policy_version 36103 (0.0008) +[2026-06-02 16:37:57,026][243562] Updated weights for policy 0, policy_version 36114 (0.0008) +[2026-06-02 16:37:57,199][243562] Updated weights for policy 0, policy_version 36124 (0.0008) +[2026-06-02 16:37:57,364][243562] Updated weights for policy 0, policy_version 36134 (0.0008) +[2026-06-02 16:37:57,524][243562] Updated weights for policy 0, policy_version 36144 (0.0008) +[2026-06-02 16:37:58,184][243562] Updated weights for policy 0, policy_version 36155 (0.0007) +[2026-06-02 16:37:58,349][243562] Updated weights for policy 0, policy_version 36165 (0.0010) +[2026-06-02 16:37:58,533][243562] Updated weights for policy 0, policy_version 36176 (0.0010) +[2026-06-02 16:37:58,701][243562] Updated weights for policy 0, policy_version 36186 (0.0010) +[2026-06-02 16:37:58,862][243562] Updated weights for policy 0, policy_version 36196 (0.0009) +[2026-06-02 16:37:59,032][243562] Updated weights for policy 0, policy_version 36206 (0.0008) +[2026-06-02 16:37:59,668][243562] Updated weights for policy 0, policy_version 36216 (0.0008) +[2026-06-02 16:37:59,811][235960] Fps is (10 sec: 22937.3, 60 sec: 21299.1, 300 sec: 21438.0). Total num frames: 18546688. Throughput: 0: 21589.3. Samples: 18582912. Policy #0 lag: (min: 63.0, avg: 77.4, max: 127.0) +[2026-06-02 16:37:59,812][235960] Avg episode reward: [(0, '1614.905')] +[2026-06-02 16:37:59,824][243562] Updated weights for policy 0, policy_version 36226 (0.0008) +[2026-06-02 16:37:59,986][243562] Updated weights for policy 0, policy_version 36236 (0.0009) +[2026-06-02 16:38:00,153][243562] Updated weights for policy 0, policy_version 36246 (0.0009) +[2026-06-02 16:38:00,319][243562] Updated weights for policy 0, policy_version 36256 (0.0008) +[2026-06-02 16:38:00,493][243562] Updated weights for policy 0, policy_version 36267 (0.0008) +[2026-06-02 16:38:01,172][243562] Updated weights for policy 0, policy_version 36278 (0.0007) +[2026-06-02 16:38:01,338][243562] Updated weights for policy 0, policy_version 36288 (0.0005) +[2026-06-02 16:38:01,501][243562] Updated weights for policy 0, policy_version 36298 (0.0005) +[2026-06-02 16:38:01,677][243562] Updated weights for policy 0, policy_version 36309 (0.0008) +[2026-06-02 16:38:01,848][243562] Updated weights for policy 0, policy_version 36319 (0.0010) +[2026-06-02 16:38:02,015][243562] Updated weights for policy 0, policy_version 36329 (0.0009) +[2026-06-02 16:38:02,671][243562] Updated weights for policy 0, policy_version 36339 (0.0009) +[2026-06-02 16:38:02,835][243562] Updated weights for policy 0, policy_version 36350 (0.0009) +[2026-06-02 16:38:03,010][243562] Updated weights for policy 0, policy_version 36361 (0.0010) +[2026-06-02 16:38:03,191][243562] Updated weights for policy 0, policy_version 36372 (0.0009) +[2026-06-02 16:38:03,362][243562] Updated weights for policy 0, policy_version 36382 (0.0008) +[2026-06-02 16:38:03,545][243562] Updated weights for policy 0, policy_version 36393 (0.0009) +[2026-06-02 16:38:04,196][243562] Updated weights for policy 0, policy_version 36403 (0.0008) +[2026-06-02 16:38:04,368][243562] Updated weights for policy 0, policy_version 36414 (0.0008) +[2026-06-02 16:38:04,558][243562] Updated weights for policy 0, policy_version 36425 (0.0008) +[2026-06-02 16:38:04,736][243562] Updated weights for policy 0, policy_version 36436 (0.0008) +[2026-06-02 16:38:04,811][235960] Fps is (10 sec: 19660.7, 60 sec: 21299.2, 300 sec: 21438.1). Total num frames: 18644992. Throughput: 0: 21595.0. Samples: 18648064. Policy #0 lag: (min: 63.0, avg: 77.4, max: 127.0) +[2026-06-02 16:38:04,812][235960] Avg episode reward: [(0, '1609.137')] +[2026-06-02 16:38:04,903][243562] Updated weights for policy 0, policy_version 36446 (0.0008) +[2026-06-02 16:38:05,080][243562] Updated weights for policy 0, policy_version 36457 (0.0008) +[2026-06-02 16:38:05,732][243562] Updated weights for policy 0, policy_version 36467 (0.0008) +[2026-06-02 16:38:05,890][243562] Updated weights for policy 0, policy_version 36477 (0.0009) +[2026-06-02 16:38:06,084][243562] Updated weights for policy 0, policy_version 36489 (0.0009) +[2026-06-02 16:38:06,246][243562] Updated weights for policy 0, policy_version 36499 (0.0010) +[2026-06-02 16:38:06,431][243562] Updated weights for policy 0, policy_version 36510 (0.0008) +[2026-06-02 16:38:06,609][243562] Updated weights for policy 0, policy_version 36521 (0.0008) +[2026-06-02 16:38:07,263][243562] Updated weights for policy 0, policy_version 36531 (0.0008) +[2026-06-02 16:38:07,428][243562] Updated weights for policy 0, policy_version 36541 (0.0008) +[2026-06-02 16:38:07,580][243562] Updated weights for policy 0, policy_version 36551 (0.0008) +[2026-06-02 16:38:07,744][243562] Updated weights for policy 0, policy_version 36561 (0.0009) +[2026-06-02 16:38:07,931][243562] Updated weights for policy 0, policy_version 36572 (0.0008) +[2026-06-02 16:38:08,089][243562] Updated weights for policy 0, policy_version 36582 (0.0010) +[2026-06-02 16:38:08,253][243562] Updated weights for policy 0, policy_version 36592 (0.0010) +[2026-06-02 16:38:08,914][243562] Updated weights for policy 0, policy_version 36603 (0.0009) +[2026-06-02 16:38:09,066][243562] Updated weights for policy 0, policy_version 36613 (0.0008) +[2026-06-02 16:38:09,244][243562] Updated weights for policy 0, policy_version 36624 (0.0008) +[2026-06-02 16:38:09,424][243562] Updated weights for policy 0, policy_version 36635 (0.0009) +[2026-06-02 16:38:09,599][243562] Updated weights for policy 0, policy_version 36645 (0.0008) +[2026-06-02 16:38:09,772][243562] Updated weights for policy 0, policy_version 36656 (0.0009) +[2026-06-02 16:38:09,811][235960] Fps is (10 sec: 22937.9, 60 sec: 21845.3, 300 sec: 21549.1). Total num frames: 18776064. Throughput: 0: 21603.6. Samples: 18777600. Policy #0 lag: (min: 63.0, avg: 77.8, max: 127.0) +[2026-06-02 16:38:09,812][235960] Avg episode reward: [(0, '1648.672')] +[2026-06-02 16:38:10,424][243562] Updated weights for policy 0, policy_version 36666 (0.0009) +[2026-06-02 16:38:10,574][243562] Updated weights for policy 0, policy_version 36676 (0.0008) +[2026-06-02 16:38:10,744][243562] Updated weights for policy 0, policy_version 36686 (0.0008) +[2026-06-02 16:38:10,911][243562] Updated weights for policy 0, policy_version 36696 (0.0009) +[2026-06-02 16:38:11,071][243562] Updated weights for policy 0, policy_version 36706 (0.0008) +[2026-06-02 16:38:11,251][243562] Updated weights for policy 0, policy_version 36717 (0.0008) +[2026-06-02 16:38:11,896][243562] Updated weights for policy 0, policy_version 36727 (0.0009) +[2026-06-02 16:38:12,087][243562] Updated weights for policy 0, policy_version 36739 (0.0008) +[2026-06-02 16:38:12,254][243562] Updated weights for policy 0, policy_version 36749 (0.0008) +[2026-06-02 16:38:12,414][243562] Updated weights for policy 0, policy_version 36759 (0.0008) +[2026-06-02 16:38:12,592][243562] Updated weights for policy 0, policy_version 36770 (0.0008) +[2026-06-02 16:38:12,763][243562] Updated weights for policy 0, policy_version 36780 (0.0008) +[2026-06-02 16:38:13,385][243562] Updated weights for policy 0, policy_version 36790 (0.0009) +[2026-06-02 16:38:13,551][243562] Updated weights for policy 0, policy_version 36800 (0.0008) +[2026-06-02 16:38:13,719][243562] Updated weights for policy 0, policy_version 36810 (0.0009) +[2026-06-02 16:38:13,888][243562] Updated weights for policy 0, policy_version 36820 (0.0005) +[2026-06-02 16:38:14,053][243562] Updated weights for policy 0, policy_version 36830 (0.0005) +[2026-06-02 16:38:14,232][243562] Updated weights for policy 0, policy_version 36841 (0.0005) +[2026-06-02 16:38:14,811][235960] Fps is (10 sec: 22937.7, 60 sec: 21299.2, 300 sec: 21438.1). Total num frames: 18874368. Throughput: 0: 21555.2. Samples: 18903168. Policy #0 lag: (min: 63.0, avg: 77.8, max: 127.0) +[2026-06-02 16:38:14,812][235960] Avg episode reward: [(0, '1625.927')] +[2026-06-02 16:38:14,887][243562] Updated weights for policy 0, policy_version 36851 (0.0006) +[2026-06-02 16:38:15,047][243562] Updated weights for policy 0, policy_version 36861 (0.0008) +[2026-06-02 16:38:15,204][243562] Updated weights for policy 0, policy_version 36871 (0.0008) +[2026-06-02 16:38:15,361][243562] Updated weights for policy 0, policy_version 36881 (0.0008) +[2026-06-02 16:38:15,551][243562] Updated weights for policy 0, policy_version 36892 (0.0008) +[2026-06-02 16:38:15,725][243562] Updated weights for policy 0, policy_version 36903 (0.0010) +[2026-06-02 16:38:16,377][243562] Updated weights for policy 0, policy_version 36913 (0.0009) +[2026-06-02 16:38:16,534][243562] Updated weights for policy 0, policy_version 36923 (0.0009) +[2026-06-02 16:38:16,707][243562] Updated weights for policy 0, policy_version 36934 (0.0008) +[2026-06-02 16:38:16,873][243562] Updated weights for policy 0, policy_version 36944 (0.0008) +[2026-06-02 16:38:17,035][243562] Updated weights for policy 0, policy_version 36954 (0.0008) +[2026-06-02 16:38:17,203][243562] Updated weights for policy 0, policy_version 36964 (0.0009) +[2026-06-02 16:38:17,371][243562] Updated weights for policy 0, policy_version 36974 (0.0009) +[2026-06-02 16:38:18,016][243562] Updated weights for policy 0, policy_version 36984 (0.0010) +[2026-06-02 16:38:18,168][243562] Updated weights for policy 0, policy_version 36994 (0.0010) +[2026-06-02 16:38:18,355][243562] Updated weights for policy 0, policy_version 37005 (0.0009) +[2026-06-02 16:38:18,523][243562] Updated weights for policy 0, policy_version 37015 (0.0009) +[2026-06-02 16:38:18,678][243562] Updated weights for policy 0, policy_version 37025 (0.0009) +[2026-06-02 16:38:18,852][243562] Updated weights for policy 0, policy_version 37035 (0.0008) +[2026-06-02 16:38:19,510][243562] Updated weights for policy 0, policy_version 37045 (0.0009) +[2026-06-02 16:38:19,668][243562] Updated weights for policy 0, policy_version 37055 (0.0009) +[2026-06-02 16:38:19,811][235960] Fps is (10 sec: 19660.7, 60 sec: 21299.2, 300 sec: 21438.0). Total num frames: 18972672. Throughput: 0: 21486.9. Samples: 18965760. Policy #0 lag: (min: 63.0, avg: 77.8, max: 127.0) +[2026-06-02 16:38:19,812][235960] Avg episode reward: [(0, '1613.509')] +[2026-06-02 16:38:19,831][243562] Updated weights for policy 0, policy_version 37065 (0.0009) +[2026-06-02 16:38:19,997][243562] Updated weights for policy 0, policy_version 37075 (0.0008) +[2026-06-02 16:38:20,174][243562] Updated weights for policy 0, policy_version 37086 (0.0008) +[2026-06-02 16:38:20,345][243562] Updated weights for policy 0, policy_version 37096 (0.0008) +[2026-06-02 16:38:21,028][243562] Updated weights for policy 0, policy_version 37107 (0.0009) +[2026-06-02 16:38:21,194][243562] Updated weights for policy 0, policy_version 37117 (0.0008) +[2026-06-02 16:38:21,367][243562] Updated weights for policy 0, policy_version 37128 (0.0008) +[2026-06-02 16:38:21,534][243562] Updated weights for policy 0, policy_version 37138 (0.0009) +[2026-06-02 16:38:21,694][243562] Updated weights for policy 0, policy_version 37148 (0.0008) +[2026-06-02 16:38:21,862][243562] Updated weights for policy 0, policy_version 37158 (0.0008) +[2026-06-02 16:38:22,019][243562] Updated weights for policy 0, policy_version 37168 (0.0008) +[2026-06-02 16:38:22,655][243562] Updated weights for policy 0, policy_version 37178 (0.0008) +[2026-06-02 16:38:22,816][243562] Updated weights for policy 0, policy_version 37188 (0.0009) +[2026-06-02 16:38:22,984][243562] Updated weights for policy 0, policy_version 37198 (0.0008) +[2026-06-02 16:38:23,139][243562] Updated weights for policy 0, policy_version 37208 (0.0010) +[2026-06-02 16:38:23,309][243562] Updated weights for policy 0, policy_version 37218 (0.0009) +[2026-06-02 16:38:23,466][243562] Updated weights for policy 0, policy_version 37228 (0.0008) +[2026-06-02 16:38:24,115][243562] Updated weights for policy 0, policy_version 37238 (0.0008) +[2026-06-02 16:38:24,274][243562] Updated weights for policy 0, policy_version 37248 (0.0008) +[2026-06-02 16:38:24,434][243562] Updated weights for policy 0, policy_version 37258 (0.0008) +[2026-06-02 16:38:24,604][243562] Updated weights for policy 0, policy_version 37268 (0.0008) +[2026-06-02 16:38:24,767][243562] Updated weights for policy 0, policy_version 37278 (0.0008) +[2026-06-02 16:38:24,811][235960] Fps is (10 sec: 19660.7, 60 sec: 21299.2, 300 sec: 21438.0). Total num frames: 19070976. Throughput: 0: 21390.2. Samples: 19090816. Policy #0 lag: (min: 63.0, avg: 77.8, max: 127.0) +[2026-06-02 16:38:24,812][235960] Avg episode reward: [(0, '1677.166')] +[2026-06-02 16:38:24,934][243562] Updated weights for policy 0, policy_version 37288 (0.0008) +[2026-06-02 16:38:25,590][243562] Updated weights for policy 0, policy_version 37298 (0.0009) +[2026-06-02 16:38:25,740][243562] Updated weights for policy 0, policy_version 37308 (0.0008) +[2026-06-02 16:38:25,902][243562] Updated weights for policy 0, policy_version 37318 (0.0009) +[2026-06-02 16:38:26,075][243562] Updated weights for policy 0, policy_version 37329 (0.0008) +[2026-06-02 16:38:26,237][243562] Updated weights for policy 0, policy_version 37339 (0.0008) +[2026-06-02 16:38:26,392][243562] Updated weights for policy 0, policy_version 37349 (0.0007) +[2026-06-02 16:38:26,566][243562] Updated weights for policy 0, policy_version 37360 (0.0008) +[2026-06-02 16:38:27,271][243562] Updated weights for policy 0, policy_version 37372 (0.0008) +[2026-06-02 16:38:27,429][243562] Updated weights for policy 0, policy_version 37382 (0.0008) +[2026-06-02 16:38:27,581][243562] Updated weights for policy 0, policy_version 37392 (0.0008) +[2026-06-02 16:38:27,744][243562] Updated weights for policy 0, policy_version 37402 (0.0009) +[2026-06-02 16:38:27,905][243562] Updated weights for policy 0, policy_version 37412 (0.0008) +[2026-06-02 16:38:28,077][243562] Updated weights for policy 0, policy_version 37422 (0.0008) +[2026-06-02 16:38:28,742][243562] Updated weights for policy 0, policy_version 37433 (0.0009) +[2026-06-02 16:38:28,910][243562] Updated weights for policy 0, policy_version 37444 (0.0008) +[2026-06-02 16:38:29,071][243562] Updated weights for policy 0, policy_version 37454 (0.0007) +[2026-06-02 16:38:29,235][243562] Updated weights for policy 0, policy_version 37464 (0.0004) +[2026-06-02 16:38:29,407][243562] Updated weights for policy 0, policy_version 37474 (0.0005) +[2026-06-02 16:38:29,570][243562] Updated weights for policy 0, policy_version 37484 (0.0005) +[2026-06-02 16:38:29,811][235960] Fps is (10 sec: 22937.8, 60 sec: 21845.3, 300 sec: 21549.1). Total num frames: 19202048. Throughput: 0: 21336.2. Samples: 19218432. Policy #0 lag: (min: 63.0, avg: 77.8, max: 127.0) +[2026-06-02 16:38:29,812][235960] Avg episode reward: [(0, '1705.346')] +[2026-06-02 16:38:30,249][243562] Updated weights for policy 0, policy_version 37496 (0.0004) +[2026-06-02 16:38:30,415][243562] Updated weights for policy 0, policy_version 37507 (0.0004) +[2026-06-02 16:38:30,577][243562] Updated weights for policy 0, policy_version 37517 (0.0004) +[2026-06-02 16:38:30,739][243562] Updated weights for policy 0, policy_version 37527 (0.0004) +[2026-06-02 16:38:30,895][243562] Updated weights for policy 0, policy_version 37537 (0.0008) +[2026-06-02 16:38:31,074][243562] Updated weights for policy 0, policy_version 37548 (0.0008) +[2026-06-02 16:38:31,770][243562] Updated weights for policy 0, policy_version 37560 (0.0009) +[2026-06-02 16:38:31,922][243562] Updated weights for policy 0, policy_version 37570 (0.0008) +[2026-06-02 16:38:32,076][243562] Updated weights for policy 0, policy_version 37580 (0.0005) +[2026-06-02 16:38:32,235][243562] Updated weights for policy 0, policy_version 37590 (0.0005) +[2026-06-02 16:38:32,397][243562] Updated weights for policy 0, policy_version 37600 (0.0005) +[2026-06-02 16:38:32,555][243562] Updated weights for policy 0, policy_version 37610 (0.0005) +[2026-06-02 16:38:33,241][243562] Updated weights for policy 0, policy_version 37622 (0.0007) +[2026-06-02 16:38:33,457][243562] Updated weights for policy 0, policy_version 37636 (0.0008) +[2026-06-02 16:38:33,618][243562] Updated weights for policy 0, policy_version 37646 (0.0008) +[2026-06-02 16:38:33,784][243562] Updated weights for policy 0, policy_version 37656 (0.0009) +[2026-06-02 16:38:33,939][243562] Updated weights for policy 0, policy_version 37666 (0.0008) +[2026-06-02 16:38:34,096][243562] Updated weights for policy 0, policy_version 37676 (0.0008) +[2026-06-02 16:38:34,789][243562] Updated weights for policy 0, policy_version 37687 (0.0008) +[2026-06-02 16:38:34,811][235960] Fps is (10 sec: 22937.8, 60 sec: 21299.2, 300 sec: 21438.0). Total num frames: 19300352. Throughput: 0: 21330.5. Samples: 19283584. Policy #0 lag: (min: 63.0, avg: 77.3, max: 127.0) +[2026-06-02 16:38:34,812][235960] Avg episode reward: [(0, '1673.966')] +[2026-06-02 16:38:34,956][243562] Updated weights for policy 0, policy_version 37698 (0.0009) +[2026-06-02 16:38:35,157][243562] Updated weights for policy 0, policy_version 37711 (0.0008) +[2026-06-02 16:38:35,349][243562] Updated weights for policy 0, policy_version 37723 (0.0008) +[2026-06-02 16:38:35,511][243562] Updated weights for policy 0, policy_version 37733 (0.0008) +[2026-06-02 16:38:35,685][243562] Updated weights for policy 0, policy_version 37744 (0.0008) +[2026-06-02 16:38:36,401][243562] Updated weights for policy 0, policy_version 37756 (0.0009) +[2026-06-02 16:38:36,584][243562] Updated weights for policy 0, policy_version 37768 (0.0009) +[2026-06-02 16:38:36,793][243562] Updated weights for policy 0, policy_version 37781 (0.0008) +[2026-06-02 16:38:36,973][243562] Updated weights for policy 0, policy_version 37792 (0.0008) +[2026-06-02 16:38:37,150][243562] Updated weights for policy 0, policy_version 37803 (0.0008) +[2026-06-02 16:38:37,835][243562] Updated weights for policy 0, policy_version 37814 (0.0008) +[2026-06-02 16:38:38,022][243562] Updated weights for policy 0, policy_version 37826 (0.0007) +[2026-06-02 16:38:38,197][243562] Updated weights for policy 0, policy_version 37837 (0.0004) +[2026-06-02 16:38:38,373][243562] Updated weights for policy 0, policy_version 37848 (0.0005) +[2026-06-02 16:38:38,541][243562] Updated weights for policy 0, policy_version 37859 (0.0008) +[2026-06-02 16:38:38,743][243562] Updated weights for policy 0, policy_version 37871 (0.0008) +[2026-06-02 16:38:39,423][243562] Updated weights for policy 0, policy_version 37882 (0.0008) +[2026-06-02 16:38:39,594][243562] Updated weights for policy 0, policy_version 37893 (0.0009) +[2026-06-02 16:38:39,764][243562] Updated weights for policy 0, policy_version 37904 (0.0009) +[2026-06-02 16:38:39,811][235960] Fps is (10 sec: 19660.8, 60 sec: 21299.2, 300 sec: 21438.0). Total num frames: 19398656. Throughput: 0: 21344.7. Samples: 19414016. Policy #0 lag: (min: 63.0, avg: 77.3, max: 127.0) +[2026-06-02 16:38:39,812][235960] Avg episode reward: [(0, '1659.148')] +[2026-06-02 16:38:39,940][243562] Updated weights for policy 0, policy_version 37915 (0.0008) +[2026-06-02 16:38:40,112][243562] Updated weights for policy 0, policy_version 37926 (0.0008) +[2026-06-02 16:38:40,824][243562] Updated weights for policy 0, policy_version 37937 (0.0009) +[2026-06-02 16:38:40,977][243562] Updated weights for policy 0, policy_version 37947 (0.0009) +[2026-06-02 16:38:41,133][243562] Updated weights for policy 0, policy_version 37957 (0.0008) +[2026-06-02 16:38:41,298][243562] Updated weights for policy 0, policy_version 37967 (0.0008) +[2026-06-02 16:38:41,477][243562] Updated weights for policy 0, policy_version 37978 (0.0008) +[2026-06-02 16:38:41,643][243562] Updated weights for policy 0, policy_version 37988 (0.0008) +[2026-06-02 16:38:41,829][243562] Updated weights for policy 0, policy_version 38000 (0.0008) +[2026-06-02 16:38:42,496][243562] Updated weights for policy 0, policy_version 38010 (0.0008) +[2026-06-02 16:38:42,690][243562] Updated weights for policy 0, policy_version 38022 (0.0009) +[2026-06-02 16:38:42,864][243562] Updated weights for policy 0, policy_version 38033 (0.0009) +[2026-06-02 16:38:43,036][243562] Updated weights for policy 0, policy_version 38043 (0.0009) +[2026-06-02 16:38:43,224][243562] Updated weights for policy 0, policy_version 38054 (0.0008) +[2026-06-02 16:38:43,867][243562] Updated weights for policy 0, policy_version 38065 (0.0009) +[2026-06-02 16:38:44,017][243562] Updated weights for policy 0, policy_version 38075 (0.0008) +[2026-06-02 16:38:44,180][243562] Updated weights for policy 0, policy_version 38085 (0.0009) +[2026-06-02 16:38:44,341][243562] Updated weights for policy 0, policy_version 38095 (0.0009) +[2026-06-02 16:38:44,524][243562] Updated weights for policy 0, policy_version 38106 (0.0009) +[2026-06-02 16:38:44,686][243562] Updated weights for policy 0, policy_version 38116 (0.0008) +[2026-06-02 16:38:44,811][235960] Fps is (10 sec: 19660.7, 60 sec: 21299.2, 300 sec: 21438.0). Total num frames: 19496960. Throughput: 0: 21370.4. Samples: 19544576. Policy #0 lag: (min: 63.0, avg: 77.3, max: 127.0) +[2026-06-02 16:38:44,812][235960] Avg episode reward: [(0, '1667.945')] +[2026-06-02 16:38:44,855][243562] Updated weights for policy 0, policy_version 38126 (0.0008) +[2026-06-02 16:38:45,506][243562] Updated weights for policy 0, policy_version 38136 (0.0009) +[2026-06-02 16:38:45,681][243562] Updated weights for policy 0, policy_version 38147 (0.0008) +[2026-06-02 16:38:45,869][243562] Updated weights for policy 0, policy_version 38158 (0.0008) +[2026-06-02 16:38:46,045][243562] Updated weights for policy 0, policy_version 38169 (0.0009) +[2026-06-02 16:38:46,211][243562] Updated weights for policy 0, policy_version 38179 (0.0008) +[2026-06-02 16:38:46,383][243562] Updated weights for policy 0, policy_version 38189 (0.0008) +[2026-06-02 16:38:47,027][243562] Updated weights for policy 0, policy_version 38199 (0.0008) +[2026-06-02 16:38:47,204][243562] Updated weights for policy 0, policy_version 38210 (0.0008) +[2026-06-02 16:38:47,382][243562] Updated weights for policy 0, policy_version 38221 (0.0008) +[2026-06-02 16:38:47,547][243562] Updated weights for policy 0, policy_version 38231 (0.0009) +[2026-06-02 16:38:47,710][243562] Updated weights for policy 0, policy_version 38241 (0.0009) +[2026-06-02 16:38:47,876][243562] Updated weights for policy 0, policy_version 38251 (0.0008) +[2026-06-02 16:38:48,526][243562] Updated weights for policy 0, policy_version 38261 (0.0009) +[2026-06-02 16:38:48,701][243562] Updated weights for policy 0, policy_version 38272 (0.0008) +[2026-06-02 16:38:48,870][243562] Updated weights for policy 0, policy_version 38282 (0.0008) +[2026-06-02 16:38:49,027][243562] Updated weights for policy 0, policy_version 38292 (0.0008) +[2026-06-02 16:38:49,193][243562] Updated weights for policy 0, policy_version 38302 (0.0008) +[2026-06-02 16:38:49,357][243562] Updated weights for policy 0, policy_version 38312 (0.0007) +[2026-06-02 16:38:49,811][235960] Fps is (10 sec: 22937.6, 60 sec: 21845.4, 300 sec: 21549.1). Total num frames: 19628032. Throughput: 0: 21370.4. Samples: 19609728. Policy #0 lag: (min: 63.0, avg: 77.3, max: 127.0) +[2026-06-02 16:38:49,812][235960] Avg episode reward: [(0, '1694.401')] +[2026-06-02 16:38:50,025][243562] Updated weights for policy 0, policy_version 38322 (0.0008) +[2026-06-02 16:38:50,178][243562] Updated weights for policy 0, policy_version 38332 (0.0008) +[2026-06-02 16:38:50,345][243562] Updated weights for policy 0, policy_version 38342 (0.0008) +[2026-06-02 16:38:50,523][243562] Updated weights for policy 0, policy_version 38353 (0.0008) +[2026-06-02 16:38:50,683][243562] Updated weights for policy 0, policy_version 38363 (0.0008) +[2026-06-02 16:38:50,857][243562] Updated weights for policy 0, policy_version 38373 (0.0009) +[2026-06-02 16:38:51,019][243562] Updated weights for policy 0, policy_version 38383 (0.0009) +[2026-06-02 16:38:51,645][243562] Updated weights for policy 0, policy_version 38393 (0.0008) +[2026-06-02 16:38:51,809][243562] Updated weights for policy 0, policy_version 38403 (0.0008) +[2026-06-02 16:38:51,971][243562] Updated weights for policy 0, policy_version 38413 (0.0009) +[2026-06-02 16:38:52,136][243562] Updated weights for policy 0, policy_version 38423 (0.0008) +[2026-06-02 16:38:52,315][243562] Updated weights for policy 0, policy_version 38434 (0.0008) +[2026-06-02 16:38:52,497][243562] Updated weights for policy 0, policy_version 38445 (0.0009) +[2026-06-02 16:38:53,163][243562] Updated weights for policy 0, policy_version 38455 (0.0008) +[2026-06-02 16:38:53,320][243562] Updated weights for policy 0, policy_version 38465 (0.0009) +[2026-06-02 16:38:53,482][243562] Updated weights for policy 0, policy_version 38475 (0.0008) +[2026-06-02 16:38:53,663][243562] Updated weights for policy 0, policy_version 38486 (0.0008) +[2026-06-02 16:38:53,823][243562] Updated weights for policy 0, policy_version 38496 (0.0008) +[2026-06-02 16:38:53,995][243562] Updated weights for policy 0, policy_version 38506 (0.0004) +[2026-06-02 16:38:54,642][243562] Updated weights for policy 0, policy_version 38516 (0.0005) +[2026-06-02 16:38:54,792][243562] Updated weights for policy 0, policy_version 38526 (0.0005) +[2026-06-02 16:38:54,811][235960] Fps is (10 sec: 22937.8, 60 sec: 21299.2, 300 sec: 21438.0). Total num frames: 19726336. Throughput: 0: 21395.9. Samples: 19740416. Policy #0 lag: (min: 63.0, avg: 77.3, max: 127.0) +[2026-06-02 16:38:54,812][235960] Avg episode reward: [(0, '1656.171')] +[2026-06-02 16:38:54,960][243562] Updated weights for policy 0, policy_version 38536 (0.0005) +[2026-06-02 16:38:55,121][243562] Updated weights for policy 0, policy_version 38546 (0.0005) +[2026-06-02 16:38:55,280][243562] Updated weights for policy 0, policy_version 38556 (0.0005) +[2026-06-02 16:38:55,453][243562] Updated weights for policy 0, policy_version 38566 (0.0004) +[2026-06-02 16:38:55,618][243562] Updated weights for policy 0, policy_version 38576 (0.0004) +[2026-06-02 16:38:56,258][243562] Updated weights for policy 0, policy_version 38586 (0.0007) +[2026-06-02 16:38:56,417][243562] Updated weights for policy 0, policy_version 38596 (0.0008) +[2026-06-02 16:38:56,577][243562] Updated weights for policy 0, policy_version 38606 (0.0008) +[2026-06-02 16:38:56,740][243562] Updated weights for policy 0, policy_version 38616 (0.0008) +[2026-06-02 16:38:56,928][243562] Updated weights for policy 0, policy_version 38627 (0.0008) +[2026-06-02 16:38:57,092][243562] Updated weights for policy 0, policy_version 38637 (0.0008) +[2026-06-02 16:38:57,754][243562] Updated weights for policy 0, policy_version 38647 (0.0008) +[2026-06-02 16:38:57,909][243562] Updated weights for policy 0, policy_version 38657 (0.0008) +[2026-06-02 16:38:58,072][243562] Updated weights for policy 0, policy_version 38667 (0.0008) +[2026-06-02 16:38:58,239][243562] Updated weights for policy 0, policy_version 38677 (0.0008) +[2026-06-02 16:38:58,400][243562] Updated weights for policy 0, policy_version 38687 (0.0009) +[2026-06-02 16:38:58,559][243562] Updated weights for policy 0, policy_version 38697 (0.0008) +[2026-06-02 16:38:59,217][243562] Updated weights for policy 0, policy_version 38707 (0.0009) +[2026-06-02 16:38:59,394][243562] Updated weights for policy 0, policy_version 38718 (0.0008) +[2026-06-02 16:38:59,570][243562] Updated weights for policy 0, policy_version 38729 (0.0009) +[2026-06-02 16:38:59,737][243562] Updated weights for policy 0, policy_version 38739 (0.0008) +[2026-06-02 16:38:59,811][235960] Fps is (10 sec: 19660.7, 60 sec: 21299.3, 300 sec: 21438.0). Total num frames: 19824640. Throughput: 0: 21506.9. Samples: 19870976. Policy #0 lag: (min: 63.0, avg: 77.3, max: 127.0) +[2026-06-02 16:38:59,812][235960] Avg episode reward: [(0, '1705.786')] +[2026-06-02 16:38:59,902][243562] Updated weights for policy 0, policy_version 38749 (0.0008) +[2026-06-02 16:39:00,062][243562] Updated weights for policy 0, policy_version 38759 (0.0008) +[2026-06-02 16:39:00,724][243562] Updated weights for policy 0, policy_version 38769 (0.0009) +[2026-06-02 16:39:00,892][243562] Updated weights for policy 0, policy_version 38780 (0.0009) +[2026-06-02 16:39:01,057][243562] Updated weights for policy 0, policy_version 38790 (0.0008) +[2026-06-02 16:39:01,227][243562] Updated weights for policy 0, policy_version 38800 (0.0008) +[2026-06-02 16:39:01,384][243562] Updated weights for policy 0, policy_version 38810 (0.0008) +[2026-06-02 16:39:01,546][243562] Updated weights for policy 0, policy_version 38820 (0.0008) +[2026-06-02 16:39:01,716][243562] Updated weights for policy 0, policy_version 38830 (0.0008) +[2026-06-02 16:39:02,373][243562] Updated weights for policy 0, policy_version 38840 (0.0009) +[2026-06-02 16:39:02,530][243562] Updated weights for policy 0, policy_version 38850 (0.0009) +[2026-06-02 16:39:02,705][243562] Updated weights for policy 0, policy_version 38861 (0.0008) +[2026-06-02 16:39:02,864][243562] Updated weights for policy 0, policy_version 38871 (0.0008) +[2026-06-02 16:39:03,053][243562] Updated weights for policy 0, policy_version 38882 (0.0009) +[2026-06-02 16:39:03,222][243562] Updated weights for policy 0, policy_version 38892 (0.0008) +[2026-06-02 16:39:03,861][243562] Updated weights for policy 0, policy_version 38902 (0.0009) +[2026-06-02 16:39:04,028][243562] Updated weights for policy 0, policy_version 38913 (0.0008) +[2026-06-02 16:39:04,200][243562] Updated weights for policy 0, policy_version 38923 (0.0009) +[2026-06-02 16:39:04,356][243562] Updated weights for policy 0, policy_version 38933 (0.0008) +[2026-06-02 16:39:04,517][243562] Updated weights for policy 0, policy_version 38943 (0.0008) +[2026-06-02 16:39:04,690][243562] Updated weights for policy 0, policy_version 38953 (0.0008) +[2026-06-02 16:39:04,811][235960] Fps is (10 sec: 22937.6, 60 sec: 21845.4, 300 sec: 21549.1). Total num frames: 19955712. Throughput: 0: 21580.9. Samples: 19936896. Policy #0 lag: (min: 63.0, avg: 78.5, max: 127.0) +[2026-06-02 16:39:04,812][235960] Avg episode reward: [(0, '1701.046')] +[2026-06-02 16:39:05,355][243562] Updated weights for policy 0, policy_version 38963 (0.0008) +[2026-06-02 16:39:05,508][243562] Updated weights for policy 0, policy_version 38973 (0.0008) +[2026-06-02 16:39:05,677][243562] Updated weights for policy 0, policy_version 38983 (0.0008) +[2026-06-02 16:39:05,838][243562] Updated weights for policy 0, policy_version 38993 (0.0008) +[2026-06-02 16:39:05,998][243562] Updated weights for policy 0, policy_version 39003 (0.0008) +[2026-06-02 16:39:06,167][243562] Updated weights for policy 0, policy_version 39013 (0.0008) +[2026-06-02 16:39:06,336][243562] Updated weights for policy 0, policy_version 39023 (0.0008) +[2026-06-02 16:39:06,980][243562] Updated weights for policy 0, policy_version 39033 (0.0008) +[2026-06-02 16:39:07,142][243562] Updated weights for policy 0, policy_version 39043 (0.0008) +[2026-06-02 16:39:07,312][243562] Updated weights for policy 0, policy_version 39054 (0.0008) +[2026-06-02 16:39:07,489][243562] Updated weights for policy 0, policy_version 39064 (0.0008) +[2026-06-02 16:39:07,657][243562] Updated weights for policy 0, policy_version 39074 (0.0008) +[2026-06-02 16:39:07,843][243562] Updated weights for policy 0, policy_version 39085 (0.0008) +[2026-06-02 16:39:08,487][243562] Updated weights for policy 0, policy_version 39095 (0.0008) +[2026-06-02 16:39:08,648][243562] Updated weights for policy 0, policy_version 39105 (0.0008) +[2026-06-02 16:39:08,830][243562] Updated weights for policy 0, policy_version 39116 (0.0008) +[2026-06-02 16:39:08,993][243562] Updated weights for policy 0, policy_version 39126 (0.0008) +[2026-06-02 16:39:09,162][243562] Updated weights for policy 0, policy_version 39136 (0.0008) +[2026-06-02 16:39:09,334][243562] Updated weights for policy 0, policy_version 39147 (0.0008) +[2026-06-02 16:39:09,811][235960] Fps is (10 sec: 22937.6, 60 sec: 21299.2, 300 sec: 21438.0). Total num frames: 20054016. Throughput: 0: 21688.9. Samples: 20066816. Policy #0 lag: (min: 63.0, avg: 78.5, max: 127.0) +[2026-06-02 16:39:09,812][235960] Avg episode reward: [(0, '1667.040')] +[2026-06-02 16:39:10,014][243562] Updated weights for policy 0, policy_version 39158 (0.0010) +[2026-06-02 16:39:10,171][243562] Updated weights for policy 0, policy_version 39168 (0.0008) +[2026-06-02 16:39:10,329][243562] Updated weights for policy 0, policy_version 39178 (0.0008) +[2026-06-02 16:39:10,497][243562] Updated weights for policy 0, policy_version 39188 (0.0008) +[2026-06-02 16:39:10,664][243562] Updated weights for policy 0, policy_version 39198 (0.0008) +[2026-06-02 16:39:10,832][243562] Updated weights for policy 0, policy_version 39208 (0.0008) +[2026-06-02 16:39:11,477][243562] Updated weights for policy 0, policy_version 39218 (0.0008) +[2026-06-02 16:39:11,634][243562] Updated weights for policy 0, policy_version 39228 (0.0008) +[2026-06-02 16:39:11,809][243562] Updated weights for policy 0, policy_version 39239 (0.0009) +[2026-06-02 16:39:11,974][243562] Updated weights for policy 0, policy_version 39249 (0.0009) +[2026-06-02 16:39:12,132][243562] Updated weights for policy 0, policy_version 39259 (0.0008) +[2026-06-02 16:39:12,306][243562] Updated weights for policy 0, policy_version 39269 (0.0009) +[2026-06-02 16:39:12,470][243562] Updated weights for policy 0, policy_version 39279 (0.0008) +[2026-06-02 16:39:13,113][243562] Updated weights for policy 0, policy_version 39289 (0.0008) +[2026-06-02 16:39:13,272][243562] Updated weights for policy 0, policy_version 39299 (0.0008) +[2026-06-02 16:39:13,432][243562] Updated weights for policy 0, policy_version 39309 (0.0009) +[2026-06-02 16:39:13,610][243562] Updated weights for policy 0, policy_version 39320 (0.0008) +[2026-06-02 16:39:13,780][243562] Updated weights for policy 0, policy_version 39330 (0.0008) +[2026-06-02 16:39:13,941][243562] Updated weights for policy 0, policy_version 39340 (0.0009) +[2026-06-02 16:39:14,611][243562] Updated weights for policy 0, policy_version 39351 (0.0006) +[2026-06-02 16:39:14,793][243562] Updated weights for policy 0, policy_version 39362 (0.0004) +[2026-06-02 16:39:14,811][235960] Fps is (10 sec: 19660.7, 60 sec: 21299.2, 300 sec: 21438.1). Total num frames: 20152320. Throughput: 0: 21751.4. Samples: 20197248. Policy #0 lag: (min: 63.0, avg: 78.5, max: 127.0) +[2026-06-02 16:39:14,812][235960] Avg episode reward: [(0, '1776.170')] +[2026-06-02 16:39:14,953][243562] Updated weights for policy 0, policy_version 39372 (0.0006) +[2026-06-02 16:39:15,122][243562] Updated weights for policy 0, policy_version 39382 (0.0008) +[2026-06-02 16:39:15,289][243562] Updated weights for policy 0, policy_version 39392 (0.0009) +[2026-06-02 16:39:15,467][243562] Updated weights for policy 0, policy_version 39403 (0.0009) +[2026-06-02 16:39:15,541][242748] Saving new best policy, reward=1776.170! +[2026-06-02 16:39:16,104][243562] Updated weights for policy 0, policy_version 39413 (0.0010) +[2026-06-02 16:39:16,261][243562] Updated weights for policy 0, policy_version 39423 (0.0005) +[2026-06-02 16:39:16,428][243562] Updated weights for policy 0, policy_version 39433 (0.0005) +[2026-06-02 16:39:16,586][243562] Updated weights for policy 0, policy_version 39443 (0.0006) +[2026-06-02 16:39:16,792][243562] Updated weights for policy 0, policy_version 39455 (0.0008) +[2026-06-02 16:39:16,975][243562] Updated weights for policy 0, policy_version 39466 (0.0009) +[2026-06-02 16:39:17,651][243562] Updated weights for policy 0, policy_version 39478 (0.0009) +[2026-06-02 16:39:17,830][243562] Updated weights for policy 0, policy_version 39489 (0.0009) +[2026-06-02 16:39:18,008][243562] Updated weights for policy 0, policy_version 39500 (0.0008) +[2026-06-02 16:39:18,172][243562] Updated weights for policy 0, policy_version 39510 (0.0008) +[2026-06-02 16:39:18,337][243562] Updated weights for policy 0, policy_version 39520 (0.0008) +[2026-06-02 16:39:18,515][243562] Updated weights for policy 0, policy_version 39531 (0.0008) +[2026-06-02 16:39:19,171][243562] Updated weights for policy 0, policy_version 39542 (0.0008) +[2026-06-02 16:39:19,332][243562] Updated weights for policy 0, policy_version 39552 (0.0009) +[2026-06-02 16:39:19,489][243562] Updated weights for policy 0, policy_version 39562 (0.0008) +[2026-06-02 16:39:19,675][243562] Updated weights for policy 0, policy_version 39573 (0.0009) +[2026-06-02 16:39:19,811][235960] Fps is (10 sec: 19660.7, 60 sec: 21299.2, 300 sec: 21438.0). Total num frames: 20250624. Throughput: 0: 21754.3. Samples: 20262528. Policy #0 lag: (min: 63.0, avg: 78.5, max: 127.0) +[2026-06-02 16:39:19,812][235960] Avg episode reward: [(0, '1779.832')] +[2026-06-02 16:39:19,843][243562] Updated weights for policy 0, policy_version 39583 (0.0008) +[2026-06-02 16:39:20,011][243562] Updated weights for policy 0, policy_version 39593 (0.0009) +[2026-06-02 16:39:20,123][242748] Saving new best policy, reward=1779.832! +[2026-06-02 16:39:20,692][243562] Updated weights for policy 0, policy_version 39603 (0.0009) +[2026-06-02 16:39:20,848][243562] Updated weights for policy 0, policy_version 39613 (0.0008) +[2026-06-02 16:39:21,029][243562] Updated weights for policy 0, policy_version 39624 (0.0009) +[2026-06-02 16:39:21,189][243562] Updated weights for policy 0, policy_version 39634 (0.0009) +[2026-06-02 16:39:21,368][243562] Updated weights for policy 0, policy_version 39645 (0.0009) +[2026-06-02 16:39:21,528][243562] Updated weights for policy 0, policy_version 39655 (0.0008) +[2026-06-02 16:39:22,194][243562] Updated weights for policy 0, policy_version 39665 (0.0009) +[2026-06-02 16:39:22,364][243562] Updated weights for policy 0, policy_version 39676 (0.0008) +[2026-06-02 16:39:22,533][243562] Updated weights for policy 0, policy_version 39687 (0.0008) +[2026-06-02 16:39:22,723][243562] Updated weights for policy 0, policy_version 39698 (0.0008) +[2026-06-02 16:39:22,888][243562] Updated weights for policy 0, policy_version 39708 (0.0008) +[2026-06-02 16:39:23,052][243562] Updated weights for policy 0, policy_version 39718 (0.0008) +[2026-06-02 16:39:23,211][243562] Updated weights for policy 0, policy_version 39728 (0.0009) +[2026-06-02 16:39:23,858][243562] Updated weights for policy 0, policy_version 39738 (0.0008) +[2026-06-02 16:39:24,016][243562] Updated weights for policy 0, policy_version 39748 (0.0009) +[2026-06-02 16:39:24,203][243562] Updated weights for policy 0, policy_version 39759 (0.0009) +[2026-06-02 16:39:24,371][243562] Updated weights for policy 0, policy_version 39770 (0.0007) +[2026-06-02 16:39:24,553][243562] Updated weights for policy 0, policy_version 39781 (0.0009) +[2026-06-02 16:39:24,725][243562] Updated weights for policy 0, policy_version 39791 (0.0008) +[2026-06-02 16:39:24,811][235960] Fps is (10 sec: 22937.7, 60 sec: 21845.4, 300 sec: 21549.1). Total num frames: 20381696. Throughput: 0: 21794.1. Samples: 20394752. Policy #0 lag: (min: 63.0, avg: 78.5, max: 127.0) +[2026-06-02 16:39:24,812][235960] Avg episode reward: [(0, '1755.087')] +[2026-06-02 16:39:25,397][243562] Updated weights for policy 0, policy_version 39802 (0.0009) +[2026-06-02 16:39:25,574][243562] Updated weights for policy 0, policy_version 39813 (0.0008) +[2026-06-02 16:39:25,738][243562] Updated weights for policy 0, policy_version 39823 (0.0009) +[2026-06-02 16:39:25,906][243562] Updated weights for policy 0, policy_version 39833 (0.0008) +[2026-06-02 16:39:26,068][243562] Updated weights for policy 0, policy_version 39843 (0.0009) +[2026-06-02 16:39:26,243][243562] Updated weights for policy 0, policy_version 39853 (0.0008) +[2026-06-02 16:39:26,888][243562] Updated weights for policy 0, policy_version 39863 (0.0008) +[2026-06-02 16:39:27,046][243562] Updated weights for policy 0, policy_version 39873 (0.0009) +[2026-06-02 16:39:27,208][243562] Updated weights for policy 0, policy_version 39883 (0.0009) +[2026-06-02 16:39:27,386][243562] Updated weights for policy 0, policy_version 39894 (0.0008) +[2026-06-02 16:39:27,553][243562] Updated weights for policy 0, policy_version 39904 (0.0008) +[2026-06-02 16:39:27,715][243562] Updated weights for policy 0, policy_version 39914 (0.0008) +[2026-06-02 16:39:28,369][243562] Updated weights for policy 0, policy_version 39924 (0.0009) +[2026-06-02 16:39:28,539][243562] Updated weights for policy 0, policy_version 39934 (0.0009) +[2026-06-02 16:39:28,686][243562] Updated weights for policy 0, policy_version 39944 (0.0008) +[2026-06-02 16:39:28,872][243562] Updated weights for policy 0, policy_version 39955 (0.0008) +[2026-06-02 16:39:29,034][243562] Updated weights for policy 0, policy_version 39965 (0.0008) +[2026-06-02 16:39:29,199][243562] Updated weights for policy 0, policy_version 39975 (0.0008) +[2026-06-02 16:39:29,811][235960] Fps is (10 sec: 22937.7, 60 sec: 21299.2, 300 sec: 21438.0). Total num frames: 20480000. Throughput: 0: 21703.1. Samples: 20521216. Policy #0 lag: (min: 63.0, avg: 78.1, max: 127.0) +[2026-06-02 16:39:29,812][235960] Avg episode reward: [(0, '1733.611')] +[2026-06-02 16:39:29,857][243562] Updated weights for policy 0, policy_version 39985 (0.0009) +[2026-06-02 16:39:30,008][243562] Updated weights for policy 0, policy_version 39995 (0.0009) +[2026-06-02 16:39:30,170][243562] Updated weights for policy 0, policy_version 40005 (0.0009) +[2026-06-02 16:39:30,335][243562] Updated weights for policy 0, policy_version 40015 (0.0009) +[2026-06-02 16:39:30,499][243562] Updated weights for policy 0, policy_version 40025 (0.0008) +[2026-06-02 16:39:30,678][243562] Updated weights for policy 0, policy_version 40036 (0.0008) +[2026-06-02 16:39:30,859][243562] Updated weights for policy 0, policy_version 40047 (0.0008) +[2026-06-02 16:39:31,521][243562] Updated weights for policy 0, policy_version 40058 (0.0009) +[2026-06-02 16:39:31,689][243562] Updated weights for policy 0, policy_version 40068 (0.0009) +[2026-06-02 16:39:31,850][243562] Updated weights for policy 0, policy_version 40078 (0.0008) +[2026-06-02 16:39:32,010][243562] Updated weights for policy 0, policy_version 40088 (0.0008) +[2026-06-02 16:39:32,197][243562] Updated weights for policy 0, policy_version 40099 (0.0009) +[2026-06-02 16:39:32,377][243562] Updated weights for policy 0, policy_version 40110 (0.0008) +[2026-06-02 16:39:33,016][243562] Updated weights for policy 0, policy_version 40120 (0.0008) +[2026-06-02 16:39:33,191][243562] Updated weights for policy 0, policy_version 40131 (0.0008) +[2026-06-02 16:39:33,355][243562] Updated weights for policy 0, policy_version 40141 (0.0008) +[2026-06-02 16:39:33,523][243562] Updated weights for policy 0, policy_version 40151 (0.0009) +[2026-06-02 16:39:33,686][243562] Updated weights for policy 0, policy_version 40161 (0.0009) +[2026-06-02 16:39:33,850][243562] Updated weights for policy 0, policy_version 40171 (0.0008) +[2026-06-02 16:39:34,503][243562] Updated weights for policy 0, policy_version 40182 (0.0009) +[2026-06-02 16:39:34,677][243562] Updated weights for policy 0, policy_version 40193 (0.0008) +[2026-06-02 16:39:34,811][235960] Fps is (10 sec: 19660.7, 60 sec: 21299.2, 300 sec: 21438.1). Total num frames: 20578304. Throughput: 0: 21651.9. Samples: 20584064. Policy #0 lag: (min: 63.0, avg: 78.1, max: 127.0) +[2026-06-02 16:39:34,812][235960] Avg episode reward: [(0, '1743.094')] +[2026-06-02 16:39:34,837][243562] Updated weights for policy 0, policy_version 40203 (0.0008) +[2026-06-02 16:39:34,997][243562] Updated weights for policy 0, policy_version 40213 (0.0008) +[2026-06-02 16:39:35,168][243562] Updated weights for policy 0, policy_version 40223 (0.0008) +[2026-06-02 16:39:35,363][243562] Updated weights for policy 0, policy_version 40235 (0.0008) +[2026-06-02 16:39:36,024][243562] Updated weights for policy 0, policy_version 40245 (0.0008) +[2026-06-02 16:39:36,181][243562] Updated weights for policy 0, policy_version 40255 (0.0008) +[2026-06-02 16:39:36,345][243562] Updated weights for policy 0, policy_version 40265 (0.0008) +[2026-06-02 16:39:36,501][243562] Updated weights for policy 0, policy_version 40275 (0.0008) +[2026-06-02 16:39:36,684][243562] Updated weights for policy 0, policy_version 40286 (0.0008) +[2026-06-02 16:39:36,851][243562] Updated weights for policy 0, policy_version 40296 (0.0009) +[2026-06-02 16:39:37,499][243562] Updated weights for policy 0, policy_version 40306 (0.0008) +[2026-06-02 16:39:37,652][243562] Updated weights for policy 0, policy_version 40316 (0.0008) +[2026-06-02 16:39:37,823][243562] Updated weights for policy 0, policy_version 40327 (0.0008) +[2026-06-02 16:39:37,990][243562] Updated weights for policy 0, policy_version 40337 (0.0008) +[2026-06-02 16:39:38,155][243562] Updated weights for policy 0, policy_version 40347 (0.0008) +[2026-06-02 16:39:38,346][243562] Updated weights for policy 0, policy_version 40359 (0.0008) +[2026-06-02 16:39:39,002][243562] Updated weights for policy 0, policy_version 40369 (0.0009) +[2026-06-02 16:39:39,156][243562] Updated weights for policy 0, policy_version 40379 (0.0008) +[2026-06-02 16:39:39,322][243562] Updated weights for policy 0, policy_version 40389 (0.0008) +[2026-06-02 16:39:39,495][243562] Updated weights for policy 0, policy_version 40400 (0.0008) +[2026-06-02 16:39:39,677][243562] Updated weights for policy 0, policy_version 40411 (0.0008) +[2026-06-02 16:39:39,811][235960] Fps is (10 sec: 19660.7, 60 sec: 21299.2, 300 sec: 21438.0). Total num frames: 20676608. Throughput: 0: 21558.0. Samples: 20710528. Policy #0 lag: (min: 63.0, avg: 78.1, max: 127.0) +[2026-06-02 16:39:39,812][235960] Avg episode reward: [(0, '1708.266')] +[2026-06-02 16:39:39,855][243562] Updated weights for policy 0, policy_version 40422 (0.0008) +[2026-06-02 16:39:40,013][243562] Updated weights for policy 0, policy_version 40432 (0.0010) +[2026-06-02 16:39:40,677][243562] Updated weights for policy 0, policy_version 40442 (0.0008) +[2026-06-02 16:39:40,836][243562] Updated weights for policy 0, policy_version 40452 (0.0008) +[2026-06-02 16:39:41,015][243562] Updated weights for policy 0, policy_version 40463 (0.0009) +[2026-06-02 16:39:41,196][243562] Updated weights for policy 0, policy_version 40474 (0.0008) +[2026-06-02 16:39:41,358][243562] Updated weights for policy 0, policy_version 40484 (0.0008) +[2026-06-02 16:39:41,523][243562] Updated weights for policy 0, policy_version 40494 (0.0009) +[2026-06-02 16:39:42,161][243562] Updated weights for policy 0, policy_version 40504 (0.0008) +[2026-06-02 16:39:42,320][243562] Updated weights for policy 0, policy_version 40514 (0.0009) +[2026-06-02 16:39:42,490][243562] Updated weights for policy 0, policy_version 40524 (0.0008) +[2026-06-02 16:39:42,654][243562] Updated weights for policy 0, policy_version 40534 (0.0008) +[2026-06-02 16:39:42,819][243562] Updated weights for policy 0, policy_version 40544 (0.0008) +[2026-06-02 16:39:42,994][243562] Updated weights for policy 0, policy_version 40555 (0.0008) +[2026-06-02 16:39:43,657][243562] Updated weights for policy 0, policy_version 40565 (0.0009) +[2026-06-02 16:39:43,814][243562] Updated weights for policy 0, policy_version 40575 (0.0008) +[2026-06-02 16:39:43,975][243562] Updated weights for policy 0, policy_version 40585 (0.0009) +[2026-06-02 16:39:44,139][243562] Updated weights for policy 0, policy_version 40595 (0.0009) +[2026-06-02 16:39:44,298][243562] Updated weights for policy 0, policy_version 40605 (0.0009) +[2026-06-02 16:39:44,464][243562] Updated weights for policy 0, policy_version 40615 (0.0008) +[2026-06-02 16:39:44,811][235960] Fps is (10 sec: 22937.7, 60 sec: 21845.4, 300 sec: 21549.1). Total num frames: 20807680. Throughput: 0: 21475.6. Samples: 20837376. Policy #0 lag: (min: 63.0, avg: 78.1, max: 127.0) +[2026-06-02 16:39:44,812][235960] Avg episode reward: [(0, '1709.604')] +[2026-06-02 16:39:45,119][243562] Updated weights for policy 0, policy_version 40625 (0.0009) +[2026-06-02 16:39:45,286][243562] Updated weights for policy 0, policy_version 40636 (0.0009) +[2026-06-02 16:39:45,436][243562] Updated weights for policy 0, policy_version 40646 (0.0008) +[2026-06-02 16:39:45,605][243562] Updated weights for policy 0, policy_version 40656 (0.0008) +[2026-06-02 16:39:45,761][243562] Updated weights for policy 0, policy_version 40666 (0.0008) +[2026-06-02 16:39:45,934][243562] Updated weights for policy 0, policy_version 40676 (0.0008) +[2026-06-02 16:39:46,126][243562] Updated weights for policy 0, policy_version 40688 (0.0009) +[2026-06-02 16:39:46,789][243562] Updated weights for policy 0, policy_version 40698 (0.0009) +[2026-06-02 16:39:46,950][243562] Updated weights for policy 0, policy_version 40708 (0.0008) +[2026-06-02 16:39:47,109][243562] Updated weights for policy 0, policy_version 40718 (0.0009) +[2026-06-02 16:39:47,293][243562] Updated weights for policy 0, policy_version 40729 (0.0009) +[2026-06-02 16:39:47,470][243562] Updated weights for policy 0, policy_version 40740 (0.0009) +[2026-06-02 16:39:47,627][243562] Updated weights for policy 0, policy_version 40750 (0.0008) +[2026-06-02 16:39:48,290][243562] Updated weights for policy 0, policy_version 40760 (0.0009) +[2026-06-02 16:39:48,440][243562] Updated weights for policy 0, policy_version 40770 (0.0009) +[2026-06-02 16:39:48,599][243562] Updated weights for policy 0, policy_version 40780 (0.0008) +[2026-06-02 16:39:48,784][243562] Updated weights for policy 0, policy_version 40791 (0.0009) +[2026-06-02 16:39:48,949][243562] Updated weights for policy 0, policy_version 40801 (0.0009) +[2026-06-02 16:39:49,134][243562] Updated weights for policy 0, policy_version 40812 (0.0009) +[2026-06-02 16:39:49,805][243562] Updated weights for policy 0, policy_version 40822 (0.0009) +[2026-06-02 16:39:49,811][235960] Fps is (10 sec: 22937.5, 60 sec: 21299.2, 300 sec: 21438.0). Total num frames: 20905984. Throughput: 0: 21447.1. Samples: 20902016. Policy #0 lag: (min: 63.0, avg: 78.1, max: 127.0) +[2026-06-02 16:39:49,812][235960] Avg episode reward: [(0, '1800.848')] +[2026-06-02 16:39:49,965][243562] Updated weights for policy 0, policy_version 40832 (0.0009) +[2026-06-02 16:39:50,130][243562] Updated weights for policy 0, policy_version 40842 (0.0009) +[2026-06-02 16:39:50,294][243562] Updated weights for policy 0, policy_version 40852 (0.0009) +[2026-06-02 16:39:50,465][243562] Updated weights for policy 0, policy_version 40862 (0.0009) +[2026-06-02 16:39:50,622][243562] Updated weights for policy 0, policy_version 40872 (0.0008) +[2026-06-02 16:39:50,746][242748] Saving new best policy, reward=1800.848! +[2026-06-02 16:39:51,264][243562] Updated weights for policy 0, policy_version 40882 (0.0009) +[2026-06-02 16:39:51,411][243562] Updated weights for policy 0, policy_version 40892 (0.0008) +[2026-06-02 16:39:51,574][243562] Updated weights for policy 0, policy_version 40902 (0.0008) +[2026-06-02 16:39:51,758][243562] Updated weights for policy 0, policy_version 40913 (0.0008) +[2026-06-02 16:39:51,920][243562] Updated weights for policy 0, policy_version 40923 (0.0008) +[2026-06-02 16:39:52,095][243562] Updated weights for policy 0, policy_version 40934 (0.0008) +[2026-06-02 16:39:52,259][243562] Updated weights for policy 0, policy_version 40944 (0.0008) +[2026-06-02 16:39:52,911][243562] Updated weights for policy 0, policy_version 40954 (0.0008) +[2026-06-02 16:39:53,063][243562] Updated weights for policy 0, policy_version 40964 (0.0008) +[2026-06-02 16:39:53,232][243562] Updated weights for policy 0, policy_version 40974 (0.0008) +[2026-06-02 16:39:53,392][243562] Updated weights for policy 0, policy_version 40984 (0.0008) +[2026-06-02 16:39:53,575][243562] Updated weights for policy 0, policy_version 40995 (0.0009) +[2026-06-02 16:39:53,734][243562] Updated weights for policy 0, policy_version 41005 (0.0008) +[2026-06-02 16:39:54,370][243562] Updated weights for policy 0, policy_version 41015 (0.0009) +[2026-06-02 16:39:54,537][243562] Updated weights for policy 0, policy_version 41025 (0.0008) +[2026-06-02 16:39:54,705][243562] Updated weights for policy 0, policy_version 41036 (0.0008) +[2026-06-02 16:39:54,811][235960] Fps is (10 sec: 19660.7, 60 sec: 21299.2, 300 sec: 21438.1). Total num frames: 21004288. Throughput: 0: 21461.3. Samples: 21032576. Policy #0 lag: (min: 63.0, avg: 78.1, max: 127.0) +[2026-06-02 16:39:54,812][235960] Avg episode reward: [(0, '1900.222')] +[2026-06-02 16:39:54,893][243562] Updated weights for policy 0, policy_version 41047 (0.0009) +[2026-06-02 16:39:55,051][243562] Updated weights for policy 0, policy_version 41057 (0.0008) +[2026-06-02 16:39:55,218][243562] Updated weights for policy 0, policy_version 41067 (0.0009) +[2026-06-02 16:39:55,293][242748] Saving new best policy, reward=1900.222! +[2026-06-02 16:39:55,848][243562] Updated weights for policy 0, policy_version 41077 (0.0009) +[2026-06-02 16:39:56,019][243562] Updated weights for policy 0, policy_version 41088 (0.0008) +[2026-06-02 16:39:56,198][243562] Updated weights for policy 0, policy_version 41099 (0.0008) +[2026-06-02 16:39:56,379][243562] Updated weights for policy 0, policy_version 41110 (0.0008) +[2026-06-02 16:39:56,539][243562] Updated weights for policy 0, policy_version 41120 (0.0008) +[2026-06-02 16:39:56,733][243562] Updated weights for policy 0, policy_version 41132 (0.0008) +[2026-06-02 16:39:57,381][243562] Updated weights for policy 0, policy_version 41142 (0.0009) +[2026-06-02 16:39:57,537][243562] Updated weights for policy 0, policy_version 41152 (0.0008) +[2026-06-02 16:39:57,700][243562] Updated weights for policy 0, policy_version 41162 (0.0009) +[2026-06-02 16:39:57,874][243562] Updated weights for policy 0, policy_version 41173 (0.0008) +[2026-06-02 16:39:58,043][243562] Updated weights for policy 0, policy_version 41183 (0.0009) +[2026-06-02 16:39:58,201][243562] Updated weights for policy 0, policy_version 41193 (0.0008) +[2026-06-02 16:39:58,854][243562] Updated weights for policy 0, policy_version 41203 (0.0009) +[2026-06-02 16:39:59,026][243562] Updated weights for policy 0, policy_version 41214 (0.0008) +[2026-06-02 16:39:59,185][243562] Updated weights for policy 0, policy_version 41224 (0.0008) +[2026-06-02 16:39:59,342][243562] Updated weights for policy 0, policy_version 41234 (0.0008) +[2026-06-02 16:39:59,512][243562] Updated weights for policy 0, policy_version 41244 (0.0008) +[2026-06-02 16:39:59,675][243562] Updated weights for policy 0, policy_version 41254 (0.0009) +[2026-06-02 16:39:59,811][235960] Fps is (10 sec: 19660.9, 60 sec: 21299.2, 300 sec: 21438.0). Total num frames: 21102592. Throughput: 0: 21427.2. Samples: 21161472. Policy #0 lag: (min: 96.0, avg: 110.8, max: 160.0) +[2026-06-02 16:39:59,812][235960] Avg episode reward: [(0, '1937.422')] +[2026-06-02 16:39:59,831][242748] Saving new best policy, reward=1937.422! +[2026-06-02 16:39:59,833][243562] Updated weights for policy 0, policy_version 41264 (0.0008) +[2026-06-02 16:40:00,478][243562] Updated weights for policy 0, policy_version 41274 (0.0008) +[2026-06-02 16:40:00,650][243562] Updated weights for policy 0, policy_version 41284 (0.0009) +[2026-06-02 16:40:00,810][243562] Updated weights for policy 0, policy_version 41294 (0.0008) +[2026-06-02 16:40:00,991][243562] Updated weights for policy 0, policy_version 41305 (0.0008) +[2026-06-02 16:40:01,157][243562] Updated weights for policy 0, policy_version 41315 (0.0008) +[2026-06-02 16:40:01,318][243562] Updated weights for policy 0, policy_version 41325 (0.0008) +[2026-06-02 16:40:01,970][243562] Updated weights for policy 0, policy_version 41335 (0.0008) +[2026-06-02 16:40:02,138][243562] Updated weights for policy 0, policy_version 41346 (0.0008) +[2026-06-02 16:40:02,319][243562] Updated weights for policy 0, policy_version 41357 (0.0008) +[2026-06-02 16:40:02,480][243562] Updated weights for policy 0, policy_version 41367 (0.0008) +[2026-06-02 16:40:02,650][243562] Updated weights for policy 0, policy_version 41377 (0.0008) +[2026-06-02 16:40:02,820][243562] Updated weights for policy 0, policy_version 41387 (0.0008) +[2026-06-02 16:40:03,468][243562] Updated weights for policy 0, policy_version 41398 (0.0008) +[2026-06-02 16:40:03,641][243562] Updated weights for policy 0, policy_version 41409 (0.0008) +[2026-06-02 16:40:03,805][243562] Updated weights for policy 0, policy_version 41419 (0.0008) +[2026-06-02 16:40:03,964][243562] Updated weights for policy 0, policy_version 41429 (0.0008) +[2026-06-02 16:40:04,129][243562] Updated weights for policy 0, policy_version 41439 (0.0008) +[2026-06-02 16:40:04,298][243562] Updated weights for policy 0, policy_version 41449 (0.0008) +[2026-06-02 16:40:04,811][235960] Fps is (10 sec: 22937.7, 60 sec: 21299.2, 300 sec: 21438.1). Total num frames: 21233664. Throughput: 0: 21415.8. Samples: 21226240. Policy #0 lag: (min: 96.0, avg: 110.8, max: 160.0) +[2026-06-02 16:40:04,812][235960] Avg episode reward: [(0, '1997.324')] +[2026-06-02 16:40:04,936][243562] Updated weights for policy 0, policy_version 41459 (0.0008) +[2026-06-02 16:40:05,089][243562] Updated weights for policy 0, policy_version 41469 (0.0008) +[2026-06-02 16:40:05,275][243562] Updated weights for policy 0, policy_version 41480 (0.0008) +[2026-06-02 16:40:05,435][243562] Updated weights for policy 0, policy_version 41490 (0.0008) +[2026-06-02 16:40:05,611][243562] Updated weights for policy 0, policy_version 41501 (0.0008) +[2026-06-02 16:40:05,780][243562] Updated weights for policy 0, policy_version 41511 (0.0008) +[2026-06-02 16:40:05,919][242748] Saving new best policy, reward=1997.324! +[2026-06-02 16:40:06,427][243562] Updated weights for policy 0, policy_version 41521 (0.0008) +[2026-06-02 16:40:06,582][243562] Updated weights for policy 0, policy_version 41531 (0.0008) +[2026-06-02 16:40:06,761][243562] Updated weights for policy 0, policy_version 41542 (0.0009) +[2026-06-02 16:40:06,917][243562] Updated weights for policy 0, policy_version 41552 (0.0009) +[2026-06-02 16:40:07,106][243562] Updated weights for policy 0, policy_version 41563 (0.0009) +[2026-06-02 16:40:07,264][243562] Updated weights for policy 0, policy_version 41573 (0.0008) +[2026-06-02 16:40:07,447][243562] Updated weights for policy 0, policy_version 41584 (0.0005) +[2026-06-02 16:40:08,101][243562] Updated weights for policy 0, policy_version 41595 (0.0009) +[2026-06-02 16:40:08,256][243562] Updated weights for policy 0, policy_version 41605 (0.0009) +[2026-06-02 16:40:08,423][243562] Updated weights for policy 0, policy_version 41615 (0.0009) +[2026-06-02 16:40:08,581][243562] Updated weights for policy 0, policy_version 41625 (0.0009) +[2026-06-02 16:40:08,743][243562] Updated weights for policy 0, policy_version 41635 (0.0008) +[2026-06-02 16:40:08,904][243562] Updated weights for policy 0, policy_version 41645 (0.0008) +[2026-06-02 16:40:09,566][243562] Updated weights for policy 0, policy_version 41656 (0.0009) +[2026-06-02 16:40:09,724][243562] Updated weights for policy 0, policy_version 41666 (0.0008) +[2026-06-02 16:40:09,811][235960] Fps is (10 sec: 22937.5, 60 sec: 21299.2, 300 sec: 21438.0). Total num frames: 21331968. Throughput: 0: 21347.5. Samples: 21355392. Policy #0 lag: (min: 96.0, avg: 110.8, max: 160.0) +[2026-06-02 16:40:09,812][235960] Avg episode reward: [(0, '2012.914')] +[2026-06-02 16:40:09,917][243562] Updated weights for policy 0, policy_version 41678 (0.0009) +[2026-06-02 16:40:10,079][243562] Updated weights for policy 0, policy_version 41688 (0.0008) +[2026-06-02 16:40:10,245][243562] Updated weights for policy 0, policy_version 41698 (0.0009) +[2026-06-02 16:40:10,407][243562] Updated weights for policy 0, policy_version 41708 (0.0008) +[2026-06-02 16:40:10,466][242748] Saving new best policy, reward=2012.914! +[2026-06-02 16:40:11,054][243562] Updated weights for policy 0, policy_version 41718 (0.0009) +[2026-06-02 16:40:11,224][243562] Updated weights for policy 0, policy_version 41729 (0.0008) +[2026-06-02 16:40:11,386][243562] Updated weights for policy 0, policy_version 41739 (0.0009) +[2026-06-02 16:40:11,565][243562] Updated weights for policy 0, policy_version 41750 (0.0009) +[2026-06-02 16:40:11,727][243562] Updated weights for policy 0, policy_version 41760 (0.0008) +[2026-06-02 16:40:11,895][243562] Updated weights for policy 0, policy_version 41770 (0.0008) +[2026-06-02 16:40:12,557][243562] Updated weights for policy 0, policy_version 41781 (0.0008) +[2026-06-02 16:40:12,721][243562] Updated weights for policy 0, policy_version 41791 (0.0008) +[2026-06-02 16:40:12,891][243562] Updated weights for policy 0, policy_version 41802 (0.0008) +[2026-06-02 16:40:13,062][243562] Updated weights for policy 0, policy_version 41812 (0.0008) +[2026-06-02 16:40:13,243][243562] Updated weights for policy 0, policy_version 41823 (0.0008) +[2026-06-02 16:40:13,407][243562] Updated weights for policy 0, policy_version 41833 (0.0008) +[2026-06-02 16:40:14,062][243562] Updated weights for policy 0, policy_version 41843 (0.0009) +[2026-06-02 16:40:14,223][243562] Updated weights for policy 0, policy_version 41853 (0.0008) +[2026-06-02 16:40:14,397][243562] Updated weights for policy 0, policy_version 41864 (0.0008) +[2026-06-02 16:40:14,556][243562] Updated weights for policy 0, policy_version 41874 (0.0008) +[2026-06-02 16:40:14,723][243562] Updated weights for policy 0, policy_version 41884 (0.0008) +[2026-06-02 16:40:14,811][235960] Fps is (10 sec: 19660.9, 60 sec: 21299.2, 300 sec: 21438.1). Total num frames: 21430272. Throughput: 0: 21415.8. Samples: 21484928. Policy #0 lag: (min: 96.0, avg: 110.8, max: 160.0) +[2026-06-02 16:40:14,812][235960] Avg episode reward: [(0, '2044.876')] +[2026-06-02 16:40:14,924][243562] Updated weights for policy 0, policy_version 41896 (0.0008) +[2026-06-02 16:40:15,047][242748] Saving new best policy, reward=2044.876! +[2026-06-02 16:40:15,581][243562] Updated weights for policy 0, policy_version 41906 (0.0008) +[2026-06-02 16:40:15,731][243562] Updated weights for policy 0, policy_version 41916 (0.0008) +[2026-06-02 16:40:15,887][243562] Updated weights for policy 0, policy_version 41926 (0.0008) +[2026-06-02 16:40:16,064][243562] Updated weights for policy 0, policy_version 41936 (0.0008) +[2026-06-02 16:40:16,239][243562] Updated weights for policy 0, policy_version 41947 (0.0008) +[2026-06-02 16:40:16,421][243562] Updated weights for policy 0, policy_version 41958 (0.0008) +[2026-06-02 16:40:16,575][243562] Updated weights for policy 0, policy_version 41968 (0.0008) +[2026-06-02 16:40:17,237][243562] Updated weights for policy 0, policy_version 41979 (0.0008) +[2026-06-02 16:40:17,396][243562] Updated weights for policy 0, policy_version 41989 (0.0008) +[2026-06-02 16:40:17,564][243562] Updated weights for policy 0, policy_version 41999 (0.0008) +[2026-06-02 16:40:17,743][243562] Updated weights for policy 0, policy_version 42010 (0.0008) +[2026-06-02 16:40:17,908][243562] Updated weights for policy 0, policy_version 42020 (0.0008) +[2026-06-02 16:40:18,074][243562] Updated weights for policy 0, policy_version 42030 (0.0008) +[2026-06-02 16:40:18,711][243562] Updated weights for policy 0, policy_version 42040 (0.0008) +[2026-06-02 16:40:18,862][243562] Updated weights for policy 0, policy_version 42050 (0.0008) +[2026-06-02 16:40:19,030][243562] Updated weights for policy 0, policy_version 42060 (0.0008) +[2026-06-02 16:40:19,184][243562] Updated weights for policy 0, policy_version 42070 (0.0008) +[2026-06-02 16:40:19,352][243562] Updated weights for policy 0, policy_version 42080 (0.0008) +[2026-06-02 16:40:19,529][243562] Updated weights for policy 0, policy_version 42091 (0.0008) +[2026-06-02 16:40:19,811][235960] Fps is (10 sec: 22937.8, 60 sec: 21845.3, 300 sec: 21549.1). Total num frames: 21561344. Throughput: 0: 21469.9. Samples: 21550208. Policy #0 lag: (min: 96.0, avg: 110.8, max: 160.0) +[2026-06-02 16:40:19,812][235960] Avg episode reward: [(0, '2061.329')] +[2026-06-02 16:40:19,817][242748] Saving new best policy, reward=2061.329! +[2026-06-02 16:40:20,205][243562] Updated weights for policy 0, policy_version 42101 (0.0009) +[2026-06-02 16:40:20,369][243562] Updated weights for policy 0, policy_version 42111 (0.0008) +[2026-06-02 16:40:20,529][243562] Updated weights for policy 0, policy_version 42121 (0.0009) +[2026-06-02 16:40:20,743][243562] Updated weights for policy 0, policy_version 42134 (0.0009) +[2026-06-02 16:40:20,906][243562] Updated weights for policy 0, policy_version 42144 (0.0008) +[2026-06-02 16:40:21,087][243562] Updated weights for policy 0, policy_version 42155 (0.0009) +[2026-06-02 16:40:21,720][243562] Updated weights for policy 0, policy_version 42165 (0.0008) +[2026-06-02 16:40:21,883][243562] Updated weights for policy 0, policy_version 42175 (0.0008) +[2026-06-02 16:40:22,055][243562] Updated weights for policy 0, policy_version 42186 (0.0009) +[2026-06-02 16:40:22,217][243562] Updated weights for policy 0, policy_version 42196 (0.0008) +[2026-06-02 16:40:22,383][243562] Updated weights for policy 0, policy_version 42206 (0.0008) +[2026-06-02 16:40:22,553][243562] Updated weights for policy 0, policy_version 42216 (0.0009) +[2026-06-02 16:40:23,198][243562] Updated weights for policy 0, policy_version 42226 (0.0009) +[2026-06-02 16:40:23,357][243562] Updated weights for policy 0, policy_version 42236 (0.0009) +[2026-06-02 16:40:23,517][243562] Updated weights for policy 0, policy_version 42246 (0.0008) +[2026-06-02 16:40:23,672][243562] Updated weights for policy 0, policy_version 42256 (0.0008) +[2026-06-02 16:40:23,843][243562] Updated weights for policy 0, policy_version 42266 (0.0008) +[2026-06-02 16:40:24,008][243562] Updated weights for policy 0, policy_version 42276 (0.0008) +[2026-06-02 16:40:24,189][243562] Updated weights for policy 0, policy_version 42287 (0.0008) +[2026-06-02 16:40:24,811][235960] Fps is (10 sec: 22937.6, 60 sec: 21299.2, 300 sec: 21438.1). Total num frames: 21659648. Throughput: 0: 21532.5. Samples: 21679488. Policy #0 lag: (min: 6.0, avg: 22.7, max: 70.0) +[2026-06-02 16:40:24,812][235960] Avg episode reward: [(0, '2078.365')] +[2026-06-02 16:40:24,827][243562] Updated weights for policy 0, policy_version 42297 (0.0011) +[2026-06-02 16:40:24,992][243562] Updated weights for policy 0, policy_version 42307 (0.0009) +[2026-06-02 16:40:25,149][243562] Updated weights for policy 0, policy_version 42317 (0.0009) +[2026-06-02 16:40:25,310][243562] Updated weights for policy 0, policy_version 42327 (0.0008) +[2026-06-02 16:40:25,474][243562] Updated weights for policy 0, policy_version 42337 (0.0008) +[2026-06-02 16:40:25,635][243562] Updated weights for policy 0, policy_version 42347 (0.0009) +[2026-06-02 16:40:25,713][242748] Saving new best policy, reward=2078.365! +[2026-06-02 16:40:26,277][243562] Updated weights for policy 0, policy_version 42357 (0.0009) +[2026-06-02 16:40:26,441][243562] Updated weights for policy 0, policy_version 42367 (0.0008) +[2026-06-02 16:40:26,618][243562] Updated weights for policy 0, policy_version 42378 (0.0008) +[2026-06-02 16:40:26,791][243562] Updated weights for policy 0, policy_version 42389 (0.0008) +[2026-06-02 16:40:26,962][243562] Updated weights for policy 0, policy_version 42399 (0.0008) +[2026-06-02 16:40:27,131][243562] Updated weights for policy 0, policy_version 42409 (0.0008) +[2026-06-02 16:40:27,779][243562] Updated weights for policy 0, policy_version 42419 (0.0009) +[2026-06-02 16:40:27,934][243562] Updated weights for policy 0, policy_version 42429 (0.0008) +[2026-06-02 16:40:28,090][243562] Updated weights for policy 0, policy_version 42439 (0.0008) +[2026-06-02 16:40:28,259][243562] Updated weights for policy 0, policy_version 42449 (0.0008) +[2026-06-02 16:40:28,427][243562] Updated weights for policy 0, policy_version 42459 (0.0008) +[2026-06-02 16:40:28,602][243562] Updated weights for policy 0, policy_version 42470 (0.0008) +[2026-06-02 16:40:29,261][243562] Updated weights for policy 0, policy_version 42481 (0.0009) +[2026-06-02 16:40:29,424][243562] Updated weights for policy 0, policy_version 42491 (0.0008) +[2026-06-02 16:40:29,595][243562] Updated weights for policy 0, policy_version 42502 (0.0008) +[2026-06-02 16:40:29,762][243562] Updated weights for policy 0, policy_version 42512 (0.0009) +[2026-06-02 16:40:29,811][235960] Fps is (10 sec: 19660.8, 60 sec: 21299.2, 300 sec: 21438.0). Total num frames: 21757952. Throughput: 0: 21589.3. Samples: 21808896. Policy #0 lag: (min: 6.0, avg: 22.7, max: 70.0) +[2026-06-02 16:40:29,812][235960] Avg episode reward: [(0, '2025.228')] +[2026-06-02 16:40:29,960][243562] Updated weights for policy 0, policy_version 42524 (0.0008) +[2026-06-02 16:40:30,122][243562] Updated weights for policy 0, policy_version 42534 (0.0008) +[2026-06-02 16:40:30,279][243562] Updated weights for policy 0, policy_version 42544 (0.0008) +[2026-06-02 16:40:30,935][243562] Updated weights for policy 0, policy_version 42554 (0.0008) +[2026-06-02 16:40:31,109][243562] Updated weights for policy 0, policy_version 42565 (0.0009) +[2026-06-02 16:40:31,282][243562] Updated weights for policy 0, policy_version 42576 (0.0008) +[2026-06-02 16:40:31,451][243562] Updated weights for policy 0, policy_version 42586 (0.0008) +[2026-06-02 16:40:31,629][243562] Updated weights for policy 0, policy_version 42597 (0.0008) +[2026-06-02 16:40:31,792][243562] Updated weights for policy 0, policy_version 42607 (0.0008) +[2026-06-02 16:40:32,451][243562] Updated weights for policy 0, policy_version 42617 (0.0009) +[2026-06-02 16:40:32,628][243562] Updated weights for policy 0, policy_version 42628 (0.0008) +[2026-06-02 16:40:32,804][243562] Updated weights for policy 0, policy_version 42639 (0.0008) +[2026-06-02 16:40:32,964][243562] Updated weights for policy 0, policy_version 42649 (0.0009) +[2026-06-02 16:40:33,133][243562] Updated weights for policy 0, policy_version 42659 (0.0008) +[2026-06-02 16:40:33,295][243562] Updated weights for policy 0, policy_version 42669 (0.0008) +[2026-06-02 16:40:33,949][243562] Updated weights for policy 0, policy_version 42679 (0.0009) +[2026-06-02 16:40:34,102][243562] Updated weights for policy 0, policy_version 42689 (0.0008) +[2026-06-02 16:40:34,261][243562] Updated weights for policy 0, policy_version 42699 (0.0009) +[2026-06-02 16:40:34,427][243562] Updated weights for policy 0, policy_version 42709 (0.0008) +[2026-06-02 16:40:34,623][243562] Updated weights for policy 0, policy_version 42721 (0.0009) +[2026-06-02 16:40:34,793][243562] Updated weights for policy 0, policy_version 42731 (0.0008) +[2026-06-02 16:40:34,811][235960] Fps is (10 sec: 19660.8, 60 sec: 21299.2, 300 sec: 21438.1). Total num frames: 21856256. Throughput: 0: 21600.7. Samples: 21874048. Policy #0 lag: (min: 6.0, avg: 22.7, max: 70.0) +[2026-06-02 16:40:34,812][235960] Avg episode reward: [(0, '2081.993')] +[2026-06-02 16:40:34,869][242748] Saving new best policy, reward=2081.993! +[2026-06-02 16:40:35,438][243562] Updated weights for policy 0, policy_version 42741 (0.0008) +[2026-06-02 16:40:35,611][243562] Updated weights for policy 0, policy_version 42752 (0.0008) +[2026-06-02 16:40:35,789][243562] Updated weights for policy 0, policy_version 42763 (0.0009) +[2026-06-02 16:40:35,953][243562] Updated weights for policy 0, policy_version 42773 (0.0008) +[2026-06-02 16:40:36,116][243562] Updated weights for policy 0, policy_version 42783 (0.0008) +[2026-06-02 16:40:36,308][243562] Updated weights for policy 0, policy_version 42794 (0.0009) +[2026-06-02 16:40:36,955][243562] Updated weights for policy 0, policy_version 42805 (0.0009) +[2026-06-02 16:40:37,112][243562] Updated weights for policy 0, policy_version 42815 (0.0008) +[2026-06-02 16:40:37,267][243562] Updated weights for policy 0, policy_version 42825 (0.0007) +[2026-06-02 16:40:37,439][243562] Updated weights for policy 0, policy_version 42835 (0.0008) +[2026-06-02 16:40:37,623][243562] Updated weights for policy 0, policy_version 42846 (0.0009) +[2026-06-02 16:40:37,800][243562] Updated weights for policy 0, policy_version 42857 (0.0008) +[2026-06-02 16:40:38,455][243562] Updated weights for policy 0, policy_version 42867 (0.0009) +[2026-06-02 16:40:38,609][243562] Updated weights for policy 0, policy_version 42877 (0.0008) +[2026-06-02 16:40:38,768][243562] Updated weights for policy 0, policy_version 42887 (0.0008) +[2026-06-02 16:40:38,936][243562] Updated weights for policy 0, policy_version 42897 (0.0008) +[2026-06-02 16:40:39,093][243562] Updated weights for policy 0, policy_version 42907 (0.0008) +[2026-06-02 16:40:39,267][243562] Updated weights for policy 0, policy_version 42917 (0.0008) +[2026-06-02 16:40:39,432][243562] Updated weights for policy 0, policy_version 42927 (0.0009) +[2026-06-02 16:40:39,811][235960] Fps is (10 sec: 22937.5, 60 sec: 21845.3, 300 sec: 21549.1). Total num frames: 21987328. Throughput: 0: 21572.3. Samples: 22003328. Policy #0 lag: (min: 6.0, avg: 22.7, max: 70.0) +[2026-06-02 16:40:39,812][235960] Avg episode reward: [(0, '2123.643')] +[2026-06-02 16:40:40,057][243562] Updated weights for policy 0, policy_version 42937 (0.0008) +[2026-06-02 16:40:40,222][243562] Updated weights for policy 0, policy_version 42947 (0.0008) +[2026-06-02 16:40:40,388][243562] Updated weights for policy 0, policy_version 42957 (0.0008) +[2026-06-02 16:40:40,572][243562] Updated weights for policy 0, policy_version 42968 (0.0008) +[2026-06-02 16:40:40,734][243562] Updated weights for policy 0, policy_version 42978 (0.0008) +[2026-06-02 16:40:40,900][243562] Updated weights for policy 0, policy_version 42988 (0.0009) +[2026-06-02 16:40:40,955][242748] Saving new best policy, reward=2123.643! +[2026-06-02 16:40:41,523][243562] Updated weights for policy 0, policy_version 42998 (0.0008) +[2026-06-02 16:40:41,689][243562] Updated weights for policy 0, policy_version 43008 (0.0008) +[2026-06-02 16:40:41,854][243562] Updated weights for policy 0, policy_version 43018 (0.0009) +[2026-06-02 16:40:42,015][243562] Updated weights for policy 0, policy_version 43028 (0.0008) +[2026-06-02 16:40:42,171][243562] Updated weights for policy 0, policy_version 43038 (0.0008) +[2026-06-02 16:40:42,341][243562] Updated weights for policy 0, policy_version 43048 (0.0008) +[2026-06-02 16:40:42,993][243562] Updated weights for policy 0, policy_version 43058 (0.0009) +[2026-06-02 16:40:43,145][243562] Updated weights for policy 0, policy_version 43068 (0.0009) +[2026-06-02 16:40:43,322][243562] Updated weights for policy 0, policy_version 43079 (0.0008) +[2026-06-02 16:40:43,486][243562] Updated weights for policy 0, policy_version 43089 (0.0008) +[2026-06-02 16:40:43,638][243562] Updated weights for policy 0, policy_version 43099 (0.0008) +[2026-06-02 16:40:43,805][243562] Updated weights for policy 0, policy_version 43109 (0.0008) +[2026-06-02 16:40:43,987][243562] Updated weights for policy 0, policy_version 43120 (0.0008) +[2026-06-02 16:40:44,631][243562] Updated weights for policy 0, policy_version 43130 (0.0008) +[2026-06-02 16:40:44,797][243562] Updated weights for policy 0, policy_version 43140 (0.0009) +[2026-06-02 16:40:44,811][235960] Fps is (10 sec: 22937.6, 60 sec: 21299.2, 300 sec: 21438.0). Total num frames: 22085632. Throughput: 0: 21578.0. Samples: 22132480. Policy #0 lag: (min: 6.0, avg: 22.7, max: 70.0) +[2026-06-02 16:40:44,812][235960] Avg episode reward: [(0, '2158.527')] +[2026-06-02 16:40:44,955][243562] Updated weights for policy 0, policy_version 43150 (0.0009) +[2026-06-02 16:40:45,112][243562] Updated weights for policy 0, policy_version 43160 (0.0008) +[2026-06-02 16:40:45,318][243562] Updated weights for policy 0, policy_version 43172 (0.0008) +[2026-06-02 16:40:45,482][243562] Updated weights for policy 0, policy_version 43182 (0.0008) +[2026-06-02 16:40:45,504][242748] Saving new best policy, reward=2158.527! +[2026-06-02 16:40:46,116][243562] Updated weights for policy 0, policy_version 43192 (0.0008) +[2026-06-02 16:40:46,287][243562] Updated weights for policy 0, policy_version 43202 (0.0008) +[2026-06-02 16:40:46,447][243562] Updated weights for policy 0, policy_version 43212 (0.0008) +[2026-06-02 16:40:46,612][243562] Updated weights for policy 0, policy_version 43222 (0.0008) +[2026-06-02 16:40:46,776][243562] Updated weights for policy 0, policy_version 43232 (0.0008) +[2026-06-02 16:40:46,939][243562] Updated weights for policy 0, policy_version 43242 (0.0008) +[2026-06-02 16:40:47,588][243562] Updated weights for policy 0, policy_version 43252 (0.0008) +[2026-06-02 16:40:47,745][243562] Updated weights for policy 0, policy_version 43262 (0.0008) +[2026-06-02 16:40:47,921][243562] Updated weights for policy 0, policy_version 43273 (0.0008) +[2026-06-02 16:40:48,102][243562] Updated weights for policy 0, policy_version 43284 (0.0008) +[2026-06-02 16:40:48,275][243562] Updated weights for policy 0, policy_version 43294 (0.0008) +[2026-06-02 16:40:48,441][243562] Updated weights for policy 0, policy_version 43304 (0.0008) +[2026-06-02 16:40:49,072][243562] Updated weights for policy 0, policy_version 43314 (0.0008) +[2026-06-02 16:40:49,230][243562] Updated weights for policy 0, policy_version 43324 (0.0008) +[2026-06-02 16:40:49,386][243562] Updated weights for policy 0, policy_version 43334 (0.0008) +[2026-06-02 16:40:49,554][243562] Updated weights for policy 0, policy_version 43344 (0.0008) +[2026-06-02 16:40:49,724][243562] Updated weights for policy 0, policy_version 43354 (0.0009) +[2026-06-02 16:40:49,811][235960] Fps is (10 sec: 19660.9, 60 sec: 21299.2, 300 sec: 21438.0). Total num frames: 22183936. Throughput: 0: 21580.8. Samples: 22197376. Policy #0 lag: (min: 6.0, avg: 22.7, max: 70.0) +[2026-06-02 16:40:49,812][235960] Avg episode reward: [(0, '2119.315')] +[2026-06-02 16:40:49,882][243562] Updated weights for policy 0, policy_version 43364 (0.0008) +[2026-06-02 16:40:50,067][243562] Updated weights for policy 0, policy_version 43375 (0.0008) +[2026-06-02 16:40:50,727][243562] Updated weights for policy 0, policy_version 43385 (0.0009) +[2026-06-02 16:40:50,892][243562] Updated weights for policy 0, policy_version 43395 (0.0009) +[2026-06-02 16:40:51,051][243562] Updated weights for policy 0, policy_version 43405 (0.0008) +[2026-06-02 16:40:51,216][243562] Updated weights for policy 0, policy_version 43415 (0.0009) +[2026-06-02 16:40:51,385][243562] Updated weights for policy 0, policy_version 43425 (0.0010) +[2026-06-02 16:40:51,554][243562] Updated weights for policy 0, policy_version 43435 (0.0008) +[2026-06-02 16:40:52,175][243562] Updated weights for policy 0, policy_version 43445 (0.0008) +[2026-06-02 16:40:52,341][243562] Updated weights for policy 0, policy_version 43455 (0.0008) +[2026-06-02 16:40:52,500][243562] Updated weights for policy 0, policy_version 43465 (0.0008) +[2026-06-02 16:40:52,668][243562] Updated weights for policy 0, policy_version 43475 (0.0008) +[2026-06-02 16:40:52,833][243562] Updated weights for policy 0, policy_version 43485 (0.0008) +[2026-06-02 16:40:52,988][243562] Updated weights for policy 0, policy_version 43495 (0.0008) +[2026-06-02 16:40:53,636][243562] Updated weights for policy 0, policy_version 43505 (0.0008) +[2026-06-02 16:40:53,788][243562] Updated weights for policy 0, policy_version 43515 (0.0008) +[2026-06-02 16:40:53,949][243562] Updated weights for policy 0, policy_version 43525 (0.0008) +[2026-06-02 16:40:54,126][243562] Updated weights for policy 0, policy_version 43536 (0.0007) +[2026-06-02 16:40:54,322][243562] Updated weights for policy 0, policy_version 43548 (0.0008) +[2026-06-02 16:40:54,479][243562] Updated weights for policy 0, policy_version 43558 (0.0008) +[2026-06-02 16:40:54,647][243562] Updated weights for policy 0, policy_version 43568 (0.0008) +[2026-06-02 16:40:54,811][235960] Fps is (10 sec: 22937.6, 60 sec: 21845.4, 300 sec: 21549.1). Total num frames: 22315008. Throughput: 0: 21578.0. Samples: 22326400. Policy #0 lag: (min: 63.0, avg: 79.1, max: 127.0) +[2026-06-02 16:40:54,812][235960] Avg episode reward: [(0, '2113.162')] +[2026-06-02 16:40:55,299][243562] Updated weights for policy 0, policy_version 43578 (0.0008) +[2026-06-02 16:40:55,476][243562] Updated weights for policy 0, policy_version 43589 (0.0009) +[2026-06-02 16:40:55,636][243562] Updated weights for policy 0, policy_version 43599 (0.0009) +[2026-06-02 16:40:55,793][243562] Updated weights for policy 0, policy_version 43609 (0.0008) +[2026-06-02 16:40:55,965][243562] Updated weights for policy 0, policy_version 43619 (0.0006) +[2026-06-02 16:40:56,135][243562] Updated weights for policy 0, policy_version 43629 (0.0005) +[2026-06-02 16:40:56,789][243562] Updated weights for policy 0, policy_version 43640 (0.0008) +[2026-06-02 16:40:56,955][243562] Updated weights for policy 0, policy_version 43650 (0.0009) +[2026-06-02 16:40:57,126][243562] Updated weights for policy 0, policy_version 43660 (0.0008) +[2026-06-02 16:40:57,282][243562] Updated weights for policy 0, policy_version 43670 (0.0008) +[2026-06-02 16:40:57,463][243562] Updated weights for policy 0, policy_version 43681 (0.0008) +[2026-06-02 16:40:57,645][243562] Updated weights for policy 0, policy_version 43692 (0.0007) +[2026-06-02 16:40:58,288][243562] Updated weights for policy 0, policy_version 43702 (0.0009) +[2026-06-02 16:40:58,454][243562] Updated weights for policy 0, policy_version 43713 (0.0008) +[2026-06-02 16:40:58,618][243562] Updated weights for policy 0, policy_version 43723 (0.0008) +[2026-06-02 16:40:58,807][243562] Updated weights for policy 0, policy_version 43734 (0.0008) +[2026-06-02 16:40:58,971][243562] Updated weights for policy 0, policy_version 43744 (0.0009) +[2026-06-02 16:40:59,159][243562] Updated weights for policy 0, policy_version 43755 (0.0009) +[2026-06-02 16:40:59,793][243562] Updated weights for policy 0, policy_version 43766 (0.0009) +[2026-06-02 16:40:59,811][235960] Fps is (10 sec: 22937.5, 60 sec: 21845.3, 300 sec: 21438.0). Total num frames: 22413312. Throughput: 0: 21589.3. Samples: 22456448. Policy #0 lag: (min: 63.0, avg: 79.1, max: 127.0) +[2026-06-02 16:40:59,812][235960] Avg episode reward: [(0, '2093.213')] +[2026-06-02 16:40:59,945][243562] Updated weights for policy 0, policy_version 43776 (0.0009) +[2026-06-02 16:41:00,129][243562] Updated weights for policy 0, policy_version 43787 (0.0008) +[2026-06-02 16:41:00,291][243562] Updated weights for policy 0, policy_version 43797 (0.0008) +[2026-06-02 16:41:00,450][243562] Updated weights for policy 0, policy_version 43807 (0.0008) +[2026-06-02 16:41:00,612][243562] Updated weights for policy 0, policy_version 43817 (0.0009) +[2026-06-02 16:41:01,277][243562] Updated weights for policy 0, policy_version 43827 (0.0009) +[2026-06-02 16:41:01,428][243562] Updated weights for policy 0, policy_version 43837 (0.0008) +[2026-06-02 16:41:01,590][243562] Updated weights for policy 0, policy_version 43847 (0.0009) +[2026-06-02 16:41:01,753][243562] Updated weights for policy 0, policy_version 43857 (0.0009) +[2026-06-02 16:41:01,916][243562] Updated weights for policy 0, policy_version 43867 (0.0008) +[2026-06-02 16:41:02,100][243562] Updated weights for policy 0, policy_version 43878 (0.0009) +[2026-06-02 16:41:02,774][243562] Updated weights for policy 0, policy_version 43889 (0.0011) +[2026-06-02 16:41:02,927][243562] Updated weights for policy 0, policy_version 43899 (0.0008) +[2026-06-02 16:41:03,093][243562] Updated weights for policy 0, policy_version 43909 (0.0008) +[2026-06-02 16:41:03,259][243562] Updated weights for policy 0, policy_version 43919 (0.0008) +[2026-06-02 16:41:03,422][243562] Updated weights for policy 0, policy_version 43929 (0.0008) +[2026-06-02 16:41:03,584][243562] Updated weights for policy 0, policy_version 43939 (0.0009) +[2026-06-02 16:41:03,762][243562] Updated weights for policy 0, policy_version 43950 (0.0009) +[2026-06-02 16:41:04,407][243562] Updated weights for policy 0, policy_version 43960 (0.0009) +[2026-06-02 16:41:04,562][243562] Updated weights for policy 0, policy_version 43970 (0.0008) +[2026-06-02 16:41:04,727][243562] Updated weights for policy 0, policy_version 43980 (0.0009) +[2026-06-02 16:41:04,811][235960] Fps is (10 sec: 19660.8, 60 sec: 21299.2, 300 sec: 21438.0). Total num frames: 22511616. Throughput: 0: 21549.5. Samples: 22519936. Policy #0 lag: (min: 63.0, avg: 79.1, max: 127.0) +[2026-06-02 16:41:04,812][235960] Avg episode reward: [(0, '2118.442')] +[2026-06-02 16:41:04,886][243562] Updated weights for policy 0, policy_version 43990 (0.0009) +[2026-06-02 16:41:05,050][243562] Updated weights for policy 0, policy_version 44000 (0.0008) +[2026-06-02 16:41:05,214][243562] Updated weights for policy 0, policy_version 44010 (0.0008) +[2026-06-02 16:41:05,870][243562] Updated weights for policy 0, policy_version 44020 (0.0008) +[2026-06-02 16:41:06,026][243562] Updated weights for policy 0, policy_version 44030 (0.0008) +[2026-06-02 16:41:06,186][243562] Updated weights for policy 0, policy_version 44040 (0.0008) +[2026-06-02 16:41:06,350][243562] Updated weights for policy 0, policy_version 44050 (0.0009) +[2026-06-02 16:41:06,522][243562] Updated weights for policy 0, policy_version 44061 (0.0009) +[2026-06-02 16:41:06,682][243562] Updated weights for policy 0, policy_version 44071 (0.0008) +[2026-06-02 16:41:07,344][243562] Updated weights for policy 0, policy_version 44081 (0.0008) +[2026-06-02 16:41:07,501][243562] Updated weights for policy 0, policy_version 44091 (0.0008) +[2026-06-02 16:41:07,654][243562] Updated weights for policy 0, policy_version 44101 (0.0008) +[2026-06-02 16:41:07,835][243562] Updated weights for policy 0, policy_version 44112 (0.0008) +[2026-06-02 16:41:07,992][243562] Updated weights for policy 0, policy_version 44122 (0.0008) +[2026-06-02 16:41:08,157][243562] Updated weights for policy 0, policy_version 44132 (0.0008) +[2026-06-02 16:41:08,322][243562] Updated weights for policy 0, policy_version 44142 (0.0008) +[2026-06-02 16:41:09,000][243562] Updated weights for policy 0, policy_version 44153 (0.0009) +[2026-06-02 16:41:09,158][243562] Updated weights for policy 0, policy_version 44163 (0.0008) +[2026-06-02 16:41:09,314][243562] Updated weights for policy 0, policy_version 44173 (0.0008) +[2026-06-02 16:41:09,498][243562] Updated weights for policy 0, policy_version 44184 (0.0008) +[2026-06-02 16:41:09,683][243562] Updated weights for policy 0, policy_version 44195 (0.0008) +[2026-06-02 16:41:09,811][235960] Fps is (10 sec: 19660.7, 60 sec: 21299.2, 300 sec: 21438.0). Total num frames: 22609920. Throughput: 0: 21452.8. Samples: 22644864. Policy #0 lag: (min: 63.0, avg: 79.1, max: 127.0) +[2026-06-02 16:41:09,812][235960] Avg episode reward: [(0, '2174.537')] +[2026-06-02 16:41:09,845][243562] Updated weights for policy 0, policy_version 44205 (0.0010) +[2026-06-02 16:41:09,887][242748] Saving new best policy, reward=2174.537! +[2026-06-02 16:41:10,509][243562] Updated weights for policy 0, policy_version 44215 (0.0008) +[2026-06-02 16:41:10,686][243562] Updated weights for policy 0, policy_version 44226 (0.0008) +[2026-06-02 16:41:10,857][243562] Updated weights for policy 0, policy_version 44237 (0.0008) +[2026-06-02 16:41:11,027][243562] Updated weights for policy 0, policy_version 44247 (0.0009) +[2026-06-02 16:41:11,193][243562] Updated weights for policy 0, policy_version 44257 (0.0008) +[2026-06-02 16:41:11,367][243562] Updated weights for policy 0, policy_version 44268 (0.0008) +[2026-06-02 16:41:12,009][243562] Updated weights for policy 0, policy_version 44279 (0.0009) +[2026-06-02 16:41:12,177][243562] Updated weights for policy 0, policy_version 44289 (0.0009) +[2026-06-02 16:41:12,355][243562] Updated weights for policy 0, policy_version 44300 (0.0008) +[2026-06-02 16:41:12,517][243562] Updated weights for policy 0, policy_version 44310 (0.0008) +[2026-06-02 16:41:12,677][243562] Updated weights for policy 0, policy_version 44320 (0.0009) +[2026-06-02 16:41:12,884][243562] Updated weights for policy 0, policy_version 44332 (0.0009) +[2026-06-02 16:41:13,524][243562] Updated weights for policy 0, policy_version 44342 (0.0009) +[2026-06-02 16:41:13,682][243562] Updated weights for policy 0, policy_version 44352 (0.0008) +[2026-06-02 16:41:13,857][243562] Updated weights for policy 0, policy_version 44363 (0.0008) +[2026-06-02 16:41:14,031][243562] Updated weights for policy 0, policy_version 44373 (0.0008) +[2026-06-02 16:41:14,192][243562] Updated weights for policy 0, policy_version 44383 (0.0008) +[2026-06-02 16:41:14,355][243562] Updated weights for policy 0, policy_version 44393 (0.0008) +[2026-06-02 16:41:14,811][235960] Fps is (10 sec: 22937.6, 60 sec: 21845.3, 300 sec: 21549.1). Total num frames: 22740992. Throughput: 0: 21364.6. Samples: 22770304. Policy #0 lag: (min: 63.0, avg: 79.1, max: 127.0) +[2026-06-02 16:41:14,812][235960] Avg episode reward: [(0, '2138.790')] +[2026-06-02 16:41:15,001][243562] Updated weights for policy 0, policy_version 44403 (0.0009) +[2026-06-02 16:41:15,188][243562] Updated weights for policy 0, policy_version 44415 (0.0008) +[2026-06-02 16:41:15,369][243562] Updated weights for policy 0, policy_version 44426 (0.0008) +[2026-06-02 16:41:15,533][243562] Updated weights for policy 0, policy_version 44436 (0.0008) +[2026-06-02 16:41:15,692][243562] Updated weights for policy 0, policy_version 44446 (0.0007) +[2026-06-02 16:41:15,860][243562] Updated weights for policy 0, policy_version 44456 (0.0008) +[2026-06-02 16:41:16,496][243562] Updated weights for policy 0, policy_version 44466 (0.0006) +[2026-06-02 16:41:16,679][243562] Updated weights for policy 0, policy_version 44478 (0.0009) +[2026-06-02 16:41:16,836][243562] Updated weights for policy 0, policy_version 44488 (0.0008) +[2026-06-02 16:41:16,999][243562] Updated weights for policy 0, policy_version 44498 (0.0008) +[2026-06-02 16:41:17,173][243562] Updated weights for policy 0, policy_version 44508 (0.0008) +[2026-06-02 16:41:17,329][243562] Updated weights for policy 0, policy_version 44518 (0.0008) +[2026-06-02 16:41:17,487][243562] Updated weights for policy 0, policy_version 44528 (0.0008) +[2026-06-02 16:41:18,138][243562] Updated weights for policy 0, policy_version 44538 (0.0009) +[2026-06-02 16:41:18,296][243562] Updated weights for policy 0, policy_version 44548 (0.0008) +[2026-06-02 16:41:18,460][243562] Updated weights for policy 0, policy_version 44558 (0.0008) +[2026-06-02 16:41:18,620][243562] Updated weights for policy 0, policy_version 44568 (0.0008) +[2026-06-02 16:41:18,788][243562] Updated weights for policy 0, policy_version 44578 (0.0008) +[2026-06-02 16:41:18,961][243562] Updated weights for policy 0, policy_version 44588 (0.0008) +[2026-06-02 16:41:19,610][243562] Updated weights for policy 0, policy_version 44598 (0.0009) +[2026-06-02 16:41:19,759][243562] Updated weights for policy 0, policy_version 44608 (0.0008) +[2026-06-02 16:41:19,811][235960] Fps is (10 sec: 22937.7, 60 sec: 21299.2, 300 sec: 21438.0). Total num frames: 22839296. Throughput: 0: 21330.5. Samples: 22833920. Policy #0 lag: (min: 39.0, avg: 54.0, max: 103.0) +[2026-06-02 16:41:19,812][235960] Avg episode reward: [(0, '2138.790')] +[2026-06-02 16:41:19,919][243562] Updated weights for policy 0, policy_version 44618 (0.0008) +[2026-06-02 16:41:20,091][243562] Updated weights for policy 0, policy_version 44628 (0.0008) +[2026-06-02 16:41:20,253][243562] Updated weights for policy 0, policy_version 44638 (0.0008) +[2026-06-02 16:41:20,415][243562] Updated weights for policy 0, policy_version 44648 (0.0008) +[2026-06-02 16:41:21,085][243562] Updated weights for policy 0, policy_version 44659 (0.0008) +[2026-06-02 16:41:21,248][243562] Updated weights for policy 0, policy_version 44669 (0.0009) +[2026-06-02 16:41:21,409][243562] Updated weights for policy 0, policy_version 44679 (0.0008) +[2026-06-02 16:41:21,588][243562] Updated weights for policy 0, policy_version 44690 (0.0008) +[2026-06-02 16:41:21,744][243562] Updated weights for policy 0, policy_version 44700 (0.0008) +[2026-06-02 16:41:21,906][243562] Updated weights for policy 0, policy_version 44710 (0.0008) +[2026-06-02 16:41:22,067][243562] Updated weights for policy 0, policy_version 44720 (0.0008) +[2026-06-02 16:41:22,723][243562] Updated weights for policy 0, policy_version 44730 (0.0009) +[2026-06-02 16:41:22,875][243562] Updated weights for policy 0, policy_version 44740 (0.0008) +[2026-06-02 16:41:23,056][243562] Updated weights for policy 0, policy_version 44751 (0.0010) +[2026-06-02 16:41:23,227][243562] Updated weights for policy 0, policy_version 44761 (0.0008) +[2026-06-02 16:41:23,392][243562] Updated weights for policy 0, policy_version 44771 (0.0008) +[2026-06-02 16:41:23,559][243562] Updated weights for policy 0, policy_version 44781 (0.0008) +[2026-06-02 16:41:24,212][243562] Updated weights for policy 0, policy_version 44791 (0.0009) +[2026-06-02 16:41:24,365][243562] Updated weights for policy 0, policy_version 44801 (0.0008) +[2026-06-02 16:41:24,532][243562] Updated weights for policy 0, policy_version 44811 (0.0008) +[2026-06-02 16:41:24,715][243562] Updated weights for policy 0, policy_version 44822 (0.0008) +[2026-06-02 16:41:24,811][235960] Fps is (10 sec: 19660.8, 60 sec: 21299.2, 300 sec: 21438.0). Total num frames: 22937600. Throughput: 0: 21259.4. Samples: 22960000. Policy #0 lag: (min: 39.0, avg: 54.0, max: 103.0) +[2026-06-02 16:41:24,812][235960] Avg episode reward: [(0, '2114.818')] +[2026-06-02 16:41:24,880][243562] Updated weights for policy 0, policy_version 44832 (0.0009) +[2026-06-02 16:41:25,080][243562] Updated weights for policy 0, policy_version 44844 (0.0009) +[2026-06-02 16:41:25,724][243562] Updated weights for policy 0, policy_version 44854 (0.0008) +[2026-06-02 16:41:25,893][243562] Updated weights for policy 0, policy_version 44864 (0.0008) +[2026-06-02 16:41:26,054][243562] Updated weights for policy 0, policy_version 44874 (0.0009) +[2026-06-02 16:41:26,232][243562] Updated weights for policy 0, policy_version 44885 (0.0008) +[2026-06-02 16:41:26,399][243562] Updated weights for policy 0, policy_version 44895 (0.0008) +[2026-06-02 16:41:26,561][243562] Updated weights for policy 0, policy_version 44905 (0.0008) +[2026-06-02 16:41:27,202][243562] Updated weights for policy 0, policy_version 44915 (0.0008) +[2026-06-02 16:41:27,367][243562] Updated weights for policy 0, policy_version 44925 (0.0008) +[2026-06-02 16:41:27,523][243562] Updated weights for policy 0, policy_version 44935 (0.0008) +[2026-06-02 16:41:27,688][243562] Updated weights for policy 0, policy_version 44945 (0.0008) +[2026-06-02 16:41:27,853][243562] Updated weights for policy 0, policy_version 44955 (0.0008) +[2026-06-02 16:41:28,034][243562] Updated weights for policy 0, policy_version 44966 (0.0008) +[2026-06-02 16:41:28,194][243562] Updated weights for policy 0, policy_version 44976 (0.0008) +[2026-06-02 16:41:28,850][243562] Updated weights for policy 0, policy_version 44986 (0.0009) +[2026-06-02 16:41:29,019][243562] Updated weights for policy 0, policy_version 44997 (0.0008) +[2026-06-02 16:41:29,193][243562] Updated weights for policy 0, policy_version 45007 (0.0008) +[2026-06-02 16:41:29,381][243562] Updated weights for policy 0, policy_version 45019 (0.0008) +[2026-06-02 16:41:29,557][243562] Updated weights for policy 0, policy_version 45030 (0.0009) +[2026-06-02 16:41:29,718][243562] Updated weights for policy 0, policy_version 45040 (0.0008) +[2026-06-02 16:41:29,811][235960] Fps is (10 sec: 22937.7, 60 sec: 21845.3, 300 sec: 21549.1). Total num frames: 23068672. Throughput: 0: 21267.9. Samples: 23089536. Policy #0 lag: (min: 39.0, avg: 54.0, max: 103.0) +[2026-06-02 16:41:29,812][235960] Avg episode reward: [(0, '2155.161')] +[2026-06-02 16:41:30,380][243562] Updated weights for policy 0, policy_version 45050 (0.0009) +[2026-06-02 16:41:30,558][243562] Updated weights for policy 0, policy_version 45061 (0.0009) +[2026-06-02 16:41:30,741][243562] Updated weights for policy 0, policy_version 45072 (0.0009) +[2026-06-02 16:41:30,905][243562] Updated weights for policy 0, policy_version 45082 (0.0008) +[2026-06-02 16:41:31,077][243562] Updated weights for policy 0, policy_version 45092 (0.0008) +[2026-06-02 16:41:31,256][243562] Updated weights for policy 0, policy_version 45103 (0.0009) +[2026-06-02 16:41:31,921][243562] Updated weights for policy 0, policy_version 45115 (0.0008) +[2026-06-02 16:41:32,095][243562] Updated weights for policy 0, policy_version 45126 (0.0008) +[2026-06-02 16:41:32,265][243562] Updated weights for policy 0, policy_version 45136 (0.0008) +[2026-06-02 16:41:32,466][243562] Updated weights for policy 0, policy_version 45148 (0.0008) +[2026-06-02 16:41:32,626][243562] Updated weights for policy 0, policy_version 45158 (0.0009) +[2026-06-02 16:41:32,787][243562] Updated weights for policy 0, policy_version 45168 (0.0008) +[2026-06-02 16:41:33,433][243562] Updated weights for policy 0, policy_version 45178 (0.0009) +[2026-06-02 16:41:33,587][243562] Updated weights for policy 0, policy_version 45188 (0.0008) +[2026-06-02 16:41:33,745][243562] Updated weights for policy 0, policy_version 45198 (0.0008) +[2026-06-02 16:41:33,933][243562] Updated weights for policy 0, policy_version 45209 (0.0009) +[2026-06-02 16:41:34,092][243562] Updated weights for policy 0, policy_version 45219 (0.0008) +[2026-06-02 16:41:34,262][243562] Updated weights for policy 0, policy_version 45229 (0.0009) +[2026-06-02 16:41:34,811][235960] Fps is (10 sec: 22937.5, 60 sec: 21845.3, 300 sec: 21438.0). Total num frames: 23166976. Throughput: 0: 21285.0. Samples: 23155200. Policy #0 lag: (min: 39.0, avg: 54.0, max: 103.0) +[2026-06-02 16:41:34,812][235960] Avg episode reward: [(0, '2202.685')] +[2026-06-02 16:41:34,901][243562] Updated weights for policy 0, policy_version 45239 (0.0010) +[2026-06-02 16:41:35,096][243562] Updated weights for policy 0, policy_version 45251 (0.0008) +[2026-06-02 16:41:35,253][243562] Updated weights for policy 0, policy_version 45261 (0.0008) +[2026-06-02 16:41:35,423][243562] Updated weights for policy 0, policy_version 45271 (0.0008) +[2026-06-02 16:41:35,593][243562] Updated weights for policy 0, policy_version 45281 (0.0008) +[2026-06-02 16:41:35,760][243562] Updated weights for policy 0, policy_version 45291 (0.0008) +[2026-06-02 16:41:35,830][242748] Saving new best policy, reward=2202.685! +[2026-06-02 16:41:36,411][243562] Updated weights for policy 0, policy_version 45301 (0.0008) +[2026-06-02 16:41:36,565][243562] Updated weights for policy 0, policy_version 45311 (0.0008) +[2026-06-02 16:41:36,736][243562] Updated weights for policy 0, policy_version 45321 (0.0009) +[2026-06-02 16:41:36,909][243562] Updated weights for policy 0, policy_version 45332 (0.0009) +[2026-06-02 16:41:37,082][243562] Updated weights for policy 0, policy_version 45342 (0.0008) +[2026-06-02 16:41:37,243][243562] Updated weights for policy 0, policy_version 45352 (0.0009) +[2026-06-02 16:41:37,894][243562] Updated weights for policy 0, policy_version 45362 (0.0009) +[2026-06-02 16:41:38,056][243562] Updated weights for policy 0, policy_version 45372 (0.0008) +[2026-06-02 16:41:38,211][243562] Updated weights for policy 0, policy_version 45382 (0.0008) +[2026-06-02 16:41:38,383][243562] Updated weights for policy 0, policy_version 45392 (0.0008) +[2026-06-02 16:41:38,549][243562] Updated weights for policy 0, policy_version 45402 (0.0008) +[2026-06-02 16:41:38,710][243562] Updated weights for policy 0, policy_version 45412 (0.0008) +[2026-06-02 16:41:38,874][243562] Updated weights for policy 0, policy_version 45422 (0.0009) +[2026-06-02 16:41:39,518][243562] Updated weights for policy 0, policy_version 45432 (0.0008) +[2026-06-02 16:41:39,681][243562] Updated weights for policy 0, policy_version 45442 (0.0008) +[2026-06-02 16:41:39,811][235960] Fps is (10 sec: 19660.7, 60 sec: 21299.2, 300 sec: 21438.0). Total num frames: 23265280. Throughput: 0: 21307.7. Samples: 23285248. Policy #0 lag: (min: 39.0, avg: 54.0, max: 103.0) +[2026-06-02 16:41:39,812][235960] Avg episode reward: [(0, '2219.014')] +[2026-06-02 16:41:39,841][243562] Updated weights for policy 0, policy_version 45452 (0.0008) +[2026-06-02 16:41:40,027][243562] Updated weights for policy 0, policy_version 45464 (0.0008) +[2026-06-02 16:41:40,250][243562] Updated weights for policy 0, policy_version 45478 (0.0008) +[2026-06-02 16:41:40,405][242748] Saving new best policy, reward=2219.014! +[2026-06-02 16:41:40,946][243562] Updated weights for policy 0, policy_version 45489 (0.0009) +[2026-06-02 16:41:41,099][243562] Updated weights for policy 0, policy_version 45499 (0.0009) +[2026-06-02 16:41:41,280][243562] Updated weights for policy 0, policy_version 45511 (0.0008) +[2026-06-02 16:41:41,472][243562] Updated weights for policy 0, policy_version 45523 (0.0008) +[2026-06-02 16:41:41,629][243562] Updated weights for policy 0, policy_version 45533 (0.0009) +[2026-06-02 16:41:41,798][243562] Updated weights for policy 0, policy_version 45543 (0.0008) +[2026-06-02 16:41:42,460][243562] Updated weights for policy 0, policy_version 45553 (0.0009) +[2026-06-02 16:41:42,635][243562] Updated weights for policy 0, policy_version 45565 (0.0008) +[2026-06-02 16:41:42,805][243562] Updated weights for policy 0, policy_version 45576 (0.0009) +[2026-06-02 16:41:42,959][243562] Updated weights for policy 0, policy_version 45586 (0.0007) +[2026-06-02 16:41:43,127][243562] Updated weights for policy 0, policy_version 45596 (0.0008) +[2026-06-02 16:41:43,320][243562] Updated weights for policy 0, policy_version 45608 (0.0009) +[2026-06-02 16:41:43,995][243562] Updated weights for policy 0, policy_version 45618 (0.0009) +[2026-06-02 16:41:44,159][243562] Updated weights for policy 0, policy_version 45629 (0.0008) +[2026-06-02 16:41:44,346][243562] Updated weights for policy 0, policy_version 45641 (0.0009) +[2026-06-02 16:41:44,522][243562] Updated weights for policy 0, policy_version 45652 (0.0009) +[2026-06-02 16:41:44,686][243562] Updated weights for policy 0, policy_version 45662 (0.0008) +[2026-06-02 16:41:44,811][235960] Fps is (10 sec: 19660.7, 60 sec: 21299.2, 300 sec: 21438.0). Total num frames: 23363584. Throughput: 0: 21273.6. Samples: 23413760. Policy #0 lag: (min: 39.0, avg: 54.0, max: 103.0) +[2026-06-02 16:41:44,812][235960] Avg episode reward: [(0, '2269.666')] +[2026-06-02 16:41:44,857][243562] Updated weights for policy 0, policy_version 45673 (0.0008) +[2026-06-02 16:41:44,961][242748] Saving new best policy, reward=2269.666! +[2026-06-02 16:41:45,543][243562] Updated weights for policy 0, policy_version 45684 (0.0009) +[2026-06-02 16:41:45,710][243562] Updated weights for policy 0, policy_version 45695 (0.0008) +[2026-06-02 16:41:45,893][243562] Updated weights for policy 0, policy_version 45706 (0.0009) +[2026-06-02 16:41:46,080][243562] Updated weights for policy 0, policy_version 45718 (0.0009) +[2026-06-02 16:41:46,238][243562] Updated weights for policy 0, policy_version 45728 (0.0008) +[2026-06-02 16:41:46,423][243562] Updated weights for policy 0, policy_version 45739 (0.0009) +[2026-06-02 16:41:47,082][243562] Updated weights for policy 0, policy_version 45749 (0.0009) +[2026-06-02 16:41:47,241][243562] Updated weights for policy 0, policy_version 45759 (0.0009) +[2026-06-02 16:41:47,417][243562] Updated weights for policy 0, policy_version 45770 (0.0009) +[2026-06-02 16:41:47,589][243562] Updated weights for policy 0, policy_version 45780 (0.0009) +[2026-06-02 16:41:47,769][243562] Updated weights for policy 0, policy_version 45791 (0.0009) +[2026-06-02 16:41:47,933][243562] Updated weights for policy 0, policy_version 45801 (0.0009) +[2026-06-02 16:41:48,583][243562] Updated weights for policy 0, policy_version 45811 (0.0008) +[2026-06-02 16:41:48,747][243562] Updated weights for policy 0, policy_version 45821 (0.0008) +[2026-06-02 16:41:48,896][243562] Updated weights for policy 0, policy_version 45831 (0.0008) +[2026-06-02 16:41:49,071][243562] Updated weights for policy 0, policy_version 45841 (0.0009) +[2026-06-02 16:41:49,247][243562] Updated weights for policy 0, policy_version 45852 (0.0008) +[2026-06-02 16:41:49,410][243562] Updated weights for policy 0, policy_version 45862 (0.0008) +[2026-06-02 16:41:49,575][243562] Updated weights for policy 0, policy_version 45872 (0.0009) +[2026-06-02 16:41:49,811][235960] Fps is (10 sec: 22937.7, 60 sec: 21845.3, 300 sec: 21549.1). Total num frames: 23494656. Throughput: 0: 21316.3. Samples: 23479168. Policy #0 lag: (min: 63.0, avg: 78.3, max: 127.0) +[2026-06-02 16:41:49,812][235960] Avg episode reward: [(0, '2273.838')] +[2026-06-02 16:41:49,816][242748] Saving new best policy, reward=2273.838! +[2026-06-02 16:41:50,226][243562] Updated weights for policy 0, policy_version 45882 (0.0008) +[2026-06-02 16:41:50,412][243562] Updated weights for policy 0, policy_version 45893 (0.0008) +[2026-06-02 16:41:50,570][243562] Updated weights for policy 0, policy_version 45903 (0.0008) +[2026-06-02 16:41:50,740][243562] Updated weights for policy 0, policy_version 45913 (0.0008) +[2026-06-02 16:41:50,920][243562] Updated weights for policy 0, policy_version 45924 (0.0008) +[2026-06-02 16:41:51,084][243562] Updated weights for policy 0, policy_version 45934 (0.0008) +[2026-06-02 16:41:51,706][243562] Updated weights for policy 0, policy_version 45944 (0.0010) +[2026-06-02 16:41:51,863][243562] Updated weights for policy 0, policy_version 45954 (0.0009) +[2026-06-02 16:41:52,035][243562] Updated weights for policy 0, policy_version 45964 (0.0008) +[2026-06-02 16:41:52,205][243562] Updated weights for policy 0, policy_version 45975 (0.0008) +[2026-06-02 16:41:52,371][243562] Updated weights for policy 0, policy_version 45985 (0.0008) +[2026-06-02 16:41:52,539][243562] Updated weights for policy 0, policy_version 45995 (0.0008) +[2026-06-02 16:41:53,183][243562] Updated weights for policy 0, policy_version 46005 (0.0009) +[2026-06-02 16:41:53,347][243562] Updated weights for policy 0, policy_version 46015 (0.0008) +[2026-06-02 16:41:53,507][243562] Updated weights for policy 0, policy_version 46025 (0.0008) +[2026-06-02 16:41:53,686][243562] Updated weights for policy 0, policy_version 46036 (0.0008) +[2026-06-02 16:41:53,853][243562] Updated weights for policy 0, policy_version 46046 (0.0008) +[2026-06-02 16:41:54,020][243562] Updated weights for policy 0, policy_version 46056 (0.0008) +[2026-06-02 16:41:54,674][243562] Updated weights for policy 0, policy_version 46066 (0.0008) +[2026-06-02 16:41:54,811][235960] Fps is (10 sec: 22937.8, 60 sec: 21299.2, 300 sec: 21438.0). Total num frames: 23592960. Throughput: 0: 21407.3. Samples: 23608192. Policy #0 lag: (min: 63.0, avg: 78.3, max: 127.0) +[2026-06-02 16:41:54,812][235960] Avg episode reward: [(0, '2236.474')] +[2026-06-02 16:41:54,845][243562] Updated weights for policy 0, policy_version 46077 (0.0009) +[2026-06-02 16:41:55,002][243562] Updated weights for policy 0, policy_version 46087 (0.0008) +[2026-06-02 16:41:55,183][243562] Updated weights for policy 0, policy_version 46098 (0.0005) +[2026-06-02 16:41:55,345][243562] Updated weights for policy 0, policy_version 46108 (0.0005) +[2026-06-02 16:41:55,526][243562] Updated weights for policy 0, policy_version 46119 (0.0005) +[2026-06-02 16:41:56,171][243562] Updated weights for policy 0, policy_version 46129 (0.0004) +[2026-06-02 16:41:56,323][243562] Updated weights for policy 0, policy_version 46139 (0.0008) +[2026-06-02 16:41:56,485][243562] Updated weights for policy 0, policy_version 46149 (0.0008) +[2026-06-02 16:41:56,650][243562] Updated weights for policy 0, policy_version 46159 (0.0008) +[2026-06-02 16:41:56,827][243562] Updated weights for policy 0, policy_version 46170 (0.0008) +[2026-06-02 16:41:56,995][243562] Updated weights for policy 0, policy_version 46180 (0.0008) +[2026-06-02 16:41:57,182][243562] Updated weights for policy 0, policy_version 46191 (0.0008) +[2026-06-02 16:41:57,804][243562] Updated weights for policy 0, policy_version 46201 (0.0009) +[2026-06-02 16:41:57,963][243562] Updated weights for policy 0, policy_version 46211 (0.0008) +[2026-06-02 16:41:58,130][243562] Updated weights for policy 0, policy_version 46222 (0.0007) +[2026-06-02 16:41:58,306][243562] Updated weights for policy 0, policy_version 46232 (0.0004) +[2026-06-02 16:41:58,484][243562] Updated weights for policy 0, policy_version 46243 (0.0004) +[2026-06-02 16:41:58,658][243562] Updated weights for policy 0, policy_version 46253 (0.0004) +[2026-06-02 16:41:59,266][243562] Updated weights for policy 0, policy_version 46263 (0.0004) +[2026-06-02 16:41:59,433][243562] Updated weights for policy 0, policy_version 46273 (0.0004) +[2026-06-02 16:41:59,590][243562] Updated weights for policy 0, policy_version 46283 (0.0007) +[2026-06-02 16:41:59,758][243562] Updated weights for policy 0, policy_version 46293 (0.0009) +[2026-06-02 16:41:59,811][235960] Fps is (10 sec: 19660.8, 60 sec: 21299.2, 300 sec: 21438.0). Total num frames: 23691264. Throughput: 0: 21498.3. Samples: 23737728. Policy #0 lag: (min: 63.0, avg: 78.3, max: 127.0) +[2026-06-02 16:41:59,812][235960] Avg episode reward: [(0, '2271.899')] +[2026-06-02 16:41:59,946][243562] Updated weights for policy 0, policy_version 46304 (0.0009) +[2026-06-02 16:42:00,122][243562] Updated weights for policy 0, policy_version 46315 (0.0008) +[2026-06-02 16:42:00,757][243562] Updated weights for policy 0, policy_version 46325 (0.0009) +[2026-06-02 16:42:00,934][243562] Updated weights for policy 0, policy_version 46336 (0.0009) +[2026-06-02 16:42:01,110][243562] Updated weights for policy 0, policy_version 46347 (0.0008) +[2026-06-02 16:42:01,296][243562] Updated weights for policy 0, policy_version 46358 (0.0008) +[2026-06-02 16:42:01,462][243562] Updated weights for policy 0, policy_version 46368 (0.0009) +[2026-06-02 16:42:01,629][243562] Updated weights for policy 0, policy_version 46378 (0.0008) +[2026-06-02 16:42:02,281][243562] Updated weights for policy 0, policy_version 46389 (0.0009) +[2026-06-02 16:42:02,425][243562] Updated weights for policy 0, policy_version 46399 (0.0008) +[2026-06-02 16:42:02,627][243562] Updated weights for policy 0, policy_version 46411 (0.0009) +[2026-06-02 16:42:02,806][243562] Updated weights for policy 0, policy_version 46422 (0.0008) +[2026-06-02 16:42:02,984][243562] Updated weights for policy 0, policy_version 46433 (0.0009) +[2026-06-02 16:42:03,165][243562] Updated weights for policy 0, policy_version 46444 (0.0008) +[2026-06-02 16:42:03,820][243562] Updated weights for policy 0, policy_version 46454 (0.0009) +[2026-06-02 16:42:03,976][243562] Updated weights for policy 0, policy_version 46464 (0.0008) +[2026-06-02 16:42:04,160][243562] Updated weights for policy 0, policy_version 46475 (0.0009) +[2026-06-02 16:42:04,343][243562] Updated weights for policy 0, policy_version 46486 (0.0009) +[2026-06-02 16:42:04,505][243562] Updated weights for policy 0, policy_version 46496 (0.0008) +[2026-06-02 16:42:04,664][243562] Updated weights for policy 0, policy_version 46506 (0.0008) +[2026-06-02 16:42:04,811][235960] Fps is (10 sec: 22937.7, 60 sec: 21845.3, 300 sec: 21549.1). Total num frames: 23822336. Throughput: 0: 21512.6. Samples: 23801984. Policy #0 lag: (min: 63.0, avg: 78.3, max: 127.0) +[2026-06-02 16:42:04,812][235960] Avg episode reward: [(0, '2266.722')] +[2026-06-02 16:42:05,332][243562] Updated weights for policy 0, policy_version 46517 (0.0008) +[2026-06-02 16:42:05,482][243562] Updated weights for policy 0, policy_version 46527 (0.0008) +[2026-06-02 16:42:05,655][243562] Updated weights for policy 0, policy_version 46538 (0.0009) +[2026-06-02 16:42:05,827][243562] Updated weights for policy 0, policy_version 46548 (0.0009) +[2026-06-02 16:42:05,991][243562] Updated weights for policy 0, policy_version 46558 (0.0009) +[2026-06-02 16:42:06,153][243562] Updated weights for policy 0, policy_version 46568 (0.0009) +[2026-06-02 16:42:06,802][243562] Updated weights for policy 0, policy_version 46578 (0.0009) +[2026-06-02 16:42:06,946][243562] Updated weights for policy 0, policy_version 46588 (0.0008) +[2026-06-02 16:42:07,114][243562] Updated weights for policy 0, policy_version 46598 (0.0008) +[2026-06-02 16:42:07,275][243562] Updated weights for policy 0, policy_version 46608 (0.0008) +[2026-06-02 16:42:07,433][243562] Updated weights for policy 0, policy_version 46618 (0.0007) +[2026-06-02 16:42:07,603][243562] Updated weights for policy 0, policy_version 46628 (0.0008) +[2026-06-02 16:42:07,768][243562] Updated weights for policy 0, policy_version 46638 (0.0008) +[2026-06-02 16:42:08,411][243562] Updated weights for policy 0, policy_version 46648 (0.0008) +[2026-06-02 16:42:08,583][243562] Updated weights for policy 0, policy_version 46659 (0.0008) +[2026-06-02 16:42:08,750][243562] Updated weights for policy 0, policy_version 46669 (0.0008) +[2026-06-02 16:42:08,914][243562] Updated weights for policy 0, policy_version 46679 (0.0008) +[2026-06-02 16:42:09,080][243562] Updated weights for policy 0, policy_version 46689 (0.0008) +[2026-06-02 16:42:09,244][243562] Updated weights for policy 0, policy_version 46699 (0.0008) +[2026-06-02 16:42:09,811][235960] Fps is (10 sec: 22937.5, 60 sec: 21845.3, 300 sec: 21438.0). Total num frames: 23920640. Throughput: 0: 21583.6. Samples: 23931264. Policy #0 lag: (min: 63.0, avg: 78.3, max: 127.0) +[2026-06-02 16:42:09,812][235960] Avg episode reward: [(0, '2233.694')] +[2026-06-02 16:42:09,895][243562] Updated weights for policy 0, policy_version 46709 (0.0009) +[2026-06-02 16:42:10,056][243562] Updated weights for policy 0, policy_version 46719 (0.0008) +[2026-06-02 16:42:10,215][243562] Updated weights for policy 0, policy_version 46729 (0.0009) +[2026-06-02 16:42:10,374][243562] Updated weights for policy 0, policy_version 46739 (0.0008) +[2026-06-02 16:42:10,542][243562] Updated weights for policy 0, policy_version 46749 (0.0008) +[2026-06-02 16:42:10,700][243562] Updated weights for policy 0, policy_version 46759 (0.0008) +[2026-06-02 16:42:11,395][243562] Updated weights for policy 0, policy_version 46771 (0.0009) +[2026-06-02 16:42:11,555][243562] Updated weights for policy 0, policy_version 46781 (0.0007) +[2026-06-02 16:42:11,718][243562] Updated weights for policy 0, policy_version 46791 (0.0009) +[2026-06-02 16:42:11,876][243562] Updated weights for policy 0, policy_version 46801 (0.0009) +[2026-06-02 16:42:12,069][243562] Updated weights for policy 0, policy_version 46813 (0.0009) +[2026-06-02 16:42:12,238][243562] Updated weights for policy 0, policy_version 46823 (0.0007) +[2026-06-02 16:42:12,901][243562] Updated weights for policy 0, policy_version 46834 (0.0009) +[2026-06-02 16:42:13,070][243562] Updated weights for policy 0, policy_version 46845 (0.0009) +[2026-06-02 16:42:13,250][243562] Updated weights for policy 0, policy_version 46856 (0.0008) +[2026-06-02 16:42:13,414][243562] Updated weights for policy 0, policy_version 46866 (0.0009) +[2026-06-02 16:42:13,580][243562] Updated weights for policy 0, policy_version 46876 (0.0009) +[2026-06-02 16:42:13,761][243562] Updated weights for policy 0, policy_version 46887 (0.0009) +[2026-06-02 16:42:14,439][243562] Updated weights for policy 0, policy_version 46898 (0.0009) +[2026-06-02 16:42:14,588][243562] Updated weights for policy 0, policy_version 46908 (0.0009) +[2026-06-02 16:42:14,751][243562] Updated weights for policy 0, policy_version 46918 (0.0008) +[2026-06-02 16:42:14,811][235960] Fps is (10 sec: 19660.7, 60 sec: 21299.2, 300 sec: 21438.0). Total num frames: 24018944. Throughput: 0: 21595.0. Samples: 24061312. Policy #0 lag: (min: 63.0, avg: 78.3, max: 127.0) +[2026-06-02 16:42:14,812][235960] Avg episode reward: [(0, '2278.019')] +[2026-06-02 16:42:14,918][243562] Updated weights for policy 0, policy_version 46928 (0.0009) +[2026-06-02 16:42:15,080][243562] Updated weights for policy 0, policy_version 46938 (0.0009) +[2026-06-02 16:42:15,245][243562] Updated weights for policy 0, policy_version 46948 (0.0008) +[2026-06-02 16:42:15,414][243562] Updated weights for policy 0, policy_version 46958 (0.0008) +[2026-06-02 16:42:15,438][242748] Saving new best policy, reward=2278.019! +[2026-06-02 16:42:16,059][243562] Updated weights for policy 0, policy_version 46968 (0.0009) +[2026-06-02 16:42:16,213][243562] Updated weights for policy 0, policy_version 46978 (0.0009) +[2026-06-02 16:42:16,393][243562] Updated weights for policy 0, policy_version 46989 (0.0009) +[2026-06-02 16:42:16,568][243562] Updated weights for policy 0, policy_version 47000 (0.0008) +[2026-06-02 16:42:16,737][243562] Updated weights for policy 0, policy_version 47010 (0.0006) +[2026-06-02 16:42:16,920][243562] Updated weights for policy 0, policy_version 47021 (0.0009) +[2026-06-02 16:42:17,555][243562] Updated weights for policy 0, policy_version 47031 (0.0008) +[2026-06-02 16:42:17,713][243562] Updated weights for policy 0, policy_version 47041 (0.0008) +[2026-06-02 16:42:17,879][243562] Updated weights for policy 0, policy_version 47051 (0.0008) +[2026-06-02 16:42:18,055][243562] Updated weights for policy 0, policy_version 47062 (0.0008) +[2026-06-02 16:42:18,226][243562] Updated weights for policy 0, policy_version 47072 (0.0008) +[2026-06-02 16:42:18,398][243562] Updated weights for policy 0, policy_version 47082 (0.0008) +[2026-06-02 16:42:19,067][243562] Updated weights for policy 0, policy_version 47093 (0.0010) +[2026-06-02 16:42:19,223][243562] Updated weights for policy 0, policy_version 47103 (0.0008) +[2026-06-02 16:42:19,424][243562] Updated weights for policy 0, policy_version 47115 (0.0008) +[2026-06-02 16:42:19,589][243562] Updated weights for policy 0, policy_version 47125 (0.0008) +[2026-06-02 16:42:19,746][243562] Updated weights for policy 0, policy_version 47135 (0.0008) +[2026-06-02 16:42:19,811][235960] Fps is (10 sec: 19660.8, 60 sec: 21299.2, 300 sec: 21438.0). Total num frames: 24117248. Throughput: 0: 21583.7. Samples: 24126464. Policy #0 lag: (min: 63.0, avg: 78.3, max: 127.0) +[2026-06-02 16:42:19,812][235960] Avg episode reward: [(0, '2283.538')] +[2026-06-02 16:42:19,912][243562] Updated weights for policy 0, policy_version 47145 (0.0008) +[2026-06-02 16:42:20,018][242748] Saving new best policy, reward=2283.538! +[2026-06-02 16:42:20,575][243562] Updated weights for policy 0, policy_version 47155 (0.0008) +[2026-06-02 16:42:20,734][243562] Updated weights for policy 0, policy_version 47165 (0.0008) +[2026-06-02 16:42:20,894][243562] Updated weights for policy 0, policy_version 47175 (0.0008) +[2026-06-02 16:42:21,066][243562] Updated weights for policy 0, policy_version 47185 (0.0008) +[2026-06-02 16:42:21,226][243562] Updated weights for policy 0, policy_version 47195 (0.0008) +[2026-06-02 16:42:21,407][243562] Updated weights for policy 0, policy_version 47206 (0.0008) +[2026-06-02 16:42:22,057][243562] Updated weights for policy 0, policy_version 47217 (0.0008) +[2026-06-02 16:42:22,214][243562] Updated weights for policy 0, policy_version 47227 (0.0009) +[2026-06-02 16:42:22,374][243562] Updated weights for policy 0, policy_version 47237 (0.0009) +[2026-06-02 16:42:22,548][243562] Updated weights for policy 0, policy_version 47248 (0.0009) +[2026-06-02 16:42:22,733][243562] Updated weights for policy 0, policy_version 47259 (0.0009) +[2026-06-02 16:42:22,902][243562] Updated weights for policy 0, policy_version 47269 (0.0009) +[2026-06-02 16:42:23,067][243562] Updated weights for policy 0, policy_version 47279 (0.0007) +[2026-06-02 16:42:23,706][243562] Updated weights for policy 0, policy_version 47289 (0.0008) +[2026-06-02 16:42:23,863][243562] Updated weights for policy 0, policy_version 47299 (0.0008) +[2026-06-02 16:42:24,032][243562] Updated weights for policy 0, policy_version 47309 (0.0009) +[2026-06-02 16:42:24,193][243562] Updated weights for policy 0, policy_version 47319 (0.0008) +[2026-06-02 16:42:24,358][243562] Updated weights for policy 0, policy_version 47329 (0.0008) +[2026-06-02 16:42:24,527][243562] Updated weights for policy 0, policy_version 47339 (0.0008) +[2026-06-02 16:42:24,811][235960] Fps is (10 sec: 22937.6, 60 sec: 21845.3, 300 sec: 21549.1). Total num frames: 24248320. Throughput: 0: 21580.8. Samples: 24256384. Policy #0 lag: (min: 23.0, avg: 38.0, max: 87.0) +[2026-06-02 16:42:24,812][235960] Avg episode reward: [(0, '2311.524')] +[2026-06-02 16:42:24,817][242748] Saving new best policy, reward=2311.524! +[2026-06-02 16:42:25,151][243562] Updated weights for policy 0, policy_version 47349 (0.0008) +[2026-06-02 16:42:25,318][243562] Updated weights for policy 0, policy_version 47359 (0.0008) +[2026-06-02 16:42:25,474][243562] Updated weights for policy 0, policy_version 47369 (0.0008) +[2026-06-02 16:42:25,641][243562] Updated weights for policy 0, policy_version 47379 (0.0008) +[2026-06-02 16:42:25,821][243562] Updated weights for policy 0, policy_version 47390 (0.0008) +[2026-06-02 16:42:25,987][243562] Updated weights for policy 0, policy_version 47400 (0.0008) +[2026-06-02 16:42:26,647][243562] Updated weights for policy 0, policy_version 47410 (0.0008) +[2026-06-02 16:42:26,803][243562] Updated weights for policy 0, policy_version 47420 (0.0009) +[2026-06-02 16:42:26,964][243562] Updated weights for policy 0, policy_version 47430 (0.0008) +[2026-06-02 16:42:27,122][243562] Updated weights for policy 0, policy_version 47440 (0.0008) +[2026-06-02 16:42:27,296][243562] Updated weights for policy 0, policy_version 47450 (0.0008) +[2026-06-02 16:42:27,454][243562] Updated weights for policy 0, policy_version 47460 (0.0009) +[2026-06-02 16:42:27,614][243562] Updated weights for policy 0, policy_version 47470 (0.0009) +[2026-06-02 16:42:28,267][243562] Updated weights for policy 0, policy_version 47481 (0.0009) +[2026-06-02 16:42:28,434][243562] Updated weights for policy 0, policy_version 47491 (0.0009) +[2026-06-02 16:42:28,594][243562] Updated weights for policy 0, policy_version 47501 (0.0008) +[2026-06-02 16:42:28,758][243562] Updated weights for policy 0, policy_version 47511 (0.0009) +[2026-06-02 16:42:28,924][243562] Updated weights for policy 0, policy_version 47521 (0.0008) +[2026-06-02 16:42:29,081][243562] Updated weights for policy 0, policy_version 47531 (0.0008) +[2026-06-02 16:42:29,727][243562] Updated weights for policy 0, policy_version 47541 (0.0009) +[2026-06-02 16:42:29,811][235960] Fps is (10 sec: 22937.7, 60 sec: 21299.2, 300 sec: 21438.0). Total num frames: 24346624. Throughput: 0: 21609.3. Samples: 24386176. Policy #0 lag: (min: 23.0, avg: 38.0, max: 87.0) +[2026-06-02 16:42:29,812][235960] Avg episode reward: [(0, '2334.862')] +[2026-06-02 16:42:29,885][243562] Updated weights for policy 0, policy_version 47551 (0.0008) +[2026-06-02 16:42:30,047][243562] Updated weights for policy 0, policy_version 47561 (0.0008) +[2026-06-02 16:42:30,211][243562] Updated weights for policy 0, policy_version 47571 (0.0008) +[2026-06-02 16:42:30,376][243562] Updated weights for policy 0, policy_version 47581 (0.0008) +[2026-06-02 16:42:30,531][243562] Updated weights for policy 0, policy_version 47591 (0.0008) +[2026-06-02 16:42:30,675][242748] Saving new best policy, reward=2334.862! +[2026-06-02 16:42:31,198][243562] Updated weights for policy 0, policy_version 47601 (0.0008) +[2026-06-02 16:42:31,353][243562] Updated weights for policy 0, policy_version 47611 (0.0008) +[2026-06-02 16:42:31,512][243562] Updated weights for policy 0, policy_version 47621 (0.0008) +[2026-06-02 16:42:31,677][243562] Updated weights for policy 0, policy_version 47631 (0.0008) +[2026-06-02 16:42:31,839][243562] Updated weights for policy 0, policy_version 47641 (0.0008) +[2026-06-02 16:42:32,007][243562] Updated weights for policy 0, policy_version 47651 (0.0008) +[2026-06-02 16:42:32,172][243562] Updated weights for policy 0, policy_version 47661 (0.0008) +[2026-06-02 16:42:32,841][243562] Updated weights for policy 0, policy_version 47673 (0.0008) +[2026-06-02 16:42:32,996][243562] Updated weights for policy 0, policy_version 47683 (0.0008) +[2026-06-02 16:42:33,166][243562] Updated weights for policy 0, policy_version 47693 (0.0008) +[2026-06-02 16:42:33,333][243562] Updated weights for policy 0, policy_version 47703 (0.0009) +[2026-06-02 16:42:33,499][243562] Updated weights for policy 0, policy_version 47713 (0.0008) +[2026-06-02 16:42:33,666][243562] Updated weights for policy 0, policy_version 47723 (0.0008) +[2026-06-02 16:42:34,297][243562] Updated weights for policy 0, policy_version 47733 (0.0009) +[2026-06-02 16:42:34,450][243562] Updated weights for policy 0, policy_version 47743 (0.0007) +[2026-06-02 16:42:34,615][243562] Updated weights for policy 0, policy_version 47753 (0.0004) +[2026-06-02 16:42:34,800][243562] Updated weights for policy 0, policy_version 47764 (0.0005) +[2026-06-02 16:42:34,811][235960] Fps is (10 sec: 19660.8, 60 sec: 21299.2, 300 sec: 21438.0). Total num frames: 24444928. Throughput: 0: 21597.9. Samples: 24451072. Policy #0 lag: (min: 23.0, avg: 38.0, max: 87.0) +[2026-06-02 16:42:34,812][235960] Avg episode reward: [(0, '2329.436')] +[2026-06-02 16:42:34,986][243562] Updated weights for policy 0, policy_version 47775 (0.0004) +[2026-06-02 16:42:35,143][243562] Updated weights for policy 0, policy_version 47785 (0.0005) +[2026-06-02 16:42:35,773][243562] Updated weights for policy 0, policy_version 47795 (0.0005) +[2026-06-02 16:42:35,927][243562] Updated weights for policy 0, policy_version 47805 (0.0005) +[2026-06-02 16:42:36,087][243562] Updated weights for policy 0, policy_version 47815 (0.0005) +[2026-06-02 16:42:36,251][243562] Updated weights for policy 0, policy_version 47825 (0.0004) +[2026-06-02 16:42:36,409][243562] Updated weights for policy 0, policy_version 47835 (0.0005) +[2026-06-02 16:42:36,577][243562] Updated weights for policy 0, policy_version 47845 (0.0005) +[2026-06-02 16:42:36,751][243562] Updated weights for policy 0, policy_version 47855 (0.0005) +[2026-06-02 16:42:37,409][243562] Updated weights for policy 0, policy_version 47866 (0.0005) +[2026-06-02 16:42:37,569][243562] Updated weights for policy 0, policy_version 47876 (0.0005) +[2026-06-02 16:42:37,753][243562] Updated weights for policy 0, policy_version 47887 (0.0005) +[2026-06-02 16:42:37,929][243562] Updated weights for policy 0, policy_version 47898 (0.0005) +[2026-06-02 16:42:38,103][243562] Updated weights for policy 0, policy_version 47908 (0.0005) +[2026-06-02 16:42:38,288][243562] Updated weights for policy 0, policy_version 47919 (0.0005) +[2026-06-02 16:42:38,923][243562] Updated weights for policy 0, policy_version 47929 (0.0005) +[2026-06-02 16:42:39,083][243562] Updated weights for policy 0, policy_version 47939 (0.0005) +[2026-06-02 16:42:39,247][243562] Updated weights for policy 0, policy_version 47949 (0.0005) +[2026-06-02 16:42:39,415][243562] Updated weights for policy 0, policy_version 47959 (0.0005) +[2026-06-02 16:42:39,577][243562] Updated weights for policy 0, policy_version 47969 (0.0005) +[2026-06-02 16:42:39,761][243562] Updated weights for policy 0, policy_version 47980 (0.0005) +[2026-06-02 16:42:39,811][235960] Fps is (10 sec: 19660.8, 60 sec: 21299.2, 300 sec: 21438.0). Total num frames: 24543232. Throughput: 0: 21543.8. Samples: 24577664. Policy #0 lag: (min: 23.0, avg: 38.0, max: 87.0) +[2026-06-02 16:42:39,812][235960] Avg episode reward: [(0, '2325.464')] +[2026-06-02 16:42:40,396][243562] Updated weights for policy 0, policy_version 47990 (0.0005) +[2026-06-02 16:42:40,556][243562] Updated weights for policy 0, policy_version 48000 (0.0006) +[2026-06-02 16:42:40,717][243562] Updated weights for policy 0, policy_version 48010 (0.0008) +[2026-06-02 16:42:40,882][243562] Updated weights for policy 0, policy_version 48020 (0.0008) +[2026-06-02 16:42:41,044][243562] Updated weights for policy 0, policy_version 48030 (0.0008) +[2026-06-02 16:42:41,217][243562] Updated weights for policy 0, policy_version 48040 (0.0009) +[2026-06-02 16:42:41,889][243562] Updated weights for policy 0, policy_version 48051 (0.0008) +[2026-06-02 16:42:42,045][243562] Updated weights for policy 0, policy_version 48061 (0.0008) +[2026-06-02 16:42:42,211][243562] Updated weights for policy 0, policy_version 48071 (0.0008) +[2026-06-02 16:42:42,371][243562] Updated weights for policy 0, policy_version 48081 (0.0008) +[2026-06-02 16:42:42,549][243562] Updated weights for policy 0, policy_version 48092 (0.0008) +[2026-06-02 16:42:42,715][243562] Updated weights for policy 0, policy_version 48102 (0.0008) +[2026-06-02 16:42:42,882][243562] Updated weights for policy 0, policy_version 48112 (0.0008) +[2026-06-02 16:42:43,513][243562] Updated weights for policy 0, policy_version 48122 (0.0010) +[2026-06-02 16:42:43,692][243562] Updated weights for policy 0, policy_version 48133 (0.0008) +[2026-06-02 16:42:43,852][243562] Updated weights for policy 0, policy_version 48143 (0.0008) +[2026-06-02 16:42:44,012][243562] Updated weights for policy 0, policy_version 48153 (0.0009) +[2026-06-02 16:42:44,175][243562] Updated weights for policy 0, policy_version 48163 (0.0008) +[2026-06-02 16:42:44,342][243562] Updated weights for policy 0, policy_version 48173 (0.0008) +[2026-06-02 16:42:44,811][235960] Fps is (10 sec: 22937.8, 60 sec: 21845.4, 300 sec: 21549.1). Total num frames: 24674304. Throughput: 0: 21458.5. Samples: 24703360. Policy #0 lag: (min: 23.0, avg: 38.0, max: 87.0) +[2026-06-02 16:42:44,811][235960] Avg episode reward: [(0, '2308.754')] +[2026-06-02 16:42:44,988][243562] Updated weights for policy 0, policy_version 48183 (0.0008) +[2026-06-02 16:42:45,152][243562] Updated weights for policy 0, policy_version 48193 (0.0008) +[2026-06-02 16:42:45,311][243562] Updated weights for policy 0, policy_version 48203 (0.0008) +[2026-06-02 16:42:45,491][243562] Updated weights for policy 0, policy_version 48214 (0.0009) +[2026-06-02 16:42:45,665][243562] Updated weights for policy 0, policy_version 48224 (0.0005) +[2026-06-02 16:42:45,818][243562] Updated weights for policy 0, policy_version 48234 (0.0008) +[2026-06-02 16:42:46,467][243562] Updated weights for policy 0, policy_version 48244 (0.0007) +[2026-06-02 16:42:46,625][243562] Updated weights for policy 0, policy_version 48254 (0.0009) +[2026-06-02 16:42:46,807][243562] Updated weights for policy 0, policy_version 48265 (0.0008) +[2026-06-02 16:42:46,995][243562] Updated weights for policy 0, policy_version 48276 (0.0008) +[2026-06-02 16:42:47,157][243562] Updated weights for policy 0, policy_version 48286 (0.0009) +[2026-06-02 16:42:47,324][243562] Updated weights for policy 0, policy_version 48296 (0.0008) +[2026-06-02 16:42:47,963][243562] Updated weights for policy 0, policy_version 48306 (0.0009) +[2026-06-02 16:42:48,126][243562] Updated weights for policy 0, policy_version 48316 (0.0009) +[2026-06-02 16:42:48,273][243562] Updated weights for policy 0, policy_version 48326 (0.0008) +[2026-06-02 16:42:48,446][243562] Updated weights for policy 0, policy_version 48336 (0.0008) +[2026-06-02 16:42:48,611][243562] Updated weights for policy 0, policy_version 48346 (0.0009) +[2026-06-02 16:42:48,782][243562] Updated weights for policy 0, policy_version 48356 (0.0008) +[2026-06-02 16:42:48,957][243562] Updated weights for policy 0, policy_version 48367 (0.0009) +[2026-06-02 16:42:49,601][243562] Updated weights for policy 0, policy_version 48377 (0.0007) +[2026-06-02 16:42:49,767][243562] Updated weights for policy 0, policy_version 48387 (0.0008) +[2026-06-02 16:42:49,811][235960] Fps is (10 sec: 22937.6, 60 sec: 21299.2, 300 sec: 21438.1). Total num frames: 24772608. Throughput: 0: 21432.9. Samples: 24766464. Policy #0 lag: (min: 23.0, avg: 38.0, max: 87.0) +[2026-06-02 16:42:49,812][235960] Avg episode reward: [(0, '2359.087')] +[2026-06-02 16:42:49,928][243562] Updated weights for policy 0, policy_version 48397 (0.0008) +[2026-06-02 16:42:50,111][243562] Updated weights for policy 0, policy_version 48408 (0.0010) +[2026-06-02 16:42:50,292][243562] Updated weights for policy 0, policy_version 48419 (0.0009) +[2026-06-02 16:42:50,475][243562] Updated weights for policy 0, policy_version 48430 (0.0009) +[2026-06-02 16:42:50,499][242748] Saving new best policy, reward=2359.087! +[2026-06-02 16:42:51,131][243562] Updated weights for policy 0, policy_version 48440 (0.0008) +[2026-06-02 16:42:51,318][243562] Updated weights for policy 0, policy_version 48452 (0.0008) +[2026-06-02 16:42:51,483][243562] Updated weights for policy 0, policy_version 48462 (0.0008) +[2026-06-02 16:42:51,656][243562] Updated weights for policy 0, policy_version 48472 (0.0008) +[2026-06-02 16:42:51,819][243562] Updated weights for policy 0, policy_version 48482 (0.0008) +[2026-06-02 16:42:52,003][243562] Updated weights for policy 0, policy_version 48493 (0.0008) +[2026-06-02 16:42:52,676][243562] Updated weights for policy 0, policy_version 48505 (0.0008) +[2026-06-02 16:42:52,833][243562] Updated weights for policy 0, policy_version 48515 (0.0008) +[2026-06-02 16:42:52,993][243562] Updated weights for policy 0, policy_version 48525 (0.0008) +[2026-06-02 16:42:53,164][243562] Updated weights for policy 0, policy_version 48535 (0.0008) +[2026-06-02 16:42:53,344][243562] Updated weights for policy 0, policy_version 48546 (0.0008) +[2026-06-02 16:42:53,508][243562] Updated weights for policy 0, policy_version 48556 (0.0008) +[2026-06-02 16:42:54,155][243562] Updated weights for policy 0, policy_version 48566 (0.0008) +[2026-06-02 16:42:54,316][243562] Updated weights for policy 0, policy_version 48576 (0.0007) +[2026-06-02 16:42:54,469][243562] Updated weights for policy 0, policy_version 48586 (0.0008) +[2026-06-02 16:42:54,636][243562] Updated weights for policy 0, policy_version 48596 (0.0008) +[2026-06-02 16:42:54,803][243562] Updated weights for policy 0, policy_version 48606 (0.0008) +[2026-06-02 16:42:54,811][235960] Fps is (10 sec: 19660.5, 60 sec: 21299.2, 300 sec: 21438.1). Total num frames: 24870912. Throughput: 0: 21350.4. Samples: 24892032. Policy #0 lag: (min: 43.0, avg: 78.1, max: 107.0) +[2026-06-02 16:42:54,812][235960] Avg episode reward: [(0, '2404.899')] +[2026-06-02 16:42:54,983][243562] Updated weights for policy 0, policy_version 48617 (0.0009) +[2026-06-02 16:42:55,095][242748] Saving new best policy, reward=2404.899! +[2026-06-02 16:42:55,644][243562] Updated weights for policy 0, policy_version 48627 (0.0008) +[2026-06-02 16:42:55,805][243562] Updated weights for policy 0, policy_version 48638 (0.0009) +[2026-06-02 16:42:55,966][243562] Updated weights for policy 0, policy_version 48648 (0.0008) +[2026-06-02 16:42:56,133][243562] Updated weights for policy 0, policy_version 48658 (0.0009) +[2026-06-02 16:42:56,297][243562] Updated weights for policy 0, policy_version 48668 (0.0008) +[2026-06-02 16:42:56,457][243562] Updated weights for policy 0, policy_version 48678 (0.0008) +[2026-06-02 16:42:56,626][243562] Updated weights for policy 0, policy_version 48688 (0.0008) +[2026-06-02 16:42:57,276][243562] Updated weights for policy 0, policy_version 48698 (0.0008) +[2026-06-02 16:42:57,447][243562] Updated weights for policy 0, policy_version 48708 (0.0008) +[2026-06-02 16:42:57,619][243562] Updated weights for policy 0, policy_version 48719 (0.0009) +[2026-06-02 16:42:57,798][243562] Updated weights for policy 0, policy_version 48730 (0.0008) +[2026-06-02 16:42:57,963][243562] Updated weights for policy 0, policy_version 48740 (0.0009) +[2026-06-02 16:42:58,133][243562] Updated weights for policy 0, policy_version 48750 (0.0008) +[2026-06-02 16:42:58,752][243562] Updated weights for policy 0, policy_version 48760 (0.0005) +[2026-06-02 16:42:58,913][243562] Updated weights for policy 0, policy_version 48770 (0.0009) +[2026-06-02 16:42:59,074][243562] Updated weights for policy 0, policy_version 48780 (0.0008) +[2026-06-02 16:42:59,242][243562] Updated weights for policy 0, policy_version 48790 (0.0008) +[2026-06-02 16:42:59,413][243562] Updated weights for policy 0, policy_version 48800 (0.0008) +[2026-06-02 16:42:59,627][243562] Updated weights for policy 0, policy_version 48813 (0.0008) +[2026-06-02 16:42:59,811][235960] Fps is (10 sec: 22937.6, 60 sec: 21845.3, 300 sec: 21549.1). Total num frames: 25001984. Throughput: 0: 21336.2. Samples: 25021440. Policy #0 lag: (min: 43.0, avg: 78.1, max: 107.0) +[2026-06-02 16:42:59,812][235960] Avg episode reward: [(0, '2425.785')] +[2026-06-02 16:42:59,816][242748] Saving new best policy, reward=2425.785! +[2026-06-02 16:43:00,279][243562] Updated weights for policy 0, policy_version 48824 (0.0009) +[2026-06-02 16:43:00,401][242748] Early stopping after 2 epochs (16 sgd steps), loss delta 0.0000007 +[2026-06-02 16:43:00,402][242748] Stopping Batcher_0... +[2026-06-02 16:43:00,402][243563] Stopping RolloutWorker_w0... +[2026-06-02 16:43:00,403][243563] Loop rollout_proc0_evt_loop terminating... +[2026-06-02 16:43:00,403][242748] Saving results/checkpoints_factor_sweeps/flappy/context_window/flappy_frame_stack_fixed_l2_fs4_seed12/checkpoint_p0/checkpoint_000048832_25034752.pth... +[2026-06-02 16:43:00,403][235960] Component RolloutWorker_w0 stopped! +[2026-06-02 16:43:00,404][235960] Component Batcher_0 stopped! +[2026-06-02 16:43:00,403][242748] Loop batcher_evt_loop terminating... +[2026-06-02 16:43:00,405][235960] Component RolloutWorker_w1 stopped! +[2026-06-02 16:43:00,405][243564] Stopping RolloutWorker_w1... +[2026-06-02 16:43:00,406][243564] Loop rollout_proc1_evt_loop terminating... +[2026-06-02 16:43:00,420][242748] Saving new best policy, reward=2442.527! +[2026-06-02 16:43:00,441][242748] Saving results/checkpoints_factor_sweeps/flappy/context_window/flappy_frame_stack_fixed_l2_fs4_seed12/checkpoint_p0/checkpoint_000048832_25034752.pth... +[2026-06-02 16:43:00,462][242748] Stopping LearnerWorker_p0... +[2026-06-02 16:43:00,462][242748] Loop learner_proc0_evt_loop terminating... +[2026-06-02 16:43:00,462][235960] Component LearnerWorker_p0 stopped! +[2026-06-02 16:43:00,475][243562] Weights refcount: 2 0 +[2026-06-02 16:43:00,477][243562] Stopping InferenceWorker_p0-w0... +[2026-06-02 16:43:00,477][243562] Loop inference_proc0-0_evt_loop terminating... +[2026-06-02 16:43:00,477][235960] Component InferenceWorker_p0-w0 stopped! +[2026-06-02 16:43:00,478][235960] Waiting for process learner_proc0 to stop... +[2026-06-02 16:43:01,297][235960] Waiting for process inference_proc0-0 to join... +[2026-06-02 16:43:01,298][235960] Waiting for process rollout_proc0 to join... +[2026-06-02 16:43:01,299][235960] Waiting for process rollout_proc1 to join... +[2026-06-02 16:43:01,300][235960] Batcher 0 profile tree view: +batching: 0.8558, releasing_batches: 0.0333 +[2026-06-02 16:43:01,301][235960] InferenceWorker_p0-w0 profile tree view: +wait_policy: 0.0000 + wait_policy_total: 728.4250 +update_model: 44.6722 + weight_update: 0.0008 +one_step: 0.0017 + handle_policy_step: 376.6304 + deserialize: 5.0217, stack: 0.3641, obs_to_device_normalize: 52.8109, forward: 140.6926, prepare_outputs: 152.0958, send_messages: 10.1530 +[2026-06-02 16:43:01,301][235960] Learner 0 profile tree view: +misc: 0.0050, prepare_batch: 100.0742 +train: 787.7330 + epoch_init: 0.0587, minibatch_init: 2.4813, losses_postprocess: 269.4823, kl_divergence: 24.9206, after_optimizer: 313.0513 + calculate_losses: 38.9565 + losses_init: 0.0762, forward_head: 12.6365, bptt_initial: 0.3586, bptt: 0.3833, tail: 8.8340, advantages_returns: 2.8794, losses: 10.8837 + update: 135.6388 + clip: 12.9467 +[2026-06-02 16:43:01,302][235960] RolloutWorker_w0 profile tree view: +wait_for_trajectories: 0.0333, enqueue_policy_requests: 128.0205, process_policy_outputs: 7.5717, env_step: 768.1250, finalize_trajectories: 0.1026, complete_rollouts: 0.0765 +post_env_step: 16.1925 + process_env_step: 4.7767 +[2026-06-02 16:43:01,302][235960] RolloutWorker_w1 profile tree view: +wait_for_trajectories: 0.0334, enqueue_policy_requests: 134.9238, process_policy_outputs: 7.4725, env_step: 765.0066, finalize_trajectories: 0.1095, complete_rollouts: 0.0771 +post_env_step: 15.9501 + process_env_step: 4.6671 +[2026-06-02 16:43:01,304][235960] Loop Runner_EvtLoop terminating... +[2026-06-02 16:43:01,305][235960] Runner profile tree view: +main_loop: 1181.9510 +[2026-06-02 16:43:01,305][235960] Collected {0: 25034752}, FPS: 21180.9