diff --git a/.gitattributes b/.gitattributes index 6107fb96d551328d36e6735de37978dde760b793..3b00d9ee06680f6dbaed833058f6f898d42ba078 100644 --- a/.gitattributes +++ b/.gitattributes @@ -41,3 +41,4 @@ factor_sweeps/flappy/context_window/train/factor_sweep:flappy:frame_stack:fixed_ factor_sweeps/flappy/observation_stride/train/factor_sweep:flappy:observation_stride:fixed_l2:fs4:obs30:stride1:seed12/episode_metrics.jsonl filter=lfs diff=lfs merge=lfs -text factor_sweeps/flappy/observation_stride/train/factor_sweep:flappy:observation_stride:fixed_l2:fs4:obs30:stride1:seed11/episode_metrics.jsonl filter=lfs diff=lfs merge=lfs -text factor_sweeps/flappy/context_window/train/factor_sweep:flappy:frame_stack:fixed_l2:fs4:obs30:stride1:seed11/episode_metrics.jsonl filter=lfs diff=lfs merge=lfs -text +factor_sweeps/flappy/context_window/train/factor_sweep:flappy:frame_stack:fixed_l2:fs4:obs30:stride1:seed14/episode_metrics.jsonl filter=lfs diff=lfs merge=lfs -text diff --git a/factor_sweeps/flappy/context_window/train/factor_sweep:flappy:frame_stack:fixed_l2:fs4:obs30:stride1:seed14/checkpoint_p0/best_000048848_25034752_reward_2387.653.pth b/factor_sweeps/flappy/context_window/train/factor_sweep:flappy:frame_stack:fixed_l2:fs4:obs30:stride1:seed14/checkpoint_p0/best_000048848_25034752_reward_2387.653.pth new file mode 100644 index 0000000000000000000000000000000000000000..61f8657dd024577c9b2ed0f99779cc3b81ceacaa --- /dev/null +++ b/factor_sweeps/flappy/context_window/train/factor_sweep:flappy:frame_stack:fixed_l2:fs4:obs30:stride1:seed14/checkpoint_p0/best_000048848_25034752_reward_2387.653.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:fd291aa8f392a90eded228f9ac30bac6eca2dc66372e3fffa712aab73bc3c151 +size 21797945 diff --git a/factor_sweeps/flappy/context_window/train/factor_sweep:flappy:frame_stack:fixed_l2:fs4:obs30:stride1:seed14/checkpoint_p0/checkpoint_000023360_11960320.pth b/factor_sweeps/flappy/context_window/train/factor_sweep:flappy:frame_stack:fixed_l2:fs4:obs30:stride1:seed14/checkpoint_p0/checkpoint_000023360_11960320.pth new file mode 100644 index 0000000000000000000000000000000000000000..2a255cad66a1dd8e2795cefe3e31d8c5385f39c3 --- /dev/null +++ b/factor_sweeps/flappy/context_window/train/factor_sweep:flappy:frame_stack:fixed_l2:fs4:obs30:stride1:seed14/checkpoint_p0/checkpoint_000023360_11960320.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:eae31f4da719579e6122700eedc8535adc727f0b69e2400e5d029ccb3579671f +size 21798305 diff --git a/factor_sweeps/flappy/context_window/train/factor_sweep:flappy:frame_stack:fixed_l2:fs4:obs30:stride1:seed14/checkpoint_p0/checkpoint_000047168_24150016.pth b/factor_sweeps/flappy/context_window/train/factor_sweep:flappy:frame_stack:fixed_l2:fs4:obs30:stride1:seed14/checkpoint_p0/checkpoint_000047168_24150016.pth new file mode 100644 index 0000000000000000000000000000000000000000..7842d1396f45e2c45c6fd24b32623da14ead057c --- /dev/null +++ b/factor_sweeps/flappy/context_window/train/factor_sweep:flappy:frame_stack:fixed_l2:fs4:obs30:stride1:seed14/checkpoint_p0/checkpoint_000047168_24150016.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c0ba8b0d7f981715416b962838766872be2596b34c4bdcb2086d6efc794ce6f2 +size 21798305 diff --git a/factor_sweeps/flappy/context_window/train/factor_sweep:flappy:frame_stack:fixed_l2:fs4:obs30:stride1:seed14/checkpoint_p0/checkpoint_000048848_25034752.pth b/factor_sweeps/flappy/context_window/train/factor_sweep:flappy:frame_stack:fixed_l2:fs4:obs30:stride1:seed14/checkpoint_p0/checkpoint_000048848_25034752.pth new file mode 100644 index 0000000000000000000000000000000000000000..8bb87e4f1bd46a564f9cde3a2408225033e26ef5 --- /dev/null +++ b/factor_sweeps/flappy/context_window/train/factor_sweep:flappy:frame_stack:fixed_l2:fs4:obs30:stride1:seed14/checkpoint_p0/checkpoint_000048848_25034752.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0bffd8a334d15e5f96621248f8ae12be8600ba3b4babc88c87823fd35308923b +size 21798305 diff --git a/factor_sweeps/flappy/context_window/train/factor_sweep:flappy:frame_stack:fixed_l2:fs4:obs30:stride1:seed14/config.json b/factor_sweeps/flappy/context_window/train/factor_sweep:flappy:frame_stack:fixed_l2:fs4:obs30:stride1:seed14/config.json new file mode 100644 index 0000000000000000000000000000000000000000..3432d6fdca185dee06dee92c84f5f8d3555c4c57 --- /dev/null +++ b/factor_sweeps/flappy/context_window/train/factor_sweep:flappy:frame_stack:fixed_l2:fs4:obs30:stride1:seed14/config.json @@ -0,0 +1,266 @@ +{ + "help": false, + "algo": "APPO", + "env": "latency_flappy", + "experiment": "flappy_frame_stack_fixed_l2_fs4_seed14", + "train_dir": "results/checkpoints_factor_sweeps/flappy/context_window", + "restart_behavior": "resume", + "device": "gpu", + "seed": 14, + "num_policies": 1, + "async_rl": true, + "serial_mode": false, + "batched_sampling": true, + "num_batches_to_accumulate": 2, + "worker_num_splits": 1, + "policy_workers_per_policy": 1, + "max_policy_lag": 400, + "num_workers": 2, + "num_envs_per_worker": 1, + "batch_size": 4096, + "num_batches_per_epoch": 8, + "num_epochs": 8, + "rollout": 128, + "recurrence": 1, + "shuffle_minibatches": false, + "gamma": 0.99, + "reward_scale": 1.0, + "reward_clip": 1000.0, + "value_bootstrap": false, + "normalize_returns": true, + "exploration_loss_coeff": 0.003, + "value_loss_coeff": 0.5, + "kl_loss_coeff": 0.0, + "exploration_loss": "entropy", + "gae_lambda": 0.95, + "ppo_clip_ratio": 0.1, + "ppo_clip_value": 0.2, + "with_vtrace": false, + "vtrace_rho": 1.0, + "vtrace_c": 1.0, + "optimizer": "adam", + "adam_eps": 1e-05, + "adam_beta1": 0.9, + "adam_beta2": 0.999, + "max_grad_norm": 0.5, + "learning_rate": 0.00025, + "lr_schedule": "linear_decay", + "lr_schedule_kl_threshold": 0.008, + "lr_adaptive_min": 1e-06, + "lr_adaptive_max": 0.01, + "obs_subtract_mean": 0.0, + "obs_scale": 255.0, + "normalize_input": true, + "normalize_input_keys": null, + "decorrelate_experience_max_seconds": 0, + "decorrelate_envs_on_one_worker": true, + "actor_worker_gpus": [ + 0 + ], + "set_workers_cpu_affinity": true, + "force_envs_single_thread": false, + "default_niceness": 0, + "log_to_file": true, + "experiment_summaries_interval": 1, + "flush_summaries_interval": 30, + "stats_avg": 100, + "summaries_use_frameskip": true, + "heartbeat_interval": 20, + "heartbeat_reporting_interval": 180, + "train_for_env_steps": 25000000, + "train_for_seconds": 10000000000, + "save_every_sec": 600, + "keep_checkpoints": 5, + "load_checkpoint_kind": "latest", + "save_milestones_sec": -1, + "save_best_every_sec": 5, + "save_best_metric": "reward", + "save_best_after": 100000, + "benchmark": false, + "encoder_mlp_layers": [ + 512, + 512 + ], + "encoder_conv_architecture": "convnet_atari", + "encoder_conv_mlp_layers": [ + 512 + ], + "use_rnn": false, + "rnn_size": 512, + "rnn_type": "gru", + "rnn_num_layers": 1, + "decoder_mlp_layers": [], + "nonlinearity": "elu", + "policy_initialization": "orthogonal", + "policy_init_gain": 1.0, + "actor_critic_share_weights": true, + "adaptive_stddev": true, + "continuous_tanh_scale": 0.0, + "initial_stddev": 1.0, + "use_env_info_cache": false, + "env_gpu_actions": true, + "env_gpu_observations": true, + "env_frameskip": 1, + "env_framestack": 1, + "pixel_format": "CHW", + "use_record_episode_statistics": false, + "with_wandb": true, + "wandb_user": null, + "wandb_project": "latency-sensitive-bench", + "wandb_group": "flappy-fs4-fixed_l2", + "wandb_job_type": "sample_factory", + "wandb_tags": [ + "factor_sweep", + "flappy", + "frame_stack", + "fixed", + "fixed_l2", + "fs4", + "seed14" + ], + "with_pbt": false, + "pbt_mix_policies_in_one_env": true, + "pbt_period_env_steps": 5000000, + "pbt_start_mutation": 20000000, + "pbt_replace_fraction": 0.3, + "pbt_mutation_rate": 0.15, + "pbt_replace_reward_gap": 0.1, + "pbt_replace_reward_gap_absolute": 1e-06, + "pbt_optimize_gamma": false, + "pbt_target_objective": "true_objective", + "pbt_perturb_min": 1.1, + "pbt_perturb_max": 1.5, + "gym_id": "FlappyBird-v0", + "env_fps": 30.0, + "obs_fps": 30.0, + "use_lidar": false, + "normalize_obs": true, + "audio_on": false, + "screen_size": "", + "obs_resize": "84,84", + "use_gpu_render": true, + "simulator": "gpu", + "gpu_render_device": "auto", + "gpu_render_batch_size": 128, + "gpu_render_profile": false, + "gpu_render_profile_interval": 200, + "pipe_gap": 100, + "bird_color": "yellow", + "pipe_color": "green", + "background": "day", + "score_limit": -1, + "frame_stack": 4, + "debug": false, + "debug_timelimit_diagnostics": false, + "max_episode_steps": 0, + "mode": "train", + "latency_type": "fixed", + "fixed_latency_ms": 66.66666666666667, + "mean_latency_ms": null, + "std_latency_ms": null, + "min_latency_ms": null, + "max_latency_ms": null, + "latency_seed": null, + "add_latency_info": false, + "max_pending_actions": null, + "hold_policy": "one_frame_then_noop", + "ordering_policy": "latest_ready", + "eval_episodes": 100, + "eval_parallel_envs": 100, + "eval_latency_raw_frame_values": "0,1,2,3,4,5", + "eval_max_steps": 3600, + "eval_deterministic": true, + "eval_raw_reward": false, + "episode_metrics_path": "results/checkpoints_factor_sweeps/flappy/context_window/flappy_frame_stack_fixed_l2_fs4_seed14/episode_metrics.jsonl", + "command_line": "--mode train --algo APPO --env latency_flappy --experiment flappy_frame_stack_fixed_l2_fs4_seed14 --train_dir results/checkpoints_factor_sweeps/flappy/context_window --restart_behavior resume --device gpu --actor_worker_gpus 0 --env_gpu_observations True --env_gpu_actions True --gpu-render-batch-size 128 --seed 14 --episode_metrics_path results/checkpoints_factor_sweeps/flappy/context_window/flappy_frame_stack_fixed_l2_fs4_seed14/episode_metrics.jsonl --train_for_env_steps 25000000 --num_workers 2 --num_envs_per_worker 1 --num_policies 1 --batch_size 4096 --rollout 128 --recurrence 1 --num_epochs 8 --num_batches_per_epoch 8 --worker_num_splits 1 --max_policy_lag 400 --learning_rate 0.00025 --gamma 0.99 --gae_lambda 0.95 --ppo_clip_ratio 0.1 --ppo_clip_value 0.2 --value_loss_coeff 0.5 --max_grad_norm 0.5 --save_every_sec 600 --keep_checkpoints 5 --stats_avg 100 --experiment_summaries_interval 1 --batched_sampling True --async_rl True --use_rnn False --normalize_returns True --normalize_input True --latency-type fixed --fixed-latency-ms 66.66666666666667 --add-latency-info False --eval-episodes 100 --eval-parallel-envs 100 --eval-max-steps 3600 --eval-deterministic True --with_wandb True --wandb_project latency-sensitive-bench --wandb_group flappy-fs4-fixed_l2 --wandb_job_type sample_factory --wandb_tags factor_sweep flappy frame_stack fixed fixed_l2 fs4 seed14 --gym_id FlappyBird-v0 --env-fps 30 --obs-fps 30.0 --use_lidar False --normalize_obs True --audio_on False --obs_resize 84,84 --use-gpu-render True --simulator gpu --gpu-render-device auto --gpu-render-profile False --gpu-render-profile-interval 200 --pipe_gap 100 --bird_color yellow --pipe_color green --background day --frame_stack 4 --debug False --debug-timelimit-diagnostics False --hold-policy one_frame_then_noop --ordering-policy latest_ready", + "cli_args": { + "algo": "APPO", + "env": "latency_flappy", + "experiment": "flappy_frame_stack_fixed_l2_fs4_seed14", + "train_dir": "results/checkpoints_factor_sweeps/flappy/context_window", + "restart_behavior": "resume", + "device": "gpu", + "seed": 14, + "num_policies": 1, + "async_rl": true, + "batched_sampling": true, + "worker_num_splits": 1, + "max_policy_lag": 400, + "num_workers": 2, + "num_envs_per_worker": 1, + "batch_size": 4096, + "num_batches_per_epoch": 8, + "num_epochs": 8, + "rollout": 128, + "recurrence": 1, + "gamma": 0.99, + "normalize_returns": true, + "value_loss_coeff": 0.5, + "gae_lambda": 0.95, + "ppo_clip_ratio": 0.1, + "ppo_clip_value": 0.2, + "max_grad_norm": 0.5, + "learning_rate": 0.00025, + "normalize_input": true, + "actor_worker_gpus": [ + 0 + ], + "experiment_summaries_interval": 1, + "stats_avg": 100, + "train_for_env_steps": 25000000, + "save_every_sec": 600, + "keep_checkpoints": 5, + "use_rnn": false, + "env_gpu_actions": true, + "env_gpu_observations": true, + "with_wandb": true, + "wandb_project": "latency-sensitive-bench", + "wandb_group": "flappy-fs4-fixed_l2", + "wandb_job_type": "sample_factory", + "wandb_tags": [ + "factor_sweep", + "flappy", + "frame_stack", + "fixed", + "fixed_l2", + "fs4", + "seed14" + ], + "gym_id": "FlappyBird-v0", + "env_fps": 30.0, + "obs_fps": 30.0, + "use_lidar": false, + "normalize_obs": true, + "audio_on": false, + "obs_resize": "84,84", + "use_gpu_render": true, + "simulator": "gpu", + "gpu_render_device": "auto", + "gpu_render_batch_size": 128, + "gpu_render_profile": false, + "gpu_render_profile_interval": 200, + "pipe_gap": 100, + "bird_color": "yellow", + "pipe_color": "green", + "background": "day", + "frame_stack": 4, + "debug": false, + "debug_timelimit_diagnostics": false, + "mode": "train", + "latency_type": "fixed", + "fixed_latency_ms": 66.66666666666667, + "add_latency_info": false, + "hold_policy": "one_frame_then_noop", + "ordering_policy": "latest_ready", + "eval_episodes": 100, + "eval_parallel_envs": 100, + "eval_max_steps": 3600, + "eval_deterministic": true, + "episode_metrics_path": "results/checkpoints_factor_sweeps/flappy/context_window/flappy_frame_stack_fixed_l2_fs4_seed14/episode_metrics.jsonl" + }, + "git_hash": "284fe8ace24f0e8a40c03c5b559969abd7caeb29", + "git_repo_name": "git@github.com:ZihanWang314/latency-sensitive-bench.git", + "eval_env_frameskip": 1, + "output_dir": "outputs/factor_sweeps/flappy/context_window/train/frame_stack/fixed_l2/fs4/seed_14", + "wandb_unique_id": "flappy-fs4-fixed_l2-s14" +} \ No newline at end of file diff --git a/factor_sweeps/flappy/context_window/train/factor_sweep:flappy:frame_stack:fixed_l2:fs4:obs30:stride1:seed14/episode_metrics.jsonl b/factor_sweeps/flappy/context_window/train/factor_sweep:flappy:frame_stack:fixed_l2:fs4:obs30:stride1:seed14/episode_metrics.jsonl new file mode 100644 index 0000000000000000000000000000000000000000..04ca760fdc876bb3f97022ccd6fc5dc711f653cb --- /dev/null +++ b/factor_sweeps/flappy/context_window/train/factor_sweep:flappy:frame_stack:fixed_l2:fs4:obs30:stride1:seed14/episode_metrics.jsonl @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ade23a58e5a04c6c00c2e1c29205fb4c4085614fb5ef64771c8530e8e830b53c +size 23363204 diff --git a/factor_sweeps/flappy/context_window/train/factor_sweep:flappy:frame_stack:fixed_l2:fs4:obs30:stride1:seed14/git.diff b/factor_sweeps/flappy/context_window/train/factor_sweep:flappy:frame_stack:fixed_l2:fs4:obs30:stride1:seed14/git.diff new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/factor_sweeps/flappy/context_window/train/factor_sweep:flappy:frame_stack:fixed_l2:fs4:obs30:stride1:seed14/sf_log.txt b/factor_sweeps/flappy/context_window/train/factor_sweep:flappy:frame_stack:fixed_l2:fs4:obs30:stride1:seed14/sf_log.txt new file mode 100644 index 0000000000000000000000000000000000000000..b80fd29dfcdf52e0a36b28d562c903db53315cd2 --- /dev/null +++ b/factor_sweeps/flappy/context_window/train/factor_sweep:flappy:frame_stack:fixed_l2:fs4:obs30:stride1:seed14/sf_log.txt @@ -0,0 +1,5532 @@ +[2026-06-02 16:37:37,722][246448] Saving configuration to results/checkpoints_factor_sweeps/flappy/context_window/flappy_frame_stack_fixed_l2_fs4_seed14/config.json... +[2026-06-02 16:37:37,787][246448] Using GPUs [0] for process 0 (actually maps to GPUs [1]) +[2026-06-02 16:37:37,788][246448] Rollout worker 0 uses device cuda:0 +[2026-06-02 16:37:37,789][246448] Using GPUs [0] for process 1 (actually maps to GPUs [1]) +[2026-06-02 16:37:37,789][246448] Rollout worker 1 uses device cuda:0 +[2026-06-02 16:37:39,257][246448] Using GPUs [0] for process 0 (actually maps to GPUs [1]) +[2026-06-02 16:37:39,258][246448] InferenceWorker_p0-w0: min num requests: 1 +[2026-06-02 16:37:39,262][246448] Using GPUs [0] for process 0 (actually maps to GPUs [1]) +[2026-06-02 16:37:39,266][246448] Using GPUs [0] for process 1 (actually maps to GPUs [1]) +[2026-06-02 16:37:39,267][246448] Starting all processes... +[2026-06-02 16:37:39,267][246448] Starting process learner_proc0 +[2026-06-02 16:37:40,467][246448] Starting all processes... +[2026-06-02 16:37:40,471][246448] Starting process inference_proc0-0 +[2026-06-02 16:37:40,472][246448] Starting process rollout_proc0 +[2026-06-02 16:37:40,472][246448] Starting process rollout_proc1 +[2026-06-02 16:37:40,861][247399] Using GPUs [0] for process 0 (actually maps to GPUs [1]) +[2026-06-02 16:37:40,861][247399] Set environment var CUDA_VISIBLE_DEVICES to '1' (GPU indices [0]) for learning process 0 +[2026-06-02 16:37:40,861][247399] Num visible devices: 1 +[2026-06-02 16:37:40,862][247399] Setting fixed seed 14 +[2026-06-02 16:37:40,862][247399] Using GPUs [0] for process 0 (actually maps to GPUs [1]) +[2026-06-02 16:37:40,863][247399] Initializing actor-critic model on device cuda:0 +[2026-06-02 16:37:40,863][247399] RunningMeanStd input shape: (12, 84, 84) +[2026-06-02 16:37:40,892][247399] RunningMeanStd input shape: (1,) +[2026-06-02 16:37:40,901][247399] ConvEncoder: input_channels=12 +[2026-06-02 16:37:40,972][247399] Conv encoder output size: 512 +[2026-06-02 16:37:40,973][247399] Created Actor Critic model with architecture: +[2026-06-02 16:37:40,973][247399] ActorCriticSharedWeights( + (obs_normalizer): ObservationNormalizer( + (running_mean_std): RunningMeanStdDictInPlace( + (running_mean_std): ModuleDict( + (obs): RunningMeanStdInPlace() + ) + ) + ) + (returns_normalizer): RecursiveScriptModule(original_name=RunningMeanStdInPlace) + (encoder): MultiInputEncoder( + (encoders): ModuleDict( + (obs): ConvEncoder( + (enc): RecursiveScriptModule( + original_name=ConvEncoderImpl + (conv_head): RecursiveScriptModule( + original_name=Sequential + (0): RecursiveScriptModule(original_name=Conv2d) + (1): RecursiveScriptModule(original_name=ELU) + (2): RecursiveScriptModule(original_name=Conv2d) + (3): RecursiveScriptModule(original_name=ELU) + (4): RecursiveScriptModule(original_name=Conv2d) + (5): RecursiveScriptModule(original_name=ELU) + ) + (mlp_layers): RecursiveScriptModule( + original_name=Sequential + (0): RecursiveScriptModule(original_name=Linear) + (1): RecursiveScriptModule(original_name=ELU) + ) + ) + ) + ) + ) + (core): ModelCoreIdentity() + (decoder): MlpDecoder( + (mlp): Identity() + ) + (critic_linear): Linear(in_features=512, out_features=1, bias=True) + (action_parameterization): ActionParameterizationDefault( + (distribution_linear): Linear(in_features=512, out_features=2, bias=True) + ) +) +[2026-06-02 16:37:40,976][247399] Using optimizer +[2026-06-02 16:37:41,692][247399] No checkpoints found +[2026-06-02 16:37:41,692][247399] Did not load from checkpoint, starting from scratch! +[2026-06-02 16:37:41,692][247399] Initialized policy 0 weights for model version 0 +[2026-06-02 16:37:41,694][247399] LearnerWorker_p0 finished initialization! +[2026-06-02 16:37:41,694][247399] Using GPUs [0] for process 0 (actually maps to GPUs [1]) +[2026-06-02 16:37:42,489][247478] Using GPUs [0] for process 0 (actually maps to GPUs [1]) +[2026-06-02 16:37:42,490][247478] Set environment var CUDA_VISIBLE_DEVICES to '1' (GPU indices [0]) for inference process 0 +[2026-06-02 16:37:42,490][247478] Num visible devices: 1 +[2026-06-02 16:37:42,491][247478] RunningMeanStd input shape: (12, 84, 84) +[2026-06-02 16:37:42,515][247478] RunningMeanStd input shape: (1,) +[2026-06-02 16:37:42,523][247478] ConvEncoder: input_channels=12 +[2026-06-02 16:37:42,582][247478] Conv encoder output size: 512 +[2026-06-02 16:37:42,589][246448] Inference worker 0-0 is ready! +[2026-06-02 16:37:42,590][246448] All inference workers are ready! Signal rollout workers to start! +[2026-06-02 16:37:42,674][247480] Worker 1 uses CPU cores [192, 193, 194, 195, 196, 197, 198, 199, 200, 201, 202, 203, 204, 205, 206, 207, 208, 209, 210, 211, 212, 213, 214, 215, 216, 217, 218, 219, 220, 221, 222, 223, 224, 225, 226, 227, 228, 229, 230, 231, 232, 233, 234, 235, 236, 237, 238, 239, 240, 241, 242, 243, 244, 245, 246, 247, 248, 249, 250, 251, 252, 253, 254, 255, 256, 257, 258, 259, 260, 261, 262, 263, 264, 265, 266, 267, 268, 269, 270, 271, 272, 273, 274, 275, 276, 277, 278, 279, 280, 281, 282, 283, 284, 285, 286, 287, 288, 289, 290, 291, 292, 293, 294, 295, 296, 297, 298, 299, 300, 301, 302, 303, 304, 305, 306, 307, 308, 309, 310, 311, 312, 313, 314, 315, 316, 317, 318, 319, 320, 321, 322, 323, 324, 325, 326, 327, 328, 329, 330, 331, 332, 333, 334, 335, 336, 337, 338, 339, 340, 341, 342, 343, 344, 345, 346, 347, 348, 349, 350, 351, 352, 353, 354, 355, 356, 357, 358, 359, 360, 361, 362, 363, 364, 365, 366, 367, 368, 369, 370, 371, 372, 373, 374, 375, 376, 377, 378, 379, 380, 381, 382, 383] +[2026-06-02 16:37:42,675][247480] Using GPUs [0] for process 1 (actually maps to GPUs [1]) +[2026-06-02 16:37:42,675][247480] Set environment var CUDA_VISIBLE_DEVICES to '1' (GPU indices [0]) for actor process 1 +[2026-06-02 16:37:42,675][247480] Num visible devices: 1 +[2026-06-02 16:37:42,676][247480] EnvRunner 1-0 uses policy 0 +[2026-06-02 16:37:42,693][247479] Worker 0 uses CPU cores [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63, 64, 65, 66, 67, 68, 69, 70, 71, 72, 73, 74, 75, 76, 77, 78, 79, 80, 81, 82, 83, 84, 85, 86, 87, 88, 89, 90, 91, 92, 93, 94, 95, 96, 97, 98, 99, 100, 101, 102, 103, 104, 105, 106, 107, 108, 109, 110, 111, 112, 113, 114, 115, 116, 117, 118, 119, 120, 121, 122, 123, 124, 125, 126, 127, 128, 129, 130, 131, 132, 133, 134, 135, 136, 137, 138, 139, 140, 141, 142, 143, 144, 145, 146, 147, 148, 149, 150, 151, 152, 153, 154, 155, 156, 157, 158, 159, 160, 161, 162, 163, 164, 165, 166, 167, 168, 169, 170, 171, 172, 173, 174, 175, 176, 177, 178, 179, 180, 181, 182, 183, 184, 185, 186, 187, 188, 189, 190, 191] +[2026-06-02 16:37:42,693][247479] Using GPUs [0] for process 0 (actually maps to GPUs [1]) +[2026-06-02 16:37:42,693][247479] Set environment var CUDA_VISIBLE_DEVICES to '1' (GPU indices [0]) for actor process 0 +[2026-06-02 16:37:42,693][247479] Num visible devices: 1 +[2026-06-02 16:37:42,694][247479] EnvRunner 0-0 uses policy 0 +[2026-06-02 16:37:44,726][246448] Fps is (10 sec: nan, 60 sec: nan, 300 sec: nan). Total num frames: 0. Throughput: 0: nan. Samples: 0. Policy #0 lag: (min: -1.0, avg: -1.0, max: -1.0) +[2026-06-02 16:37:44,727][246448] Avg episode reward: [(0, '-7.439')] +[2026-06-02 16:37:45,437][247399] Signal inference workers to stop experience collection... +[2026-06-02 16:37:45,448][247478] InferenceWorker_p0-w0: stopping experience collection +[2026-06-02 16:37:47,030][247399] Signal inference workers to resume experience collection... +[2026-06-02 16:37:47,030][247478] InferenceWorker_p0-w0: resuming experience collection +[2026-06-02 16:37:47,330][247478] Updated weights for policy 0, policy_version 73 (0.0051) +[2026-06-02 16:37:47,502][247478] Updated weights for policy 0, policy_version 83 (0.0009) +[2026-06-02 16:37:47,668][247478] Updated weights for policy 0, policy_version 93 (0.0008) +[2026-06-02 16:37:47,859][247478] Updated weights for policy 0, policy_version 103 (0.0009) +[2026-06-02 16:37:48,049][247478] Updated weights for policy 0, policy_version 113 (0.0008) +[2026-06-02 16:37:48,223][247478] Updated weights for policy 0, policy_version 123 (0.0009) +[2026-06-02 16:37:48,701][247478] Updated weights for policy 0, policy_version 135 (0.0009) +[2026-06-02 16:37:48,873][247478] Updated weights for policy 0, policy_version 145 (0.0008) +[2026-06-02 16:37:49,064][247478] Updated weights for policy 0, policy_version 155 (0.0008) +[2026-06-02 16:37:49,248][247478] Updated weights for policy 0, policy_version 165 (0.0008) +[2026-06-02 16:37:49,421][247478] Updated weights for policy 0, policy_version 175 (0.0008) +[2026-06-02 16:37:49,616][247478] Updated weights for policy 0, policy_version 185 (0.0009) +[2026-06-02 16:37:49,725][246448] Fps is (10 sec: 13108.1, 60 sec: 13108.1, 300 sec: 13108.1). Total num frames: 65536. Throughput: 0: 20379.0. Samples: 101888. Policy #0 lag: (min: 63.0, avg: 63.0, max: 63.0) +[2026-06-02 16:37:49,727][246448] Avg episode reward: [(0, '-7.181')] +[2026-06-02 16:37:50,118][247478] Updated weights for policy 0, policy_version 195 (0.0007) +[2026-06-02 16:37:50,284][247478] Updated weights for policy 0, policy_version 205 (0.0006) +[2026-06-02 16:37:50,453][247478] Updated weights for policy 0, policy_version 215 (0.0009) +[2026-06-02 16:37:50,644][247478] Updated weights for policy 0, policy_version 225 (0.0009) +[2026-06-02 16:37:50,835][247478] Updated weights for policy 0, policy_version 235 (0.0008) +[2026-06-02 16:37:51,007][247478] Updated weights for policy 0, policy_version 245 (0.0009) +[2026-06-02 16:37:51,191][247478] Updated weights for policy 0, policy_version 255 (0.0012) +[2026-06-02 16:37:51,593][247478] Updated weights for policy 0, policy_version 265 (0.0009) +[2026-06-02 16:37:51,771][247478] Updated weights for policy 0, policy_version 275 (0.0009) +[2026-06-02 16:37:51,964][247478] Updated weights for policy 0, policy_version 285 (0.0009) +[2026-06-02 16:37:52,144][247478] Updated weights for policy 0, policy_version 295 (0.0009) +[2026-06-02 16:37:52,321][247478] Updated weights for policy 0, policy_version 305 (0.0009) +[2026-06-02 16:37:52,515][247478] Updated weights for policy 0, policy_version 315 (0.0009) +[2026-06-02 16:37:53,014][247478] Updated weights for policy 0, policy_version 325 (0.0009) +[2026-06-02 16:37:53,184][247478] Updated weights for policy 0, policy_version 335 (0.0009) +[2026-06-02 16:37:53,365][247478] Updated weights for policy 0, policy_version 345 (0.0009) +[2026-06-02 16:37:53,555][247478] Updated weights for policy 0, policy_version 355 (0.0009) +[2026-06-02 16:37:53,734][247478] Updated weights for policy 0, policy_version 365 (0.0009) +[2026-06-02 16:37:53,915][247478] Updated weights for policy 0, policy_version 375 (0.0009) +[2026-06-02 16:37:54,361][247478] Updated weights for policy 0, policy_version 385 (0.0009) +[2026-06-02 16:37:54,533][247478] Updated weights for policy 0, policy_version 395 (0.0009) +[2026-06-02 16:37:54,701][247478] Updated weights for policy 0, policy_version 405 (0.0009) +[2026-06-02 16:37:54,725][246448] Fps is (10 sec: 19661.6, 60 sec: 19661.6, 300 sec: 19661.6). Total num frames: 196608. Throughput: 0: 23872.9. Samples: 238720. Policy #0 lag: (min: 63.0, avg: 85.1, max: 127.0) +[2026-06-02 16:37:54,727][246448] Avg episode reward: [(0, '-5.714')] +[2026-06-02 16:37:54,893][247478] Updated weights for policy 0, policy_version 415 (0.0009) +[2026-06-02 16:37:55,076][247478] Updated weights for policy 0, policy_version 425 (0.0008) +[2026-06-02 16:37:55,267][247478] Updated weights for policy 0, policy_version 435 (0.0008) +[2026-06-02 16:37:55,452][247478] Updated weights for policy 0, policy_version 445 (0.0008) +[2026-06-02 16:37:55,508][247399] Saving new best policy, reward=-5.714! +[2026-06-02 16:37:55,906][247478] Updated weights for policy 0, policy_version 455 (0.0009) +[2026-06-02 16:37:56,076][247478] Updated weights for policy 0, policy_version 465 (0.0008) +[2026-06-02 16:37:56,268][247478] Updated weights for policy 0, policy_version 475 (0.0008) +[2026-06-02 16:37:56,455][247478] Updated weights for policy 0, policy_version 485 (0.0008) +[2026-06-02 16:37:56,641][247478] Updated weights for policy 0, policy_version 495 (0.0010) +[2026-06-02 16:37:56,829][247478] Updated weights for policy 0, policy_version 505 (0.0009) +[2026-06-02 16:37:57,313][247478] Updated weights for policy 0, policy_version 515 (0.0006) +[2026-06-02 16:37:57,479][247478] Updated weights for policy 0, policy_version 525 (0.0006) +[2026-06-02 16:37:57,668][247478] Updated weights for policy 0, policy_version 535 (0.0008) +[2026-06-02 16:37:57,858][247478] Updated weights for policy 0, policy_version 545 (0.0009) +[2026-06-02 16:37:58,045][247478] Updated weights for policy 0, policy_version 555 (0.0007) +[2026-06-02 16:37:58,233][247478] Updated weights for policy 0, policy_version 565 (0.0008) +[2026-06-02 16:37:58,413][247478] Updated weights for policy 0, policy_version 575 (0.0007) +[2026-06-02 16:37:58,805][247478] Updated weights for policy 0, policy_version 585 (0.0009) +[2026-06-02 16:37:58,979][247478] Updated weights for policy 0, policy_version 595 (0.0008) +[2026-06-02 16:37:59,179][247478] Updated weights for policy 0, policy_version 605 (0.0009) +[2026-06-02 16:37:59,246][246448] Heartbeat connected on Batcher_0 +[2026-06-02 16:37:59,267][246448] Heartbeat connected on RolloutWorker_w0 +[2026-06-02 16:37:59,270][246448] Heartbeat connected on InferenceWorker_p0-w0 +[2026-06-02 16:37:59,333][246448] Heartbeat connected on RolloutWorker_w1 +[2026-06-02 16:37:59,356][247478] Updated weights for policy 0, policy_version 615 (0.0009) +[2026-06-02 16:37:59,534][247478] Updated weights for policy 0, policy_version 625 (0.0009) +[2026-06-02 16:37:59,725][246448] Fps is (10 sec: 22937.8, 60 sec: 19661.4, 300 sec: 19661.4). Total num frames: 294912. Throughput: 0: 20583.0. Samples: 308736. Policy #0 lag: (min: 16.0, avg: 51.9, max: 80.0) +[2026-06-02 16:37:59,726][246448] Avg episode reward: [(0, '-0.890')] +[2026-06-02 16:37:59,728][247478] Updated weights for policy 0, policy_version 635 (0.0008) +[2026-06-02 16:37:59,826][247399] Saving new best policy, reward=-0.890! +[2026-06-02 16:37:59,846][246448] Heartbeat connected on LearnerWorker_p0 +[2026-06-02 16:38:00,242][247478] Updated weights for policy 0, policy_version 645 (0.0008) +[2026-06-02 16:38:00,420][247478] Updated weights for policy 0, policy_version 655 (0.0008) +[2026-06-02 16:38:00,588][247478] Updated weights for policy 0, policy_version 665 (0.0009) +[2026-06-02 16:38:00,777][247478] Updated weights for policy 0, policy_version 675 (0.0010) +[2026-06-02 16:38:00,962][247478] Updated weights for policy 0, policy_version 685 (0.0008) +[2026-06-02 16:38:01,150][247478] Updated weights for policy 0, policy_version 695 (0.0008) +[2026-06-02 16:38:01,601][247478] Updated weights for policy 0, policy_version 705 (0.0007) +[2026-06-02 16:38:01,767][247478] Updated weights for policy 0, policy_version 715 (0.0008) +[2026-06-02 16:38:01,938][247478] Updated weights for policy 0, policy_version 725 (0.0009) +[2026-06-02 16:38:02,132][247478] Updated weights for policy 0, policy_version 735 (0.0008) +[2026-06-02 16:38:02,301][247478] Updated weights for policy 0, policy_version 745 (0.0008) +[2026-06-02 16:38:02,478][247478] Updated weights for policy 0, policy_version 755 (0.0009) +[2026-06-02 16:38:02,657][247478] Updated weights for policy 0, policy_version 765 (0.0006) +[2026-06-02 16:38:03,249][247478] Updated weights for policy 0, policy_version 775 (0.0006) +[2026-06-02 16:38:03,443][247478] Updated weights for policy 0, policy_version 786 (0.0005) +[2026-06-02 16:38:03,603][247478] Updated weights for policy 0, policy_version 796 (0.0005) +[2026-06-02 16:38:03,785][247478] Updated weights for policy 0, policy_version 806 (0.0007) +[2026-06-02 16:38:03,968][247478] Updated weights for policy 0, policy_version 816 (0.0009) +[2026-06-02 16:38:04,148][247478] Updated weights for policy 0, policy_version 826 (0.0008) +[2026-06-02 16:38:04,694][247478] Updated weights for policy 0, policy_version 836 (0.0008) +[2026-06-02 16:38:04,725][246448] Fps is (10 sec: 22937.6, 60 sec: 21299.6, 300 sec: 21299.6). Total num frames: 425984. Throughput: 0: 21946.0. Samples: 438912. Policy #0 lag: (min: 63.0, avg: 83.3, max: 127.0) +[2026-06-02 16:38:04,727][246448] Avg episode reward: [(0, '3.835')] +[2026-06-02 16:38:04,848][247478] Updated weights for policy 0, policy_version 846 (0.0008) +[2026-06-02 16:38:05,025][247478] Updated weights for policy 0, policy_version 856 (0.0008) +[2026-06-02 16:38:05,200][247478] Updated weights for policy 0, policy_version 866 (0.0009) +[2026-06-02 16:38:05,382][247478] Updated weights for policy 0, policy_version 876 (0.0009) +[2026-06-02 16:38:05,557][247478] Updated weights for policy 0, policy_version 886 (0.0008) +[2026-06-02 16:38:05,728][247399] Saving new best policy, reward=3.835! +[2026-06-02 16:38:05,731][247478] Updated weights for policy 0, policy_version 896 (0.0009) +[2026-06-02 16:38:06,419][247478] Updated weights for policy 0, policy_version 906 (0.0009) +[2026-06-02 16:38:06,604][247478] Updated weights for policy 0, policy_version 917 (0.0008) +[2026-06-02 16:38:06,778][247478] Updated weights for policy 0, policy_version 927 (0.0008) +[2026-06-02 16:38:06,954][247478] Updated weights for policy 0, policy_version 937 (0.0009) +[2026-06-02 16:38:07,139][247478] Updated weights for policy 0, policy_version 947 (0.0008) +[2026-06-02 16:38:07,304][247478] Updated weights for policy 0, policy_version 957 (0.0008) +[2026-06-02 16:38:07,880][247478] Updated weights for policy 0, policy_version 969 (0.0009) +[2026-06-02 16:38:08,047][247478] Updated weights for policy 0, policy_version 979 (0.0008) +[2026-06-02 16:38:08,222][247478] Updated weights for policy 0, policy_version 989 (0.0008) +[2026-06-02 16:38:08,398][247478] Updated weights for policy 0, policy_version 999 (0.0009) +[2026-06-02 16:38:08,574][247478] Updated weights for policy 0, policy_version 1009 (0.0009) +[2026-06-02 16:38:08,767][247478] Updated weights for policy 0, policy_version 1020 (0.0009) +[2026-06-02 16:38:09,433][247478] Updated weights for policy 0, policy_version 1030 (0.0009) +[2026-06-02 16:38:09,629][247478] Updated weights for policy 0, policy_version 1041 (0.0008) +[2026-06-02 16:38:09,725][246448] Fps is (10 sec: 22937.6, 60 sec: 20971.9, 300 sec: 20971.9). Total num frames: 524288. Throughput: 0: 22543.8. Samples: 563584. Policy #0 lag: (min: 27.0, avg: 59.0, max: 91.0) +[2026-06-02 16:38:09,726][246448] Avg episode reward: [(0, '3.898')] +[2026-06-02 16:38:09,828][247478] Updated weights for policy 0, policy_version 1053 (0.0008) +[2026-06-02 16:38:10,018][247478] Updated weights for policy 0, policy_version 1065 (0.0008) +[2026-06-02 16:38:10,193][247478] Updated weights for policy 0, policy_version 1075 (0.0008) +[2026-06-02 16:38:10,377][247478] Updated weights for policy 0, policy_version 1085 (0.0008) +[2026-06-02 16:38:10,425][247399] Saving new best policy, reward=3.898! +[2026-06-02 16:38:11,066][247478] Updated weights for policy 0, policy_version 1095 (0.0008) +[2026-06-02 16:38:11,229][247478] Updated weights for policy 0, policy_version 1105 (0.0008) +[2026-06-02 16:38:11,399][247478] Updated weights for policy 0, policy_version 1115 (0.0008) +[2026-06-02 16:38:11,581][247478] Updated weights for policy 0, policy_version 1125 (0.0008) +[2026-06-02 16:38:11,755][247478] Updated weights for policy 0, policy_version 1135 (0.0008) +[2026-06-02 16:38:11,966][247478] Updated weights for policy 0, policy_version 1148 (0.0008) +[2026-06-02 16:38:12,683][247478] Updated weights for policy 0, policy_version 1159 (0.0009) +[2026-06-02 16:38:12,855][247478] Updated weights for policy 0, policy_version 1169 (0.0008) +[2026-06-02 16:38:13,036][247478] Updated weights for policy 0, policy_version 1180 (0.0009) +[2026-06-02 16:38:13,199][247478] Updated weights for policy 0, policy_version 1190 (0.0008) +[2026-06-02 16:38:13,373][247478] Updated weights for policy 0, policy_version 1200 (0.0008) +[2026-06-02 16:38:13,553][247478] Updated weights for policy 0, policy_version 1210 (0.0008) +[2026-06-02 16:38:14,237][247478] Updated weights for policy 0, policy_version 1220 (0.0009) +[2026-06-02 16:38:14,393][247478] Updated weights for policy 0, policy_version 1230 (0.0008) +[2026-06-02 16:38:14,567][247478] Updated weights for policy 0, policy_version 1240 (0.0008) +[2026-06-02 16:38:14,725][246448] Fps is (10 sec: 19660.9, 60 sec: 20753.4, 300 sec: 20753.4). Total num frames: 622592. Throughput: 0: 20864.3. Samples: 625920. Policy #0 lag: (min: 63.0, avg: 81.2, max: 127.0) +[2026-06-02 16:38:14,726][246448] Avg episode reward: [(0, '4.049')] +[2026-06-02 16:38:14,749][247478] Updated weights for policy 0, policy_version 1250 (0.0008) +[2026-06-02 16:38:14,935][247478] Updated weights for policy 0, policy_version 1260 (0.0008) +[2026-06-02 16:38:15,119][247478] Updated weights for policy 0, policy_version 1272 (0.0008) +[2026-06-02 16:38:15,249][247399] Saving new best policy, reward=4.049! +[2026-06-02 16:38:15,864][247478] Updated weights for policy 0, policy_version 1283 (0.0009) +[2026-06-02 16:38:16,018][247478] Updated weights for policy 0, policy_version 1293 (0.0008) +[2026-06-02 16:38:16,217][247478] Updated weights for policy 0, policy_version 1305 (0.0008) +[2026-06-02 16:38:16,405][247478] Updated weights for policy 0, policy_version 1316 (0.0009) +[2026-06-02 16:38:16,575][247478] Updated weights for policy 0, policy_version 1326 (0.0008) +[2026-06-02 16:38:16,749][247478] Updated weights for policy 0, policy_version 1336 (0.0008) +[2026-06-02 16:38:17,522][247478] Updated weights for policy 0, policy_version 1346 (0.0009) +[2026-06-02 16:38:17,684][247478] Updated weights for policy 0, policy_version 1356 (0.0008) +[2026-06-02 16:38:17,844][247478] Updated weights for policy 0, policy_version 1366 (0.0008) +[2026-06-02 16:38:18,015][247478] Updated weights for policy 0, policy_version 1376 (0.0008) +[2026-06-02 16:38:18,195][247478] Updated weights for policy 0, policy_version 1386 (0.0010) +[2026-06-02 16:38:18,372][247478] Updated weights for policy 0, policy_version 1396 (0.0009) +[2026-06-02 16:38:18,539][247478] Updated weights for policy 0, policy_version 1406 (0.0008) +[2026-06-02 16:38:19,311][247478] Updated weights for policy 0, policy_version 1417 (0.0008) +[2026-06-02 16:38:19,469][247478] Updated weights for policy 0, policy_version 1427 (0.0009) +[2026-06-02 16:38:19,648][247478] Updated weights for policy 0, policy_version 1438 (0.0009) +[2026-06-02 16:38:19,726][246448] Fps is (10 sec: 19660.9, 60 sec: 20597.3, 300 sec: 20597.3). Total num frames: 720896. Throughput: 0: 21050.8. Samples: 736768. Policy #0 lag: (min: 25.0, avg: 42.9, max: 89.0) +[2026-06-02 16:38:19,727][246448] Avg episode reward: [(0, '4.136')] +[2026-06-02 16:38:19,823][247478] Updated weights for policy 0, policy_version 1448 (0.0009) +[2026-06-02 16:38:19,994][247478] Updated weights for policy 0, policy_version 1458 (0.0009) +[2026-06-02 16:38:20,183][247478] Updated weights for policy 0, policy_version 1469 (0.0008) +[2026-06-02 16:38:20,240][247399] Saving new best policy, reward=4.136! +[2026-06-02 16:38:20,978][247478] Updated weights for policy 0, policy_version 1479 (0.0009) +[2026-06-02 16:38:21,170][247478] Updated weights for policy 0, policy_version 1491 (0.0008) +[2026-06-02 16:38:21,352][247478] Updated weights for policy 0, policy_version 1501 (0.0010) +[2026-06-02 16:38:21,517][247478] Updated weights for policy 0, policy_version 1511 (0.0008) +[2026-06-02 16:38:21,690][247478] Updated weights for policy 0, policy_version 1521 (0.0008) +[2026-06-02 16:38:21,874][247478] Updated weights for policy 0, policy_version 1532 (0.0008) +[2026-06-02 16:38:22,662][247478] Updated weights for policy 0, policy_version 1542 (0.0008) +[2026-06-02 16:38:22,838][247478] Updated weights for policy 0, policy_version 1553 (0.0008) +[2026-06-02 16:38:23,005][247478] Updated weights for policy 0, policy_version 1563 (0.0008) +[2026-06-02 16:38:23,185][247478] Updated weights for policy 0, policy_version 1573 (0.0009) +[2026-06-02 16:38:23,358][247478] Updated weights for policy 0, policy_version 1583 (0.0008) +[2026-06-02 16:38:23,532][247478] Updated weights for policy 0, policy_version 1593 (0.0008) +[2026-06-02 16:38:24,313][247478] Updated weights for policy 0, policy_version 1604 (0.0010) +[2026-06-02 16:38:24,466][247478] Updated weights for policy 0, policy_version 1614 (0.0008) +[2026-06-02 16:38:24,653][247478] Updated weights for policy 0, policy_version 1625 (0.0008) +[2026-06-02 16:38:24,726][246448] Fps is (10 sec: 19660.6, 60 sec: 20480.2, 300 sec: 20480.2). Total num frames: 819200. Throughput: 0: 21443.4. Samples: 857728. Policy #0 lag: (min: 63.0, avg: 77.5, max: 127.0) +[2026-06-02 16:38:24,727][246448] Avg episode reward: [(0, '4.457')] +[2026-06-02 16:38:24,828][247478] Updated weights for policy 0, policy_version 1635 (0.0008) +[2026-06-02 16:38:25,003][247478] Updated weights for policy 0, policy_version 1645 (0.0008) +[2026-06-02 16:38:25,176][247478] Updated weights for policy 0, policy_version 1655 (0.0009) +[2026-06-02 16:38:25,323][247399] Saving new best policy, reward=4.452! +[2026-06-02 16:38:25,991][247478] Updated weights for policy 0, policy_version 1666 (0.0009) +[2026-06-02 16:38:26,143][247478] Updated weights for policy 0, policy_version 1676 (0.0008) +[2026-06-02 16:38:26,331][247478] Updated weights for policy 0, policy_version 1687 (0.0008) +[2026-06-02 16:38:26,500][247478] Updated weights for policy 0, policy_version 1697 (0.0008) +[2026-06-02 16:38:26,675][247478] Updated weights for policy 0, policy_version 1707 (0.0009) +[2026-06-02 16:38:26,845][247478] Updated weights for policy 0, policy_version 1717 (0.0008) +[2026-06-02 16:38:27,023][247478] Updated weights for policy 0, policy_version 1727 (0.0009) +[2026-06-02 16:38:27,801][247478] Updated weights for policy 0, policy_version 1737 (0.0009) +[2026-06-02 16:38:28,001][247478] Updated weights for policy 0, policy_version 1749 (0.0008) +[2026-06-02 16:38:28,184][247478] Updated weights for policy 0, policy_version 1760 (0.0008) +[2026-06-02 16:38:28,363][247478] Updated weights for policy 0, policy_version 1770 (0.0008) +[2026-06-02 16:38:28,536][247478] Updated weights for policy 0, policy_version 1780 (0.0009) +[2026-06-02 16:38:28,710][247478] Updated weights for policy 0, policy_version 1790 (0.0008) +[2026-06-02 16:38:29,486][247478] Updated weights for policy 0, policy_version 1801 (0.0008) +[2026-06-02 16:38:29,671][247478] Updated weights for policy 0, policy_version 1812 (0.0008) +[2026-06-02 16:38:29,725][246448] Fps is (10 sec: 19660.7, 60 sec: 20389.2, 300 sec: 20389.2). Total num frames: 917504. Throughput: 0: 20417.6. Samples: 918784. Policy #0 lag: (min: 16.0, avg: 32.1, max: 80.0) +[2026-06-02 16:38:29,727][246448] Avg episode reward: [(0, '4.940')] +[2026-06-02 16:38:29,841][247478] Updated weights for policy 0, policy_version 1822 (0.0008) +[2026-06-02 16:38:30,017][247478] Updated weights for policy 0, policy_version 1832 (0.0008) +[2026-06-02 16:38:30,191][247478] Updated weights for policy 0, policy_version 1842 (0.0008) +[2026-06-02 16:38:30,369][247478] Updated weights for policy 0, policy_version 1852 (0.0008) +[2026-06-02 16:38:30,424][247399] Saving new best policy, reward=4.940! +[2026-06-02 16:38:31,183][247478] Updated weights for policy 0, policy_version 1863 (0.0008) +[2026-06-02 16:38:31,343][247478] Updated weights for policy 0, policy_version 1873 (0.0009) +[2026-06-02 16:38:31,519][247478] Updated weights for policy 0, policy_version 1883 (0.0008) +[2026-06-02 16:38:31,699][247478] Updated weights for policy 0, policy_version 1893 (0.0008) +[2026-06-02 16:38:31,866][247478] Updated weights for policy 0, policy_version 1903 (0.0009) +[2026-06-02 16:38:32,053][247478] Updated weights for policy 0, policy_version 1914 (0.0008) +[2026-06-02 16:38:32,867][247478] Updated weights for policy 0, policy_version 1925 (0.0009) +[2026-06-02 16:38:33,027][247478] Updated weights for policy 0, policy_version 1935 (0.0008) +[2026-06-02 16:38:33,198][247478] Updated weights for policy 0, policy_version 1945 (0.0008) +[2026-06-02 16:38:33,363][247478] Updated weights for policy 0, policy_version 1955 (0.0009) +[2026-06-02 16:38:33,541][247478] Updated weights for policy 0, policy_version 1965 (0.0008) +[2026-06-02 16:38:33,735][247478] Updated weights for policy 0, policy_version 1977 (0.0009) +[2026-06-02 16:38:34,548][247478] Updated weights for policy 0, policy_version 1987 (0.0008) +[2026-06-02 16:38:34,716][247478] Updated weights for policy 0, policy_version 1998 (0.0009) +[2026-06-02 16:38:34,726][246448] Fps is (10 sec: 19660.3, 60 sec: 20316.2, 300 sec: 20316.2). Total num frames: 1015808. Throughput: 0: 20551.0. Samples: 1026688. Policy #0 lag: (min: 63.0, avg: 77.3, max: 127.0) +[2026-06-02 16:38:34,728][246448] Avg episode reward: [(0, '5.405')] +[2026-06-02 16:38:34,898][247478] Updated weights for policy 0, policy_version 2008 (0.0009) +[2026-06-02 16:38:35,094][247478] Updated weights for policy 0, policy_version 2020 (0.0009) +[2026-06-02 16:38:35,269][247478] Updated weights for policy 0, policy_version 2030 (0.0008) +[2026-06-02 16:38:35,448][247478] Updated weights for policy 0, policy_version 2040 (0.0008) +[2026-06-02 16:38:35,571][247399] Saving new best policy, reward=5.405! +[2026-06-02 16:38:36,242][247478] Updated weights for policy 0, policy_version 2050 (0.0008) +[2026-06-02 16:38:36,422][247478] Updated weights for policy 0, policy_version 2061 (0.0009) +[2026-06-02 16:38:36,591][247478] Updated weights for policy 0, policy_version 2071 (0.0008) +[2026-06-02 16:38:36,753][247478] Updated weights for policy 0, policy_version 2081 (0.0008) +[2026-06-02 16:38:36,931][247478] Updated weights for policy 0, policy_version 2091 (0.0008) +[2026-06-02 16:38:37,120][247478] Updated weights for policy 0, policy_version 2102 (0.0008) +[2026-06-02 16:38:37,289][247478] Updated weights for policy 0, policy_version 2112 (0.0009) +[2026-06-02 16:38:38,109][247478] Updated weights for policy 0, policy_version 2122 (0.0009) +[2026-06-02 16:38:38,273][247478] Updated weights for policy 0, policy_version 2132 (0.0008) +[2026-06-02 16:38:38,449][247478] Updated weights for policy 0, policy_version 2142 (0.0008) +[2026-06-02 16:38:38,625][247478] Updated weights for policy 0, policy_version 2152 (0.0008) +[2026-06-02 16:38:38,802][247478] Updated weights for policy 0, policy_version 2163 (0.0009) +[2026-06-02 16:38:38,982][247478] Updated weights for policy 0, policy_version 2173 (0.0008) +[2026-06-02 16:38:39,726][246448] Fps is (10 sec: 19660.8, 60 sec: 20256.7, 300 sec: 20256.7). Total num frames: 1114112. Throughput: 0: 20218.3. Samples: 1148544. Policy #0 lag: (min: 56.0, avg: 71.7, max: 120.0) +[2026-06-02 16:38:39,727][246448] Avg episode reward: [(0, '5.991')] +[2026-06-02 16:38:39,812][247478] Updated weights for policy 0, policy_version 2184 (0.0009) +[2026-06-02 16:38:40,001][247478] Updated weights for policy 0, policy_version 2196 (0.0008) +[2026-06-02 16:38:40,172][247478] Updated weights for policy 0, policy_version 2206 (0.0008) +[2026-06-02 16:38:40,343][247478] Updated weights for policy 0, policy_version 2216 (0.0008) +[2026-06-02 16:38:40,515][247478] Updated weights for policy 0, policy_version 2226 (0.0009) +[2026-06-02 16:38:40,690][247478] Updated weights for policy 0, policy_version 2236 (0.0009) +[2026-06-02 16:38:40,743][247399] Saving new best policy, reward=5.991! +[2026-06-02 16:38:41,526][247478] Updated weights for policy 0, policy_version 2248 (0.0009) +[2026-06-02 16:38:41,686][247478] Updated weights for policy 0, policy_version 2258 (0.0008) +[2026-06-02 16:38:41,858][247478] Updated weights for policy 0, policy_version 2268 (0.0009) +[2026-06-02 16:38:42,027][247478] Updated weights for policy 0, policy_version 2278 (0.0008) +[2026-06-02 16:38:42,213][247478] Updated weights for policy 0, policy_version 2289 (0.0009) +[2026-06-02 16:38:42,386][247478] Updated weights for policy 0, policy_version 2299 (0.0009) +[2026-06-02 16:38:43,195][247478] Updated weights for policy 0, policy_version 2310 (0.0009) +[2026-06-02 16:38:43,359][247478] Updated weights for policy 0, policy_version 2320 (0.0008) +[2026-06-02 16:38:43,545][247478] Updated weights for policy 0, policy_version 2331 (0.0009) +[2026-06-02 16:38:43,748][247478] Updated weights for policy 0, policy_version 2343 (0.0009) +[2026-06-02 16:38:43,938][247478] Updated weights for policy 0, policy_version 2354 (0.0009) +[2026-06-02 16:38:44,120][247478] Updated weights for policy 0, policy_version 2365 (0.0009) +[2026-06-02 16:38:44,726][246448] Fps is (10 sec: 19660.6, 60 sec: 20206.9, 300 sec: 20206.9). Total num frames: 1212416. Throughput: 0: 19757.3. Samples: 1197824. Policy #0 lag: (min: 63.0, avg: 76.0, max: 127.0) +[2026-06-02 16:38:44,728][246448] Avg episode reward: [(0, '6.020')] +[2026-06-02 16:38:44,957][247478] Updated weights for policy 0, policy_version 2376 (0.0008) +[2026-06-02 16:38:45,120][247478] Updated weights for policy 0, policy_version 2386 (0.0008) +[2026-06-02 16:38:45,286][247478] Updated weights for policy 0, policy_version 2396 (0.0008) +[2026-06-02 16:38:45,465][247478] Updated weights for policy 0, policy_version 2406 (0.0008) +[2026-06-02 16:38:45,653][247478] Updated weights for policy 0, policy_version 2417 (0.0009) +[2026-06-02 16:38:45,824][247478] Updated weights for policy 0, policy_version 2427 (0.0008) +[2026-06-02 16:38:45,903][247399] Saving new best policy, reward=6.020! +[2026-06-02 16:38:46,643][247478] Updated weights for policy 0, policy_version 2438 (0.0008) +[2026-06-02 16:38:46,836][247478] Updated weights for policy 0, policy_version 2450 (0.0008) +[2026-06-02 16:38:47,019][247478] Updated weights for policy 0, policy_version 2461 (0.0008) +[2026-06-02 16:38:47,195][247478] Updated weights for policy 0, policy_version 2471 (0.0008) +[2026-06-02 16:38:47,397][247478] Updated weights for policy 0, policy_version 2483 (0.0008) +[2026-06-02 16:38:47,596][247478] Updated weights for policy 0, policy_version 2494 (0.0008) +[2026-06-02 16:38:48,424][247478] Updated weights for policy 0, policy_version 2504 (0.0008) +[2026-06-02 16:38:48,616][247478] Updated weights for policy 0, policy_version 2516 (0.0008) +[2026-06-02 16:38:48,806][247478] Updated weights for policy 0, policy_version 2527 (0.0008) +[2026-06-02 16:38:48,975][247478] Updated weights for policy 0, policy_version 2537 (0.0008) +[2026-06-02 16:38:49,150][247478] Updated weights for policy 0, policy_version 2547 (0.0008) +[2026-06-02 16:38:49,336][247478] Updated weights for policy 0, policy_version 2557 (0.0008) +[2026-06-02 16:38:49,725][246448] Fps is (10 sec: 19660.9, 60 sec: 20753.1, 300 sec: 20165.1). Total num frames: 1310720. Throughput: 0: 19484.5. Samples: 1315712. Policy #0 lag: (min: 63.0, avg: 76.2, max: 127.0) +[2026-06-02 16:38:49,727][246448] Avg episode reward: [(0, '6.741')] +[2026-06-02 16:38:49,731][247399] Saving new best policy, reward=6.741! +[2026-06-02 16:38:50,146][247478] Updated weights for policy 0, policy_version 2568 (0.0009) +[2026-06-02 16:38:50,302][247478] Updated weights for policy 0, policy_version 2578 (0.0008) +[2026-06-02 16:38:50,492][247478] Updated weights for policy 0, policy_version 2589 (0.0008) +[2026-06-02 16:38:50,667][247478] Updated weights for policy 0, policy_version 2599 (0.0008) +[2026-06-02 16:38:50,852][247478] Updated weights for policy 0, policy_version 2610 (0.0008) +[2026-06-02 16:38:51,048][247478] Updated weights for policy 0, policy_version 2621 (0.0008) +[2026-06-02 16:38:51,867][247478] Updated weights for policy 0, policy_version 2632 (0.0008) +[2026-06-02 16:38:52,030][247478] Updated weights for policy 0, policy_version 2642 (0.0008) +[2026-06-02 16:38:52,227][247478] Updated weights for policy 0, policy_version 2653 (0.0008) +[2026-06-02 16:38:52,414][247478] Updated weights for policy 0, policy_version 2664 (0.0008) +[2026-06-02 16:38:52,594][247478] Updated weights for policy 0, policy_version 2674 (0.0007) +[2026-06-02 16:38:52,783][247478] Updated weights for policy 0, policy_version 2685 (0.0009) +[2026-06-02 16:38:53,564][247478] Updated weights for policy 0, policy_version 2695 (0.0008) +[2026-06-02 16:38:53,734][247478] Updated weights for policy 0, policy_version 2705 (0.0008) +[2026-06-02 16:38:53,928][247478] Updated weights for policy 0, policy_version 2717 (0.0008) +[2026-06-02 16:38:54,135][247478] Updated weights for policy 0, policy_version 2729 (0.0009) +[2026-06-02 16:38:54,309][247478] Updated weights for policy 0, policy_version 2739 (0.0008) +[2026-06-02 16:38:54,498][247478] Updated weights for policy 0, policy_version 2750 (0.0008) +[2026-06-02 16:38:54,726][246448] Fps is (10 sec: 19660.7, 60 sec: 20206.8, 300 sec: 20128.9). Total num frames: 1409024. Throughput: 0: 19157.1. Samples: 1425664. Policy #0 lag: (min: 31.0, avg: 45.2, max: 95.0) +[2026-06-02 16:38:54,729][246448] Avg episode reward: [(0, '7.268')] +[2026-06-02 16:38:54,736][247399] Saving new best policy, reward=7.268! +[2026-06-02 16:38:55,348][247478] Updated weights for policy 0, policy_version 2761 (0.0009) +[2026-06-02 16:38:55,514][247478] Updated weights for policy 0, policy_version 2771 (0.0009) +[2026-06-02 16:38:55,681][247478] Updated weights for policy 0, policy_version 2781 (0.0009) +[2026-06-02 16:38:55,858][247478] Updated weights for policy 0, policy_version 2791 (0.0009) +[2026-06-02 16:38:56,036][247478] Updated weights for policy 0, policy_version 2801 (0.0009) +[2026-06-02 16:38:56,243][247478] Updated weights for policy 0, policy_version 2813 (0.0009) +[2026-06-02 16:38:57,037][247478] Updated weights for policy 0, policy_version 2825 (0.0009) +[2026-06-02 16:38:57,203][247478] Updated weights for policy 0, policy_version 2835 (0.0008) +[2026-06-02 16:38:57,382][247478] Updated weights for policy 0, policy_version 2845 (0.0008) +[2026-06-02 16:38:57,580][247478] Updated weights for policy 0, policy_version 2856 (0.0008) +[2026-06-02 16:38:57,742][247478] Updated weights for policy 0, policy_version 2866 (0.0008) +[2026-06-02 16:38:57,911][247478] Updated weights for policy 0, policy_version 2876 (0.0008) +[2026-06-02 16:38:58,705][247478] Updated weights for policy 0, policy_version 2886 (0.0009) +[2026-06-02 16:38:58,888][247478] Updated weights for policy 0, policy_version 2897 (0.0008) +[2026-06-02 16:38:59,058][247478] Updated weights for policy 0, policy_version 2907 (0.0008) +[2026-06-02 16:38:59,237][247478] Updated weights for policy 0, policy_version 2917 (0.0008) +[2026-06-02 16:38:59,414][247478] Updated weights for policy 0, policy_version 2927 (0.0008) +[2026-06-02 16:38:59,586][247478] Updated weights for policy 0, policy_version 2937 (0.0008) +[2026-06-02 16:38:59,725][246448] Fps is (10 sec: 19660.8, 60 sec: 20206.9, 300 sec: 20097.8). Total num frames: 1507328. Throughput: 0: 19083.4. Samples: 1484672. Policy #0 lag: (min: 26.0, avg: 46.9, max: 90.0) +[2026-06-02 16:38:59,727][246448] Avg episode reward: [(0, '7.377')] +[2026-06-02 16:38:59,732][247399] Saving new best policy, reward=7.377! +[2026-06-02 16:39:00,376][247478] Updated weights for policy 0, policy_version 2947 (0.0009) +[2026-06-02 16:39:00,563][247478] Updated weights for policy 0, policy_version 2958 (0.0008) +[2026-06-02 16:39:00,730][247478] Updated weights for policy 0, policy_version 2968 (0.0008) +[2026-06-02 16:39:00,905][247478] Updated weights for policy 0, policy_version 2978 (0.0008) +[2026-06-02 16:39:01,089][247478] Updated weights for policy 0, policy_version 2989 (0.0008) +[2026-06-02 16:39:01,290][247478] Updated weights for policy 0, policy_version 3001 (0.0008) +[2026-06-02 16:39:02,088][247478] Updated weights for policy 0, policy_version 3011 (0.0009) +[2026-06-02 16:39:02,264][247478] Updated weights for policy 0, policy_version 3022 (0.0008) +[2026-06-02 16:39:02,434][247478] Updated weights for policy 0, policy_version 3032 (0.0008) +[2026-06-02 16:39:02,614][247478] Updated weights for policy 0, policy_version 3042 (0.0008) +[2026-06-02 16:39:02,778][247478] Updated weights for policy 0, policy_version 3052 (0.0010) +[2026-06-02 16:39:02,955][247478] Updated weights for policy 0, policy_version 3062 (0.0008) +[2026-06-02 16:39:03,121][247478] Updated weights for policy 0, policy_version 3072 (0.0008) +[2026-06-02 16:39:03,948][247478] Updated weights for policy 0, policy_version 3084 (0.0009) +[2026-06-02 16:39:04,123][247478] Updated weights for policy 0, policy_version 3094 (0.0009) +[2026-06-02 16:39:04,293][247478] Updated weights for policy 0, policy_version 3104 (0.0009) +[2026-06-02 16:39:04,477][247478] Updated weights for policy 0, policy_version 3114 (0.0009) +[2026-06-02 16:39:04,647][247478] Updated weights for policy 0, policy_version 3124 (0.0008) +[2026-06-02 16:39:04,726][246448] Fps is (10 sec: 16384.7, 60 sec: 19114.7, 300 sec: 19660.9). Total num frames: 1572864. Throughput: 0: 19313.7. Samples: 1605888. Policy #0 lag: (min: 44.0, avg: 58.1, max: 108.0) +[2026-06-02 16:39:04,727][246448] Avg episode reward: [(0, '8.905')] +[2026-06-02 16:39:04,838][247478] Updated weights for policy 0, policy_version 3135 (0.0009) +[2026-06-02 16:39:04,847][247399] Saving new best policy, reward=8.905! +[2026-06-02 16:39:05,638][247478] Updated weights for policy 0, policy_version 3146 (0.0009) +[2026-06-02 16:39:05,825][247478] Updated weights for policy 0, policy_version 3157 (0.0009) +[2026-06-02 16:39:06,017][247478] Updated weights for policy 0, policy_version 3168 (0.0009) +[2026-06-02 16:39:06,186][247478] Updated weights for policy 0, policy_version 3178 (0.0008) +[2026-06-02 16:39:06,359][247478] Updated weights for policy 0, policy_version 3188 (0.0009) +[2026-06-02 16:39:06,532][247478] Updated weights for policy 0, policy_version 3198 (0.0008) +[2026-06-02 16:39:07,295][247478] Updated weights for policy 0, policy_version 3208 (0.0008) +[2026-06-02 16:39:07,493][247478] Updated weights for policy 0, policy_version 3220 (0.0010) +[2026-06-02 16:39:07,685][247478] Updated weights for policy 0, policy_version 3231 (0.0011) +[2026-06-02 16:39:07,862][247478] Updated weights for policy 0, policy_version 3241 (0.0011) +[2026-06-02 16:39:08,068][247478] Updated weights for policy 0, policy_version 3253 (0.0009) +[2026-06-02 16:39:08,246][247478] Updated weights for policy 0, policy_version 3263 (0.0010) +[2026-06-02 16:39:09,055][247478] Updated weights for policy 0, policy_version 3275 (0.0009) +[2026-06-02 16:39:09,219][247478] Updated weights for policy 0, policy_version 3285 (0.0008) +[2026-06-02 16:39:09,394][247478] Updated weights for policy 0, policy_version 3295 (0.0009) +[2026-06-02 16:39:09,588][247478] Updated weights for policy 0, policy_version 3306 (0.0009) +[2026-06-02 16:39:09,725][246448] Fps is (10 sec: 16384.0, 60 sec: 19114.7, 300 sec: 19660.9). Total num frames: 1671168. Throughput: 0: 19015.2. Samples: 1713408. Policy #0 lag: (min: 27.0, avg: 44.7, max: 91.0) +[2026-06-02 16:39:09,726][246448] Avg episode reward: [(0, '9.316')] +[2026-06-02 16:39:09,769][247478] Updated weights for policy 0, policy_version 3316 (0.0008) +[2026-06-02 16:39:09,937][247478] Updated weights for policy 0, policy_version 3326 (0.0008) +[2026-06-02 16:39:09,962][247399] Saving new best policy, reward=9.316! +[2026-06-02 16:39:10,749][247478] Updated weights for policy 0, policy_version 3337 (0.0009) +[2026-06-02 16:39:10,919][247478] Updated weights for policy 0, policy_version 3347 (0.0008) +[2026-06-02 16:39:11,099][247478] Updated weights for policy 0, policy_version 3357 (0.0008) +[2026-06-02 16:39:11,301][247478] Updated weights for policy 0, policy_version 3369 (0.0007) +[2026-06-02 16:39:11,494][247478] Updated weights for policy 0, policy_version 3380 (0.0009) +[2026-06-02 16:39:11,699][247478] Updated weights for policy 0, policy_version 3392 (0.0008) +[2026-06-02 16:39:12,488][247478] Updated weights for policy 0, policy_version 3403 (0.0008) +[2026-06-02 16:39:12,659][247478] Updated weights for policy 0, policy_version 3413 (0.0008) +[2026-06-02 16:39:12,832][247478] Updated weights for policy 0, policy_version 3423 (0.0009) +[2026-06-02 16:39:13,008][247478] Updated weights for policy 0, policy_version 3433 (0.0009) +[2026-06-02 16:39:13,213][247478] Updated weights for policy 0, policy_version 3445 (0.0009) +[2026-06-02 16:39:13,411][247478] Updated weights for policy 0, policy_version 3456 (0.0008) +[2026-06-02 16:39:14,212][247478] Updated weights for policy 0, policy_version 3467 (0.0009) +[2026-06-02 16:39:14,399][247478] Updated weights for policy 0, policy_version 3478 (0.0009) +[2026-06-02 16:39:14,580][247478] Updated weights for policy 0, policy_version 3488 (0.0008) +[2026-06-02 16:39:14,726][246448] Fps is (10 sec: 19660.9, 60 sec: 19114.7, 300 sec: 19660.9). Total num frames: 1769472. Throughput: 0: 18995.2. Samples: 1773568. Policy #0 lag: (min: 63.0, avg: 76.9, max: 127.0) +[2026-06-02 16:39:14,727][246448] Avg episode reward: [(0, '9.082')] +[2026-06-02 16:39:14,748][247478] Updated weights for policy 0, policy_version 3498 (0.0008) +[2026-06-02 16:39:14,943][247478] Updated weights for policy 0, policy_version 3509 (0.0009) +[2026-06-02 16:39:15,741][247478] Updated weights for policy 0, policy_version 3521 (0.0008) +[2026-06-02 16:39:15,896][247478] Updated weights for policy 0, policy_version 3531 (0.0008) +[2026-06-02 16:39:16,067][247478] Updated weights for policy 0, policy_version 3541 (0.0008) +[2026-06-02 16:39:16,255][247478] Updated weights for policy 0, policy_version 3552 (0.0008) +[2026-06-02 16:39:16,433][247478] Updated weights for policy 0, policy_version 3562 (0.0008) +[2026-06-02 16:39:16,606][247478] Updated weights for policy 0, policy_version 3572 (0.0008) +[2026-06-02 16:39:16,779][247478] Updated weights for policy 0, policy_version 3582 (0.0008) +[2026-06-02 16:39:17,549][247478] Updated weights for policy 0, policy_version 3592 (0.0009) +[2026-06-02 16:39:17,728][247478] Updated weights for policy 0, policy_version 3603 (0.0009) +[2026-06-02 16:39:17,930][247478] Updated weights for policy 0, policy_version 3614 (0.0009) +[2026-06-02 16:39:18,113][247478] Updated weights for policy 0, policy_version 3625 (0.0009) +[2026-06-02 16:39:18,304][247478] Updated weights for policy 0, policy_version 3636 (0.0009) +[2026-06-02 16:39:18,484][247478] Updated weights for policy 0, policy_version 3646 (0.0009) +[2026-06-02 16:39:19,293][247478] Updated weights for policy 0, policy_version 3657 (0.0009) +[2026-06-02 16:39:19,474][247478] Updated weights for policy 0, policy_version 3668 (0.0008) +[2026-06-02 16:39:19,667][247478] Updated weights for policy 0, policy_version 3679 (0.0008) +[2026-06-02 16:39:19,725][246448] Fps is (10 sec: 19660.6, 60 sec: 19114.6, 300 sec: 19660.9). Total num frames: 1867776. Throughput: 0: 19063.6. Samples: 1884544. Policy #0 lag: (min: 63.0, avg: 76.9, max: 127.0) +[2026-06-02 16:39:19,726][246448] Avg episode reward: [(0, '13.299')] +[2026-06-02 16:39:19,866][247478] Updated weights for policy 0, policy_version 3690 (0.0008) +[2026-06-02 16:39:20,053][247478] Updated weights for policy 0, policy_version 3701 (0.0008) +[2026-06-02 16:39:20,242][247399] Saving new best policy, reward=13.299! +[2026-06-02 16:39:20,245][247478] Updated weights for policy 0, policy_version 3712 (0.0008) +[2026-06-02 16:39:21,025][247478] Updated weights for policy 0, policy_version 3722 (0.0009) +[2026-06-02 16:39:21,212][247478] Updated weights for policy 0, policy_version 3733 (0.0008) +[2026-06-02 16:39:21,428][247478] Updated weights for policy 0, policy_version 3745 (0.0008) +[2026-06-02 16:39:21,590][247478] Updated weights for policy 0, policy_version 3755 (0.0008) +[2026-06-02 16:39:21,774][247478] Updated weights for policy 0, policy_version 3765 (0.0008) +[2026-06-02 16:39:21,951][247478] Updated weights for policy 0, policy_version 3776 (0.0008) +[2026-06-02 16:39:22,732][247478] Updated weights for policy 0, policy_version 3789 (0.0008) +[2026-06-02 16:39:22,932][247478] Updated weights for policy 0, policy_version 3801 (0.0009) +[2026-06-02 16:39:23,123][247478] Updated weights for policy 0, policy_version 3812 (0.0008) +[2026-06-02 16:39:23,338][247478] Updated weights for policy 0, policy_version 3824 (0.0009) +[2026-06-02 16:39:23,523][247478] Updated weights for policy 0, policy_version 3835 (0.0008) +[2026-06-02 16:39:24,354][247478] Updated weights for policy 0, policy_version 3848 (0.0009) +[2026-06-02 16:39:24,552][247478] Updated weights for policy 0, policy_version 3860 (0.0008) +[2026-06-02 16:39:24,726][246448] Fps is (10 sec: 19660.8, 60 sec: 19114.7, 300 sec: 19660.9). Total num frames: 1966080. Throughput: 0: 19026.5. Samples: 2004736. Policy #0 lag: (min: 63.0, avg: 76.3, max: 127.0) +[2026-06-02 16:39:24,728][246448] Avg episode reward: [(0, '15.146')] +[2026-06-02 16:39:24,742][247478] Updated weights for policy 0, policy_version 3871 (0.0008) +[2026-06-02 16:39:24,922][247478] Updated weights for policy 0, policy_version 3881 (0.0008) +[2026-06-02 16:39:25,128][247478] Updated weights for policy 0, policy_version 3893 (0.0009) +[2026-06-02 16:39:25,309][247399] Saving new best policy, reward=15.146! +[2026-06-02 16:39:25,311][247478] Updated weights for policy 0, policy_version 3904 (0.0008) +[2026-06-02 16:39:26,063][247478] Updated weights for policy 0, policy_version 3914 (0.0009) +[2026-06-02 16:39:26,250][247478] Updated weights for policy 0, policy_version 3925 (0.0008) +[2026-06-02 16:39:26,437][247478] Updated weights for policy 0, policy_version 3936 (0.0008) +[2026-06-02 16:39:26,636][247478] Updated weights for policy 0, policy_version 3947 (0.0009) +[2026-06-02 16:39:26,820][247478] Updated weights for policy 0, policy_version 3958 (0.0008) +[2026-06-02 16:39:27,000][247478] Updated weights for policy 0, policy_version 3968 (0.0008) +[2026-06-02 16:39:27,790][247478] Updated weights for policy 0, policy_version 3980 (0.0009) +[2026-06-02 16:39:27,972][247478] Updated weights for policy 0, policy_version 3991 (0.0008) +[2026-06-02 16:39:28,160][247478] Updated weights for policy 0, policy_version 4002 (0.0008) +[2026-06-02 16:39:28,347][247478] Updated weights for policy 0, policy_version 4012 (0.0008) +[2026-06-02 16:39:28,514][247478] Updated weights for policy 0, policy_version 4022 (0.0008) +[2026-06-02 16:39:28,684][247478] Updated weights for policy 0, policy_version 4032 (0.0008) +[2026-06-02 16:39:29,498][247478] Updated weights for policy 0, policy_version 4044 (0.0008) +[2026-06-02 16:39:29,706][247478] Updated weights for policy 0, policy_version 4056 (0.0008) +[2026-06-02 16:39:29,725][246448] Fps is (10 sec: 19660.9, 60 sec: 19114.7, 300 sec: 19660.9). Total num frames: 2064384. Throughput: 0: 19285.5. Samples: 2065664. Policy #0 lag: (min: 63.0, avg: 77.1, max: 127.0) +[2026-06-02 16:39:29,727][246448] Avg episode reward: [(0, '19.186')] +[2026-06-02 16:39:29,897][247478] Updated weights for policy 0, policy_version 4067 (0.0008) +[2026-06-02 16:39:30,089][247478] Updated weights for policy 0, policy_version 4078 (0.0009) +[2026-06-02 16:39:30,259][247478] Updated weights for policy 0, policy_version 4088 (0.0008) +[2026-06-02 16:39:30,396][247399] Saving new best policy, reward=19.186! +[2026-06-02 16:39:31,015][247478] Updated weights for policy 0, policy_version 4099 (0.0008) +[2026-06-02 16:39:31,218][247478] Updated weights for policy 0, policy_version 4112 (0.0008) +[2026-06-02 16:39:31,408][247478] Updated weights for policy 0, policy_version 4123 (0.0008) +[2026-06-02 16:39:31,594][247478] Updated weights for policy 0, policy_version 4134 (0.0009) +[2026-06-02 16:39:31,771][247478] Updated weights for policy 0, policy_version 4144 (0.0008) +[2026-06-02 16:39:31,989][247478] Updated weights for policy 0, policy_version 4156 (0.0008) +[2026-06-02 16:39:32,736][247478] Updated weights for policy 0, policy_version 4166 (0.0009) +[2026-06-02 16:39:32,912][247478] Updated weights for policy 0, policy_version 4176 (0.0008) +[2026-06-02 16:39:33,083][247478] Updated weights for policy 0, policy_version 4186 (0.0008) +[2026-06-02 16:39:33,257][247478] Updated weights for policy 0, policy_version 4196 (0.0008) +[2026-06-02 16:39:33,450][247478] Updated weights for policy 0, policy_version 4207 (0.0009) +[2026-06-02 16:39:33,641][247478] Updated weights for policy 0, policy_version 4217 (0.0008) +[2026-06-02 16:39:34,351][247478] Updated weights for policy 0, policy_version 4227 (0.0009) +[2026-06-02 16:39:34,539][247478] Updated weights for policy 0, policy_version 4238 (0.0008) +[2026-06-02 16:39:34,704][247478] Updated weights for policy 0, policy_version 4248 (0.0009) +[2026-06-02 16:39:34,725][246448] Fps is (10 sec: 19660.9, 60 sec: 19114.8, 300 sec: 19660.9). Total num frames: 2162688. Throughput: 0: 19134.6. Samples: 2176768. Policy #0 lag: (min: 57.0, avg: 75.4, max: 121.0) +[2026-06-02 16:39:34,727][246448] Avg episode reward: [(0, '22.949')] +[2026-06-02 16:39:34,882][247478] Updated weights for policy 0, policy_version 4258 (0.0008) +[2026-06-02 16:39:35,057][247478] Updated weights for policy 0, policy_version 4268 (0.0008) +[2026-06-02 16:39:35,254][247478] Updated weights for policy 0, policy_version 4279 (0.0008) +[2026-06-02 16:39:35,401][247399] Saving new best policy, reward=22.949! +[2026-06-02 16:39:36,002][247478] Updated weights for policy 0, policy_version 4290 (0.0009) +[2026-06-02 16:39:36,164][247478] Updated weights for policy 0, policy_version 4300 (0.0009) +[2026-06-02 16:39:36,365][247478] Updated weights for policy 0, policy_version 4312 (0.0009) +[2026-06-02 16:39:36,566][247478] Updated weights for policy 0, policy_version 4323 (0.0009) +[2026-06-02 16:39:36,745][247478] Updated weights for policy 0, policy_version 4333 (0.0009) +[2026-06-02 16:39:36,920][247478] Updated weights for policy 0, policy_version 4344 (0.0009) +[2026-06-02 16:39:37,669][247478] Updated weights for policy 0, policy_version 4354 (0.0009) +[2026-06-02 16:39:37,834][247478] Updated weights for policy 0, policy_version 4364 (0.0008) +[2026-06-02 16:39:38,008][247478] Updated weights for policy 0, policy_version 4374 (0.0008) +[2026-06-02 16:39:38,201][247478] Updated weights for policy 0, policy_version 4385 (0.0009) +[2026-06-02 16:39:38,396][247478] Updated weights for policy 0, policy_version 4396 (0.0009) +[2026-06-02 16:39:38,574][247478] Updated weights for policy 0, policy_version 4406 (0.0009) +[2026-06-02 16:39:39,303][247478] Updated weights for policy 0, policy_version 4417 (0.0009) +[2026-06-02 16:39:39,471][247478] Updated weights for policy 0, policy_version 4427 (0.0009) +[2026-06-02 16:39:39,673][247478] Updated weights for policy 0, policy_version 4439 (0.0009) +[2026-06-02 16:39:39,725][246448] Fps is (10 sec: 19660.8, 60 sec: 19114.7, 300 sec: 19660.9). Total num frames: 2260992. Throughput: 0: 19410.7. Samples: 2299136. Policy #0 lag: (min: 57.0, avg: 75.4, max: 121.0) +[2026-06-02 16:39:39,726][246448] Avg episode reward: [(0, '26.740')] +[2026-06-02 16:39:39,846][247478] Updated weights for policy 0, policy_version 4449 (0.0009) +[2026-06-02 16:39:40,027][247478] Updated weights for policy 0, policy_version 4460 (0.0007) +[2026-06-02 16:39:40,211][247478] Updated weights for policy 0, policy_version 4470 (0.0009) +[2026-06-02 16:39:40,384][247399] Saving new best policy, reward=26.740! +[2026-06-02 16:39:40,963][247478] Updated weights for policy 0, policy_version 4481 (0.0008) +[2026-06-02 16:39:41,133][247478] Updated weights for policy 0, policy_version 4491 (0.0004) +[2026-06-02 16:39:41,319][247478] Updated weights for policy 0, policy_version 4502 (0.0004) +[2026-06-02 16:39:41,511][247478] Updated weights for policy 0, policy_version 4513 (0.0007) +[2026-06-02 16:39:41,694][247478] Updated weights for policy 0, policy_version 4524 (0.0009) +[2026-06-02 16:39:41,924][247478] Updated weights for policy 0, policy_version 4537 (0.0008) +[2026-06-02 16:39:42,613][247478] Updated weights for policy 0, policy_version 4547 (0.0008) +[2026-06-02 16:39:42,793][247478] Updated weights for policy 0, policy_version 4558 (0.0008) +[2026-06-02 16:39:42,973][247478] Updated weights for policy 0, policy_version 4568 (0.0008) +[2026-06-02 16:39:43,149][247478] Updated weights for policy 0, policy_version 4578 (0.0008) +[2026-06-02 16:39:43,325][247478] Updated weights for policy 0, policy_version 4588 (0.0009) +[2026-06-02 16:39:43,512][247478] Updated weights for policy 0, policy_version 4599 (0.0008) +[2026-06-02 16:39:44,235][247478] Updated weights for policy 0, policy_version 4609 (0.0008) +[2026-06-02 16:39:44,396][247478] Updated weights for policy 0, policy_version 4619 (0.0008) +[2026-06-02 16:39:44,584][247478] Updated weights for policy 0, policy_version 4630 (0.0008) +[2026-06-02 16:39:44,725][246448] Fps is (10 sec: 19660.6, 60 sec: 19114.8, 300 sec: 19660.9). Total num frames: 2359296. Throughput: 0: 19478.7. Samples: 2361216. Policy #0 lag: (min: 63.0, avg: 78.1, max: 127.0) +[2026-06-02 16:39:44,727][246448] Avg episode reward: [(0, '38.608')] +[2026-06-02 16:39:44,794][247478] Updated weights for policy 0, policy_version 4642 (0.0009) +[2026-06-02 16:39:44,963][247478] Updated weights for policy 0, policy_version 4652 (0.0008) +[2026-06-02 16:39:45,144][247478] Updated weights for policy 0, policy_version 4662 (0.0008) +[2026-06-02 16:39:45,306][247399] Saving new best policy, reward=38.749! +[2026-06-02 16:39:45,873][247478] Updated weights for policy 0, policy_version 4673 (0.0009) +[2026-06-02 16:39:46,030][247478] Updated weights for policy 0, policy_version 4683 (0.0008) +[2026-06-02 16:39:46,204][247478] Updated weights for policy 0, policy_version 4693 (0.0009) +[2026-06-02 16:39:46,377][247478] Updated weights for policy 0, policy_version 4703 (0.0009) +[2026-06-02 16:39:46,556][247478] Updated weights for policy 0, policy_version 4713 (0.0008) +[2026-06-02 16:39:46,728][247478] Updated weights for policy 0, policy_version 4723 (0.0008) +[2026-06-02 16:39:46,906][247478] Updated weights for policy 0, policy_version 4733 (0.0008) +[2026-06-02 16:39:47,624][247478] Updated weights for policy 0, policy_version 4744 (0.0009) +[2026-06-02 16:39:47,791][247478] Updated weights for policy 0, policy_version 4754 (0.0008) +[2026-06-02 16:39:47,973][247478] Updated weights for policy 0, policy_version 4764 (0.0008) +[2026-06-02 16:39:48,162][247478] Updated weights for policy 0, policy_version 4775 (0.0007) +[2026-06-02 16:39:48,360][247478] Updated weights for policy 0, policy_version 4786 (0.0010) +[2026-06-02 16:39:48,569][247478] Updated weights for policy 0, policy_version 4798 (0.0009) +[2026-06-02 16:39:49,252][247478] Updated weights for policy 0, policy_version 4808 (0.0009) +[2026-06-02 16:39:49,419][247478] Updated weights for policy 0, policy_version 4818 (0.0008) +[2026-06-02 16:39:49,612][247478] Updated weights for policy 0, policy_version 4829 (0.0009) +[2026-06-02 16:39:49,725][246448] Fps is (10 sec: 19660.8, 60 sec: 19114.7, 300 sec: 19660.9). Total num frames: 2457600. Throughput: 0: 19265.5. Samples: 2472832. Policy #0 lag: (min: 63.0, avg: 78.3, max: 127.0) +[2026-06-02 16:39:49,727][246448] Avg episode reward: [(0, '49.411')] +[2026-06-02 16:39:49,810][247478] Updated weights for policy 0, policy_version 4840 (0.0009) +[2026-06-02 16:39:49,987][247478] Updated weights for policy 0, policy_version 4850 (0.0008) +[2026-06-02 16:39:50,158][247478] Updated weights for policy 0, policy_version 4860 (0.0009) +[2026-06-02 16:39:50,227][247399] Saving new best policy, reward=49.411! +[2026-06-02 16:39:50,846][247478] Updated weights for policy 0, policy_version 4870 (0.0008) +[2026-06-02 16:39:51,018][247478] Updated weights for policy 0, policy_version 4880 (0.0008) +[2026-06-02 16:39:51,194][247478] Updated weights for policy 0, policy_version 4890 (0.0008) +[2026-06-02 16:39:51,369][247478] Updated weights for policy 0, policy_version 4900 (0.0008) +[2026-06-02 16:39:51,546][247478] Updated weights for policy 0, policy_version 4910 (0.0008) +[2026-06-02 16:39:51,719][247478] Updated weights for policy 0, policy_version 4920 (0.0008) +[2026-06-02 16:39:52,402][247478] Updated weights for policy 0, policy_version 4930 (0.0008) +[2026-06-02 16:39:52,583][247478] Updated weights for policy 0, policy_version 4941 (0.0008) +[2026-06-02 16:39:52,759][247478] Updated weights for policy 0, policy_version 4951 (0.0008) +[2026-06-02 16:39:52,968][247478] Updated weights for policy 0, policy_version 4963 (0.0008) +[2026-06-02 16:39:53,142][247478] Updated weights for policy 0, policy_version 4973 (0.0008) +[2026-06-02 16:39:53,313][247478] Updated weights for policy 0, policy_version 4983 (0.0008) +[2026-06-02 16:39:54,011][247478] Updated weights for policy 0, policy_version 4993 (0.0008) +[2026-06-02 16:39:54,212][247478] Updated weights for policy 0, policy_version 5005 (0.0008) +[2026-06-02 16:39:54,385][247478] Updated weights for policy 0, policy_version 5015 (0.0008) +[2026-06-02 16:39:54,567][247478] Updated weights for policy 0, policy_version 5025 (0.0008) +[2026-06-02 16:39:54,725][246448] Fps is (10 sec: 19660.9, 60 sec: 19114.8, 300 sec: 19660.9). Total num frames: 2555904. Throughput: 0: 19672.1. Samples: 2598656. Policy #0 lag: (min: 54.0, avg: 69.2, max: 118.0) +[2026-06-02 16:39:54,726][246448] Avg episode reward: [(0, '67.097')] +[2026-06-02 16:39:54,749][247478] Updated weights for policy 0, policy_version 5036 (0.0008) +[2026-06-02 16:39:54,931][247478] Updated weights for policy 0, policy_version 5046 (0.0008) +[2026-06-02 16:39:55,101][247399] Saving new best policy, reward=66.835! +[2026-06-02 16:39:55,102][247478] Updated weights for policy 0, policy_version 5056 (0.0009) +[2026-06-02 16:39:55,806][247478] Updated weights for policy 0, policy_version 5067 (0.0009) +[2026-06-02 16:39:55,994][247478] Updated weights for policy 0, policy_version 5078 (0.0008) +[2026-06-02 16:39:56,183][247478] Updated weights for policy 0, policy_version 5089 (0.0009) +[2026-06-02 16:39:56,377][247478] Updated weights for policy 0, policy_version 5100 (0.0008) +[2026-06-02 16:39:56,573][247478] Updated weights for policy 0, policy_version 5111 (0.0008) +[2026-06-02 16:39:57,285][247478] Updated weights for policy 0, policy_version 5121 (0.0009) +[2026-06-02 16:39:57,445][247478] Updated weights for policy 0, policy_version 5131 (0.0008) +[2026-06-02 16:39:57,615][247478] Updated weights for policy 0, policy_version 5141 (0.0008) +[2026-06-02 16:39:57,792][247478] Updated weights for policy 0, policy_version 5151 (0.0008) +[2026-06-02 16:39:57,968][247478] Updated weights for policy 0, policy_version 5161 (0.0008) +[2026-06-02 16:39:58,151][247478] Updated weights for policy 0, policy_version 5171 (0.0008) +[2026-06-02 16:39:58,327][247478] Updated weights for policy 0, policy_version 5181 (0.0008) +[2026-06-02 16:39:59,005][247478] Updated weights for policy 0, policy_version 5191 (0.0008) +[2026-06-02 16:39:59,177][247478] Updated weights for policy 0, policy_version 5201 (0.0009) +[2026-06-02 16:39:59,357][247478] Updated weights for policy 0, policy_version 5211 (0.0009) +[2026-06-02 16:39:59,540][247478] Updated weights for policy 0, policy_version 5221 (0.0008) +[2026-06-02 16:39:59,718][247478] Updated weights for policy 0, policy_version 5231 (0.0009) +[2026-06-02 16:39:59,725][246448] Fps is (10 sec: 19660.9, 60 sec: 19114.7, 300 sec: 19660.9). Total num frames: 2654208. Throughput: 0: 19740.5. Samples: 2661888. Policy #0 lag: (min: 54.0, avg: 69.2, max: 118.0) +[2026-06-02 16:39:59,726][246448] Avg episode reward: [(0, '66.447')] +[2026-06-02 16:39:59,897][247478] Updated weights for policy 0, policy_version 5241 (0.0008) +[2026-06-02 16:40:00,546][247478] Updated weights for policy 0, policy_version 5251 (0.0008) +[2026-06-02 16:40:00,704][247478] Updated weights for policy 0, policy_version 5261 (0.0008) +[2026-06-02 16:40:00,882][247478] Updated weights for policy 0, policy_version 5271 (0.0009) +[2026-06-02 16:40:01,059][247478] Updated weights for policy 0, policy_version 5281 (0.0009) +[2026-06-02 16:40:01,235][247478] Updated weights for policy 0, policy_version 5291 (0.0008) +[2026-06-02 16:40:01,411][247478] Updated weights for policy 0, policy_version 5301 (0.0009) +[2026-06-02 16:40:01,584][247478] Updated weights for policy 0, policy_version 5311 (0.0009) +[2026-06-02 16:40:02,278][247478] Updated weights for policy 0, policy_version 5322 (0.0009) +[2026-06-02 16:40:02,470][247478] Updated weights for policy 0, policy_version 5333 (0.0008) +[2026-06-02 16:40:02,647][247478] Updated weights for policy 0, policy_version 5343 (0.0008) +[2026-06-02 16:40:02,822][247478] Updated weights for policy 0, policy_version 5353 (0.0008) +[2026-06-02 16:40:03,006][247478] Updated weights for policy 0, policy_version 5363 (0.0008) +[2026-06-02 16:40:03,190][247478] Updated weights for policy 0, policy_version 5373 (0.0008) +[2026-06-02 16:40:03,852][247478] Updated weights for policy 0, policy_version 5383 (0.0009) +[2026-06-02 16:40:04,028][247478] Updated weights for policy 0, policy_version 5393 (0.0009) +[2026-06-02 16:40:04,196][247478] Updated weights for policy 0, policy_version 5403 (0.0009) +[2026-06-02 16:40:04,383][247478] Updated weights for policy 0, policy_version 5413 (0.0009) +[2026-06-02 16:40:04,553][247478] Updated weights for policy 0, policy_version 5423 (0.0009) +[2026-06-02 16:40:04,725][246448] Fps is (10 sec: 19660.9, 60 sec: 19660.8, 300 sec: 19660.9). Total num frames: 2752512. Throughput: 0: 20093.2. Samples: 2788736. Policy #0 lag: (min: 63.0, avg: 77.9, max: 127.0) +[2026-06-02 16:40:04,726][246448] Avg episode reward: [(0, '83.448')] +[2026-06-02 16:40:04,756][247478] Updated weights for policy 0, policy_version 5434 (0.0009) +[2026-06-02 16:40:04,853][247399] Saving new best policy, reward=83.448! +[2026-06-02 16:40:05,418][247478] Updated weights for policy 0, policy_version 5444 (0.0009) +[2026-06-02 16:40:05,581][247478] Updated weights for policy 0, policy_version 5454 (0.0008) +[2026-06-02 16:40:05,755][247478] Updated weights for policy 0, policy_version 5464 (0.0006) +[2026-06-02 16:40:05,935][247478] Updated weights for policy 0, policy_version 5474 (0.0004) +[2026-06-02 16:40:06,108][247478] Updated weights for policy 0, policy_version 5484 (0.0004) +[2026-06-02 16:40:06,287][247478] Updated weights for policy 0, policy_version 5494 (0.0004) +[2026-06-02 16:40:06,467][247478] Updated weights for policy 0, policy_version 5504 (0.0004) +[2026-06-02 16:40:07,122][247478] Updated weights for policy 0, policy_version 5514 (0.0007) +[2026-06-02 16:40:07,292][247478] Updated weights for policy 0, policy_version 5524 (0.0008) +[2026-06-02 16:40:07,488][247478] Updated weights for policy 0, policy_version 5535 (0.0009) +[2026-06-02 16:40:07,669][247478] Updated weights for policy 0, policy_version 5545 (0.0008) +[2026-06-02 16:40:07,844][247478] Updated weights for policy 0, policy_version 5555 (0.0008) +[2026-06-02 16:40:08,032][247478] Updated weights for policy 0, policy_version 5565 (0.0009) +[2026-06-02 16:40:08,728][247478] Updated weights for policy 0, policy_version 5577 (0.0008) +[2026-06-02 16:40:08,899][247478] Updated weights for policy 0, policy_version 5587 (0.0009) +[2026-06-02 16:40:09,073][247478] Updated weights for policy 0, policy_version 5597 (0.0009) +[2026-06-02 16:40:09,254][247478] Updated weights for policy 0, policy_version 5607 (0.0008) +[2026-06-02 16:40:09,438][247478] Updated weights for policy 0, policy_version 5618 (0.0009) +[2026-06-02 16:40:09,619][247478] Updated weights for policy 0, policy_version 5628 (0.0009) +[2026-06-02 16:40:09,726][246448] Fps is (10 sec: 22937.5, 60 sec: 20206.9, 300 sec: 19886.8). Total num frames: 2883584. Throughput: 0: 19936.7. Samples: 2901888. Policy #0 lag: (min: 63.0, avg: 78.7, max: 127.0) +[2026-06-02 16:40:09,727][246448] Avg episode reward: [(0, '99.096')] +[2026-06-02 16:40:09,732][247399] Saving new best policy, reward=99.096! +[2026-06-02 16:40:10,323][247478] Updated weights for policy 0, policy_version 5638 (0.0009) +[2026-06-02 16:40:10,496][247478] Updated weights for policy 0, policy_version 5648 (0.0008) +[2026-06-02 16:40:10,688][247478] Updated weights for policy 0, policy_version 5659 (0.0009) +[2026-06-02 16:40:10,861][247478] Updated weights for policy 0, policy_version 5669 (0.0009) +[2026-06-02 16:40:11,042][247478] Updated weights for policy 0, policy_version 5679 (0.0009) +[2026-06-02 16:40:11,221][247478] Updated weights for policy 0, policy_version 5689 (0.0009) +[2026-06-02 16:40:11,864][247478] Updated weights for policy 0, policy_version 5699 (0.0009) +[2026-06-02 16:40:12,035][247478] Updated weights for policy 0, policy_version 5709 (0.0008) +[2026-06-02 16:40:12,206][247478] Updated weights for policy 0, policy_version 5719 (0.0008) +[2026-06-02 16:40:12,385][247478] Updated weights for policy 0, policy_version 5729 (0.0008) +[2026-06-02 16:40:12,575][247478] Updated weights for policy 0, policy_version 5739 (0.0008) +[2026-06-02 16:40:12,751][247478] Updated weights for policy 0, policy_version 5749 (0.0008) +[2026-06-02 16:40:12,934][247478] Updated weights for policy 0, policy_version 5759 (0.0008) +[2026-06-02 16:40:13,605][247478] Updated weights for policy 0, policy_version 5769 (0.0009) +[2026-06-02 16:40:13,777][247478] Updated weights for policy 0, policy_version 5779 (0.0008) +[2026-06-02 16:40:13,962][247478] Updated weights for policy 0, policy_version 5789 (0.0008) +[2026-06-02 16:40:14,147][247478] Updated weights for policy 0, policy_version 5799 (0.0008) +[2026-06-02 16:40:14,323][247478] Updated weights for policy 0, policy_version 5809 (0.0008) +[2026-06-02 16:40:14,505][247478] Updated weights for policy 0, policy_version 5819 (0.0008) +[2026-06-02 16:40:14,725][246448] Fps is (10 sec: 22937.7, 60 sec: 20207.0, 300 sec: 19879.3). Total num frames: 2981888. Throughput: 0: 20016.4. Samples: 2966400. Policy #0 lag: (min: 63.0, avg: 79.6, max: 127.0) +[2026-06-02 16:40:14,726][246448] Avg episode reward: [(0, '122.760')] +[2026-06-02 16:40:14,731][247399] Saving new best policy, reward=122.760! +[2026-06-02 16:40:15,152][247478] Updated weights for policy 0, policy_version 5830 (0.0009) +[2026-06-02 16:40:15,310][247478] Updated weights for policy 0, policy_version 5840 (0.0008) +[2026-06-02 16:40:15,490][247478] Updated weights for policy 0, policy_version 5850 (0.0009) +[2026-06-02 16:40:15,672][247478] Updated weights for policy 0, policy_version 5860 (0.0008) +[2026-06-02 16:40:15,846][247478] Updated weights for policy 0, policy_version 5870 (0.0008) +[2026-06-02 16:40:16,023][247478] Updated weights for policy 0, policy_version 5880 (0.0008) +[2026-06-02 16:40:16,711][247478] Updated weights for policy 0, policy_version 5890 (0.0008) +[2026-06-02 16:40:16,882][247478] Updated weights for policy 0, policy_version 5900 (0.0009) +[2026-06-02 16:40:17,070][247478] Updated weights for policy 0, policy_version 5910 (0.0009) +[2026-06-02 16:40:17,242][247478] Updated weights for policy 0, policy_version 5920 (0.0008) +[2026-06-02 16:40:17,414][247478] Updated weights for policy 0, policy_version 5930 (0.0008) +[2026-06-02 16:40:17,593][247478] Updated weights for policy 0, policy_version 5940 (0.0008) +[2026-06-02 16:40:17,773][247478] Updated weights for policy 0, policy_version 5950 (0.0008) +[2026-06-02 16:40:18,429][247478] Updated weights for policy 0, policy_version 5960 (0.0009) +[2026-06-02 16:40:18,598][247478] Updated weights for policy 0, policy_version 5970 (0.0008) +[2026-06-02 16:40:18,793][247478] Updated weights for policy 0, policy_version 5981 (0.0010) +[2026-06-02 16:40:18,972][247478] Updated weights for policy 0, policy_version 5991 (0.0008) +[2026-06-02 16:40:19,145][247478] Updated weights for policy 0, policy_version 6001 (0.0008) +[2026-06-02 16:40:19,322][247478] Updated weights for policy 0, policy_version 6011 (0.0008) +[2026-06-02 16:40:19,725][246448] Fps is (10 sec: 19660.9, 60 sec: 20207.0, 300 sec: 19872.3). Total num frames: 3080192. Throughput: 0: 20357.7. Samples: 3092864. Policy #0 lag: (min: 63.0, avg: 79.6, max: 127.0) +[2026-06-02 16:40:19,726][246448] Avg episode reward: [(0, '142.764')] +[2026-06-02 16:40:19,731][247399] Saving new best policy, reward=142.764! +[2026-06-02 16:40:19,988][247478] Updated weights for policy 0, policy_version 6021 (0.0010) +[2026-06-02 16:40:20,191][247478] Updated weights for policy 0, policy_version 6033 (0.0008) +[2026-06-02 16:40:20,371][247478] Updated weights for policy 0, policy_version 6043 (0.0008) +[2026-06-02 16:40:20,553][247478] Updated weights for policy 0, policy_version 6053 (0.0008) +[2026-06-02 16:40:20,734][247478] Updated weights for policy 0, policy_version 6063 (0.0008) +[2026-06-02 16:40:20,915][247478] Updated weights for policy 0, policy_version 6073 (0.0008) +[2026-06-02 16:40:21,583][247478] Updated weights for policy 0, policy_version 6083 (0.0009) +[2026-06-02 16:40:21,769][247478] Updated weights for policy 0, policy_version 6094 (0.0009) +[2026-06-02 16:40:21,944][247478] Updated weights for policy 0, policy_version 6104 (0.0009) +[2026-06-02 16:40:22,137][247478] Updated weights for policy 0, policy_version 6115 (0.0008) +[2026-06-02 16:40:22,317][247478] Updated weights for policy 0, policy_version 6125 (0.0008) +[2026-06-02 16:40:22,495][247478] Updated weights for policy 0, policy_version 6135 (0.0008) +[2026-06-02 16:40:23,171][247478] Updated weights for policy 0, policy_version 6145 (0.0009) +[2026-06-02 16:40:23,335][247478] Updated weights for policy 0, policy_version 6155 (0.0008) +[2026-06-02 16:40:23,526][247478] Updated weights for policy 0, policy_version 6166 (0.0008) +[2026-06-02 16:40:23,698][247478] Updated weights for policy 0, policy_version 6176 (0.0008) +[2026-06-02 16:40:23,887][247478] Updated weights for policy 0, policy_version 6186 (0.0008) +[2026-06-02 16:40:24,086][247478] Updated weights for policy 0, policy_version 6197 (0.0009) +[2026-06-02 16:40:24,278][247478] Updated weights for policy 0, policy_version 6208 (0.0008) +[2026-06-02 16:40:24,725][246448] Fps is (10 sec: 19660.7, 60 sec: 20207.0, 300 sec: 19865.7). Total num frames: 3178496. Throughput: 0: 20397.5. Samples: 3217024. Policy #0 lag: (min: 19.0, avg: 63.6, max: 83.0) +[2026-06-02 16:40:24,726][246448] Avg episode reward: [(0, '142.044')] +[2026-06-02 16:40:24,927][247478] Updated weights for policy 0, policy_version 6218 (0.0009) +[2026-06-02 16:40:25,103][247478] Updated weights for policy 0, policy_version 6228 (0.0008) +[2026-06-02 16:40:25,282][247478] Updated weights for policy 0, policy_version 6238 (0.0008) +[2026-06-02 16:40:25,476][247478] Updated weights for policy 0, policy_version 6249 (0.0008) +[2026-06-02 16:40:25,654][247478] Updated weights for policy 0, policy_version 6259 (0.0008) +[2026-06-02 16:40:25,828][247478] Updated weights for policy 0, policy_version 6269 (0.0009) +[2026-06-02 16:40:26,515][247478] Updated weights for policy 0, policy_version 6280 (0.0009) +[2026-06-02 16:40:26,686][247478] Updated weights for policy 0, policy_version 6290 (0.0008) +[2026-06-02 16:40:26,872][247478] Updated weights for policy 0, policy_version 6301 (0.0008) +[2026-06-02 16:40:27,059][247478] Updated weights for policy 0, policy_version 6311 (0.0008) +[2026-06-02 16:40:27,248][247478] Updated weights for policy 0, policy_version 6321 (0.0008) +[2026-06-02 16:40:27,427][247478] Updated weights for policy 0, policy_version 6331 (0.0008) +[2026-06-02 16:40:28,083][247478] Updated weights for policy 0, policy_version 6341 (0.0009) +[2026-06-02 16:40:28,275][247478] Updated weights for policy 0, policy_version 6352 (0.0009) +[2026-06-02 16:40:28,462][247478] Updated weights for policy 0, policy_version 6363 (0.0008) +[2026-06-02 16:40:28,643][247478] Updated weights for policy 0, policy_version 6373 (0.0008) +[2026-06-02 16:40:28,832][247478] Updated weights for policy 0, policy_version 6383 (0.0010) +[2026-06-02 16:40:29,010][247478] Updated weights for policy 0, policy_version 6393 (0.0008) +[2026-06-02 16:40:29,671][247478] Updated weights for policy 0, policy_version 6403 (0.0009) +[2026-06-02 16:40:29,725][246448] Fps is (10 sec: 19660.7, 60 sec: 20206.9, 300 sec: 19859.4). Total num frames: 3276800. Throughput: 0: 20218.4. Samples: 3271040. Policy #0 lag: (min: 39.0, avg: 54.7, max: 103.0) +[2026-06-02 16:40:29,726][246448] Avg episode reward: [(0, '154.772')] +[2026-06-02 16:40:29,842][247478] Updated weights for policy 0, policy_version 6413 (0.0009) +[2026-06-02 16:40:30,035][247478] Updated weights for policy 0, policy_version 6424 (0.0009) +[2026-06-02 16:40:30,227][247478] Updated weights for policy 0, policy_version 6435 (0.0009) +[2026-06-02 16:40:30,407][247478] Updated weights for policy 0, policy_version 6445 (0.0008) +[2026-06-02 16:40:30,605][247478] Updated weights for policy 0, policy_version 6456 (0.0009) +[2026-06-02 16:40:30,738][247399] Saving new best policy, reward=154.772! +[2026-06-02 16:40:31,254][247478] Updated weights for policy 0, policy_version 6466 (0.0008) +[2026-06-02 16:40:31,424][247478] Updated weights for policy 0, policy_version 6477 (0.0009) +[2026-06-02 16:40:31,606][247478] Updated weights for policy 0, policy_version 6487 (0.0008) +[2026-06-02 16:40:31,784][247478] Updated weights for policy 0, policy_version 6497 (0.0008) +[2026-06-02 16:40:31,955][247478] Updated weights for policy 0, policy_version 6507 (0.0009) +[2026-06-02 16:40:32,138][247478] Updated weights for policy 0, policy_version 6517 (0.0008) +[2026-06-02 16:40:32,321][247478] Updated weights for policy 0, policy_version 6527 (0.0008) +[2026-06-02 16:40:32,978][247478] Updated weights for policy 0, policy_version 6537 (0.0009) +[2026-06-02 16:40:33,159][247478] Updated weights for policy 0, policy_version 6547 (0.0009) +[2026-06-02 16:40:33,338][247478] Updated weights for policy 0, policy_version 6557 (0.0009) +[2026-06-02 16:40:33,549][247478] Updated weights for policy 0, policy_version 6569 (0.0008) +[2026-06-02 16:40:33,723][247478] Updated weights for policy 0, policy_version 6579 (0.0008) +[2026-06-02 16:40:33,912][247478] Updated weights for policy 0, policy_version 6589 (0.0009) +[2026-06-02 16:40:34,558][247478] Updated weights for policy 0, policy_version 6600 (0.0009) +[2026-06-02 16:40:34,725][246448] Fps is (10 sec: 19660.7, 60 sec: 20206.9, 300 sec: 19853.6). Total num frames: 3375104. Throughput: 0: 20460.1. Samples: 3393536. Policy #0 lag: (min: 39.0, avg: 54.7, max: 103.0) +[2026-06-02 16:40:34,726][246448] Avg episode reward: [(0, '157.623')] +[2026-06-02 16:40:34,740][247478] Updated weights for policy 0, policy_version 6610 (0.0008) +[2026-06-02 16:40:34,921][247478] Updated weights for policy 0, policy_version 6620 (0.0008) +[2026-06-02 16:40:35,105][247478] Updated weights for policy 0, policy_version 6630 (0.0009) +[2026-06-02 16:40:35,276][247478] Updated weights for policy 0, policy_version 6640 (0.0009) +[2026-06-02 16:40:35,460][247478] Updated weights for policy 0, policy_version 6650 (0.0009) +[2026-06-02 16:40:35,567][247399] Saving new best policy, reward=157.623! +[2026-06-02 16:40:36,107][247478] Updated weights for policy 0, policy_version 6660 (0.0008) +[2026-06-02 16:40:36,301][247478] Updated weights for policy 0, policy_version 6671 (0.0009) +[2026-06-02 16:40:36,483][247478] Updated weights for policy 0, policy_version 6681 (0.0008) +[2026-06-02 16:40:36,659][247478] Updated weights for policy 0, policy_version 6691 (0.0008) +[2026-06-02 16:40:36,833][247478] Updated weights for policy 0, policy_version 6701 (0.0008) +[2026-06-02 16:40:37,041][247478] Updated weights for policy 0, policy_version 6712 (0.0008) +[2026-06-02 16:40:37,689][247478] Updated weights for policy 0, policy_version 6722 (0.0008) +[2026-06-02 16:40:37,870][247478] Updated weights for policy 0, policy_version 6733 (0.0008) +[2026-06-02 16:40:38,045][247478] Updated weights for policy 0, policy_version 6743 (0.0009) +[2026-06-02 16:40:38,241][247478] Updated weights for policy 0, policy_version 6754 (0.0008) +[2026-06-02 16:40:38,419][247478] Updated weights for policy 0, policy_version 6764 (0.0008) +[2026-06-02 16:40:38,594][247478] Updated weights for policy 0, policy_version 6774 (0.0008) +[2026-06-02 16:40:38,778][247478] Updated weights for policy 0, policy_version 6784 (0.0008) +[2026-06-02 16:40:39,419][247478] Updated weights for policy 0, policy_version 6794 (0.0009) +[2026-06-02 16:40:39,583][247478] Updated weights for policy 0, policy_version 6804 (0.0008) +[2026-06-02 16:40:39,725][246448] Fps is (10 sec: 19660.7, 60 sec: 20206.9, 300 sec: 19848.1). Total num frames: 3473408. Throughput: 0: 20457.3. Samples: 3519232. Policy #0 lag: (min: 63.0, avg: 79.3, max: 127.0) +[2026-06-02 16:40:39,726][246448] Avg episode reward: [(0, '168.328')] +[2026-06-02 16:40:39,769][247478] Updated weights for policy 0, policy_version 6814 (0.0009) +[2026-06-02 16:40:39,968][247478] Updated weights for policy 0, policy_version 6825 (0.0008) +[2026-06-02 16:40:40,143][247478] Updated weights for policy 0, policy_version 6835 (0.0008) +[2026-06-02 16:40:40,344][247478] Updated weights for policy 0, policy_version 6846 (0.0009) +[2026-06-02 16:40:40,370][247399] Saving new best policy, reward=168.328! +[2026-06-02 16:40:40,997][247478] Updated weights for policy 0, policy_version 6856 (0.0009) +[2026-06-02 16:40:41,172][247478] Updated weights for policy 0, policy_version 6866 (0.0009) +[2026-06-02 16:40:41,361][247478] Updated weights for policy 0, policy_version 6877 (0.0009) +[2026-06-02 16:40:41,561][247478] Updated weights for policy 0, policy_version 6888 (0.0009) +[2026-06-02 16:40:41,741][247478] Updated weights for policy 0, policy_version 6898 (0.0009) +[2026-06-02 16:40:41,934][247478] Updated weights for policy 0, policy_version 6909 (0.0009) +[2026-06-02 16:40:42,575][247478] Updated weights for policy 0, policy_version 6919 (0.0009) +[2026-06-02 16:40:42,743][247478] Updated weights for policy 0, policy_version 6929 (0.0008) +[2026-06-02 16:40:42,926][247478] Updated weights for policy 0, policy_version 6939 (0.0009) +[2026-06-02 16:40:43,119][247478] Updated weights for policy 0, policy_version 6950 (0.0008) +[2026-06-02 16:40:43,296][247478] Updated weights for policy 0, policy_version 6960 (0.0009) +[2026-06-02 16:40:43,487][247478] Updated weights for policy 0, policy_version 6970 (0.0008) +[2026-06-02 16:40:44,128][247478] Updated weights for policy 0, policy_version 6980 (0.0009) +[2026-06-02 16:40:44,323][247478] Updated weights for policy 0, policy_version 6991 (0.0008) +[2026-06-02 16:40:44,521][247478] Updated weights for policy 0, policy_version 7002 (0.0008) +[2026-06-02 16:40:44,701][247478] Updated weights for policy 0, policy_version 7012 (0.0008) +[2026-06-02 16:40:44,725][246448] Fps is (10 sec: 19661.0, 60 sec: 20207.0, 300 sec: 19842.9). Total num frames: 3571712. Throughput: 0: 20434.5. Samples: 3581440. Policy #0 lag: (min: 63.0, avg: 79.8, max: 127.0) +[2026-06-02 16:40:44,726][246448] Avg episode reward: [(0, '189.740')] +[2026-06-02 16:40:44,877][247478] Updated weights for policy 0, policy_version 7022 (0.0010) +[2026-06-02 16:40:45,055][247478] Updated weights for policy 0, policy_version 7032 (0.0009) +[2026-06-02 16:40:45,189][247399] Saving new best policy, reward=189.740! +[2026-06-02 16:40:45,721][247478] Updated weights for policy 0, policy_version 7043 (0.0008) +[2026-06-02 16:40:45,894][247478] Updated weights for policy 0, policy_version 7053 (0.0009) +[2026-06-02 16:40:46,074][247478] Updated weights for policy 0, policy_version 7063 (0.0009) +[2026-06-02 16:40:46,246][247478] Updated weights for policy 0, policy_version 7073 (0.0008) +[2026-06-02 16:40:46,437][247478] Updated weights for policy 0, policy_version 7084 (0.0008) +[2026-06-02 16:40:46,623][247478] Updated weights for policy 0, policy_version 7094 (0.0008) +[2026-06-02 16:40:46,797][247478] Updated weights for policy 0, policy_version 7104 (0.0008) +[2026-06-02 16:40:47,456][247478] Updated weights for policy 0, policy_version 7114 (0.0009) +[2026-06-02 16:40:47,651][247478] Updated weights for policy 0, policy_version 7125 (0.0008) +[2026-06-02 16:40:47,824][247478] Updated weights for policy 0, policy_version 7135 (0.0008) +[2026-06-02 16:40:48,006][247478] Updated weights for policy 0, policy_version 7145 (0.0009) +[2026-06-02 16:40:48,191][247478] Updated weights for policy 0, policy_version 7155 (0.0008) +[2026-06-02 16:40:48,381][247478] Updated weights for policy 0, policy_version 7166 (0.0008) +[2026-06-02 16:40:49,044][247478] Updated weights for policy 0, policy_version 7176 (0.0009) +[2026-06-02 16:40:49,238][247478] Updated weights for policy 0, policy_version 7187 (0.0009) +[2026-06-02 16:40:49,418][247478] Updated weights for policy 0, policy_version 7197 (0.0008) +[2026-06-02 16:40:49,602][247478] Updated weights for policy 0, policy_version 7207 (0.0009) +[2026-06-02 16:40:49,725][246448] Fps is (10 sec: 19660.9, 60 sec: 20206.9, 300 sec: 19838.0). Total num frames: 3670016. Throughput: 0: 20261.0. Samples: 3700480. Policy #0 lag: (min: 63.0, avg: 79.8, max: 127.0) +[2026-06-02 16:40:49,726][246448] Avg episode reward: [(0, '218.249')] +[2026-06-02 16:40:49,781][247478] Updated weights for policy 0, policy_version 7217 (0.0008) +[2026-06-02 16:40:49,957][247478] Updated weights for policy 0, policy_version 7227 (0.0008) +[2026-06-02 16:40:50,040][247399] Saving new best policy, reward=218.249! +[2026-06-02 16:40:50,606][247478] Updated weights for policy 0, policy_version 7237 (0.0009) +[2026-06-02 16:40:50,794][247478] Updated weights for policy 0, policy_version 7248 (0.0008) +[2026-06-02 16:40:50,967][247478] Updated weights for policy 0, policy_version 7258 (0.0009) +[2026-06-02 16:40:51,148][247478] Updated weights for policy 0, policy_version 7268 (0.0008) +[2026-06-02 16:40:51,324][247478] Updated weights for policy 0, policy_version 7278 (0.0009) +[2026-06-02 16:40:51,524][247478] Updated weights for policy 0, policy_version 7289 (0.0008) +[2026-06-02 16:40:52,181][247478] Updated weights for policy 0, policy_version 7299 (0.0008) +[2026-06-02 16:40:52,367][247478] Updated weights for policy 0, policy_version 7310 (0.0008) +[2026-06-02 16:40:52,543][247478] Updated weights for policy 0, policy_version 7320 (0.0011) +[2026-06-02 16:40:52,726][247478] Updated weights for policy 0, policy_version 7330 (0.0009) +[2026-06-02 16:40:52,921][247478] Updated weights for policy 0, policy_version 7341 (0.0010) +[2026-06-02 16:40:53,114][247478] Updated weights for policy 0, policy_version 7352 (0.0009) +[2026-06-02 16:40:53,777][247478] Updated weights for policy 0, policy_version 7362 (0.0009) +[2026-06-02 16:40:53,961][247478] Updated weights for policy 0, policy_version 7373 (0.0009) +[2026-06-02 16:40:54,140][247478] Updated weights for policy 0, policy_version 7383 (0.0009) +[2026-06-02 16:40:54,316][247478] Updated weights for policy 0, policy_version 7393 (0.0008) +[2026-06-02 16:40:54,494][247478] Updated weights for policy 0, policy_version 7403 (0.0008) +[2026-06-02 16:40:54,678][247478] Updated weights for policy 0, policy_version 7413 (0.0008) +[2026-06-02 16:40:54,725][246448] Fps is (10 sec: 19660.7, 60 sec: 20206.9, 300 sec: 19833.3). Total num frames: 3768320. Throughput: 0: 20408.9. Samples: 3820288. Policy #0 lag: (min: 1.0, avg: 45.7, max: 65.0) +[2026-06-02 16:40:54,726][246448] Avg episode reward: [(0, '239.892')] +[2026-06-02 16:40:54,862][247399] Saving new best policy, reward=239.892! +[2026-06-02 16:40:54,865][247478] Updated weights for policy 0, policy_version 7424 (0.0008) +[2026-06-02 16:40:55,528][247478] Updated weights for policy 0, policy_version 7434 (0.0009) +[2026-06-02 16:40:55,695][247478] Updated weights for policy 0, policy_version 7444 (0.0008) +[2026-06-02 16:40:55,879][247478] Updated weights for policy 0, policy_version 7455 (0.0009) +[2026-06-02 16:40:56,073][247478] Updated weights for policy 0, policy_version 7466 (0.0009) +[2026-06-02 16:40:56,252][247478] Updated weights for policy 0, policy_version 7476 (0.0009) +[2026-06-02 16:40:56,422][247478] Updated weights for policy 0, policy_version 7486 (0.0008) +[2026-06-02 16:40:57,137][247478] Updated weights for policy 0, policy_version 7498 (0.0009) +[2026-06-02 16:40:57,312][247478] Updated weights for policy 0, policy_version 7508 (0.0009) +[2026-06-02 16:40:57,482][247478] Updated weights for policy 0, policy_version 7518 (0.0008) +[2026-06-02 16:40:57,676][247478] Updated weights for policy 0, policy_version 7529 (0.0009) +[2026-06-02 16:40:57,855][247478] Updated weights for policy 0, policy_version 7539 (0.0008) +[2026-06-02 16:40:58,070][247478] Updated weights for policy 0, policy_version 7551 (0.0008) +[2026-06-02 16:40:58,750][247478] Updated weights for policy 0, policy_version 7562 (0.0009) +[2026-06-02 16:40:58,950][247478] Updated weights for policy 0, policy_version 7573 (0.0008) +[2026-06-02 16:40:59,151][247478] Updated weights for policy 0, policy_version 7585 (0.0009) +[2026-06-02 16:40:59,365][247478] Updated weights for policy 0, policy_version 7597 (0.0009) +[2026-06-02 16:40:59,546][247478] Updated weights for policy 0, policy_version 7607 (0.0009) +[2026-06-02 16:40:59,725][246448] Fps is (10 sec: 22937.7, 60 sec: 20753.1, 300 sec: 19996.9). Total num frames: 3899392. Throughput: 0: 20369.1. Samples: 3883008. Policy #0 lag: (min: 63.0, avg: 77.4, max: 127.0) +[2026-06-02 16:40:59,726][246448] Avg episode reward: [(0, '257.411')] +[2026-06-02 16:40:59,731][247399] Saving new best policy, reward=257.411! +[2026-06-02 16:41:00,224][247478] Updated weights for policy 0, policy_version 7617 (0.0009) +[2026-06-02 16:41:00,380][247478] Updated weights for policy 0, policy_version 7627 (0.0008) +[2026-06-02 16:41:00,576][247478] Updated weights for policy 0, policy_version 7638 (0.0008) +[2026-06-02 16:41:00,763][247478] Updated weights for policy 0, policy_version 7648 (0.0009) +[2026-06-02 16:41:00,939][247478] Updated weights for policy 0, policy_version 7658 (0.0008) +[2026-06-02 16:41:01,118][247478] Updated weights for policy 0, policy_version 7668 (0.0009) +[2026-06-02 16:41:01,296][247478] Updated weights for policy 0, policy_version 7678 (0.0009) +[2026-06-02 16:41:01,945][247478] Updated weights for policy 0, policy_version 7688 (0.0009) +[2026-06-02 16:41:02,119][247478] Updated weights for policy 0, policy_version 7698 (0.0009) +[2026-06-02 16:41:02,298][247478] Updated weights for policy 0, policy_version 7708 (0.0009) +[2026-06-02 16:41:02,478][247478] Updated weights for policy 0, policy_version 7718 (0.0009) +[2026-06-02 16:41:02,657][247478] Updated weights for policy 0, policy_version 7728 (0.0009) +[2026-06-02 16:41:02,835][247478] Updated weights for policy 0, policy_version 7738 (0.0009) +[2026-06-02 16:41:03,482][247478] Updated weights for policy 0, policy_version 7748 (0.0009) +[2026-06-02 16:41:03,659][247478] Updated weights for policy 0, policy_version 7758 (0.0008) +[2026-06-02 16:41:03,830][247478] Updated weights for policy 0, policy_version 7768 (0.0008) +[2026-06-02 16:41:04,013][247478] Updated weights for policy 0, policy_version 7778 (0.0009) +[2026-06-02 16:41:04,206][247478] Updated weights for policy 0, policy_version 7789 (0.0009) +[2026-06-02 16:41:04,384][247478] Updated weights for policy 0, policy_version 7799 (0.0009) +[2026-06-02 16:41:04,725][246448] Fps is (10 sec: 22937.6, 60 sec: 20753.1, 300 sec: 19988.5). Total num frames: 3997696. Throughput: 0: 20374.7. Samples: 4009728. Policy #0 lag: (min: 63.0, avg: 77.4, max: 127.0) +[2026-06-02 16:41:04,726][246448] Avg episode reward: [(0, '299.859')] +[2026-06-02 16:41:04,731][247399] Saving new best policy, reward=299.859! +[2026-06-02 16:41:05,072][247478] Updated weights for policy 0, policy_version 7809 (0.0009) +[2026-06-02 16:41:05,237][247478] Updated weights for policy 0, policy_version 7819 (0.0009) +[2026-06-02 16:41:05,434][247478] Updated weights for policy 0, policy_version 7830 (0.0008) +[2026-06-02 16:41:05,600][247478] Updated weights for policy 0, policy_version 7840 (0.0009) +[2026-06-02 16:41:05,783][247478] Updated weights for policy 0, policy_version 7850 (0.0009) +[2026-06-02 16:41:05,965][247478] Updated weights for policy 0, policy_version 7860 (0.0009) +[2026-06-02 16:41:06,150][247478] Updated weights for policy 0, policy_version 7870 (0.0008) +[2026-06-02 16:41:06,795][247478] Updated weights for policy 0, policy_version 7880 (0.0008) +[2026-06-02 16:41:06,963][247478] Updated weights for policy 0, policy_version 7890 (0.0008) +[2026-06-02 16:41:07,143][247478] Updated weights for policy 0, policy_version 7900 (0.0009) +[2026-06-02 16:41:07,326][247478] Updated weights for policy 0, policy_version 7910 (0.0010) +[2026-06-02 16:41:07,510][247478] Updated weights for policy 0, policy_version 7920 (0.0009) +[2026-06-02 16:41:07,692][247478] Updated weights for policy 0, policy_version 7930 (0.0009) +[2026-06-02 16:41:08,347][247478] Updated weights for policy 0, policy_version 7941 (0.0009) +[2026-06-02 16:41:08,526][247478] Updated weights for policy 0, policy_version 7951 (0.0009) +[2026-06-02 16:41:08,699][247478] Updated weights for policy 0, policy_version 7961 (0.0008) +[2026-06-02 16:41:08,887][247478] Updated weights for policy 0, policy_version 7971 (0.0008) +[2026-06-02 16:41:09,064][247478] Updated weights for policy 0, policy_version 7981 (0.0008) +[2026-06-02 16:41:09,249][247478] Updated weights for policy 0, policy_version 7991 (0.0008) +[2026-06-02 16:41:09,725][246448] Fps is (10 sec: 19660.7, 60 sec: 20206.9, 300 sec: 19980.5). Total num frames: 4096000. Throughput: 0: 20278.1. Samples: 4129536. Policy #0 lag: (min: 63.0, avg: 77.6, max: 127.0) +[2026-06-02 16:41:09,726][246448] Avg episode reward: [(0, '313.000')] +[2026-06-02 16:41:09,731][247399] Saving new best policy, reward=313.000! +[2026-06-02 16:41:09,934][247478] Updated weights for policy 0, policy_version 8001 (0.0009) +[2026-06-02 16:41:10,106][247478] Updated weights for policy 0, policy_version 8011 (0.0008) +[2026-06-02 16:41:10,283][247478] Updated weights for policy 0, policy_version 8021 (0.0008) +[2026-06-02 16:41:10,462][247478] Updated weights for policy 0, policy_version 8031 (0.0008) +[2026-06-02 16:41:10,644][247478] Updated weights for policy 0, policy_version 8041 (0.0009) +[2026-06-02 16:41:10,822][247478] Updated weights for policy 0, policy_version 8051 (0.0009) +[2026-06-02 16:41:10,996][247478] Updated weights for policy 0, policy_version 8061 (0.0009) +[2026-06-02 16:41:11,627][247478] Updated weights for policy 0, policy_version 8072 (0.0009) +[2026-06-02 16:41:11,806][247478] Updated weights for policy 0, policy_version 8082 (0.0009) +[2026-06-02 16:41:11,989][247478] Updated weights for policy 0, policy_version 8092 (0.0008) +[2026-06-02 16:41:12,166][247478] Updated weights for policy 0, policy_version 8102 (0.0008) +[2026-06-02 16:41:12,348][247478] Updated weights for policy 0, policy_version 8112 (0.0008) +[2026-06-02 16:41:12,533][247478] Updated weights for policy 0, policy_version 8122 (0.0008) +[2026-06-02 16:41:13,194][247478] Updated weights for policy 0, policy_version 8133 (0.0009) +[2026-06-02 16:41:13,369][247478] Updated weights for policy 0, policy_version 8143 (0.0008) +[2026-06-02 16:41:13,547][247478] Updated weights for policy 0, policy_version 8153 (0.0008) +[2026-06-02 16:41:13,730][247478] Updated weights for policy 0, policy_version 8163 (0.0008) +[2026-06-02 16:41:13,911][247478] Updated weights for policy 0, policy_version 8173 (0.0008) +[2026-06-02 16:41:14,115][247478] Updated weights for policy 0, policy_version 8184 (0.0008) +[2026-06-02 16:41:14,725][246448] Fps is (10 sec: 19660.8, 60 sec: 20206.9, 300 sec: 19972.9). Total num frames: 4194304. Throughput: 0: 20329.2. Samples: 4185856. Policy #0 lag: (min: 15.0, avg: 61.1, max: 85.0) +[2026-06-02 16:41:14,726][246448] Avg episode reward: [(0, '321.067')] +[2026-06-02 16:41:14,766][247478] Updated weights for policy 0, policy_version 8194 (0.0008) +[2026-06-02 16:41:14,950][247478] Updated weights for policy 0, policy_version 8205 (0.0009) +[2026-06-02 16:41:15,130][247478] Updated weights for policy 0, policy_version 8215 (0.0008) +[2026-06-02 16:41:15,314][247478] Updated weights for policy 0, policy_version 8225 (0.0008) +[2026-06-02 16:41:15,501][247478] Updated weights for policy 0, policy_version 8235 (0.0008) +[2026-06-02 16:41:15,678][247478] Updated weights for policy 0, policy_version 8245 (0.0009) +[2026-06-02 16:41:15,853][247478] Updated weights for policy 0, policy_version 8255 (0.0009) +[2026-06-02 16:41:15,871][247399] Saving new best policy, reward=321.067! +[2026-06-02 16:41:16,485][247478] Updated weights for policy 0, policy_version 8265 (0.0008) +[2026-06-02 16:41:16,658][247478] Updated weights for policy 0, policy_version 8275 (0.0009) +[2026-06-02 16:41:16,835][247478] Updated weights for policy 0, policy_version 8285 (0.0008) +[2026-06-02 16:41:17,035][247478] Updated weights for policy 0, policy_version 8296 (0.0008) +[2026-06-02 16:41:17,227][247478] Updated weights for policy 0, policy_version 8306 (0.0008) +[2026-06-02 16:41:17,409][247478] Updated weights for policy 0, policy_version 8316 (0.0008) +[2026-06-02 16:41:18,032][247478] Updated weights for policy 0, policy_version 8326 (0.0009) +[2026-06-02 16:41:18,213][247478] Updated weights for policy 0, policy_version 8336 (0.0008) +[2026-06-02 16:41:18,394][247478] Updated weights for policy 0, policy_version 8346 (0.0008) +[2026-06-02 16:41:18,591][247478] Updated weights for policy 0, policy_version 8357 (0.0009) +[2026-06-02 16:41:18,780][247478] Updated weights for policy 0, policy_version 8367 (0.0009) +[2026-06-02 16:41:18,953][247478] Updated weights for policy 0, policy_version 8377 (0.0009) +[2026-06-02 16:41:19,591][247478] Updated weights for policy 0, policy_version 8387 (0.0009) +[2026-06-02 16:41:19,725][246448] Fps is (10 sec: 19660.8, 60 sec: 20206.9, 300 sec: 19965.7). Total num frames: 4292608. Throughput: 0: 20403.2. Samples: 4311680. Policy #0 lag: (min: 15.0, avg: 61.1, max: 85.0) +[2026-06-02 16:41:19,726][246448] Avg episode reward: [(0, '321.022')] +[2026-06-02 16:41:19,787][247478] Updated weights for policy 0, policy_version 8398 (0.0009) +[2026-06-02 16:41:19,966][247478] Updated weights for policy 0, policy_version 8408 (0.0008) +[2026-06-02 16:41:20,148][247478] Updated weights for policy 0, policy_version 8418 (0.0008) +[2026-06-02 16:41:20,325][247478] Updated weights for policy 0, policy_version 8428 (0.0009) +[2026-06-02 16:41:20,533][247478] Updated weights for policy 0, policy_version 8440 (0.0009) +[2026-06-02 16:41:21,192][247478] Updated weights for policy 0, policy_version 8450 (0.0009) +[2026-06-02 16:41:21,367][247478] Updated weights for policy 0, policy_version 8460 (0.0009) +[2026-06-02 16:41:21,535][247478] Updated weights for policy 0, policy_version 8470 (0.0009) +[2026-06-02 16:41:21,725][247478] Updated weights for policy 0, policy_version 8480 (0.0009) +[2026-06-02 16:41:21,899][247478] Updated weights for policy 0, policy_version 8490 (0.0008) +[2026-06-02 16:41:22,088][247478] Updated weights for policy 0, policy_version 8500 (0.0009) +[2026-06-02 16:41:22,262][247478] Updated weights for policy 0, policy_version 8510 (0.0008) +[2026-06-02 16:41:22,899][247478] Updated weights for policy 0, policy_version 8520 (0.0010) +[2026-06-02 16:41:23,070][247478] Updated weights for policy 0, policy_version 8530 (0.0008) +[2026-06-02 16:41:23,254][247478] Updated weights for policy 0, policy_version 8540 (0.0009) +[2026-06-02 16:41:23,432][247478] Updated weights for policy 0, policy_version 8550 (0.0009) +[2026-06-02 16:41:23,607][247478] Updated weights for policy 0, policy_version 8560 (0.0009) +[2026-06-02 16:41:23,795][247478] Updated weights for policy 0, policy_version 8570 (0.0008) +[2026-06-02 16:41:24,447][247478] Updated weights for policy 0, policy_version 8581 (0.0008) +[2026-06-02 16:41:24,621][247478] Updated weights for policy 0, policy_version 8591 (0.0009) +[2026-06-02 16:41:24,725][246448] Fps is (10 sec: 19660.8, 60 sec: 20206.9, 300 sec: 19958.7). Total num frames: 4390912. Throughput: 0: 20383.3. Samples: 4436480. Policy #0 lag: (min: 5.0, avg: 21.6, max: 69.0) +[2026-06-02 16:41:24,726][246448] Avg episode reward: [(0, '332.779')] +[2026-06-02 16:41:24,799][247478] Updated weights for policy 0, policy_version 8601 (0.0008) +[2026-06-02 16:41:24,977][247478] Updated weights for policy 0, policy_version 8611 (0.0008) +[2026-06-02 16:41:25,152][247478] Updated weights for policy 0, policy_version 8621 (0.0008) +[2026-06-02 16:41:25,339][247478] Updated weights for policy 0, policy_version 8631 (0.0008) +[2026-06-02 16:41:25,494][247399] Saving new best policy, reward=332.779! +[2026-06-02 16:41:25,990][247478] Updated weights for policy 0, policy_version 8641 (0.0009) +[2026-06-02 16:41:26,162][247478] Updated weights for policy 0, policy_version 8651 (0.0009) +[2026-06-02 16:41:26,340][247478] Updated weights for policy 0, policy_version 8661 (0.0009) +[2026-06-02 16:41:26,508][247478] Updated weights for policy 0, policy_version 8671 (0.0008) +[2026-06-02 16:41:26,701][247478] Updated weights for policy 0, policy_version 8681 (0.0008) +[2026-06-02 16:41:26,883][247478] Updated weights for policy 0, policy_version 8691 (0.0008) +[2026-06-02 16:41:27,057][247478] Updated weights for policy 0, policy_version 8701 (0.0008) +[2026-06-02 16:41:27,723][247478] Updated weights for policy 0, policy_version 8711 (0.0008) +[2026-06-02 16:41:27,886][247478] Updated weights for policy 0, policy_version 8721 (0.0009) +[2026-06-02 16:41:28,076][247478] Updated weights for policy 0, policy_version 8731 (0.0008) +[2026-06-02 16:41:28,248][247478] Updated weights for policy 0, policy_version 8741 (0.0008) +[2026-06-02 16:41:28,439][247478] Updated weights for policy 0, policy_version 8751 (0.0008) +[2026-06-02 16:41:28,611][247478] Updated weights for policy 0, policy_version 8761 (0.0008) +[2026-06-02 16:41:29,272][247478] Updated weights for policy 0, policy_version 8771 (0.0009) +[2026-06-02 16:41:29,439][247478] Updated weights for policy 0, policy_version 8781 (0.0008) +[2026-06-02 16:41:29,619][247478] Updated weights for policy 0, policy_version 8791 (0.0009) +[2026-06-02 16:41:29,725][246448] Fps is (10 sec: 19660.8, 60 sec: 20206.9, 300 sec: 19952.1). Total num frames: 4489216. Throughput: 0: 20406.0. Samples: 4499712. Policy #0 lag: (min: 5.0, avg: 21.6, max: 69.0) +[2026-06-02 16:41:29,727][246448] Avg episode reward: [(0, '367.399')] +[2026-06-02 16:41:29,799][247478] Updated weights for policy 0, policy_version 8801 (0.0008) +[2026-06-02 16:41:29,979][247478] Updated weights for policy 0, policy_version 8811 (0.0009) +[2026-06-02 16:41:30,160][247478] Updated weights for policy 0, policy_version 8821 (0.0008) +[2026-06-02 16:41:30,344][247478] Updated weights for policy 0, policy_version 8831 (0.0008) +[2026-06-02 16:41:30,353][247399] Saving new best policy, reward=367.399! +[2026-06-02 16:41:30,988][247478] Updated weights for policy 0, policy_version 8841 (0.0008) +[2026-06-02 16:41:31,177][247478] Updated weights for policy 0, policy_version 8852 (0.0008) +[2026-06-02 16:41:31,359][247478] Updated weights for policy 0, policy_version 8862 (0.0008) +[2026-06-02 16:41:31,556][247478] Updated weights for policy 0, policy_version 8873 (0.0008) +[2026-06-02 16:41:31,739][247478] Updated weights for policy 0, policy_version 8883 (0.0008) +[2026-06-02 16:41:31,924][247478] Updated weights for policy 0, policy_version 8893 (0.0008) +[2026-06-02 16:41:32,559][247478] Updated weights for policy 0, policy_version 8903 (0.0008) +[2026-06-02 16:41:32,742][247478] Updated weights for policy 0, policy_version 8913 (0.0009) +[2026-06-02 16:41:32,922][247478] Updated weights for policy 0, policy_version 8923 (0.0008) +[2026-06-02 16:41:33,108][247478] Updated weights for policy 0, policy_version 8933 (0.0008) +[2026-06-02 16:41:33,285][247478] Updated weights for policy 0, policy_version 8943 (0.0008) +[2026-06-02 16:41:33,465][247478] Updated weights for policy 0, policy_version 8953 (0.0008) +[2026-06-02 16:41:34,114][247478] Updated weights for policy 0, policy_version 8963 (0.0009) +[2026-06-02 16:41:34,290][247478] Updated weights for policy 0, policy_version 8973 (0.0009) +[2026-06-02 16:41:34,472][247478] Updated weights for policy 0, policy_version 8983 (0.0009) +[2026-06-02 16:41:34,646][247478] Updated weights for policy 0, policy_version 8993 (0.0008) +[2026-06-02 16:41:34,725][246448] Fps is (10 sec: 19660.8, 60 sec: 20207.0, 300 sec: 19945.8). Total num frames: 4587520. Throughput: 0: 20283.7. Samples: 4613248. Policy #0 lag: (min: 10.0, avg: 55.5, max: 69.0) +[2026-06-02 16:41:34,726][246448] Avg episode reward: [(0, '372.454')] +[2026-06-02 16:41:34,830][247478] Updated weights for policy 0, policy_version 9003 (0.0009) +[2026-06-02 16:41:35,011][247478] Updated weights for policy 0, policy_version 9013 (0.0009) +[2026-06-02 16:41:35,199][247399] Saving new best policy, reward=372.454! +[2026-06-02 16:41:35,201][247478] Updated weights for policy 0, policy_version 9024 (0.0008) +[2026-06-02 16:41:35,884][247478] Updated weights for policy 0, policy_version 9034 (0.0009) +[2026-06-02 16:41:36,052][247478] Updated weights for policy 0, policy_version 9044 (0.0008) +[2026-06-02 16:41:36,234][247478] Updated weights for policy 0, policy_version 9054 (0.0008) +[2026-06-02 16:41:36,422][247478] Updated weights for policy 0, policy_version 9065 (0.0008) +[2026-06-02 16:41:36,607][247478] Updated weights for policy 0, policy_version 9075 (0.0008) +[2026-06-02 16:41:36,821][247478] Updated weights for policy 0, policy_version 9087 (0.0008) +[2026-06-02 16:41:37,521][247478] Updated weights for policy 0, policy_version 9098 (0.0009) +[2026-06-02 16:41:37,690][247478] Updated weights for policy 0, policy_version 9108 (0.0008) +[2026-06-02 16:41:37,876][247478] Updated weights for policy 0, policy_version 9118 (0.0008) +[2026-06-02 16:41:38,060][247478] Updated weights for policy 0, policy_version 9128 (0.0008) +[2026-06-02 16:41:38,242][247478] Updated weights for policy 0, policy_version 9138 (0.0008) +[2026-06-02 16:41:38,424][247478] Updated weights for policy 0, policy_version 9149 (0.0008) +[2026-06-02 16:41:39,082][247478] Updated weights for policy 0, policy_version 9160 (0.0009) +[2026-06-02 16:41:39,261][247478] Updated weights for policy 0, policy_version 9170 (0.0009) +[2026-06-02 16:41:39,432][247478] Updated weights for policy 0, policy_version 9180 (0.0008) +[2026-06-02 16:41:39,618][247478] Updated weights for policy 0, policy_version 9190 (0.0008) +[2026-06-02 16:41:39,725][246448] Fps is (10 sec: 19660.8, 60 sec: 20206.9, 300 sec: 19939.7). Total num frames: 4685824. Throughput: 0: 20428.8. Samples: 4739584. Policy #0 lag: (min: 63.0, avg: 78.5, max: 127.0) +[2026-06-02 16:41:39,726][246448] Avg episode reward: [(0, '400.658')] +[2026-06-02 16:41:39,797][247478] Updated weights for policy 0, policy_version 9200 (0.0009) +[2026-06-02 16:41:39,983][247478] Updated weights for policy 0, policy_version 9210 (0.0008) +[2026-06-02 16:41:40,084][247399] Saving new best policy, reward=400.658! +[2026-06-02 16:41:40,644][247478] Updated weights for policy 0, policy_version 9220 (0.0009) +[2026-06-02 16:41:40,817][247478] Updated weights for policy 0, policy_version 9230 (0.0008) +[2026-06-02 16:41:40,998][247478] Updated weights for policy 0, policy_version 9240 (0.0008) +[2026-06-02 16:41:41,170][247478] Updated weights for policy 0, policy_version 9250 (0.0008) +[2026-06-02 16:41:41,350][247478] Updated weights for policy 0, policy_version 9260 (0.0008) +[2026-06-02 16:41:41,532][247478] Updated weights for policy 0, policy_version 9270 (0.0008) +[2026-06-02 16:41:41,713][247478] Updated weights for policy 0, policy_version 9280 (0.0008) +[2026-06-02 16:41:42,386][247478] Updated weights for policy 0, policy_version 9292 (0.0009) +[2026-06-02 16:41:42,562][247478] Updated weights for policy 0, policy_version 9302 (0.0008) +[2026-06-02 16:41:42,750][247478] Updated weights for policy 0, policy_version 9312 (0.0008) +[2026-06-02 16:41:42,935][247478] Updated weights for policy 0, policy_version 9322 (0.0008) +[2026-06-02 16:41:43,116][247478] Updated weights for policy 0, policy_version 9332 (0.0008) +[2026-06-02 16:41:43,301][247478] Updated weights for policy 0, policy_version 9342 (0.0008) +[2026-06-02 16:41:43,934][247478] Updated weights for policy 0, policy_version 9352 (0.0008) +[2026-06-02 16:41:44,131][247478] Updated weights for policy 0, policy_version 9363 (0.0009) +[2026-06-02 16:41:44,305][247478] Updated weights for policy 0, policy_version 9373 (0.0009) +[2026-06-02 16:41:44,479][247478] Updated weights for policy 0, policy_version 9383 (0.0008) +[2026-06-02 16:41:44,664][247478] Updated weights for policy 0, policy_version 9393 (0.0008) +[2026-06-02 16:41:44,725][246448] Fps is (10 sec: 19660.9, 60 sec: 20206.9, 300 sec: 19933.9). Total num frames: 4784128. Throughput: 0: 20434.5. Samples: 4802560. Policy #0 lag: (min: 63.0, avg: 78.5, max: 127.0) +[2026-06-02 16:41:44,726][246448] Avg episode reward: [(0, '395.976')] +[2026-06-02 16:41:44,844][247478] Updated weights for policy 0, policy_version 9403 (0.0008) +[2026-06-02 16:41:45,498][247478] Updated weights for policy 0, policy_version 9413 (0.0008) +[2026-06-02 16:41:45,676][247478] Updated weights for policy 0, policy_version 9423 (0.0008) +[2026-06-02 16:41:45,851][247478] Updated weights for policy 0, policy_version 9433 (0.0008) +[2026-06-02 16:41:46,028][247478] Updated weights for policy 0, policy_version 9443 (0.0009) +[2026-06-02 16:41:46,229][247478] Updated weights for policy 0, policy_version 9454 (0.0009) +[2026-06-02 16:41:46,419][247478] Updated weights for policy 0, policy_version 9465 (0.0009) +[2026-06-02 16:41:47,095][247478] Updated weights for policy 0, policy_version 9475 (0.0009) +[2026-06-02 16:41:47,261][247478] Updated weights for policy 0, policy_version 9485 (0.0008) +[2026-06-02 16:41:47,453][247478] Updated weights for policy 0, policy_version 9496 (0.0008) +[2026-06-02 16:41:47,652][247478] Updated weights for policy 0, policy_version 9507 (0.0008) +[2026-06-02 16:41:47,833][247478] Updated weights for policy 0, policy_version 9517 (0.0008) +[2026-06-02 16:41:48,012][247478] Updated weights for policy 0, policy_version 9527 (0.0009) +[2026-06-02 16:41:48,674][247478] Updated weights for policy 0, policy_version 9537 (0.0008) +[2026-06-02 16:41:48,845][247478] Updated weights for policy 0, policy_version 9548 (0.0008) +[2026-06-02 16:41:49,021][247478] Updated weights for policy 0, policy_version 9558 (0.0008) +[2026-06-02 16:41:49,195][247478] Updated weights for policy 0, policy_version 9568 (0.0008) +[2026-06-02 16:41:49,381][247478] Updated weights for policy 0, policy_version 9578 (0.0009) +[2026-06-02 16:41:49,577][247478] Updated weights for policy 0, policy_version 9589 (0.0009) +[2026-06-02 16:41:49,725][246448] Fps is (10 sec: 19660.7, 60 sec: 20206.9, 300 sec: 19928.3). Total num frames: 4882432. Throughput: 0: 20420.3. Samples: 4928640. Policy #0 lag: (min: 43.0, avg: 72.0, max: 103.0) +[2026-06-02 16:41:49,726][246448] Avg episode reward: [(0, '387.191')] +[2026-06-02 16:41:49,758][247478] Updated weights for policy 0, policy_version 9599 (0.0008) +[2026-06-02 16:41:50,420][247478] Updated weights for policy 0, policy_version 9610 (0.0008) +[2026-06-02 16:41:50,596][247478] Updated weights for policy 0, policy_version 9620 (0.0008) +[2026-06-02 16:41:50,786][247478] Updated weights for policy 0, policy_version 9631 (0.0009) +[2026-06-02 16:41:50,966][247478] Updated weights for policy 0, policy_version 9641 (0.0008) +[2026-06-02 16:41:51,166][247478] Updated weights for policy 0, policy_version 9652 (0.0008) +[2026-06-02 16:41:51,350][247478] Updated weights for policy 0, policy_version 9662 (0.0009) +[2026-06-02 16:41:52,028][247478] Updated weights for policy 0, policy_version 9672 (0.0008) +[2026-06-02 16:41:52,219][247478] Updated weights for policy 0, policy_version 9683 (0.0005) +[2026-06-02 16:41:52,399][247478] Updated weights for policy 0, policy_version 9693 (0.0005) +[2026-06-02 16:41:52,586][247478] Updated weights for policy 0, policy_version 9703 (0.0005) +[2026-06-02 16:41:52,776][247478] Updated weights for policy 0, policy_version 9713 (0.0004) +[2026-06-02 16:41:52,950][247478] Updated weights for policy 0, policy_version 9723 (0.0005) +[2026-06-02 16:41:53,599][247478] Updated weights for policy 0, policy_version 9733 (0.0004) +[2026-06-02 16:41:53,780][247478] Updated weights for policy 0, policy_version 9743 (0.0005) +[2026-06-02 16:41:53,976][247478] Updated weights for policy 0, policy_version 9754 (0.0006) +[2026-06-02 16:41:54,150][247478] Updated weights for policy 0, policy_version 9764 (0.0005) +[2026-06-02 16:41:54,354][247478] Updated weights for policy 0, policy_version 9775 (0.0005) +[2026-06-02 16:41:54,531][247478] Updated weights for policy 0, policy_version 9785 (0.0005) +[2026-06-02 16:41:54,725][246448] Fps is (10 sec: 22937.5, 60 sec: 20753.1, 300 sec: 20054.1). Total num frames: 5013504. Throughput: 0: 20280.9. Samples: 5042176. Policy #0 lag: (min: 43.0, avg: 72.0, max: 103.0) +[2026-06-02 16:41:54,726][246448] Avg episode reward: [(0, '379.560')] +[2026-06-02 16:41:55,180][247478] Updated weights for policy 0, policy_version 9795 (0.0005) +[2026-06-02 16:41:55,362][247478] Updated weights for policy 0, policy_version 9806 (0.0004) +[2026-06-02 16:41:55,535][247478] Updated weights for policy 0, policy_version 9816 (0.0004) +[2026-06-02 16:41:55,721][247478] Updated weights for policy 0, policy_version 9826 (0.0004) +[2026-06-02 16:41:55,907][247478] Updated weights for policy 0, policy_version 9836 (0.0004) +[2026-06-02 16:41:56,094][247478] Updated weights for policy 0, policy_version 9846 (0.0008) +[2026-06-02 16:41:56,270][247478] Updated weights for policy 0, policy_version 9856 (0.0008) +[2026-06-02 16:41:56,936][247478] Updated weights for policy 0, policy_version 9868 (0.0009) +[2026-06-02 16:41:57,114][247478] Updated weights for policy 0, policy_version 9878 (0.0008) +[2026-06-02 16:41:57,284][247478] Updated weights for policy 0, policy_version 9888 (0.0009) +[2026-06-02 16:41:57,474][247478] Updated weights for policy 0, policy_version 9899 (0.0008) +[2026-06-02 16:41:57,656][247478] Updated weights for policy 0, policy_version 9909 (0.0009) +[2026-06-02 16:41:57,845][247478] Updated weights for policy 0, policy_version 9920 (0.0008) +[2026-06-02 16:41:58,547][247478] Updated weights for policy 0, policy_version 9931 (0.0009) +[2026-06-02 16:41:58,742][247478] Updated weights for policy 0, policy_version 9942 (0.0008) +[2026-06-02 16:41:58,930][247478] Updated weights for policy 0, policy_version 9953 (0.0009) +[2026-06-02 16:41:59,108][247478] Updated weights for policy 0, policy_version 9963 (0.0009) +[2026-06-02 16:41:59,346][247478] Updated weights for policy 0, policy_version 9976 (0.0009) +[2026-06-02 16:41:59,726][246448] Fps is (10 sec: 22937.4, 60 sec: 20206.9, 300 sec: 20046.3). Total num frames: 5111808. Throughput: 0: 20406.0. Samples: 5104128. Policy #0 lag: (min: 86.0, avg: 109.6, max: 144.0) +[2026-06-02 16:41:59,726][246448] Avg episode reward: [(0, '378.283')] +[2026-06-02 16:42:00,041][247478] Updated weights for policy 0, policy_version 9987 (0.0009) +[2026-06-02 16:42:00,216][247478] Updated weights for policy 0, policy_version 9998 (0.0008) +[2026-06-02 16:42:00,395][247478] Updated weights for policy 0, policy_version 10008 (0.0009) +[2026-06-02 16:42:00,570][247478] Updated weights for policy 0, policy_version 10018 (0.0009) +[2026-06-02 16:42:00,767][247478] Updated weights for policy 0, policy_version 10029 (0.0009) +[2026-06-02 16:42:00,938][247478] Updated weights for policy 0, policy_version 10039 (0.0008) +[2026-06-02 16:42:01,620][247478] Updated weights for policy 0, policy_version 10049 (0.0009) +[2026-06-02 16:42:01,778][247478] Updated weights for policy 0, policy_version 10059 (0.0008) +[2026-06-02 16:42:01,985][247478] Updated weights for policy 0, policy_version 10071 (0.0008) +[2026-06-02 16:42:02,155][247478] Updated weights for policy 0, policy_version 10081 (0.0010) +[2026-06-02 16:42:02,352][247478] Updated weights for policy 0, policy_version 10092 (0.0009) +[2026-06-02 16:42:02,544][247478] Updated weights for policy 0, policy_version 10103 (0.0008) +[2026-06-02 16:42:03,226][247478] Updated weights for policy 0, policy_version 10113 (0.0009) +[2026-06-02 16:42:03,402][247478] Updated weights for policy 0, policy_version 10124 (0.0008) +[2026-06-02 16:42:03,596][247478] Updated weights for policy 0, policy_version 10135 (0.0008) +[2026-06-02 16:42:03,780][247478] Updated weights for policy 0, policy_version 10146 (0.0008) +[2026-06-02 16:42:03,968][247478] Updated weights for policy 0, policy_version 10156 (0.0008) +[2026-06-02 16:42:04,178][247478] Updated weights for policy 0, policy_version 10168 (0.0009) +[2026-06-02 16:42:04,725][246448] Fps is (10 sec: 19660.8, 60 sec: 20206.9, 300 sec: 20038.9). Total num frames: 5210112. Throughput: 0: 20406.0. Samples: 5229952. Policy #0 lag: (min: 86.0, avg: 109.6, max: 144.0) +[2026-06-02 16:42:04,726][246448] Avg episode reward: [(0, '379.456')] +[2026-06-02 16:42:04,900][247478] Updated weights for policy 0, policy_version 10180 (0.0008) +[2026-06-02 16:42:05,065][247478] Updated weights for policy 0, policy_version 10190 (0.0008) +[2026-06-02 16:42:05,241][247478] Updated weights for policy 0, policy_version 10200 (0.0008) +[2026-06-02 16:42:05,443][247478] Updated weights for policy 0, policy_version 10211 (0.0009) +[2026-06-02 16:42:05,631][247478] Updated weights for policy 0, policy_version 10222 (0.0008) +[2026-06-02 16:42:05,822][247478] Updated weights for policy 0, policy_version 10233 (0.0008) +[2026-06-02 16:42:06,511][247478] Updated weights for policy 0, policy_version 10243 (0.0009) +[2026-06-02 16:42:06,696][247478] Updated weights for policy 0, policy_version 10254 (0.0008) +[2026-06-02 16:42:06,887][247478] Updated weights for policy 0, policy_version 10264 (0.0008) +[2026-06-02 16:42:07,077][247478] Updated weights for policy 0, policy_version 10275 (0.0008) +[2026-06-02 16:42:07,252][247478] Updated weights for policy 0, policy_version 10285 (0.0008) +[2026-06-02 16:42:07,437][247478] Updated weights for policy 0, policy_version 10295 (0.0008) +[2026-06-02 16:42:08,079][247478] Updated weights for policy 0, policy_version 10305 (0.0008) +[2026-06-02 16:42:08,242][247478] Updated weights for policy 0, policy_version 10315 (0.0008) +[2026-06-02 16:42:08,416][247478] Updated weights for policy 0, policy_version 10325 (0.0009) +[2026-06-02 16:42:08,601][247478] Updated weights for policy 0, policy_version 10335 (0.0008) +[2026-06-02 16:42:08,776][247478] Updated weights for policy 0, policy_version 10345 (0.0008) +[2026-06-02 16:42:08,952][247478] Updated weights for policy 0, policy_version 10355 (0.0008) +[2026-06-02 16:42:09,134][247478] Updated weights for policy 0, policy_version 10365 (0.0008) +[2026-06-02 16:42:09,725][246448] Fps is (10 sec: 19661.1, 60 sec: 20206.9, 300 sec: 20031.8). Total num frames: 5308416. Throughput: 0: 20451.6. Samples: 5356800. Policy #0 lag: (min: 63.0, avg: 77.2, max: 127.0) +[2026-06-02 16:42:09,726][246448] Avg episode reward: [(0, '395.637')] +[2026-06-02 16:42:09,818][247478] Updated weights for policy 0, policy_version 10375 (0.0009) +[2026-06-02 16:42:09,990][247478] Updated weights for policy 0, policy_version 10385 (0.0008) +[2026-06-02 16:42:10,172][247478] Updated weights for policy 0, policy_version 10395 (0.0008) +[2026-06-02 16:42:10,351][247478] Updated weights for policy 0, policy_version 10405 (0.0008) +[2026-06-02 16:42:10,530][247478] Updated weights for policy 0, policy_version 10415 (0.0008) +[2026-06-02 16:42:10,734][247478] Updated weights for policy 0, policy_version 10426 (0.0008) +[2026-06-02 16:42:11,359][247478] Updated weights for policy 0, policy_version 10436 (0.0008) +[2026-06-02 16:42:11,525][247478] Updated weights for policy 0, policy_version 10446 (0.0008) +[2026-06-02 16:42:11,702][247478] Updated weights for policy 0, policy_version 10456 (0.0008) +[2026-06-02 16:42:11,879][247478] Updated weights for policy 0, policy_version 10466 (0.0008) +[2026-06-02 16:42:12,062][247478] Updated weights for policy 0, policy_version 10476 (0.0008) +[2026-06-02 16:42:12,294][247478] Updated weights for policy 0, policy_version 10488 (0.0008) +[2026-06-02 16:42:12,948][247478] Updated weights for policy 0, policy_version 10498 (0.0009) +[2026-06-02 16:42:13,116][247478] Updated weights for policy 0, policy_version 10508 (0.0008) +[2026-06-02 16:42:13,300][247478] Updated weights for policy 0, policy_version 10518 (0.0008) +[2026-06-02 16:42:13,480][247478] Updated weights for policy 0, policy_version 10528 (0.0009) +[2026-06-02 16:42:13,691][247478] Updated weights for policy 0, policy_version 10540 (0.0008) +[2026-06-02 16:42:13,869][247478] Updated weights for policy 0, policy_version 10550 (0.0009) +[2026-06-02 16:42:14,562][247478] Updated weights for policy 0, policy_version 10561 (0.0009) +[2026-06-02 16:42:14,725][246448] Fps is (10 sec: 19660.8, 60 sec: 20206.9, 300 sec: 20024.9). Total num frames: 5406720. Throughput: 0: 20272.4. Samples: 5411968. Policy #0 lag: (min: 63.0, avg: 77.2, max: 127.0) +[2026-06-02 16:42:14,726][246448] Avg episode reward: [(0, '434.811')] +[2026-06-02 16:42:14,741][247478] Updated weights for policy 0, policy_version 10572 (0.0008) +[2026-06-02 16:42:14,923][247478] Updated weights for policy 0, policy_version 10582 (0.0008) +[2026-06-02 16:42:15,101][247478] Updated weights for policy 0, policy_version 10592 (0.0008) +[2026-06-02 16:42:15,299][247478] Updated weights for policy 0, policy_version 10603 (0.0008) +[2026-06-02 16:42:15,503][247478] Updated weights for policy 0, policy_version 10614 (0.0009) +[2026-06-02 16:42:15,689][247399] Saving new best policy, reward=434.811! +[2026-06-02 16:42:16,196][247478] Updated weights for policy 0, policy_version 10625 (0.0008) +[2026-06-02 16:42:16,369][247478] Updated weights for policy 0, policy_version 10636 (0.0009) +[2026-06-02 16:42:16,556][247478] Updated weights for policy 0, policy_version 10646 (0.0008) +[2026-06-02 16:42:16,752][247478] Updated weights for policy 0, policy_version 10657 (0.0008) +[2026-06-02 16:42:16,932][247478] Updated weights for policy 0, policy_version 10667 (0.0009) +[2026-06-02 16:42:17,118][247478] Updated weights for policy 0, policy_version 10678 (0.0008) +[2026-06-02 16:42:17,307][247478] Updated weights for policy 0, policy_version 10688 (0.0008) +[2026-06-02 16:42:17,942][247478] Updated weights for policy 0, policy_version 10698 (0.0009) +[2026-06-02 16:42:18,134][247478] Updated weights for policy 0, policy_version 10709 (0.0009) +[2026-06-02 16:42:18,317][247478] Updated weights for policy 0, policy_version 10719 (0.0008) +[2026-06-02 16:42:18,495][247478] Updated weights for policy 0, policy_version 10729 (0.0008) +[2026-06-02 16:42:18,684][247478] Updated weights for policy 0, policy_version 10739 (0.0009) +[2026-06-02 16:42:18,864][247478] Updated weights for policy 0, policy_version 10749 (0.0008) +[2026-06-02 16:42:19,521][247478] Updated weights for policy 0, policy_version 10760 (0.0008) +[2026-06-02 16:42:19,701][247478] Updated weights for policy 0, policy_version 10770 (0.0009) +[2026-06-02 16:42:19,726][246448] Fps is (10 sec: 19660.7, 60 sec: 20206.9, 300 sec: 20018.3). Total num frames: 5505024. Throughput: 0: 20423.1. Samples: 5532288. Policy #0 lag: (min: 63.0, avg: 79.1, max: 127.0) +[2026-06-02 16:42:19,727][246448] Avg episode reward: [(0, '431.065')] +[2026-06-02 16:42:19,896][247478] Updated weights for policy 0, policy_version 10781 (0.0008) +[2026-06-02 16:42:20,073][247478] Updated weights for policy 0, policy_version 10791 (0.0007) +[2026-06-02 16:42:20,279][247478] Updated weights for policy 0, policy_version 10802 (0.0008) +[2026-06-02 16:42:20,459][247478] Updated weights for policy 0, policy_version 10812 (0.0008) +[2026-06-02 16:42:21,113][247478] Updated weights for policy 0, policy_version 10822 (0.0009) +[2026-06-02 16:42:21,290][247478] Updated weights for policy 0, policy_version 10832 (0.0009) +[2026-06-02 16:42:21,490][247478] Updated weights for policy 0, policy_version 10843 (0.0009) +[2026-06-02 16:42:21,669][247478] Updated weights for policy 0, policy_version 10853 (0.0009) +[2026-06-02 16:42:21,849][247478] Updated weights for policy 0, policy_version 10863 (0.0009) +[2026-06-02 16:42:22,031][247478] Updated weights for policy 0, policy_version 10873 (0.0009) +[2026-06-02 16:42:22,704][247478] Updated weights for policy 0, policy_version 10884 (0.0009) +[2026-06-02 16:42:22,882][247478] Updated weights for policy 0, policy_version 10894 (0.0008) +[2026-06-02 16:42:23,056][247478] Updated weights for policy 0, policy_version 10904 (0.0009) +[2026-06-02 16:42:23,243][247478] Updated weights for policy 0, policy_version 10914 (0.0009) +[2026-06-02 16:42:23,414][247478] Updated weights for policy 0, policy_version 10924 (0.0008) +[2026-06-02 16:42:23,605][247478] Updated weights for policy 0, policy_version 10934 (0.0008) +[2026-06-02 16:42:23,780][247478] Updated weights for policy 0, policy_version 10944 (0.0008) +[2026-06-02 16:42:24,411][247478] Updated weights for policy 0, policy_version 10954 (0.0009) +[2026-06-02 16:42:24,593][247478] Updated weights for policy 0, policy_version 10964 (0.0008) +[2026-06-02 16:42:24,725][246448] Fps is (10 sec: 19660.7, 60 sec: 20206.9, 300 sec: 20011.9). Total num frames: 5603328. Throughput: 0: 20408.9. Samples: 5657984. Policy #0 lag: (min: 63.0, avg: 79.1, max: 127.0) +[2026-06-02 16:42:24,727][246448] Avg episode reward: [(0, '439.756')] +[2026-06-02 16:42:24,774][247478] Updated weights for policy 0, policy_version 10974 (0.0009) +[2026-06-02 16:42:24,959][247478] Updated weights for policy 0, policy_version 10984 (0.0008) +[2026-06-02 16:42:25,154][247478] Updated weights for policy 0, policy_version 10995 (0.0009) +[2026-06-02 16:42:25,330][247478] Updated weights for policy 0, policy_version 11005 (0.0008) +[2026-06-02 16:42:25,389][247399] Saving new best policy, reward=439.756! +[2026-06-02 16:42:25,973][247478] Updated weights for policy 0, policy_version 11015 (0.0009) +[2026-06-02 16:42:26,146][247478] Updated weights for policy 0, policy_version 11025 (0.0008) +[2026-06-02 16:42:26,318][247478] Updated weights for policy 0, policy_version 11035 (0.0009) +[2026-06-02 16:42:26,492][247478] Updated weights for policy 0, policy_version 11045 (0.0009) +[2026-06-02 16:42:26,689][247478] Updated weights for policy 0, policy_version 11056 (0.0009) +[2026-06-02 16:42:26,868][247478] Updated weights for policy 0, policy_version 11066 (0.0008) +[2026-06-02 16:42:27,531][247478] Updated weights for policy 0, policy_version 11076 (0.0009) +[2026-06-02 16:42:27,693][247478] Updated weights for policy 0, policy_version 11086 (0.0008) +[2026-06-02 16:42:27,874][247478] Updated weights for policy 0, policy_version 11096 (0.0008) +[2026-06-02 16:42:28,052][247478] Updated weights for policy 0, policy_version 11106 (0.0008) +[2026-06-02 16:42:28,240][247478] Updated weights for policy 0, policy_version 11116 (0.0008) +[2026-06-02 16:42:28,422][247478] Updated weights for policy 0, policy_version 11126 (0.0009) +[2026-06-02 16:42:28,600][247478] Updated weights for policy 0, policy_version 11136 (0.0009) +[2026-06-02 16:42:29,247][247478] Updated weights for policy 0, policy_version 11146 (0.0009) +[2026-06-02 16:42:29,432][247478] Updated weights for policy 0, policy_version 11156 (0.0008) +[2026-06-02 16:42:29,615][247478] Updated weights for policy 0, policy_version 11166 (0.0008) +[2026-06-02 16:42:29,725][246448] Fps is (10 sec: 19660.9, 60 sec: 20206.9, 300 sec: 20005.8). Total num frames: 5701632. Throughput: 0: 20391.8. Samples: 5720192. Policy #0 lag: (min: 63.0, avg: 79.5, max: 127.0) +[2026-06-02 16:42:29,726][246448] Avg episode reward: [(0, '436.115')] +[2026-06-02 16:42:29,812][247478] Updated weights for policy 0, policy_version 11177 (0.0008) +[2026-06-02 16:42:30,007][247478] Updated weights for policy 0, policy_version 11188 (0.0008) +[2026-06-02 16:42:30,192][247478] Updated weights for policy 0, policy_version 11198 (0.0008) +[2026-06-02 16:42:30,824][247478] Updated weights for policy 0, policy_version 11208 (0.0009) +[2026-06-02 16:42:30,994][247478] Updated weights for policy 0, policy_version 11218 (0.0008) +[2026-06-02 16:42:31,178][247478] Updated weights for policy 0, policy_version 11228 (0.0009) +[2026-06-02 16:42:31,377][247478] Updated weights for policy 0, policy_version 11239 (0.0009) +[2026-06-02 16:42:31,561][247478] Updated weights for policy 0, policy_version 11249 (0.0009) +[2026-06-02 16:42:31,744][247478] Updated weights for policy 0, policy_version 11259 (0.0009) +[2026-06-02 16:42:32,373][247478] Updated weights for policy 0, policy_version 11269 (0.0009) +[2026-06-02 16:42:32,555][247478] Updated weights for policy 0, policy_version 11279 (0.0007) +[2026-06-02 16:42:32,734][247478] Updated weights for policy 0, policy_version 11289 (0.0009) +[2026-06-02 16:42:32,913][247478] Updated weights for policy 0, policy_version 11299 (0.0009) +[2026-06-02 16:42:33,091][247478] Updated weights for policy 0, policy_version 11309 (0.0008) +[2026-06-02 16:42:33,272][247478] Updated weights for policy 0, policy_version 11319 (0.0009) +[2026-06-02 16:42:33,918][247478] Updated weights for policy 0, policy_version 11329 (0.0008) +[2026-06-02 16:42:34,091][247478] Updated weights for policy 0, policy_version 11339 (0.0009) +[2026-06-02 16:42:34,267][247478] Updated weights for policy 0, policy_version 11349 (0.0008) +[2026-06-02 16:42:34,448][247478] Updated weights for policy 0, policy_version 11359 (0.0008) +[2026-06-02 16:42:34,628][247478] Updated weights for policy 0, policy_version 11369 (0.0008) +[2026-06-02 16:42:34,725][246448] Fps is (10 sec: 19660.9, 60 sec: 20206.9, 300 sec: 19999.8). Total num frames: 5799936. Throughput: 0: 20221.2. Samples: 5838592. Policy #0 lag: (min: 63.0, avg: 79.5, max: 127.0) +[2026-06-02 16:42:34,726][246448] Avg episode reward: [(0, '429.756')] +[2026-06-02 16:42:34,810][247478] Updated weights for policy 0, policy_version 11379 (0.0008) +[2026-06-02 16:42:34,989][247478] Updated weights for policy 0, policy_version 11389 (0.0008) +[2026-06-02 16:42:35,627][247478] Updated weights for policy 0, policy_version 11399 (0.0009) +[2026-06-02 16:42:35,802][247478] Updated weights for policy 0, policy_version 11409 (0.0009) +[2026-06-02 16:42:35,981][247478] Updated weights for policy 0, policy_version 11419 (0.0009) +[2026-06-02 16:42:36,159][247478] Updated weights for policy 0, policy_version 11429 (0.0009) +[2026-06-02 16:42:36,340][247478] Updated weights for policy 0, policy_version 11439 (0.0009) +[2026-06-02 16:42:36,525][247478] Updated weights for policy 0, policy_version 11449 (0.0008) +[2026-06-02 16:42:37,168][247478] Updated weights for policy 0, policy_version 11459 (0.0009) +[2026-06-02 16:42:37,337][247478] Updated weights for policy 0, policy_version 11469 (0.0010) +[2026-06-02 16:42:37,509][247478] Updated weights for policy 0, policy_version 11479 (0.0009) +[2026-06-02 16:42:37,703][247478] Updated weights for policy 0, policy_version 11490 (0.0008) +[2026-06-02 16:42:37,891][247478] Updated weights for policy 0, policy_version 11500 (0.0008) +[2026-06-02 16:42:38,090][247478] Updated weights for policy 0, policy_version 11511 (0.0008) +[2026-06-02 16:42:38,748][247478] Updated weights for policy 0, policy_version 11521 (0.0008) +[2026-06-02 16:42:38,936][247478] Updated weights for policy 0, policy_version 11532 (0.0009) +[2026-06-02 16:42:39,113][247478] Updated weights for policy 0, policy_version 11542 (0.0009) +[2026-06-02 16:42:39,307][247478] Updated weights for policy 0, policy_version 11553 (0.0009) +[2026-06-02 16:42:39,490][247478] Updated weights for policy 0, policy_version 11563 (0.0008) +[2026-06-02 16:42:39,687][247478] Updated weights for policy 0, policy_version 11574 (0.0009) +[2026-06-02 16:42:39,725][246448] Fps is (10 sec: 19660.9, 60 sec: 20206.9, 300 sec: 19994.1). Total num frames: 5898240. Throughput: 0: 20352.0. Samples: 5958016. Policy #0 lag: (min: 45.0, avg: 61.5, max: 109.0) +[2026-06-02 16:42:39,727][246448] Avg episode reward: [(0, '413.128')] +[2026-06-02 16:42:39,862][247478] Updated weights for policy 0, policy_version 11584 (0.0009) +[2026-06-02 16:42:40,535][247478] Updated weights for policy 0, policy_version 11594 (0.0008) +[2026-06-02 16:42:40,718][247478] Updated weights for policy 0, policy_version 11604 (0.0008) +[2026-06-02 16:42:40,901][247478] Updated weights for policy 0, policy_version 11614 (0.0008) +[2026-06-02 16:42:41,116][247478] Updated weights for policy 0, policy_version 11626 (0.0009) +[2026-06-02 16:42:41,292][247478] Updated weights for policy 0, policy_version 11636 (0.0008) +[2026-06-02 16:42:41,470][247478] Updated weights for policy 0, policy_version 11646 (0.0008) +[2026-06-02 16:42:42,109][247478] Updated weights for policy 0, policy_version 11656 (0.0009) +[2026-06-02 16:42:42,285][247478] Updated weights for policy 0, policy_version 11666 (0.0009) +[2026-06-02 16:42:42,464][247478] Updated weights for policy 0, policy_version 11676 (0.0009) +[2026-06-02 16:42:42,651][247478] Updated weights for policy 0, policy_version 11686 (0.0009) +[2026-06-02 16:42:42,831][247478] Updated weights for policy 0, policy_version 11696 (0.0009) +[2026-06-02 16:42:43,011][247478] Updated weights for policy 0, policy_version 11706 (0.0008) +[2026-06-02 16:42:43,683][247478] Updated weights for policy 0, policy_version 11718 (0.0009) +[2026-06-02 16:42:43,861][247478] Updated weights for policy 0, policy_version 11728 (0.0009) +[2026-06-02 16:42:44,046][247478] Updated weights for policy 0, policy_version 11738 (0.0009) +[2026-06-02 16:42:44,227][247478] Updated weights for policy 0, policy_version 11748 (0.0009) +[2026-06-02 16:42:44,414][247478] Updated weights for policy 0, policy_version 11758 (0.0009) +[2026-06-02 16:42:44,592][247478] Updated weights for policy 0, policy_version 11768 (0.0009) +[2026-06-02 16:42:44,725][246448] Fps is (10 sec: 19660.8, 60 sec: 20206.9, 300 sec: 20105.1). Total num frames: 5996544. Throughput: 0: 20377.7. Samples: 6021120. Policy #0 lag: (min: 45.0, avg: 61.5, max: 109.0) +[2026-06-02 16:42:44,726][246448] Avg episode reward: [(0, '437.272')] +[2026-06-02 16:42:45,242][247478] Updated weights for policy 0, policy_version 11778 (0.0009) +[2026-06-02 16:42:45,405][247478] Updated weights for policy 0, policy_version 11788 (0.0009) +[2026-06-02 16:42:45,590][247478] Updated weights for policy 0, policy_version 11798 (0.0009) +[2026-06-02 16:42:45,793][247478] Updated weights for policy 0, policy_version 11809 (0.0009) +[2026-06-02 16:42:45,975][247478] Updated weights for policy 0, policy_version 11819 (0.0008) +[2026-06-02 16:42:46,156][247478] Updated weights for policy 0, policy_version 11829 (0.0009) +[2026-06-02 16:42:46,339][247478] Updated weights for policy 0, policy_version 11839 (0.0009) +[2026-06-02 16:42:46,979][247478] Updated weights for policy 0, policy_version 11849 (0.0009) +[2026-06-02 16:42:47,165][247478] Updated weights for policy 0, policy_version 11860 (0.0009) +[2026-06-02 16:42:47,353][247478] Updated weights for policy 0, policy_version 11870 (0.0009) +[2026-06-02 16:42:47,552][247478] Updated weights for policy 0, policy_version 11881 (0.0009) +[2026-06-02 16:42:47,761][247478] Updated weights for policy 0, policy_version 11893 (0.0009) +[2026-06-02 16:42:47,961][247478] Updated weights for policy 0, policy_version 11904 (0.0007) +[2026-06-02 16:42:48,599][247478] Updated weights for policy 0, policy_version 11914 (0.0010) +[2026-06-02 16:42:48,761][247478] Updated weights for policy 0, policy_version 11924 (0.0009) +[2026-06-02 16:42:48,961][247478] Updated weights for policy 0, policy_version 11935 (0.0009) +[2026-06-02 16:42:49,163][247478] Updated weights for policy 0, policy_version 11946 (0.0008) +[2026-06-02 16:42:49,337][247478] Updated weights for policy 0, policy_version 11956 (0.0008) +[2026-06-02 16:42:49,526][247478] Updated weights for policy 0, policy_version 11966 (0.0010) +[2026-06-02 16:42:49,725][246448] Fps is (10 sec: 22937.5, 60 sec: 20753.1, 300 sec: 20105.1). Total num frames: 6127616. Throughput: 0: 20357.7. Samples: 6146048. Policy #0 lag: (min: 78.0, avg: 94.4, max: 143.0) +[2026-06-02 16:42:49,726][246448] Avg episode reward: [(0, '429.974')] +[2026-06-02 16:42:50,177][247478] Updated weights for policy 0, policy_version 11977 (0.0008) +[2026-06-02 16:42:50,379][247478] Updated weights for policy 0, policy_version 11988 (0.0010) +[2026-06-02 16:42:50,559][247478] Updated weights for policy 0, policy_version 11998 (0.0008) +[2026-06-02 16:42:50,737][247478] Updated weights for policy 0, policy_version 12008 (0.0008) +[2026-06-02 16:42:50,915][247478] Updated weights for policy 0, policy_version 12018 (0.0009) +[2026-06-02 16:42:51,097][247478] Updated weights for policy 0, policy_version 12028 (0.0009) +[2026-06-02 16:42:51,741][247478] Updated weights for policy 0, policy_version 12038 (0.0009) +[2026-06-02 16:42:51,932][247478] Updated weights for policy 0, policy_version 12049 (0.0009) +[2026-06-02 16:42:52,129][247478] Updated weights for policy 0, policy_version 12060 (0.0009) +[2026-06-02 16:42:52,309][247478] Updated weights for policy 0, policy_version 12070 (0.0009) +[2026-06-02 16:42:52,492][247478] Updated weights for policy 0, policy_version 12080 (0.0009) +[2026-06-02 16:42:52,673][247478] Updated weights for policy 0, policy_version 12090 (0.0008) +[2026-06-02 16:42:53,321][247478] Updated weights for policy 0, policy_version 12100 (0.0009) +[2026-06-02 16:42:53,487][247478] Updated weights for policy 0, policy_version 12110 (0.0008) +[2026-06-02 16:42:53,665][247478] Updated weights for policy 0, policy_version 12120 (0.0008) +[2026-06-02 16:42:53,849][247478] Updated weights for policy 0, policy_version 12130 (0.0009) +[2026-06-02 16:42:54,034][247478] Updated weights for policy 0, policy_version 12140 (0.0008) +[2026-06-02 16:42:54,205][247478] Updated weights for policy 0, policy_version 12150 (0.0008) +[2026-06-02 16:42:54,391][247478] Updated weights for policy 0, policy_version 12160 (0.0009) +[2026-06-02 16:42:54,725][246448] Fps is (10 sec: 22937.5, 60 sec: 20206.9, 300 sec: 20105.1). Total num frames: 6225920. Throughput: 0: 20206.9. Samples: 6266112. Policy #0 lag: (min: 78.0, avg: 94.4, max: 143.0) +[2026-06-02 16:42:54,726][246448] Avg episode reward: [(0, '429.560')] +[2026-06-02 16:42:55,038][247478] Updated weights for policy 0, policy_version 12171 (0.0009) +[2026-06-02 16:42:55,215][247478] Updated weights for policy 0, policy_version 12181 (0.0008) +[2026-06-02 16:42:55,421][247478] Updated weights for policy 0, policy_version 12192 (0.0008) +[2026-06-02 16:42:55,601][247478] Updated weights for policy 0, policy_version 12202 (0.0008) +[2026-06-02 16:42:55,784][247478] Updated weights for policy 0, policy_version 12212 (0.0008) +[2026-06-02 16:42:55,964][247478] Updated weights for policy 0, policy_version 12222 (0.0008) +[2026-06-02 16:42:56,636][247478] Updated weights for policy 0, policy_version 12234 (0.0009) +[2026-06-02 16:42:56,816][247478] Updated weights for policy 0, policy_version 12244 (0.0009) +[2026-06-02 16:42:57,002][247478] Updated weights for policy 0, policy_version 12254 (0.0008) +[2026-06-02 16:42:57,176][247478] Updated weights for policy 0, policy_version 12264 (0.0009) +[2026-06-02 16:42:57,381][247478] Updated weights for policy 0, policy_version 12275 (0.0009) +[2026-06-02 16:42:57,567][247478] Updated weights for policy 0, policy_version 12285 (0.0008) +[2026-06-02 16:42:58,196][247478] Updated weights for policy 0, policy_version 12295 (0.0007) +[2026-06-02 16:42:58,366][247478] Updated weights for policy 0, policy_version 12305 (0.0008) +[2026-06-02 16:42:58,549][247478] Updated weights for policy 0, policy_version 12315 (0.0009) +[2026-06-02 16:42:58,729][247478] Updated weights for policy 0, policy_version 12325 (0.0008) +[2026-06-02 16:42:58,910][247478] Updated weights for policy 0, policy_version 12335 (0.0008) +[2026-06-02 16:42:59,081][247478] Updated weights for policy 0, policy_version 12345 (0.0008) +[2026-06-02 16:42:59,725][246448] Fps is (10 sec: 19660.8, 60 sec: 20207.0, 300 sec: 19994.0). Total num frames: 6324224. Throughput: 0: 20209.8. Samples: 6321408. Policy #0 lag: (min: 63.0, avg: 79.7, max: 127.0) +[2026-06-02 16:42:59,726][247478] Updated weights for policy 0, policy_version 12355 (0.0008) +[2026-06-02 16:42:59,726][246448] Avg episode reward: [(0, '435.756')] +[2026-06-02 16:42:59,901][247478] Updated weights for policy 0, policy_version 12365 (0.0008) +[2026-06-02 16:43:00,076][247478] Updated weights for policy 0, policy_version 12375 (0.0009) +[2026-06-02 16:43:00,255][247478] Updated weights for policy 0, policy_version 12385 (0.0007) +[2026-06-02 16:43:00,432][247478] Updated weights for policy 0, policy_version 12395 (0.0009) +[2026-06-02 16:43:00,632][247478] Updated weights for policy 0, policy_version 12406 (0.0008) +[2026-06-02 16:43:00,803][247478] Updated weights for policy 0, policy_version 12416 (0.0008) +[2026-06-02 16:43:01,461][247478] Updated weights for policy 0, policy_version 12426 (0.0009) +[2026-06-02 16:43:01,655][247478] Updated weights for policy 0, policy_version 12437 (0.0008) +[2026-06-02 16:43:01,838][247478] Updated weights for policy 0, policy_version 12447 (0.0008) +[2026-06-02 16:43:02,024][247478] Updated weights for policy 0, policy_version 12457 (0.0009) +[2026-06-02 16:43:02,203][247478] Updated weights for policy 0, policy_version 12467 (0.0009) +[2026-06-02 16:43:02,383][247478] Updated weights for policy 0, policy_version 12477 (0.0008) +[2026-06-02 16:43:03,070][247478] Updated weights for policy 0, policy_version 12489 (0.0009) +[2026-06-02 16:43:03,243][247478] Updated weights for policy 0, policy_version 12499 (0.0009) +[2026-06-02 16:43:03,423][247478] Updated weights for policy 0, policy_version 12509 (0.0007) +[2026-06-02 16:43:03,600][247478] Updated weights for policy 0, policy_version 12519 (0.0008) +[2026-06-02 16:43:03,789][247478] Updated weights for policy 0, policy_version 12530 (0.0009) +[2026-06-02 16:43:03,968][247478] Updated weights for policy 0, policy_version 12540 (0.0005) +[2026-06-02 16:43:04,607][247478] Updated weights for policy 0, policy_version 12550 (0.0004) +[2026-06-02 16:43:04,725][246448] Fps is (10 sec: 19660.8, 60 sec: 20206.9, 300 sec: 19994.0). Total num frames: 6422528. Throughput: 0: 20300.8. Samples: 6445824. Policy #0 lag: (min: 63.0, avg: 79.7, max: 127.0) +[2026-06-02 16:43:04,726][246448] Avg episode reward: [(0, '457.585')] +[2026-06-02 16:43:04,788][247478] Updated weights for policy 0, policy_version 12561 (0.0004) +[2026-06-02 16:43:04,969][247478] Updated weights for policy 0, policy_version 12571 (0.0006) +[2026-06-02 16:43:05,169][247478] Updated weights for policy 0, policy_version 12582 (0.0009) +[2026-06-02 16:43:05,369][247478] Updated weights for policy 0, policy_version 12593 (0.0010) +[2026-06-02 16:43:05,558][247478] Updated weights for policy 0, policy_version 12604 (0.0008) +[2026-06-02 16:43:05,633][247399] Saving new best policy, reward=457.585! +[2026-06-02 16:43:06,221][247478] Updated weights for policy 0, policy_version 12614 (0.0010) +[2026-06-02 16:43:06,411][247478] Updated weights for policy 0, policy_version 12625 (0.0008) +[2026-06-02 16:43:06,594][247478] Updated weights for policy 0, policy_version 12635 (0.0008) +[2026-06-02 16:43:06,790][247478] Updated weights for policy 0, policy_version 12646 (0.0008) +[2026-06-02 16:43:06,972][247478] Updated weights for policy 0, policy_version 12656 (0.0008) +[2026-06-02 16:43:07,156][247478] Updated weights for policy 0, policy_version 12666 (0.0008) +[2026-06-02 16:43:07,826][247478] Updated weights for policy 0, policy_version 12676 (0.0009) +[2026-06-02 16:43:08,013][247478] Updated weights for policy 0, policy_version 12687 (0.0009) +[2026-06-02 16:43:08,190][247478] Updated weights for policy 0, policy_version 12697 (0.0008) +[2026-06-02 16:43:08,373][247478] Updated weights for policy 0, policy_version 12707 (0.0008) +[2026-06-02 16:43:08,559][247478] Updated weights for policy 0, policy_version 12717 (0.0008) +[2026-06-02 16:43:08,733][247478] Updated weights for policy 0, policy_version 12727 (0.0008) +[2026-06-02 16:43:09,392][247478] Updated weights for policy 0, policy_version 12737 (0.0008) +[2026-06-02 16:43:09,576][247478] Updated weights for policy 0, policy_version 12748 (0.0008) +[2026-06-02 16:43:09,725][246448] Fps is (10 sec: 19660.7, 60 sec: 20206.9, 300 sec: 19994.0). Total num frames: 6520832. Throughput: 0: 20298.0. Samples: 6571392. Policy #0 lag: (min: 53.0, avg: 68.3, max: 117.0) +[2026-06-02 16:43:09,726][246448] Avg episode reward: [(0, '466.106')] +[2026-06-02 16:43:09,770][247478] Updated weights for policy 0, policy_version 12759 (0.0008) +[2026-06-02 16:43:09,949][247478] Updated weights for policy 0, policy_version 12769 (0.0008) +[2026-06-02 16:43:10,138][247478] Updated weights for policy 0, policy_version 12779 (0.0009) +[2026-06-02 16:43:10,316][247478] Updated weights for policy 0, policy_version 12789 (0.0008) +[2026-06-02 16:43:10,490][247478] Updated weights for policy 0, policy_version 12799 (0.0008) +[2026-06-02 16:43:10,505][247399] Saving new best policy, reward=466.106! +[2026-06-02 16:43:11,173][247478] Updated weights for policy 0, policy_version 12810 (0.0009) +[2026-06-02 16:43:11,339][247478] Updated weights for policy 0, policy_version 12820 (0.0009) +[2026-06-02 16:43:11,516][247478] Updated weights for policy 0, policy_version 12830 (0.0009) +[2026-06-02 16:43:11,698][247478] Updated weights for policy 0, policy_version 12840 (0.0009) +[2026-06-02 16:43:11,873][247478] Updated weights for policy 0, policy_version 12850 (0.0009) +[2026-06-02 16:43:12,070][247478] Updated weights for policy 0, policy_version 12861 (0.0009) +[2026-06-02 16:43:12,723][247478] Updated weights for policy 0, policy_version 12871 (0.0009) +[2026-06-02 16:43:12,904][247478] Updated weights for policy 0, policy_version 12881 (0.0009) +[2026-06-02 16:43:13,080][247478] Updated weights for policy 0, policy_version 12891 (0.0009) +[2026-06-02 16:43:13,257][247478] Updated weights for policy 0, policy_version 12901 (0.0009) +[2026-06-02 16:43:13,447][247478] Updated weights for policy 0, policy_version 12911 (0.0009) +[2026-06-02 16:43:13,638][247478] Updated weights for policy 0, policy_version 12922 (0.0009) +[2026-06-02 16:43:14,316][247478] Updated weights for policy 0, policy_version 12932 (0.0009) +[2026-06-02 16:43:14,490][247478] Updated weights for policy 0, policy_version 12942 (0.0009) +[2026-06-02 16:43:14,670][247478] Updated weights for policy 0, policy_version 12952 (0.0008) +[2026-06-02 16:43:14,725][246448] Fps is (10 sec: 19660.9, 60 sec: 20206.9, 300 sec: 19994.0). Total num frames: 6619136. Throughput: 0: 20289.4. Samples: 6633216. Policy #0 lag: (min: 53.0, avg: 68.3, max: 117.0) +[2026-06-02 16:43:14,726][246448] Avg episode reward: [(0, '472.778')] +[2026-06-02 16:43:14,846][247478] Updated weights for policy 0, policy_version 12962 (0.0008) +[2026-06-02 16:43:15,032][247478] Updated weights for policy 0, policy_version 12972 (0.0008) +[2026-06-02 16:43:15,216][247478] Updated weights for policy 0, policy_version 12982 (0.0008) +[2026-06-02 16:43:15,388][247399] Saving new best policy, reward=472.778! +[2026-06-02 16:43:15,393][247478] Updated weights for policy 0, policy_version 12992 (0.0009) +[2026-06-02 16:43:16,029][247478] Updated weights for policy 0, policy_version 13002 (0.0008) +[2026-06-02 16:43:16,209][247478] Updated weights for policy 0, policy_version 13012 (0.0008) +[2026-06-02 16:43:16,386][247478] Updated weights for policy 0, policy_version 13022 (0.0009) +[2026-06-02 16:43:16,576][247478] Updated weights for policy 0, policy_version 13032 (0.0009) +[2026-06-02 16:43:16,764][247478] Updated weights for policy 0, policy_version 13043 (0.0008) +[2026-06-02 16:43:16,956][247478] Updated weights for policy 0, policy_version 13053 (0.0009) +[2026-06-02 16:43:17,587][247478] Updated weights for policy 0, policy_version 13063 (0.0009) +[2026-06-02 16:43:17,765][247478] Updated weights for policy 0, policy_version 13073 (0.0009) +[2026-06-02 16:43:17,951][247478] Updated weights for policy 0, policy_version 13083 (0.0009) +[2026-06-02 16:43:18,133][247478] Updated weights for policy 0, policy_version 13093 (0.0009) +[2026-06-02 16:43:18,312][247478] Updated weights for policy 0, policy_version 13103 (0.0009) +[2026-06-02 16:43:18,517][247478] Updated weights for policy 0, policy_version 13114 (0.0009) +[2026-06-02 16:43:19,163][247478] Updated weights for policy 0, policy_version 13125 (0.0009) +[2026-06-02 16:43:19,343][247478] Updated weights for policy 0, policy_version 13136 (0.0009) +[2026-06-02 16:43:19,549][247478] Updated weights for policy 0, policy_version 13147 (0.0008) +[2026-06-02 16:43:19,725][246448] Fps is (10 sec: 19660.8, 60 sec: 20206.9, 300 sec: 19994.0). Total num frames: 6717440. Throughput: 0: 20189.9. Samples: 6747136. Policy #0 lag: (min: 63.0, avg: 79.7, max: 127.0) +[2026-06-02 16:43:19,726][246448] Avg episode reward: [(0, '501.985')] +[2026-06-02 16:43:19,727][247478] Updated weights for policy 0, policy_version 13157 (0.0008) +[2026-06-02 16:43:19,910][247478] Updated weights for policy 0, policy_version 13167 (0.0008) +[2026-06-02 16:43:20,098][247478] Updated weights for policy 0, policy_version 13177 (0.0008) +[2026-06-02 16:43:20,216][247399] Saving new best policy, reward=501.985! +[2026-06-02 16:43:20,734][247478] Updated weights for policy 0, policy_version 13187 (0.0008) +[2026-06-02 16:43:20,941][247478] Updated weights for policy 0, policy_version 13199 (0.0009) +[2026-06-02 16:43:21,126][247478] Updated weights for policy 0, policy_version 13209 (0.0009) +[2026-06-02 16:43:21,317][247478] Updated weights for policy 0, policy_version 13220 (0.0009) +[2026-06-02 16:43:21,503][247478] Updated weights for policy 0, policy_version 13230 (0.0009) +[2026-06-02 16:43:21,701][247478] Updated weights for policy 0, policy_version 13241 (0.0009) +[2026-06-02 16:43:22,344][247478] Updated weights for policy 0, policy_version 13251 (0.0009) +[2026-06-02 16:43:22,518][247478] Updated weights for policy 0, policy_version 13261 (0.0009) +[2026-06-02 16:43:22,685][247478] Updated weights for policy 0, policy_version 13271 (0.0009) +[2026-06-02 16:43:22,871][247478] Updated weights for policy 0, policy_version 13281 (0.0009) +[2026-06-02 16:43:23,057][247478] Updated weights for policy 0, policy_version 13291 (0.0009) +[2026-06-02 16:43:23,238][247478] Updated weights for policy 0, policy_version 13301 (0.0009) +[2026-06-02 16:43:23,418][247478] Updated weights for policy 0, policy_version 13311 (0.0008) +[2026-06-02 16:43:24,044][247478] Updated weights for policy 0, policy_version 13321 (0.0009) +[2026-06-02 16:43:24,209][247478] Updated weights for policy 0, policy_version 13331 (0.0008) +[2026-06-02 16:43:24,396][247478] Updated weights for policy 0, policy_version 13341 (0.0008) +[2026-06-02 16:43:24,579][247478] Updated weights for policy 0, policy_version 13351 (0.0009) +[2026-06-02 16:43:24,725][246448] Fps is (10 sec: 19660.7, 60 sec: 20206.9, 300 sec: 19994.0). Total num frames: 6815744. Throughput: 0: 20289.4. Samples: 6871040. Policy #0 lag: (min: 63.0, avg: 79.7, max: 127.0) +[2026-06-02 16:43:24,726][246448] Avg episode reward: [(0, '507.623')] +[2026-06-02 16:43:24,758][247478] Updated weights for policy 0, policy_version 13361 (0.0009) +[2026-06-02 16:43:24,956][247478] Updated weights for policy 0, policy_version 13372 (0.0008) +[2026-06-02 16:43:25,022][247399] Saving new best policy, reward=507.623! +[2026-06-02 16:43:25,619][247478] Updated weights for policy 0, policy_version 13383 (0.0009) +[2026-06-02 16:43:25,790][247478] Updated weights for policy 0, policy_version 13393 (0.0008) +[2026-06-02 16:43:25,978][247478] Updated weights for policy 0, policy_version 13403 (0.0009) +[2026-06-02 16:43:26,194][247478] Updated weights for policy 0, policy_version 13415 (0.0009) +[2026-06-02 16:43:26,372][247478] Updated weights for policy 0, policy_version 13425 (0.0008) +[2026-06-02 16:43:26,549][247478] Updated weights for policy 0, policy_version 13435 (0.0008) +[2026-06-02 16:43:27,207][247478] Updated weights for policy 0, policy_version 13445 (0.0008) +[2026-06-02 16:43:27,376][247478] Updated weights for policy 0, policy_version 13455 (0.0008) +[2026-06-02 16:43:27,553][247478] Updated weights for policy 0, policy_version 13465 (0.0009) +[2026-06-02 16:43:27,730][247478] Updated weights for policy 0, policy_version 13475 (0.0007) +[2026-06-02 16:43:27,911][247478] Updated weights for policy 0, policy_version 13485 (0.0008) +[2026-06-02 16:43:28,095][247478] Updated weights for policy 0, policy_version 13495 (0.0008) +[2026-06-02 16:43:28,752][247478] Updated weights for policy 0, policy_version 13506 (0.0009) +[2026-06-02 16:43:28,933][247478] Updated weights for policy 0, policy_version 13517 (0.0008) +[2026-06-02 16:43:29,117][247478] Updated weights for policy 0, policy_version 13527 (0.0008) +[2026-06-02 16:43:29,304][247478] Updated weights for policy 0, policy_version 13537 (0.0008) +[2026-06-02 16:43:29,497][247478] Updated weights for policy 0, policy_version 13548 (0.0008) +[2026-06-02 16:43:29,684][247478] Updated weights for policy 0, policy_version 13558 (0.0008) +[2026-06-02 16:43:29,725][246448] Fps is (10 sec: 19660.8, 60 sec: 20206.9, 300 sec: 19994.1). Total num frames: 6914048. Throughput: 0: 20275.2. Samples: 6933504. Policy #0 lag: (min: 63.0, avg: 79.7, max: 127.0) +[2026-06-02 16:43:29,727][246448] Avg episode reward: [(0, '513.049')] +[2026-06-02 16:43:29,873][247399] Saving new best policy, reward=513.049! +[2026-06-02 16:43:29,876][247478] Updated weights for policy 0, policy_version 13568 (0.0008) +[2026-06-02 16:43:30,499][247478] Updated weights for policy 0, policy_version 13578 (0.0008) +[2026-06-02 16:43:30,702][247478] Updated weights for policy 0, policy_version 13589 (0.0008) +[2026-06-02 16:43:30,887][247478] Updated weights for policy 0, policy_version 13599 (0.0008) +[2026-06-02 16:43:31,058][247478] Updated weights for policy 0, policy_version 13609 (0.0008) +[2026-06-02 16:43:31,248][247478] Updated weights for policy 0, policy_version 13619 (0.0007) +[2026-06-02 16:43:31,416][247478] Updated weights for policy 0, policy_version 13629 (0.0004) +[2026-06-02 16:43:32,072][247478] Updated weights for policy 0, policy_version 13641 (0.0007) +[2026-06-02 16:43:32,253][247478] Updated weights for policy 0, policy_version 13652 (0.0008) +[2026-06-02 16:43:32,451][247478] Updated weights for policy 0, policy_version 13663 (0.0008) +[2026-06-02 16:43:32,636][247478] Updated weights for policy 0, policy_version 13673 (0.0008) +[2026-06-02 16:43:32,822][247478] Updated weights for policy 0, policy_version 13684 (0.0008) +[2026-06-02 16:43:33,000][247478] Updated weights for policy 0, policy_version 13694 (0.0008) +[2026-06-02 16:43:33,669][247478] Updated weights for policy 0, policy_version 13704 (0.0009) +[2026-06-02 16:43:33,863][247478] Updated weights for policy 0, policy_version 13715 (0.0008) +[2026-06-02 16:43:34,065][247478] Updated weights for policy 0, policy_version 13726 (0.0009) +[2026-06-02 16:43:34,268][247478] Updated weights for policy 0, policy_version 13738 (0.0009) +[2026-06-02 16:43:34,462][247478] Updated weights for policy 0, policy_version 13749 (0.0008) +[2026-06-02 16:43:34,634][247478] Updated weights for policy 0, policy_version 13759 (0.0008) +[2026-06-02 16:43:34,725][246448] Fps is (10 sec: 22937.8, 60 sec: 20753.1, 300 sec: 20105.1). Total num frames: 7045120. Throughput: 0: 20289.4. Samples: 7059072. Policy #0 lag: (min: 63.0, avg: 80.0, max: 127.0) +[2026-06-02 16:43:34,726][246448] Avg episode reward: [(0, '509.898')] +[2026-06-02 16:43:35,305][247478] Updated weights for policy 0, policy_version 13769 (0.0005) +[2026-06-02 16:43:35,476][247478] Updated weights for policy 0, policy_version 13779 (0.0005) +[2026-06-02 16:43:35,654][247478] Updated weights for policy 0, policy_version 13789 (0.0005) +[2026-06-02 16:43:35,835][247478] Updated weights for policy 0, policy_version 13799 (0.0006) +[2026-06-02 16:43:36,020][247478] Updated weights for policy 0, policy_version 13809 (0.0009) +[2026-06-02 16:43:36,198][247478] Updated weights for policy 0, policy_version 13819 (0.0008) +[2026-06-02 16:43:36,867][247478] Updated weights for policy 0, policy_version 13830 (0.0008) +[2026-06-02 16:43:37,042][247478] Updated weights for policy 0, policy_version 13840 (0.0009) +[2026-06-02 16:43:37,222][247478] Updated weights for policy 0, policy_version 13850 (0.0009) +[2026-06-02 16:43:37,398][247478] Updated weights for policy 0, policy_version 13860 (0.0009) +[2026-06-02 16:43:37,589][247478] Updated weights for policy 0, policy_version 13870 (0.0009) +[2026-06-02 16:43:37,765][247478] Updated weights for policy 0, policy_version 13880 (0.0008) +[2026-06-02 16:43:38,440][247478] Updated weights for policy 0, policy_version 13890 (0.0008) +[2026-06-02 16:43:38,613][247478] Updated weights for policy 0, policy_version 13900 (0.0008) +[2026-06-02 16:43:38,795][247478] Updated weights for policy 0, policy_version 13910 (0.0009) +[2026-06-02 16:43:38,967][247478] Updated weights for policy 0, policy_version 13920 (0.0009) +[2026-06-02 16:43:39,152][247478] Updated weights for policy 0, policy_version 13930 (0.0006) +[2026-06-02 16:43:39,331][247478] Updated weights for policy 0, policy_version 13940 (0.0006) +[2026-06-02 16:43:39,529][247478] Updated weights for policy 0, policy_version 13951 (0.0009) +[2026-06-02 16:43:39,725][246448] Fps is (10 sec: 22937.7, 60 sec: 20753.1, 300 sec: 20105.1). Total num frames: 7143424. Throughput: 0: 20178.5. Samples: 7174144. Policy #0 lag: (min: 63.0, avg: 80.0, max: 127.0) +[2026-06-02 16:43:39,726][246448] Avg episode reward: [(0, '513.330')] +[2026-06-02 16:43:39,731][247399] Saving new best policy, reward=513.330! +[2026-06-02 16:43:40,180][247478] Updated weights for policy 0, policy_version 13961 (0.0005) +[2026-06-02 16:43:40,355][247478] Updated weights for policy 0, policy_version 13971 (0.0004) +[2026-06-02 16:43:40,539][247478] Updated weights for policy 0, policy_version 13981 (0.0004) +[2026-06-02 16:43:40,731][247478] Updated weights for policy 0, policy_version 13991 (0.0004) +[2026-06-02 16:43:40,907][247478] Updated weights for policy 0, policy_version 14001 (0.0007) +[2026-06-02 16:43:41,086][247478] Updated weights for policy 0, policy_version 14011 (0.0008) +[2026-06-02 16:43:41,743][247478] Updated weights for policy 0, policy_version 14021 (0.0007) +[2026-06-02 16:43:41,909][247478] Updated weights for policy 0, policy_version 14031 (0.0008) +[2026-06-02 16:43:42,092][247478] Updated weights for policy 0, policy_version 14041 (0.0008) +[2026-06-02 16:43:42,297][247478] Updated weights for policy 0, policy_version 14052 (0.0008) +[2026-06-02 16:43:42,480][247478] Updated weights for policy 0, policy_version 14062 (0.0009) +[2026-06-02 16:43:42,659][247478] Updated weights for policy 0, policy_version 14072 (0.0008) +[2026-06-02 16:43:43,300][247478] Updated weights for policy 0, policy_version 14082 (0.0008) +[2026-06-02 16:43:43,472][247478] Updated weights for policy 0, policy_version 14092 (0.0009) +[2026-06-02 16:43:43,663][247478] Updated weights for policy 0, policy_version 14103 (0.0008) +[2026-06-02 16:43:43,848][247478] Updated weights for policy 0, policy_version 14113 (0.0009) +[2026-06-02 16:43:44,026][247478] Updated weights for policy 0, policy_version 14123 (0.0008) +[2026-06-02 16:43:44,202][247478] Updated weights for policy 0, policy_version 14133 (0.0008) +[2026-06-02 16:43:44,394][247478] Updated weights for policy 0, policy_version 14144 (0.0008) +[2026-06-02 16:43:44,726][246448] Fps is (10 sec: 19660.0, 60 sec: 20752.9, 300 sec: 20105.1). Total num frames: 7241728. Throughput: 0: 20326.2. Samples: 7236096. Policy #0 lag: (min: 63.0, avg: 79.6, max: 127.0) +[2026-06-02 16:43:44,727][246448] Avg episode reward: [(0, '514.654')] +[2026-06-02 16:43:44,732][247399] Saving new best policy, reward=514.654! +[2026-06-02 16:43:45,072][247478] Updated weights for policy 0, policy_version 14154 (0.0010) +[2026-06-02 16:43:45,253][247478] Updated weights for policy 0, policy_version 14164 (0.0010) +[2026-06-02 16:43:45,426][247478] Updated weights for policy 0, policy_version 14174 (0.0009) +[2026-06-02 16:43:45,613][247478] Updated weights for policy 0, policy_version 14184 (0.0009) +[2026-06-02 16:43:45,800][247478] Updated weights for policy 0, policy_version 14194 (0.0009) +[2026-06-02 16:43:45,977][247478] Updated weights for policy 0, policy_version 14204 (0.0009) +[2026-06-02 16:43:46,606][247478] Updated weights for policy 0, policy_version 14214 (0.0011) +[2026-06-02 16:43:46,776][247478] Updated weights for policy 0, policy_version 14224 (0.0008) +[2026-06-02 16:43:46,975][247478] Updated weights for policy 0, policy_version 14235 (0.0008) +[2026-06-02 16:43:47,149][247478] Updated weights for policy 0, policy_version 14245 (0.0008) +[2026-06-02 16:43:47,330][247478] Updated weights for policy 0, policy_version 14255 (0.0009) +[2026-06-02 16:43:47,509][247478] Updated weights for policy 0, policy_version 14265 (0.0008) +[2026-06-02 16:43:48,182][247478] Updated weights for policy 0, policy_version 14276 (0.0009) +[2026-06-02 16:43:48,351][247478] Updated weights for policy 0, policy_version 14286 (0.0008) +[2026-06-02 16:43:48,533][247478] Updated weights for policy 0, policy_version 14296 (0.0010) +[2026-06-02 16:43:48,713][247478] Updated weights for policy 0, policy_version 14306 (0.0008) +[2026-06-02 16:43:48,896][247478] Updated weights for policy 0, policy_version 14316 (0.0008) +[2026-06-02 16:43:49,081][247478] Updated weights for policy 0, policy_version 14326 (0.0009) +[2026-06-02 16:43:49,250][247478] Updated weights for policy 0, policy_version 14336 (0.0009) +[2026-06-02 16:43:49,725][246448] Fps is (10 sec: 19660.6, 60 sec: 20206.9, 300 sec: 20105.1). Total num frames: 7340032. Throughput: 0: 20312.2. Samples: 7359872. Policy #0 lag: (min: 63.0, avg: 79.6, max: 127.0) +[2026-06-02 16:43:49,727][246448] Avg episode reward: [(0, '505.837')] +[2026-06-02 16:43:49,893][247478] Updated weights for policy 0, policy_version 14346 (0.0011) +[2026-06-02 16:43:50,072][247478] Updated weights for policy 0, policy_version 14356 (0.0009) +[2026-06-02 16:43:50,265][247478] Updated weights for policy 0, policy_version 14366 (0.0008) +[2026-06-02 16:43:50,435][247478] Updated weights for policy 0, policy_version 14376 (0.0008) +[2026-06-02 16:43:50,621][247478] Updated weights for policy 0, policy_version 14386 (0.0009) +[2026-06-02 16:43:50,803][247478] Updated weights for policy 0, policy_version 14396 (0.0008) +[2026-06-02 16:43:51,447][247478] Updated weights for policy 0, policy_version 14406 (0.0009) +[2026-06-02 16:43:51,643][247478] Updated weights for policy 0, policy_version 14417 (0.0008) +[2026-06-02 16:43:51,821][247478] Updated weights for policy 0, policy_version 14427 (0.0008) +[2026-06-02 16:43:52,000][247478] Updated weights for policy 0, policy_version 14437 (0.0008) +[2026-06-02 16:43:52,184][247478] Updated weights for policy 0, policy_version 14447 (0.0008) +[2026-06-02 16:43:52,367][247478] Updated weights for policy 0, policy_version 14457 (0.0008) +[2026-06-02 16:43:53,019][247478] Updated weights for policy 0, policy_version 14468 (0.0010) +[2026-06-02 16:43:53,205][247478] Updated weights for policy 0, policy_version 14479 (0.0008) +[2026-06-02 16:43:53,385][247478] Updated weights for policy 0, policy_version 14489 (0.0008) +[2026-06-02 16:43:53,578][247478] Updated weights for policy 0, policy_version 14499 (0.0008) +[2026-06-02 16:43:53,770][247478] Updated weights for policy 0, policy_version 14510 (0.0008) +[2026-06-02 16:43:53,947][247478] Updated weights for policy 0, policy_version 14520 (0.0008) +[2026-06-02 16:43:54,608][247478] Updated weights for policy 0, policy_version 14530 (0.0009) +[2026-06-02 16:43:54,725][246448] Fps is (10 sec: 19661.3, 60 sec: 20206.9, 300 sec: 20105.1). Total num frames: 7438336. Throughput: 0: 20295.1. Samples: 7484672. Policy #0 lag: (min: 63.0, avg: 79.5, max: 127.0) +[2026-06-02 16:43:54,727][246448] Avg episode reward: [(0, '504.533')] +[2026-06-02 16:43:54,788][247478] Updated weights for policy 0, policy_version 14540 (0.0009) +[2026-06-02 16:43:54,959][247478] Updated weights for policy 0, policy_version 14550 (0.0008) +[2026-06-02 16:43:55,137][247478] Updated weights for policy 0, policy_version 14560 (0.0008) +[2026-06-02 16:43:55,315][247478] Updated weights for policy 0, policy_version 14570 (0.0008) +[2026-06-02 16:43:55,509][247478] Updated weights for policy 0, policy_version 14580 (0.0008) +[2026-06-02 16:43:55,685][247478] Updated weights for policy 0, policy_version 14590 (0.0009) +[2026-06-02 16:43:56,322][247478] Updated weights for policy 0, policy_version 14601 (0.0009) +[2026-06-02 16:43:56,500][247478] Updated weights for policy 0, policy_version 14611 (0.0009) +[2026-06-02 16:43:56,675][247478] Updated weights for policy 0, policy_version 14621 (0.0009) +[2026-06-02 16:43:56,861][247478] Updated weights for policy 0, policy_version 14631 (0.0009) +[2026-06-02 16:43:57,037][247478] Updated weights for policy 0, policy_version 14641 (0.0005) +[2026-06-02 16:43:57,229][247478] Updated weights for policy 0, policy_version 14652 (0.0004) +[2026-06-02 16:43:57,900][247478] Updated weights for policy 0, policy_version 14662 (0.0005) +[2026-06-02 16:43:58,067][247478] Updated weights for policy 0, policy_version 14672 (0.0005) +[2026-06-02 16:43:58,245][247478] Updated weights for policy 0, policy_version 14682 (0.0005) +[2026-06-02 16:43:58,412][247478] Updated weights for policy 0, policy_version 14692 (0.0004) +[2026-06-02 16:43:58,600][247478] Updated weights for policy 0, policy_version 14702 (0.0005) +[2026-06-02 16:43:58,774][247478] Updated weights for policy 0, policy_version 14712 (0.0005) +[2026-06-02 16:43:59,436][247478] Updated weights for policy 0, policy_version 14722 (0.0008) +[2026-06-02 16:43:59,608][247478] Updated weights for policy 0, policy_version 14732 (0.0008) +[2026-06-02 16:43:59,726][246448] Fps is (10 sec: 19660.8, 60 sec: 20206.9, 300 sec: 20216.2). Total num frames: 7536640. Throughput: 0: 20278.0. Samples: 7545728. Policy #0 lag: (min: 63.0, avg: 79.5, max: 127.0) +[2026-06-02 16:43:59,727][246448] Avg episode reward: [(0, '507.477')] +[2026-06-02 16:43:59,777][247478] Updated weights for policy 0, policy_version 14742 (0.0009) +[2026-06-02 16:43:59,959][247478] Updated weights for policy 0, policy_version 14752 (0.0008) +[2026-06-02 16:44:00,145][247478] Updated weights for policy 0, policy_version 14762 (0.0008) +[2026-06-02 16:44:00,328][247478] Updated weights for policy 0, policy_version 14772 (0.0008) +[2026-06-02 16:44:00,505][247478] Updated weights for policy 0, policy_version 14782 (0.0008) +[2026-06-02 16:44:01,150][247478] Updated weights for policy 0, policy_version 14793 (0.0005) +[2026-06-02 16:44:01,330][247478] Updated weights for policy 0, policy_version 14803 (0.0004) +[2026-06-02 16:44:01,512][247478] Updated weights for policy 0, policy_version 14813 (0.0004) +[2026-06-02 16:44:01,693][247478] Updated weights for policy 0, policy_version 14823 (0.0004) +[2026-06-02 16:44:01,868][247478] Updated weights for policy 0, policy_version 14833 (0.0004) +[2026-06-02 16:44:02,063][247478] Updated weights for policy 0, policy_version 14844 (0.0004) +[2026-06-02 16:44:02,704][247478] Updated weights for policy 0, policy_version 14854 (0.0004) +[2026-06-02 16:44:02,875][247478] Updated weights for policy 0, policy_version 14864 (0.0004) +[2026-06-02 16:44:03,063][247478] Updated weights for policy 0, policy_version 14874 (0.0004) +[2026-06-02 16:44:03,231][247478] Updated weights for policy 0, policy_version 14884 (0.0004) +[2026-06-02 16:44:03,421][247478] Updated weights for policy 0, policy_version 14894 (0.0004) +[2026-06-02 16:44:03,608][247478] Updated weights for policy 0, policy_version 14904 (0.0004) +[2026-06-02 16:44:04,223][247478] Updated weights for policy 0, policy_version 14914 (0.0004) +[2026-06-02 16:44:04,394][247478] Updated weights for policy 0, policy_version 14924 (0.0004) +[2026-06-02 16:44:04,593][247478] Updated weights for policy 0, policy_version 14935 (0.0004) +[2026-06-02 16:44:04,725][246448] Fps is (10 sec: 19661.0, 60 sec: 20206.9, 300 sec: 20216.2). Total num frames: 7634944. Throughput: 0: 20320.7. Samples: 7661568. Policy #0 lag: (min: 63.0, avg: 79.5, max: 127.0) +[2026-06-02 16:44:04,726][246448] Avg episode reward: [(0, '497.698')] +[2026-06-02 16:44:04,779][247478] Updated weights for policy 0, policy_version 14945 (0.0004) +[2026-06-02 16:44:04,980][247478] Updated weights for policy 0, policy_version 14956 (0.0004) +[2026-06-02 16:44:05,165][247478] Updated weights for policy 0, policy_version 14966 (0.0004) +[2026-06-02 16:44:05,340][247478] Updated weights for policy 0, policy_version 14976 (0.0004) +[2026-06-02 16:44:05,936][247478] Updated weights for policy 0, policy_version 14986 (0.0009) +[2026-06-02 16:44:06,129][247478] Updated weights for policy 0, policy_version 14996 (0.0009) +[2026-06-02 16:44:06,311][247478] Updated weights for policy 0, policy_version 15006 (0.0009) +[2026-06-02 16:44:06,492][247478] Updated weights for policy 0, policy_version 15016 (0.0009) +[2026-06-02 16:44:06,671][247478] Updated weights for policy 0, policy_version 15026 (0.0008) +[2026-06-02 16:44:06,852][247478] Updated weights for policy 0, policy_version 15036 (0.0008) +[2026-06-02 16:44:07,488][247478] Updated weights for policy 0, policy_version 15046 (0.0009) +[2026-06-02 16:44:07,672][247478] Updated weights for policy 0, policy_version 15056 (0.0009) +[2026-06-02 16:44:07,846][247478] Updated weights for policy 0, policy_version 15066 (0.0009) +[2026-06-02 16:44:08,028][247478] Updated weights for policy 0, policy_version 15076 (0.0009) +[2026-06-02 16:44:08,210][247478] Updated weights for policy 0, policy_version 15086 (0.0009) +[2026-06-02 16:44:08,407][247478] Updated weights for policy 0, policy_version 15097 (0.0009) +[2026-06-02 16:44:09,046][247478] Updated weights for policy 0, policy_version 15107 (0.0009) +[2026-06-02 16:44:09,233][247478] Updated weights for policy 0, policy_version 15118 (0.0009) +[2026-06-02 16:44:09,413][247478] Updated weights for policy 0, policy_version 15128 (0.0009) +[2026-06-02 16:44:09,599][247478] Updated weights for policy 0, policy_version 15138 (0.0009) +[2026-06-02 16:44:09,725][246448] Fps is (10 sec: 19660.9, 60 sec: 20206.9, 300 sec: 20216.2). Total num frames: 7733248. Throughput: 0: 20332.1. Samples: 7785984. Policy #0 lag: (min: 63.0, avg: 80.5, max: 127.0) +[2026-06-02 16:44:09,726][246448] Avg episode reward: [(0, '485.717')] +[2026-06-02 16:44:09,780][247478] Updated weights for policy 0, policy_version 15148 (0.0009) +[2026-06-02 16:44:09,965][247478] Updated weights for policy 0, policy_version 15158 (0.0009) +[2026-06-02 16:44:10,136][247478] Updated weights for policy 0, policy_version 15168 (0.0009) +[2026-06-02 16:44:10,816][247478] Updated weights for policy 0, policy_version 15178 (0.0009) +[2026-06-02 16:44:10,995][247478] Updated weights for policy 0, policy_version 15188 (0.0009) +[2026-06-02 16:44:11,168][247478] Updated weights for policy 0, policy_version 15198 (0.0009) +[2026-06-02 16:44:11,356][247478] Updated weights for policy 0, policy_version 15208 (0.0009) +[2026-06-02 16:44:11,534][247478] Updated weights for policy 0, policy_version 15218 (0.0009) +[2026-06-02 16:44:11,714][247478] Updated weights for policy 0, policy_version 15228 (0.0008) +[2026-06-02 16:44:12,350][247478] Updated weights for policy 0, policy_version 15238 (0.0008) +[2026-06-02 16:44:12,516][247478] Updated weights for policy 0, policy_version 15248 (0.0008) +[2026-06-02 16:44:12,699][247478] Updated weights for policy 0, policy_version 15258 (0.0009) +[2026-06-02 16:44:12,887][247478] Updated weights for policy 0, policy_version 15268 (0.0009) +[2026-06-02 16:44:13,065][247478] Updated weights for policy 0, policy_version 15278 (0.0009) +[2026-06-02 16:44:13,238][247478] Updated weights for policy 0, policy_version 15288 (0.0009) +[2026-06-02 16:44:13,889][247478] Updated weights for policy 0, policy_version 15298 (0.0009) +[2026-06-02 16:44:14,055][247478] Updated weights for policy 0, policy_version 15308 (0.0008) +[2026-06-02 16:44:14,229][247478] Updated weights for policy 0, policy_version 15318 (0.0009) +[2026-06-02 16:44:14,409][247478] Updated weights for policy 0, policy_version 15328 (0.0009) +[2026-06-02 16:44:14,586][247478] Updated weights for policy 0, policy_version 15338 (0.0009) +[2026-06-02 16:44:14,725][246448] Fps is (10 sec: 19660.7, 60 sec: 20206.9, 300 sec: 20216.2). Total num frames: 7831552. Throughput: 0: 20357.7. Samples: 7849600. Policy #0 lag: (min: 63.0, avg: 80.5, max: 127.0) +[2026-06-02 16:44:14,727][246448] Avg episode reward: [(0, '503.597')] +[2026-06-02 16:44:14,776][247478] Updated weights for policy 0, policy_version 15348 (0.0008) +[2026-06-02 16:44:14,954][247478] Updated weights for policy 0, policy_version 15358 (0.0009) +[2026-06-02 16:44:15,610][247478] Updated weights for policy 0, policy_version 15368 (0.0009) +[2026-06-02 16:44:15,789][247478] Updated weights for policy 0, policy_version 15378 (0.0009) +[2026-06-02 16:44:15,965][247478] Updated weights for policy 0, policy_version 15388 (0.0009) +[2026-06-02 16:44:16,144][247478] Updated weights for policy 0, policy_version 15398 (0.0008) +[2026-06-02 16:44:16,323][247478] Updated weights for policy 0, policy_version 15408 (0.0008) +[2026-06-02 16:44:16,510][247478] Updated weights for policy 0, policy_version 15418 (0.0007) +[2026-06-02 16:44:17,149][247478] Updated weights for policy 0, policy_version 15429 (0.0009) +[2026-06-02 16:44:17,329][247478] Updated weights for policy 0, policy_version 15439 (0.0008) +[2026-06-02 16:44:17,499][247478] Updated weights for policy 0, policy_version 15449 (0.0009) +[2026-06-02 16:44:17,689][247478] Updated weights for policy 0, policy_version 15459 (0.0009) +[2026-06-02 16:44:17,881][247478] Updated weights for policy 0, policy_version 15470 (0.0009) +[2026-06-02 16:44:18,073][247478] Updated weights for policy 0, policy_version 15480 (0.0009) +[2026-06-02 16:44:18,721][247478] Updated weights for policy 0, policy_version 15490 (0.0009) +[2026-06-02 16:44:18,924][247478] Updated weights for policy 0, policy_version 15502 (0.0009) +[2026-06-02 16:44:19,108][247478] Updated weights for policy 0, policy_version 15512 (0.0009) +[2026-06-02 16:44:19,281][247478] Updated weights for policy 0, policy_version 15522 (0.0009) +[2026-06-02 16:44:19,461][247478] Updated weights for policy 0, policy_version 15532 (0.0009) +[2026-06-02 16:44:19,636][247478] Updated weights for policy 0, policy_version 15542 (0.0009) +[2026-06-02 16:44:19,726][246448] Fps is (10 sec: 19660.8, 60 sec: 20206.9, 300 sec: 20216.2). Total num frames: 7929856. Throughput: 0: 20366.2. Samples: 7975552. Policy #0 lag: (min: 63.0, avg: 80.5, max: 127.0) +[2026-06-02 16:44:19,727][246448] Avg episode reward: [(0, '543.471')] +[2026-06-02 16:44:19,822][247399] Saving new best policy, reward=543.471! +[2026-06-02 16:44:19,824][247478] Updated weights for policy 0, policy_version 15552 (0.0009) +[2026-06-02 16:44:20,465][247478] Updated weights for policy 0, policy_version 15562 (0.0009) +[2026-06-02 16:44:20,647][247478] Updated weights for policy 0, policy_version 15572 (0.0009) +[2026-06-02 16:44:20,823][247478] Updated weights for policy 0, policy_version 15582 (0.0009) +[2026-06-02 16:44:21,012][247478] Updated weights for policy 0, policy_version 15592 (0.0009) +[2026-06-02 16:44:21,182][247478] Updated weights for policy 0, policy_version 15602 (0.0009) +[2026-06-02 16:44:21,361][247478] Updated weights for policy 0, policy_version 15612 (0.0009) +[2026-06-02 16:44:22,015][247478] Updated weights for policy 0, policy_version 15622 (0.0009) +[2026-06-02 16:44:22,182][247478] Updated weights for policy 0, policy_version 15632 (0.0009) +[2026-06-02 16:44:22,360][247478] Updated weights for policy 0, policy_version 15642 (0.0009) +[2026-06-02 16:44:22,542][247478] Updated weights for policy 0, policy_version 15652 (0.0009) +[2026-06-02 16:44:22,733][247478] Updated weights for policy 0, policy_version 15662 (0.0009) +[2026-06-02 16:44:22,902][247478] Updated weights for policy 0, policy_version 15672 (0.0009) +[2026-06-02 16:44:23,571][247478] Updated weights for policy 0, policy_version 15682 (0.0009) +[2026-06-02 16:44:23,744][247478] Updated weights for policy 0, policy_version 15692 (0.0009) +[2026-06-02 16:44:23,919][247478] Updated weights for policy 0, policy_version 15702 (0.0009) +[2026-06-02 16:44:24,095][247478] Updated weights for policy 0, policy_version 15712 (0.0009) +[2026-06-02 16:44:24,276][247478] Updated weights for policy 0, policy_version 15722 (0.0009) +[2026-06-02 16:44:24,447][247478] Updated weights for policy 0, policy_version 15732 (0.0009) +[2026-06-02 16:44:24,648][247478] Updated weights for policy 0, policy_version 15743 (0.0009) +[2026-06-02 16:44:24,725][246448] Fps is (10 sec: 22937.6, 60 sec: 20753.1, 300 sec: 20327.3). Total num frames: 8060928. Throughput: 0: 20323.5. Samples: 8088704. Policy #0 lag: (min: 63.0, avg: 78.6, max: 127.0) +[2026-06-02 16:44:24,726][246448] Avg episode reward: [(0, '572.283')] +[2026-06-02 16:44:24,731][247399] Saving new best policy, reward=572.283! +[2026-06-02 16:44:25,307][247478] Updated weights for policy 0, policy_version 15754 (0.0008) +[2026-06-02 16:44:25,482][247478] Updated weights for policy 0, policy_version 15764 (0.0008) +[2026-06-02 16:44:25,666][247478] Updated weights for policy 0, policy_version 15774 (0.0008) +[2026-06-02 16:44:25,845][247478] Updated weights for policy 0, policy_version 15784 (0.0008) +[2026-06-02 16:44:26,019][247478] Updated weights for policy 0, policy_version 15794 (0.0008) +[2026-06-02 16:44:26,209][247478] Updated weights for policy 0, policy_version 15804 (0.0009) +[2026-06-02 16:44:26,863][247478] Updated weights for policy 0, policy_version 15815 (0.0009) +[2026-06-02 16:44:27,033][247478] Updated weights for policy 0, policy_version 15825 (0.0008) +[2026-06-02 16:44:27,222][247478] Updated weights for policy 0, policy_version 15835 (0.0009) +[2026-06-02 16:44:27,392][247478] Updated weights for policy 0, policy_version 15845 (0.0008) +[2026-06-02 16:44:27,572][247478] Updated weights for policy 0, policy_version 15855 (0.0009) +[2026-06-02 16:44:27,765][247478] Updated weights for policy 0, policy_version 15866 (0.0009) +[2026-06-02 16:44:28,412][247478] Updated weights for policy 0, policy_version 15876 (0.0009) +[2026-06-02 16:44:28,584][247478] Updated weights for policy 0, policy_version 15886 (0.0009) +[2026-06-02 16:44:28,764][247478] Updated weights for policy 0, policy_version 15896 (0.0009) +[2026-06-02 16:44:28,945][247478] Updated weights for policy 0, policy_version 15906 (0.0008) +[2026-06-02 16:44:29,127][247478] Updated weights for policy 0, policy_version 15916 (0.0008) +[2026-06-02 16:44:29,315][247478] Updated weights for policy 0, policy_version 15926 (0.0008) +[2026-06-02 16:44:29,481][247478] Updated weights for policy 0, policy_version 15936 (0.0008) +[2026-06-02 16:44:29,725][246448] Fps is (10 sec: 22937.6, 60 sec: 20753.1, 300 sec: 20327.3). Total num frames: 8159232. Throughput: 0: 20337.9. Samples: 8151296. Policy #0 lag: (min: 63.0, avg: 78.6, max: 127.0) +[2026-06-02 16:44:29,726][246448] Avg episode reward: [(0, '606.717')] +[2026-06-02 16:44:29,731][247399] Saving new best policy, reward=606.717! +[2026-06-02 16:44:30,146][247478] Updated weights for policy 0, policy_version 15947 (0.0008) +[2026-06-02 16:44:30,331][247478] Updated weights for policy 0, policy_version 15957 (0.0008) +[2026-06-02 16:44:30,512][247478] Updated weights for policy 0, policy_version 15967 (0.0008) +[2026-06-02 16:44:30,690][247478] Updated weights for policy 0, policy_version 15977 (0.0008) +[2026-06-02 16:44:30,873][247478] Updated weights for policy 0, policy_version 15987 (0.0008) +[2026-06-02 16:44:31,041][247478] Updated weights for policy 0, policy_version 15997 (0.0008) +[2026-06-02 16:44:31,682][247478] Updated weights for policy 0, policy_version 16007 (0.0008) +[2026-06-02 16:44:31,874][247478] Updated weights for policy 0, policy_version 16018 (0.0008) +[2026-06-02 16:44:32,078][247478] Updated weights for policy 0, policy_version 16029 (0.0008) +[2026-06-02 16:44:32,256][247478] Updated weights for policy 0, policy_version 16039 (0.0008) +[2026-06-02 16:44:32,442][247478] Updated weights for policy 0, policy_version 16049 (0.0009) +[2026-06-02 16:44:32,634][247478] Updated weights for policy 0, policy_version 16060 (0.0009) +[2026-06-02 16:44:33,278][247478] Updated weights for policy 0, policy_version 16070 (0.0009) +[2026-06-02 16:44:33,448][247478] Updated weights for policy 0, policy_version 16080 (0.0008) +[2026-06-02 16:44:33,623][247478] Updated weights for policy 0, policy_version 16090 (0.0008) +[2026-06-02 16:44:33,809][247478] Updated weights for policy 0, policy_version 16100 (0.0009) +[2026-06-02 16:44:34,006][247478] Updated weights for policy 0, policy_version 16111 (0.0009) +[2026-06-02 16:44:34,186][247478] Updated weights for policy 0, policy_version 16121 (0.0009) +[2026-06-02 16:44:34,725][246448] Fps is (10 sec: 19660.7, 60 sec: 20206.9, 300 sec: 20327.3). Total num frames: 8257536. Throughput: 0: 20369.1. Samples: 8276480. Policy #0 lag: (min: 63.0, avg: 79.4, max: 127.0) +[2026-06-02 16:44:34,727][246448] Avg episode reward: [(0, '619.191')] +[2026-06-02 16:44:34,848][247478] Updated weights for policy 0, policy_version 16131 (0.0009) +[2026-06-02 16:44:35,017][247478] Updated weights for policy 0, policy_version 16141 (0.0009) +[2026-06-02 16:44:35,212][247478] Updated weights for policy 0, policy_version 16152 (0.0009) +[2026-06-02 16:44:35,392][247478] Updated weights for policy 0, policy_version 16162 (0.0010) +[2026-06-02 16:44:35,572][247478] Updated weights for policy 0, policy_version 16172 (0.0009) +[2026-06-02 16:44:35,742][247478] Updated weights for policy 0, policy_version 16182 (0.0009) +[2026-06-02 16:44:35,927][247399] Saving new best policy, reward=619.191! +[2026-06-02 16:44:35,930][247478] Updated weights for policy 0, policy_version 16192 (0.0009) +[2026-06-02 16:44:36,606][247478] Updated weights for policy 0, policy_version 16204 (0.0009) +[2026-06-02 16:44:36,803][247478] Updated weights for policy 0, policy_version 16215 (0.0008) +[2026-06-02 16:44:36,985][247478] Updated weights for policy 0, policy_version 16225 (0.0008) +[2026-06-02 16:44:37,169][247478] Updated weights for policy 0, policy_version 16235 (0.0010) +[2026-06-02 16:44:37,354][247478] Updated weights for policy 0, policy_version 16245 (0.0009) +[2026-06-02 16:44:37,544][247478] Updated weights for policy 0, policy_version 16256 (0.0008) +[2026-06-02 16:44:38,192][247478] Updated weights for policy 0, policy_version 16266 (0.0009) +[2026-06-02 16:44:38,362][247478] Updated weights for policy 0, policy_version 16276 (0.0008) +[2026-06-02 16:44:38,556][247478] Updated weights for policy 0, policy_version 16287 (0.0008) +[2026-06-02 16:44:38,747][247478] Updated weights for policy 0, policy_version 16297 (0.0008) +[2026-06-02 16:44:38,936][247478] Updated weights for policy 0, policy_version 16308 (0.0008) +[2026-06-02 16:44:39,134][247478] Updated weights for policy 0, policy_version 16318 (0.0008) +[2026-06-02 16:44:39,725][246448] Fps is (10 sec: 19661.0, 60 sec: 20206.9, 300 sec: 20327.3). Total num frames: 8355840. Throughput: 0: 20420.3. Samples: 8403584. Policy #0 lag: (min: 63.0, avg: 79.4, max: 127.0) +[2026-06-02 16:44:39,726][246448] Avg episode reward: [(0, '624.868')] +[2026-06-02 16:44:39,805][247478] Updated weights for policy 0, policy_version 16329 (0.0009) +[2026-06-02 16:44:39,974][247478] Updated weights for policy 0, policy_version 16339 (0.0008) +[2026-06-02 16:44:40,146][247478] Updated weights for policy 0, policy_version 16349 (0.0008) +[2026-06-02 16:44:40,333][247478] Updated weights for policy 0, policy_version 16359 (0.0009) +[2026-06-02 16:44:40,510][247478] Updated weights for policy 0, policy_version 16369 (0.0009) +[2026-06-02 16:44:40,697][247478] Updated weights for policy 0, policy_version 16379 (0.0008) +[2026-06-02 16:44:40,779][247399] Saving new best policy, reward=624.868! +[2026-06-02 16:44:41,323][247478] Updated weights for policy 0, policy_version 16389 (0.0009) +[2026-06-02 16:44:41,496][247478] Updated weights for policy 0, policy_version 16399 (0.0009) +[2026-06-02 16:44:41,692][247478] Updated weights for policy 0, policy_version 16410 (0.0009) +[2026-06-02 16:44:41,874][247478] Updated weights for policy 0, policy_version 16420 (0.0006) +[2026-06-02 16:44:42,071][247478] Updated weights for policy 0, policy_version 16431 (0.0004) +[2026-06-02 16:44:42,270][247478] Updated weights for policy 0, policy_version 16442 (0.0004) +[2026-06-02 16:44:42,918][247478] Updated weights for policy 0, policy_version 16452 (0.0004) +[2026-06-02 16:44:43,124][247478] Updated weights for policy 0, policy_version 16464 (0.0004) +[2026-06-02 16:44:43,309][247478] Updated weights for policy 0, policy_version 16474 (0.0004) +[2026-06-02 16:44:43,486][247478] Updated weights for policy 0, policy_version 16484 (0.0008) +[2026-06-02 16:44:43,665][247478] Updated weights for policy 0, policy_version 16494 (0.0008) +[2026-06-02 16:44:43,852][247478] Updated weights for policy 0, policy_version 16504 (0.0008) +[2026-06-02 16:44:44,487][247478] Updated weights for policy 0, policy_version 16514 (0.0009) +[2026-06-02 16:44:44,648][247478] Updated weights for policy 0, policy_version 16524 (0.0008) +[2026-06-02 16:44:44,725][246448] Fps is (10 sec: 19660.9, 60 sec: 20207.0, 300 sec: 20327.3). Total num frames: 8454144. Throughput: 0: 20360.6. Samples: 8461952. Policy #0 lag: (min: 63.0, avg: 79.4, max: 127.0) +[2026-06-02 16:44:44,726][246448] Avg episode reward: [(0, '651.594')] +[2026-06-02 16:44:44,837][247478] Updated weights for policy 0, policy_version 16535 (0.0008) +[2026-06-02 16:44:45,023][247478] Updated weights for policy 0, policy_version 16545 (0.0009) +[2026-06-02 16:44:45,209][247478] Updated weights for policy 0, policy_version 16555 (0.0008) +[2026-06-02 16:44:45,393][247478] Updated weights for policy 0, policy_version 16565 (0.0009) +[2026-06-02 16:44:45,572][247478] Updated weights for policy 0, policy_version 16575 (0.0008) +[2026-06-02 16:44:45,586][247399] Saving new best policy, reward=651.594! +[2026-06-02 16:44:46,205][247478] Updated weights for policy 0, policy_version 16585 (0.0009) +[2026-06-02 16:44:46,374][247478] Updated weights for policy 0, policy_version 16595 (0.0008) +[2026-06-02 16:44:46,557][247478] Updated weights for policy 0, policy_version 16605 (0.0009) +[2026-06-02 16:44:46,740][247478] Updated weights for policy 0, policy_version 16615 (0.0009) +[2026-06-02 16:44:46,917][247478] Updated weights for policy 0, policy_version 16625 (0.0008) +[2026-06-02 16:44:47,097][247478] Updated weights for policy 0, policy_version 16635 (0.0009) +[2026-06-02 16:44:47,755][247478] Updated weights for policy 0, policy_version 16645 (0.0008) +[2026-06-02 16:44:47,925][247478] Updated weights for policy 0, policy_version 16655 (0.0009) +[2026-06-02 16:44:48,119][247478] Updated weights for policy 0, policy_version 16666 (0.0009) +[2026-06-02 16:44:48,298][247478] Updated weights for policy 0, policy_version 16676 (0.0009) +[2026-06-02 16:44:48,479][247478] Updated weights for policy 0, policy_version 16686 (0.0009) +[2026-06-02 16:44:48,659][247478] Updated weights for policy 0, policy_version 16696 (0.0009) +[2026-06-02 16:44:49,318][247478] Updated weights for policy 0, policy_version 16707 (0.0009) +[2026-06-02 16:44:49,480][247478] Updated weights for policy 0, policy_version 16717 (0.0008) +[2026-06-02 16:44:49,681][247478] Updated weights for policy 0, policy_version 16728 (0.0009) +[2026-06-02 16:44:49,725][246448] Fps is (10 sec: 19660.6, 60 sec: 20206.9, 300 sec: 20327.3). Total num frames: 8552448. Throughput: 0: 20386.1. Samples: 8578944. Policy #0 lag: (min: 63.0, avg: 80.5, max: 127.0) +[2026-06-02 16:44:49,727][246448] Avg episode reward: [(0, '677.306')] +[2026-06-02 16:44:49,862][247478] Updated weights for policy 0, policy_version 16738 (0.0008) +[2026-06-02 16:44:50,043][247478] Updated weights for policy 0, policy_version 16748 (0.0009) +[2026-06-02 16:44:50,240][247478] Updated weights for policy 0, policy_version 16759 (0.0008) +[2026-06-02 16:44:50,389][247399] Saving new best policy, reward=677.306! +[2026-06-02 16:44:50,911][247478] Updated weights for policy 0, policy_version 16769 (0.0009) +[2026-06-02 16:44:51,075][247478] Updated weights for policy 0, policy_version 16779 (0.0008) +[2026-06-02 16:44:51,255][247478] Updated weights for policy 0, policy_version 16789 (0.0009) +[2026-06-02 16:44:51,434][247478] Updated weights for policy 0, policy_version 16799 (0.0008) +[2026-06-02 16:44:51,624][247478] Updated weights for policy 0, policy_version 16810 (0.0008) +[2026-06-02 16:44:51,819][247478] Updated weights for policy 0, policy_version 16820 (0.0009) +[2026-06-02 16:44:51,990][247478] Updated weights for policy 0, policy_version 16830 (0.0008) +[2026-06-02 16:44:52,636][247478] Updated weights for policy 0, policy_version 16840 (0.0008) +[2026-06-02 16:44:52,801][247478] Updated weights for policy 0, policy_version 16850 (0.0008) +[2026-06-02 16:44:52,973][247478] Updated weights for policy 0, policy_version 16860 (0.0009) +[2026-06-02 16:44:53,170][247478] Updated weights for policy 0, policy_version 16871 (0.0007) +[2026-06-02 16:44:53,351][247478] Updated weights for policy 0, policy_version 16881 (0.0008) +[2026-06-02 16:44:53,535][247478] Updated weights for policy 0, policy_version 16891 (0.0009) +[2026-06-02 16:44:54,196][247478] Updated weights for policy 0, policy_version 16902 (0.0009) +[2026-06-02 16:44:54,363][247478] Updated weights for policy 0, policy_version 16912 (0.0009) +[2026-06-02 16:44:54,547][247478] Updated weights for policy 0, policy_version 16922 (0.0008) +[2026-06-02 16:44:54,722][247478] Updated weights for policy 0, policy_version 16932 (0.0008) +[2026-06-02 16:44:54,725][246448] Fps is (10 sec: 19660.8, 60 sec: 20207.0, 300 sec: 20327.3). Total num frames: 8650752. Throughput: 0: 20394.7. Samples: 8703744. Policy #0 lag: (min: 63.0, avg: 80.5, max: 127.0) +[2026-06-02 16:44:54,726][246448] Avg episode reward: [(0, '668.745')] +[2026-06-02 16:44:54,923][247478] Updated weights for policy 0, policy_version 16943 (0.0009) +[2026-06-02 16:44:55,104][247478] Updated weights for policy 0, policy_version 16953 (0.0008) +[2026-06-02 16:44:55,773][247478] Updated weights for policy 0, policy_version 16964 (0.0009) +[2026-06-02 16:44:55,949][247478] Updated weights for policy 0, policy_version 16974 (0.0008) +[2026-06-02 16:44:56,143][247478] Updated weights for policy 0, policy_version 16985 (0.0009) +[2026-06-02 16:44:56,337][247478] Updated weights for policy 0, policy_version 16996 (0.0008) +[2026-06-02 16:44:56,508][247478] Updated weights for policy 0, policy_version 17006 (0.0009) +[2026-06-02 16:44:56,698][247478] Updated weights for policy 0, policy_version 17016 (0.0009) +[2026-06-02 16:44:57,350][247478] Updated weights for policy 0, policy_version 17026 (0.0008) +[2026-06-02 16:44:57,514][247478] Updated weights for policy 0, policy_version 17036 (0.0009) +[2026-06-02 16:44:57,706][247478] Updated weights for policy 0, policy_version 17047 (0.0009) +[2026-06-02 16:44:57,889][247478] Updated weights for policy 0, policy_version 17057 (0.0009) +[2026-06-02 16:44:58,073][247478] Updated weights for policy 0, policy_version 17067 (0.0009) +[2026-06-02 16:44:58,256][247478] Updated weights for policy 0, policy_version 17077 (0.0009) +[2026-06-02 16:44:58,427][247478] Updated weights for policy 0, policy_version 17087 (0.0009) +[2026-06-02 16:44:59,077][247478] Updated weights for policy 0, policy_version 17097 (0.0009) +[2026-06-02 16:44:59,264][247478] Updated weights for policy 0, policy_version 17107 (0.0009) +[2026-06-02 16:44:59,444][247478] Updated weights for policy 0, policy_version 17117 (0.0009) +[2026-06-02 16:44:59,639][247478] Updated weights for policy 0, policy_version 17128 (0.0009) +[2026-06-02 16:44:59,725][246448] Fps is (10 sec: 19661.0, 60 sec: 20207.0, 300 sec: 20327.3). Total num frames: 8749056. Throughput: 0: 20363.4. Samples: 8765952. Policy #0 lag: (min: 63.0, avg: 80.5, max: 127.0) +[2026-06-02 16:44:59,726][246448] Avg episode reward: [(0, '663.580')] +[2026-06-02 16:44:59,820][247478] Updated weights for policy 0, policy_version 17138 (0.0009) +[2026-06-02 16:45:00,003][247478] Updated weights for policy 0, policy_version 17148 (0.0007) +[2026-06-02 16:45:00,621][247478] Updated weights for policy 0, policy_version 17158 (0.0004) +[2026-06-02 16:45:00,792][247478] Updated weights for policy 0, policy_version 17168 (0.0004) +[2026-06-02 16:45:01,000][247478] Updated weights for policy 0, policy_version 17180 (0.0004) +[2026-06-02 16:45:01,182][247478] Updated weights for policy 0, policy_version 17190 (0.0004) +[2026-06-02 16:45:01,375][247478] Updated weights for policy 0, policy_version 17200 (0.0004) +[2026-06-02 16:45:01,571][247478] Updated weights for policy 0, policy_version 17211 (0.0004) +[2026-06-02 16:45:02,210][247478] Updated weights for policy 0, policy_version 17221 (0.0007) +[2026-06-02 16:45:02,379][247478] Updated weights for policy 0, policy_version 17231 (0.0008) +[2026-06-02 16:45:02,547][247478] Updated weights for policy 0, policy_version 17241 (0.0008) +[2026-06-02 16:45:02,740][247478] Updated weights for policy 0, policy_version 17251 (0.0008) +[2026-06-02 16:45:02,926][247478] Updated weights for policy 0, policy_version 17262 (0.0008) +[2026-06-02 16:45:03,111][247478] Updated weights for policy 0, policy_version 17272 (0.0008) +[2026-06-02 16:45:03,786][247478] Updated weights for policy 0, policy_version 17282 (0.0009) +[2026-06-02 16:45:03,954][247478] Updated weights for policy 0, policy_version 17292 (0.0009) +[2026-06-02 16:45:04,141][247478] Updated weights for policy 0, policy_version 17303 (0.0009) +[2026-06-02 16:45:04,336][247478] Updated weights for policy 0, policy_version 17314 (0.0009) +[2026-06-02 16:45:04,522][247478] Updated weights for policy 0, policy_version 17324 (0.0009) +[2026-06-02 16:45:04,701][247478] Updated weights for policy 0, policy_version 17334 (0.0009) +[2026-06-02 16:45:04,725][246448] Fps is (10 sec: 19660.8, 60 sec: 20206.9, 300 sec: 20216.2). Total num frames: 8847360. Throughput: 0: 20386.1. Samples: 8892928. Policy #0 lag: (min: 63.0, avg: 78.7, max: 127.0) +[2026-06-02 16:45:04,726][246448] Avg episode reward: [(0, '663.504')] +[2026-06-02 16:45:04,876][247478] Updated weights for policy 0, policy_version 17344 (0.0009) +[2026-06-02 16:45:05,528][247478] Updated weights for policy 0, policy_version 17354 (0.0009) +[2026-06-02 16:45:05,718][247478] Updated weights for policy 0, policy_version 17365 (0.0009) +[2026-06-02 16:45:05,891][247478] Updated weights for policy 0, policy_version 17375 (0.0009) +[2026-06-02 16:45:06,073][247478] Updated weights for policy 0, policy_version 17385 (0.0009) +[2026-06-02 16:45:06,281][247478] Updated weights for policy 0, policy_version 17397 (0.0009) +[2026-06-02 16:45:06,446][247478] Updated weights for policy 0, policy_version 17407 (0.0009) +[2026-06-02 16:45:07,137][247478] Updated weights for policy 0, policy_version 17418 (0.0009) +[2026-06-02 16:45:07,336][247478] Updated weights for policy 0, policy_version 17429 (0.0009) +[2026-06-02 16:45:07,505][247478] Updated weights for policy 0, policy_version 17439 (0.0009) +[2026-06-02 16:45:07,673][247478] Updated weights for policy 0, policy_version 17449 (0.0009) +[2026-06-02 16:45:07,870][247478] Updated weights for policy 0, policy_version 17460 (0.0009) +[2026-06-02 16:45:08,041][247478] Updated weights for policy 0, policy_version 17470 (0.0009) +[2026-06-02 16:45:08,720][247478] Updated weights for policy 0, policy_version 17481 (0.0009) +[2026-06-02 16:45:08,905][247478] Updated weights for policy 0, policy_version 17492 (0.0009) +[2026-06-02 16:45:09,101][247478] Updated weights for policy 0, policy_version 17503 (0.0009) +[2026-06-02 16:45:09,292][247478] Updated weights for policy 0, policy_version 17514 (0.0009) +[2026-06-02 16:45:09,489][247478] Updated weights for policy 0, policy_version 17525 (0.0009) +[2026-06-02 16:45:09,725][246448] Fps is (10 sec: 22937.5, 60 sec: 20753.1, 300 sec: 20327.3). Total num frames: 8978432. Throughput: 0: 20383.3. Samples: 9005952. Policy #0 lag: (min: 63.0, avg: 78.7, max: 127.0) +[2026-06-02 16:45:09,726][246448] Avg episode reward: [(0, '662.182')] +[2026-06-02 16:45:10,203][247478] Updated weights for policy 0, policy_version 17537 (0.0009) +[2026-06-02 16:45:10,395][247478] Updated weights for policy 0, policy_version 17549 (0.0009) +[2026-06-02 16:45:10,561][247478] Updated weights for policy 0, policy_version 17559 (0.0008) +[2026-06-02 16:45:10,764][247478] Updated weights for policy 0, policy_version 17570 (0.0009) +[2026-06-02 16:45:10,934][247478] Updated weights for policy 0, policy_version 17580 (0.0009) +[2026-06-02 16:45:11,108][247478] Updated weights for policy 0, policy_version 17590 (0.0009) +[2026-06-02 16:45:11,275][247478] Updated weights for policy 0, policy_version 17600 (0.0009) +[2026-06-02 16:45:11,965][247478] Updated weights for policy 0, policy_version 17611 (0.0008) +[2026-06-02 16:45:12,159][247478] Updated weights for policy 0, policy_version 17622 (0.0006) +[2026-06-02 16:45:12,342][247478] Updated weights for policy 0, policy_version 17632 (0.0009) +[2026-06-02 16:45:12,520][247478] Updated weights for policy 0, policy_version 17643 (0.0009) +[2026-06-02 16:45:12,712][247478] Updated weights for policy 0, policy_version 17654 (0.0009) +[2026-06-02 16:45:12,890][247478] Updated weights for policy 0, policy_version 17664 (0.0009) +[2026-06-02 16:45:13,561][247478] Updated weights for policy 0, policy_version 17675 (0.0009) +[2026-06-02 16:45:13,755][247478] Updated weights for policy 0, policy_version 17686 (0.0009) +[2026-06-02 16:45:13,932][247478] Updated weights for policy 0, policy_version 17696 (0.0009) +[2026-06-02 16:45:14,127][247478] Updated weights for policy 0, policy_version 17707 (0.0008) +[2026-06-02 16:45:14,318][247478] Updated weights for policy 0, policy_version 17718 (0.0009) +[2026-06-02 16:45:14,725][246448] Fps is (10 sec: 22937.5, 60 sec: 20753.1, 300 sec: 20327.3). Total num frames: 9076736. Throughput: 0: 20400.4. Samples: 9069312. Policy #0 lag: (min: 63.0, avg: 78.4, max: 127.0) +[2026-06-02 16:45:14,727][246448] Avg episode reward: [(0, '670.587')] +[2026-06-02 16:45:15,004][247478] Updated weights for policy 0, policy_version 17729 (0.0009) +[2026-06-02 16:45:15,198][247478] Updated weights for policy 0, policy_version 17741 (0.0009) +[2026-06-02 16:45:15,382][247478] Updated weights for policy 0, policy_version 17751 (0.0009) +[2026-06-02 16:45:15,551][247478] Updated weights for policy 0, policy_version 17761 (0.0009) +[2026-06-02 16:45:15,722][247478] Updated weights for policy 0, policy_version 17771 (0.0009) +[2026-06-02 16:45:15,920][247478] Updated weights for policy 0, policy_version 17782 (0.0008) +[2026-06-02 16:45:16,095][247478] Updated weights for policy 0, policy_version 17792 (0.0008) +[2026-06-02 16:45:16,805][247478] Updated weights for policy 0, policy_version 17804 (0.0009) +[2026-06-02 16:45:17,009][247478] Updated weights for policy 0, policy_version 17816 (0.0009) +[2026-06-02 16:45:17,192][247478] Updated weights for policy 0, policy_version 17826 (0.0008) +[2026-06-02 16:45:17,368][247478] Updated weights for policy 0, policy_version 17836 (0.0009) +[2026-06-02 16:45:17,565][247478] Updated weights for policy 0, policy_version 17847 (0.0009) +[2026-06-02 16:45:18,227][247478] Updated weights for policy 0, policy_version 17858 (0.0008) +[2026-06-02 16:45:18,409][247478] Updated weights for policy 0, policy_version 17869 (0.0008) +[2026-06-02 16:45:18,578][247478] Updated weights for policy 0, policy_version 17879 (0.0008) +[2026-06-02 16:45:18,756][247478] Updated weights for policy 0, policy_version 17889 (0.0009) +[2026-06-02 16:45:18,936][247478] Updated weights for policy 0, policy_version 17899 (0.0009) +[2026-06-02 16:45:19,109][247478] Updated weights for policy 0, policy_version 17909 (0.0009) +[2026-06-02 16:45:19,283][247478] Updated weights for policy 0, policy_version 17919 (0.0009) +[2026-06-02 16:45:19,725][246448] Fps is (10 sec: 19660.7, 60 sec: 20753.1, 300 sec: 20327.3). Total num frames: 9175040. Throughput: 0: 20408.9. Samples: 9194880. Policy #0 lag: (min: 63.0, avg: 78.4, max: 127.0) +[2026-06-02 16:45:19,727][246448] Avg episode reward: [(0, '643.959')] +[2026-06-02 16:45:19,973][247478] Updated weights for policy 0, policy_version 17930 (0.0009) +[2026-06-02 16:45:20,158][247478] Updated weights for policy 0, policy_version 17941 (0.0009) +[2026-06-02 16:45:20,333][247478] Updated weights for policy 0, policy_version 17951 (0.0009) +[2026-06-02 16:45:20,506][247478] Updated weights for policy 0, policy_version 17961 (0.0009) +[2026-06-02 16:45:20,720][247478] Updated weights for policy 0, policy_version 17973 (0.0009) +[2026-06-02 16:45:20,907][247478] Updated weights for policy 0, policy_version 17984 (0.0008) +[2026-06-02 16:45:21,625][247478] Updated weights for policy 0, policy_version 17998 (0.0009) +[2026-06-02 16:45:21,829][247478] Updated weights for policy 0, policy_version 18010 (0.0009) +[2026-06-02 16:45:22,019][247478] Updated weights for policy 0, policy_version 18021 (0.0008) +[2026-06-02 16:45:22,209][247478] Updated weights for policy 0, policy_version 18031 (0.0009) +[2026-06-02 16:45:22,378][247478] Updated weights for policy 0, policy_version 18041 (0.0009) +[2026-06-02 16:45:23,061][247478] Updated weights for policy 0, policy_version 18051 (0.0009) +[2026-06-02 16:45:23,226][247478] Updated weights for policy 0, policy_version 18061 (0.0008) +[2026-06-02 16:45:23,394][247478] Updated weights for policy 0, policy_version 18071 (0.0008) +[2026-06-02 16:45:23,592][247478] Updated weights for policy 0, policy_version 18082 (0.0009) +[2026-06-02 16:45:23,780][247478] Updated weights for policy 0, policy_version 18093 (0.0008) +[2026-06-02 16:45:23,959][247478] Updated weights for policy 0, policy_version 18103 (0.0008) +[2026-06-02 16:45:24,656][247478] Updated weights for policy 0, policy_version 18113 (0.0009) +[2026-06-02 16:45:24,725][246448] Fps is (10 sec: 19660.8, 60 sec: 20206.9, 300 sec: 20327.3). Total num frames: 9273344. Throughput: 0: 20406.0. Samples: 9321856. Policy #0 lag: (min: 63.0, avg: 78.4, max: 127.0) +[2026-06-02 16:45:24,727][246448] Avg episode reward: [(0, '702.672')] +[2026-06-02 16:45:24,849][247478] Updated weights for policy 0, policy_version 18125 (0.0009) +[2026-06-02 16:45:25,022][247478] Updated weights for policy 0, policy_version 18135 (0.0009) +[2026-06-02 16:45:25,197][247478] Updated weights for policy 0, policy_version 18145 (0.0009) +[2026-06-02 16:45:25,371][247478] Updated weights for policy 0, policy_version 18155 (0.0009) +[2026-06-02 16:45:25,548][247478] Updated weights for policy 0, policy_version 18165 (0.0009) +[2026-06-02 16:45:25,720][247478] Updated weights for policy 0, policy_version 18175 (0.0009) +[2026-06-02 16:45:25,729][247399] Saving new best policy, reward=702.672! +[2026-06-02 16:45:26,399][247478] Updated weights for policy 0, policy_version 18187 (0.0009) +[2026-06-02 16:45:26,589][247478] Updated weights for policy 0, policy_version 18198 (0.0009) +[2026-06-02 16:45:26,765][247478] Updated weights for policy 0, policy_version 18208 (0.0008) +[2026-06-02 16:45:26,937][247478] Updated weights for policy 0, policy_version 18218 (0.0009) +[2026-06-02 16:45:27,163][247478] Updated weights for policy 0, policy_version 18231 (0.0009) +[2026-06-02 16:45:27,854][247478] Updated weights for policy 0, policy_version 18241 (0.0009) +[2026-06-02 16:45:28,046][247478] Updated weights for policy 0, policy_version 18252 (0.0009) +[2026-06-02 16:45:28,207][247478] Updated weights for policy 0, policy_version 18262 (0.0009) +[2026-06-02 16:45:28,404][247478] Updated weights for policy 0, policy_version 18273 (0.0009) +[2026-06-02 16:45:28,572][247478] Updated weights for policy 0, policy_version 18283 (0.0009) +[2026-06-02 16:45:28,775][247478] Updated weights for policy 0, policy_version 18294 (0.0009) +[2026-06-02 16:45:29,450][247478] Updated weights for policy 0, policy_version 18305 (0.0009) +[2026-06-02 16:45:29,645][247478] Updated weights for policy 0, policy_version 18317 (0.0009) +[2026-06-02 16:45:29,725][246448] Fps is (10 sec: 19660.8, 60 sec: 20206.9, 300 sec: 20327.3). Total num frames: 9371648. Throughput: 0: 20417.4. Samples: 9380736. Policy #0 lag: (min: 57.0, avg: 82.6, max: 127.0) +[2026-06-02 16:45:29,726][246448] Avg episode reward: [(0, '716.783')] +[2026-06-02 16:45:29,815][247478] Updated weights for policy 0, policy_version 18327 (0.0009) +[2026-06-02 16:45:29,985][247478] Updated weights for policy 0, policy_version 18337 (0.0009) +[2026-06-02 16:45:30,180][247478] Updated weights for policy 0, policy_version 18348 (0.0009) +[2026-06-02 16:45:30,379][247478] Updated weights for policy 0, policy_version 18359 (0.0009) +[2026-06-02 16:45:30,535][247399] Saving new best policy, reward=716.783! +[2026-06-02 16:45:31,056][247478] Updated weights for policy 0, policy_version 18369 (0.0009) +[2026-06-02 16:45:31,212][247478] Updated weights for policy 0, policy_version 18379 (0.0008) +[2026-06-02 16:45:31,396][247478] Updated weights for policy 0, policy_version 18389 (0.0008) +[2026-06-02 16:45:31,575][247478] Updated weights for policy 0, policy_version 18399 (0.0008) +[2026-06-02 16:45:31,774][247478] Updated weights for policy 0, policy_version 18410 (0.0009) +[2026-06-02 16:45:31,974][247478] Updated weights for policy 0, policy_version 18421 (0.0009) +[2026-06-02 16:45:32,148][247478] Updated weights for policy 0, policy_version 18431 (0.0009) +[2026-06-02 16:45:32,820][247478] Updated weights for policy 0, policy_version 18442 (0.0009) +[2026-06-02 16:45:32,991][247478] Updated weights for policy 0, policy_version 18452 (0.0009) +[2026-06-02 16:45:33,169][247478] Updated weights for policy 0, policy_version 18462 (0.0009) +[2026-06-02 16:45:33,350][247478] Updated weights for policy 0, policy_version 18472 (0.0009) +[2026-06-02 16:45:33,532][247478] Updated weights for policy 0, policy_version 18482 (0.0009) +[2026-06-02 16:45:33,722][247478] Updated weights for policy 0, policy_version 18492 (0.0008) +[2026-06-02 16:45:34,333][247478] Updated weights for policy 0, policy_version 18502 (0.0009) +[2026-06-02 16:45:34,513][247478] Updated weights for policy 0, policy_version 18512 (0.0008) +[2026-06-02 16:45:34,710][247478] Updated weights for policy 0, policy_version 18523 (0.0009) +[2026-06-02 16:45:34,725][246448] Fps is (10 sec: 19660.8, 60 sec: 20206.9, 300 sec: 20327.3). Total num frames: 9469952. Throughput: 0: 20423.1. Samples: 9497984. Policy #0 lag: (min: 57.0, avg: 82.6, max: 127.0) +[2026-06-02 16:45:34,726][246448] Avg episode reward: [(0, '733.606')] +[2026-06-02 16:45:34,888][247478] Updated weights for policy 0, policy_version 18533 (0.0008) +[2026-06-02 16:45:35,068][247478] Updated weights for policy 0, policy_version 18543 (0.0009) +[2026-06-02 16:45:35,253][247478] Updated weights for policy 0, policy_version 18553 (0.0008) +[2026-06-02 16:45:35,367][247399] Saving new best policy, reward=733.606! +[2026-06-02 16:45:35,909][247478] Updated weights for policy 0, policy_version 18563 (0.0008) +[2026-06-02 16:45:36,078][247478] Updated weights for policy 0, policy_version 18573 (0.0009) +[2026-06-02 16:45:36,276][247478] Updated weights for policy 0, policy_version 18584 (0.0008) +[2026-06-02 16:45:36,455][247478] Updated weights for policy 0, policy_version 18594 (0.0008) +[2026-06-02 16:45:36,626][247478] Updated weights for policy 0, policy_version 18604 (0.0008) +[2026-06-02 16:45:36,817][247478] Updated weights for policy 0, policy_version 18614 (0.0008) +[2026-06-02 16:45:37,479][247478] Updated weights for policy 0, policy_version 18625 (0.0009) +[2026-06-02 16:45:37,645][247478] Updated weights for policy 0, policy_version 18635 (0.0009) +[2026-06-02 16:45:37,819][247478] Updated weights for policy 0, policy_version 18645 (0.0009) +[2026-06-02 16:45:38,008][247478] Updated weights for policy 0, policy_version 18656 (0.0009) +[2026-06-02 16:45:38,191][247478] Updated weights for policy 0, policy_version 18666 (0.0009) +[2026-06-02 16:45:38,376][247478] Updated weights for policy 0, policy_version 18676 (0.0009) +[2026-06-02 16:45:38,556][247478] Updated weights for policy 0, policy_version 18686 (0.0008) +[2026-06-02 16:45:39,177][247478] Updated weights for policy 0, policy_version 18696 (0.0009) +[2026-06-02 16:45:39,358][247478] Updated weights for policy 0, policy_version 18706 (0.0008) +[2026-06-02 16:45:39,539][247478] Updated weights for policy 0, policy_version 18716 (0.0008) +[2026-06-02 16:45:39,719][247478] Updated weights for policy 0, policy_version 18726 (0.0008) +[2026-06-02 16:45:39,725][246448] Fps is (10 sec: 19660.8, 60 sec: 20206.9, 300 sec: 20327.3). Total num frames: 9568256. Throughput: 0: 20420.3. Samples: 9622656. Policy #0 lag: (min: 57.0, avg: 82.6, max: 127.0) +[2026-06-02 16:45:39,726][246448] Avg episode reward: [(0, '729.110')] +[2026-06-02 16:45:39,917][247478] Updated weights for policy 0, policy_version 18737 (0.0008) +[2026-06-02 16:45:40,098][247478] Updated weights for policy 0, policy_version 18747 (0.0008) +[2026-06-02 16:45:40,762][247478] Updated weights for policy 0, policy_version 18757 (0.0008) +[2026-06-02 16:45:40,942][247478] Updated weights for policy 0, policy_version 18768 (0.0008) +[2026-06-02 16:45:41,143][247478] Updated weights for policy 0, policy_version 18779 (0.0008) +[2026-06-02 16:45:41,324][247478] Updated weights for policy 0, policy_version 18789 (0.0009) +[2026-06-02 16:45:41,505][247478] Updated weights for policy 0, policy_version 18799 (0.0009) +[2026-06-02 16:45:41,710][247478] Updated weights for policy 0, policy_version 18810 (0.0008) +[2026-06-02 16:45:42,334][247478] Updated weights for policy 0, policy_version 18821 (0.0009) +[2026-06-02 16:45:42,529][247478] Updated weights for policy 0, policy_version 18832 (0.0006) +[2026-06-02 16:45:42,728][247478] Updated weights for policy 0, policy_version 18843 (0.0004) +[2026-06-02 16:45:42,910][247478] Updated weights for policy 0, policy_version 18853 (0.0004) +[2026-06-02 16:45:43,089][247478] Updated weights for policy 0, policy_version 18863 (0.0004) +[2026-06-02 16:45:43,289][247478] Updated weights for policy 0, policy_version 18874 (0.0004) +[2026-06-02 16:45:43,900][247478] Updated weights for policy 0, policy_version 18884 (0.0004) +[2026-06-02 16:45:44,084][247478] Updated weights for policy 0, policy_version 18894 (0.0004) +[2026-06-02 16:45:44,263][247478] Updated weights for policy 0, policy_version 18904 (0.0004) +[2026-06-02 16:45:44,469][247478] Updated weights for policy 0, policy_version 18916 (0.0004) +[2026-06-02 16:45:44,663][247478] Updated weights for policy 0, policy_version 18927 (0.0004) +[2026-06-02 16:45:44,726][246448] Fps is (10 sec: 19660.7, 60 sec: 20206.9, 300 sec: 20327.3). Total num frames: 9666560. Throughput: 0: 20434.4. Samples: 9685504. Policy #0 lag: (min: 14.0, avg: 31.9, max: 78.0) +[2026-06-02 16:45:44,727][246448] Avg episode reward: [(0, '754.435')] +[2026-06-02 16:45:44,862][247478] Updated weights for policy 0, policy_version 18937 (0.0007) +[2026-06-02 16:45:44,973][247399] Saving new best policy, reward=754.435! +[2026-06-02 16:45:45,481][247478] Updated weights for policy 0, policy_version 18947 (0.0006) +[2026-06-02 16:45:45,655][247478] Updated weights for policy 0, policy_version 18957 (0.0009) +[2026-06-02 16:45:45,830][247478] Updated weights for policy 0, policy_version 18967 (0.0009) +[2026-06-02 16:45:46,021][247478] Updated weights for policy 0, policy_version 18978 (0.0008) +[2026-06-02 16:45:46,209][247478] Updated weights for policy 0, policy_version 18988 (0.0009) +[2026-06-02 16:45:46,388][247478] Updated weights for policy 0, policy_version 18998 (0.0010) +[2026-06-02 16:45:46,563][247478] Updated weights for policy 0, policy_version 19008 (0.0008) +[2026-06-02 16:45:47,208][247478] Updated weights for policy 0, policy_version 19018 (0.0009) +[2026-06-02 16:45:47,391][247478] Updated weights for policy 0, policy_version 19028 (0.0008) +[2026-06-02 16:45:47,567][247478] Updated weights for policy 0, policy_version 19038 (0.0008) +[2026-06-02 16:45:47,744][247478] Updated weights for policy 0, policy_version 19048 (0.0009) +[2026-06-02 16:45:47,921][247478] Updated weights for policy 0, policy_version 19058 (0.0008) +[2026-06-02 16:45:48,098][247478] Updated weights for policy 0, policy_version 19068 (0.0008) +[2026-06-02 16:45:48,754][247478] Updated weights for policy 0, policy_version 19078 (0.0006) +[2026-06-02 16:45:48,944][247478] Updated weights for policy 0, policy_version 19089 (0.0004) +[2026-06-02 16:45:49,121][247478] Updated weights for policy 0, policy_version 19099 (0.0004) +[2026-06-02 16:45:49,316][247478] Updated weights for policy 0, policy_version 19110 (0.0004) +[2026-06-02 16:45:49,498][247478] Updated weights for policy 0, policy_version 19120 (0.0005) +[2026-06-02 16:45:49,696][247478] Updated weights for policy 0, policy_version 19131 (0.0005) +[2026-06-02 16:45:49,725][246448] Fps is (10 sec: 19660.7, 60 sec: 20206.9, 300 sec: 20327.3). Total num frames: 9764864. Throughput: 0: 20423.1. Samples: 9811968. Policy #0 lag: (min: 14.0, avg: 31.9, max: 78.0) +[2026-06-02 16:45:49,726][246448] Avg episode reward: [(0, '791.261')] +[2026-06-02 16:45:49,769][247399] Saving new best policy, reward=791.261! +[2026-06-02 16:45:50,372][247478] Updated weights for policy 0, policy_version 19143 (0.0007) +[2026-06-02 16:45:50,552][247478] Updated weights for policy 0, policy_version 19153 (0.0008) +[2026-06-02 16:45:50,728][247478] Updated weights for policy 0, policy_version 19163 (0.0008) +[2026-06-02 16:45:50,905][247478] Updated weights for policy 0, policy_version 19173 (0.0006) +[2026-06-02 16:45:51,072][247478] Updated weights for policy 0, policy_version 19183 (0.0004) +[2026-06-02 16:45:51,288][247478] Updated weights for policy 0, policy_version 19195 (0.0004) +[2026-06-02 16:45:51,940][247478] Updated weights for policy 0, policy_version 19205 (0.0006) +[2026-06-02 16:45:52,106][247478] Updated weights for policy 0, policy_version 19215 (0.0008) +[2026-06-02 16:45:52,307][247478] Updated weights for policy 0, policy_version 19226 (0.0008) +[2026-06-02 16:45:52,487][247478] Updated weights for policy 0, policy_version 19236 (0.0008) +[2026-06-02 16:45:52,666][247478] Updated weights for policy 0, policy_version 19246 (0.0009) +[2026-06-02 16:45:52,843][247478] Updated weights for policy 0, policy_version 19256 (0.0006) +[2026-06-02 16:45:53,520][247478] Updated weights for policy 0, policy_version 19267 (0.0005) +[2026-06-02 16:45:53,684][247478] Updated weights for policy 0, policy_version 19277 (0.0006) +[2026-06-02 16:45:53,862][247478] Updated weights for policy 0, policy_version 19287 (0.0005) +[2026-06-02 16:45:54,063][247478] Updated weights for policy 0, policy_version 19297 (0.0005) +[2026-06-02 16:45:54,225][247478] Updated weights for policy 0, policy_version 19307 (0.0005) +[2026-06-02 16:45:54,427][247478] Updated weights for policy 0, policy_version 19318 (0.0005) +[2026-06-02 16:45:54,601][247478] Updated weights for policy 0, policy_version 19328 (0.0006) +[2026-06-02 16:45:54,725][246448] Fps is (10 sec: 22937.6, 60 sec: 20753.1, 300 sec: 20327.3). Total num frames: 9895936. Throughput: 0: 20454.4. Samples: 9926400. Policy #0 lag: (min: 14.0, avg: 31.9, max: 78.0) +[2026-06-02 16:45:54,727][246448] Avg episode reward: [(0, '831.092')] +[2026-06-02 16:45:54,732][247399] Saving new best policy, reward=831.092! +[2026-06-02 16:45:55,244][247478] Updated weights for policy 0, policy_version 19338 (0.0009) +[2026-06-02 16:45:55,420][247478] Updated weights for policy 0, policy_version 19348 (0.0008) +[2026-06-02 16:45:55,594][247478] Updated weights for policy 0, policy_version 19358 (0.0005) +[2026-06-02 16:45:55,777][247478] Updated weights for policy 0, policy_version 19369 (0.0005) +[2026-06-02 16:45:55,963][247478] Updated weights for policy 0, policy_version 19379 (0.0005) +[2026-06-02 16:45:56,162][247478] Updated weights for policy 0, policy_version 19390 (0.0005) +[2026-06-02 16:45:56,817][247478] Updated weights for policy 0, policy_version 19400 (0.0005) +[2026-06-02 16:45:57,025][247478] Updated weights for policy 0, policy_version 19412 (0.0005) +[2026-06-02 16:45:57,206][247478] Updated weights for policy 0, policy_version 19422 (0.0005) +[2026-06-02 16:45:57,376][247478] Updated weights for policy 0, policy_version 19432 (0.0005) +[2026-06-02 16:45:57,550][247478] Updated weights for policy 0, policy_version 19442 (0.0005) +[2026-06-02 16:45:57,719][247478] Updated weights for policy 0, policy_version 19452 (0.0005) +[2026-06-02 16:45:58,373][247478] Updated weights for policy 0, policy_version 19462 (0.0005) +[2026-06-02 16:45:58,553][247478] Updated weights for policy 0, policy_version 19473 (0.0005) +[2026-06-02 16:45:58,739][247478] Updated weights for policy 0, policy_version 19484 (0.0005) +[2026-06-02 16:45:58,932][247478] Updated weights for policy 0, policy_version 19495 (0.0005) +[2026-06-02 16:45:59,109][247478] Updated weights for policy 0, policy_version 19505 (0.0004) +[2026-06-02 16:45:59,303][247478] Updated weights for policy 0, policy_version 19516 (0.0005) +[2026-06-02 16:45:59,725][246448] Fps is (10 sec: 22937.7, 60 sec: 20753.0, 300 sec: 20327.3). Total num frames: 9994240. Throughput: 0: 20431.7. Samples: 9988736. Policy #0 lag: (min: 63.0, avg: 78.8, max: 127.0) +[2026-06-02 16:45:59,726][246448] Avg episode reward: [(0, '831.745')] +[2026-06-02 16:45:59,977][247478] Updated weights for policy 0, policy_version 19527 (0.0004) +[2026-06-02 16:46:00,152][247478] Updated weights for policy 0, policy_version 19537 (0.0004) +[2026-06-02 16:46:00,328][247478] Updated weights for policy 0, policy_version 19548 (0.0007) +[2026-06-02 16:46:00,511][247478] Updated weights for policy 0, policy_version 19558 (0.0009) +[2026-06-02 16:46:00,680][247478] Updated weights for policy 0, policy_version 19568 (0.0009) +[2026-06-02 16:46:00,868][247478] Updated weights for policy 0, policy_version 19578 (0.0009) +[2026-06-02 16:46:00,965][247399] Saving new best policy, reward=831.745! +[2026-06-02 16:46:01,528][247478] Updated weights for policy 0, policy_version 19588 (0.0009) +[2026-06-02 16:46:01,697][247478] Updated weights for policy 0, policy_version 19598 (0.0008) +[2026-06-02 16:46:01,906][247478] Updated weights for policy 0, policy_version 19610 (0.0009) +[2026-06-02 16:46:02,082][247478] Updated weights for policy 0, policy_version 19620 (0.0008) +[2026-06-02 16:46:02,288][247478] Updated weights for policy 0, policy_version 19631 (0.0009) +[2026-06-02 16:46:02,479][247478] Updated weights for policy 0, policy_version 19642 (0.0008) +[2026-06-02 16:46:03,127][247478] Updated weights for policy 0, policy_version 19652 (0.0010) +[2026-06-02 16:46:03,311][247478] Updated weights for policy 0, policy_version 19663 (0.0008) +[2026-06-02 16:46:03,493][247478] Updated weights for policy 0, policy_version 19673 (0.0008) +[2026-06-02 16:46:03,681][247478] Updated weights for policy 0, policy_version 19684 (0.0008) +[2026-06-02 16:46:03,886][247478] Updated weights for policy 0, policy_version 19695 (0.0008) +[2026-06-02 16:46:04,059][247478] Updated weights for policy 0, policy_version 19705 (0.0008) +[2026-06-02 16:46:04,722][247478] Updated weights for policy 0, policy_version 19715 (0.0009) +[2026-06-02 16:46:04,725][246448] Fps is (10 sec: 19660.9, 60 sec: 20753.1, 300 sec: 20327.3). Total num frames: 10092544. Throughput: 0: 20417.4. Samples: 10113664. Policy #0 lag: (min: 63.0, avg: 78.8, max: 127.0) +[2026-06-02 16:46:04,726][246448] Avg episode reward: [(0, '844.935')] +[2026-06-02 16:46:04,886][247478] Updated weights for policy 0, policy_version 19725 (0.0009) +[2026-06-02 16:46:05,062][247478] Updated weights for policy 0, policy_version 19735 (0.0008) +[2026-06-02 16:46:05,241][247478] Updated weights for policy 0, policy_version 19745 (0.0009) +[2026-06-02 16:46:05,439][247478] Updated weights for policy 0, policy_version 19756 (0.0009) +[2026-06-02 16:46:05,614][247478] Updated weights for policy 0, policy_version 19766 (0.0008) +[2026-06-02 16:46:05,794][247399] Saving new best policy, reward=844.935! +[2026-06-02 16:46:05,796][247478] Updated weights for policy 0, policy_version 19776 (0.0008) +[2026-06-02 16:46:06,426][247478] Updated weights for policy 0, policy_version 19786 (0.0009) +[2026-06-02 16:46:06,608][247478] Updated weights for policy 0, policy_version 19796 (0.0008) +[2026-06-02 16:46:06,786][247478] Updated weights for policy 0, policy_version 19806 (0.0008) +[2026-06-02 16:46:06,960][247478] Updated weights for policy 0, policy_version 19816 (0.0008) +[2026-06-02 16:46:07,149][247478] Updated weights for policy 0, policy_version 19826 (0.0008) +[2026-06-02 16:46:07,327][247478] Updated weights for policy 0, policy_version 19836 (0.0009) +[2026-06-02 16:46:07,972][247478] Updated weights for policy 0, policy_version 19846 (0.0008) +[2026-06-02 16:46:08,150][247478] Updated weights for policy 0, policy_version 19856 (0.0009) +[2026-06-02 16:46:08,322][247478] Updated weights for policy 0, policy_version 19866 (0.0009) +[2026-06-02 16:46:08,537][247478] Updated weights for policy 0, policy_version 19878 (0.0008) +[2026-06-02 16:46:08,718][247478] Updated weights for policy 0, policy_version 19888 (0.0010) +[2026-06-02 16:46:08,888][247478] Updated weights for policy 0, policy_version 19898 (0.0005) +[2026-06-02 16:46:09,537][247478] Updated weights for policy 0, policy_version 19908 (0.0004) +[2026-06-02 16:46:09,703][247478] Updated weights for policy 0, policy_version 19918 (0.0004) +[2026-06-02 16:46:09,725][246448] Fps is (10 sec: 19660.8, 60 sec: 20206.9, 300 sec: 20327.3). Total num frames: 10190848. Throughput: 0: 20374.8. Samples: 10238720. Policy #0 lag: (min: 63.0, avg: 78.8, max: 127.0) +[2026-06-02 16:46:09,726][246448] Avg episode reward: [(0, '857.427')] +[2026-06-02 16:46:09,883][247478] Updated weights for policy 0, policy_version 19928 (0.0004) +[2026-06-02 16:46:10,053][247478] Updated weights for policy 0, policy_version 19938 (0.0010) +[2026-06-02 16:46:10,235][247478] Updated weights for policy 0, policy_version 19948 (0.0004) +[2026-06-02 16:46:10,425][247478] Updated weights for policy 0, policy_version 19959 (0.0004) +[2026-06-02 16:46:10,583][247399] Saving new best policy, reward=857.427! +[2026-06-02 16:46:11,076][247478] Updated weights for policy 0, policy_version 19969 (0.0007) +[2026-06-02 16:46:11,256][247478] Updated weights for policy 0, policy_version 19980 (0.0006) +[2026-06-02 16:46:11,427][247478] Updated weights for policy 0, policy_version 19990 (0.0007) +[2026-06-02 16:46:11,620][247478] Updated weights for policy 0, policy_version 20001 (0.0004) +[2026-06-02 16:46:11,803][247478] Updated weights for policy 0, policy_version 20011 (0.0004) +[2026-06-02 16:46:11,983][247478] Updated weights for policy 0, policy_version 20021 (0.0009) +[2026-06-02 16:46:12,155][247478] Updated weights for policy 0, policy_version 20031 (0.0008) +[2026-06-02 16:46:12,838][247478] Updated weights for policy 0, policy_version 20043 (0.0009) +[2026-06-02 16:46:13,053][247478] Updated weights for policy 0, policy_version 20055 (0.0009) +[2026-06-02 16:46:13,257][247478] Updated weights for policy 0, policy_version 20067 (0.0009) +[2026-06-02 16:46:13,475][247478] Updated weights for policy 0, policy_version 20080 (0.0008) +[2026-06-02 16:46:13,655][247478] Updated weights for policy 0, policy_version 20090 (0.0008) +[2026-06-02 16:46:14,323][247478] Updated weights for policy 0, policy_version 20100 (0.0007) +[2026-06-02 16:46:14,489][247478] Updated weights for policy 0, policy_version 20110 (0.0004) +[2026-06-02 16:46:14,665][247478] Updated weights for policy 0, policy_version 20120 (0.0004) +[2026-06-02 16:46:14,725][246448] Fps is (10 sec: 19660.8, 60 sec: 20206.9, 300 sec: 20327.3). Total num frames: 10289152. Throughput: 0: 20460.1. Samples: 10301440. Policy #0 lag: (min: 44.0, avg: 79.7, max: 111.0) +[2026-06-02 16:46:14,726][246448] Avg episode reward: [(0, '854.528')] +[2026-06-02 16:46:14,841][247478] Updated weights for policy 0, policy_version 20130 (0.0004) +[2026-06-02 16:46:15,027][247478] Updated weights for policy 0, policy_version 20140 (0.0004) +[2026-06-02 16:46:15,203][247478] Updated weights for policy 0, policy_version 20150 (0.0004) +[2026-06-02 16:46:15,382][247478] Updated weights for policy 0, policy_version 20160 (0.0007) +[2026-06-02 16:46:16,012][247478] Updated weights for policy 0, policy_version 20170 (0.0004) +[2026-06-02 16:46:16,185][247478] Updated weights for policy 0, policy_version 20180 (0.0004) +[2026-06-02 16:46:16,368][247478] Updated weights for policy 0, policy_version 20190 (0.0004) +[2026-06-02 16:46:16,550][247478] Updated weights for policy 0, policy_version 20200 (0.0004) +[2026-06-02 16:46:16,754][247478] Updated weights for policy 0, policy_version 20211 (0.0004) +[2026-06-02 16:46:16,929][247478] Updated weights for policy 0, policy_version 20221 (0.0005) +[2026-06-02 16:46:17,557][247478] Updated weights for policy 0, policy_version 20231 (0.0008) +[2026-06-02 16:46:17,727][247478] Updated weights for policy 0, policy_version 20241 (0.0008) +[2026-06-02 16:46:17,897][247478] Updated weights for policy 0, policy_version 20251 (0.0008) +[2026-06-02 16:46:18,085][247478] Updated weights for policy 0, policy_version 20262 (0.0007) +[2026-06-02 16:46:18,272][247478] Updated weights for policy 0, policy_version 20272 (0.0004) +[2026-06-02 16:46:18,463][247478] Updated weights for policy 0, policy_version 20283 (0.0004) +[2026-06-02 16:46:19,096][247478] Updated weights for policy 0, policy_version 20293 (0.0006) +[2026-06-02 16:46:19,262][247478] Updated weights for policy 0, policy_version 20303 (0.0008) +[2026-06-02 16:46:19,445][247478] Updated weights for policy 0, policy_version 20313 (0.0008) +[2026-06-02 16:46:19,624][247478] Updated weights for policy 0, policy_version 20323 (0.0008) +[2026-06-02 16:46:19,725][246448] Fps is (10 sec: 19660.8, 60 sec: 20206.9, 300 sec: 20327.3). Total num frames: 10387456. Throughput: 0: 20380.5. Samples: 10415104. Policy #0 lag: (min: 44.0, avg: 79.7, max: 111.0) +[2026-06-02 16:46:19,726][246448] Avg episode reward: [(0, '852.734')] +[2026-06-02 16:46:19,820][247478] Updated weights for policy 0, policy_version 20334 (0.0008) +[2026-06-02 16:46:20,004][247478] Updated weights for policy 0, policy_version 20344 (0.0009) +[2026-06-02 16:46:20,637][247478] Updated weights for policy 0, policy_version 20354 (0.0009) +[2026-06-02 16:46:20,801][247478] Updated weights for policy 0, policy_version 20364 (0.0009) +[2026-06-02 16:46:20,969][247478] Updated weights for policy 0, policy_version 20374 (0.0007) +[2026-06-02 16:46:21,148][247478] Updated weights for policy 0, policy_version 20384 (0.0008) +[2026-06-02 16:46:21,338][247478] Updated weights for policy 0, policy_version 20394 (0.0004) +[2026-06-02 16:46:21,515][247478] Updated weights for policy 0, policy_version 20404 (0.0004) +[2026-06-02 16:46:21,692][247478] Updated weights for policy 0, policy_version 20414 (0.0004) +[2026-06-02 16:46:22,299][247478] Updated weights for policy 0, policy_version 20424 (0.0007) +[2026-06-02 16:46:22,472][247478] Updated weights for policy 0, policy_version 20434 (0.0007) +[2026-06-02 16:46:22,662][247478] Updated weights for policy 0, policy_version 20444 (0.0006) +[2026-06-02 16:46:22,856][247478] Updated weights for policy 0, policy_version 20455 (0.0004) +[2026-06-02 16:46:23,026][247478] Updated weights for policy 0, policy_version 20465 (0.0004) +[2026-06-02 16:46:23,194][247478] Updated weights for policy 0, policy_version 20475 (0.0006) +[2026-06-02 16:46:23,842][247478] Updated weights for policy 0, policy_version 20485 (0.0009) +[2026-06-02 16:46:24,004][247478] Updated weights for policy 0, policy_version 20495 (0.0008) +[2026-06-02 16:46:24,203][247478] Updated weights for policy 0, policy_version 20506 (0.0007) +[2026-06-02 16:46:24,399][247478] Updated weights for policy 0, policy_version 20517 (0.0009) +[2026-06-02 16:46:24,588][247478] Updated weights for policy 0, policy_version 20528 (0.0008) +[2026-06-02 16:46:24,725][246448] Fps is (10 sec: 19661.1, 60 sec: 20207.0, 300 sec: 20327.3). Total num frames: 10485760. Throughput: 0: 20340.7. Samples: 10537984. Policy #0 lag: (min: 44.0, avg: 79.7, max: 111.0) +[2026-06-02 16:46:24,726][246448] Avg episode reward: [(0, '869.700')] +[2026-06-02 16:46:24,785][247478] Updated weights for policy 0, policy_version 20539 (0.0010) +[2026-06-02 16:46:24,876][247399] Saving new best policy, reward=869.700! +[2026-06-02 16:46:25,457][247478] Updated weights for policy 0, policy_version 20550 (0.0009) +[2026-06-02 16:46:25,629][247478] Updated weights for policy 0, policy_version 20560 (0.0009) +[2026-06-02 16:46:25,826][247478] Updated weights for policy 0, policy_version 20571 (0.0007) +[2026-06-02 16:46:25,998][247478] Updated weights for policy 0, policy_version 20581 (0.0007) +[2026-06-02 16:46:26,186][247478] Updated weights for policy 0, policy_version 20591 (0.0009) +[2026-06-02 16:46:26,364][247478] Updated weights for policy 0, policy_version 20601 (0.0009) +[2026-06-02 16:46:27,030][247478] Updated weights for policy 0, policy_version 20612 (0.0009) +[2026-06-02 16:46:27,195][247478] Updated weights for policy 0, policy_version 20622 (0.0004) +[2026-06-02 16:46:27,372][247478] Updated weights for policy 0, policy_version 20632 (0.0005) +[2026-06-02 16:46:27,548][247478] Updated weights for policy 0, policy_version 20642 (0.0009) +[2026-06-02 16:46:27,736][247478] Updated weights for policy 0, policy_version 20652 (0.0008) +[2026-06-02 16:46:27,923][247478] Updated weights for policy 0, policy_version 20662 (0.0008) +[2026-06-02 16:46:28,104][247478] Updated weights for policy 0, policy_version 20672 (0.0009) +[2026-06-02 16:46:28,718][247478] Updated weights for policy 0, policy_version 20682 (0.0008) +[2026-06-02 16:46:28,907][247478] Updated weights for policy 0, policy_version 20692 (0.0006) +[2026-06-02 16:46:29,081][247478] Updated weights for policy 0, policy_version 20702 (0.0009) +[2026-06-02 16:46:29,261][247478] Updated weights for policy 0, policy_version 20712 (0.0010) +[2026-06-02 16:46:29,441][247478] Updated weights for policy 0, policy_version 20722 (0.0012) +[2026-06-02 16:46:29,617][247478] Updated weights for policy 0, policy_version 20732 (0.0009) +[2026-06-02 16:46:29,725][246448] Fps is (10 sec: 22937.7, 60 sec: 20753.1, 300 sec: 20438.3). Total num frames: 10616832. Throughput: 0: 20349.2. Samples: 10601216. Policy #0 lag: (min: 63.0, avg: 79.3, max: 127.0) +[2026-06-02 16:46:29,727][246448] Avg episode reward: [(0, '831.167')] +[2026-06-02 16:46:30,256][247478] Updated weights for policy 0, policy_version 20742 (0.0008) +[2026-06-02 16:46:30,440][247478] Updated weights for policy 0, policy_version 20753 (0.0006) +[2026-06-02 16:46:30,631][247478] Updated weights for policy 0, policy_version 20764 (0.0005) +[2026-06-02 16:46:30,823][247478] Updated weights for policy 0, policy_version 20774 (0.0007) +[2026-06-02 16:46:30,998][247478] Updated weights for policy 0, policy_version 20784 (0.0006) +[2026-06-02 16:46:31,180][247478] Updated weights for policy 0, policy_version 20794 (0.0007) +[2026-06-02 16:46:31,830][247478] Updated weights for policy 0, policy_version 20804 (0.0008) +[2026-06-02 16:46:32,025][247478] Updated weights for policy 0, policy_version 20815 (0.0008) +[2026-06-02 16:46:32,199][247478] Updated weights for policy 0, policy_version 20825 (0.0008) +[2026-06-02 16:46:32,371][247478] Updated weights for policy 0, policy_version 20835 (0.0009) +[2026-06-02 16:46:32,555][247478] Updated weights for policy 0, policy_version 20845 (0.0007) +[2026-06-02 16:46:32,731][247478] Updated weights for policy 0, policy_version 20855 (0.0011) +[2026-06-02 16:46:33,393][247478] Updated weights for policy 0, policy_version 20865 (0.0010) +[2026-06-02 16:46:33,555][247478] Updated weights for policy 0, policy_version 20875 (0.0007) +[2026-06-02 16:46:33,724][247478] Updated weights for policy 0, policy_version 20885 (0.0007) +[2026-06-02 16:46:33,910][247478] Updated weights for policy 0, policy_version 20896 (0.0006) +[2026-06-02 16:46:34,129][247478] Updated weights for policy 0, policy_version 20908 (0.0005) +[2026-06-02 16:46:34,317][247478] Updated weights for policy 0, policy_version 20918 (0.0004) +[2026-06-02 16:46:34,493][247478] Updated weights for policy 0, policy_version 20928 (0.0005) +[2026-06-02 16:46:34,725][246448] Fps is (10 sec: 22937.2, 60 sec: 20753.0, 300 sec: 20438.3). Total num frames: 10715136. Throughput: 0: 20326.4. Samples: 10726656. Policy #0 lag: (min: 63.0, avg: 79.3, max: 127.0) +[2026-06-02 16:46:34,726][246448] Avg episode reward: [(0, '883.881')] +[2026-06-02 16:46:34,731][247399] Saving new best policy, reward=883.881! +[2026-06-02 16:46:35,124][247478] Updated weights for policy 0, policy_version 20938 (0.0009) +[2026-06-02 16:46:35,306][247478] Updated weights for policy 0, policy_version 20948 (0.0008) +[2026-06-02 16:46:35,494][247478] Updated weights for policy 0, policy_version 20958 (0.0008) +[2026-06-02 16:46:35,674][247478] Updated weights for policy 0, policy_version 20968 (0.0008) +[2026-06-02 16:46:35,850][247478] Updated weights for policy 0, policy_version 20978 (0.0009) +[2026-06-02 16:46:36,031][247478] Updated weights for policy 0, policy_version 20988 (0.0009) +[2026-06-02 16:46:36,663][247478] Updated weights for policy 0, policy_version 20998 (0.0007) +[2026-06-02 16:46:36,837][247478] Updated weights for policy 0, policy_version 21008 (0.0004) +[2026-06-02 16:46:37,019][247478] Updated weights for policy 0, policy_version 21018 (0.0004) +[2026-06-02 16:46:37,196][247478] Updated weights for policy 0, policy_version 21028 (0.0005) +[2026-06-02 16:46:37,375][247478] Updated weights for policy 0, policy_version 21038 (0.0005) +[2026-06-02 16:46:37,553][247478] Updated weights for policy 0, policy_version 21048 (0.0009) +[2026-06-02 16:46:38,208][247478] Updated weights for policy 0, policy_version 21058 (0.0009) +[2026-06-02 16:46:38,372][247478] Updated weights for policy 0, policy_version 21068 (0.0009) +[2026-06-02 16:46:38,550][247478] Updated weights for policy 0, policy_version 21078 (0.0009) +[2026-06-02 16:46:38,718][247478] Updated weights for policy 0, policy_version 21088 (0.0009) +[2026-06-02 16:46:38,907][247478] Updated weights for policy 0, policy_version 21098 (0.0006) +[2026-06-02 16:46:39,089][247478] Updated weights for policy 0, policy_version 21108 (0.0009) +[2026-06-02 16:46:39,268][247478] Updated weights for policy 0, policy_version 21118 (0.0008) +[2026-06-02 16:46:39,725][246448] Fps is (10 sec: 19660.7, 60 sec: 20753.1, 300 sec: 20438.3). Total num frames: 10813440. Throughput: 0: 20542.6. Samples: 10850816. Policy #0 lag: (min: 63.0, avg: 79.3, max: 127.0) +[2026-06-02 16:46:39,726][246448] Avg episode reward: [(0, '865.260')] +[2026-06-02 16:46:39,915][247478] Updated weights for policy 0, policy_version 21128 (0.0008) +[2026-06-02 16:46:40,091][247478] Updated weights for policy 0, policy_version 21138 (0.0009) +[2026-06-02 16:46:40,272][247478] Updated weights for policy 0, policy_version 21148 (0.0009) +[2026-06-02 16:46:40,463][247478] Updated weights for policy 0, policy_version 21159 (0.0009) +[2026-06-02 16:46:40,659][247478] Updated weights for policy 0, policy_version 21170 (0.0009) +[2026-06-02 16:46:40,836][247478] Updated weights for policy 0, policy_version 21180 (0.0008) +[2026-06-02 16:46:41,487][247478] Updated weights for policy 0, policy_version 21190 (0.0008) +[2026-06-02 16:46:41,657][247478] Updated weights for policy 0, policy_version 21200 (0.0008) +[2026-06-02 16:46:41,850][247478] Updated weights for policy 0, policy_version 21211 (0.0007) +[2026-06-02 16:46:42,028][247478] Updated weights for policy 0, policy_version 21221 (0.0008) +[2026-06-02 16:46:42,210][247478] Updated weights for policy 0, policy_version 21231 (0.0009) +[2026-06-02 16:46:42,393][247478] Updated weights for policy 0, policy_version 21241 (0.0009) +[2026-06-02 16:46:43,036][247478] Updated weights for policy 0, policy_version 21251 (0.0008) +[2026-06-02 16:46:43,239][247478] Updated weights for policy 0, policy_version 21263 (0.0008) +[2026-06-02 16:46:43,415][247478] Updated weights for policy 0, policy_version 21273 (0.0009) +[2026-06-02 16:46:43,595][247478] Updated weights for policy 0, policy_version 21283 (0.0009) +[2026-06-02 16:46:43,769][247478] Updated weights for policy 0, policy_version 21293 (0.0008) +[2026-06-02 16:46:43,955][247478] Updated weights for policy 0, policy_version 21303 (0.0009) +[2026-06-02 16:46:44,607][247478] Updated weights for policy 0, policy_version 21313 (0.0009) +[2026-06-02 16:46:44,725][246448] Fps is (10 sec: 19661.0, 60 sec: 20753.1, 300 sec: 20438.3). Total num frames: 10911744. Throughput: 0: 20366.2. Samples: 10905216. Policy #0 lag: (min: 63.0, avg: 79.5, max: 127.0) +[2026-06-02 16:46:44,726][246448] Avg episode reward: [(0, '878.481')] +[2026-06-02 16:46:45,099][247478] Updated weights for policy 0, policy_version 21347 (0.0010) +[2026-06-02 16:46:45,283][247478] Updated weights for policy 0, policy_version 21357 (0.0008) +[2026-06-02 16:46:45,456][247478] Updated weights for policy 0, policy_version 21367 (0.0007) +[2026-06-02 16:46:46,209][247478] Updated weights for policy 0, policy_version 21377 (0.0005) +[2026-06-02 16:46:46,390][247478] Updated weights for policy 0, policy_version 21388 (0.0009) +[2026-06-02 16:46:46,564][247478] Updated weights for policy 0, policy_version 21398 (0.0009) +[2026-06-02 16:46:46,760][247478] Updated weights for policy 0, policy_version 21409 (0.0009) +[2026-06-02 16:46:46,953][247478] Updated weights for policy 0, policy_version 21420 (0.0009) +[2026-06-02 16:46:47,134][247478] Updated weights for policy 0, policy_version 21430 (0.0009) +[2026-06-02 16:46:47,306][247478] Updated weights for policy 0, policy_version 21440 (0.0008) +[2026-06-02 16:46:47,949][247478] Updated weights for policy 0, policy_version 21450 (0.0009) +[2026-06-02 16:46:48,128][247478] Updated weights for policy 0, policy_version 21460 (0.0009) +[2026-06-02 16:46:48,315][247478] Updated weights for policy 0, policy_version 21470 (0.0009) +[2026-06-02 16:46:48,483][247478] Updated weights for policy 0, policy_version 21480 (0.0010) +[2026-06-02 16:46:48,671][247478] Updated weights for policy 0, policy_version 21490 (0.0009) +[2026-06-02 16:46:48,867][247478] Updated weights for policy 0, policy_version 21501 (0.0008) +[2026-06-02 16:46:49,521][247478] Updated weights for policy 0, policy_version 21511 (0.0008) +[2026-06-02 16:46:49,698][247478] Updated weights for policy 0, policy_version 21521 (0.0009) +[2026-06-02 16:46:49,725][246448] Fps is (10 sec: 19660.8, 60 sec: 20753.1, 300 sec: 20327.3). Total num frames: 11010048. Throughput: 0: 20320.7. Samples: 11028096. Policy #0 lag: (min: 63.0, avg: 79.5, max: 127.0) +[2026-06-02 16:46:49,726][246448] Avg episode reward: [(0, '924.502')] +[2026-06-02 16:46:49,876][247478] Updated weights for policy 0, policy_version 21531 (0.0006) +[2026-06-02 16:46:50,045][247478] Updated weights for policy 0, policy_version 21541 (0.0005) +[2026-06-02 16:46:50,227][247478] Updated weights for policy 0, policy_version 21552 (0.0009) +[2026-06-02 16:46:50,434][247478] Updated weights for policy 0, policy_version 21563 (0.0009) +[2026-06-02 16:46:50,516][247399] Saving new best policy, reward=924.502! +[2026-06-02 16:46:51,112][247478] Updated weights for policy 0, policy_version 21574 (0.0009) +[2026-06-02 16:46:51,284][247478] Updated weights for policy 0, policy_version 21584 (0.0008) +[2026-06-02 16:46:51,467][247478] Updated weights for policy 0, policy_version 21594 (0.0009) +[2026-06-02 16:46:51,640][247478] Updated weights for policy 0, policy_version 21604 (0.0008) +[2026-06-02 16:46:51,828][247478] Updated weights for policy 0, policy_version 21614 (0.0008) +[2026-06-02 16:46:52,000][247478] Updated weights for policy 0, policy_version 21624 (0.0009) +[2026-06-02 16:46:52,636][247478] Updated weights for policy 0, policy_version 21634 (0.0006) +[2026-06-02 16:46:52,830][247478] Updated weights for policy 0, policy_version 21646 (0.0009) +[2026-06-02 16:46:53,003][247478] Updated weights for policy 0, policy_version 21656 (0.0009) +[2026-06-02 16:46:53,202][247478] Updated weights for policy 0, policy_version 21667 (0.0009) +[2026-06-02 16:46:53,393][247478] Updated weights for policy 0, policy_version 21678 (0.0009) +[2026-06-02 16:46:53,562][247478] Updated weights for policy 0, policy_version 21688 (0.0009) +[2026-06-02 16:46:54,271][247478] Updated weights for policy 0, policy_version 21699 (0.0009) +[2026-06-02 16:46:54,444][247478] Updated weights for policy 0, policy_version 21709 (0.0008) +[2026-06-02 16:46:54,613][247478] Updated weights for policy 0, policy_version 21719 (0.0008) +[2026-06-02 16:46:54,726][246448] Fps is (10 sec: 19660.6, 60 sec: 20206.9, 300 sec: 20327.3). Total num frames: 11108352. Throughput: 0: 20343.4. Samples: 11154176. Policy #0 lag: (min: 63.0, avg: 79.5, max: 127.0) +[2026-06-02 16:46:54,727][246448] Avg episode reward: [(0, '929.636')] +[2026-06-02 16:46:54,788][247478] Updated weights for policy 0, policy_version 21729 (0.0008) +[2026-06-02 16:46:54,979][247478] Updated weights for policy 0, policy_version 21739 (0.0008) +[2026-06-02 16:46:55,178][247478] Updated weights for policy 0, policy_version 21750 (0.0007) +[2026-06-02 16:46:55,343][247399] Saving new best policy, reward=929.636! +[2026-06-02 16:46:55,344][247478] Updated weights for policy 0, policy_version 21760 (0.0006) +[2026-06-02 16:46:55,973][247478] Updated weights for policy 0, policy_version 21770 (0.0009) +[2026-06-02 16:46:56,164][247478] Updated weights for policy 0, policy_version 21781 (0.0009) +[2026-06-02 16:46:56,342][247478] Updated weights for policy 0, policy_version 21791 (0.0009) +[2026-06-02 16:46:56,519][247478] Updated weights for policy 0, policy_version 21801 (0.0009) +[2026-06-02 16:46:56,697][247478] Updated weights for policy 0, policy_version 21811 (0.0008) +[2026-06-02 16:46:56,883][247478] Updated weights for policy 0, policy_version 21821 (0.0009) +[2026-06-02 16:46:57,522][247478] Updated weights for policy 0, policy_version 21831 (0.0008) +[2026-06-02 16:46:57,698][247478] Updated weights for policy 0, policy_version 21841 (0.0009) +[2026-06-02 16:46:57,870][247478] Updated weights for policy 0, policy_version 21851 (0.0008) +[2026-06-02 16:46:58,067][247478] Updated weights for policy 0, policy_version 21861 (0.0009) +[2026-06-02 16:46:58,236][247478] Updated weights for policy 0, policy_version 21871 (0.0009) +[2026-06-02 16:46:58,413][247478] Updated weights for policy 0, policy_version 21881 (0.0008) +[2026-06-02 16:46:59,065][247478] Updated weights for policy 0, policy_version 21891 (0.0009) +[2026-06-02 16:46:59,232][247478] Updated weights for policy 0, policy_version 21901 (0.0009) +[2026-06-02 16:46:59,428][247478] Updated weights for policy 0, policy_version 21912 (0.0009) +[2026-06-02 16:46:59,614][247478] Updated weights for policy 0, policy_version 21922 (0.0009) +[2026-06-02 16:46:59,725][246448] Fps is (10 sec: 19660.7, 60 sec: 20206.9, 300 sec: 20327.3). Total num frames: 11206656. Throughput: 0: 20340.6. Samples: 11216768. Policy #0 lag: (min: 63.0, avg: 79.5, max: 127.0) +[2026-06-02 16:46:59,726][246448] Avg episode reward: [(0, '938.761')] +[2026-06-02 16:46:59,785][247478] Updated weights for policy 0, policy_version 21932 (0.0009) +[2026-06-02 16:46:59,972][247478] Updated weights for policy 0, policy_version 21942 (0.0008) +[2026-06-02 16:47:00,152][247399] Saving new best policy, reward=938.761! +[2026-06-02 16:47:00,154][247478] Updated weights for policy 0, policy_version 21952 (0.0009) +[2026-06-02 16:47:00,786][247478] Updated weights for policy 0, policy_version 21962 (0.0009) +[2026-06-02 16:47:00,969][247478] Updated weights for policy 0, policy_version 21972 (0.0009) +[2026-06-02 16:47:01,140][247478] Updated weights for policy 0, policy_version 21982 (0.0009) +[2026-06-02 16:47:01,322][247478] Updated weights for policy 0, policy_version 21992 (0.0008) +[2026-06-02 16:47:01,496][247478] Updated weights for policy 0, policy_version 22002 (0.0009) +[2026-06-02 16:47:01,687][247478] Updated weights for policy 0, policy_version 22012 (0.0009) +[2026-06-02 16:47:02,321][247478] Updated weights for policy 0, policy_version 22022 (0.0009) +[2026-06-02 16:47:02,492][247478] Updated weights for policy 0, policy_version 22032 (0.0009) +[2026-06-02 16:47:02,667][247478] Updated weights for policy 0, policy_version 22042 (0.0009) +[2026-06-02 16:47:02,863][247478] Updated weights for policy 0, policy_version 22053 (0.0009) +[2026-06-02 16:47:03,044][247478] Updated weights for policy 0, policy_version 22063 (0.0009) +[2026-06-02 16:47:03,220][247478] Updated weights for policy 0, policy_version 22073 (0.0009) +[2026-06-02 16:47:03,875][247478] Updated weights for policy 0, policy_version 22084 (0.0009) +[2026-06-02 16:47:04,046][247478] Updated weights for policy 0, policy_version 22094 (0.0009) +[2026-06-02 16:47:04,228][247478] Updated weights for policy 0, policy_version 22104 (0.0008) +[2026-06-02 16:47:04,416][247478] Updated weights for policy 0, policy_version 22115 (0.0008) +[2026-06-02 16:47:04,601][247478] Updated weights for policy 0, policy_version 22125 (0.0009) +[2026-06-02 16:47:04,726][246448] Fps is (10 sec: 19660.7, 60 sec: 20206.9, 300 sec: 20327.3). Total num frames: 11304960. Throughput: 0: 20545.4. Samples: 11339648. Policy #0 lag: (min: 63.0, avg: 79.4, max: 127.0) +[2026-06-02 16:47:04,727][246448] Avg episode reward: [(0, '965.359')] +[2026-06-02 16:47:04,778][247478] Updated weights for policy 0, policy_version 22135 (0.0006) +[2026-06-02 16:47:04,932][247399] Saving new best policy, reward=965.359! +[2026-06-02 16:47:05,414][247478] Updated weights for policy 0, policy_version 22145 (0.0009) +[2026-06-02 16:47:05,574][247478] Updated weights for policy 0, policy_version 22155 (0.0008) +[2026-06-02 16:47:05,775][247478] Updated weights for policy 0, policy_version 22166 (0.0010) +[2026-06-02 16:47:05,960][247478] Updated weights for policy 0, policy_version 22176 (0.0009) +[2026-06-02 16:47:06,137][247478] Updated weights for policy 0, policy_version 22186 (0.0009) +[2026-06-02 16:47:06,313][247478] Updated weights for policy 0, policy_version 22196 (0.0009) +[2026-06-02 16:47:06,491][247478] Updated weights for policy 0, policy_version 22206 (0.0009) +[2026-06-02 16:47:07,139][247478] Updated weights for policy 0, policy_version 22217 (0.0008) +[2026-06-02 16:47:07,323][247478] Updated weights for policy 0, policy_version 22227 (0.0008) +[2026-06-02 16:47:07,515][247478] Updated weights for policy 0, policy_version 22238 (0.0009) +[2026-06-02 16:47:07,699][247478] Updated weights for policy 0, policy_version 22248 (0.0009) +[2026-06-02 16:47:07,869][247478] Updated weights for policy 0, policy_version 22258 (0.0008) +[2026-06-02 16:47:08,056][247478] Updated weights for policy 0, policy_version 22268 (0.0009) +[2026-06-02 16:47:08,690][247478] Updated weights for policy 0, policy_version 22279 (0.0005) +[2026-06-02 16:47:08,860][247478] Updated weights for policy 0, policy_version 22289 (0.0004) +[2026-06-02 16:47:09,042][247478] Updated weights for policy 0, policy_version 22299 (0.0004) +[2026-06-02 16:47:09,225][247478] Updated weights for policy 0, policy_version 22309 (0.0004) +[2026-06-02 16:47:09,402][247478] Updated weights for policy 0, policy_version 22319 (0.0007) +[2026-06-02 16:47:09,591][247478] Updated weights for policy 0, policy_version 22330 (0.0008) +[2026-06-02 16:47:09,725][246448] Fps is (10 sec: 22937.8, 60 sec: 20753.1, 300 sec: 20438.3). Total num frames: 11436032. Throughput: 0: 20377.6. Samples: 11454976. Policy #0 lag: (min: 63.0, avg: 79.4, max: 127.0) +[2026-06-02 16:47:09,726][246448] Avg episode reward: [(0, '1011.059')] +[2026-06-02 16:47:09,738][247399] Saving new best policy, reward=1011.059! +[2026-06-02 16:47:10,270][247478] Updated weights for policy 0, policy_version 22340 (0.0007) +[2026-06-02 16:47:10,443][247478] Updated weights for policy 0, policy_version 22350 (0.0004) +[2026-06-02 16:47:10,611][247478] Updated weights for policy 0, policy_version 22360 (0.0006) +[2026-06-02 16:47:10,796][247478] Updated weights for policy 0, policy_version 22370 (0.0009) +[2026-06-02 16:47:10,971][247478] Updated weights for policy 0, policy_version 22380 (0.0008) +[2026-06-02 16:47:11,149][247478] Updated weights for policy 0, policy_version 22390 (0.0009) +[2026-06-02 16:47:11,327][247478] Updated weights for policy 0, policy_version 22400 (0.0009) +[2026-06-02 16:47:11,962][247478] Updated weights for policy 0, policy_version 22410 (0.0008) +[2026-06-02 16:47:12,138][247478] Updated weights for policy 0, policy_version 22420 (0.0008) +[2026-06-02 16:47:12,314][247478] Updated weights for policy 0, policy_version 22430 (0.0009) +[2026-06-02 16:47:12,496][247478] Updated weights for policy 0, policy_version 22440 (0.0008) +[2026-06-02 16:47:12,678][247478] Updated weights for policy 0, policy_version 22450 (0.0009) +[2026-06-02 16:47:12,856][247478] Updated weights for policy 0, policy_version 22460 (0.0008) +[2026-06-02 16:47:13,504][247478] Updated weights for policy 0, policy_version 22470 (0.0009) +[2026-06-02 16:47:13,680][247478] Updated weights for policy 0, policy_version 22480 (0.0009) +[2026-06-02 16:47:13,869][247478] Updated weights for policy 0, policy_version 22490 (0.0008) +[2026-06-02 16:47:14,050][247478] Updated weights for policy 0, policy_version 22500 (0.0009) +[2026-06-02 16:47:14,232][247478] Updated weights for policy 0, policy_version 22510 (0.0008) +[2026-06-02 16:47:14,408][247478] Updated weights for policy 0, policy_version 22520 (0.0008) +[2026-06-02 16:47:14,725][246448] Fps is (10 sec: 22938.0, 60 sec: 20753.1, 300 sec: 20438.3). Total num frames: 11534336. Throughput: 0: 20374.7. Samples: 11518080. Policy #0 lag: (min: 63.0, avg: 79.4, max: 127.0) +[2026-06-02 16:47:14,726][246448] Avg episode reward: [(0, '1009.574')] +[2026-06-02 16:47:15,058][247478] Updated weights for policy 0, policy_version 22531 (0.0009) +[2026-06-02 16:47:15,245][247478] Updated weights for policy 0, policy_version 22542 (0.0008) +[2026-06-02 16:47:15,423][247478] Updated weights for policy 0, policy_version 22552 (0.0008) +[2026-06-02 16:47:15,601][247478] Updated weights for policy 0, policy_version 22562 (0.0008) +[2026-06-02 16:47:15,785][247478] Updated weights for policy 0, policy_version 22572 (0.0009) +[2026-06-02 16:47:15,963][247478] Updated weights for policy 0, policy_version 22582 (0.0009) +[2026-06-02 16:47:16,132][247478] Updated weights for policy 0, policy_version 22592 (0.0008) +[2026-06-02 16:47:16,774][247478] Updated weights for policy 0, policy_version 22602 (0.0007) +[2026-06-02 16:47:16,949][247478] Updated weights for policy 0, policy_version 22612 (0.0008) +[2026-06-02 16:47:17,133][247478] Updated weights for policy 0, policy_version 22622 (0.0007) +[2026-06-02 16:47:17,308][247478] Updated weights for policy 0, policy_version 22632 (0.0004) +[2026-06-02 16:47:17,488][247478] Updated weights for policy 0, policy_version 22642 (0.0004) +[2026-06-02 16:47:17,669][247478] Updated weights for policy 0, policy_version 22652 (0.0005) +[2026-06-02 16:47:18,312][247478] Updated weights for policy 0, policy_version 22662 (0.0009) +[2026-06-02 16:47:18,488][247478] Updated weights for policy 0, policy_version 22672 (0.0008) +[2026-06-02 16:47:18,665][247478] Updated weights for policy 0, policy_version 22682 (0.0009) +[2026-06-02 16:47:18,845][247478] Updated weights for policy 0, policy_version 22692 (0.0008) +[2026-06-02 16:47:19,020][247478] Updated weights for policy 0, policy_version 22702 (0.0009) +[2026-06-02 16:47:19,202][247478] Updated weights for policy 0, policy_version 22712 (0.0009) +[2026-06-02 16:47:19,725][246448] Fps is (10 sec: 19660.7, 60 sec: 20753.1, 300 sec: 20438.3). Total num frames: 11632640. Throughput: 0: 20394.7. Samples: 11644416. Policy #0 lag: (min: 63.0, avg: 79.6, max: 127.0) +[2026-06-02 16:47:19,726][246448] Avg episode reward: [(0, '1016.179')] +[2026-06-02 16:47:19,852][247478] Updated weights for policy 0, policy_version 22722 (0.0009) +[2026-06-02 16:47:20,020][247478] Updated weights for policy 0, policy_version 22732 (0.0008) +[2026-06-02 16:47:20,196][247478] Updated weights for policy 0, policy_version 22742 (0.0009) +[2026-06-02 16:47:20,374][247478] Updated weights for policy 0, policy_version 22752 (0.0009) +[2026-06-02 16:47:20,551][247478] Updated weights for policy 0, policy_version 22762 (0.0008) +[2026-06-02 16:47:20,732][247478] Updated weights for policy 0, policy_version 22772 (0.0009) +[2026-06-02 16:47:20,911][247478] Updated weights for policy 0, policy_version 22782 (0.0009) +[2026-06-02 16:47:20,938][247399] Saving new best policy, reward=1016.179! +[2026-06-02 16:47:21,551][247478] Updated weights for policy 0, policy_version 22792 (0.0009) +[2026-06-02 16:47:21,727][247478] Updated weights for policy 0, policy_version 22802 (0.0008) +[2026-06-02 16:47:21,915][247478] Updated weights for policy 0, policy_version 22812 (0.0007) +[2026-06-02 16:47:22,092][247478] Updated weights for policy 0, policy_version 22822 (0.0008) +[2026-06-02 16:47:22,274][247478] Updated weights for policy 0, policy_version 22832 (0.0009) +[2026-06-02 16:47:22,448][247478] Updated weights for policy 0, policy_version 22842 (0.0009) +[2026-06-02 16:47:23,082][247478] Updated weights for policy 0, policy_version 22852 (0.0008) +[2026-06-02 16:47:23,252][247478] Updated weights for policy 0, policy_version 22862 (0.0008) +[2026-06-02 16:47:23,436][247478] Updated weights for policy 0, policy_version 22872 (0.0009) +[2026-06-02 16:47:23,621][247478] Updated weights for policy 0, policy_version 22882 (0.0008) +[2026-06-02 16:47:23,798][247478] Updated weights for policy 0, policy_version 22892 (0.0009) +[2026-06-02 16:47:23,987][247478] Updated weights for policy 0, policy_version 22903 (0.0009) +[2026-06-02 16:47:24,661][247478] Updated weights for policy 0, policy_version 22913 (0.0009) +[2026-06-02 16:47:24,725][246448] Fps is (10 sec: 19660.8, 60 sec: 20753.0, 300 sec: 20438.3). Total num frames: 11730944. Throughput: 0: 20457.2. Samples: 11771392. Policy #0 lag: (min: 63.0, avg: 79.6, max: 127.0) +[2026-06-02 16:47:24,726][246448] Avg episode reward: [(0, '1002.722')] +[2026-06-02 16:47:24,825][247478] Updated weights for policy 0, policy_version 22923 (0.0009) +[2026-06-02 16:47:25,005][247478] Updated weights for policy 0, policy_version 22933 (0.0008) +[2026-06-02 16:47:25,182][247478] Updated weights for policy 0, policy_version 22943 (0.0008) +[2026-06-02 16:47:25,379][247478] Updated weights for policy 0, policy_version 22954 (0.0008) +[2026-06-02 16:47:25,553][247478] Updated weights for policy 0, policy_version 22964 (0.0008) +[2026-06-02 16:47:25,740][247478] Updated weights for policy 0, policy_version 22974 (0.0008) +[2026-06-02 16:47:26,370][247478] Updated weights for policy 0, policy_version 22984 (0.0009) +[2026-06-02 16:47:26,540][247478] Updated weights for policy 0, policy_version 22994 (0.0008) +[2026-06-02 16:47:26,717][247478] Updated weights for policy 0, policy_version 23004 (0.0009) +[2026-06-02 16:47:26,906][247478] Updated weights for policy 0, policy_version 23015 (0.0008) +[2026-06-02 16:47:27,094][247478] Updated weights for policy 0, policy_version 23025 (0.0009) +[2026-06-02 16:47:27,286][247478] Updated weights for policy 0, policy_version 23036 (0.0008) +[2026-06-02 16:47:27,958][247478] Updated weights for policy 0, policy_version 23047 (0.0009) +[2026-06-02 16:47:28,130][247478] Updated weights for policy 0, policy_version 23057 (0.0009) +[2026-06-02 16:47:28,309][247478] Updated weights for policy 0, policy_version 23067 (0.0008) +[2026-06-02 16:47:28,489][247478] Updated weights for policy 0, policy_version 23077 (0.0008) +[2026-06-02 16:47:28,665][247478] Updated weights for policy 0, policy_version 23087 (0.0009) +[2026-06-02 16:47:28,843][247478] Updated weights for policy 0, policy_version 23097 (0.0008) +[2026-06-02 16:47:29,489][247478] Updated weights for policy 0, policy_version 23107 (0.0009) +[2026-06-02 16:47:29,669][247478] Updated weights for policy 0, policy_version 23117 (0.0008) +[2026-06-02 16:47:29,725][246448] Fps is (10 sec: 19660.7, 60 sec: 20206.9, 300 sec: 20438.3). Total num frames: 11829248. Throughput: 0: 20545.4. Samples: 11829760. Policy #0 lag: (min: 63.0, avg: 79.6, max: 127.0) +[2026-06-02 16:47:29,727][246448] Avg episode reward: [(0, '1033.261')] +[2026-06-02 16:47:29,853][247478] Updated weights for policy 0, policy_version 23128 (0.0009) +[2026-06-02 16:47:30,053][247478] Updated weights for policy 0, policy_version 23139 (0.0009) +[2026-06-02 16:47:30,239][247478] Updated weights for policy 0, policy_version 23149 (0.0008) +[2026-06-02 16:47:30,413][247478] Updated weights for policy 0, policy_version 23159 (0.0009) +[2026-06-02 16:47:30,568][247399] Saving new best policy, reward=1033.261! +[2026-06-02 16:47:31,073][247478] Updated weights for policy 0, policy_version 23169 (0.0009) +[2026-06-02 16:47:31,232][247478] Updated weights for policy 0, policy_version 23179 (0.0009) +[2026-06-02 16:47:31,428][247478] Updated weights for policy 0, policy_version 23189 (0.0009) +[2026-06-02 16:47:31,592][247478] Updated weights for policy 0, policy_version 23199 (0.0008) +[2026-06-02 16:47:31,769][247478] Updated weights for policy 0, policy_version 23209 (0.0008) +[2026-06-02 16:47:31,979][247478] Updated weights for policy 0, policy_version 23220 (0.0009) +[2026-06-02 16:47:32,180][247478] Updated weights for policy 0, policy_version 23231 (0.0009) +[2026-06-02 16:47:32,808][247478] Updated weights for policy 0, policy_version 23241 (0.0009) +[2026-06-02 16:47:32,978][247478] Updated weights for policy 0, policy_version 23251 (0.0008) +[2026-06-02 16:47:33,158][247478] Updated weights for policy 0, policy_version 23261 (0.0010) +[2026-06-02 16:47:33,341][247478] Updated weights for policy 0, policy_version 23271 (0.0008) +[2026-06-02 16:47:33,528][247478] Updated weights for policy 0, policy_version 23282 (0.0009) +[2026-06-02 16:47:33,713][247478] Updated weights for policy 0, policy_version 23292 (0.0009) +[2026-06-02 16:47:34,364][247478] Updated weights for policy 0, policy_version 23302 (0.0009) +[2026-06-02 16:47:34,532][247478] Updated weights for policy 0, policy_version 23312 (0.0009) +[2026-06-02 16:47:34,726][246448] Fps is (10 sec: 19660.5, 60 sec: 20206.9, 300 sec: 20438.3). Total num frames: 11927552. Throughput: 0: 20425.9. Samples: 11947264. Policy #0 lag: (min: 63.0, avg: 79.6, max: 127.0) +[2026-06-02 16:47:34,727][246448] Avg episode reward: [(0, '1102.333')] +[2026-06-02 16:47:34,730][247478] Updated weights for policy 0, policy_version 23323 (0.0009) +[2026-06-02 16:47:34,904][247478] Updated weights for policy 0, policy_version 23333 (0.0009) +[2026-06-02 16:47:35,084][247478] Updated weights for policy 0, policy_version 23343 (0.0009) +[2026-06-02 16:47:35,262][247478] Updated weights for policy 0, policy_version 23353 (0.0006) +[2026-06-02 16:47:35,398][247399] Saving results/checkpoints_factor_sweeps/flappy/context_window/flappy_frame_stack_fixed_l2_fs4_seed14/checkpoint_p0/checkpoint_000023360_11960320.pth... +[2026-06-02 16:47:35,415][247399] Saving new best policy, reward=1102.333! +[2026-06-02 16:47:35,925][247478] Updated weights for policy 0, policy_version 23364 (0.0007) +[2026-06-02 16:47:36,116][247478] Updated weights for policy 0, policy_version 23375 (0.0009) +[2026-06-02 16:47:36,302][247478] Updated weights for policy 0, policy_version 23385 (0.0009) +[2026-06-02 16:47:36,497][247478] Updated weights for policy 0, policy_version 23396 (0.0009) +[2026-06-02 16:47:36,674][247478] Updated weights for policy 0, policy_version 23406 (0.0009) +[2026-06-02 16:47:36,857][247478] Updated weights for policy 0, policy_version 23416 (0.0009) +[2026-06-02 16:47:37,512][247478] Updated weights for policy 0, policy_version 23426 (0.0009) +[2026-06-02 16:47:37,676][247478] Updated weights for policy 0, policy_version 23436 (0.0009) +[2026-06-02 16:47:37,860][247478] Updated weights for policy 0, policy_version 23446 (0.0008) +[2026-06-02 16:47:38,025][247478] Updated weights for policy 0, policy_version 23456 (0.0008) +[2026-06-02 16:47:38,217][247478] Updated weights for policy 0, policy_version 23466 (0.0009) +[2026-06-02 16:47:38,400][247478] Updated weights for policy 0, policy_version 23476 (0.0009) +[2026-06-02 16:47:38,579][247478] Updated weights for policy 0, policy_version 23486 (0.0009) +[2026-06-02 16:47:39,210][247478] Updated weights for policy 0, policy_version 23496 (0.0009) +[2026-06-02 16:47:39,412][247478] Updated weights for policy 0, policy_version 23507 (0.0009) +[2026-06-02 16:47:39,597][247478] Updated weights for policy 0, policy_version 23517 (0.0009) +[2026-06-02 16:47:39,725][246448] Fps is (10 sec: 19660.9, 60 sec: 20206.9, 300 sec: 20438.3). Total num frames: 12025856. Throughput: 0: 20454.4. Samples: 12074624. Policy #0 lag: (min: 63.0, avg: 78.4, max: 127.0) +[2026-06-02 16:47:39,726][246448] Avg episode reward: [(0, '1097.218')] +[2026-06-02 16:47:39,772][247478] Updated weights for policy 0, policy_version 23527 (0.0009) +[2026-06-02 16:47:39,949][247478] Updated weights for policy 0, policy_version 23537 (0.0009) +[2026-06-02 16:47:40,132][247478] Updated weights for policy 0, policy_version 23547 (0.0009) +[2026-06-02 16:47:40,785][247478] Updated weights for policy 0, policy_version 23557 (0.0009) +[2026-06-02 16:47:40,950][247478] Updated weights for policy 0, policy_version 23567 (0.0008) +[2026-06-02 16:47:41,133][247478] Updated weights for policy 0, policy_version 23577 (0.0009) +[2026-06-02 16:47:41,313][247478] Updated weights for policy 0, policy_version 23587 (0.0009) +[2026-06-02 16:47:41,493][247478] Updated weights for policy 0, policy_version 23597 (0.0009) +[2026-06-02 16:47:41,671][247478] Updated weights for policy 0, policy_version 23607 (0.0009) +[2026-06-02 16:47:42,314][247478] Updated weights for policy 0, policy_version 23617 (0.0009) +[2026-06-02 16:47:42,482][247478] Updated weights for policy 0, policy_version 23627 (0.0009) +[2026-06-02 16:47:42,673][247478] Updated weights for policy 0, policy_version 23638 (0.0009) +[2026-06-02 16:47:42,850][247478] Updated weights for policy 0, policy_version 23648 (0.0008) +[2026-06-02 16:47:43,031][247478] Updated weights for policy 0, policy_version 23658 (0.0008) +[2026-06-02 16:47:43,204][247478] Updated weights for policy 0, policy_version 23668 (0.0009) +[2026-06-02 16:47:43,387][247478] Updated weights for policy 0, policy_version 23678 (0.0011) +[2026-06-02 16:47:44,044][247478] Updated weights for policy 0, policy_version 23688 (0.0009) +[2026-06-02 16:47:44,206][247478] Updated weights for policy 0, policy_version 23698 (0.0009) +[2026-06-02 16:47:44,391][247478] Updated weights for policy 0, policy_version 23708 (0.0009) +[2026-06-02 16:47:44,568][247478] Updated weights for policy 0, policy_version 23718 (0.0008) +[2026-06-02 16:47:44,726][246448] Fps is (10 sec: 19660.8, 60 sec: 20206.9, 300 sec: 20327.3). Total num frames: 12124160. Throughput: 0: 20460.0. Samples: 12137472. Policy #0 lag: (min: 63.0, avg: 78.4, max: 127.0) +[2026-06-02 16:47:44,727][246448] Avg episode reward: [(0, '1087.747')] +[2026-06-02 16:47:44,747][247478] Updated weights for policy 0, policy_version 23728 (0.0008) +[2026-06-02 16:47:44,925][247478] Updated weights for policy 0, policy_version 23738 (0.0008) +[2026-06-02 16:47:45,572][247478] Updated weights for policy 0, policy_version 23748 (0.0008) +[2026-06-02 16:47:45,745][247478] Updated weights for policy 0, policy_version 23758 (0.0011) +[2026-06-02 16:47:45,919][247478] Updated weights for policy 0, policy_version 23768 (0.0010) +[2026-06-02 16:47:46,094][247478] Updated weights for policy 0, policy_version 23778 (0.0009) +[2026-06-02 16:47:46,275][247478] Updated weights for policy 0, policy_version 23788 (0.0008) +[2026-06-02 16:47:46,457][247478] Updated weights for policy 0, policy_version 23798 (0.0008) +[2026-06-02 16:47:46,631][247478] Updated weights for policy 0, policy_version 23808 (0.0008) +[2026-06-02 16:47:47,271][247478] Updated weights for policy 0, policy_version 23818 (0.0009) +[2026-06-02 16:47:47,450][247478] Updated weights for policy 0, policy_version 23828 (0.0008) +[2026-06-02 16:47:47,629][247478] Updated weights for policy 0, policy_version 23838 (0.0008) +[2026-06-02 16:47:47,805][247478] Updated weights for policy 0, policy_version 23848 (0.0008) +[2026-06-02 16:47:47,991][247478] Updated weights for policy 0, policy_version 23858 (0.0009) +[2026-06-02 16:47:48,178][247478] Updated weights for policy 0, policy_version 23869 (0.0009) +[2026-06-02 16:47:48,829][247478] Updated weights for policy 0, policy_version 23880 (0.0009) +[2026-06-02 16:47:49,012][247478] Updated weights for policy 0, policy_version 23890 (0.0008) +[2026-06-02 16:47:49,185][247478] Updated weights for policy 0, policy_version 23900 (0.0009) +[2026-06-02 16:47:49,365][247478] Updated weights for policy 0, policy_version 23910 (0.0010) +[2026-06-02 16:47:49,537][247478] Updated weights for policy 0, policy_version 23920 (0.0008) +[2026-06-02 16:47:49,725][246448] Fps is (10 sec: 19660.7, 60 sec: 20206.9, 300 sec: 20327.3). Total num frames: 12222464. Throughput: 0: 20525.6. Samples: 12263296. Policy #0 lag: (min: 63.0, avg: 78.4, max: 127.0) +[2026-06-02 16:47:49,727][247478] Updated weights for policy 0, policy_version 23930 (0.0009) +[2026-06-02 16:47:49,727][246448] Avg episode reward: [(0, '1094.336')] +[2026-06-02 16:47:50,379][247478] Updated weights for policy 0, policy_version 23940 (0.0009) +[2026-06-02 16:47:50,545][247478] Updated weights for policy 0, policy_version 23950 (0.0008) +[2026-06-02 16:47:50,723][247478] Updated weights for policy 0, policy_version 23960 (0.0008) +[2026-06-02 16:47:50,904][247478] Updated weights for policy 0, policy_version 23970 (0.0007) +[2026-06-02 16:47:51,078][247478] Updated weights for policy 0, policy_version 23980 (0.0008) +[2026-06-02 16:47:51,261][247478] Updated weights for policy 0, policy_version 23990 (0.0009) +[2026-06-02 16:47:51,434][247478] Updated weights for policy 0, policy_version 24000 (0.0009) +[2026-06-02 16:47:52,096][247478] Updated weights for policy 0, policy_version 24010 (0.0008) +[2026-06-02 16:47:52,270][247478] Updated weights for policy 0, policy_version 24020 (0.0009) +[2026-06-02 16:47:52,449][247478] Updated weights for policy 0, policy_version 24030 (0.0008) +[2026-06-02 16:47:52,629][247478] Updated weights for policy 0, policy_version 24040 (0.0008) +[2026-06-02 16:47:52,806][247478] Updated weights for policy 0, policy_version 24050 (0.0008) +[2026-06-02 16:47:52,988][247478] Updated weights for policy 0, policy_version 24060 (0.0008) +[2026-06-02 16:47:53,610][247478] Updated weights for policy 0, policy_version 24070 (0.0009) +[2026-06-02 16:47:53,779][247478] Updated weights for policy 0, policy_version 24080 (0.0008) +[2026-06-02 16:47:53,950][247478] Updated weights for policy 0, policy_version 24090 (0.0008) +[2026-06-02 16:47:54,130][247478] Updated weights for policy 0, policy_version 24100 (0.0009) +[2026-06-02 16:47:54,313][247478] Updated weights for policy 0, policy_version 24110 (0.0008) +[2026-06-02 16:47:54,488][247478] Updated weights for policy 0, policy_version 24120 (0.0009) +[2026-06-02 16:47:54,726][246448] Fps is (10 sec: 22937.3, 60 sec: 20753.0, 300 sec: 20438.3). Total num frames: 12353536. Throughput: 0: 20471.3. Samples: 12376192. Policy #0 lag: (min: 0.0, avg: 46.1, max: 64.0) +[2026-06-02 16:47:54,726][246448] Avg episode reward: [(0, '1065.865')] +[2026-06-02 16:47:55,172][247478] Updated weights for policy 0, policy_version 24131 (0.0009) +[2026-06-02 16:47:55,341][247478] Updated weights for policy 0, policy_version 24141 (0.0009) +[2026-06-02 16:47:55,543][247478] Updated weights for policy 0, policy_version 24152 (0.0008) +[2026-06-02 16:47:55,716][247478] Updated weights for policy 0, policy_version 24162 (0.0008) +[2026-06-02 16:47:55,901][247478] Updated weights for policy 0, policy_version 24172 (0.0008) +[2026-06-02 16:47:56,080][247478] Updated weights for policy 0, policy_version 24182 (0.0009) +[2026-06-02 16:47:56,259][247478] Updated weights for policy 0, policy_version 24192 (0.0008) +[2026-06-02 16:47:56,896][247478] Updated weights for policy 0, policy_version 24202 (0.0009) +[2026-06-02 16:47:57,077][247478] Updated weights for policy 0, policy_version 24212 (0.0009) +[2026-06-02 16:47:57,255][247478] Updated weights for policy 0, policy_version 24222 (0.0008) +[2026-06-02 16:47:57,434][247478] Updated weights for policy 0, policy_version 24232 (0.0008) +[2026-06-02 16:47:57,616][247478] Updated weights for policy 0, policy_version 24242 (0.0009) +[2026-06-02 16:47:57,800][247478] Updated weights for policy 0, policy_version 24252 (0.0008) +[2026-06-02 16:47:58,433][247478] Updated weights for policy 0, policy_version 24262 (0.0009) +[2026-06-02 16:47:58,608][247478] Updated weights for policy 0, policy_version 24272 (0.0009) +[2026-06-02 16:47:58,784][247478] Updated weights for policy 0, policy_version 24282 (0.0008) +[2026-06-02 16:47:58,987][247478] Updated weights for policy 0, policy_version 24293 (0.0009) +[2026-06-02 16:47:59,164][247478] Updated weights for policy 0, policy_version 24303 (0.0009) +[2026-06-02 16:47:59,353][247478] Updated weights for policy 0, policy_version 24313 (0.0008) +[2026-06-02 16:47:59,725][246448] Fps is (10 sec: 22937.7, 60 sec: 20753.1, 300 sec: 20438.3). Total num frames: 12451840. Throughput: 0: 20494.2. Samples: 12440320. Policy #0 lag: (min: 0.0, avg: 46.1, max: 64.0) +[2026-06-02 16:47:59,726][246448] Avg episode reward: [(0, '1060.305')] +[2026-06-02 16:47:59,978][247478] Updated weights for policy 0, policy_version 24323 (0.0008) +[2026-06-02 16:48:00,148][247478] Updated weights for policy 0, policy_version 24333 (0.0008) +[2026-06-02 16:48:00,347][247478] Updated weights for policy 0, policy_version 24344 (0.0008) +[2026-06-02 16:48:00,526][247478] Updated weights for policy 0, policy_version 24354 (0.0010) +[2026-06-02 16:48:00,707][247478] Updated weights for policy 0, policy_version 24364 (0.0008) +[2026-06-02 16:48:00,902][247478] Updated weights for policy 0, policy_version 24375 (0.0008) +[2026-06-02 16:48:01,556][247478] Updated weights for policy 0, policy_version 24385 (0.0008) +[2026-06-02 16:48:01,740][247478] Updated weights for policy 0, policy_version 24396 (0.0009) +[2026-06-02 16:48:01,927][247478] Updated weights for policy 0, policy_version 24407 (0.0009) +[2026-06-02 16:48:02,110][247478] Updated weights for policy 0, policy_version 24417 (0.0009) +[2026-06-02 16:48:02,291][247478] Updated weights for policy 0, policy_version 24427 (0.0009) +[2026-06-02 16:48:02,473][247478] Updated weights for policy 0, policy_version 24437 (0.0009) +[2026-06-02 16:48:02,650][247478] Updated weights for policy 0, policy_version 24447 (0.0008) +[2026-06-02 16:48:03,291][247478] Updated weights for policy 0, policy_version 24457 (0.0008) +[2026-06-02 16:48:03,482][247478] Updated weights for policy 0, policy_version 24468 (0.0009) +[2026-06-02 16:48:03,669][247478] Updated weights for policy 0, policy_version 24478 (0.0008) +[2026-06-02 16:48:03,849][247478] Updated weights for policy 0, policy_version 24488 (0.0008) +[2026-06-02 16:48:04,028][247478] Updated weights for policy 0, policy_version 24498 (0.0009) +[2026-06-02 16:48:04,207][247478] Updated weights for policy 0, policy_version 24508 (0.0009) +[2026-06-02 16:48:04,725][246448] Fps is (10 sec: 19661.3, 60 sec: 20753.1, 300 sec: 20438.3). Total num frames: 12550144. Throughput: 0: 20474.3. Samples: 12565760. Policy #0 lag: (min: 0.0, avg: 46.1, max: 64.0) +[2026-06-02 16:48:04,727][246448] Avg episode reward: [(0, '1072.336')] +[2026-06-02 16:48:04,845][247478] Updated weights for policy 0, policy_version 24518 (0.0008) +[2026-06-02 16:48:05,017][247478] Updated weights for policy 0, policy_version 24528 (0.0008) +[2026-06-02 16:48:05,193][247478] Updated weights for policy 0, policy_version 24538 (0.0009) +[2026-06-02 16:48:05,415][247478] Updated weights for policy 0, policy_version 24550 (0.0009) +[2026-06-02 16:48:05,607][247478] Updated weights for policy 0, policy_version 24561 (0.0008) +[2026-06-02 16:48:05,795][247478] Updated weights for policy 0, policy_version 24571 (0.0009) +[2026-06-02 16:48:06,436][247478] Updated weights for policy 0, policy_version 24581 (0.0008) +[2026-06-02 16:48:06,607][247478] Updated weights for policy 0, policy_version 24591 (0.0008) +[2026-06-02 16:48:06,797][247478] Updated weights for policy 0, policy_version 24602 (0.0008) +[2026-06-02 16:48:06,981][247478] Updated weights for policy 0, policy_version 24612 (0.0008) +[2026-06-02 16:48:07,152][247478] Updated weights for policy 0, policy_version 24622 (0.0008) +[2026-06-02 16:48:07,339][247478] Updated weights for policy 0, policy_version 24632 (0.0009) +[2026-06-02 16:48:07,984][247478] Updated weights for policy 0, policy_version 24642 (0.0008) +[2026-06-02 16:48:08,155][247478] Updated weights for policy 0, policy_version 24652 (0.0009) +[2026-06-02 16:48:08,322][247478] Updated weights for policy 0, policy_version 24662 (0.0008) +[2026-06-02 16:48:08,524][247478] Updated weights for policy 0, policy_version 24673 (0.0009) +[2026-06-02 16:48:08,701][247478] Updated weights for policy 0, policy_version 24683 (0.0008) +[2026-06-02 16:48:08,890][247478] Updated weights for policy 0, policy_version 24693 (0.0008) +[2026-06-02 16:48:09,068][247478] Updated weights for policy 0, policy_version 24703 (0.0008) +[2026-06-02 16:48:09,710][247478] Updated weights for policy 0, policy_version 24713 (0.0009) +[2026-06-02 16:48:09,725][246448] Fps is (10 sec: 19660.8, 60 sec: 20206.9, 300 sec: 20438.3). Total num frames: 12648448. Throughput: 0: 20437.3. Samples: 12691072. Policy #0 lag: (min: 0.0, avg: 46.1, max: 64.0) +[2026-06-02 16:48:09,726][246448] Avg episode reward: [(0, '1091.188')] +[2026-06-02 16:48:09,887][247478] Updated weights for policy 0, policy_version 24723 (0.0008) +[2026-06-02 16:48:10,078][247478] Updated weights for policy 0, policy_version 24734 (0.0008) +[2026-06-02 16:48:10,263][247478] Updated weights for policy 0, policy_version 24744 (0.0010) +[2026-06-02 16:48:10,440][247478] Updated weights for policy 0, policy_version 24754 (0.0009) +[2026-06-02 16:48:10,624][247478] Updated weights for policy 0, policy_version 24764 (0.0008) +[2026-06-02 16:48:11,260][247478] Updated weights for policy 0, policy_version 24774 (0.0008) +[2026-06-02 16:48:11,430][247478] Updated weights for policy 0, policy_version 24784 (0.0008) +[2026-06-02 16:48:11,607][247478] Updated weights for policy 0, policy_version 24794 (0.0008) +[2026-06-02 16:48:11,786][247478] Updated weights for policy 0, policy_version 24804 (0.0008) +[2026-06-02 16:48:11,965][247478] Updated weights for policy 0, policy_version 24814 (0.0009) +[2026-06-02 16:48:12,145][247478] Updated weights for policy 0, policy_version 24824 (0.0009) +[2026-06-02 16:48:12,818][247478] Updated weights for policy 0, policy_version 24835 (0.0009) +[2026-06-02 16:48:12,979][247478] Updated weights for policy 0, policy_version 24845 (0.0008) +[2026-06-02 16:48:13,161][247478] Updated weights for policy 0, policy_version 24855 (0.0009) +[2026-06-02 16:48:13,341][247478] Updated weights for policy 0, policy_version 24865 (0.0009) +[2026-06-02 16:48:13,530][247478] Updated weights for policy 0, policy_version 24876 (0.0009) +[2026-06-02 16:48:13,714][247478] Updated weights for policy 0, policy_version 24886 (0.0009) +[2026-06-02 16:48:13,892][247478] Updated weights for policy 0, policy_version 24896 (0.0009) +[2026-06-02 16:48:14,539][247478] Updated weights for policy 0, policy_version 24906 (0.0009) +[2026-06-02 16:48:14,718][247478] Updated weights for policy 0, policy_version 24916 (0.0009) +[2026-06-02 16:48:14,725][246448] Fps is (10 sec: 19660.7, 60 sec: 20206.9, 300 sec: 20438.3). Total num frames: 12746752. Throughput: 0: 20511.3. Samples: 12752768. Policy #0 lag: (min: 63.0, avg: 79.6, max: 127.0) +[2026-06-02 16:48:14,727][246448] Avg episode reward: [(0, '1078.505')] +[2026-06-02 16:48:14,929][247478] Updated weights for policy 0, policy_version 24928 (0.0009) +[2026-06-02 16:48:15,099][247478] Updated weights for policy 0, policy_version 24938 (0.0009) +[2026-06-02 16:48:15,287][247478] Updated weights for policy 0, policy_version 24948 (0.0009) +[2026-06-02 16:48:15,467][247478] Updated weights for policy 0, policy_version 24958 (0.0008) +[2026-06-02 16:48:16,131][247478] Updated weights for policy 0, policy_version 24969 (0.0009) +[2026-06-02 16:48:16,306][247478] Updated weights for policy 0, policy_version 24979 (0.0008) +[2026-06-02 16:48:16,481][247478] Updated weights for policy 0, policy_version 24989 (0.0008) +[2026-06-02 16:48:16,674][247478] Updated weights for policy 0, policy_version 24999 (0.0008) +[2026-06-02 16:48:16,848][247478] Updated weights for policy 0, policy_version 25009 (0.0009) +[2026-06-02 16:48:17,024][247478] Updated weights for policy 0, policy_version 25019 (0.0009) +[2026-06-02 16:48:17,679][247478] Updated weights for policy 0, policy_version 25029 (0.0009) +[2026-06-02 16:48:17,848][247478] Updated weights for policy 0, policy_version 25039 (0.0007) +[2026-06-02 16:48:18,030][247478] Updated weights for policy 0, policy_version 25049 (0.0009) +[2026-06-02 16:48:18,218][247478] Updated weights for policy 0, policy_version 25060 (0.0009) +[2026-06-02 16:48:18,441][247478] Updated weights for policy 0, policy_version 25072 (0.0008) +[2026-06-02 16:48:18,622][247478] Updated weights for policy 0, policy_version 25082 (0.0008) +[2026-06-02 16:48:19,257][247478] Updated weights for policy 0, policy_version 25092 (0.0009) +[2026-06-02 16:48:19,428][247478] Updated weights for policy 0, policy_version 25102 (0.0009) +[2026-06-02 16:48:19,601][247478] Updated weights for policy 0, policy_version 25112 (0.0008) +[2026-06-02 16:48:19,725][246448] Fps is (10 sec: 19660.8, 60 sec: 20206.9, 300 sec: 20438.3). Total num frames: 12845056. Throughput: 0: 20446.0. Samples: 12867328. Policy #0 lag: (min: 63.0, avg: 79.6, max: 127.0) +[2026-06-02 16:48:19,726][246448] Avg episode reward: [(0, '1064.723')] +[2026-06-02 16:48:19,784][247478] Updated weights for policy 0, policy_version 25122 (0.0008) +[2026-06-02 16:48:19,962][247478] Updated weights for policy 0, policy_version 25132 (0.0009) +[2026-06-02 16:48:20,142][247478] Updated weights for policy 0, policy_version 25142 (0.0009) +[2026-06-02 16:48:20,313][247478] Updated weights for policy 0, policy_version 25152 (0.0008) +[2026-06-02 16:48:20,958][247478] Updated weights for policy 0, policy_version 25162 (0.0009) +[2026-06-02 16:48:21,134][247478] Updated weights for policy 0, policy_version 25172 (0.0008) +[2026-06-02 16:48:21,305][247478] Updated weights for policy 0, policy_version 25182 (0.0008) +[2026-06-02 16:48:21,486][247478] Updated weights for policy 0, policy_version 25192 (0.0008) +[2026-06-02 16:48:21,668][247478] Updated weights for policy 0, policy_version 25202 (0.0008) +[2026-06-02 16:48:21,853][247478] Updated weights for policy 0, policy_version 25212 (0.0009) +[2026-06-02 16:48:22,512][247478] Updated weights for policy 0, policy_version 25222 (0.0009) +[2026-06-02 16:48:22,685][247478] Updated weights for policy 0, policy_version 25232 (0.0009) +[2026-06-02 16:48:22,880][247478] Updated weights for policy 0, policy_version 25243 (0.0009) +[2026-06-02 16:48:23,059][247478] Updated weights for policy 0, policy_version 25253 (0.0009) +[2026-06-02 16:48:23,239][247478] Updated weights for policy 0, policy_version 25263 (0.0009) +[2026-06-02 16:48:23,416][247478] Updated weights for policy 0, policy_version 25273 (0.0009) +[2026-06-02 16:48:24,045][247478] Updated weights for policy 0, policy_version 25283 (0.0009) +[2026-06-02 16:48:24,209][247478] Updated weights for policy 0, policy_version 25293 (0.0009) +[2026-06-02 16:48:24,383][247478] Updated weights for policy 0, policy_version 25303 (0.0009) +[2026-06-02 16:48:24,569][247478] Updated weights for policy 0, policy_version 25313 (0.0009) +[2026-06-02 16:48:24,725][246448] Fps is (10 sec: 19661.0, 60 sec: 20206.9, 300 sec: 20438.3). Total num frames: 12943360. Throughput: 0: 20383.3. Samples: 12991872. Policy #0 lag: (min: 63.0, avg: 79.6, max: 127.0) +[2026-06-02 16:48:24,726][246448] Avg episode reward: [(0, '1038.480')] +[2026-06-02 16:48:24,751][247478] Updated weights for policy 0, policy_version 25323 (0.0009) +[2026-06-02 16:48:24,930][247478] Updated weights for policy 0, policy_version 25333 (0.0008) +[2026-06-02 16:48:25,120][247478] Updated weights for policy 0, policy_version 25344 (0.0008) +[2026-06-02 16:48:25,784][247478] Updated weights for policy 0, policy_version 25355 (0.0008) +[2026-06-02 16:48:25,979][247478] Updated weights for policy 0, policy_version 25366 (0.0009) +[2026-06-02 16:48:26,166][247478] Updated weights for policy 0, policy_version 25376 (0.0009) +[2026-06-02 16:48:26,362][247478] Updated weights for policy 0, policy_version 25387 (0.0009) +[2026-06-02 16:48:26,529][247478] Updated weights for policy 0, policy_version 25397 (0.0009) +[2026-06-02 16:48:26,713][247478] Updated weights for policy 0, policy_version 25407 (0.0008) +[2026-06-02 16:48:27,366][247478] Updated weights for policy 0, policy_version 25417 (0.0010) +[2026-06-02 16:48:27,532][247478] Updated weights for policy 0, policy_version 25427 (0.0009) +[2026-06-02 16:48:27,734][247478] Updated weights for policy 0, policy_version 25438 (0.0009) +[2026-06-02 16:48:27,912][247478] Updated weights for policy 0, policy_version 25448 (0.0009) +[2026-06-02 16:48:28,083][247478] Updated weights for policy 0, policy_version 25458 (0.0009) +[2026-06-02 16:48:28,269][247478] Updated weights for policy 0, policy_version 25468 (0.0009) +[2026-06-02 16:48:28,908][247478] Updated weights for policy 0, policy_version 25478 (0.0008) +[2026-06-02 16:48:29,085][247478] Updated weights for policy 0, policy_version 25488 (0.0008) +[2026-06-02 16:48:29,256][247478] Updated weights for policy 0, policy_version 25498 (0.0009) +[2026-06-02 16:48:29,445][247478] Updated weights for policy 0, policy_version 25508 (0.0009) +[2026-06-02 16:48:29,618][247478] Updated weights for policy 0, policy_version 25518 (0.0009) +[2026-06-02 16:48:29,725][246448] Fps is (10 sec: 19660.8, 60 sec: 20206.9, 300 sec: 20327.3). Total num frames: 13041664. Throughput: 0: 20380.5. Samples: 13054592. Policy #0 lag: (min: 63.0, avg: 79.6, max: 127.0) +[2026-06-02 16:48:29,727][246448] Avg episode reward: [(0, '1039.431')] +[2026-06-02 16:48:29,815][247478] Updated weights for policy 0, policy_version 25529 (0.0008) +[2026-06-02 16:48:30,466][247478] Updated weights for policy 0, policy_version 25539 (0.0009) +[2026-06-02 16:48:30,634][247478] Updated weights for policy 0, policy_version 25549 (0.0009) +[2026-06-02 16:48:30,814][247478] Updated weights for policy 0, policy_version 25559 (0.0009) +[2026-06-02 16:48:30,985][247478] Updated weights for policy 0, policy_version 25569 (0.0009) +[2026-06-02 16:48:31,171][247478] Updated weights for policy 0, policy_version 25579 (0.0009) +[2026-06-02 16:48:31,353][247478] Updated weights for policy 0, policy_version 25589 (0.0009) +[2026-06-02 16:48:31,534][247478] Updated weights for policy 0, policy_version 25599 (0.0009) +[2026-06-02 16:48:32,187][247478] Updated weights for policy 0, policy_version 25609 (0.0008) +[2026-06-02 16:48:32,360][247478] Updated weights for policy 0, policy_version 25619 (0.0009) +[2026-06-02 16:48:32,545][247478] Updated weights for policy 0, policy_version 25629 (0.0009) +[2026-06-02 16:48:32,720][247478] Updated weights for policy 0, policy_version 25639 (0.0007) +[2026-06-02 16:48:32,894][247478] Updated weights for policy 0, policy_version 25649 (0.0009) +[2026-06-02 16:48:33,082][247478] Updated weights for policy 0, policy_version 25659 (0.0009) +[2026-06-02 16:48:33,727][247478] Updated weights for policy 0, policy_version 25669 (0.0009) +[2026-06-02 16:48:33,896][247478] Updated weights for policy 0, policy_version 25679 (0.0009) +[2026-06-02 16:48:34,079][247478] Updated weights for policy 0, policy_version 25689 (0.0009) +[2026-06-02 16:48:34,253][247478] Updated weights for policy 0, policy_version 25699 (0.0009) +[2026-06-02 16:48:34,439][247478] Updated weights for policy 0, policy_version 25709 (0.0009) +[2026-06-02 16:48:34,619][247478] Updated weights for policy 0, policy_version 25719 (0.0009) +[2026-06-02 16:48:34,725][246448] Fps is (10 sec: 19660.7, 60 sec: 20207.0, 300 sec: 20327.3). Total num frames: 13139968. Throughput: 0: 20380.5. Samples: 13180416. Policy #0 lag: (min: 63.0, avg: 78.7, max: 127.0) +[2026-06-02 16:48:34,727][246448] Avg episode reward: [(0, '1080.542')] +[2026-06-02 16:48:35,277][247478] Updated weights for policy 0, policy_version 25729 (0.0008) +[2026-06-02 16:48:35,452][247478] Updated weights for policy 0, policy_version 25739 (0.0008) +[2026-06-02 16:48:35,625][247478] Updated weights for policy 0, policy_version 25749 (0.0009) +[2026-06-02 16:48:35,802][247478] Updated weights for policy 0, policy_version 25759 (0.0008) +[2026-06-02 16:48:35,973][247478] Updated weights for policy 0, policy_version 25769 (0.0009) +[2026-06-02 16:48:36,142][247478] Updated weights for policy 0, policy_version 25779 (0.0009) +[2026-06-02 16:48:36,327][247478] Updated weights for policy 0, policy_version 25789 (0.0008) +[2026-06-02 16:48:36,965][247478] Updated weights for policy 0, policy_version 25799 (0.0008) +[2026-06-02 16:48:37,158][247478] Updated weights for policy 0, policy_version 25810 (0.0009) +[2026-06-02 16:48:37,333][247478] Updated weights for policy 0, policy_version 25820 (0.0009) +[2026-06-02 16:48:37,517][247478] Updated weights for policy 0, policy_version 25831 (0.0008) +[2026-06-02 16:48:37,725][247478] Updated weights for policy 0, policy_version 25842 (0.0009) +[2026-06-02 16:48:37,923][247478] Updated weights for policy 0, policy_version 25854 (0.0009) +[2026-06-02 16:48:38,614][247478] Updated weights for policy 0, policy_version 25865 (0.0008) +[2026-06-02 16:48:38,789][247478] Updated weights for policy 0, policy_version 25875 (0.0009) +[2026-06-02 16:48:38,982][247478] Updated weights for policy 0, policy_version 25886 (0.0009) +[2026-06-02 16:48:39,175][247478] Updated weights for policy 0, policy_version 25897 (0.0009) +[2026-06-02 16:48:39,364][247478] Updated weights for policy 0, policy_version 25907 (0.0009) +[2026-06-02 16:48:39,535][247478] Updated weights for policy 0, policy_version 25917 (0.0009) +[2026-06-02 16:48:39,725][246448] Fps is (10 sec: 22937.7, 60 sec: 20753.1, 300 sec: 20438.4). Total num frames: 13271040. Throughput: 0: 20380.6. Samples: 13293312. Policy #0 lag: (min: 63.0, avg: 78.7, max: 127.0) +[2026-06-02 16:48:39,727][246448] Avg episode reward: [(0, '1121.316')] +[2026-06-02 16:48:39,732][247399] Saving new best policy, reward=1121.316! +[2026-06-02 16:48:40,194][247478] Updated weights for policy 0, policy_version 25927 (0.0009) +[2026-06-02 16:48:40,386][247478] Updated weights for policy 0, policy_version 25938 (0.0009) +[2026-06-02 16:48:40,576][247478] Updated weights for policy 0, policy_version 25948 (0.0009) +[2026-06-02 16:48:40,757][247478] Updated weights for policy 0, policy_version 25958 (0.0008) +[2026-06-02 16:48:40,970][247478] Updated weights for policy 0, policy_version 25970 (0.0009) +[2026-06-02 16:48:41,146][247478] Updated weights for policy 0, policy_version 25980 (0.0008) +[2026-06-02 16:48:41,790][247478] Updated weights for policy 0, policy_version 25990 (0.0009) +[2026-06-02 16:48:41,962][247478] Updated weights for policy 0, policy_version 26000 (0.0008) +[2026-06-02 16:48:42,142][247478] Updated weights for policy 0, policy_version 26010 (0.0009) +[2026-06-02 16:48:42,320][247478] Updated weights for policy 0, policy_version 26020 (0.0008) +[2026-06-02 16:48:42,516][247478] Updated weights for policy 0, policy_version 26031 (0.0008) +[2026-06-02 16:48:42,700][247478] Updated weights for policy 0, policy_version 26041 (0.0008) +[2026-06-02 16:48:43,350][247478] Updated weights for policy 0, policy_version 26051 (0.0008) +[2026-06-02 16:48:43,510][247478] Updated weights for policy 0, policy_version 26061 (0.0008) +[2026-06-02 16:48:43,685][247478] Updated weights for policy 0, policy_version 26071 (0.0008) +[2026-06-02 16:48:43,869][247478] Updated weights for policy 0, policy_version 26081 (0.0008) +[2026-06-02 16:48:44,056][247478] Updated weights for policy 0, policy_version 26091 (0.0009) +[2026-06-02 16:48:44,233][247478] Updated weights for policy 0, policy_version 26101 (0.0008) +[2026-06-02 16:48:44,428][247478] Updated weights for policy 0, policy_version 26112 (0.0008) +[2026-06-02 16:48:44,725][246448] Fps is (10 sec: 22937.6, 60 sec: 20753.1, 300 sec: 20438.3). Total num frames: 13369344. Throughput: 0: 20363.4. Samples: 13356672. Policy #0 lag: (min: 63.0, avg: 78.7, max: 127.0) +[2026-06-02 16:48:44,727][246448] Avg episode reward: [(0, '1137.368')] +[2026-06-02 16:48:44,733][247399] Saving new best policy, reward=1137.368! +[2026-06-02 16:48:45,099][247478] Updated weights for policy 0, policy_version 26123 (0.0008) +[2026-06-02 16:48:45,276][247478] Updated weights for policy 0, policy_version 26133 (0.0009) +[2026-06-02 16:48:45,452][247478] Updated weights for policy 0, policy_version 26143 (0.0008) +[2026-06-02 16:48:45,629][247478] Updated weights for policy 0, policy_version 26153 (0.0008) +[2026-06-02 16:48:45,812][247478] Updated weights for policy 0, policy_version 26163 (0.0008) +[2026-06-02 16:48:45,996][247478] Updated weights for policy 0, policy_version 26173 (0.0008) +[2026-06-02 16:48:46,642][247478] Updated weights for policy 0, policy_version 26183 (0.0008) +[2026-06-02 16:48:46,805][247478] Updated weights for policy 0, policy_version 26193 (0.0008) +[2026-06-02 16:48:46,996][247478] Updated weights for policy 0, policy_version 26203 (0.0008) +[2026-06-02 16:48:47,171][247478] Updated weights for policy 0, policy_version 26213 (0.0008) +[2026-06-02 16:48:47,349][247478] Updated weights for policy 0, policy_version 26223 (0.0008) +[2026-06-02 16:48:47,532][247478] Updated weights for policy 0, policy_version 26233 (0.0008) +[2026-06-02 16:48:48,173][247478] Updated weights for policy 0, policy_version 26243 (0.0009) +[2026-06-02 16:48:48,332][247478] Updated weights for policy 0, policy_version 26253 (0.0008) +[2026-06-02 16:48:48,517][247478] Updated weights for policy 0, policy_version 26263 (0.0008) +[2026-06-02 16:48:48,698][247478] Updated weights for policy 0, policy_version 26273 (0.0008) +[2026-06-02 16:48:48,869][247478] Updated weights for policy 0, policy_version 26283 (0.0008) +[2026-06-02 16:48:49,055][247478] Updated weights for policy 0, policy_version 26293 (0.0009) +[2026-06-02 16:48:49,246][247478] Updated weights for policy 0, policy_version 26303 (0.0008) +[2026-06-02 16:48:49,725][246448] Fps is (10 sec: 19660.7, 60 sec: 20753.1, 300 sec: 20438.3). Total num frames: 13467648. Throughput: 0: 20380.4. Samples: 13482880. Policy #0 lag: (min: 59.0, avg: 106.5, max: 123.0) +[2026-06-02 16:48:49,726][246448] Avg episode reward: [(0, '1144.609')] +[2026-06-02 16:48:49,887][247478] Updated weights for policy 0, policy_version 26314 (0.0009) +[2026-06-02 16:48:50,059][247478] Updated weights for policy 0, policy_version 26324 (0.0009) +[2026-06-02 16:48:50,248][247478] Updated weights for policy 0, policy_version 26334 (0.0008) +[2026-06-02 16:48:50,417][247478] Updated weights for policy 0, policy_version 26344 (0.0008) +[2026-06-02 16:48:50,604][247478] Updated weights for policy 0, policy_version 26354 (0.0008) +[2026-06-02 16:48:50,788][247478] Updated weights for policy 0, policy_version 26364 (0.0008) +[2026-06-02 16:48:50,852][247399] Saving new best policy, reward=1144.609! +[2026-06-02 16:48:51,446][247478] Updated weights for policy 0, policy_version 26375 (0.0008) +[2026-06-02 16:48:51,623][247478] Updated weights for policy 0, policy_version 26385 (0.0009) +[2026-06-02 16:48:51,807][247478] Updated weights for policy 0, policy_version 26396 (0.0008) +[2026-06-02 16:48:51,993][247478] Updated weights for policy 0, policy_version 26406 (0.0008) +[2026-06-02 16:48:52,179][247478] Updated weights for policy 0, policy_version 26416 (0.0009) +[2026-06-02 16:48:52,357][247478] Updated weights for policy 0, policy_version 26426 (0.0008) +[2026-06-02 16:48:53,001][247478] Updated weights for policy 0, policy_version 26436 (0.0009) +[2026-06-02 16:48:53,181][247478] Updated weights for policy 0, policy_version 26447 (0.0008) +[2026-06-02 16:48:53,367][247478] Updated weights for policy 0, policy_version 26457 (0.0009) +[2026-06-02 16:48:53,550][247478] Updated weights for policy 0, policy_version 26467 (0.0008) +[2026-06-02 16:48:53,749][247478] Updated weights for policy 0, policy_version 26478 (0.0009) +[2026-06-02 16:48:53,927][247478] Updated weights for policy 0, policy_version 26488 (0.0008) +[2026-06-02 16:48:54,585][247478] Updated weights for policy 0, policy_version 26498 (0.0009) +[2026-06-02 16:48:54,726][246448] Fps is (10 sec: 19660.5, 60 sec: 20207.0, 300 sec: 20438.3). Total num frames: 13565952. Throughput: 0: 20386.0. Samples: 13608448. Policy #0 lag: (min: 59.0, avg: 106.5, max: 123.0) +[2026-06-02 16:48:54,727][246448] Avg episode reward: [(0, '1167.955')] +[2026-06-02 16:48:54,753][247478] Updated weights for policy 0, policy_version 26508 (0.0008) +[2026-06-02 16:48:54,929][247478] Updated weights for policy 0, policy_version 26518 (0.0009) +[2026-06-02 16:48:55,116][247478] Updated weights for policy 0, policy_version 26528 (0.0009) +[2026-06-02 16:48:55,284][247478] Updated weights for policy 0, policy_version 26538 (0.0009) +[2026-06-02 16:48:55,465][247478] Updated weights for policy 0, policy_version 26548 (0.0009) +[2026-06-02 16:48:55,656][247478] Updated weights for policy 0, policy_version 26558 (0.0008) +[2026-06-02 16:48:55,681][247399] Saving new best policy, reward=1167.955! +[2026-06-02 16:48:56,294][247478] Updated weights for policy 0, policy_version 26568 (0.0009) +[2026-06-02 16:48:56,470][247478] Updated weights for policy 0, policy_version 26578 (0.0009) +[2026-06-02 16:48:56,645][247478] Updated weights for policy 0, policy_version 26588 (0.0009) +[2026-06-02 16:48:56,826][247478] Updated weights for policy 0, policy_version 26598 (0.0009) +[2026-06-02 16:48:57,001][247478] Updated weights for policy 0, policy_version 26608 (0.0008) +[2026-06-02 16:48:57,181][247478] Updated weights for policy 0, policy_version 26618 (0.0008) +[2026-06-02 16:48:57,846][247478] Updated weights for policy 0, policy_version 26628 (0.0009) +[2026-06-02 16:48:58,009][247478] Updated weights for policy 0, policy_version 26638 (0.0008) +[2026-06-02 16:48:58,195][247478] Updated weights for policy 0, policy_version 26648 (0.0009) +[2026-06-02 16:48:58,370][247478] Updated weights for policy 0, policy_version 26658 (0.0008) +[2026-06-02 16:48:58,558][247478] Updated weights for policy 0, policy_version 26668 (0.0009) +[2026-06-02 16:48:58,734][247478] Updated weights for policy 0, policy_version 26678 (0.0009) +[2026-06-02 16:48:58,910][247478] Updated weights for policy 0, policy_version 26688 (0.0009) +[2026-06-02 16:48:59,561][247478] Updated weights for policy 0, policy_version 26698 (0.0009) +[2026-06-02 16:48:59,725][246448] Fps is (10 sec: 19660.9, 60 sec: 20206.9, 300 sec: 20438.3). Total num frames: 13664256. Throughput: 0: 20391.9. Samples: 13670400. Policy #0 lag: (min: 59.0, avg: 106.5, max: 123.0) +[2026-06-02 16:48:59,726][246448] Avg episode reward: [(0, '1129.475')] +[2026-06-02 16:48:59,730][247478] Updated weights for policy 0, policy_version 26708 (0.0009) +[2026-06-02 16:48:59,914][247478] Updated weights for policy 0, policy_version 26718 (0.0009) +[2026-06-02 16:49:00,097][247478] Updated weights for policy 0, policy_version 26728 (0.0009) +[2026-06-02 16:49:00,281][247478] Updated weights for policy 0, policy_version 26738 (0.0009) +[2026-06-02 16:49:00,450][247478] Updated weights for policy 0, policy_version 26748 (0.0008) +[2026-06-02 16:49:01,085][247478] Updated weights for policy 0, policy_version 26758 (0.0009) +[2026-06-02 16:49:01,263][247478] Updated weights for policy 0, policy_version 26768 (0.0009) +[2026-06-02 16:49:01,439][247478] Updated weights for policy 0, policy_version 26778 (0.0009) +[2026-06-02 16:49:01,631][247478] Updated weights for policy 0, policy_version 26789 (0.0009) +[2026-06-02 16:49:01,816][247478] Updated weights for policy 0, policy_version 26799 (0.0008) +[2026-06-02 16:49:01,996][247478] Updated weights for policy 0, policy_version 26809 (0.0008) +[2026-06-02 16:49:02,658][247478] Updated weights for policy 0, policy_version 26819 (0.0008) +[2026-06-02 16:49:02,828][247478] Updated weights for policy 0, policy_version 26829 (0.0009) +[2026-06-02 16:49:03,005][247478] Updated weights for policy 0, policy_version 26839 (0.0009) +[2026-06-02 16:49:03,187][247478] Updated weights for policy 0, policy_version 26849 (0.0007) +[2026-06-02 16:49:03,374][247478] Updated weights for policy 0, policy_version 26859 (0.0006) +[2026-06-02 16:49:03,550][247478] Updated weights for policy 0, policy_version 26869 (0.0008) +[2026-06-02 16:49:03,737][247478] Updated weights for policy 0, policy_version 26879 (0.0008) +[2026-06-02 16:49:04,358][247478] Updated weights for policy 0, policy_version 26889 (0.0009) +[2026-06-02 16:49:04,532][247478] Updated weights for policy 0, policy_version 26899 (0.0008) +[2026-06-02 16:49:04,708][247478] Updated weights for policy 0, policy_version 26909 (0.0008) +[2026-06-02 16:49:04,725][246448] Fps is (10 sec: 19661.1, 60 sec: 20206.9, 300 sec: 20438.3). Total num frames: 13762560. Throughput: 0: 20406.0. Samples: 13785600. Policy #0 lag: (min: 59.0, avg: 106.5, max: 123.0) +[2026-06-02 16:49:04,727][246448] Avg episode reward: [(0, '1121.433')] +[2026-06-02 16:49:04,901][247478] Updated weights for policy 0, policy_version 26919 (0.0008) +[2026-06-02 16:49:05,072][247478] Updated weights for policy 0, policy_version 26929 (0.0008) +[2026-06-02 16:49:05,252][247478] Updated weights for policy 0, policy_version 26939 (0.0008) +[2026-06-02 16:49:05,888][247478] Updated weights for policy 0, policy_version 26949 (0.0008) +[2026-06-02 16:49:06,076][247478] Updated weights for policy 0, policy_version 26959 (0.0008) +[2026-06-02 16:49:06,245][247478] Updated weights for policy 0, policy_version 26969 (0.0008) +[2026-06-02 16:49:06,436][247478] Updated weights for policy 0, policy_version 26980 (0.0009) +[2026-06-02 16:49:06,617][247478] Updated weights for policy 0, policy_version 26990 (0.0008) +[2026-06-02 16:49:06,794][247478] Updated weights for policy 0, policy_version 27000 (0.0008) +[2026-06-02 16:49:07,463][247478] Updated weights for policy 0, policy_version 27010 (0.0009) +[2026-06-02 16:49:07,635][247478] Updated weights for policy 0, policy_version 27020 (0.0009) +[2026-06-02 16:49:07,810][247478] Updated weights for policy 0, policy_version 27030 (0.0008) +[2026-06-02 16:49:07,991][247478] Updated weights for policy 0, policy_version 27040 (0.0008) +[2026-06-02 16:49:08,168][247478] Updated weights for policy 0, policy_version 27050 (0.0008) +[2026-06-02 16:49:08,353][247478] Updated weights for policy 0, policy_version 27060 (0.0008) +[2026-06-02 16:49:08,533][247478] Updated weights for policy 0, policy_version 27070 (0.0008) +[2026-06-02 16:49:09,172][247478] Updated weights for policy 0, policy_version 27080 (0.0009) +[2026-06-02 16:49:09,340][247478] Updated weights for policy 0, policy_version 27090 (0.0009) +[2026-06-02 16:49:09,530][247478] Updated weights for policy 0, policy_version 27100 (0.0008) +[2026-06-02 16:49:09,709][247478] Updated weights for policy 0, policy_version 27110 (0.0009) +[2026-06-02 16:49:09,725][246448] Fps is (10 sec: 19660.8, 60 sec: 20206.9, 300 sec: 20438.3). Total num frames: 13860864. Throughput: 0: 20428.8. Samples: 13911168. Policy #0 lag: (min: 63.0, avg: 78.8, max: 127.0) +[2026-06-02 16:49:09,726][246448] Avg episode reward: [(0, '1174.028')] +[2026-06-02 16:49:09,913][247478] Updated weights for policy 0, policy_version 27121 (0.0009) +[2026-06-02 16:49:10,087][247478] Updated weights for policy 0, policy_version 27131 (0.0009) +[2026-06-02 16:49:10,179][247399] Saving new best policy, reward=1174.028! +[2026-06-02 16:49:10,741][247478] Updated weights for policy 0, policy_version 27141 (0.0008) +[2026-06-02 16:49:10,930][247478] Updated weights for policy 0, policy_version 27152 (0.0009) +[2026-06-02 16:49:11,101][247478] Updated weights for policy 0, policy_version 27162 (0.0008) +[2026-06-02 16:49:11,292][247478] Updated weights for policy 0, policy_version 27172 (0.0008) +[2026-06-02 16:49:11,474][247478] Updated weights for policy 0, policy_version 27182 (0.0009) +[2026-06-02 16:49:11,650][247478] Updated weights for policy 0, policy_version 27192 (0.0008) +[2026-06-02 16:49:12,284][247478] Updated weights for policy 0, policy_version 27202 (0.0008) +[2026-06-02 16:49:12,466][247478] Updated weights for policy 0, policy_version 27213 (0.0009) +[2026-06-02 16:49:12,648][247478] Updated weights for policy 0, policy_version 27223 (0.0008) +[2026-06-02 16:49:12,829][247478] Updated weights for policy 0, policy_version 27233 (0.0009) +[2026-06-02 16:49:13,013][247478] Updated weights for policy 0, policy_version 27243 (0.0008) +[2026-06-02 16:49:13,208][247478] Updated weights for policy 0, policy_version 27254 (0.0009) +[2026-06-02 16:49:13,387][247478] Updated weights for policy 0, policy_version 27264 (0.0007) +[2026-06-02 16:49:14,040][247478] Updated weights for policy 0, policy_version 27275 (0.0009) +[2026-06-02 16:49:14,234][247478] Updated weights for policy 0, policy_version 27286 (0.0009) +[2026-06-02 16:49:14,406][247478] Updated weights for policy 0, policy_version 27296 (0.0009) +[2026-06-02 16:49:14,582][247478] Updated weights for policy 0, policy_version 27306 (0.0009) +[2026-06-02 16:49:14,725][246448] Fps is (10 sec: 19660.8, 60 sec: 20207.0, 300 sec: 20438.3). Total num frames: 13959168. Throughput: 0: 20423.1. Samples: 13973632. Policy #0 lag: (min: 63.0, avg: 78.8, max: 127.0) +[2026-06-02 16:49:14,727][246448] Avg episode reward: [(0, '1198.866')] +[2026-06-02 16:49:14,763][247478] Updated weights for policy 0, policy_version 27316 (0.0008) +[2026-06-02 16:49:14,958][247478] Updated weights for policy 0, policy_version 27327 (0.0008) +[2026-06-02 16:49:14,966][247399] Saving new best policy, reward=1198.866! +[2026-06-02 16:49:15,621][247478] Updated weights for policy 0, policy_version 27338 (0.0009) +[2026-06-02 16:49:15,800][247478] Updated weights for policy 0, policy_version 27348 (0.0009) +[2026-06-02 16:49:15,988][247478] Updated weights for policy 0, policy_version 27358 (0.0010) +[2026-06-02 16:49:16,187][247478] Updated weights for policy 0, policy_version 27369 (0.0009) +[2026-06-02 16:49:16,373][247478] Updated weights for policy 0, policy_version 27379 (0.0008) +[2026-06-02 16:49:16,546][247478] Updated weights for policy 0, policy_version 27389 (0.0008) +[2026-06-02 16:49:17,176][247478] Updated weights for policy 0, policy_version 27399 (0.0008) +[2026-06-02 16:49:17,356][247478] Updated weights for policy 0, policy_version 27409 (0.0009) +[2026-06-02 16:49:17,554][247478] Updated weights for policy 0, policy_version 27420 (0.0008) +[2026-06-02 16:49:17,738][247478] Updated weights for policy 0, policy_version 27430 (0.0006) +[2026-06-02 16:49:17,923][247478] Updated weights for policy 0, policy_version 27440 (0.0004) +[2026-06-02 16:49:18,105][247478] Updated weights for policy 0, policy_version 27450 (0.0004) +[2026-06-02 16:49:18,726][247478] Updated weights for policy 0, policy_version 27460 (0.0006) +[2026-06-02 16:49:18,890][247478] Updated weights for policy 0, policy_version 27470 (0.0009) +[2026-06-02 16:49:19,078][247478] Updated weights for policy 0, policy_version 27480 (0.0008) +[2026-06-02 16:49:19,256][247478] Updated weights for policy 0, policy_version 27490 (0.0008) +[2026-06-02 16:49:19,437][247478] Updated weights for policy 0, policy_version 27500 (0.0008) +[2026-06-02 16:49:19,618][247478] Updated weights for policy 0, policy_version 27510 (0.0009) +[2026-06-02 16:49:19,725][246448] Fps is (10 sec: 19660.7, 60 sec: 20206.9, 300 sec: 20327.3). Total num frames: 14057472. Throughput: 0: 20403.2. Samples: 14098560. Policy #0 lag: (min: 63.0, avg: 78.8, max: 127.0) +[2026-06-02 16:49:19,726][246448] Avg episode reward: [(0, '1205.832')] +[2026-06-02 16:49:19,789][247399] Saving new best policy, reward=1205.832! +[2026-06-02 16:49:19,791][247478] Updated weights for policy 0, policy_version 27520 (0.0008) +[2026-06-02 16:49:20,446][247478] Updated weights for policy 0, policy_version 27530 (0.0009) +[2026-06-02 16:49:20,624][247478] Updated weights for policy 0, policy_version 27540 (0.0006) +[2026-06-02 16:49:20,810][247478] Updated weights for policy 0, policy_version 27551 (0.0008) +[2026-06-02 16:49:20,998][247478] Updated weights for policy 0, policy_version 27561 (0.0009) +[2026-06-02 16:49:21,190][247478] Updated weights for policy 0, policy_version 27572 (0.0009) +[2026-06-02 16:49:21,378][247478] Updated weights for policy 0, policy_version 27582 (0.0009) +[2026-06-02 16:49:22,044][247478] Updated weights for policy 0, policy_version 27593 (0.0009) +[2026-06-02 16:49:22,221][247478] Updated weights for policy 0, policy_version 27604 (0.0009) +[2026-06-02 16:49:22,420][247478] Updated weights for policy 0, policy_version 27615 (0.0009) +[2026-06-02 16:49:22,636][247478] Updated weights for policy 0, policy_version 27628 (0.0009) +[2026-06-02 16:49:22,822][247478] Updated weights for policy 0, policy_version 27638 (0.0009) +[2026-06-02 16:49:22,996][247478] Updated weights for policy 0, policy_version 27648 (0.0008) +[2026-06-02 16:49:23,671][247478] Updated weights for policy 0, policy_version 27658 (0.0009) +[2026-06-02 16:49:23,862][247478] Updated weights for policy 0, policy_version 27669 (0.0008) +[2026-06-02 16:49:24,035][247478] Updated weights for policy 0, policy_version 27679 (0.0008) +[2026-06-02 16:49:24,222][247478] Updated weights for policy 0, policy_version 27689 (0.0008) +[2026-06-02 16:49:24,396][247478] Updated weights for policy 0, policy_version 27699 (0.0009) +[2026-06-02 16:49:24,581][247478] Updated weights for policy 0, policy_version 27709 (0.0008) +[2026-06-02 16:49:24,725][246448] Fps is (10 sec: 22937.7, 60 sec: 20753.1, 300 sec: 20438.3). Total num frames: 14188544. Throughput: 0: 20408.9. Samples: 14211712. Policy #0 lag: (min: 63.0, avg: 78.8, max: 127.0) +[2026-06-02 16:49:24,727][246448] Avg episode reward: [(0, '1282.464')] +[2026-06-02 16:49:24,733][247399] Saving new best policy, reward=1282.464! +[2026-06-02 16:49:25,218][247478] Updated weights for policy 0, policy_version 27719 (0.0008) +[2026-06-02 16:49:25,397][247478] Updated weights for policy 0, policy_version 27729 (0.0008) +[2026-06-02 16:49:25,569][247478] Updated weights for policy 0, policy_version 27739 (0.0008) +[2026-06-02 16:49:25,742][247478] Updated weights for policy 0, policy_version 27749 (0.0009) +[2026-06-02 16:49:25,923][247478] Updated weights for policy 0, policy_version 27759 (0.0009) +[2026-06-02 16:49:26,105][247478] Updated weights for policy 0, policy_version 27769 (0.0008) +[2026-06-02 16:49:26,766][247478] Updated weights for policy 0, policy_version 27779 (0.0008) +[2026-06-02 16:49:26,961][247478] Updated weights for policy 0, policy_version 27791 (0.0008) +[2026-06-02 16:49:27,144][247478] Updated weights for policy 0, policy_version 27801 (0.0009) +[2026-06-02 16:49:27,343][247478] Updated weights for policy 0, policy_version 27812 (0.0008) +[2026-06-02 16:49:27,529][247478] Updated weights for policy 0, policy_version 27822 (0.0009) +[2026-06-02 16:49:27,710][247478] Updated weights for policy 0, policy_version 27832 (0.0009) +[2026-06-02 16:49:28,359][247478] Updated weights for policy 0, policy_version 27842 (0.0009) +[2026-06-02 16:49:28,566][247478] Updated weights for policy 0, policy_version 27854 (0.0009) +[2026-06-02 16:49:28,754][247478] Updated weights for policy 0, policy_version 27864 (0.0008) +[2026-06-02 16:49:28,943][247478] Updated weights for policy 0, policy_version 27875 (0.0009) +[2026-06-02 16:49:29,131][247478] Updated weights for policy 0, policy_version 27885 (0.0009) +[2026-06-02 16:49:29,311][247478] Updated weights for policy 0, policy_version 27895 (0.0008) +[2026-06-02 16:49:29,725][246448] Fps is (10 sec: 22937.7, 60 sec: 20753.1, 300 sec: 20438.4). Total num frames: 14286848. Throughput: 0: 20406.1. Samples: 14274944. Policy #0 lag: (min: 63.0, avg: 78.4, max: 127.0) +[2026-06-02 16:49:29,726][246448] Avg episode reward: [(0, '1304.844')] +[2026-06-02 16:49:29,731][247399] Saving new best policy, reward=1304.844! +[2026-06-02 16:49:29,963][247478] Updated weights for policy 0, policy_version 27906 (0.0009) +[2026-06-02 16:49:30,136][247478] Updated weights for policy 0, policy_version 27916 (0.0009) +[2026-06-02 16:49:30,308][247478] Updated weights for policy 0, policy_version 27926 (0.0008) +[2026-06-02 16:49:30,489][247478] Updated weights for policy 0, policy_version 27936 (0.0008) +[2026-06-02 16:49:30,670][247478] Updated weights for policy 0, policy_version 27946 (0.0009) +[2026-06-02 16:49:30,853][247478] Updated weights for policy 0, policy_version 27956 (0.0009) +[2026-06-02 16:49:31,024][247478] Updated weights for policy 0, policy_version 27966 (0.0008) +[2026-06-02 16:49:31,670][247478] Updated weights for policy 0, policy_version 27976 (0.0009) +[2026-06-02 16:49:31,848][247478] Updated weights for policy 0, policy_version 27986 (0.0009) +[2026-06-02 16:49:32,032][247478] Updated weights for policy 0, policy_version 27996 (0.0009) +[2026-06-02 16:49:32,205][247478] Updated weights for policy 0, policy_version 28006 (0.0009) +[2026-06-02 16:49:32,383][247478] Updated weights for policy 0, policy_version 28016 (0.0008) +[2026-06-02 16:49:32,567][247478] Updated weights for policy 0, policy_version 28026 (0.0009) +[2026-06-02 16:49:33,236][247478] Updated weights for policy 0, policy_version 28037 (0.0009) +[2026-06-02 16:49:33,429][247478] Updated weights for policy 0, policy_version 28048 (0.0009) +[2026-06-02 16:49:33,602][247478] Updated weights for policy 0, policy_version 28058 (0.0009) +[2026-06-02 16:49:33,781][247478] Updated weights for policy 0, policy_version 28068 (0.0009) +[2026-06-02 16:49:33,958][247478] Updated weights for policy 0, policy_version 28078 (0.0009) +[2026-06-02 16:49:34,135][247478] Updated weights for policy 0, policy_version 28088 (0.0008) +[2026-06-02 16:49:34,725][246448] Fps is (10 sec: 19660.6, 60 sec: 20753.1, 300 sec: 20438.3). Total num frames: 14385152. Throughput: 0: 20383.3. Samples: 14400128. Policy #0 lag: (min: 63.0, avg: 78.4, max: 127.0) +[2026-06-02 16:49:34,727][246448] Avg episode reward: [(0, '1353.024')] +[2026-06-02 16:49:34,786][247478] Updated weights for policy 0, policy_version 28098 (0.0008) +[2026-06-02 16:49:34,974][247478] Updated weights for policy 0, policy_version 28109 (0.0009) +[2026-06-02 16:49:35,154][247478] Updated weights for policy 0, policy_version 28119 (0.0008) +[2026-06-02 16:49:35,350][247478] Updated weights for policy 0, policy_version 28130 (0.0008) +[2026-06-02 16:49:35,527][247478] Updated weights for policy 0, policy_version 28140 (0.0008) +[2026-06-02 16:49:35,732][247478] Updated weights for policy 0, policy_version 28151 (0.0008) +[2026-06-02 16:49:35,889][247399] Saving new best policy, reward=1353.024! +[2026-06-02 16:49:36,414][247478] Updated weights for policy 0, policy_version 28161 (0.0009) +[2026-06-02 16:49:36,573][247478] Updated weights for policy 0, policy_version 28171 (0.0008) +[2026-06-02 16:49:36,753][247478] Updated weights for policy 0, policy_version 28181 (0.0008) +[2026-06-02 16:49:36,936][247478] Updated weights for policy 0, policy_version 28191 (0.0008) +[2026-06-02 16:49:37,120][247478] Updated weights for policy 0, policy_version 28201 (0.0008) +[2026-06-02 16:49:37,302][247478] Updated weights for policy 0, policy_version 28211 (0.0008) +[2026-06-02 16:49:37,482][247478] Updated weights for policy 0, policy_version 28221 (0.0008) +[2026-06-02 16:49:38,097][247478] Updated weights for policy 0, policy_version 28231 (0.0008) +[2026-06-02 16:49:38,310][247478] Updated weights for policy 0, policy_version 28243 (0.0009) +[2026-06-02 16:49:38,482][247478] Updated weights for policy 0, policy_version 28253 (0.0009) +[2026-06-02 16:49:38,674][247478] Updated weights for policy 0, policy_version 28263 (0.0009) +[2026-06-02 16:49:38,854][247478] Updated weights for policy 0, policy_version 28273 (0.0009) +[2026-06-02 16:49:39,037][247478] Updated weights for policy 0, policy_version 28283 (0.0009) +[2026-06-02 16:49:39,703][247478] Updated weights for policy 0, policy_version 28294 (0.0009) +[2026-06-02 16:49:39,725][246448] Fps is (10 sec: 19660.8, 60 sec: 20206.9, 300 sec: 20438.3). Total num frames: 14483456. Throughput: 0: 20380.5. Samples: 14525568. Policy #0 lag: (min: 63.0, avg: 78.4, max: 127.0) +[2026-06-02 16:49:39,726][246448] Avg episode reward: [(0, '1332.798')] +[2026-06-02 16:49:39,887][247478] Updated weights for policy 0, policy_version 28304 (0.0009) +[2026-06-02 16:49:40,076][247478] Updated weights for policy 0, policy_version 28315 (0.0009) +[2026-06-02 16:49:40,265][247478] Updated weights for policy 0, policy_version 28325 (0.0009) +[2026-06-02 16:49:40,450][247478] Updated weights for policy 0, policy_version 28335 (0.0009) +[2026-06-02 16:49:40,631][247478] Updated weights for policy 0, policy_version 28345 (0.0009) +[2026-06-02 16:49:41,273][247478] Updated weights for policy 0, policy_version 28355 (0.0008) +[2026-06-02 16:49:41,439][247478] Updated weights for policy 0, policy_version 28365 (0.0009) +[2026-06-02 16:49:41,635][247478] Updated weights for policy 0, policy_version 28376 (0.0008) +[2026-06-02 16:49:41,819][247478] Updated weights for policy 0, policy_version 28386 (0.0008) +[2026-06-02 16:49:42,012][247478] Updated weights for policy 0, policy_version 28396 (0.0008) +[2026-06-02 16:49:42,192][247478] Updated weights for policy 0, policy_version 28406 (0.0009) +[2026-06-02 16:49:42,366][247478] Updated weights for policy 0, policy_version 28416 (0.0008) +[2026-06-02 16:49:43,008][247478] Updated weights for policy 0, policy_version 28427 (0.0009) +[2026-06-02 16:49:43,190][247478] Updated weights for policy 0, policy_version 28437 (0.0009) +[2026-06-02 16:49:43,367][247478] Updated weights for policy 0, policy_version 28447 (0.0007) +[2026-06-02 16:49:43,564][247478] Updated weights for policy 0, policy_version 28458 (0.0008) +[2026-06-02 16:49:43,741][247478] Updated weights for policy 0, policy_version 28468 (0.0008) +[2026-06-02 16:49:43,919][247478] Updated weights for policy 0, policy_version 28478 (0.0009) +[2026-06-02 16:49:44,576][247478] Updated weights for policy 0, policy_version 28488 (0.0009) +[2026-06-02 16:49:44,725][246448] Fps is (10 sec: 19660.9, 60 sec: 20207.0, 300 sec: 20438.3). Total num frames: 14581760. Throughput: 0: 20315.0. Samples: 14584576. Policy #0 lag: (min: 63.0, avg: 78.4, max: 127.0) +[2026-06-02 16:49:44,726][246448] Avg episode reward: [(0, '1325.829')] +[2026-06-02 16:49:44,759][247478] Updated weights for policy 0, policy_version 28498 (0.0008) +[2026-06-02 16:49:44,928][247478] Updated weights for policy 0, policy_version 28508 (0.0009) +[2026-06-02 16:49:45,114][247478] Updated weights for policy 0, policy_version 28518 (0.0009) +[2026-06-02 16:49:45,289][247478] Updated weights for policy 0, policy_version 28528 (0.0009) +[2026-06-02 16:49:45,512][247478] Updated weights for policy 0, policy_version 28540 (0.0009) +[2026-06-02 16:49:46,158][247478] Updated weights for policy 0, policy_version 28550 (0.0007) +[2026-06-02 16:49:46,331][247478] Updated weights for policy 0, policy_version 28560 (0.0007) +[2026-06-02 16:49:46,508][247478] Updated weights for policy 0, policy_version 28570 (0.0012) +[2026-06-02 16:49:46,687][247478] Updated weights for policy 0, policy_version 28580 (0.0007) +[2026-06-02 16:49:46,884][247478] Updated weights for policy 0, policy_version 28591 (0.0008) +[2026-06-02 16:49:47,060][247478] Updated weights for policy 0, policy_version 28601 (0.0008) +[2026-06-02 16:49:47,722][247478] Updated weights for policy 0, policy_version 28611 (0.0009) +[2026-06-02 16:49:47,896][247478] Updated weights for policy 0, policy_version 28621 (0.0008) +[2026-06-02 16:49:48,074][247478] Updated weights for policy 0, policy_version 28631 (0.0008) +[2026-06-02 16:49:48,250][247478] Updated weights for policy 0, policy_version 28641 (0.0009) +[2026-06-02 16:49:48,439][247478] Updated weights for policy 0, policy_version 28651 (0.0009) +[2026-06-02 16:49:48,633][247478] Updated weights for policy 0, policy_version 28662 (0.0008) +[2026-06-02 16:49:48,819][247478] Updated weights for policy 0, policy_version 28672 (0.0009) +[2026-06-02 16:49:49,439][247478] Updated weights for policy 0, policy_version 28682 (0.0009) +[2026-06-02 16:49:49,618][247478] Updated weights for policy 0, policy_version 28692 (0.0008) +[2026-06-02 16:49:49,725][246448] Fps is (10 sec: 19660.7, 60 sec: 20206.9, 300 sec: 20438.3). Total num frames: 14680064. Throughput: 0: 20360.5. Samples: 14701824. Policy #0 lag: (min: 63.0, avg: 78.5, max: 127.0) +[2026-06-02 16:49:49,727][246448] Avg episode reward: [(0, '1323.767')] +[2026-06-02 16:49:49,794][247478] Updated weights for policy 0, policy_version 28702 (0.0010) +[2026-06-02 16:49:49,978][247478] Updated weights for policy 0, policy_version 28712 (0.0008) +[2026-06-02 16:49:50,199][247478] Updated weights for policy 0, policy_version 28724 (0.0009) +[2026-06-02 16:49:50,390][247478] Updated weights for policy 0, policy_version 28734 (0.0009) +[2026-06-02 16:49:51,039][247478] Updated weights for policy 0, policy_version 28745 (0.0008) +[2026-06-02 16:49:51,229][247478] Updated weights for policy 0, policy_version 28756 (0.0009) +[2026-06-02 16:49:51,421][247478] Updated weights for policy 0, policy_version 28766 (0.0008) +[2026-06-02 16:49:51,622][247478] Updated weights for policy 0, policy_version 28777 (0.0008) +[2026-06-02 16:49:51,804][247478] Updated weights for policy 0, policy_version 28787 (0.0008) +[2026-06-02 16:49:51,984][247478] Updated weights for policy 0, policy_version 28797 (0.0008) +[2026-06-02 16:49:52,638][247478] Updated weights for policy 0, policy_version 28808 (0.0009) +[2026-06-02 16:49:52,844][247478] Updated weights for policy 0, policy_version 28819 (0.0008) +[2026-06-02 16:49:53,023][247478] Updated weights for policy 0, policy_version 28829 (0.0009) +[2026-06-02 16:49:53,218][247478] Updated weights for policy 0, policy_version 28840 (0.0008) +[2026-06-02 16:49:53,399][247478] Updated weights for policy 0, policy_version 28850 (0.0007) +[2026-06-02 16:49:53,573][247478] Updated weights for policy 0, policy_version 28860 (0.0009) +[2026-06-02 16:49:54,224][247478] Updated weights for policy 0, policy_version 28870 (0.0008) +[2026-06-02 16:49:54,407][247478] Updated weights for policy 0, policy_version 28880 (0.0008) +[2026-06-02 16:49:54,600][247478] Updated weights for policy 0, policy_version 28891 (0.0008) +[2026-06-02 16:49:54,725][246448] Fps is (10 sec: 19660.9, 60 sec: 20207.0, 300 sec: 20438.3). Total num frames: 14778368. Throughput: 0: 20366.2. Samples: 14827648. Policy #0 lag: (min: 63.0, avg: 78.5, max: 127.0) +[2026-06-02 16:49:54,727][246448] Avg episode reward: [(0, '1334.604')] +[2026-06-02 16:49:54,779][247478] Updated weights for policy 0, policy_version 28901 (0.0011) +[2026-06-02 16:49:54,966][247478] Updated weights for policy 0, policy_version 28911 (0.0011) +[2026-06-02 16:49:55,149][247478] Updated weights for policy 0, policy_version 28921 (0.0010) +[2026-06-02 16:49:55,785][247478] Updated weights for policy 0, policy_version 28931 (0.0009) +[2026-06-02 16:49:55,960][247478] Updated weights for policy 0, policy_version 28941 (0.0009) +[2026-06-02 16:49:56,133][247478] Updated weights for policy 0, policy_version 28951 (0.0009) +[2026-06-02 16:49:56,324][247478] Updated weights for policy 0, policy_version 28961 (0.0009) +[2026-06-02 16:49:56,499][247478] Updated weights for policy 0, policy_version 28971 (0.0009) +[2026-06-02 16:49:56,678][247478] Updated weights for policy 0, policy_version 28981 (0.0009) +[2026-06-02 16:49:56,856][247478] Updated weights for policy 0, policy_version 28991 (0.0009) +[2026-06-02 16:49:57,493][247478] Updated weights for policy 0, policy_version 29001 (0.0009) +[2026-06-02 16:49:57,666][247478] Updated weights for policy 0, policy_version 29011 (0.0009) +[2026-06-02 16:49:57,850][247478] Updated weights for policy 0, policy_version 29021 (0.0009) +[2026-06-02 16:49:58,049][247478] Updated weights for policy 0, policy_version 29032 (0.0008) +[2026-06-02 16:49:58,228][247478] Updated weights for policy 0, policy_version 29042 (0.0008) +[2026-06-02 16:49:58,410][247478] Updated weights for policy 0, policy_version 29052 (0.0008) +[2026-06-02 16:49:59,054][247478] Updated weights for policy 0, policy_version 29062 (0.0008) +[2026-06-02 16:49:59,230][247478] Updated weights for policy 0, policy_version 29072 (0.0008) +[2026-06-02 16:49:59,401][247478] Updated weights for policy 0, policy_version 29082 (0.0008) +[2026-06-02 16:49:59,593][247478] Updated weights for policy 0, policy_version 29092 (0.0008) +[2026-06-02 16:49:59,725][246448] Fps is (10 sec: 19660.9, 60 sec: 20206.9, 300 sec: 20438.3). Total num frames: 14876672. Throughput: 0: 20360.6. Samples: 14889856. Policy #0 lag: (min: 63.0, avg: 78.5, max: 127.0) +[2026-06-02 16:49:59,726][246448] Avg episode reward: [(0, '1378.818')] +[2026-06-02 16:49:59,777][247478] Updated weights for policy 0, policy_version 29102 (0.0008) +[2026-06-02 16:49:59,955][247478] Updated weights for policy 0, policy_version 29112 (0.0009) +[2026-06-02 16:50:00,087][247399] Saving new best policy, reward=1378.818! +[2026-06-02 16:50:00,599][247478] Updated weights for policy 0, policy_version 29122 (0.0009) +[2026-06-02 16:50:00,785][247478] Updated weights for policy 0, policy_version 29133 (0.0008) +[2026-06-02 16:50:00,974][247478] Updated weights for policy 0, policy_version 29143 (0.0008) +[2026-06-02 16:50:01,150][247478] Updated weights for policy 0, policy_version 29153 (0.0008) +[2026-06-02 16:50:01,335][247478] Updated weights for policy 0, policy_version 29163 (0.0008) +[2026-06-02 16:50:01,516][247478] Updated weights for policy 0, policy_version 29173 (0.0008) +[2026-06-02 16:50:01,716][247478] Updated weights for policy 0, policy_version 29184 (0.0008) +[2026-06-02 16:50:02,330][247478] Updated weights for policy 0, policy_version 29194 (0.0008) +[2026-06-02 16:50:02,513][247478] Updated weights for policy 0, policy_version 29204 (0.0008) +[2026-06-02 16:50:02,700][247478] Updated weights for policy 0, policy_version 29214 (0.0009) +[2026-06-02 16:50:02,893][247478] Updated weights for policy 0, policy_version 29225 (0.0008) +[2026-06-02 16:50:03,084][247478] Updated weights for policy 0, policy_version 29235 (0.0008) +[2026-06-02 16:50:03,263][247478] Updated weights for policy 0, policy_version 29245 (0.0008) +[2026-06-02 16:50:03,883][247478] Updated weights for policy 0, policy_version 29255 (0.0008) +[2026-06-02 16:50:04,063][247478] Updated weights for policy 0, policy_version 29265 (0.0008) +[2026-06-02 16:50:04,239][247478] Updated weights for policy 0, policy_version 29275 (0.0008) +[2026-06-02 16:50:04,439][247478] Updated weights for policy 0, policy_version 29286 (0.0008) +[2026-06-02 16:50:04,629][247478] Updated weights for policy 0, policy_version 29296 (0.0008) +[2026-06-02 16:50:04,725][246448] Fps is (10 sec: 19660.7, 60 sec: 20207.0, 300 sec: 20327.3). Total num frames: 14974976. Throughput: 0: 20340.6. Samples: 15013888. Policy #0 lag: (min: 63.0, avg: 78.5, max: 127.0) +[2026-06-02 16:50:04,726][246448] Avg episode reward: [(0, '1342.626')] +[2026-06-02 16:50:04,808][247478] Updated weights for policy 0, policy_version 29306 (0.0008) +[2026-06-02 16:50:05,440][247478] Updated weights for policy 0, policy_version 29316 (0.0009) +[2026-06-02 16:50:05,621][247478] Updated weights for policy 0, policy_version 29326 (0.0009) +[2026-06-02 16:50:05,789][247478] Updated weights for policy 0, policy_version 29336 (0.0008) +[2026-06-02 16:50:05,978][247478] Updated weights for policy 0, policy_version 29346 (0.0008) +[2026-06-02 16:50:06,157][247478] Updated weights for policy 0, policy_version 29356 (0.0009) +[2026-06-02 16:50:06,339][247478] Updated weights for policy 0, policy_version 29366 (0.0009) +[2026-06-02 16:50:06,516][247478] Updated weights for policy 0, policy_version 29376 (0.0008) +[2026-06-02 16:50:07,169][247478] Updated weights for policy 0, policy_version 29388 (0.0009) +[2026-06-02 16:50:07,352][247478] Updated weights for policy 0, policy_version 29398 (0.0008) +[2026-06-02 16:50:07,533][247478] Updated weights for policy 0, policy_version 29408 (0.0008) +[2026-06-02 16:50:07,725][247478] Updated weights for policy 0, policy_version 29418 (0.0009) +[2026-06-02 16:50:07,909][247478] Updated weights for policy 0, policy_version 29428 (0.0009) +[2026-06-02 16:50:08,080][247478] Updated weights for policy 0, policy_version 29438 (0.0008) +[2026-06-02 16:50:08,716][247478] Updated weights for policy 0, policy_version 29448 (0.0009) +[2026-06-02 16:50:08,885][247478] Updated weights for policy 0, policy_version 29458 (0.0009) +[2026-06-02 16:50:09,079][247478] Updated weights for policy 0, policy_version 29469 (0.0008) +[2026-06-02 16:50:09,268][247478] Updated weights for policy 0, policy_version 29479 (0.0009) +[2026-06-02 16:50:09,460][247478] Updated weights for policy 0, policy_version 29490 (0.0008) +[2026-06-02 16:50:09,653][247478] Updated weights for policy 0, policy_version 29500 (0.0009) +[2026-06-02 16:50:09,728][246448] Fps is (10 sec: 22930.5, 60 sec: 20752.0, 300 sec: 20438.1). Total num frames: 15106048. Throughput: 0: 20347.7. Samples: 15127424. Policy #0 lag: (min: 63.0, avg: 79.1, max: 127.0) +[2026-06-02 16:50:09,730][246448] Avg episode reward: [(0, '1348.564')] +[2026-06-02 16:50:10,307][247478] Updated weights for policy 0, policy_version 29510 (0.0009) +[2026-06-02 16:50:10,471][247478] Updated weights for policy 0, policy_version 29520 (0.0008) +[2026-06-02 16:50:10,655][247478] Updated weights for policy 0, policy_version 29530 (0.0008) +[2026-06-02 16:50:10,833][247478] Updated weights for policy 0, policy_version 29540 (0.0008) +[2026-06-02 16:50:11,034][247478] Updated weights for policy 0, policy_version 29551 (0.0008) +[2026-06-02 16:50:11,214][247478] Updated weights for policy 0, policy_version 29561 (0.0008) +[2026-06-02 16:50:11,856][247478] Updated weights for policy 0, policy_version 29571 (0.0009) +[2026-06-02 16:50:12,026][247478] Updated weights for policy 0, policy_version 29581 (0.0008) +[2026-06-02 16:50:12,204][247478] Updated weights for policy 0, policy_version 29591 (0.0008) +[2026-06-02 16:50:12,378][247478] Updated weights for policy 0, policy_version 29601 (0.0009) +[2026-06-02 16:50:12,566][247478] Updated weights for policy 0, policy_version 29611 (0.0008) +[2026-06-02 16:50:12,740][247478] Updated weights for policy 0, policy_version 29621 (0.0008) +[2026-06-02 16:50:12,932][247478] Updated weights for policy 0, policy_version 29631 (0.0009) +[2026-06-02 16:50:13,570][247478] Updated weights for policy 0, policy_version 29641 (0.0008) +[2026-06-02 16:50:13,752][247478] Updated weights for policy 0, policy_version 29651 (0.0008) +[2026-06-02 16:50:13,942][247478] Updated weights for policy 0, policy_version 29662 (0.0008) +[2026-06-02 16:50:14,127][247478] Updated weights for policy 0, policy_version 29672 (0.0009) +[2026-06-02 16:50:14,324][247478] Updated weights for policy 0, policy_version 29683 (0.0008) +[2026-06-02 16:50:14,504][247478] Updated weights for policy 0, policy_version 29693 (0.0008) +[2026-06-02 16:50:14,725][246448] Fps is (10 sec: 22937.7, 60 sec: 20753.1, 300 sec: 20438.3). Total num frames: 15204352. Throughput: 0: 20323.6. Samples: 15189504. Policy #0 lag: (min: 63.0, avg: 79.1, max: 127.0) +[2026-06-02 16:50:14,726][246448] Avg episode reward: [(0, '1301.442')] +[2026-06-02 16:50:15,173][247478] Updated weights for policy 0, policy_version 29704 (0.0009) +[2026-06-02 16:50:15,344][247478] Updated weights for policy 0, policy_version 29714 (0.0008) +[2026-06-02 16:50:15,544][247478] Updated weights for policy 0, policy_version 29725 (0.0007) +[2026-06-02 16:50:15,724][247478] Updated weights for policy 0, policy_version 29735 (0.0007) +[2026-06-02 16:50:15,900][247478] Updated weights for policy 0, policy_version 29745 (0.0008) +[2026-06-02 16:50:16,093][247478] Updated weights for policy 0, policy_version 29755 (0.0008) +[2026-06-02 16:50:16,748][247478] Updated weights for policy 0, policy_version 29765 (0.0009) +[2026-06-02 16:50:16,914][247478] Updated weights for policy 0, policy_version 29775 (0.0009) +[2026-06-02 16:50:17,089][247478] Updated weights for policy 0, policy_version 29785 (0.0009) +[2026-06-02 16:50:17,287][247478] Updated weights for policy 0, policy_version 29796 (0.0009) +[2026-06-02 16:50:17,480][247478] Updated weights for policy 0, policy_version 29806 (0.0009) +[2026-06-02 16:50:17,659][247478] Updated weights for policy 0, policy_version 29816 (0.0009) +[2026-06-02 16:50:18,301][247478] Updated weights for policy 0, policy_version 29826 (0.0009) +[2026-06-02 16:50:18,476][247478] Updated weights for policy 0, policy_version 29837 (0.0008) +[2026-06-02 16:50:18,662][247478] Updated weights for policy 0, policy_version 29847 (0.0008) +[2026-06-02 16:50:18,845][247478] Updated weights for policy 0, policy_version 29857 (0.0008) +[2026-06-02 16:50:19,024][247478] Updated weights for policy 0, policy_version 29867 (0.0009) +[2026-06-02 16:50:19,201][247478] Updated weights for policy 0, policy_version 29877 (0.0008) +[2026-06-02 16:50:19,390][247478] Updated weights for policy 0, policy_version 29887 (0.0009) +[2026-06-02 16:50:19,725][246448] Fps is (10 sec: 19666.8, 60 sec: 20753.1, 300 sec: 20438.3). Total num frames: 15302656. Throughput: 0: 20320.7. Samples: 15314560. Policy #0 lag: (min: 63.0, avg: 79.1, max: 127.0) +[2026-06-02 16:50:19,726][246448] Avg episode reward: [(0, '1356.914')] +[2026-06-02 16:50:20,018][247478] Updated weights for policy 0, policy_version 29897 (0.0009) +[2026-06-02 16:50:20,197][247478] Updated weights for policy 0, policy_version 29907 (0.0008) +[2026-06-02 16:50:20,376][247478] Updated weights for policy 0, policy_version 29917 (0.0009) +[2026-06-02 16:50:20,557][247478] Updated weights for policy 0, policy_version 29927 (0.0008) +[2026-06-02 16:50:20,741][247478] Updated weights for policy 0, policy_version 29937 (0.0008) +[2026-06-02 16:50:20,915][247478] Updated weights for policy 0, policy_version 29947 (0.0008) +[2026-06-02 16:50:21,555][247478] Updated weights for policy 0, policy_version 29957 (0.0008) +[2026-06-02 16:50:21,728][247478] Updated weights for policy 0, policy_version 29967 (0.0008) +[2026-06-02 16:50:21,924][247478] Updated weights for policy 0, policy_version 29978 (0.0009) +[2026-06-02 16:50:22,106][247478] Updated weights for policy 0, policy_version 29988 (0.0008) +[2026-06-02 16:50:22,288][247478] Updated weights for policy 0, policy_version 29998 (0.0008) +[2026-06-02 16:50:22,484][247478] Updated weights for policy 0, policy_version 30009 (0.0010) +[2026-06-02 16:50:23,161][247478] Updated weights for policy 0, policy_version 30020 (0.0009) +[2026-06-02 16:50:23,352][247478] Updated weights for policy 0, policy_version 30031 (0.0008) +[2026-06-02 16:50:23,525][247478] Updated weights for policy 0, policy_version 30041 (0.0008) +[2026-06-02 16:50:23,703][247478] Updated weights for policy 0, policy_version 30051 (0.0008) +[2026-06-02 16:50:23,894][247478] Updated weights for policy 0, policy_version 30061 (0.0008) +[2026-06-02 16:50:24,070][247478] Updated weights for policy 0, policy_version 30071 (0.0008) +[2026-06-02 16:50:24,723][247478] Updated weights for policy 0, policy_version 30081 (0.0008) +[2026-06-02 16:50:24,725][246448] Fps is (10 sec: 19660.7, 60 sec: 20206.9, 300 sec: 20438.3). Total num frames: 15400960. Throughput: 0: 20323.5. Samples: 15440128. Policy #0 lag: (min: 63.0, avg: 79.1, max: 127.0) +[2026-06-02 16:50:24,726][246448] Avg episode reward: [(0, '1371.100')] +[2026-06-02 16:50:24,903][247478] Updated weights for policy 0, policy_version 30092 (0.0008) +[2026-06-02 16:50:25,086][247478] Updated weights for policy 0, policy_version 30102 (0.0008) +[2026-06-02 16:50:25,266][247478] Updated weights for policy 0, policy_version 30112 (0.0008) +[2026-06-02 16:50:25,447][247478] Updated weights for policy 0, policy_version 30122 (0.0008) +[2026-06-02 16:50:25,629][247478] Updated weights for policy 0, policy_version 30132 (0.0009) +[2026-06-02 16:50:25,807][247478] Updated weights for policy 0, policy_version 30142 (0.0008) +[2026-06-02 16:50:26,435][247478] Updated weights for policy 0, policy_version 30152 (0.0008) +[2026-06-02 16:50:26,625][247478] Updated weights for policy 0, policy_version 30163 (0.0008) +[2026-06-02 16:50:26,833][247478] Updated weights for policy 0, policy_version 30174 (0.0008) +[2026-06-02 16:50:27,019][247478] Updated weights for policy 0, policy_version 30184 (0.0008) +[2026-06-02 16:50:27,197][247478] Updated weights for policy 0, policy_version 30194 (0.0008) +[2026-06-02 16:50:27,396][247478] Updated weights for policy 0, policy_version 30205 (0.0009) +[2026-06-02 16:50:28,035][247478] Updated weights for policy 0, policy_version 30216 (0.0009) +[2026-06-02 16:50:28,211][247478] Updated weights for policy 0, policy_version 30226 (0.0008) +[2026-06-02 16:50:28,397][247478] Updated weights for policy 0, policy_version 30236 (0.0008) +[2026-06-02 16:50:28,582][247478] Updated weights for policy 0, policy_version 30246 (0.0008) +[2026-06-02 16:50:28,757][247478] Updated weights for policy 0, policy_version 30256 (0.0008) +[2026-06-02 16:50:28,945][247478] Updated weights for policy 0, policy_version 30266 (0.0008) +[2026-06-02 16:50:29,595][247478] Updated weights for policy 0, policy_version 30277 (0.0008) +[2026-06-02 16:50:29,725][246448] Fps is (10 sec: 19660.8, 60 sec: 20206.9, 300 sec: 20438.3). Total num frames: 15499264. Throughput: 0: 20235.4. Samples: 15495168. Policy #0 lag: (min: 63.0, avg: 79.1, max: 127.0) +[2026-06-02 16:50:29,726][246448] Avg episode reward: [(0, '1388.518')] +[2026-06-02 16:50:29,770][247478] Updated weights for policy 0, policy_version 30287 (0.0008) +[2026-06-02 16:50:29,949][247478] Updated weights for policy 0, policy_version 30297 (0.0008) +[2026-06-02 16:50:30,124][247478] Updated weights for policy 0, policy_version 30307 (0.0008) +[2026-06-02 16:50:30,302][247478] Updated weights for policy 0, policy_version 30317 (0.0008) +[2026-06-02 16:50:30,496][247478] Updated weights for policy 0, policy_version 30327 (0.0009) +[2026-06-02 16:50:30,661][247399] Saving new best policy, reward=1388.518! +[2026-06-02 16:50:31,144][247478] Updated weights for policy 0, policy_version 30337 (0.0008) +[2026-06-02 16:50:31,307][247478] Updated weights for policy 0, policy_version 30347 (0.0009) +[2026-06-02 16:50:31,478][247478] Updated weights for policy 0, policy_version 30357 (0.0009) +[2026-06-02 16:50:31,666][247478] Updated weights for policy 0, policy_version 30367 (0.0008) +[2026-06-02 16:50:31,847][247478] Updated weights for policy 0, policy_version 30377 (0.0008) +[2026-06-02 16:50:32,025][247478] Updated weights for policy 0, policy_version 30387 (0.0008) +[2026-06-02 16:50:32,201][247478] Updated weights for policy 0, policy_version 30397 (0.0008) +[2026-06-02 16:50:32,863][247478] Updated weights for policy 0, policy_version 30408 (0.0009) +[2026-06-02 16:50:33,037][247478] Updated weights for policy 0, policy_version 30418 (0.0008) +[2026-06-02 16:50:33,230][247478] Updated weights for policy 0, policy_version 30428 (0.0008) +[2026-06-02 16:50:33,408][247478] Updated weights for policy 0, policy_version 30438 (0.0009) +[2026-06-02 16:50:33,583][247478] Updated weights for policy 0, policy_version 30448 (0.0008) +[2026-06-02 16:50:33,780][247478] Updated weights for policy 0, policy_version 30459 (0.0008) +[2026-06-02 16:50:34,420][247478] Updated weights for policy 0, policy_version 30469 (0.0009) +[2026-06-02 16:50:34,594][247478] Updated weights for policy 0, policy_version 30479 (0.0008) +[2026-06-02 16:50:34,725][246448] Fps is (10 sec: 19660.9, 60 sec: 20207.0, 300 sec: 20438.3). Total num frames: 15597568. Throughput: 0: 20289.4. Samples: 15614848. Policy #0 lag: (min: 52.0, avg: 68.8, max: 116.0) +[2026-06-02 16:50:34,726][246448] Avg episode reward: [(0, '1440.516')] +[2026-06-02 16:50:34,773][247478] Updated weights for policy 0, policy_version 30489 (0.0009) +[2026-06-02 16:50:34,968][247478] Updated weights for policy 0, policy_version 30500 (0.0008) +[2026-06-02 16:50:35,153][247478] Updated weights for policy 0, policy_version 30510 (0.0008) +[2026-06-02 16:50:35,333][247478] Updated weights for policy 0, policy_version 30520 (0.0008) +[2026-06-02 16:50:35,475][247399] Saving new best policy, reward=1440.516! +[2026-06-02 16:50:35,964][247478] Updated weights for policy 0, policy_version 30530 (0.0009) +[2026-06-02 16:50:36,141][247478] Updated weights for policy 0, policy_version 30540 (0.0008) +[2026-06-02 16:50:36,314][247478] Updated weights for policy 0, policy_version 30550 (0.0008) +[2026-06-02 16:50:36,501][247478] Updated weights for policy 0, policy_version 30560 (0.0009) +[2026-06-02 16:50:36,674][247478] Updated weights for policy 0, policy_version 30570 (0.0008) +[2026-06-02 16:50:36,856][247478] Updated weights for policy 0, policy_version 30580 (0.0008) +[2026-06-02 16:50:37,046][247478] Updated weights for policy 0, policy_version 30590 (0.0009) +[2026-06-02 16:50:37,672][247478] Updated weights for policy 0, policy_version 30600 (0.0009) +[2026-06-02 16:50:37,860][247478] Updated weights for policy 0, policy_version 30610 (0.0008) +[2026-06-02 16:50:38,035][247478] Updated weights for policy 0, policy_version 30620 (0.0008) +[2026-06-02 16:50:38,232][247478] Updated weights for policy 0, policy_version 30631 (0.0008) +[2026-06-02 16:50:38,415][247478] Updated weights for policy 0, policy_version 30641 (0.0009) +[2026-06-02 16:50:38,606][247478] Updated weights for policy 0, policy_version 30651 (0.0008) +[2026-06-02 16:50:39,248][247478] Updated weights for policy 0, policy_version 30662 (0.0008) +[2026-06-02 16:50:39,423][247478] Updated weights for policy 0, policy_version 30672 (0.0009) +[2026-06-02 16:50:39,604][247478] Updated weights for policy 0, policy_version 30682 (0.0009) +[2026-06-02 16:50:39,725][246448] Fps is (10 sec: 19660.8, 60 sec: 20206.9, 300 sec: 20438.3). Total num frames: 15695872. Throughput: 0: 20258.1. Samples: 15739264. Policy #0 lag: (min: 52.0, avg: 68.8, max: 116.0) +[2026-06-02 16:50:39,726][246448] Avg episode reward: [(0, '1424.681')] +[2026-06-02 16:50:39,797][247478] Updated weights for policy 0, policy_version 30692 (0.0008) +[2026-06-02 16:50:39,979][247478] Updated weights for policy 0, policy_version 30702 (0.0008) +[2026-06-02 16:50:40,163][247478] Updated weights for policy 0, policy_version 30712 (0.0008) +[2026-06-02 16:50:40,835][247478] Updated weights for policy 0, policy_version 30722 (0.0008) +[2026-06-02 16:50:41,001][247478] Updated weights for policy 0, policy_version 30732 (0.0004) +[2026-06-02 16:50:41,184][247478] Updated weights for policy 0, policy_version 30742 (0.0005) +[2026-06-02 16:50:41,368][247478] Updated weights for policy 0, policy_version 30752 (0.0005) +[2026-06-02 16:50:41,549][247478] Updated weights for policy 0, policy_version 30762 (0.0004) +[2026-06-02 16:50:41,724][247478] Updated weights for policy 0, policy_version 30772 (0.0005) +[2026-06-02 16:50:41,905][247478] Updated weights for policy 0, policy_version 30782 (0.0004) +[2026-06-02 16:50:42,514][247478] Updated weights for policy 0, policy_version 30792 (0.0008) +[2026-06-02 16:50:42,682][247478] Updated weights for policy 0, policy_version 30802 (0.0008) +[2026-06-02 16:50:42,873][247478] Updated weights for policy 0, policy_version 30812 (0.0008) +[2026-06-02 16:50:43,057][247478] Updated weights for policy 0, policy_version 30822 (0.0009) +[2026-06-02 16:50:43,233][247478] Updated weights for policy 0, policy_version 30832 (0.0008) +[2026-06-02 16:50:43,412][247478] Updated weights for policy 0, policy_version 30842 (0.0008) +[2026-06-02 16:50:44,055][247478] Updated weights for policy 0, policy_version 30852 (0.0009) +[2026-06-02 16:50:44,226][247478] Updated weights for policy 0, policy_version 30862 (0.0008) +[2026-06-02 16:50:44,395][247478] Updated weights for policy 0, policy_version 30872 (0.0009) +[2026-06-02 16:50:44,582][247478] Updated weights for policy 0, policy_version 30882 (0.0008) +[2026-06-02 16:50:44,725][246448] Fps is (10 sec: 19660.8, 60 sec: 20206.9, 300 sec: 20438.3). Total num frames: 15794176. Throughput: 0: 20275.2. Samples: 15802240. Policy #0 lag: (min: 52.0, avg: 68.8, max: 116.0) +[2026-06-02 16:50:44,726][246448] Avg episode reward: [(0, '1445.003')] +[2026-06-02 16:50:44,768][247478] Updated weights for policy 0, policy_version 30892 (0.0009) +[2026-06-02 16:50:44,951][247478] Updated weights for policy 0, policy_version 30902 (0.0008) +[2026-06-02 16:50:45,123][247399] Saving new best policy, reward=1445.003! +[2026-06-02 16:50:45,636][247478] Updated weights for policy 0, policy_version 30914 (0.0008) +[2026-06-02 16:50:45,801][247478] Updated weights for policy 0, policy_version 30924 (0.0008) +[2026-06-02 16:50:45,978][247478] Updated weights for policy 0, policy_version 30934 (0.0008) +[2026-06-02 16:50:46,169][247478] Updated weights for policy 0, policy_version 30944 (0.0009) +[2026-06-02 16:50:46,348][247478] Updated weights for policy 0, policy_version 30954 (0.0009) +[2026-06-02 16:50:46,525][247478] Updated weights for policy 0, policy_version 30964 (0.0008) +[2026-06-02 16:50:46,716][247478] Updated weights for policy 0, policy_version 30974 (0.0009) +[2026-06-02 16:50:47,337][247478] Updated weights for policy 0, policy_version 30984 (0.0008) +[2026-06-02 16:50:47,519][247478] Updated weights for policy 0, policy_version 30994 (0.0008) +[2026-06-02 16:50:47,696][247478] Updated weights for policy 0, policy_version 31004 (0.0008) +[2026-06-02 16:50:47,878][247478] Updated weights for policy 0, policy_version 31014 (0.0008) +[2026-06-02 16:50:48,055][247478] Updated weights for policy 0, policy_version 31024 (0.0008) +[2026-06-02 16:50:48,234][247478] Updated weights for policy 0, policy_version 31034 (0.0008) +[2026-06-02 16:50:48,896][247478] Updated weights for policy 0, policy_version 31044 (0.0009) +[2026-06-02 16:50:49,063][247478] Updated weights for policy 0, policy_version 31054 (0.0008) +[2026-06-02 16:50:49,235][247478] Updated weights for policy 0, policy_version 31064 (0.0008) +[2026-06-02 16:50:49,418][247478] Updated weights for policy 0, policy_version 31074 (0.0008) +[2026-06-02 16:50:49,609][247478] Updated weights for policy 0, policy_version 31084 (0.0008) +[2026-06-02 16:50:49,725][246448] Fps is (10 sec: 19660.8, 60 sec: 20206.9, 300 sec: 20327.3). Total num frames: 15892480. Throughput: 0: 20235.4. Samples: 15924480. Policy #0 lag: (min: 52.0, avg: 68.8, max: 116.0) +[2026-06-02 16:50:49,726][246448] Avg episode reward: [(0, '1462.329')] +[2026-06-02 16:50:49,790][247478] Updated weights for policy 0, policy_version 31094 (0.0008) +[2026-06-02 16:50:49,967][247399] Saving new best policy, reward=1462.329! +[2026-06-02 16:50:49,970][247478] Updated weights for policy 0, policy_version 31104 (0.0008) +[2026-06-02 16:50:50,603][247478] Updated weights for policy 0, policy_version 31114 (0.0009) +[2026-06-02 16:50:50,780][247478] Updated weights for policy 0, policy_version 31124 (0.0009) +[2026-06-02 16:50:50,959][247478] Updated weights for policy 0, policy_version 31134 (0.0009) +[2026-06-02 16:50:51,146][247478] Updated weights for policy 0, policy_version 31144 (0.0005) +[2026-06-02 16:50:51,345][247478] Updated weights for policy 0, policy_version 31155 (0.0009) +[2026-06-02 16:50:51,533][247478] Updated weights for policy 0, policy_version 31165 (0.0011) +[2026-06-02 16:50:52,150][247478] Updated weights for policy 0, policy_version 31175 (0.0012) +[2026-06-02 16:50:52,330][247478] Updated weights for policy 0, policy_version 31185 (0.0013) +[2026-06-02 16:50:52,525][247478] Updated weights for policy 0, policy_version 31196 (0.0010) +[2026-06-02 16:50:52,722][247478] Updated weights for policy 0, policy_version 31206 (0.0005) +[2026-06-02 16:50:52,894][247478] Updated weights for policy 0, policy_version 31216 (0.0005) +[2026-06-02 16:50:53,079][247478] Updated weights for policy 0, policy_version 31226 (0.0006) +[2026-06-02 16:50:53,723][247478] Updated weights for policy 0, policy_version 31237 (0.0007) +[2026-06-02 16:50:53,896][247478] Updated weights for policy 0, policy_version 31247 (0.0008) +[2026-06-02 16:50:54,082][247478] Updated weights for policy 0, policy_version 31257 (0.0008) +[2026-06-02 16:50:54,264][247478] Updated weights for policy 0, policy_version 31267 (0.0009) +[2026-06-02 16:50:54,440][247478] Updated weights for policy 0, policy_version 31277 (0.0009) +[2026-06-02 16:50:54,621][247478] Updated weights for policy 0, policy_version 31287 (0.0009) +[2026-06-02 16:50:54,725][246448] Fps is (10 sec: 19660.8, 60 sec: 20206.9, 300 sec: 20327.3). Total num frames: 15990784. Throughput: 0: 20288.0. Samples: 16040320. Policy #0 lag: (min: 63.0, avg: 80.2, max: 127.0) +[2026-06-02 16:50:54,726][246448] Avg episode reward: [(0, '1414.914')] +[2026-06-02 16:50:55,274][247478] Updated weights for policy 0, policy_version 31297 (0.0009) +[2026-06-02 16:50:55,437][247478] Updated weights for policy 0, policy_version 31307 (0.0008) +[2026-06-02 16:50:55,614][247478] Updated weights for policy 0, policy_version 31317 (0.0008) +[2026-06-02 16:50:55,801][247478] Updated weights for policy 0, policy_version 31327 (0.0008) +[2026-06-02 16:50:55,979][247478] Updated weights for policy 0, policy_version 31337 (0.0009) +[2026-06-02 16:50:56,165][247478] Updated weights for policy 0, policy_version 31347 (0.0008) +[2026-06-02 16:50:56,351][247478] Updated weights for policy 0, policy_version 31357 (0.0009) +[2026-06-02 16:50:56,997][247478] Updated weights for policy 0, policy_version 31368 (0.0009) +[2026-06-02 16:50:57,188][247478] Updated weights for policy 0, policy_version 31379 (0.0008) +[2026-06-02 16:50:57,372][247478] Updated weights for policy 0, policy_version 31389 (0.0008) +[2026-06-02 16:50:57,574][247478] Updated weights for policy 0, policy_version 31400 (0.0008) +[2026-06-02 16:50:57,753][247478] Updated weights for policy 0, policy_version 31410 (0.0008) +[2026-06-02 16:50:57,936][247478] Updated weights for policy 0, policy_version 31420 (0.0009) +[2026-06-02 16:50:58,572][247478] Updated weights for policy 0, policy_version 31430 (0.0009) +[2026-06-02 16:50:58,746][247478] Updated weights for policy 0, policy_version 31440 (0.0010) +[2026-06-02 16:50:58,926][247478] Updated weights for policy 0, policy_version 31450 (0.0008) +[2026-06-02 16:50:59,104][247478] Updated weights for policy 0, policy_version 31460 (0.0008) +[2026-06-02 16:50:59,289][247478] Updated weights for policy 0, policy_version 31470 (0.0007) +[2026-06-02 16:50:59,470][247478] Updated weights for policy 0, policy_version 31480 (0.0005) +[2026-06-02 16:50:59,725][246448] Fps is (10 sec: 22937.5, 60 sec: 20753.0, 300 sec: 20438.3). Total num frames: 16121856. Throughput: 0: 20295.1. Samples: 16102784. Policy #0 lag: (min: 63.0, avg: 80.2, max: 127.0) +[2026-06-02 16:50:59,727][246448] Avg episode reward: [(0, '1426.634')] +[2026-06-02 16:51:00,098][247478] Updated weights for policy 0, policy_version 31490 (0.0006) +[2026-06-02 16:51:00,266][247478] Updated weights for policy 0, policy_version 31500 (0.0008) +[2026-06-02 16:51:00,465][247478] Updated weights for policy 0, policy_version 31511 (0.0009) +[2026-06-02 16:51:00,634][247478] Updated weights for policy 0, policy_version 31521 (0.0008) +[2026-06-02 16:51:00,819][247478] Updated weights for policy 0, policy_version 31531 (0.0008) +[2026-06-02 16:51:00,992][247478] Updated weights for policy 0, policy_version 31541 (0.0009) +[2026-06-02 16:51:01,183][247478] Updated weights for policy 0, policy_version 31551 (0.0008) +[2026-06-02 16:51:01,826][247478] Updated weights for policy 0, policy_version 31561 (0.0009) +[2026-06-02 16:51:02,007][247478] Updated weights for policy 0, policy_version 31571 (0.0008) +[2026-06-02 16:51:02,185][247478] Updated weights for policy 0, policy_version 31581 (0.0007) +[2026-06-02 16:51:02,367][247478] Updated weights for policy 0, policy_version 31591 (0.0008) +[2026-06-02 16:51:02,552][247478] Updated weights for policy 0, policy_version 31601 (0.0008) +[2026-06-02 16:51:02,721][247478] Updated weights for policy 0, policy_version 31611 (0.0008) +[2026-06-02 16:51:03,381][247478] Updated weights for policy 0, policy_version 31621 (0.0009) +[2026-06-02 16:51:03,558][247478] Updated weights for policy 0, policy_version 31631 (0.0008) +[2026-06-02 16:51:03,741][247478] Updated weights for policy 0, policy_version 31641 (0.0008) +[2026-06-02 16:51:03,922][247478] Updated weights for policy 0, policy_version 31651 (0.0009) +[2026-06-02 16:51:04,106][247478] Updated weights for policy 0, policy_version 31661 (0.0008) +[2026-06-02 16:51:04,292][247478] Updated weights for policy 0, policy_version 31671 (0.0008) +[2026-06-02 16:51:04,725][246448] Fps is (10 sec: 22937.5, 60 sec: 20753.1, 300 sec: 20438.3). Total num frames: 16220160. Throughput: 0: 20306.5. Samples: 16228352. Policy #0 lag: (min: 63.0, avg: 80.2, max: 127.0) +[2026-06-02 16:51:04,727][246448] Avg episode reward: [(0, '1370.688')] +[2026-06-02 16:51:04,939][247478] Updated weights for policy 0, policy_version 31681 (0.0009) +[2026-06-02 16:51:05,108][247478] Updated weights for policy 0, policy_version 31691 (0.0008) +[2026-06-02 16:51:05,285][247478] Updated weights for policy 0, policy_version 31701 (0.0008) +[2026-06-02 16:51:05,467][247478] Updated weights for policy 0, policy_version 31711 (0.0008) +[2026-06-02 16:51:05,648][247478] Updated weights for policy 0, policy_version 31721 (0.0008) +[2026-06-02 16:51:05,825][247478] Updated weights for policy 0, policy_version 31731 (0.0008) +[2026-06-02 16:51:06,007][247478] Updated weights for policy 0, policy_version 31741 (0.0008) +[2026-06-02 16:51:06,662][247478] Updated weights for policy 0, policy_version 31751 (0.0008) +[2026-06-02 16:51:06,840][247478] Updated weights for policy 0, policy_version 31761 (0.0009) +[2026-06-02 16:51:07,020][247478] Updated weights for policy 0, policy_version 31771 (0.0008) +[2026-06-02 16:51:07,198][247478] Updated weights for policy 0, policy_version 31781 (0.0008) +[2026-06-02 16:51:07,377][247478] Updated weights for policy 0, policy_version 31791 (0.0009) +[2026-06-02 16:51:07,568][247478] Updated weights for policy 0, policy_version 31801 (0.0008) +[2026-06-02 16:51:08,230][247478] Updated weights for policy 0, policy_version 31812 (0.0009) +[2026-06-02 16:51:08,393][247478] Updated weights for policy 0, policy_version 31822 (0.0008) +[2026-06-02 16:51:08,578][247478] Updated weights for policy 0, policy_version 31832 (0.0008) +[2026-06-02 16:51:08,759][247478] Updated weights for policy 0, policy_version 31842 (0.0009) +[2026-06-02 16:51:08,951][247478] Updated weights for policy 0, policy_version 31852 (0.0008) +[2026-06-02 16:51:09,130][247478] Updated weights for policy 0, policy_version 31862 (0.0008) +[2026-06-02 16:51:09,307][247478] Updated weights for policy 0, policy_version 31872 (0.0008) +[2026-06-02 16:51:09,725][246448] Fps is (10 sec: 19660.9, 60 sec: 20208.0, 300 sec: 20438.3). Total num frames: 16318464. Throughput: 0: 20275.2. Samples: 16352512. Policy #0 lag: (min: 63.0, avg: 80.2, max: 127.0) +[2026-06-02 16:51:09,726][246448] Avg episode reward: [(0, '1323.059')] +[2026-06-02 16:51:09,955][247478] Updated weights for policy 0, policy_version 31882 (0.0009) +[2026-06-02 16:51:10,128][247478] Updated weights for policy 0, policy_version 31892 (0.0008) +[2026-06-02 16:51:10,318][247478] Updated weights for policy 0, policy_version 31902 (0.0008) +[2026-06-02 16:51:10,501][247478] Updated weights for policy 0, policy_version 31912 (0.0008) +[2026-06-02 16:51:10,696][247478] Updated weights for policy 0, policy_version 31923 (0.0008) +[2026-06-02 16:51:10,880][247478] Updated weights for policy 0, policy_version 31933 (0.0009) +[2026-06-02 16:51:11,513][247478] Updated weights for policy 0, policy_version 31943 (0.0009) +[2026-06-02 16:51:11,686][247478] Updated weights for policy 0, policy_version 31953 (0.0009) +[2026-06-02 16:51:11,863][247478] Updated weights for policy 0, policy_version 31963 (0.0008) +[2026-06-02 16:51:12,039][247478] Updated weights for policy 0, policy_version 31973 (0.0008) +[2026-06-02 16:51:12,224][247478] Updated weights for policy 0, policy_version 31983 (0.0008) +[2026-06-02 16:51:12,396][247478] Updated weights for policy 0, policy_version 31993 (0.0008) +[2026-06-02 16:51:13,059][247478] Updated weights for policy 0, policy_version 32003 (0.0009) +[2026-06-02 16:51:13,221][247478] Updated weights for policy 0, policy_version 32013 (0.0009) +[2026-06-02 16:51:13,397][247478] Updated weights for policy 0, policy_version 32023 (0.0009) +[2026-06-02 16:51:13,588][247478] Updated weights for policy 0, policy_version 32033 (0.0009) +[2026-06-02 16:51:13,763][247478] Updated weights for policy 0, policy_version 32043 (0.0009) +[2026-06-02 16:51:13,944][247478] Updated weights for policy 0, policy_version 32053 (0.0008) +[2026-06-02 16:51:14,132][247478] Updated weights for policy 0, policy_version 32063 (0.0009) +[2026-06-02 16:51:14,725][246448] Fps is (10 sec: 19660.9, 60 sec: 20206.9, 300 sec: 20438.3). Total num frames: 16416768. Throughput: 0: 20249.6. Samples: 16406400. Policy #0 lag: (min: 63.0, avg: 80.2, max: 127.0) +[2026-06-02 16:51:14,726][246448] Avg episode reward: [(0, '1411.727')] +[2026-06-02 16:51:14,785][247478] Updated weights for policy 0, policy_version 32073 (0.0008) +[2026-06-02 16:51:14,959][247478] Updated weights for policy 0, policy_version 32083 (0.0008) +[2026-06-02 16:51:15,149][247478] Updated weights for policy 0, policy_version 32093 (0.0008) +[2026-06-02 16:51:15,325][247478] Updated weights for policy 0, policy_version 32103 (0.0008) +[2026-06-02 16:51:15,506][247478] Updated weights for policy 0, policy_version 32113 (0.0008) +[2026-06-02 16:51:15,708][247478] Updated weights for policy 0, policy_version 32124 (0.0009) +[2026-06-02 16:51:16,333][247478] Updated weights for policy 0, policy_version 32134 (0.0009) +[2026-06-02 16:51:16,514][247478] Updated weights for policy 0, policy_version 32144 (0.0008) +[2026-06-02 16:51:16,692][247478] Updated weights for policy 0, policy_version 32154 (0.0008) +[2026-06-02 16:51:16,877][247478] Updated weights for policy 0, policy_version 32164 (0.0009) +[2026-06-02 16:51:17,061][247478] Updated weights for policy 0, policy_version 32174 (0.0008) +[2026-06-02 16:51:17,257][247478] Updated weights for policy 0, policy_version 32185 (0.0009) +[2026-06-02 16:51:17,881][247478] Updated weights for policy 0, policy_version 32195 (0.0009) +[2026-06-02 16:51:18,052][247478] Updated weights for policy 0, policy_version 32205 (0.0008) +[2026-06-02 16:51:18,237][247478] Updated weights for policy 0, policy_version 32215 (0.0008) +[2026-06-02 16:51:18,417][247478] Updated weights for policy 0, policy_version 32225 (0.0008) +[2026-06-02 16:51:18,601][247478] Updated weights for policy 0, policy_version 32235 (0.0008) +[2026-06-02 16:51:18,782][247478] Updated weights for policy 0, policy_version 32245 (0.0008) +[2026-06-02 16:51:18,960][247478] Updated weights for policy 0, policy_version 32255 (0.0008) +[2026-06-02 16:51:19,597][247478] Updated weights for policy 0, policy_version 32265 (0.0009) +[2026-06-02 16:51:19,725][246448] Fps is (10 sec: 19660.9, 60 sec: 20207.0, 300 sec: 20438.3). Total num frames: 16515072. Throughput: 0: 20332.1. Samples: 16529792. Policy #0 lag: (min: 27.0, avg: 58.3, max: 91.0) +[2026-06-02 16:51:19,726][246448] Avg episode reward: [(0, '1429.853')] +[2026-06-02 16:51:19,773][247478] Updated weights for policy 0, policy_version 32275 (0.0009) +[2026-06-02 16:51:19,970][247478] Updated weights for policy 0, policy_version 32286 (0.0008) +[2026-06-02 16:51:20,159][247478] Updated weights for policy 0, policy_version 32296 (0.0008) +[2026-06-02 16:51:20,338][247478] Updated weights for policy 0, policy_version 32306 (0.0009) +[2026-06-02 16:51:20,541][247478] Updated weights for policy 0, policy_version 32317 (0.0008) +[2026-06-02 16:51:21,158][247478] Updated weights for policy 0, policy_version 32327 (0.0009) +[2026-06-02 16:51:21,338][247478] Updated weights for policy 0, policy_version 32337 (0.0008) +[2026-06-02 16:51:21,520][247478] Updated weights for policy 0, policy_version 32347 (0.0009) +[2026-06-02 16:51:21,719][247478] Updated weights for policy 0, policy_version 32358 (0.0009) +[2026-06-02 16:51:21,893][247478] Updated weights for policy 0, policy_version 32368 (0.0008) +[2026-06-02 16:51:22,083][247478] Updated weights for policy 0, policy_version 32378 (0.0008) +[2026-06-02 16:51:22,734][247478] Updated weights for policy 0, policy_version 32388 (0.0009) +[2026-06-02 16:51:22,906][247478] Updated weights for policy 0, policy_version 32398 (0.0008) +[2026-06-02 16:51:23,082][247478] Updated weights for policy 0, policy_version 32408 (0.0009) +[2026-06-02 16:51:23,267][247478] Updated weights for policy 0, policy_version 32418 (0.0008) +[2026-06-02 16:51:23,447][247478] Updated weights for policy 0, policy_version 32428 (0.0008) +[2026-06-02 16:51:23,630][247478] Updated weights for policy 0, policy_version 32438 (0.0008) +[2026-06-02 16:51:23,805][247478] Updated weights for policy 0, policy_version 32448 (0.0009) +[2026-06-02 16:51:24,450][247478] Updated weights for policy 0, policy_version 32458 (0.0008) +[2026-06-02 16:51:24,640][247478] Updated weights for policy 0, policy_version 32469 (0.0008) +[2026-06-02 16:51:24,725][246448] Fps is (10 sec: 19660.8, 60 sec: 20206.9, 300 sec: 20327.3). Total num frames: 16613376. Throughput: 0: 20343.5. Samples: 16654720. Policy #0 lag: (min: 27.0, avg: 58.3, max: 91.0) +[2026-06-02 16:51:24,726][246448] Avg episode reward: [(0, '1423.820')] +[2026-06-02 16:51:24,821][247478] Updated weights for policy 0, policy_version 32479 (0.0008) +[2026-06-02 16:51:25,010][247478] Updated weights for policy 0, policy_version 32489 (0.0008) +[2026-06-02 16:51:25,190][247478] Updated weights for policy 0, policy_version 32499 (0.0008) +[2026-06-02 16:51:25,377][247478] Updated weights for policy 0, policy_version 32509 (0.0008) +[2026-06-02 16:51:26,041][247478] Updated weights for policy 0, policy_version 32521 (0.0008) +[2026-06-02 16:51:26,219][247478] Updated weights for policy 0, policy_version 32531 (0.0008) +[2026-06-02 16:51:26,410][247478] Updated weights for policy 0, policy_version 32541 (0.0008) +[2026-06-02 16:51:26,575][247478] Updated weights for policy 0, policy_version 32551 (0.0008) +[2026-06-02 16:51:26,783][247478] Updated weights for policy 0, policy_version 32562 (0.0008) +[2026-06-02 16:51:26,962][247478] Updated weights for policy 0, policy_version 32572 (0.0007) +[2026-06-02 16:51:27,612][247478] Updated weights for policy 0, policy_version 32582 (0.0009) +[2026-06-02 16:51:27,801][247478] Updated weights for policy 0, policy_version 32593 (0.0009) +[2026-06-02 16:51:27,977][247478] Updated weights for policy 0, policy_version 32603 (0.0009) +[2026-06-02 16:51:28,170][247478] Updated weights for policy 0, policy_version 32613 (0.0009) +[2026-06-02 16:51:28,353][247478] Updated weights for policy 0, policy_version 32623 (0.0008) +[2026-06-02 16:51:28,535][247478] Updated weights for policy 0, policy_version 32633 (0.0008) +[2026-06-02 16:51:29,185][247478] Updated weights for policy 0, policy_version 32644 (0.0009) +[2026-06-02 16:51:29,361][247478] Updated weights for policy 0, policy_version 32654 (0.0009) +[2026-06-02 16:51:29,546][247478] Updated weights for policy 0, policy_version 32664 (0.0008) +[2026-06-02 16:51:29,725][246448] Fps is (10 sec: 19660.7, 60 sec: 20206.9, 300 sec: 20327.3). Total num frames: 16711680. Throughput: 0: 20334.9. Samples: 16717312. Policy #0 lag: (min: 27.0, avg: 58.3, max: 91.0) +[2026-06-02 16:51:29,726][246448] Avg episode reward: [(0, '1454.742')] +[2026-06-02 16:51:29,727][247478] Updated weights for policy 0, policy_version 32674 (0.0009) +[2026-06-02 16:51:29,904][247478] Updated weights for policy 0, policy_version 32684 (0.0008) +[2026-06-02 16:51:30,090][247478] Updated weights for policy 0, policy_version 32694 (0.0008) +[2026-06-02 16:51:30,752][247478] Updated weights for policy 0, policy_version 32705 (0.0008) +[2026-06-02 16:51:30,925][247478] Updated weights for policy 0, policy_version 32715 (0.0008) +[2026-06-02 16:51:31,103][247478] Updated weights for policy 0, policy_version 32725 (0.0009) +[2026-06-02 16:51:31,299][247478] Updated weights for policy 0, policy_version 32736 (0.0008) +[2026-06-02 16:51:31,481][247478] Updated weights for policy 0, policy_version 32746 (0.0008) +[2026-06-02 16:51:31,657][247478] Updated weights for policy 0, policy_version 32756 (0.0008) +[2026-06-02 16:51:31,843][247478] Updated weights for policy 0, policy_version 32766 (0.0008) +[2026-06-02 16:51:32,478][247478] Updated weights for policy 0, policy_version 32776 (0.0008) +[2026-06-02 16:51:32,660][247478] Updated weights for policy 0, policy_version 32786 (0.0008) +[2026-06-02 16:51:32,838][247478] Updated weights for policy 0, policy_version 32796 (0.0008) +[2026-06-02 16:51:33,012][247478] Updated weights for policy 0, policy_version 32806 (0.0008) +[2026-06-02 16:51:33,204][247478] Updated weights for policy 0, policy_version 32816 (0.0008) +[2026-06-02 16:51:33,386][247478] Updated weights for policy 0, policy_version 32826 (0.0009) +[2026-06-02 16:51:34,015][247478] Updated weights for policy 0, policy_version 32836 (0.0008) +[2026-06-02 16:51:34,190][247478] Updated weights for policy 0, policy_version 32846 (0.0009) +[2026-06-02 16:51:34,363][247478] Updated weights for policy 0, policy_version 32856 (0.0008) +[2026-06-02 16:51:34,548][247478] Updated weights for policy 0, policy_version 32866 (0.0008) +[2026-06-02 16:51:34,725][246448] Fps is (10 sec: 19660.9, 60 sec: 20206.9, 300 sec: 20327.3). Total num frames: 16809984. Throughput: 0: 20212.6. Samples: 16834048. Policy #0 lag: (min: 27.0, avg: 58.3, max: 91.0) +[2026-06-02 16:51:34,726][246448] Avg episode reward: [(0, '1525.038')] +[2026-06-02 16:51:34,731][247478] Updated weights for policy 0, policy_version 32876 (0.0009) +[2026-06-02 16:51:34,911][247478] Updated weights for policy 0, policy_version 32886 (0.0009) +[2026-06-02 16:51:35,084][247399] Saving new best policy, reward=1525.038! +[2026-06-02 16:51:35,086][247478] Updated weights for policy 0, policy_version 32896 (0.0009) +[2026-06-02 16:51:35,739][247478] Updated weights for policy 0, policy_version 32906 (0.0008) +[2026-06-02 16:51:35,915][247478] Updated weights for policy 0, policy_version 32916 (0.0008) +[2026-06-02 16:51:36,102][247478] Updated weights for policy 0, policy_version 32927 (0.0009) +[2026-06-02 16:51:36,293][247478] Updated weights for policy 0, policy_version 32937 (0.0009) +[2026-06-02 16:51:36,475][247478] Updated weights for policy 0, policy_version 32947 (0.0009) +[2026-06-02 16:51:36,662][247478] Updated weights for policy 0, policy_version 32957 (0.0008) +[2026-06-02 16:51:37,283][247478] Updated weights for policy 0, policy_version 32967 (0.0008) +[2026-06-02 16:51:37,457][247478] Updated weights for policy 0, policy_version 32977 (0.0009) +[2026-06-02 16:51:37,641][247478] Updated weights for policy 0, policy_version 32987 (0.0008) +[2026-06-02 16:51:37,828][247478] Updated weights for policy 0, policy_version 32997 (0.0009) +[2026-06-02 16:51:38,007][247478] Updated weights for policy 0, policy_version 33007 (0.0008) +[2026-06-02 16:51:38,195][247478] Updated weights for policy 0, policy_version 33017 (0.0008) +[2026-06-02 16:51:38,845][247478] Updated weights for policy 0, policy_version 33027 (0.0009) +[2026-06-02 16:51:39,016][247478] Updated weights for policy 0, policy_version 33037 (0.0009) +[2026-06-02 16:51:39,194][247478] Updated weights for policy 0, policy_version 33047 (0.0008) +[2026-06-02 16:51:39,376][247478] Updated weights for policy 0, policy_version 33057 (0.0008) +[2026-06-02 16:51:39,562][247478] Updated weights for policy 0, policy_version 33067 (0.0008) +[2026-06-02 16:51:39,726][246448] Fps is (10 sec: 19660.9, 60 sec: 20206.9, 300 sec: 20327.3). Total num frames: 16908288. Throughput: 0: 20343.5. Samples: 16955776. Policy #0 lag: (min: 63.0, avg: 80.0, max: 127.0) +[2026-06-02 16:51:39,727][246448] Avg episode reward: [(0, '1546.621')] +[2026-06-02 16:51:39,745][247478] Updated weights for policy 0, policy_version 33077 (0.0008) +[2026-06-02 16:51:39,926][247478] Updated weights for policy 0, policy_version 33087 (0.0009) +[2026-06-02 16:51:39,937][247399] Saving new best policy, reward=1546.621! +[2026-06-02 16:51:40,569][247478] Updated weights for policy 0, policy_version 33097 (0.0008) +[2026-06-02 16:51:40,738][247478] Updated weights for policy 0, policy_version 33107 (0.0008) +[2026-06-02 16:51:40,925][247478] Updated weights for policy 0, policy_version 33117 (0.0008) +[2026-06-02 16:51:41,104][247478] Updated weights for policy 0, policy_version 33127 (0.0008) +[2026-06-02 16:51:41,282][247478] Updated weights for policy 0, policy_version 33137 (0.0008) +[2026-06-02 16:51:41,475][247478] Updated weights for policy 0, policy_version 33147 (0.0009) +[2026-06-02 16:51:42,123][247478] Updated weights for policy 0, policy_version 33159 (0.0009) +[2026-06-02 16:51:42,302][247478] Updated weights for policy 0, policy_version 33169 (0.0008) +[2026-06-02 16:51:42,489][247478] Updated weights for policy 0, policy_version 33179 (0.0008) +[2026-06-02 16:51:42,666][247478] Updated weights for policy 0, policy_version 33189 (0.0008) +[2026-06-02 16:51:42,856][247478] Updated weights for policy 0, policy_version 33199 (0.0008) +[2026-06-02 16:51:43,031][247478] Updated weights for policy 0, policy_version 33209 (0.0008) +[2026-06-02 16:51:43,687][247478] Updated weights for policy 0, policy_version 33219 (0.0008) +[2026-06-02 16:51:43,857][247478] Updated weights for policy 0, policy_version 33229 (0.0008) +[2026-06-02 16:51:44,035][247478] Updated weights for policy 0, policy_version 33239 (0.0008) +[2026-06-02 16:51:44,229][247478] Updated weights for policy 0, policy_version 33250 (0.0009) +[2026-06-02 16:51:44,411][247478] Updated weights for policy 0, policy_version 33260 (0.0008) +[2026-06-02 16:51:44,594][247478] Updated weights for policy 0, policy_version 33270 (0.0008) +[2026-06-02 16:51:44,725][246448] Fps is (10 sec: 19660.8, 60 sec: 20207.0, 300 sec: 20327.3). Total num frames: 17006592. Throughput: 0: 20340.7. Samples: 17018112. Policy #0 lag: (min: 63.0, avg: 80.0, max: 127.0) +[2026-06-02 16:51:44,727][246448] Avg episode reward: [(0, '1595.860')] +[2026-06-02 16:51:44,777][247399] Saving new best policy, reward=1595.860! +[2026-06-02 16:51:44,779][247478] Updated weights for policy 0, policy_version 33280 (0.0008) +[2026-06-02 16:51:45,404][247478] Updated weights for policy 0, policy_version 33290 (0.0008) +[2026-06-02 16:51:45,576][247478] Updated weights for policy 0, policy_version 33300 (0.0008) +[2026-06-02 16:51:45,763][247478] Updated weights for policy 0, policy_version 33310 (0.0008) +[2026-06-02 16:51:45,950][247478] Updated weights for policy 0, policy_version 33320 (0.0009) +[2026-06-02 16:51:46,138][247478] Updated weights for policy 0, policy_version 33330 (0.0009) +[2026-06-02 16:51:46,314][247478] Updated weights for policy 0, policy_version 33340 (0.0008) +[2026-06-02 16:51:46,934][247478] Updated weights for policy 0, policy_version 33350 (0.0008) +[2026-06-02 16:51:47,105][247478] Updated weights for policy 0, policy_version 33360 (0.0008) +[2026-06-02 16:51:47,293][247478] Updated weights for policy 0, policy_version 33370 (0.0009) +[2026-06-02 16:51:47,478][247478] Updated weights for policy 0, policy_version 33380 (0.0008) +[2026-06-02 16:51:47,652][247478] Updated weights for policy 0, policy_version 33390 (0.0008) +[2026-06-02 16:51:47,841][247478] Updated weights for policy 0, policy_version 33400 (0.0006) +[2026-06-02 16:51:48,476][247478] Updated weights for policy 0, policy_version 33410 (0.0006) +[2026-06-02 16:51:48,645][247478] Updated weights for policy 0, policy_version 33420 (0.0009) +[2026-06-02 16:51:48,829][247478] Updated weights for policy 0, policy_version 33430 (0.0008) +[2026-06-02 16:51:49,004][247478] Updated weights for policy 0, policy_version 33440 (0.0008) +[2026-06-02 16:51:49,194][247478] Updated weights for policy 0, policy_version 33450 (0.0008) +[2026-06-02 16:51:49,372][247478] Updated weights for policy 0, policy_version 33460 (0.0008) +[2026-06-02 16:51:49,544][247478] Updated weights for policy 0, policy_version 33470 (0.0008) +[2026-06-02 16:51:49,725][246448] Fps is (10 sec: 22937.6, 60 sec: 20753.1, 300 sec: 20438.4). Total num frames: 17137664. Throughput: 0: 20315.0. Samples: 17142528. Policy #0 lag: (min: 63.0, avg: 80.0, max: 127.0) +[2026-06-02 16:51:49,726][246448] Avg episode reward: [(0, '1549.706')] +[2026-06-02 16:51:50,200][247478] Updated weights for policy 0, policy_version 33481 (0.0009) +[2026-06-02 16:51:50,389][247478] Updated weights for policy 0, policy_version 33491 (0.0008) +[2026-06-02 16:51:50,568][247478] Updated weights for policy 0, policy_version 33501 (0.0008) +[2026-06-02 16:51:50,751][247478] Updated weights for policy 0, policy_version 33511 (0.0009) +[2026-06-02 16:51:50,933][247478] Updated weights for policy 0, policy_version 33521 (0.0008) +[2026-06-02 16:51:51,117][247478] Updated weights for policy 0, policy_version 33531 (0.0008) +[2026-06-02 16:51:51,742][247478] Updated weights for policy 0, policy_version 33541 (0.0009) +[2026-06-02 16:51:51,925][247478] Updated weights for policy 0, policy_version 33552 (0.0009) +[2026-06-02 16:51:52,105][247478] Updated weights for policy 0, policy_version 33562 (0.0009) +[2026-06-02 16:51:52,306][247478] Updated weights for policy 0, policy_version 33573 (0.0008) +[2026-06-02 16:51:52,480][247478] Updated weights for policy 0, policy_version 33583 (0.0008) +[2026-06-02 16:51:52,667][247478] Updated weights for policy 0, policy_version 33593 (0.0008) +[2026-06-02 16:51:53,324][247478] Updated weights for policy 0, policy_version 33603 (0.0009) +[2026-06-02 16:51:53,488][247478] Updated weights for policy 0, policy_version 33613 (0.0008) +[2026-06-02 16:51:53,691][247478] Updated weights for policy 0, policy_version 33624 (0.0009) +[2026-06-02 16:51:53,868][247478] Updated weights for policy 0, policy_version 33634 (0.0008) +[2026-06-02 16:51:54,060][247478] Updated weights for policy 0, policy_version 33644 (0.0009) +[2026-06-02 16:51:54,254][247478] Updated weights for policy 0, policy_version 33655 (0.0009) +[2026-06-02 16:51:54,725][246448] Fps is (10 sec: 22937.7, 60 sec: 20753.1, 300 sec: 20438.4). Total num frames: 17235968. Throughput: 0: 20212.7. Samples: 17262080. Policy #0 lag: (min: 63.0, avg: 80.0, max: 127.0) +[2026-06-02 16:51:54,726][246448] Avg episode reward: [(0, '1530.813')] +[2026-06-02 16:51:54,904][247478] Updated weights for policy 0, policy_version 33665 (0.0009) +[2026-06-02 16:51:55,071][247478] Updated weights for policy 0, policy_version 33675 (0.0008) +[2026-06-02 16:51:55,248][247478] Updated weights for policy 0, policy_version 33685 (0.0008) +[2026-06-02 16:51:55,430][247478] Updated weights for policy 0, policy_version 33695 (0.0009) +[2026-06-02 16:51:55,612][247478] Updated weights for policy 0, policy_version 33705 (0.0009) +[2026-06-02 16:51:55,788][247478] Updated weights for policy 0, policy_version 33715 (0.0008) +[2026-06-02 16:51:55,973][247478] Updated weights for policy 0, policy_version 33725 (0.0008) +[2026-06-02 16:51:56,610][247478] Updated weights for policy 0, policy_version 33735 (0.0009) +[2026-06-02 16:51:56,791][247478] Updated weights for policy 0, policy_version 33745 (0.0008) +[2026-06-02 16:51:56,989][247478] Updated weights for policy 0, policy_version 33756 (0.0008) +[2026-06-02 16:51:57,173][247478] Updated weights for policy 0, policy_version 33766 (0.0009) +[2026-06-02 16:51:57,354][247478] Updated weights for policy 0, policy_version 33776 (0.0008) +[2026-06-02 16:51:57,535][247478] Updated weights for policy 0, policy_version 33786 (0.0008) +[2026-06-02 16:51:58,181][247478] Updated weights for policy 0, policy_version 33796 (0.0009) +[2026-06-02 16:51:58,367][247478] Updated weights for policy 0, policy_version 33807 (0.0008) +[2026-06-02 16:51:58,552][247478] Updated weights for policy 0, policy_version 33817 (0.0008) +[2026-06-02 16:51:58,732][247478] Updated weights for policy 0, policy_version 33827 (0.0009) +[2026-06-02 16:51:58,916][247478] Updated weights for policy 0, policy_version 33837 (0.0008) +[2026-06-02 16:51:59,099][247478] Updated weights for policy 0, policy_version 33847 (0.0008) +[2026-06-02 16:51:59,725][246448] Fps is (10 sec: 19660.8, 60 sec: 20207.0, 300 sec: 20438.4). Total num frames: 17334272. Throughput: 0: 20258.1. Samples: 17318016. Policy #0 lag: (min: 63.0, avg: 80.0, max: 127.0) +[2026-06-02 16:51:59,726][246448] Avg episode reward: [(0, '1505.279')] +[2026-06-02 16:51:59,739][247478] Updated weights for policy 0, policy_version 33857 (0.0009) +[2026-06-02 16:51:59,925][247478] Updated weights for policy 0, policy_version 33868 (0.0008) +[2026-06-02 16:52:00,111][247478] Updated weights for policy 0, policy_version 33878 (0.0009) +[2026-06-02 16:52:00,284][247478] Updated weights for policy 0, policy_version 33888 (0.0008) +[2026-06-02 16:52:00,471][247478] Updated weights for policy 0, policy_version 33898 (0.0008) +[2026-06-02 16:52:00,645][247478] Updated weights for policy 0, policy_version 33908 (0.0009) +[2026-06-02 16:52:00,837][247478] Updated weights for policy 0, policy_version 33918 (0.0009) +[2026-06-02 16:52:01,499][247478] Updated weights for policy 0, policy_version 33929 (0.0009) +[2026-06-02 16:52:01,677][247478] Updated weights for policy 0, policy_version 33939 (0.0008) +[2026-06-02 16:52:01,867][247478] Updated weights for policy 0, policy_version 33949 (0.0008) +[2026-06-02 16:52:02,040][247478] Updated weights for policy 0, policy_version 33959 (0.0008) +[2026-06-02 16:52:02,218][247478] Updated weights for policy 0, policy_version 33969 (0.0009) +[2026-06-02 16:52:02,405][247478] Updated weights for policy 0, policy_version 33979 (0.0008) +[2026-06-02 16:52:03,036][247478] Updated weights for policy 0, policy_version 33989 (0.0009) +[2026-06-02 16:52:03,195][247478] Updated weights for policy 0, policy_version 33999 (0.0008) +[2026-06-02 16:52:03,383][247478] Updated weights for policy 0, policy_version 34009 (0.0008) +[2026-06-02 16:52:03,563][247478] Updated weights for policy 0, policy_version 34019 (0.0010) +[2026-06-02 16:52:03,742][247478] Updated weights for policy 0, policy_version 34029 (0.0009) +[2026-06-02 16:52:03,923][247478] Updated weights for policy 0, policy_version 34039 (0.0008) +[2026-06-02 16:52:04,575][247478] Updated weights for policy 0, policy_version 34049 (0.0009) +[2026-06-02 16:52:04,725][246448] Fps is (10 sec: 19660.6, 60 sec: 20207.0, 300 sec: 20327.3). Total num frames: 17432576. Throughput: 0: 20300.8. Samples: 17443328. Policy #0 lag: (min: 63.0, avg: 80.1, max: 127.0) +[2026-06-02 16:52:04,726][246448] Avg episode reward: [(0, '1479.753')] +[2026-06-02 16:52:04,730][247478] Updated weights for policy 0, policy_version 34059 (0.0010) +[2026-06-02 16:52:04,914][247478] Updated weights for policy 0, policy_version 34069 (0.0008) +[2026-06-02 16:52:05,095][247478] Updated weights for policy 0, policy_version 34079 (0.0009) +[2026-06-02 16:52:05,273][247478] Updated weights for policy 0, policy_version 34089 (0.0009) +[2026-06-02 16:52:05,462][247478] Updated weights for policy 0, policy_version 34099 (0.0008) +[2026-06-02 16:52:05,635][247478] Updated weights for policy 0, policy_version 34109 (0.0009) +[2026-06-02 16:52:06,286][247478] Updated weights for policy 0, policy_version 34119 (0.0009) +[2026-06-02 16:52:06,464][247478] Updated weights for policy 0, policy_version 34129 (0.0008) +[2026-06-02 16:52:06,639][247478] Updated weights for policy 0, policy_version 34139 (0.0009) +[2026-06-02 16:52:06,826][247478] Updated weights for policy 0, policy_version 34149 (0.0009) +[2026-06-02 16:52:07,006][247478] Updated weights for policy 0, policy_version 34159 (0.0005) +[2026-06-02 16:52:07,187][247478] Updated weights for policy 0, policy_version 34169 (0.0004) +[2026-06-02 16:52:07,814][247478] Updated weights for policy 0, policy_version 34179 (0.0004) +[2026-06-02 16:52:07,986][247478] Updated weights for policy 0, policy_version 34189 (0.0004) +[2026-06-02 16:52:08,165][247478] Updated weights for policy 0, policy_version 34199 (0.0004) +[2026-06-02 16:52:08,344][247478] Updated weights for policy 0, policy_version 34209 (0.0004) +[2026-06-02 16:52:08,532][247478] Updated weights for policy 0, policy_version 34219 (0.0004) +[2026-06-02 16:52:08,711][247478] Updated weights for policy 0, policy_version 34229 (0.0007) +[2026-06-02 16:52:08,894][247478] Updated weights for policy 0, policy_version 34239 (0.0008) +[2026-06-02 16:52:09,511][247478] Updated weights for policy 0, policy_version 34249 (0.0009) +[2026-06-02 16:52:09,707][247478] Updated weights for policy 0, policy_version 34260 (0.0008) +[2026-06-02 16:52:09,725][246448] Fps is (10 sec: 19660.8, 60 sec: 20206.9, 300 sec: 20327.3). Total num frames: 17530880. Throughput: 0: 20292.3. Samples: 17567872. Policy #0 lag: (min: 63.0, avg: 80.1, max: 127.0) +[2026-06-02 16:52:09,726][246448] Avg episode reward: [(0, '1452.860')] +[2026-06-02 16:52:09,889][247478] Updated weights for policy 0, policy_version 34270 (0.0008) +[2026-06-02 16:52:10,074][247478] Updated weights for policy 0, policy_version 34280 (0.0009) +[2026-06-02 16:52:10,248][247478] Updated weights for policy 0, policy_version 34290 (0.0008) +[2026-06-02 16:52:10,436][247478] Updated weights for policy 0, policy_version 34300 (0.0008) +[2026-06-02 16:52:11,094][247478] Updated weights for policy 0, policy_version 34310 (0.0008) +[2026-06-02 16:52:11,272][247478] Updated weights for policy 0, policy_version 34320 (0.0008) +[2026-06-02 16:52:11,446][247478] Updated weights for policy 0, policy_version 34330 (0.0009) +[2026-06-02 16:52:11,626][247478] Updated weights for policy 0, policy_version 34340 (0.0008) +[2026-06-02 16:52:11,811][247478] Updated weights for policy 0, policy_version 34350 (0.0010) +[2026-06-02 16:52:12,004][247478] Updated weights for policy 0, policy_version 34360 (0.0009) +[2026-06-02 16:52:12,631][247478] Updated weights for policy 0, policy_version 34370 (0.0009) +[2026-06-02 16:52:12,789][247478] Updated weights for policy 0, policy_version 34380 (0.0009) +[2026-06-02 16:52:12,978][247478] Updated weights for policy 0, policy_version 34390 (0.0009) +[2026-06-02 16:52:13,149][247478] Updated weights for policy 0, policy_version 34400 (0.0009) +[2026-06-02 16:52:13,331][247478] Updated weights for policy 0, policy_version 34410 (0.0009) +[2026-06-02 16:52:13,525][247478] Updated weights for policy 0, policy_version 34420 (0.0009) +[2026-06-02 16:52:13,712][247478] Updated weights for policy 0, policy_version 34430 (0.0009) +[2026-06-02 16:52:14,357][247478] Updated weights for policy 0, policy_version 34441 (0.0009) +[2026-06-02 16:52:14,542][247478] Updated weights for policy 0, policy_version 34451 (0.0009) +[2026-06-02 16:52:14,723][247478] Updated weights for policy 0, policy_version 34461 (0.0009) +[2026-06-02 16:52:14,725][246448] Fps is (10 sec: 19660.7, 60 sec: 20206.9, 300 sec: 20327.3). Total num frames: 17629184. Throughput: 0: 20292.3. Samples: 17630464. Policy #0 lag: (min: 63.0, avg: 80.1, max: 127.0) +[2026-06-02 16:52:14,726][246448] Avg episode reward: [(0, '1475.940')] +[2026-06-02 16:52:14,908][247478] Updated weights for policy 0, policy_version 34471 (0.0009) +[2026-06-02 16:52:15,083][247478] Updated weights for policy 0, policy_version 34481 (0.0008) +[2026-06-02 16:52:15,274][247478] Updated weights for policy 0, policy_version 34491 (0.0008) +[2026-06-02 16:52:15,906][247478] Updated weights for policy 0, policy_version 34501 (0.0009) +[2026-06-02 16:52:16,079][247478] Updated weights for policy 0, policy_version 34511 (0.0008) +[2026-06-02 16:52:16,261][247478] Updated weights for policy 0, policy_version 34521 (0.0009) +[2026-06-02 16:52:16,441][247478] Updated weights for policy 0, policy_version 34531 (0.0008) +[2026-06-02 16:52:16,624][247478] Updated weights for policy 0, policy_version 34541 (0.0009) +[2026-06-02 16:52:16,804][247478] Updated weights for policy 0, policy_version 34551 (0.0009) +[2026-06-02 16:52:17,461][247478] Updated weights for policy 0, policy_version 34561 (0.0009) +[2026-06-02 16:52:17,648][247478] Updated weights for policy 0, policy_version 34573 (0.0009) +[2026-06-02 16:52:17,830][247478] Updated weights for policy 0, policy_version 34583 (0.0009) +[2026-06-02 16:52:18,008][247478] Updated weights for policy 0, policy_version 34593 (0.0008) +[2026-06-02 16:52:18,200][247478] Updated weights for policy 0, policy_version 34603 (0.0009) +[2026-06-02 16:52:18,384][247478] Updated weights for policy 0, policy_version 34613 (0.0009) +[2026-06-02 16:52:18,565][247478] Updated weights for policy 0, policy_version 34623 (0.0009) +[2026-06-02 16:52:19,216][247478] Updated weights for policy 0, policy_version 34633 (0.0009) +[2026-06-02 16:52:19,391][247478] Updated weights for policy 0, policy_version 34643 (0.0008) +[2026-06-02 16:52:19,585][247478] Updated weights for policy 0, policy_version 34654 (0.0009) +[2026-06-02 16:52:19,725][246448] Fps is (10 sec: 19660.6, 60 sec: 20206.9, 300 sec: 20327.3). Total num frames: 17727488. Throughput: 0: 20229.7. Samples: 17744384. Policy #0 lag: (min: 63.0, avg: 80.1, max: 127.0) +[2026-06-02 16:52:19,727][246448] Avg episode reward: [(0, '1536.140')] +[2026-06-02 16:52:19,765][247478] Updated weights for policy 0, policy_version 34664 (0.0008) +[2026-06-02 16:52:19,952][247478] Updated weights for policy 0, policy_version 34674 (0.0008) +[2026-06-02 16:52:20,140][247478] Updated weights for policy 0, policy_version 34684 (0.0008) +[2026-06-02 16:52:20,782][247478] Updated weights for policy 0, policy_version 34694 (0.0008) +[2026-06-02 16:52:20,956][247478] Updated weights for policy 0, policy_version 34704 (0.0009) +[2026-06-02 16:52:21,134][247478] Updated weights for policy 0, policy_version 34714 (0.0008) +[2026-06-02 16:52:21,317][247478] Updated weights for policy 0, policy_version 34724 (0.0009) +[2026-06-02 16:52:21,508][247478] Updated weights for policy 0, policy_version 34734 (0.0008) +[2026-06-02 16:52:21,685][247478] Updated weights for policy 0, policy_version 34744 (0.0008) +[2026-06-02 16:52:22,326][247478] Updated weights for policy 0, policy_version 34754 (0.0009) +[2026-06-02 16:52:22,500][247478] Updated weights for policy 0, policy_version 34764 (0.0010) +[2026-06-02 16:52:22,678][247478] Updated weights for policy 0, policy_version 34774 (0.0008) +[2026-06-02 16:52:22,856][247478] Updated weights for policy 0, policy_version 34784 (0.0008) +[2026-06-02 16:52:23,042][247478] Updated weights for policy 0, policy_version 34794 (0.0009) +[2026-06-02 16:52:23,226][247478] Updated weights for policy 0, policy_version 34804 (0.0008) +[2026-06-02 16:52:23,412][247478] Updated weights for policy 0, policy_version 34814 (0.0008) +[2026-06-02 16:52:24,042][247478] Updated weights for policy 0, policy_version 34824 (0.0009) +[2026-06-02 16:52:24,223][247478] Updated weights for policy 0, policy_version 34834 (0.0008) +[2026-06-02 16:52:24,424][247478] Updated weights for policy 0, policy_version 34845 (0.0009) +[2026-06-02 16:52:24,601][247478] Updated weights for policy 0, policy_version 34855 (0.0008) +[2026-06-02 16:52:24,725][246448] Fps is (10 sec: 19660.9, 60 sec: 20206.9, 300 sec: 20327.3). Total num frames: 17825792. Throughput: 0: 20306.5. Samples: 17869568. Policy #0 lag: (min: 63.0, avg: 80.1, max: 127.0) +[2026-06-02 16:52:24,726][246448] Avg episode reward: [(0, '1506.988')] +[2026-06-02 16:52:24,780][247478] Updated weights for policy 0, policy_version 34865 (0.0008) +[2026-06-02 16:52:24,969][247478] Updated weights for policy 0, policy_version 34875 (0.0009) +[2026-06-02 16:52:25,616][247478] Updated weights for policy 0, policy_version 34885 (0.0009) +[2026-06-02 16:52:25,793][247478] Updated weights for policy 0, policy_version 34895 (0.0008) +[2026-06-02 16:52:25,984][247478] Updated weights for policy 0, policy_version 34905 (0.0008) +[2026-06-02 16:52:26,181][247478] Updated weights for policy 0, policy_version 34916 (0.0008) +[2026-06-02 16:52:26,370][247478] Updated weights for policy 0, policy_version 34926 (0.0009) +[2026-06-02 16:52:26,547][247478] Updated weights for policy 0, policy_version 34936 (0.0005) +[2026-06-02 16:52:27,199][247478] Updated weights for policy 0, policy_version 34946 (0.0005) +[2026-06-02 16:52:27,389][247478] Updated weights for policy 0, policy_version 34957 (0.0009) +[2026-06-02 16:52:27,578][247478] Updated weights for policy 0, policy_version 34967 (0.0008) +[2026-06-02 16:52:27,754][247478] Updated weights for policy 0, policy_version 34977 (0.0010) +[2026-06-02 16:52:27,933][247478] Updated weights for policy 0, policy_version 34987 (0.0009) +[2026-06-02 16:52:28,106][247478] Updated weights for policy 0, policy_version 34997 (0.0009) +[2026-06-02 16:52:28,294][247478] Updated weights for policy 0, policy_version 35007 (0.0008) +[2026-06-02 16:52:28,921][247478] Updated weights for policy 0, policy_version 35017 (0.0009) +[2026-06-02 16:52:29,099][247478] Updated weights for policy 0, policy_version 35027 (0.0008) +[2026-06-02 16:52:29,288][247478] Updated weights for policy 0, policy_version 35037 (0.0009) +[2026-06-02 16:52:29,481][247478] Updated weights for policy 0, policy_version 35047 (0.0010) +[2026-06-02 16:52:29,649][247478] Updated weights for policy 0, policy_version 35057 (0.0009) +[2026-06-02 16:52:29,725][246448] Fps is (10 sec: 19660.8, 60 sec: 20206.9, 300 sec: 20327.3). Total num frames: 17924096. Throughput: 0: 20343.4. Samples: 17933568. Policy #0 lag: (min: 63.0, avg: 79.1, max: 127.0) +[2026-06-02 16:52:29,727][246448] Avg episode reward: [(0, '1555.452')] +[2026-06-02 16:52:29,831][247478] Updated weights for policy 0, policy_version 35067 (0.0009) +[2026-06-02 16:52:30,468][247478] Updated weights for policy 0, policy_version 35077 (0.0009) +[2026-06-02 16:52:30,660][247478] Updated weights for policy 0, policy_version 35088 (0.0009) +[2026-06-02 16:52:30,839][247478] Updated weights for policy 0, policy_version 35098 (0.0010) +[2026-06-02 16:52:31,018][247478] Updated weights for policy 0, policy_version 35108 (0.0008) +[2026-06-02 16:52:31,196][247478] Updated weights for policy 0, policy_version 35118 (0.0009) +[2026-06-02 16:52:31,383][247478] Updated weights for policy 0, policy_version 35128 (0.0008) +[2026-06-02 16:52:32,044][247478] Updated weights for policy 0, policy_version 35139 (0.0009) +[2026-06-02 16:52:32,210][247478] Updated weights for policy 0, policy_version 35149 (0.0009) +[2026-06-02 16:52:32,387][247478] Updated weights for policy 0, policy_version 35159 (0.0008) +[2026-06-02 16:52:32,574][247478] Updated weights for policy 0, policy_version 35169 (0.0009) +[2026-06-02 16:52:32,753][247478] Updated weights for policy 0, policy_version 35179 (0.0008) +[2026-06-02 16:52:32,947][247478] Updated weights for policy 0, policy_version 35189 (0.0008) +[2026-06-02 16:52:33,130][247478] Updated weights for policy 0, policy_version 35199 (0.0009) +[2026-06-02 16:52:33,787][247478] Updated weights for policy 0, policy_version 35211 (0.0009) +[2026-06-02 16:52:33,962][247478] Updated weights for policy 0, policy_version 35221 (0.0009) +[2026-06-02 16:52:34,187][247478] Updated weights for policy 0, policy_version 35234 (0.0009) +[2026-06-02 16:52:34,385][247478] Updated weights for policy 0, policy_version 35245 (0.0009) +[2026-06-02 16:52:34,572][247478] Updated weights for policy 0, policy_version 35256 (0.0008) +[2026-06-02 16:52:34,725][246448] Fps is (10 sec: 22937.5, 60 sec: 20753.1, 300 sec: 20438.3). Total num frames: 18055168. Throughput: 0: 20334.9. Samples: 18057600. Policy #0 lag: (min: 63.0, avg: 79.1, max: 127.0) +[2026-06-02 16:52:34,726][246448] Avg episode reward: [(0, '1565.550')] +[2026-06-02 16:52:35,297][247478] Updated weights for policy 0, policy_version 35268 (0.0010) +[2026-06-02 16:52:35,463][247478] Updated weights for policy 0, policy_version 35278 (0.0009) +[2026-06-02 16:52:35,643][247478] Updated weights for policy 0, policy_version 35288 (0.0008) +[2026-06-02 16:52:35,843][247478] Updated weights for policy 0, policy_version 35299 (0.0008) +[2026-06-02 16:52:36,035][247478] Updated weights for policy 0, policy_version 35309 (0.0005) +[2026-06-02 16:52:36,211][247478] Updated weights for policy 0, policy_version 35319 (0.0005) +[2026-06-02 16:52:36,870][247478] Updated weights for policy 0, policy_version 35329 (0.0005) +[2026-06-02 16:52:37,035][247478] Updated weights for policy 0, policy_version 35339 (0.0009) +[2026-06-02 16:52:37,216][247478] Updated weights for policy 0, policy_version 35349 (0.0008) +[2026-06-02 16:52:37,401][247478] Updated weights for policy 0, policy_version 35359 (0.0008) +[2026-06-02 16:52:37,583][247478] Updated weights for policy 0, policy_version 35369 (0.0008) +[2026-06-02 16:52:37,761][247478] Updated weights for policy 0, policy_version 35379 (0.0009) +[2026-06-02 16:52:37,943][247478] Updated weights for policy 0, policy_version 35389 (0.0008) +[2026-06-02 16:52:38,578][247478] Updated weights for policy 0, policy_version 35399 (0.0008) +[2026-06-02 16:52:38,752][247478] Updated weights for policy 0, policy_version 35409 (0.0009) +[2026-06-02 16:52:38,929][247478] Updated weights for policy 0, policy_version 35419 (0.0008) +[2026-06-02 16:52:39,110][247478] Updated weights for policy 0, policy_version 35429 (0.0008) +[2026-06-02 16:52:39,288][247478] Updated weights for policy 0, policy_version 35439 (0.0008) +[2026-06-02 16:52:39,471][247478] Updated weights for policy 0, policy_version 35449 (0.0009) +[2026-06-02 16:52:39,725][246448] Fps is (10 sec: 22937.6, 60 sec: 20753.0, 300 sec: 20438.4). Total num frames: 18153472. Throughput: 0: 20192.7. Samples: 18170752. Policy #0 lag: (min: 63.0, avg: 79.1, max: 127.0) +[2026-06-02 16:52:39,726][246448] Avg episode reward: [(0, '1630.935')] +[2026-06-02 16:52:39,731][247399] Saving new best policy, reward=1630.935! +[2026-06-02 16:52:40,169][247478] Updated weights for policy 0, policy_version 35460 (0.0009) +[2026-06-02 16:52:40,341][247478] Updated weights for policy 0, policy_version 35470 (0.0008) +[2026-06-02 16:52:40,518][247478] Updated weights for policy 0, policy_version 35480 (0.0008) +[2026-06-02 16:52:40,695][247478] Updated weights for policy 0, policy_version 35490 (0.0008) +[2026-06-02 16:52:40,887][247478] Updated weights for policy 0, policy_version 35500 (0.0008) +[2026-06-02 16:52:41,061][247478] Updated weights for policy 0, policy_version 35510 (0.0008) +[2026-06-02 16:52:41,237][247478] Updated weights for policy 0, policy_version 35520 (0.0008) +[2026-06-02 16:52:41,891][247478] Updated weights for policy 0, policy_version 35530 (0.0010) +[2026-06-02 16:52:42,069][247478] Updated weights for policy 0, policy_version 35540 (0.0012) +[2026-06-02 16:52:42,265][247478] Updated weights for policy 0, policy_version 35551 (0.0008) +[2026-06-02 16:52:42,441][247478] Updated weights for policy 0, policy_version 35561 (0.0008) +[2026-06-02 16:52:42,630][247478] Updated weights for policy 0, policy_version 35571 (0.0008) +[2026-06-02 16:52:42,817][247478] Updated weights for policy 0, policy_version 35581 (0.0008) +[2026-06-02 16:52:43,470][247478] Updated weights for policy 0, policy_version 35592 (0.0009) +[2026-06-02 16:52:43,649][247478] Updated weights for policy 0, policy_version 35602 (0.0008) +[2026-06-02 16:52:43,821][247478] Updated weights for policy 0, policy_version 35612 (0.0008) +[2026-06-02 16:52:44,011][247478] Updated weights for policy 0, policy_version 35622 (0.0008) +[2026-06-02 16:52:44,200][247478] Updated weights for policy 0, policy_version 35632 (0.0008) +[2026-06-02 16:52:44,380][247478] Updated weights for policy 0, policy_version 35642 (0.0008) +[2026-06-02 16:52:44,725][246448] Fps is (10 sec: 19660.9, 60 sec: 20753.1, 300 sec: 20438.4). Total num frames: 18251776. Throughput: 0: 20363.4. Samples: 18234368. Policy #0 lag: (min: 63.0, avg: 79.1, max: 127.0) +[2026-06-02 16:52:44,726][246448] Avg episode reward: [(0, '1576.611')] +[2026-06-02 16:52:45,017][247478] Updated weights for policy 0, policy_version 35652 (0.0008) +[2026-06-02 16:52:45,189][247478] Updated weights for policy 0, policy_version 35662 (0.0008) +[2026-06-02 16:52:45,374][247478] Updated weights for policy 0, policy_version 35672 (0.0008) +[2026-06-02 16:52:45,565][247478] Updated weights for policy 0, policy_version 35682 (0.0008) +[2026-06-02 16:52:45,730][247478] Updated weights for policy 0, policy_version 35692 (0.0009) +[2026-06-02 16:52:45,920][247478] Updated weights for policy 0, policy_version 35702 (0.0009) +[2026-06-02 16:52:46,097][247478] Updated weights for policy 0, policy_version 35712 (0.0008) +[2026-06-02 16:52:46,738][247478] Updated weights for policy 0, policy_version 35722 (0.0009) +[2026-06-02 16:52:46,915][247478] Updated weights for policy 0, policy_version 35732 (0.0009) +[2026-06-02 16:52:47,093][247478] Updated weights for policy 0, policy_version 35742 (0.0008) +[2026-06-02 16:52:47,273][247478] Updated weights for policy 0, policy_version 35752 (0.0008) +[2026-06-02 16:52:47,466][247478] Updated weights for policy 0, policy_version 35762 (0.0009) +[2026-06-02 16:52:47,642][247478] Updated weights for policy 0, policy_version 35772 (0.0008) +[2026-06-02 16:52:48,317][247478] Updated weights for policy 0, policy_version 35784 (0.0009) +[2026-06-02 16:52:48,485][247478] Updated weights for policy 0, policy_version 35794 (0.0008) +[2026-06-02 16:52:48,687][247478] Updated weights for policy 0, policy_version 35805 (0.0009) +[2026-06-02 16:52:48,859][247478] Updated weights for policy 0, policy_version 35815 (0.0009) +[2026-06-02 16:52:49,039][247478] Updated weights for policy 0, policy_version 35825 (0.0009) +[2026-06-02 16:52:49,242][247478] Updated weights for policy 0, policy_version 35836 (0.0009) +[2026-06-02 16:52:49,726][246448] Fps is (10 sec: 19660.6, 60 sec: 20206.9, 300 sec: 20327.3). Total num frames: 18350080. Throughput: 0: 20351.9. Samples: 18359168. Policy #0 lag: (min: 63.0, avg: 79.1, max: 127.0) +[2026-06-02 16:52:49,727][246448] Avg episode reward: [(0, '1592.587')] +[2026-06-02 16:52:49,920][247478] Updated weights for policy 0, policy_version 35848 (0.0009) +[2026-06-02 16:52:50,111][247478] Updated weights for policy 0, policy_version 35858 (0.0009) +[2026-06-02 16:52:50,287][247478] Updated weights for policy 0, policy_version 35868 (0.0009) +[2026-06-02 16:52:50,471][247478] Updated weights for policy 0, policy_version 35878 (0.0008) +[2026-06-02 16:52:50,651][247478] Updated weights for policy 0, policy_version 35888 (0.0008) +[2026-06-02 16:52:50,831][247478] Updated weights for policy 0, policy_version 35898 (0.0009) +[2026-06-02 16:52:51,497][247478] Updated weights for policy 0, policy_version 35909 (0.0009) +[2026-06-02 16:52:51,665][247478] Updated weights for policy 0, policy_version 35919 (0.0008) +[2026-06-02 16:52:51,833][247478] Updated weights for policy 0, policy_version 35929 (0.0008) +[2026-06-02 16:52:52,020][247478] Updated weights for policy 0, policy_version 35939 (0.0008) +[2026-06-02 16:52:52,200][247478] Updated weights for policy 0, policy_version 35949 (0.0008) +[2026-06-02 16:52:52,389][247478] Updated weights for policy 0, policy_version 35959 (0.0008) +[2026-06-02 16:52:53,047][247478] Updated weights for policy 0, policy_version 35969 (0.0008) +[2026-06-02 16:52:53,228][247478] Updated weights for policy 0, policy_version 35980 (0.0008) +[2026-06-02 16:52:53,420][247478] Updated weights for policy 0, policy_version 35991 (0.0008) +[2026-06-02 16:52:53,617][247478] Updated weights for policy 0, policy_version 36002 (0.0008) +[2026-06-02 16:52:53,819][247478] Updated weights for policy 0, policy_version 36013 (0.0008) +[2026-06-02 16:52:54,016][247478] Updated weights for policy 0, policy_version 36024 (0.0008) +[2026-06-02 16:52:54,669][247478] Updated weights for policy 0, policy_version 36034 (0.0009) +[2026-06-02 16:52:54,725][246448] Fps is (10 sec: 19660.8, 60 sec: 20206.9, 300 sec: 20327.3). Total num frames: 18448384. Throughput: 0: 20386.1. Samples: 18485248. Policy #0 lag: (min: 9.0, avg: 35.7, max: 73.0) +[2026-06-02 16:52:54,726][246448] Avg episode reward: [(0, '1620.297')] +[2026-06-02 16:52:54,863][247478] Updated weights for policy 0, policy_version 36045 (0.0008) +[2026-06-02 16:52:55,039][247478] Updated weights for policy 0, policy_version 36055 (0.0008) +[2026-06-02 16:52:55,217][247478] Updated weights for policy 0, policy_version 36065 (0.0008) +[2026-06-02 16:52:55,406][247478] Updated weights for policy 0, policy_version 36075 (0.0008) +[2026-06-02 16:52:55,593][247478] Updated weights for policy 0, policy_version 36085 (0.0008) +[2026-06-02 16:52:55,784][247478] Updated weights for policy 0, policy_version 36096 (0.0008) +[2026-06-02 16:52:56,425][247478] Updated weights for policy 0, policy_version 36106 (0.0009) +[2026-06-02 16:52:56,601][247478] Updated weights for policy 0, policy_version 36116 (0.0009) +[2026-06-02 16:52:56,784][247478] Updated weights for policy 0, policy_version 36126 (0.0008) +[2026-06-02 16:52:56,973][247478] Updated weights for policy 0, policy_version 36136 (0.0008) +[2026-06-02 16:52:57,158][247478] Updated weights for policy 0, policy_version 36146 (0.0009) +[2026-06-02 16:52:57,337][247478] Updated weights for policy 0, policy_version 36156 (0.0008) +[2026-06-02 16:52:57,986][247478] Updated weights for policy 0, policy_version 36166 (0.0009) +[2026-06-02 16:52:58,159][247478] Updated weights for policy 0, policy_version 36176 (0.0008) +[2026-06-02 16:52:58,357][247478] Updated weights for policy 0, policy_version 36187 (0.0008) +[2026-06-02 16:52:58,536][247478] Updated weights for policy 0, policy_version 36197 (0.0008) +[2026-06-02 16:52:58,732][247478] Updated weights for policy 0, policy_version 36208 (0.0009) +[2026-06-02 16:52:58,928][247478] Updated weights for policy 0, policy_version 36218 (0.0008) +[2026-06-02 16:52:59,584][247478] Updated weights for policy 0, policy_version 36229 (0.0009) +[2026-06-02 16:52:59,725][246448] Fps is (10 sec: 19661.0, 60 sec: 20206.9, 300 sec: 20327.3). Total num frames: 18546688. Throughput: 0: 20266.7. Samples: 18542464. Policy #0 lag: (min: 9.0, avg: 35.7, max: 73.0) +[2026-06-02 16:52:59,726][246448] Avg episode reward: [(0, '1633.525')] +[2026-06-02 16:52:59,752][247478] Updated weights for policy 0, policy_version 36239 (0.0009) +[2026-06-02 16:52:59,951][247478] Updated weights for policy 0, policy_version 36250 (0.0008) +[2026-06-02 16:53:00,155][247478] Updated weights for policy 0, policy_version 36261 (0.0007) +[2026-06-02 16:53:00,326][247478] Updated weights for policy 0, policy_version 36271 (0.0006) +[2026-06-02 16:53:00,510][247478] Updated weights for policy 0, policy_version 36281 (0.0005) +[2026-06-02 16:53:00,626][247399] Saving new best policy, reward=1633.525! +[2026-06-02 16:53:01,151][247478] Updated weights for policy 0, policy_version 36292 (0.0007) +[2026-06-02 16:53:01,327][247478] Updated weights for policy 0, policy_version 36302 (0.0008) +[2026-06-02 16:53:01,506][247478] Updated weights for policy 0, policy_version 36312 (0.0008) +[2026-06-02 16:53:01,688][247478] Updated weights for policy 0, policy_version 36322 (0.0008) +[2026-06-02 16:53:01,885][247478] Updated weights for policy 0, policy_version 36333 (0.0008) +[2026-06-02 16:53:02,069][247478] Updated weights for policy 0, policy_version 36343 (0.0008) +[2026-06-02 16:53:02,769][247478] Updated weights for policy 0, policy_version 36354 (0.0008) +[2026-06-02 16:53:02,934][247478] Updated weights for policy 0, policy_version 36364 (0.0008) +[2026-06-02 16:53:03,115][247478] Updated weights for policy 0, policy_version 36374 (0.0008) +[2026-06-02 16:53:03,294][247478] Updated weights for policy 0, policy_version 36384 (0.0009) +[2026-06-02 16:53:03,474][247478] Updated weights for policy 0, policy_version 36394 (0.0009) +[2026-06-02 16:53:03,656][247478] Updated weights for policy 0, policy_version 36404 (0.0008) +[2026-06-02 16:53:03,838][247478] Updated weights for policy 0, policy_version 36414 (0.0008) +[2026-06-02 16:53:04,478][247478] Updated weights for policy 0, policy_version 36424 (0.0009) +[2026-06-02 16:53:04,655][247478] Updated weights for policy 0, policy_version 36434 (0.0008) +[2026-06-02 16:53:04,725][246448] Fps is (10 sec: 19660.7, 60 sec: 20206.9, 300 sec: 20327.3). Total num frames: 18644992. Throughput: 0: 20380.5. Samples: 18661504. Policy #0 lag: (min: 9.0, avg: 35.7, max: 73.0) +[2026-06-02 16:53:04,726][246448] Avg episode reward: [(0, '1575.426')] +[2026-06-02 16:53:04,831][247478] Updated weights for policy 0, policy_version 36444 (0.0009) +[2026-06-02 16:53:05,012][247478] Updated weights for policy 0, policy_version 36454 (0.0008) +[2026-06-02 16:53:05,195][247478] Updated weights for policy 0, policy_version 36464 (0.0008) +[2026-06-02 16:53:05,375][247478] Updated weights for policy 0, policy_version 36474 (0.0008) +[2026-06-02 16:53:06,030][247478] Updated weights for policy 0, policy_version 36484 (0.0008) +[2026-06-02 16:53:06,207][247478] Updated weights for policy 0, policy_version 36494 (0.0009) +[2026-06-02 16:53:06,389][247478] Updated weights for policy 0, policy_version 36504 (0.0009) +[2026-06-02 16:53:06,567][247478] Updated weights for policy 0, policy_version 36514 (0.0008) +[2026-06-02 16:53:06,745][247478] Updated weights for policy 0, policy_version 36524 (0.0008) +[2026-06-02 16:53:06,929][247478] Updated weights for policy 0, policy_version 36534 (0.0008) +[2026-06-02 16:53:07,104][247478] Updated weights for policy 0, policy_version 36544 (0.0008) +[2026-06-02 16:53:07,740][247478] Updated weights for policy 0, policy_version 36554 (0.0008) +[2026-06-02 16:53:07,910][247478] Updated weights for policy 0, policy_version 36564 (0.0008) +[2026-06-02 16:53:08,091][247478] Updated weights for policy 0, policy_version 36574 (0.0008) +[2026-06-02 16:53:08,275][247478] Updated weights for policy 0, policy_version 36584 (0.0009) +[2026-06-02 16:53:08,478][247478] Updated weights for policy 0, policy_version 36595 (0.0008) +[2026-06-02 16:53:08,661][247478] Updated weights for policy 0, policy_version 36605 (0.0009) +[2026-06-02 16:53:09,319][247478] Updated weights for policy 0, policy_version 36615 (0.0008) +[2026-06-02 16:53:09,496][247478] Updated weights for policy 0, policy_version 36625 (0.0008) +[2026-06-02 16:53:09,672][247478] Updated weights for policy 0, policy_version 36635 (0.0008) +[2026-06-02 16:53:09,725][246448] Fps is (10 sec: 19661.0, 60 sec: 20206.9, 300 sec: 20327.3). Total num frames: 18743296. Throughput: 0: 20394.7. Samples: 18787328. Policy #0 lag: (min: 9.0, avg: 35.7, max: 73.0) +[2026-06-02 16:53:09,726][246448] Avg episode reward: [(0, '1570.806')] +[2026-06-02 16:53:09,857][247478] Updated weights for policy 0, policy_version 36645 (0.0008) +[2026-06-02 16:53:10,038][247478] Updated weights for policy 0, policy_version 36655 (0.0008) +[2026-06-02 16:53:10,224][247478] Updated weights for policy 0, policy_version 36665 (0.0008) +[2026-06-02 16:53:10,893][247478] Updated weights for policy 0, policy_version 36675 (0.0008) +[2026-06-02 16:53:11,065][247478] Updated weights for policy 0, policy_version 36685 (0.0008) +[2026-06-02 16:53:11,246][247478] Updated weights for policy 0, policy_version 36695 (0.0009) +[2026-06-02 16:53:11,429][247478] Updated weights for policy 0, policy_version 36705 (0.0009) +[2026-06-02 16:53:11,631][247478] Updated weights for policy 0, policy_version 36716 (0.0008) +[2026-06-02 16:53:11,810][247478] Updated weights for policy 0, policy_version 36726 (0.0008) +[2026-06-02 16:53:11,986][247478] Updated weights for policy 0, policy_version 36736 (0.0008) +[2026-06-02 16:53:12,615][247478] Updated weights for policy 0, policy_version 36746 (0.0008) +[2026-06-02 16:53:12,797][247478] Updated weights for policy 0, policy_version 36756 (0.0008) +[2026-06-02 16:53:12,974][247478] Updated weights for policy 0, policy_version 36766 (0.0009) +[2026-06-02 16:53:13,174][247478] Updated weights for policy 0, policy_version 36777 (0.0008) +[2026-06-02 16:53:13,364][247478] Updated weights for policy 0, policy_version 36787 (0.0009) +[2026-06-02 16:53:13,539][247478] Updated weights for policy 0, policy_version 36797 (0.0008) +[2026-06-02 16:53:14,193][247478] Updated weights for policy 0, policy_version 36807 (0.0009) +[2026-06-02 16:53:14,373][247478] Updated weights for policy 0, policy_version 36817 (0.0008) +[2026-06-02 16:53:14,547][247478] Updated weights for policy 0, policy_version 36827 (0.0008) +[2026-06-02 16:53:14,725][246448] Fps is (10 sec: 19660.8, 60 sec: 20206.9, 300 sec: 20327.3). Total num frames: 18841600. Throughput: 0: 20363.4. Samples: 18849920. Policy #0 lag: (min: 9.0, avg: 35.7, max: 73.0) +[2026-06-02 16:53:14,726][246448] Avg episode reward: [(0, '1510.902')] +[2026-06-02 16:53:14,733][247478] Updated weights for policy 0, policy_version 36837 (0.0008) +[2026-06-02 16:53:14,915][247478] Updated weights for policy 0, policy_version 36847 (0.0008) +[2026-06-02 16:53:15,092][247478] Updated weights for policy 0, policy_version 36857 (0.0009) +[2026-06-02 16:53:15,735][247478] Updated weights for policy 0, policy_version 36867 (0.0008) +[2026-06-02 16:53:15,906][247478] Updated weights for policy 0, policy_version 36877 (0.0008) +[2026-06-02 16:53:16,084][247478] Updated weights for policy 0, policy_version 36887 (0.0008) +[2026-06-02 16:53:16,285][247478] Updated weights for policy 0, policy_version 36898 (0.0009) +[2026-06-02 16:53:16,480][247478] Updated weights for policy 0, policy_version 36908 (0.0009) +[2026-06-02 16:53:16,662][247478] Updated weights for policy 0, policy_version 36918 (0.0009) +[2026-06-02 16:53:16,839][247478] Updated weights for policy 0, policy_version 36928 (0.0009) +[2026-06-02 16:53:17,481][247478] Updated weights for policy 0, policy_version 36939 (0.0009) +[2026-06-02 16:53:17,654][247478] Updated weights for policy 0, policy_version 36949 (0.0008) +[2026-06-02 16:53:17,840][247478] Updated weights for policy 0, policy_version 36959 (0.0008) +[2026-06-02 16:53:18,040][247478] Updated weights for policy 0, policy_version 36970 (0.0009) +[2026-06-02 16:53:18,228][247478] Updated weights for policy 0, policy_version 36980 (0.0009) +[2026-06-02 16:53:18,413][247478] Updated weights for policy 0, policy_version 36991 (0.0009) +[2026-06-02 16:53:19,062][247478] Updated weights for policy 0, policy_version 37001 (0.0008) +[2026-06-02 16:53:19,238][247478] Updated weights for policy 0, policy_version 37011 (0.0009) +[2026-06-02 16:53:19,435][247478] Updated weights for policy 0, policy_version 37022 (0.0008) +[2026-06-02 16:53:19,619][247478] Updated weights for policy 0, policy_version 37032 (0.0009) +[2026-06-02 16:53:19,725][246448] Fps is (10 sec: 19660.7, 60 sec: 20206.9, 300 sec: 20327.3). Total num frames: 18939904. Throughput: 0: 20235.4. Samples: 18968192. Policy #0 lag: (min: 63.0, avg: 79.0, max: 127.0) +[2026-06-02 16:53:19,726][246448] Avg episode reward: [(0, '1496.506')] +[2026-06-02 16:53:19,792][247478] Updated weights for policy 0, policy_version 37042 (0.0008) +[2026-06-02 16:53:19,982][247478] Updated weights for policy 0, policy_version 37052 (0.0008) +[2026-06-02 16:53:20,652][247478] Updated weights for policy 0, policy_version 37063 (0.0009) +[2026-06-02 16:53:20,825][247478] Updated weights for policy 0, policy_version 37073 (0.0008) +[2026-06-02 16:53:21,004][247478] Updated weights for policy 0, policy_version 37083 (0.0008) +[2026-06-02 16:53:21,192][247478] Updated weights for policy 0, policy_version 37093 (0.0009) +[2026-06-02 16:53:21,366][247478] Updated weights for policy 0, policy_version 37103 (0.0008) +[2026-06-02 16:53:21,555][247478] Updated weights for policy 0, policy_version 37113 (0.0009) +[2026-06-02 16:53:22,207][247478] Updated weights for policy 0, policy_version 37123 (0.0009) +[2026-06-02 16:53:22,412][247478] Updated weights for policy 0, policy_version 37135 (0.0008) +[2026-06-02 16:53:22,591][247478] Updated weights for policy 0, policy_version 37145 (0.0009) +[2026-06-02 16:53:22,782][247478] Updated weights for policy 0, policy_version 37155 (0.0010) +[2026-06-02 16:53:22,957][247478] Updated weights for policy 0, policy_version 37165 (0.0008) +[2026-06-02 16:53:23,144][247478] Updated weights for policy 0, policy_version 37175 (0.0009) +[2026-06-02 16:53:23,793][247478] Updated weights for policy 0, policy_version 37185 (0.0009) +[2026-06-02 16:53:23,965][247478] Updated weights for policy 0, policy_version 37195 (0.0008) +[2026-06-02 16:53:24,148][247478] Updated weights for policy 0, policy_version 37205 (0.0008) +[2026-06-02 16:53:24,335][247478] Updated weights for policy 0, policy_version 37216 (0.0008) +[2026-06-02 16:53:24,527][247478] Updated weights for policy 0, policy_version 37226 (0.0009) +[2026-06-02 16:53:24,720][247478] Updated weights for policy 0, policy_version 37237 (0.0008) +[2026-06-02 16:53:24,725][246448] Fps is (10 sec: 19660.7, 60 sec: 20206.9, 300 sec: 20327.3). Total num frames: 19038208. Throughput: 0: 20391.8. Samples: 19088384. Policy #0 lag: (min: 63.0, avg: 79.0, max: 127.0) +[2026-06-02 16:53:24,726][246448] Avg episode reward: [(0, '1531.075')] +[2026-06-02 16:53:24,909][247478] Updated weights for policy 0, policy_version 37247 (0.0009) +[2026-06-02 16:53:25,556][247478] Updated weights for policy 0, policy_version 37257 (0.0009) +[2026-06-02 16:53:25,741][247478] Updated weights for policy 0, policy_version 37267 (0.0008) +[2026-06-02 16:53:25,933][247478] Updated weights for policy 0, policy_version 37278 (0.0008) +[2026-06-02 16:53:26,123][247478] Updated weights for policy 0, policy_version 37288 (0.0008) +[2026-06-02 16:53:26,320][247478] Updated weights for policy 0, policy_version 37299 (0.0008) +[2026-06-02 16:53:26,526][247478] Updated weights for policy 0, policy_version 37310 (0.0008) +[2026-06-02 16:53:27,156][247478] Updated weights for policy 0, policy_version 37321 (0.0009) +[2026-06-02 16:53:27,338][247478] Updated weights for policy 0, policy_version 37331 (0.0009) +[2026-06-02 16:53:27,524][247478] Updated weights for policy 0, policy_version 37341 (0.0008) +[2026-06-02 16:53:27,702][247478] Updated weights for policy 0, policy_version 37351 (0.0009) +[2026-06-02 16:53:27,884][247478] Updated weights for policy 0, policy_version 37361 (0.0008) +[2026-06-02 16:53:28,090][247478] Updated weights for policy 0, policy_version 37372 (0.0008) +[2026-06-02 16:53:28,735][247478] Updated weights for policy 0, policy_version 37382 (0.0009) +[2026-06-02 16:53:28,901][247478] Updated weights for policy 0, policy_version 37392 (0.0008) +[2026-06-02 16:53:29,103][247478] Updated weights for policy 0, policy_version 37403 (0.0008) +[2026-06-02 16:53:29,289][247478] Updated weights for policy 0, policy_version 37413 (0.0008) +[2026-06-02 16:53:29,487][247478] Updated weights for policy 0, policy_version 37424 (0.0009) +[2026-06-02 16:53:29,672][247478] Updated weights for policy 0, policy_version 37434 (0.0009) +[2026-06-02 16:53:29,725][246448] Fps is (10 sec: 19660.7, 60 sec: 20206.9, 300 sec: 20327.3). Total num frames: 19136512. Throughput: 0: 20366.2. Samples: 19150848. Policy #0 lag: (min: 63.0, avg: 79.0, max: 127.0) +[2026-06-02 16:53:29,726][246448] Avg episode reward: [(0, '1523.044')] +[2026-06-02 16:53:30,354][247478] Updated weights for policy 0, policy_version 37446 (0.0009) +[2026-06-02 16:53:30,531][247478] Updated weights for policy 0, policy_version 37456 (0.0009) +[2026-06-02 16:53:30,722][247478] Updated weights for policy 0, policy_version 37467 (0.0008) +[2026-06-02 16:53:30,913][247478] Updated weights for policy 0, policy_version 37477 (0.0009) +[2026-06-02 16:53:31,096][247478] Updated weights for policy 0, policy_version 37487 (0.0009) +[2026-06-02 16:53:31,276][247478] Updated weights for policy 0, policy_version 37497 (0.0008) +[2026-06-02 16:53:31,914][247478] Updated weights for policy 0, policy_version 37507 (0.0009) +[2026-06-02 16:53:32,103][247478] Updated weights for policy 0, policy_version 37518 (0.0008) +[2026-06-02 16:53:32,279][247478] Updated weights for policy 0, policy_version 37528 (0.0009) +[2026-06-02 16:53:32,483][247478] Updated weights for policy 0, policy_version 37539 (0.0008) +[2026-06-02 16:53:32,672][247478] Updated weights for policy 0, policy_version 37549 (0.0009) +[2026-06-02 16:53:32,869][247478] Updated weights for policy 0, policy_version 37560 (0.0009) +[2026-06-02 16:53:33,509][247478] Updated weights for policy 0, policy_version 37570 (0.0008) +[2026-06-02 16:53:33,681][247478] Updated weights for policy 0, policy_version 37580 (0.0009) +[2026-06-02 16:53:33,860][247478] Updated weights for policy 0, policy_version 37590 (0.0008) +[2026-06-02 16:53:34,040][247478] Updated weights for policy 0, policy_version 37600 (0.0008) +[2026-06-02 16:53:34,221][247478] Updated weights for policy 0, policy_version 37610 (0.0008) +[2026-06-02 16:53:34,404][247478] Updated weights for policy 0, policy_version 37620 (0.0009) +[2026-06-02 16:53:34,587][247478] Updated weights for policy 0, policy_version 37630 (0.0008) +[2026-06-02 16:53:34,725][246448] Fps is (10 sec: 22937.7, 60 sec: 20206.9, 300 sec: 20327.3). Total num frames: 19267584. Throughput: 0: 20386.2. Samples: 19276544. Policy #0 lag: (min: 63.0, avg: 79.0, max: 127.0) +[2026-06-02 16:53:34,726][246448] Avg episode reward: [(0, '1590.948')] +[2026-06-02 16:53:35,223][247478] Updated weights for policy 0, policy_version 37640 (0.0008) +[2026-06-02 16:53:35,398][247478] Updated weights for policy 0, policy_version 37650 (0.0008) +[2026-06-02 16:53:35,609][247478] Updated weights for policy 0, policy_version 37662 (0.0009) +[2026-06-02 16:53:35,797][247478] Updated weights for policy 0, policy_version 37672 (0.0008) +[2026-06-02 16:53:35,977][247478] Updated weights for policy 0, policy_version 37682 (0.0008) +[2026-06-02 16:53:36,163][247478] Updated weights for policy 0, policy_version 37692 (0.0008) +[2026-06-02 16:53:36,798][247478] Updated weights for policy 0, policy_version 37702 (0.0009) +[2026-06-02 16:53:36,972][247478] Updated weights for policy 0, policy_version 37712 (0.0008) +[2026-06-02 16:53:37,155][247478] Updated weights for policy 0, policy_version 37722 (0.0008) +[2026-06-02 16:53:37,334][247478] Updated weights for policy 0, policy_version 37732 (0.0008) +[2026-06-02 16:53:37,514][247478] Updated weights for policy 0, policy_version 37742 (0.0008) +[2026-06-02 16:53:37,721][247478] Updated weights for policy 0, policy_version 37753 (0.0008) +[2026-06-02 16:53:38,381][247478] Updated weights for policy 0, policy_version 37763 (0.0009) +[2026-06-02 16:53:38,544][247478] Updated weights for policy 0, policy_version 37773 (0.0008) +[2026-06-02 16:53:38,726][247478] Updated weights for policy 0, policy_version 37783 (0.0009) +[2026-06-02 16:53:38,927][247478] Updated weights for policy 0, policy_version 37794 (0.0009) +[2026-06-02 16:53:39,107][247478] Updated weights for policy 0, policy_version 37804 (0.0009) +[2026-06-02 16:53:39,281][247478] Updated weights for policy 0, policy_version 37814 (0.0008) +[2026-06-02 16:53:39,460][247478] Updated weights for policy 0, policy_version 37824 (0.0008) +[2026-06-02 16:53:39,725][246448] Fps is (10 sec: 22937.7, 60 sec: 20206.9, 300 sec: 20327.3). Total num frames: 19365888. Throughput: 0: 20158.6. Samples: 19392384. Policy #0 lag: (min: 63.0, avg: 79.0, max: 127.0) +[2026-06-02 16:53:39,726][246448] Avg episode reward: [(0, '1642.825')] +[2026-06-02 16:53:39,731][247399] Saving new best policy, reward=1642.825! +[2026-06-02 16:53:40,156][247478] Updated weights for policy 0, policy_version 37835 (0.0009) +[2026-06-02 16:53:40,328][247478] Updated weights for policy 0, policy_version 37845 (0.0008) +[2026-06-02 16:53:40,531][247478] Updated weights for policy 0, policy_version 37856 (0.0009) +[2026-06-02 16:53:40,710][247478] Updated weights for policy 0, policy_version 37866 (0.0008) +[2026-06-02 16:53:40,893][247478] Updated weights for policy 0, policy_version 37876 (0.0008) +[2026-06-02 16:53:41,083][247478] Updated weights for policy 0, policy_version 37886 (0.0008) +[2026-06-02 16:53:41,720][247478] Updated weights for policy 0, policy_version 37897 (0.0009) +[2026-06-02 16:53:41,900][247478] Updated weights for policy 0, policy_version 37907 (0.0009) +[2026-06-02 16:53:42,111][247478] Updated weights for policy 0, policy_version 37918 (0.0009) +[2026-06-02 16:53:42,292][247478] Updated weights for policy 0, policy_version 37928 (0.0009) +[2026-06-02 16:53:42,496][247478] Updated weights for policy 0, policy_version 37939 (0.0009) +[2026-06-02 16:53:42,693][247478] Updated weights for policy 0, policy_version 37950 (0.0005) +[2026-06-02 16:53:43,339][247478] Updated weights for policy 0, policy_version 37961 (0.0005) +[2026-06-02 16:53:43,519][247478] Updated weights for policy 0, policy_version 37971 (0.0005) +[2026-06-02 16:53:43,704][247478] Updated weights for policy 0, policy_version 37981 (0.0005) +[2026-06-02 16:53:43,888][247478] Updated weights for policy 0, policy_version 37991 (0.0005) +[2026-06-02 16:53:44,089][247478] Updated weights for policy 0, policy_version 38002 (0.0008) +[2026-06-02 16:53:44,288][247478] Updated weights for policy 0, policy_version 38013 (0.0008) +[2026-06-02 16:53:44,725][246448] Fps is (10 sec: 19660.7, 60 sec: 20206.9, 300 sec: 20327.3). Total num frames: 19464192. Throughput: 0: 20206.9. Samples: 19451776. Policy #0 lag: (min: 61.0, avg: 76.0, max: 125.0) +[2026-06-02 16:53:44,726][246448] Avg episode reward: [(0, '1622.971')] +[2026-06-02 16:53:44,920][247478] Updated weights for policy 0, policy_version 38023 (0.0009) +[2026-06-02 16:53:45,094][247478] Updated weights for policy 0, policy_version 38033 (0.0009) +[2026-06-02 16:53:45,312][247478] Updated weights for policy 0, policy_version 38045 (0.0009) +[2026-06-02 16:53:45,493][247478] Updated weights for policy 0, policy_version 38055 (0.0009) +[2026-06-02 16:53:45,686][247478] Updated weights for policy 0, policy_version 38066 (0.0009) +[2026-06-02 16:53:45,877][247478] Updated weights for policy 0, policy_version 38076 (0.0008) +[2026-06-02 16:53:46,508][247478] Updated weights for policy 0, policy_version 38086 (0.0008) +[2026-06-02 16:53:46,682][247478] Updated weights for policy 0, policy_version 38096 (0.0008) +[2026-06-02 16:53:46,877][247478] Updated weights for policy 0, policy_version 38107 (0.0008) +[2026-06-02 16:53:47,079][247478] Updated weights for policy 0, policy_version 38118 (0.0009) +[2026-06-02 16:53:47,302][247478] Updated weights for policy 0, policy_version 38130 (0.0009) +[2026-06-02 16:53:47,493][247478] Updated weights for policy 0, policy_version 38141 (0.0008) +[2026-06-02 16:53:48,146][247478] Updated weights for policy 0, policy_version 38151 (0.0008) +[2026-06-02 16:53:48,320][247478] Updated weights for policy 0, policy_version 38161 (0.0008) +[2026-06-02 16:53:48,495][247478] Updated weights for policy 0, policy_version 38171 (0.0008) +[2026-06-02 16:53:48,711][247478] Updated weights for policy 0, policy_version 38183 (0.0006) +[2026-06-02 16:53:48,910][247478] Updated weights for policy 0, policy_version 38194 (0.0004) +[2026-06-02 16:53:49,094][247478] Updated weights for policy 0, policy_version 38204 (0.0005) +[2026-06-02 16:53:49,725][246448] Fps is (10 sec: 19660.8, 60 sec: 20207.0, 300 sec: 20327.3). Total num frames: 19562496. Throughput: 0: 20334.9. Samples: 19576576. Policy #0 lag: (min: 61.0, avg: 76.0, max: 125.0) +[2026-06-02 16:53:49,726][246448] Avg episode reward: [(0, '1666.458')] +[2026-06-02 16:53:49,744][247478] Updated weights for policy 0, policy_version 38214 (0.0009) +[2026-06-02 16:53:49,916][247478] Updated weights for policy 0, policy_version 38224 (0.0009) +[2026-06-02 16:53:50,102][247478] Updated weights for policy 0, policy_version 38234 (0.0008) +[2026-06-02 16:53:50,286][247478] Updated weights for policy 0, policy_version 38244 (0.0009) +[2026-06-02 16:53:50,455][247478] Updated weights for policy 0, policy_version 38254 (0.0009) +[2026-06-02 16:53:50,639][247478] Updated weights for policy 0, policy_version 38264 (0.0008) +[2026-06-02 16:53:50,783][247399] Saving new best policy, reward=1666.458! +[2026-06-02 16:53:51,296][247478] Updated weights for policy 0, policy_version 38274 (0.0008) +[2026-06-02 16:53:51,465][247478] Updated weights for policy 0, policy_version 38284 (0.0008) +[2026-06-02 16:53:51,641][247478] Updated weights for policy 0, policy_version 38294 (0.0009) +[2026-06-02 16:53:51,818][247478] Updated weights for policy 0, policy_version 38304 (0.0008) +[2026-06-02 16:53:52,026][247478] Updated weights for policy 0, policy_version 38315 (0.0008) +[2026-06-02 16:53:52,214][247478] Updated weights for policy 0, policy_version 38325 (0.0008) +[2026-06-02 16:53:52,396][247478] Updated weights for policy 0, policy_version 38335 (0.0008) +[2026-06-02 16:53:53,044][247478] Updated weights for policy 0, policy_version 38345 (0.0008) +[2026-06-02 16:53:53,241][247478] Updated weights for policy 0, policy_version 38356 (0.0008) +[2026-06-02 16:53:53,420][247478] Updated weights for policy 0, policy_version 38366 (0.0008) +[2026-06-02 16:53:53,608][247478] Updated weights for policy 0, policy_version 38376 (0.0009) +[2026-06-02 16:53:53,779][247478] Updated weights for policy 0, policy_version 38386 (0.0008) +[2026-06-02 16:53:53,988][247478] Updated weights for policy 0, policy_version 38397 (0.0009) +[2026-06-02 16:53:54,647][247478] Updated weights for policy 0, policy_version 38408 (0.0009) +[2026-06-02 16:53:54,725][246448] Fps is (10 sec: 19660.9, 60 sec: 20206.9, 300 sec: 20327.3). Total num frames: 19660800. Throughput: 0: 20337.8. Samples: 19702528. Policy #0 lag: (min: 61.0, avg: 76.0, max: 125.0) +[2026-06-02 16:53:54,726][246448] Avg episode reward: [(0, '1695.642')] +[2026-06-02 16:53:54,852][247478] Updated weights for policy 0, policy_version 38419 (0.0009) +[2026-06-02 16:53:55,034][247478] Updated weights for policy 0, policy_version 38429 (0.0008) +[2026-06-02 16:53:55,212][247478] Updated weights for policy 0, policy_version 38439 (0.0008) +[2026-06-02 16:53:55,394][247478] Updated weights for policy 0, policy_version 38449 (0.0008) +[2026-06-02 16:53:55,577][247478] Updated weights for policy 0, policy_version 38459 (0.0008) +[2026-06-02 16:53:55,659][247399] Saving new best policy, reward=1695.642! +[2026-06-02 16:53:56,206][247478] Updated weights for policy 0, policy_version 38469 (0.0009) +[2026-06-02 16:53:56,390][247478] Updated weights for policy 0, policy_version 38479 (0.0008) +[2026-06-02 16:53:56,571][247478] Updated weights for policy 0, policy_version 38489 (0.0009) +[2026-06-02 16:53:56,768][247478] Updated weights for policy 0, policy_version 38500 (0.0008) +[2026-06-02 16:53:56,952][247478] Updated weights for policy 0, policy_version 38510 (0.0008) +[2026-06-02 16:53:57,131][247478] Updated weights for policy 0, policy_version 38520 (0.0008) +[2026-06-02 16:53:57,771][247478] Updated weights for policy 0, policy_version 38530 (0.0008) +[2026-06-02 16:53:57,951][247478] Updated weights for policy 0, policy_version 38541 (0.0009) +[2026-06-02 16:53:58,136][247478] Updated weights for policy 0, policy_version 38551 (0.0009) +[2026-06-02 16:53:58,317][247478] Updated weights for policy 0, policy_version 38561 (0.0009) +[2026-06-02 16:53:58,514][247478] Updated weights for policy 0, policy_version 38572 (0.0009) +[2026-06-02 16:53:58,707][247478] Updated weights for policy 0, policy_version 38582 (0.0009) +[2026-06-02 16:53:58,870][247478] Updated weights for policy 0, policy_version 38592 (0.0008) +[2026-06-02 16:53:59,508][247478] Updated weights for policy 0, policy_version 38602 (0.0009) +[2026-06-02 16:53:59,686][247478] Updated weights for policy 0, policy_version 38612 (0.0009) +[2026-06-02 16:53:59,725][246448] Fps is (10 sec: 19660.7, 60 sec: 20206.9, 300 sec: 20327.3). Total num frames: 19759104. Throughput: 0: 20337.8. Samples: 19765120. Policy #0 lag: (min: 61.0, avg: 76.0, max: 125.0) +[2026-06-02 16:53:59,726][246448] Avg episode reward: [(0, '1723.951')] +[2026-06-02 16:53:59,896][247478] Updated weights for policy 0, policy_version 38624 (0.0008) +[2026-06-02 16:54:00,077][247478] Updated weights for policy 0, policy_version 38634 (0.0009) +[2026-06-02 16:54:00,264][247478] Updated weights for policy 0, policy_version 38644 (0.0009) +[2026-06-02 16:54:00,446][247478] Updated weights for policy 0, policy_version 38654 (0.0009) +[2026-06-02 16:54:00,474][247399] Saving new best policy, reward=1723.951! +[2026-06-02 16:54:01,097][247478] Updated weights for policy 0, policy_version 38664 (0.0009) +[2026-06-02 16:54:01,270][247478] Updated weights for policy 0, policy_version 38674 (0.0009) +[2026-06-02 16:54:01,454][247478] Updated weights for policy 0, policy_version 38684 (0.0009) +[2026-06-02 16:54:01,628][247478] Updated weights for policy 0, policy_version 38694 (0.0009) +[2026-06-02 16:54:01,815][247478] Updated weights for policy 0, policy_version 38704 (0.0008) +[2026-06-02 16:54:02,009][247478] Updated weights for policy 0, policy_version 38714 (0.0009) +[2026-06-02 16:54:02,652][247478] Updated weights for policy 0, policy_version 38724 (0.0009) +[2026-06-02 16:54:02,825][247478] Updated weights for policy 0, policy_version 38734 (0.0009) +[2026-06-02 16:54:03,004][247478] Updated weights for policy 0, policy_version 38744 (0.0007) +[2026-06-02 16:54:03,181][247478] Updated weights for policy 0, policy_version 38754 (0.0009) +[2026-06-02 16:54:03,363][247478] Updated weights for policy 0, policy_version 38764 (0.0009) +[2026-06-02 16:54:03,563][247478] Updated weights for policy 0, policy_version 38775 (0.0009) +[2026-06-02 16:54:04,222][247478] Updated weights for policy 0, policy_version 38785 (0.0009) +[2026-06-02 16:54:04,403][247478] Updated weights for policy 0, policy_version 38796 (0.0009) +[2026-06-02 16:54:04,582][247478] Updated weights for policy 0, policy_version 38806 (0.0008) +[2026-06-02 16:54:04,725][246448] Fps is (10 sec: 19660.8, 60 sec: 20206.9, 300 sec: 20327.3). Total num frames: 19857408. Throughput: 0: 20206.9. Samples: 19877504. Policy #0 lag: (min: 61.0, avg: 76.0, max: 125.0) +[2026-06-02 16:54:04,726][246448] Avg episode reward: [(0, '1733.180')] +[2026-06-02 16:54:04,762][247478] Updated weights for policy 0, policy_version 38816 (0.0008) +[2026-06-02 16:54:04,945][247478] Updated weights for policy 0, policy_version 38826 (0.0009) +[2026-06-02 16:54:05,134][247478] Updated weights for policy 0, policy_version 38836 (0.0009) +[2026-06-02 16:54:05,312][247478] Updated weights for policy 0, policy_version 38846 (0.0008) +[2026-06-02 16:54:05,337][247399] Saving new best policy, reward=1733.180! +[2026-06-02 16:54:05,925][247478] Updated weights for policy 0, policy_version 38856 (0.0008) +[2026-06-02 16:54:06,141][247478] Updated weights for policy 0, policy_version 38868 (0.0009) +[2026-06-02 16:54:06,330][247478] Updated weights for policy 0, policy_version 38878 (0.0009) +[2026-06-02 16:54:06,509][247478] Updated weights for policy 0, policy_version 38888 (0.0009) +[2026-06-02 16:54:06,690][247478] Updated weights for policy 0, policy_version 38898 (0.0009) +[2026-06-02 16:54:06,861][247478] Updated weights for policy 0, policy_version 38908 (0.0008) +[2026-06-02 16:54:07,528][247478] Updated weights for policy 0, policy_version 38919 (0.0008) +[2026-06-02 16:54:07,711][247478] Updated weights for policy 0, policy_version 38929 (0.0008) +[2026-06-02 16:54:07,892][247478] Updated weights for policy 0, policy_version 38939 (0.0008) +[2026-06-02 16:54:08,074][247478] Updated weights for policy 0, policy_version 38949 (0.0008) +[2026-06-02 16:54:08,247][247478] Updated weights for policy 0, policy_version 38959 (0.0008) +[2026-06-02 16:54:08,455][247478] Updated weights for policy 0, policy_version 38970 (0.0008) +[2026-06-02 16:54:09,114][247478] Updated weights for policy 0, policy_version 38981 (0.0008) +[2026-06-02 16:54:09,294][247478] Updated weights for policy 0, policy_version 38991 (0.0008) +[2026-06-02 16:54:09,491][247478] Updated weights for policy 0, policy_version 39002 (0.0009) +[2026-06-02 16:54:09,675][247478] Updated weights for policy 0, policy_version 39012 (0.0008) +[2026-06-02 16:54:09,725][246448] Fps is (10 sec: 19660.8, 60 sec: 20206.9, 300 sec: 20327.3). Total num frames: 19955712. Throughput: 0: 20326.4. Samples: 20003072. Policy #0 lag: (min: 63.0, avg: 79.7, max: 127.0) +[2026-06-02 16:54:09,726][246448] Avg episode reward: [(0, '1679.135')] +[2026-06-02 16:54:09,855][247478] Updated weights for policy 0, policy_version 39022 (0.0009) +[2026-06-02 16:54:10,036][247478] Updated weights for policy 0, policy_version 39032 (0.0008) +[2026-06-02 16:54:10,694][247478] Updated weights for policy 0, policy_version 39042 (0.0008) +[2026-06-02 16:54:10,880][247478] Updated weights for policy 0, policy_version 39053 (0.0009) +[2026-06-02 16:54:11,061][247478] Updated weights for policy 0, policy_version 39063 (0.0008) +[2026-06-02 16:54:11,252][247478] Updated weights for policy 0, policy_version 39074 (0.0008) +[2026-06-02 16:54:11,441][247478] Updated weights for policy 0, policy_version 39084 (0.0008) +[2026-06-02 16:54:11,633][247478] Updated weights for policy 0, policy_version 39095 (0.0008) +[2026-06-02 16:54:12,308][247478] Updated weights for policy 0, policy_version 39106 (0.0009) +[2026-06-02 16:54:12,477][247478] Updated weights for policy 0, policy_version 39116 (0.0008) +[2026-06-02 16:54:12,653][247478] Updated weights for policy 0, policy_version 39126 (0.0008) +[2026-06-02 16:54:12,831][247478] Updated weights for policy 0, policy_version 39136 (0.0008) +[2026-06-02 16:54:13,012][247478] Updated weights for policy 0, policy_version 39146 (0.0009) +[2026-06-02 16:54:13,214][247478] Updated weights for policy 0, policy_version 39157 (0.0008) +[2026-06-02 16:54:13,403][247478] Updated weights for policy 0, policy_version 39167 (0.0008) +[2026-06-02 16:54:14,034][247478] Updated weights for policy 0, policy_version 39177 (0.0009) +[2026-06-02 16:54:14,218][247478] Updated weights for policy 0, policy_version 39188 (0.0008) +[2026-06-02 16:54:14,408][247478] Updated weights for policy 0, policy_version 39198 (0.0009) +[2026-06-02 16:54:14,589][247478] Updated weights for policy 0, policy_version 39208 (0.0008) +[2026-06-02 16:54:14,725][246448] Fps is (10 sec: 19660.8, 60 sec: 20206.9, 300 sec: 20327.3). Total num frames: 20054016. Throughput: 0: 20312.2. Samples: 20064896. Policy #0 lag: (min: 63.0, avg: 79.7, max: 127.0) +[2026-06-02 16:54:14,726][246448] Avg episode reward: [(0, '1718.107')] +[2026-06-02 16:54:14,774][247478] Updated weights for policy 0, policy_version 39218 (0.0008) +[2026-06-02 16:54:14,951][247478] Updated weights for policy 0, policy_version 39228 (0.0008) +[2026-06-02 16:54:15,594][247478] Updated weights for policy 0, policy_version 39238 (0.0009) +[2026-06-02 16:54:15,770][247478] Updated weights for policy 0, policy_version 39248 (0.0008) +[2026-06-02 16:54:15,966][247478] Updated weights for policy 0, policy_version 39259 (0.0009) +[2026-06-02 16:54:16,168][247478] Updated weights for policy 0, policy_version 39270 (0.0008) +[2026-06-02 16:54:16,347][247478] Updated weights for policy 0, policy_version 39280 (0.0008) +[2026-06-02 16:54:16,527][247478] Updated weights for policy 0, policy_version 39290 (0.0008) +[2026-06-02 16:54:17,186][247478] Updated weights for policy 0, policy_version 39300 (0.0008) +[2026-06-02 16:54:17,360][247478] Updated weights for policy 0, policy_version 39310 (0.0008) +[2026-06-02 16:54:17,562][247478] Updated weights for policy 0, policy_version 39321 (0.0008) +[2026-06-02 16:54:17,745][247478] Updated weights for policy 0, policy_version 39331 (0.0008) +[2026-06-02 16:54:17,925][247478] Updated weights for policy 0, policy_version 39341 (0.0008) +[2026-06-02 16:54:18,127][247478] Updated weights for policy 0, policy_version 39352 (0.0008) +[2026-06-02 16:54:18,769][247478] Updated weights for policy 0, policy_version 39362 (0.0009) +[2026-06-02 16:54:18,942][247478] Updated weights for policy 0, policy_version 39372 (0.0009) +[2026-06-02 16:54:19,118][247478] Updated weights for policy 0, policy_version 39382 (0.0008) +[2026-06-02 16:54:19,306][247478] Updated weights for policy 0, policy_version 39392 (0.0009) +[2026-06-02 16:54:19,474][247478] Updated weights for policy 0, policy_version 39402 (0.0009) +[2026-06-02 16:54:19,658][247478] Updated weights for policy 0, policy_version 39412 (0.0008) +[2026-06-02 16:54:19,725][246448] Fps is (10 sec: 19660.9, 60 sec: 20206.9, 300 sec: 20216.2). Total num frames: 20152320. Throughput: 0: 20309.3. Samples: 20190464. Policy #0 lag: (min: 63.0, avg: 79.7, max: 127.0) +[2026-06-02 16:54:19,726][246448] Avg episode reward: [(0, '1736.074')] +[2026-06-02 16:54:19,841][247478] Updated weights for policy 0, policy_version 39422 (0.0009) +[2026-06-02 16:54:19,869][247399] Saving new best policy, reward=1736.074! +[2026-06-02 16:54:20,489][247478] Updated weights for policy 0, policy_version 39432 (0.0009) +[2026-06-02 16:54:20,658][247478] Updated weights for policy 0, policy_version 39442 (0.0008) +[2026-06-02 16:54:20,841][247478] Updated weights for policy 0, policy_version 39452 (0.0008) +[2026-06-02 16:54:21,046][247478] Updated weights for policy 0, policy_version 39463 (0.0008) +[2026-06-02 16:54:21,224][247478] Updated weights for policy 0, policy_version 39473 (0.0009) +[2026-06-02 16:54:21,400][247478] Updated weights for policy 0, policy_version 39483 (0.0009) +[2026-06-02 16:54:22,056][247478] Updated weights for policy 0, policy_version 39494 (0.0009) +[2026-06-02 16:54:22,232][247478] Updated weights for policy 0, policy_version 39504 (0.0009) +[2026-06-02 16:54:22,433][247478] Updated weights for policy 0, policy_version 39515 (0.0009) +[2026-06-02 16:54:22,618][247478] Updated weights for policy 0, policy_version 39525 (0.0008) +[2026-06-02 16:54:22,788][247478] Updated weights for policy 0, policy_version 39535 (0.0008) +[2026-06-02 16:54:22,971][247478] Updated weights for policy 0, policy_version 39545 (0.0008) +[2026-06-02 16:54:23,645][247478] Updated weights for policy 0, policy_version 39556 (0.0008) +[2026-06-02 16:54:23,807][247478] Updated weights for policy 0, policy_version 39566 (0.0008) +[2026-06-02 16:54:24,009][247478] Updated weights for policy 0, policy_version 39577 (0.0008) +[2026-06-02 16:54:24,202][247478] Updated weights for policy 0, policy_version 39588 (0.0008) +[2026-06-02 16:54:24,391][247478] Updated weights for policy 0, policy_version 39598 (0.0008) +[2026-06-02 16:54:24,570][247478] Updated weights for policy 0, policy_version 39608 (0.0008) +[2026-06-02 16:54:24,725][246448] Fps is (10 sec: 22937.7, 60 sec: 20753.1, 300 sec: 20327.3). Total num frames: 20283392. Throughput: 0: 20229.7. Samples: 20302720. Policy #0 lag: (min: 63.0, avg: 79.7, max: 127.0) +[2026-06-02 16:54:24,726][246448] Avg episode reward: [(0, '1763.408')] +[2026-06-02 16:54:24,732][247399] Saving new best policy, reward=1763.408! +[2026-06-02 16:54:25,225][247478] Updated weights for policy 0, policy_version 39618 (0.0008) +[2026-06-02 16:54:25,390][247478] Updated weights for policy 0, policy_version 39628 (0.0008) +[2026-06-02 16:54:25,565][247478] Updated weights for policy 0, policy_version 39638 (0.0008) +[2026-06-02 16:54:25,746][247478] Updated weights for policy 0, policy_version 39648 (0.0008) +[2026-06-02 16:54:25,933][247478] Updated weights for policy 0, policy_version 39658 (0.0008) +[2026-06-02 16:54:26,114][247478] Updated weights for policy 0, policy_version 39668 (0.0008) +[2026-06-02 16:54:26,300][247478] Updated weights for policy 0, policy_version 39678 (0.0008) +[2026-06-02 16:54:26,942][247478] Updated weights for policy 0, policy_version 39688 (0.0008) +[2026-06-02 16:54:27,118][247478] Updated weights for policy 0, policy_version 39698 (0.0009) +[2026-06-02 16:54:27,302][247478] Updated weights for policy 0, policy_version 39708 (0.0009) +[2026-06-02 16:54:27,473][247478] Updated weights for policy 0, policy_version 39718 (0.0008) +[2026-06-02 16:54:27,657][247478] Updated weights for policy 0, policy_version 39728 (0.0009) +[2026-06-02 16:54:27,870][247478] Updated weights for policy 0, policy_version 39739 (0.0009) +[2026-06-02 16:54:28,495][247478] Updated weights for policy 0, policy_version 39749 (0.0009) +[2026-06-02 16:54:28,685][247478] Updated weights for policy 0, policy_version 39760 (0.0009) +[2026-06-02 16:54:28,863][247478] Updated weights for policy 0, policy_version 39770 (0.0009) +[2026-06-02 16:54:29,043][247478] Updated weights for policy 0, policy_version 39780 (0.0009) +[2026-06-02 16:54:29,231][247478] Updated weights for policy 0, policy_version 39790 (0.0009) +[2026-06-02 16:54:29,411][247478] Updated weights for policy 0, policy_version 39800 (0.0009) +[2026-06-02 16:54:29,726][246448] Fps is (10 sec: 22937.6, 60 sec: 20753.1, 300 sec: 20327.3). Total num frames: 20381696. Throughput: 0: 20315.0. Samples: 20365952. Policy #0 lag: (min: 63.0, avg: 79.7, max: 127.0) +[2026-06-02 16:54:29,727][246448] Avg episode reward: [(0, '1773.049')] +[2026-06-02 16:54:29,732][247399] Saving new best policy, reward=1773.049! +[2026-06-02 16:54:30,063][247478] Updated weights for policy 0, policy_version 39811 (0.0008) +[2026-06-02 16:54:30,242][247478] Updated weights for policy 0, policy_version 39821 (0.0008) +[2026-06-02 16:54:30,433][247478] Updated weights for policy 0, policy_version 39832 (0.0009) +[2026-06-02 16:54:30,623][247478] Updated weights for policy 0, policy_version 39842 (0.0008) +[2026-06-02 16:54:30,806][247478] Updated weights for policy 0, policy_version 39852 (0.0009) +[2026-06-02 16:54:30,987][247478] Updated weights for policy 0, policy_version 39862 (0.0008) +[2026-06-02 16:54:31,158][247478] Updated weights for policy 0, policy_version 39872 (0.0008) +[2026-06-02 16:54:31,816][247478] Updated weights for policy 0, policy_version 39882 (0.0008) +[2026-06-02 16:54:32,005][247478] Updated weights for policy 0, policy_version 39893 (0.0008) +[2026-06-02 16:54:32,193][247478] Updated weights for policy 0, policy_version 39903 (0.0009) +[2026-06-02 16:54:32,375][247478] Updated weights for policy 0, policy_version 39913 (0.0008) +[2026-06-02 16:54:32,557][247478] Updated weights for policy 0, policy_version 39923 (0.0008) +[2026-06-02 16:54:32,740][247478] Updated weights for policy 0, policy_version 39933 (0.0008) +[2026-06-02 16:54:33,396][247478] Updated weights for policy 0, policy_version 39943 (0.0009) +[2026-06-02 16:54:33,570][247478] Updated weights for policy 0, policy_version 39953 (0.0009) +[2026-06-02 16:54:33,749][247478] Updated weights for policy 0, policy_version 39963 (0.0008) +[2026-06-02 16:54:33,938][247478] Updated weights for policy 0, policy_version 39973 (0.0008) +[2026-06-02 16:54:34,125][247478] Updated weights for policy 0, policy_version 39983 (0.0008) +[2026-06-02 16:54:34,306][247478] Updated weights for policy 0, policy_version 39993 (0.0009) +[2026-06-02 16:54:34,725][246448] Fps is (10 sec: 19660.8, 60 sec: 20206.9, 300 sec: 20327.3). Total num frames: 20480000. Throughput: 0: 20332.1. Samples: 20491520. Policy #0 lag: (min: 27.0, avg: 64.5, max: 91.0) +[2026-06-02 16:54:34,726][246448] Avg episode reward: [(0, '1759.815')] +[2026-06-02 16:54:34,960][247478] Updated weights for policy 0, policy_version 40004 (0.0008) +[2026-06-02 16:54:35,134][247478] Updated weights for policy 0, policy_version 40014 (0.0008) +[2026-06-02 16:54:35,335][247478] Updated weights for policy 0, policy_version 40025 (0.0008) +[2026-06-02 16:54:35,519][247478] Updated weights for policy 0, policy_version 40035 (0.0008) +[2026-06-02 16:54:35,722][247478] Updated weights for policy 0, policy_version 40046 (0.0009) +[2026-06-02 16:54:35,901][247478] Updated weights for policy 0, policy_version 40056 (0.0009) +[2026-06-02 16:54:36,539][247478] Updated weights for policy 0, policy_version 40066 (0.0009) +[2026-06-02 16:54:36,700][247478] Updated weights for policy 0, policy_version 40076 (0.0008) +[2026-06-02 16:54:36,885][247478] Updated weights for policy 0, policy_version 40086 (0.0008) +[2026-06-02 16:54:37,060][247478] Updated weights for policy 0, policy_version 40096 (0.0008) +[2026-06-02 16:54:37,265][247478] Updated weights for policy 0, policy_version 40107 (0.0008) +[2026-06-02 16:54:37,463][247478] Updated weights for policy 0, policy_version 40118 (0.0008) +[2026-06-02 16:54:38,134][247478] Updated weights for policy 0, policy_version 40129 (0.0010) +[2026-06-02 16:54:38,289][247478] Updated weights for policy 0, policy_version 40139 (0.0008) +[2026-06-02 16:54:38,476][247478] Updated weights for policy 0, policy_version 40149 (0.0008) +[2026-06-02 16:54:38,655][247478] Updated weights for policy 0, policy_version 40159 (0.0009) +[2026-06-02 16:54:38,841][247478] Updated weights for policy 0, policy_version 40169 (0.0008) +[2026-06-02 16:54:39,038][247478] Updated weights for policy 0, policy_version 40180 (0.0008) +[2026-06-02 16:54:39,215][247478] Updated weights for policy 0, policy_version 40190 (0.0008) +[2026-06-02 16:54:39,725][246448] Fps is (10 sec: 19660.8, 60 sec: 20206.9, 300 sec: 20327.3). Total num frames: 20578304. Throughput: 0: 20295.1. Samples: 20615808. Policy #0 lag: (min: 27.0, avg: 64.5, max: 91.0) +[2026-06-02 16:54:39,726][246448] Avg episode reward: [(0, '1732.372')] +[2026-06-02 16:54:39,877][247478] Updated weights for policy 0, policy_version 40200 (0.0008) +[2026-06-02 16:54:40,061][247478] Updated weights for policy 0, policy_version 40210 (0.0008) +[2026-06-02 16:54:40,234][247478] Updated weights for policy 0, policy_version 40220 (0.0008) +[2026-06-02 16:54:40,416][247478] Updated weights for policy 0, policy_version 40230 (0.0008) +[2026-06-02 16:54:40,599][247478] Updated weights for policy 0, policy_version 40240 (0.0009) +[2026-06-02 16:54:40,782][247478] Updated weights for policy 0, policy_version 40250 (0.0008) +[2026-06-02 16:54:41,433][247478] Updated weights for policy 0, policy_version 40260 (0.0010) +[2026-06-02 16:54:41,594][247478] Updated weights for policy 0, policy_version 40270 (0.0008) +[2026-06-02 16:54:41,776][247478] Updated weights for policy 0, policy_version 40280 (0.0009) +[2026-06-02 16:54:41,952][247478] Updated weights for policy 0, policy_version 40290 (0.0008) +[2026-06-02 16:54:42,144][247478] Updated weights for policy 0, policy_version 40300 (0.0008) +[2026-06-02 16:54:42,323][247478] Updated weights for policy 0, policy_version 40310 (0.0008) +[2026-06-02 16:54:42,499][247478] Updated weights for policy 0, policy_version 40320 (0.0008) +[2026-06-02 16:54:43,178][247478] Updated weights for policy 0, policy_version 40331 (0.0009) +[2026-06-02 16:54:43,356][247478] Updated weights for policy 0, policy_version 40341 (0.0009) +[2026-06-02 16:54:43,540][247478] Updated weights for policy 0, policy_version 40351 (0.0008) +[2026-06-02 16:54:43,720][247478] Updated weights for policy 0, policy_version 40361 (0.0008) +[2026-06-02 16:54:43,902][247478] Updated weights for policy 0, policy_version 40371 (0.0009) +[2026-06-02 16:54:44,075][247478] Updated weights for policy 0, policy_version 40381 (0.0008) +[2026-06-02 16:54:44,714][247478] Updated weights for policy 0, policy_version 40391 (0.0008) +[2026-06-02 16:54:44,725][246448] Fps is (10 sec: 19660.8, 60 sec: 20207.0, 300 sec: 20327.3). Total num frames: 20676608. Throughput: 0: 20096.0. Samples: 20669440. Policy #0 lag: (min: 27.0, avg: 64.5, max: 91.0) +[2026-06-02 16:54:44,726][246448] Avg episode reward: [(0, '1744.411')] +[2026-06-02 16:54:44,894][247478] Updated weights for policy 0, policy_version 40401 (0.0008) +[2026-06-02 16:54:45,067][247478] Updated weights for policy 0, policy_version 40411 (0.0008) +[2026-06-02 16:54:45,272][247478] Updated weights for policy 0, policy_version 40422 (0.0008) +[2026-06-02 16:54:45,454][247478] Updated weights for policy 0, policy_version 40432 (0.0009) +[2026-06-02 16:54:45,647][247478] Updated weights for policy 0, policy_version 40443 (0.0008) +[2026-06-02 16:54:46,290][247478] Updated weights for policy 0, policy_version 40453 (0.0009) +[2026-06-02 16:54:46,459][247478] Updated weights for policy 0, policy_version 40463 (0.0009) +[2026-06-02 16:54:46,645][247478] Updated weights for policy 0, policy_version 40473 (0.0008) +[2026-06-02 16:54:46,825][247478] Updated weights for policy 0, policy_version 40483 (0.0009) +[2026-06-02 16:54:47,000][247478] Updated weights for policy 0, policy_version 40493 (0.0008) +[2026-06-02 16:54:47,174][247478] Updated weights for policy 0, policy_version 40503 (0.0009) +[2026-06-02 16:54:47,848][247478] Updated weights for policy 0, policy_version 40514 (0.0006) +[2026-06-02 16:54:48,016][247478] Updated weights for policy 0, policy_version 40524 (0.0009) +[2026-06-02 16:54:48,196][247478] Updated weights for policy 0, policy_version 40534 (0.0008) +[2026-06-02 16:54:48,371][247478] Updated weights for policy 0, policy_version 40544 (0.0008) +[2026-06-02 16:54:48,554][247478] Updated weights for policy 0, policy_version 40554 (0.0009) +[2026-06-02 16:54:48,737][247478] Updated weights for policy 0, policy_version 40564 (0.0008) +[2026-06-02 16:54:48,937][247478] Updated weights for policy 0, policy_version 40575 (0.0009) +[2026-06-02 16:54:49,591][247478] Updated weights for policy 0, policy_version 40586 (0.0009) +[2026-06-02 16:54:49,725][246448] Fps is (10 sec: 19660.8, 60 sec: 20206.9, 300 sec: 20327.3). Total num frames: 20774912. Throughput: 0: 20312.2. Samples: 20791552. Policy #0 lag: (min: 27.0, avg: 64.5, max: 91.0) +[2026-06-02 16:54:49,726][246448] Avg episode reward: [(0, '1762.304')] +[2026-06-02 16:54:49,765][247478] Updated weights for policy 0, policy_version 40596 (0.0007) +[2026-06-02 16:54:49,946][247478] Updated weights for policy 0, policy_version 40606 (0.0009) +[2026-06-02 16:54:50,158][247478] Updated weights for policy 0, policy_version 40617 (0.0008) +[2026-06-02 16:54:50,359][247478] Updated weights for policy 0, policy_version 40628 (0.0008) +[2026-06-02 16:54:50,543][247478] Updated weights for policy 0, policy_version 40638 (0.0008) +[2026-06-02 16:54:51,171][247478] Updated weights for policy 0, policy_version 40648 (0.0009) +[2026-06-02 16:54:51,358][247478] Updated weights for policy 0, policy_version 40658 (0.0008) +[2026-06-02 16:54:51,541][247478] Updated weights for policy 0, policy_version 40668 (0.0009) +[2026-06-02 16:54:51,734][247478] Updated weights for policy 0, policy_version 40679 (0.0008) +[2026-06-02 16:54:51,922][247478] Updated weights for policy 0, policy_version 40689 (0.0007) +[2026-06-02 16:54:52,105][247478] Updated weights for policy 0, policy_version 40699 (0.0004) +[2026-06-02 16:54:52,717][247478] Updated weights for policy 0, policy_version 40709 (0.0007) +[2026-06-02 16:54:52,896][247478] Updated weights for policy 0, policy_version 40719 (0.0009) +[2026-06-02 16:54:53,063][247478] Updated weights for policy 0, policy_version 40729 (0.0008) +[2026-06-02 16:54:53,253][247478] Updated weights for policy 0, policy_version 40739 (0.0009) +[2026-06-02 16:54:53,431][247478] Updated weights for policy 0, policy_version 40749 (0.0008) +[2026-06-02 16:54:53,607][247478] Updated weights for policy 0, policy_version 40759 (0.0008) +[2026-06-02 16:54:54,271][247478] Updated weights for policy 0, policy_version 40769 (0.0008) +[2026-06-02 16:54:54,440][247478] Updated weights for policy 0, policy_version 40779 (0.0007) +[2026-06-02 16:54:54,608][247478] Updated weights for policy 0, policy_version 40789 (0.0008) +[2026-06-02 16:54:54,725][246448] Fps is (10 sec: 19660.8, 60 sec: 20206.9, 300 sec: 20327.3). Total num frames: 20873216. Throughput: 0: 20303.7. Samples: 20916736. Policy #0 lag: (min: 27.0, avg: 64.5, max: 91.0) +[2026-06-02 16:54:54,726][246448] Avg episode reward: [(0, '1797.601')] +[2026-06-02 16:54:54,794][247478] Updated weights for policy 0, policy_version 40799 (0.0009) +[2026-06-02 16:54:54,983][247478] Updated weights for policy 0, policy_version 40809 (0.0009) +[2026-06-02 16:54:55,165][247478] Updated weights for policy 0, policy_version 40819 (0.0009) +[2026-06-02 16:54:55,347][247478] Updated weights for policy 0, policy_version 40829 (0.0009) +[2026-06-02 16:54:55,395][247399] Saving new best policy, reward=1797.601! +[2026-06-02 16:54:55,990][247478] Updated weights for policy 0, policy_version 40839 (0.0009) +[2026-06-02 16:54:56,171][247478] Updated weights for policy 0, policy_version 40849 (0.0009) +[2026-06-02 16:54:56,343][247478] Updated weights for policy 0, policy_version 40859 (0.0009) +[2026-06-02 16:54:56,538][247478] Updated weights for policy 0, policy_version 40869 (0.0008) +[2026-06-02 16:54:56,736][247478] Updated weights for policy 0, policy_version 40880 (0.0009) +[2026-06-02 16:54:56,925][247478] Updated weights for policy 0, policy_version 40890 (0.0008) +[2026-06-02 16:54:57,559][247478] Updated weights for policy 0, policy_version 40900 (0.0008) +[2026-06-02 16:54:57,736][247478] Updated weights for policy 0, policy_version 40910 (0.0009) +[2026-06-02 16:54:57,916][247478] Updated weights for policy 0, policy_version 40920 (0.0009) +[2026-06-02 16:54:58,102][247478] Updated weights for policy 0, policy_version 40930 (0.0009) +[2026-06-02 16:54:58,292][247478] Updated weights for policy 0, policy_version 40941 (0.0009) +[2026-06-02 16:54:58,514][247478] Updated weights for policy 0, policy_version 40953 (0.0009) +[2026-06-02 16:54:59,165][247478] Updated weights for policy 0, policy_version 40963 (0.0009) +[2026-06-02 16:54:59,348][247478] Updated weights for policy 0, policy_version 40973 (0.0009) +[2026-06-02 16:54:59,535][247478] Updated weights for policy 0, policy_version 40983 (0.0009) +[2026-06-02 16:54:59,708][247478] Updated weights for policy 0, policy_version 40993 (0.0009) +[2026-06-02 16:54:59,725][246448] Fps is (10 sec: 19660.8, 60 sec: 20206.9, 300 sec: 20327.3). Total num frames: 20971520. Throughput: 0: 20332.1. Samples: 20979840. Policy #0 lag: (min: 27.0, avg: 64.5, max: 91.0) +[2026-06-02 16:54:59,726][246448] Avg episode reward: [(0, '1813.221')] +[2026-06-02 16:54:59,882][247478] Updated weights for policy 0, policy_version 41003 (0.0009) +[2026-06-02 16:55:00,077][247478] Updated weights for policy 0, policy_version 41013 (0.0010) +[2026-06-02 16:55:00,254][247478] Updated weights for policy 0, policy_version 41023 (0.0009) +[2026-06-02 16:55:00,263][247399] Saving new best policy, reward=1813.221! +[2026-06-02 16:55:00,863][247478] Updated weights for policy 0, policy_version 41033 (0.0009) +[2026-06-02 16:55:01,045][247478] Updated weights for policy 0, policy_version 41043 (0.0009) +[2026-06-02 16:55:01,216][247478] Updated weights for policy 0, policy_version 41053 (0.0008) +[2026-06-02 16:55:01,444][247478] Updated weights for policy 0, policy_version 41065 (0.0008) +[2026-06-02 16:55:01,627][247478] Updated weights for policy 0, policy_version 41075 (0.0009) +[2026-06-02 16:55:01,808][247478] Updated weights for policy 0, policy_version 41085 (0.0009) +[2026-06-02 16:55:02,440][247478] Updated weights for policy 0, policy_version 41095 (0.0010) +[2026-06-02 16:55:02,632][247478] Updated weights for policy 0, policy_version 41106 (0.0009) +[2026-06-02 16:55:02,806][247478] Updated weights for policy 0, policy_version 41116 (0.0009) +[2026-06-02 16:55:02,987][247478] Updated weights for policy 0, policy_version 41126 (0.0009) +[2026-06-02 16:55:03,171][247478] Updated weights for policy 0, policy_version 41136 (0.0009) +[2026-06-02 16:55:03,343][247478] Updated weights for policy 0, policy_version 41146 (0.0008) +[2026-06-02 16:55:04,018][247478] Updated weights for policy 0, policy_version 41157 (0.0009) +[2026-06-02 16:55:04,204][247478] Updated weights for policy 0, policy_version 41168 (0.0009) +[2026-06-02 16:55:04,390][247478] Updated weights for policy 0, policy_version 41178 (0.0009) +[2026-06-02 16:55:04,581][247478] Updated weights for policy 0, policy_version 41189 (0.0009) +[2026-06-02 16:55:04,725][246448] Fps is (10 sec: 19660.7, 60 sec: 20206.9, 300 sec: 20216.4). Total num frames: 21069824. Throughput: 0: 20138.7. Samples: 21096704. Policy #0 lag: (min: 63.0, avg: 81.0, max: 127.0) +[2026-06-02 16:55:04,726][246448] Avg episode reward: [(0, '1762.531')] +[2026-06-02 16:55:04,765][247478] Updated weights for policy 0, policy_version 41199 (0.0009) +[2026-06-02 16:55:04,958][247478] Updated weights for policy 0, policy_version 41210 (0.0009) +[2026-06-02 16:55:05,625][247478] Updated weights for policy 0, policy_version 41220 (0.0009) +[2026-06-02 16:55:05,819][247478] Updated weights for policy 0, policy_version 41232 (0.0009) +[2026-06-02 16:55:05,996][247478] Updated weights for policy 0, policy_version 41242 (0.0008) +[2026-06-02 16:55:06,179][247478] Updated weights for policy 0, policy_version 41252 (0.0009) +[2026-06-02 16:55:06,374][247478] Updated weights for policy 0, policy_version 41263 (0.0008) +[2026-06-02 16:55:06,560][247478] Updated weights for policy 0, policy_version 41273 (0.0010) +[2026-06-02 16:55:07,227][247478] Updated weights for policy 0, policy_version 41283 (0.0009) +[2026-06-02 16:55:07,399][247478] Updated weights for policy 0, policy_version 41293 (0.0008) +[2026-06-02 16:55:07,581][247478] Updated weights for policy 0, policy_version 41303 (0.0009) +[2026-06-02 16:55:07,764][247478] Updated weights for policy 0, policy_version 41313 (0.0008) +[2026-06-02 16:55:07,947][247478] Updated weights for policy 0, policy_version 41323 (0.0008) +[2026-06-02 16:55:08,130][247478] Updated weights for policy 0, policy_version 41333 (0.0008) +[2026-06-02 16:55:08,306][247478] Updated weights for policy 0, policy_version 41343 (0.0010) +[2026-06-02 16:55:08,934][247478] Updated weights for policy 0, policy_version 41353 (0.0009) +[2026-06-02 16:55:09,115][247478] Updated weights for policy 0, policy_version 41363 (0.0009) +[2026-06-02 16:55:09,307][247478] Updated weights for policy 0, policy_version 41373 (0.0009) +[2026-06-02 16:55:09,482][247478] Updated weights for policy 0, policy_version 41383 (0.0009) +[2026-06-02 16:55:09,670][247478] Updated weights for policy 0, policy_version 41393 (0.0009) +[2026-06-02 16:55:09,725][246448] Fps is (10 sec: 19660.8, 60 sec: 20207.0, 300 sec: 20216.2). Total num frames: 21168128. Throughput: 0: 20323.6. Samples: 21217280. Policy #0 lag: (min: 63.0, avg: 81.0, max: 127.0) +[2026-06-02 16:55:09,726][246448] Avg episode reward: [(0, '1765.808')] +[2026-06-02 16:55:09,849][247478] Updated weights for policy 0, policy_version 41403 (0.0008) +[2026-06-02 16:55:10,501][247478] Updated weights for policy 0, policy_version 41413 (0.0009) +[2026-06-02 16:55:10,668][247478] Updated weights for policy 0, policy_version 41423 (0.0008) +[2026-06-02 16:55:10,868][247478] Updated weights for policy 0, policy_version 41434 (0.0008) +[2026-06-02 16:55:11,061][247478] Updated weights for policy 0, policy_version 41444 (0.0008) +[2026-06-02 16:55:11,235][247478] Updated weights for policy 0, policy_version 41454 (0.0008) +[2026-06-02 16:55:11,428][247478] Updated weights for policy 0, policy_version 41464 (0.0009) +[2026-06-02 16:55:12,059][247478] Updated weights for policy 0, policy_version 41474 (0.0009) +[2026-06-02 16:55:12,227][247478] Updated weights for policy 0, policy_version 41484 (0.0008) +[2026-06-02 16:55:12,397][247478] Updated weights for policy 0, policy_version 41494 (0.0008) +[2026-06-02 16:55:12,598][247478] Updated weights for policy 0, policy_version 41505 (0.0008) +[2026-06-02 16:55:12,800][247478] Updated weights for policy 0, policy_version 41516 (0.0008) +[2026-06-02 16:55:12,986][247478] Updated weights for policy 0, policy_version 41526 (0.0008) +[2026-06-02 16:55:13,162][247478] Updated weights for policy 0, policy_version 41536 (0.0008) +[2026-06-02 16:55:13,793][247478] Updated weights for policy 0, policy_version 41546 (0.0009) +[2026-06-02 16:55:13,973][247478] Updated weights for policy 0, policy_version 41556 (0.0008) +[2026-06-02 16:55:14,153][247478] Updated weights for policy 0, policy_version 41566 (0.0008) +[2026-06-02 16:55:14,339][247478] Updated weights for policy 0, policy_version 41576 (0.0008) +[2026-06-02 16:55:14,524][247478] Updated weights for policy 0, policy_version 41586 (0.0008) +[2026-06-02 16:55:14,708][247478] Updated weights for policy 0, policy_version 41596 (0.0008) +[2026-06-02 16:55:14,725][246448] Fps is (10 sec: 19660.9, 60 sec: 20206.9, 300 sec: 20216.2). Total num frames: 21266432. Throughput: 0: 20312.2. Samples: 21280000. Policy #0 lag: (min: 63.0, avg: 81.0, max: 127.0) +[2026-06-02 16:55:14,726][246448] Avg episode reward: [(0, '1780.399')] +[2026-06-02 16:55:15,342][247478] Updated weights for policy 0, policy_version 41606 (0.0008) +[2026-06-02 16:55:15,514][247478] Updated weights for policy 0, policy_version 41616 (0.0008) +[2026-06-02 16:55:15,701][247478] Updated weights for policy 0, policy_version 41626 (0.0009) +[2026-06-02 16:55:15,882][247478] Updated weights for policy 0, policy_version 41636 (0.0008) +[2026-06-02 16:55:16,065][247478] Updated weights for policy 0, policy_version 41646 (0.0008) +[2026-06-02 16:55:16,246][247478] Updated weights for policy 0, policy_version 41656 (0.0008) +[2026-06-02 16:55:16,886][247478] Updated weights for policy 0, policy_version 41666 (0.0009) +[2026-06-02 16:55:17,063][247478] Updated weights for policy 0, policy_version 41676 (0.0009) +[2026-06-02 16:55:17,239][247478] Updated weights for policy 0, policy_version 41686 (0.0008) +[2026-06-02 16:55:17,423][247478] Updated weights for policy 0, policy_version 41696 (0.0008) +[2026-06-02 16:55:17,602][247478] Updated weights for policy 0, policy_version 41706 (0.0008) +[2026-06-02 16:55:17,801][247478] Updated weights for policy 0, policy_version 41717 (0.0009) +[2026-06-02 16:55:17,986][247478] Updated weights for policy 0, policy_version 41727 (0.0008) +[2026-06-02 16:55:18,623][247478] Updated weights for policy 0, policy_version 41737 (0.0010) +[2026-06-02 16:55:18,798][247478] Updated weights for policy 0, policy_version 41747 (0.0010) +[2026-06-02 16:55:18,999][247478] Updated weights for policy 0, policy_version 41758 (0.0006) +[2026-06-02 16:55:19,186][247478] Updated weights for policy 0, policy_version 41768 (0.0009) +[2026-06-02 16:55:19,366][247478] Updated weights for policy 0, policy_version 41778 (0.0009) +[2026-06-02 16:55:19,547][247478] Updated weights for policy 0, policy_version 41788 (0.0008) +[2026-06-02 16:55:19,725][246448] Fps is (10 sec: 22937.5, 60 sec: 20753.1, 300 sec: 20327.3). Total num frames: 21397504. Throughput: 0: 20292.3. Samples: 21404672. Policy #0 lag: (min: 63.0, avg: 81.0, max: 127.0) +[2026-06-02 16:55:19,726][246448] Avg episode reward: [(0, '1802.460')] +[2026-06-02 16:55:20,175][247478] Updated weights for policy 0, policy_version 41798 (0.0010) +[2026-06-02 16:55:20,367][247478] Updated weights for policy 0, policy_version 41809 (0.0010) +[2026-06-02 16:55:20,550][247478] Updated weights for policy 0, policy_version 41819 (0.0012) +[2026-06-02 16:55:20,735][247478] Updated weights for policy 0, policy_version 41829 (0.0011) +[2026-06-02 16:55:20,914][247478] Updated weights for policy 0, policy_version 41839 (0.0010) +[2026-06-02 16:55:21,095][247478] Updated weights for policy 0, policy_version 41849 (0.0008) +[2026-06-02 16:55:21,742][247478] Updated weights for policy 0, policy_version 41859 (0.0009) +[2026-06-02 16:55:21,934][247478] Updated weights for policy 0, policy_version 41870 (0.0009) +[2026-06-02 16:55:22,132][247478] Updated weights for policy 0, policy_version 41881 (0.0009) +[2026-06-02 16:55:22,314][247478] Updated weights for policy 0, policy_version 41891 (0.0009) +[2026-06-02 16:55:22,499][247478] Updated weights for policy 0, policy_version 41901 (0.0009) +[2026-06-02 16:55:22,674][247478] Updated weights for policy 0, policy_version 41911 (0.0009) +[2026-06-02 16:55:23,324][247478] Updated weights for policy 0, policy_version 41921 (0.0008) +[2026-06-02 16:55:23,490][247478] Updated weights for policy 0, policy_version 41931 (0.0008) +[2026-06-02 16:55:23,678][247478] Updated weights for policy 0, policy_version 41941 (0.0009) +[2026-06-02 16:55:23,868][247478] Updated weights for policy 0, policy_version 41952 (0.0009) +[2026-06-02 16:55:24,051][247478] Updated weights for policy 0, policy_version 41962 (0.0009) +[2026-06-02 16:55:24,282][247478] Updated weights for policy 0, policy_version 41974 (0.0008) +[2026-06-02 16:55:24,725][246448] Fps is (10 sec: 22937.6, 60 sec: 20206.9, 300 sec: 20327.3). Total num frames: 21495808. Throughput: 0: 20147.2. Samples: 21522432. Policy #0 lag: (min: 63.0, avg: 81.0, max: 127.0) +[2026-06-02 16:55:24,727][246448] Avg episode reward: [(0, '1815.303')] +[2026-06-02 16:55:24,731][247399] Saving new best policy, reward=1815.303! +[2026-06-02 16:55:24,939][247478] Updated weights for policy 0, policy_version 41985 (0.0008) +[2026-06-02 16:55:25,109][247478] Updated weights for policy 0, policy_version 41995 (0.0008) +[2026-06-02 16:55:25,275][247478] Updated weights for policy 0, policy_version 42005 (0.0008) +[2026-06-02 16:55:25,479][247478] Updated weights for policy 0, policy_version 42015 (0.0008) +[2026-06-02 16:55:25,646][247478] Updated weights for policy 0, policy_version 42025 (0.0008) +[2026-06-02 16:55:25,845][247478] Updated weights for policy 0, policy_version 42036 (0.0009) +[2026-06-02 16:55:26,050][247478] Updated weights for policy 0, policy_version 42047 (0.0010) +[2026-06-02 16:55:26,670][247478] Updated weights for policy 0, policy_version 42057 (0.0009) +[2026-06-02 16:55:26,853][247478] Updated weights for policy 0, policy_version 42067 (0.0009) +[2026-06-02 16:55:27,029][247478] Updated weights for policy 0, policy_version 42077 (0.0009) +[2026-06-02 16:55:27,218][247478] Updated weights for policy 0, policy_version 42088 (0.0009) +[2026-06-02 16:55:27,416][247478] Updated weights for policy 0, policy_version 42099 (0.0009) +[2026-06-02 16:55:27,611][247478] Updated weights for policy 0, policy_version 42109 (0.0009) +[2026-06-02 16:55:28,262][247478] Updated weights for policy 0, policy_version 42120 (0.0009) +[2026-06-02 16:55:28,444][247478] Updated weights for policy 0, policy_version 42130 (0.0008) +[2026-06-02 16:55:28,657][247478] Updated weights for policy 0, policy_version 42142 (0.0009) +[2026-06-02 16:55:28,838][247478] Updated weights for policy 0, policy_version 42152 (0.0008) +[2026-06-02 16:55:29,022][247478] Updated weights for policy 0, policy_version 42162 (0.0008) +[2026-06-02 16:55:29,219][247478] Updated weights for policy 0, policy_version 42173 (0.0008) +[2026-06-02 16:55:29,725][246448] Fps is (10 sec: 19660.7, 60 sec: 20206.9, 300 sec: 20327.3). Total num frames: 21594112. Throughput: 0: 20218.3. Samples: 21579264. Policy #0 lag: (min: 63.0, avg: 81.0, max: 127.0) +[2026-06-02 16:55:29,726][246448] Avg episode reward: [(0, '1819.048')] +[2026-06-02 16:55:29,871][247478] Updated weights for policy 0, policy_version 42184 (0.0008) +[2026-06-02 16:55:30,049][247478] Updated weights for policy 0, policy_version 42194 (0.0008) +[2026-06-02 16:55:30,234][247478] Updated weights for policy 0, policy_version 42204 (0.0008) +[2026-06-02 16:55:30,418][247478] Updated weights for policy 0, policy_version 42214 (0.0008) +[2026-06-02 16:55:30,604][247478] Updated weights for policy 0, policy_version 42224 (0.0009) +[2026-06-02 16:55:30,780][247478] Updated weights for policy 0, policy_version 42234 (0.0009) +[2026-06-02 16:55:30,889][247399] Saving new best policy, reward=1819.048! +[2026-06-02 16:55:31,430][247478] Updated weights for policy 0, policy_version 42244 (0.0008) +[2026-06-02 16:55:31,602][247478] Updated weights for policy 0, policy_version 42254 (0.0008) +[2026-06-02 16:55:31,788][247478] Updated weights for policy 0, policy_version 42264 (0.0008) +[2026-06-02 16:55:31,976][247478] Updated weights for policy 0, policy_version 42274 (0.0008) +[2026-06-02 16:55:32,152][247478] Updated weights for policy 0, policy_version 42284 (0.0008) +[2026-06-02 16:55:32,342][247478] Updated weights for policy 0, policy_version 42294 (0.0008) +[2026-06-02 16:55:32,525][247478] Updated weights for policy 0, policy_version 42304 (0.0008) +[2026-06-02 16:55:33,150][247478] Updated weights for policy 0, policy_version 42314 (0.0009) +[2026-06-02 16:55:33,320][247478] Updated weights for policy 0, policy_version 42324 (0.0008) +[2026-06-02 16:55:33,507][247478] Updated weights for policy 0, policy_version 42334 (0.0008) +[2026-06-02 16:55:33,689][247478] Updated weights for policy 0, policy_version 42344 (0.0008) +[2026-06-02 16:55:33,871][247478] Updated weights for policy 0, policy_version 42354 (0.0008) +[2026-06-02 16:55:34,095][247478] Updated weights for policy 0, policy_version 42366 (0.0008) +[2026-06-02 16:55:34,724][247478] Updated weights for policy 0, policy_version 42376 (0.0008) +[2026-06-02 16:55:34,726][246448] Fps is (10 sec: 19660.5, 60 sec: 20206.9, 300 sec: 20327.3). Total num frames: 21692416. Throughput: 0: 20295.0. Samples: 21704832. Policy #0 lag: (min: 63.0, avg: 79.8, max: 127.0) +[2026-06-02 16:55:34,727][246448] Avg episode reward: [(0, '1844.666')] +[2026-06-02 16:55:34,901][247478] Updated weights for policy 0, policy_version 42386 (0.0008) +[2026-06-02 16:55:35,107][247478] Updated weights for policy 0, policy_version 42397 (0.0008) +[2026-06-02 16:55:35,278][247478] Updated weights for policy 0, policy_version 42407 (0.0008) +[2026-06-02 16:55:35,493][247478] Updated weights for policy 0, policy_version 42419 (0.0008) +[2026-06-02 16:55:35,680][247478] Updated weights for policy 0, policy_version 42429 (0.0008) +[2026-06-02 16:55:35,730][247399] Saving new best policy, reward=1844.666! +[2026-06-02 16:55:36,320][247478] Updated weights for policy 0, policy_version 42439 (0.0008) +[2026-06-02 16:55:36,540][247478] Updated weights for policy 0, policy_version 42451 (0.0008) +[2026-06-02 16:55:36,735][247478] Updated weights for policy 0, policy_version 42462 (0.0009) +[2026-06-02 16:55:36,923][247478] Updated weights for policy 0, policy_version 42472 (0.0009) +[2026-06-02 16:55:37,121][247478] Updated weights for policy 0, policy_version 42483 (0.0008) +[2026-06-02 16:55:37,297][247478] Updated weights for policy 0, policy_version 42493 (0.0008) +[2026-06-02 16:55:37,935][247478] Updated weights for policy 0, policy_version 42503 (0.0009) +[2026-06-02 16:55:38,116][247478] Updated weights for policy 0, policy_version 42513 (0.0010) +[2026-06-02 16:55:38,303][247478] Updated weights for policy 0, policy_version 42523 (0.0008) +[2026-06-02 16:55:38,484][247478] Updated weights for policy 0, policy_version 42533 (0.0009) +[2026-06-02 16:55:38,658][247478] Updated weights for policy 0, policy_version 42543 (0.0008) +[2026-06-02 16:55:38,845][247478] Updated weights for policy 0, policy_version 42553 (0.0008) +[2026-06-02 16:55:39,512][247478] Updated weights for policy 0, policy_version 42563 (0.0008) +[2026-06-02 16:55:39,677][247478] Updated weights for policy 0, policy_version 42573 (0.0009) +[2026-06-02 16:55:39,725][246448] Fps is (10 sec: 19661.0, 60 sec: 20206.9, 300 sec: 20327.3). Total num frames: 21790720. Throughput: 0: 20303.6. Samples: 21830400. Policy #0 lag: (min: 63.0, avg: 79.8, max: 127.0) +[2026-06-02 16:55:39,726][246448] Avg episode reward: [(0, '1844.666')] +[2026-06-02 16:55:39,854][247478] Updated weights for policy 0, policy_version 42583 (0.0009) +[2026-06-02 16:55:40,054][247478] Updated weights for policy 0, policy_version 42594 (0.0008) +[2026-06-02 16:55:40,240][247478] Updated weights for policy 0, policy_version 42604 (0.0008) +[2026-06-02 16:55:40,427][247478] Updated weights for policy 0, policy_version 42614 (0.0008) +[2026-06-02 16:55:40,599][247478] Updated weights for policy 0, policy_version 42624 (0.0009) +[2026-06-02 16:55:41,248][247478] Updated weights for policy 0, policy_version 42635 (0.0008) +[2026-06-02 16:55:41,422][247478] Updated weights for policy 0, policy_version 42645 (0.0008) +[2026-06-02 16:55:41,601][247478] Updated weights for policy 0, policy_version 42655 (0.0008) +[2026-06-02 16:55:41,786][247478] Updated weights for policy 0, policy_version 42665 (0.0008) +[2026-06-02 16:55:41,968][247478] Updated weights for policy 0, policy_version 42675 (0.0008) +[2026-06-02 16:55:42,155][247478] Updated weights for policy 0, policy_version 42685 (0.0008) +[2026-06-02 16:55:42,799][247478] Updated weights for policy 0, policy_version 42696 (0.0008) +[2026-06-02 16:55:42,990][247478] Updated weights for policy 0, policy_version 42707 (0.0008) +[2026-06-02 16:55:43,176][247478] Updated weights for policy 0, policy_version 42717 (0.0008) +[2026-06-02 16:55:43,355][247478] Updated weights for policy 0, policy_version 42727 (0.0008) +[2026-06-02 16:55:43,539][247478] Updated weights for policy 0, policy_version 42737 (0.0009) +[2026-06-02 16:55:43,715][247478] Updated weights for policy 0, policy_version 42747 (0.0008) +[2026-06-02 16:55:44,366][247478] Updated weights for policy 0, policy_version 42757 (0.0009) +[2026-06-02 16:55:44,542][247478] Updated weights for policy 0, policy_version 42767 (0.0008) +[2026-06-02 16:55:44,723][247478] Updated weights for policy 0, policy_version 42777 (0.0009) +[2026-06-02 16:55:44,725][246448] Fps is (10 sec: 19661.0, 60 sec: 20206.9, 300 sec: 20327.3). Total num frames: 21889024. Throughput: 0: 20278.0. Samples: 21892352. Policy #0 lag: (min: 63.0, avg: 79.8, max: 127.0) +[2026-06-02 16:55:44,726][246448] Avg episode reward: [(0, '1854.635')] +[2026-06-02 16:55:44,903][247478] Updated weights for policy 0, policy_version 42787 (0.0009) +[2026-06-02 16:55:45,084][247478] Updated weights for policy 0, policy_version 42797 (0.0008) +[2026-06-02 16:55:45,264][247478] Updated weights for policy 0, policy_version 42807 (0.0009) +[2026-06-02 16:55:45,428][247399] Saving new best policy, reward=1854.635! +[2026-06-02 16:55:45,921][247478] Updated weights for policy 0, policy_version 42817 (0.0009) +[2026-06-02 16:55:46,085][247478] Updated weights for policy 0, policy_version 42827 (0.0008) +[2026-06-02 16:55:46,274][247478] Updated weights for policy 0, policy_version 42837 (0.0009) +[2026-06-02 16:55:46,470][247478] Updated weights for policy 0, policy_version 42848 (0.0009) +[2026-06-02 16:55:46,654][247478] Updated weights for policy 0, policy_version 42858 (0.0009) +[2026-06-02 16:55:46,841][247478] Updated weights for policy 0, policy_version 42868 (0.0008) +[2026-06-02 16:55:47,026][247478] Updated weights for policy 0, policy_version 42878 (0.0008) +[2026-06-02 16:55:47,656][247478] Updated weights for policy 0, policy_version 42888 (0.0008) +[2026-06-02 16:55:47,840][247478] Updated weights for policy 0, policy_version 42898 (0.0008) +[2026-06-02 16:55:48,049][247478] Updated weights for policy 0, policy_version 42909 (0.0009) +[2026-06-02 16:55:48,211][247478] Updated weights for policy 0, policy_version 42919 (0.0008) +[2026-06-02 16:55:48,400][247478] Updated weights for policy 0, policy_version 42929 (0.0008) +[2026-06-02 16:55:48,589][247478] Updated weights for policy 0, policy_version 42939 (0.0009) +[2026-06-02 16:55:49,212][247478] Updated weights for policy 0, policy_version 42949 (0.0010) +[2026-06-02 16:55:49,385][247478] Updated weights for policy 0, policy_version 42959 (0.0008) +[2026-06-02 16:55:49,579][247478] Updated weights for policy 0, policy_version 42970 (0.0009) +[2026-06-02 16:55:49,725][246448] Fps is (10 sec: 19660.8, 60 sec: 20206.9, 300 sec: 20327.3). Total num frames: 21987328. Throughput: 0: 20207.0. Samples: 22006016. Policy #0 lag: (min: 63.0, avg: 79.8, max: 127.0) +[2026-06-02 16:55:49,726][246448] Avg episode reward: [(0, '1878.520')] +[2026-06-02 16:55:49,757][247478] Updated weights for policy 0, policy_version 42980 (0.0008) +[2026-06-02 16:55:49,943][247478] Updated weights for policy 0, policy_version 42990 (0.0008) +[2026-06-02 16:55:50,125][247478] Updated weights for policy 0, policy_version 43000 (0.0006) +[2026-06-02 16:55:50,264][247399] Saving new best policy, reward=1878.520! +[2026-06-02 16:55:50,767][247478] Updated weights for policy 0, policy_version 43010 (0.0006) +[2026-06-02 16:55:50,938][247478] Updated weights for policy 0, policy_version 43020 (0.0008) +[2026-06-02 16:55:51,111][247478] Updated weights for policy 0, policy_version 43030 (0.0008) +[2026-06-02 16:55:51,302][247478] Updated weights for policy 0, policy_version 43040 (0.0009) +[2026-06-02 16:55:51,485][247478] Updated weights for policy 0, policy_version 43050 (0.0008) +[2026-06-02 16:55:51,668][247478] Updated weights for policy 0, policy_version 43060 (0.0009) +[2026-06-02 16:55:51,848][247478] Updated weights for policy 0, policy_version 43070 (0.0009) +[2026-06-02 16:55:52,485][247478] Updated weights for policy 0, policy_version 43080 (0.0008) +[2026-06-02 16:55:52,660][247478] Updated weights for policy 0, policy_version 43090 (0.0009) +[2026-06-02 16:55:52,847][247478] Updated weights for policy 0, policy_version 43100 (0.0008) +[2026-06-02 16:55:53,047][247478] Updated weights for policy 0, policy_version 43111 (0.0008) +[2026-06-02 16:55:53,240][247478] Updated weights for policy 0, policy_version 43122 (0.0008) +[2026-06-02 16:55:53,433][247478] Updated weights for policy 0, policy_version 43132 (0.0009) +[2026-06-02 16:55:54,056][247478] Updated weights for policy 0, policy_version 43142 (0.0008) +[2026-06-02 16:55:54,226][247478] Updated weights for policy 0, policy_version 43152 (0.0008) +[2026-06-02 16:55:54,416][247478] Updated weights for policy 0, policy_version 43162 (0.0008) +[2026-06-02 16:55:54,593][247478] Updated weights for policy 0, policy_version 43172 (0.0007) +[2026-06-02 16:55:54,725][246448] Fps is (10 sec: 19660.8, 60 sec: 20206.9, 300 sec: 20216.2). Total num frames: 22085632. Throughput: 0: 20278.0. Samples: 22129792. Policy #0 lag: (min: 63.0, avg: 79.8, max: 127.0) +[2026-06-02 16:55:54,726][246448] Avg episode reward: [(0, '1892.075')] +[2026-06-02 16:55:54,776][247478] Updated weights for policy 0, policy_version 43182 (0.0009) +[2026-06-02 16:55:54,954][247478] Updated weights for policy 0, policy_version 43192 (0.0008) +[2026-06-02 16:55:55,093][247399] Saving new best policy, reward=1892.075! +[2026-06-02 16:55:55,619][247478] Updated weights for policy 0, policy_version 43202 (0.0009) +[2026-06-02 16:55:55,791][247478] Updated weights for policy 0, policy_version 43212 (0.0008) +[2026-06-02 16:55:55,980][247478] Updated weights for policy 0, policy_version 43223 (0.0008) +[2026-06-02 16:55:56,173][247478] Updated weights for policy 0, policy_version 43233 (0.0008) +[2026-06-02 16:55:56,373][247478] Updated weights for policy 0, policy_version 43244 (0.0005) +[2026-06-02 16:55:56,551][247478] Updated weights for policy 0, policy_version 43254 (0.0005) +[2026-06-02 16:55:56,730][247478] Updated weights for policy 0, policy_version 43264 (0.0009) +[2026-06-02 16:55:57,399][247478] Updated weights for policy 0, policy_version 43276 (0.0009) +[2026-06-02 16:55:57,574][247478] Updated weights for policy 0, policy_version 43286 (0.0009) +[2026-06-02 16:55:57,751][247478] Updated weights for policy 0, policy_version 43296 (0.0009) +[2026-06-02 16:55:57,937][247478] Updated weights for policy 0, policy_version 43306 (0.0009) +[2026-06-02 16:55:58,147][247478] Updated weights for policy 0, policy_version 43317 (0.0009) +[2026-06-02 16:55:58,330][247478] Updated weights for policy 0, policy_version 43327 (0.0009) +[2026-06-02 16:55:58,958][247478] Updated weights for policy 0, policy_version 43338 (0.0008) +[2026-06-02 16:55:59,137][247478] Updated weights for policy 0, policy_version 43348 (0.0009) +[2026-06-02 16:55:59,321][247478] Updated weights for policy 0, policy_version 43358 (0.0009) +[2026-06-02 16:55:59,504][247478] Updated weights for policy 0, policy_version 43368 (0.0009) +[2026-06-02 16:55:59,686][247478] Updated weights for policy 0, policy_version 43378 (0.0009) +[2026-06-02 16:55:59,725][246448] Fps is (10 sec: 19660.7, 60 sec: 20206.9, 300 sec: 20216.2). Total num frames: 22183936. Throughput: 0: 20280.9. Samples: 22192640. Policy #0 lag: (min: 63.0, avg: 79.8, max: 127.0) +[2026-06-02 16:55:59,726][246448] Avg episode reward: [(0, '1895.938')] +[2026-06-02 16:55:59,870][247478] Updated weights for policy 0, policy_version 43388 (0.0009) +[2026-06-02 16:55:59,935][247399] Saving new best policy, reward=1895.938! +[2026-06-02 16:56:00,504][247478] Updated weights for policy 0, policy_version 43398 (0.0009) +[2026-06-02 16:56:00,679][247478] Updated weights for policy 0, policy_version 43408 (0.0009) +[2026-06-02 16:56:00,872][247478] Updated weights for policy 0, policy_version 43418 (0.0007) +[2026-06-02 16:56:01,049][247478] Updated weights for policy 0, policy_version 43428 (0.0009) +[2026-06-02 16:56:01,236][247478] Updated weights for policy 0, policy_version 43438 (0.0009) +[2026-06-02 16:56:01,420][247478] Updated weights for policy 0, policy_version 43448 (0.0009) +[2026-06-02 16:56:02,058][247478] Updated weights for policy 0, policy_version 43458 (0.0009) +[2026-06-02 16:56:02,220][247478] Updated weights for policy 0, policy_version 43468 (0.0008) +[2026-06-02 16:56:02,400][247478] Updated weights for policy 0, policy_version 43478 (0.0009) +[2026-06-02 16:56:02,579][247478] Updated weights for policy 0, policy_version 43488 (0.0008) +[2026-06-02 16:56:02,765][247478] Updated weights for policy 0, policy_version 43498 (0.0009) +[2026-06-02 16:56:02,954][247478] Updated weights for policy 0, policy_version 43508 (0.0009) +[2026-06-02 16:56:03,132][247478] Updated weights for policy 0, policy_version 43518 (0.0008) +[2026-06-02 16:56:03,764][247478] Updated weights for policy 0, policy_version 43528 (0.0009) +[2026-06-02 16:56:03,975][247478] Updated weights for policy 0, policy_version 43539 (0.0009) +[2026-06-02 16:56:04,162][247478] Updated weights for policy 0, policy_version 43550 (0.0008) +[2026-06-02 16:56:04,351][247478] Updated weights for policy 0, policy_version 43560 (0.0009) +[2026-06-02 16:56:04,536][247478] Updated weights for policy 0, policy_version 43570 (0.0008) +[2026-06-02 16:56:04,717][247478] Updated weights for policy 0, policy_version 43580 (0.0008) +[2026-06-02 16:56:04,725][246448] Fps is (10 sec: 19660.8, 60 sec: 20206.9, 300 sec: 20216.2). Total num frames: 22282240. Throughput: 0: 20295.1. Samples: 22317952. Policy #0 lag: (min: 46.0, avg: 62.7, max: 110.0) +[2026-06-02 16:56:04,726][246448] Avg episode reward: [(0, '1928.983')] +[2026-06-02 16:56:04,780][247399] Saving new best policy, reward=1928.983! +[2026-06-02 16:56:05,384][247478] Updated weights for policy 0, policy_version 43591 (0.0009) +[2026-06-02 16:56:05,561][247478] Updated weights for policy 0, policy_version 43601 (0.0009) +[2026-06-02 16:56:05,742][247478] Updated weights for policy 0, policy_version 43611 (0.0009) +[2026-06-02 16:56:05,927][247478] Updated weights for policy 0, policy_version 43621 (0.0008) +[2026-06-02 16:56:06,105][247478] Updated weights for policy 0, policy_version 43631 (0.0009) +[2026-06-02 16:56:06,308][247478] Updated weights for policy 0, policy_version 43642 (0.0008) +[2026-06-02 16:56:06,942][247478] Updated weights for policy 0, policy_version 43652 (0.0009) +[2026-06-02 16:56:07,106][247478] Updated weights for policy 0, policy_version 43662 (0.0008) +[2026-06-02 16:56:07,282][247478] Updated weights for policy 0, policy_version 43672 (0.0008) +[2026-06-02 16:56:07,463][247478] Updated weights for policy 0, policy_version 43682 (0.0009) +[2026-06-02 16:56:07,645][247478] Updated weights for policy 0, policy_version 43692 (0.0009) +[2026-06-02 16:56:07,827][247478] Updated weights for policy 0, policy_version 43702 (0.0008) +[2026-06-02 16:56:08,011][247478] Updated weights for policy 0, policy_version 43712 (0.0009) +[2026-06-02 16:56:08,653][247478] Updated weights for policy 0, policy_version 43722 (0.0008) +[2026-06-02 16:56:08,826][247478] Updated weights for policy 0, policy_version 43732 (0.0009) +[2026-06-02 16:56:09,014][247478] Updated weights for policy 0, policy_version 43742 (0.0008) +[2026-06-02 16:56:09,196][247478] Updated weights for policy 0, policy_version 43752 (0.0008) +[2026-06-02 16:56:09,375][247478] Updated weights for policy 0, policy_version 43762 (0.0008) +[2026-06-02 16:56:09,563][247478] Updated weights for policy 0, policy_version 43772 (0.0008) +[2026-06-02 16:56:09,725][246448] Fps is (10 sec: 22937.6, 60 sec: 20753.0, 300 sec: 20327.3). Total num frames: 22413312. Throughput: 0: 20201.2. Samples: 22431488. Policy #0 lag: (min: 46.0, avg: 62.7, max: 110.0) +[2026-06-02 16:56:09,726][246448] Avg episode reward: [(0, '1918.415')] +[2026-06-02 16:56:10,226][247478] Updated weights for policy 0, policy_version 43782 (0.0009) +[2026-06-02 16:56:10,403][247478] Updated weights for policy 0, policy_version 43792 (0.0009) +[2026-06-02 16:56:10,587][247478] Updated weights for policy 0, policy_version 43802 (0.0009) +[2026-06-02 16:56:10,785][247478] Updated weights for policy 0, policy_version 43813 (0.0008) +[2026-06-02 16:56:10,974][247478] Updated weights for policy 0, policy_version 43823 (0.0008) +[2026-06-02 16:56:11,153][247478] Updated weights for policy 0, policy_version 43833 (0.0008) +[2026-06-02 16:56:11,783][247478] Updated weights for policy 0, policy_version 43843 (0.0008) +[2026-06-02 16:56:11,958][247478] Updated weights for policy 0, policy_version 43853 (0.0008) +[2026-06-02 16:56:12,137][247478] Updated weights for policy 0, policy_version 43863 (0.0008) +[2026-06-02 16:56:12,316][247478] Updated weights for policy 0, policy_version 43873 (0.0008) +[2026-06-02 16:56:12,528][247478] Updated weights for policy 0, policy_version 43885 (0.0008) +[2026-06-02 16:56:12,721][247478] Updated weights for policy 0, policy_version 43895 (0.0008) +[2026-06-02 16:56:13,359][247478] Updated weights for policy 0, policy_version 43905 (0.0008) +[2026-06-02 16:56:13,528][247478] Updated weights for policy 0, policy_version 43915 (0.0009) +[2026-06-02 16:56:13,701][247478] Updated weights for policy 0, policy_version 43925 (0.0008) +[2026-06-02 16:56:13,883][247478] Updated weights for policy 0, policy_version 43935 (0.0008) +[2026-06-02 16:56:14,066][247478] Updated weights for policy 0, policy_version 43945 (0.0008) +[2026-06-02 16:56:14,247][247478] Updated weights for policy 0, policy_version 43955 (0.0008) +[2026-06-02 16:56:14,431][247478] Updated weights for policy 0, policy_version 43965 (0.0009) +[2026-06-02 16:56:14,725][246448] Fps is (10 sec: 22937.5, 60 sec: 20753.0, 300 sec: 20327.3). Total num frames: 22511616. Throughput: 0: 20337.8. Samples: 22494464. Policy #0 lag: (min: 46.0, avg: 62.7, max: 110.0) +[2026-06-02 16:56:14,726][246448] Avg episode reward: [(0, '1950.167')] +[2026-06-02 16:56:14,732][247399] Saving new best policy, reward=1950.167! +[2026-06-02 16:56:15,071][247478] Updated weights for policy 0, policy_version 43975 (0.0008) +[2026-06-02 16:56:15,246][247478] Updated weights for policy 0, policy_version 43985 (0.0008) +[2026-06-02 16:56:15,449][247478] Updated weights for policy 0, policy_version 43996 (0.0008) +[2026-06-02 16:56:15,631][247478] Updated weights for policy 0, policy_version 44006 (0.0009) +[2026-06-02 16:56:15,825][247478] Updated weights for policy 0, policy_version 44017 (0.0008) +[2026-06-02 16:56:16,020][247478] Updated weights for policy 0, policy_version 44027 (0.0009) +[2026-06-02 16:56:16,657][247478] Updated weights for policy 0, policy_version 44037 (0.0009) +[2026-06-02 16:56:16,839][247478] Updated weights for policy 0, policy_version 44047 (0.0008) +[2026-06-02 16:56:17,020][247478] Updated weights for policy 0, policy_version 44057 (0.0008) +[2026-06-02 16:56:17,199][247478] Updated weights for policy 0, policy_version 44067 (0.0008) +[2026-06-02 16:56:17,383][247478] Updated weights for policy 0, policy_version 44077 (0.0008) +[2026-06-02 16:56:17,565][247478] Updated weights for policy 0, policy_version 44087 (0.0008) +[2026-06-02 16:56:18,213][247478] Updated weights for policy 0, policy_version 44097 (0.0009) +[2026-06-02 16:56:18,374][247478] Updated weights for policy 0, policy_version 44107 (0.0008) +[2026-06-02 16:56:18,582][247478] Updated weights for policy 0, policy_version 44118 (0.0009) +[2026-06-02 16:56:18,775][247478] Updated weights for policy 0, policy_version 44129 (0.0008) +[2026-06-02 16:56:18,967][247478] Updated weights for policy 0, policy_version 44139 (0.0009) +[2026-06-02 16:56:19,147][247478] Updated weights for policy 0, policy_version 44149 (0.0008) +[2026-06-02 16:56:19,325][247478] Updated weights for policy 0, policy_version 44159 (0.0008) +[2026-06-02 16:56:19,725][246448] Fps is (10 sec: 19660.9, 60 sec: 20206.9, 300 sec: 20327.3). Total num frames: 22609920. Throughput: 0: 20323.6. Samples: 22619392. Policy #0 lag: (min: 46.0, avg: 62.7, max: 110.0) +[2026-06-02 16:56:19,726][246448] Avg episode reward: [(0, '1990.000')] +[2026-06-02 16:56:19,946][247478] Updated weights for policy 0, policy_version 44169 (0.0009) +[2026-06-02 16:56:20,127][247478] Updated weights for policy 0, policy_version 44179 (0.0008) +[2026-06-02 16:56:20,310][247478] Updated weights for policy 0, policy_version 44189 (0.0009) +[2026-06-02 16:56:20,504][247478] Updated weights for policy 0, policy_version 44200 (0.0009) +[2026-06-02 16:56:20,685][247478] Updated weights for policy 0, policy_version 44210 (0.0008) +[2026-06-02 16:56:20,869][247478] Updated weights for policy 0, policy_version 44220 (0.0008) +[2026-06-02 16:56:20,932][247399] Saving new best policy, reward=1990.000! +[2026-06-02 16:56:21,517][247478] Updated weights for policy 0, policy_version 44230 (0.0009) +[2026-06-02 16:56:21,690][247478] Updated weights for policy 0, policy_version 44240 (0.0009) +[2026-06-02 16:56:21,874][247478] Updated weights for policy 0, policy_version 44250 (0.0008) +[2026-06-02 16:56:22,052][247478] Updated weights for policy 0, policy_version 44260 (0.0008) +[2026-06-02 16:56:22,239][247478] Updated weights for policy 0, policy_version 44270 (0.0008) +[2026-06-02 16:56:22,419][247478] Updated weights for policy 0, policy_version 44280 (0.0008) +[2026-06-02 16:56:23,071][247478] Updated weights for policy 0, policy_version 44290 (0.0009) +[2026-06-02 16:56:23,230][247478] Updated weights for policy 0, policy_version 44300 (0.0008) +[2026-06-02 16:56:23,420][247478] Updated weights for policy 0, policy_version 44310 (0.0009) +[2026-06-02 16:56:23,601][247478] Updated weights for policy 0, policy_version 44320 (0.0008) +[2026-06-02 16:56:23,809][247478] Updated weights for policy 0, policy_version 44331 (0.0008) +[2026-06-02 16:56:23,976][247478] Updated weights for policy 0, policy_version 44341 (0.0008) +[2026-06-02 16:56:24,167][247478] Updated weights for policy 0, policy_version 44351 (0.0008) +[2026-06-02 16:56:24,725][246448] Fps is (10 sec: 19660.9, 60 sec: 20206.9, 300 sec: 20327.3). Total num frames: 22708224. Throughput: 0: 20320.7. Samples: 22744832. Policy #0 lag: (min: 46.0, avg: 62.7, max: 110.0) +[2026-06-02 16:56:24,726][246448] Avg episode reward: [(0, '2038.136')] +[2026-06-02 16:56:24,810][247478] Updated weights for policy 0, policy_version 44361 (0.0008) +[2026-06-02 16:56:24,986][247478] Updated weights for policy 0, policy_version 44371 (0.0008) +[2026-06-02 16:56:25,178][247478] Updated weights for policy 0, policy_version 44382 (0.0008) +[2026-06-02 16:56:25,363][247478] Updated weights for policy 0, policy_version 44392 (0.0009) +[2026-06-02 16:56:25,549][247478] Updated weights for policy 0, policy_version 44402 (0.0008) +[2026-06-02 16:56:25,752][247478] Updated weights for policy 0, policy_version 44413 (0.0008) +[2026-06-02 16:56:25,799][247399] Saving new best policy, reward=2038.136! +[2026-06-02 16:56:26,391][247478] Updated weights for policy 0, policy_version 44423 (0.0008) +[2026-06-02 16:56:26,591][247478] Updated weights for policy 0, policy_version 44434 (0.0008) +[2026-06-02 16:56:26,770][247478] Updated weights for policy 0, policy_version 44444 (0.0009) +[2026-06-02 16:56:26,957][247478] Updated weights for policy 0, policy_version 44454 (0.0008) +[2026-06-02 16:56:27,127][247478] Updated weights for policy 0, policy_version 44464 (0.0008) +[2026-06-02 16:56:27,312][247478] Updated weights for policy 0, policy_version 44474 (0.0008) +[2026-06-02 16:56:27,967][247478] Updated weights for policy 0, policy_version 44484 (0.0009) +[2026-06-02 16:56:28,154][247478] Updated weights for policy 0, policy_version 44495 (0.0008) +[2026-06-02 16:56:28,344][247478] Updated weights for policy 0, policy_version 44505 (0.0008) +[2026-06-02 16:56:28,536][247478] Updated weights for policy 0, policy_version 44515 (0.0008) +[2026-06-02 16:56:28,708][247478] Updated weights for policy 0, policy_version 44525 (0.0008) +[2026-06-02 16:56:28,888][247478] Updated weights for policy 0, policy_version 44535 (0.0008) +[2026-06-02 16:56:29,544][247478] Updated weights for policy 0, policy_version 44545 (0.0009) +[2026-06-02 16:56:29,710][247478] Updated weights for policy 0, policy_version 44555 (0.0008) +[2026-06-02 16:56:29,725][246448] Fps is (10 sec: 19660.8, 60 sec: 20207.0, 300 sec: 20327.3). Total num frames: 22806528. Throughput: 0: 20209.8. Samples: 22801792. Policy #0 lag: (min: 63.0, avg: 79.6, max: 127.0) +[2026-06-02 16:56:29,726][246448] Avg episode reward: [(0, '2046.118')] +[2026-06-02 16:56:29,883][247478] Updated weights for policy 0, policy_version 44565 (0.0009) +[2026-06-02 16:56:30,066][247478] Updated weights for policy 0, policy_version 44575 (0.0008) +[2026-06-02 16:56:30,241][247478] Updated weights for policy 0, policy_version 44585 (0.0008) +[2026-06-02 16:56:30,422][247478] Updated weights for policy 0, policy_version 44595 (0.0008) +[2026-06-02 16:56:30,610][247478] Updated weights for policy 0, policy_version 44605 (0.0008) +[2026-06-02 16:56:30,655][247399] Saving new best policy, reward=2046.118! +[2026-06-02 16:56:31,262][247478] Updated weights for policy 0, policy_version 44615 (0.0008) +[2026-06-02 16:56:31,445][247478] Updated weights for policy 0, policy_version 44626 (0.0008) +[2026-06-02 16:56:31,630][247478] Updated weights for policy 0, policy_version 44636 (0.0009) +[2026-06-02 16:56:31,812][247478] Updated weights for policy 0, policy_version 44646 (0.0008) +[2026-06-02 16:56:31,992][247478] Updated weights for policy 0, policy_version 44656 (0.0008) +[2026-06-02 16:56:32,176][247478] Updated weights for policy 0, policy_version 44666 (0.0008) +[2026-06-02 16:56:32,821][247478] Updated weights for policy 0, policy_version 44676 (0.0008) +[2026-06-02 16:56:32,996][247478] Updated weights for policy 0, policy_version 44686 (0.0008) +[2026-06-02 16:56:33,175][247478] Updated weights for policy 0, policy_version 44696 (0.0008) +[2026-06-02 16:56:33,363][247478] Updated weights for policy 0, policy_version 44706 (0.0008) +[2026-06-02 16:56:33,538][247478] Updated weights for policy 0, policy_version 44716 (0.0009) +[2026-06-02 16:56:33,720][247478] Updated weights for policy 0, policy_version 44726 (0.0008) +[2026-06-02 16:56:33,906][247478] Updated weights for policy 0, policy_version 44736 (0.0009) +[2026-06-02 16:56:34,545][247478] Updated weights for policy 0, policy_version 44746 (0.0009) +[2026-06-02 16:56:34,718][247478] Updated weights for policy 0, policy_version 44756 (0.0008) +[2026-06-02 16:56:34,725][246448] Fps is (10 sec: 19660.8, 60 sec: 20207.0, 300 sec: 20327.3). Total num frames: 22904832. Throughput: 0: 20352.0. Samples: 22921856. Policy #0 lag: (min: 63.0, avg: 79.6, max: 127.0) +[2026-06-02 16:56:34,726][246448] Avg episode reward: [(0, '2053.417')] +[2026-06-02 16:56:34,899][247478] Updated weights for policy 0, policy_version 44766 (0.0008) +[2026-06-02 16:56:35,084][247478] Updated weights for policy 0, policy_version 44776 (0.0008) +[2026-06-02 16:56:35,269][247478] Updated weights for policy 0, policy_version 44786 (0.0008) +[2026-06-02 16:56:35,495][247478] Updated weights for policy 0, policy_version 44798 (0.0009) +[2026-06-02 16:56:35,521][247399] Saving new best policy, reward=2053.417! +[2026-06-02 16:56:36,142][247478] Updated weights for policy 0, policy_version 44809 (0.0009) +[2026-06-02 16:56:36,323][247478] Updated weights for policy 0, policy_version 44819 (0.0008) +[2026-06-02 16:56:36,503][247478] Updated weights for policy 0, policy_version 44829 (0.0009) +[2026-06-02 16:56:36,685][247478] Updated weights for policy 0, policy_version 44839 (0.0008) +[2026-06-02 16:56:36,857][247478] Updated weights for policy 0, policy_version 44849 (0.0008) +[2026-06-02 16:56:37,046][247478] Updated weights for policy 0, policy_version 44859 (0.0009) +[2026-06-02 16:56:37,693][247478] Updated weights for policy 0, policy_version 44870 (0.0009) +[2026-06-02 16:56:37,868][247478] Updated weights for policy 0, policy_version 44880 (0.0008) +[2026-06-02 16:56:38,049][247478] Updated weights for policy 0, policy_version 44890 (0.0008) +[2026-06-02 16:56:38,260][247478] Updated weights for policy 0, policy_version 44901 (0.0008) +[2026-06-02 16:56:38,432][247478] Updated weights for policy 0, policy_version 44911 (0.0008) +[2026-06-02 16:56:38,620][247478] Updated weights for policy 0, policy_version 44921 (0.0009) +[2026-06-02 16:56:39,282][247478] Updated weights for policy 0, policy_version 44931 (0.0009) +[2026-06-02 16:56:39,448][247478] Updated weights for policy 0, policy_version 44941 (0.0008) +[2026-06-02 16:56:39,633][247478] Updated weights for policy 0, policy_version 44951 (0.0009) +[2026-06-02 16:56:39,725][246448] Fps is (10 sec: 19660.9, 60 sec: 20206.9, 300 sec: 20327.3). Total num frames: 23003136. Throughput: 0: 20386.1. Samples: 23047168. Policy #0 lag: (min: 63.0, avg: 79.6, max: 127.0) +[2026-06-02 16:56:39,726][246448] Avg episode reward: [(0, '2045.434')] +[2026-06-02 16:56:39,816][247478] Updated weights for policy 0, policy_version 44961 (0.0009) +[2026-06-02 16:56:39,998][247478] Updated weights for policy 0, policy_version 44971 (0.0009) +[2026-06-02 16:56:40,180][247478] Updated weights for policy 0, policy_version 44981 (0.0007) +[2026-06-02 16:56:40,366][247478] Updated weights for policy 0, policy_version 44991 (0.0009) +[2026-06-02 16:56:41,006][247478] Updated weights for policy 0, policy_version 45001 (0.0008) +[2026-06-02 16:56:41,184][247478] Updated weights for policy 0, policy_version 45011 (0.0009) +[2026-06-02 16:56:41,367][247478] Updated weights for policy 0, policy_version 45021 (0.0008) +[2026-06-02 16:56:41,548][247478] Updated weights for policy 0, policy_version 45031 (0.0008) +[2026-06-02 16:56:41,756][247478] Updated weights for policy 0, policy_version 45042 (0.0009) +[2026-06-02 16:56:41,958][247478] Updated weights for policy 0, policy_version 45053 (0.0009) +[2026-06-02 16:56:42,582][247478] Updated weights for policy 0, policy_version 45063 (0.0009) +[2026-06-02 16:56:42,756][247478] Updated weights for policy 0, policy_version 45073 (0.0009) +[2026-06-02 16:56:42,940][247478] Updated weights for policy 0, policy_version 45083 (0.0008) +[2026-06-02 16:56:43,143][247478] Updated weights for policy 0, policy_version 45094 (0.0009) +[2026-06-02 16:56:43,344][247478] Updated weights for policy 0, policy_version 45105 (0.0008) +[2026-06-02 16:56:43,528][247478] Updated weights for policy 0, policy_version 45115 (0.0008) +[2026-06-02 16:56:44,159][247478] Updated weights for policy 0, policy_version 45125 (0.0009) +[2026-06-02 16:56:44,334][247478] Updated weights for policy 0, policy_version 45135 (0.0008) +[2026-06-02 16:56:44,522][247478] Updated weights for policy 0, policy_version 45146 (0.0008) +[2026-06-02 16:56:44,706][247478] Updated weights for policy 0, policy_version 45156 (0.0008) +[2026-06-02 16:56:44,725][246448] Fps is (10 sec: 19660.8, 60 sec: 20206.9, 300 sec: 20216.2). Total num frames: 23101440. Throughput: 0: 20380.5. Samples: 23109760. Policy #0 lag: (min: 63.0, avg: 79.6, max: 127.0) +[2026-06-02 16:56:44,726][246448] Avg episode reward: [(0, '1997.305')] +[2026-06-02 16:56:44,907][247478] Updated weights for policy 0, policy_version 45167 (0.0009) +[2026-06-02 16:56:45,112][247478] Updated weights for policy 0, policy_version 45178 (0.0008) +[2026-06-02 16:56:45,766][247478] Updated weights for policy 0, policy_version 45188 (0.0008) +[2026-06-02 16:56:45,967][247478] Updated weights for policy 0, policy_version 45200 (0.0009) +[2026-06-02 16:56:46,134][247478] Updated weights for policy 0, policy_version 45210 (0.0008) +[2026-06-02 16:56:46,323][247478] Updated weights for policy 0, policy_version 45220 (0.0008) +[2026-06-02 16:56:46,490][247478] Updated weights for policy 0, policy_version 45230 (0.0008) +[2026-06-02 16:56:46,673][247478] Updated weights for policy 0, policy_version 45240 (0.0008) +[2026-06-02 16:56:47,392][247478] Updated weights for policy 0, policy_version 45253 (0.0009) +[2026-06-02 16:56:47,561][247478] Updated weights for policy 0, policy_version 45263 (0.0009) +[2026-06-02 16:56:47,741][247478] Updated weights for policy 0, policy_version 45273 (0.0008) +[2026-06-02 16:56:47,948][247478] Updated weights for policy 0, policy_version 45285 (0.0008) +[2026-06-02 16:56:48,129][247478] Updated weights for policy 0, policy_version 45295 (0.0008) +[2026-06-02 16:56:48,327][247478] Updated weights for policy 0, policy_version 45306 (0.0009) +[2026-06-02 16:56:48,992][247478] Updated weights for policy 0, policy_version 45317 (0.0009) +[2026-06-02 16:56:49,180][247478] Updated weights for policy 0, policy_version 45328 (0.0008) +[2026-06-02 16:56:49,367][247478] Updated weights for policy 0, policy_version 45339 (0.0009) +[2026-06-02 16:56:49,596][247478] Updated weights for policy 0, policy_version 45351 (0.0009) +[2026-06-02 16:56:49,725][246448] Fps is (10 sec: 19660.8, 60 sec: 20206.9, 300 sec: 20216.2). Total num frames: 23199744. Throughput: 0: 20209.8. Samples: 23227392. Policy #0 lag: (min: 63.0, avg: 79.6, max: 127.0) +[2026-06-02 16:56:49,726][246448] Avg episode reward: [(0, '2015.601')] +[2026-06-02 16:56:49,784][247478] Updated weights for policy 0, policy_version 45362 (0.0008) +[2026-06-02 16:56:49,954][247478] Updated weights for policy 0, policy_version 45372 (0.0008) +[2026-06-02 16:56:50,673][247478] Updated weights for policy 0, policy_version 45385 (0.0009) +[2026-06-02 16:56:50,869][247478] Updated weights for policy 0, policy_version 45396 (0.0009) +[2026-06-02 16:56:51,040][247478] Updated weights for policy 0, policy_version 45406 (0.0009) +[2026-06-02 16:56:51,236][247478] Updated weights for policy 0, policy_version 45416 (0.0008) +[2026-06-02 16:56:51,407][247478] Updated weights for policy 0, policy_version 45426 (0.0009) +[2026-06-02 16:56:51,615][247478] Updated weights for policy 0, policy_version 45437 (0.0009) +[2026-06-02 16:56:52,259][247478] Updated weights for policy 0, policy_version 45447 (0.0008) +[2026-06-02 16:56:52,453][247478] Updated weights for policy 0, policy_version 45458 (0.0009) +[2026-06-02 16:56:52,638][247478] Updated weights for policy 0, policy_version 45468 (0.0008) +[2026-06-02 16:56:52,824][247478] Updated weights for policy 0, policy_version 45478 (0.0008) +[2026-06-02 16:56:53,001][247478] Updated weights for policy 0, policy_version 45488 (0.0009) +[2026-06-02 16:56:53,176][247478] Updated weights for policy 0, policy_version 45498 (0.0009) +[2026-06-02 16:56:53,810][247478] Updated weights for policy 0, policy_version 45508 (0.0009) +[2026-06-02 16:56:53,978][247478] Updated weights for policy 0, policy_version 45518 (0.0009) +[2026-06-02 16:56:54,166][247478] Updated weights for policy 0, policy_version 45528 (0.0009) +[2026-06-02 16:56:54,344][247478] Updated weights for policy 0, policy_version 45538 (0.0009) +[2026-06-02 16:56:54,537][247478] Updated weights for policy 0, policy_version 45548 (0.0009) +[2026-06-02 16:56:54,703][247478] Updated weights for policy 0, policy_version 45558 (0.0009) +[2026-06-02 16:56:54,725][246448] Fps is (10 sec: 19660.9, 60 sec: 20206.9, 300 sec: 20216.2). Total num frames: 23298048. Throughput: 0: 20343.5. Samples: 23346944. Policy #0 lag: (min: 63.0, avg: 79.6, max: 127.0) +[2026-06-02 16:56:54,726][246448] Avg episode reward: [(0, '2067.980')] +[2026-06-02 16:56:54,882][247399] Saving new best policy, reward=2067.980! +[2026-06-02 16:56:54,886][247478] Updated weights for policy 0, policy_version 45568 (0.0009) +[2026-06-02 16:56:55,541][247478] Updated weights for policy 0, policy_version 45578 (0.0008) +[2026-06-02 16:56:55,721][247478] Updated weights for policy 0, policy_version 45588 (0.0008) +[2026-06-02 16:56:55,897][247478] Updated weights for policy 0, policy_version 45598 (0.0008) +[2026-06-02 16:56:56,082][247478] Updated weights for policy 0, policy_version 45608 (0.0008) +[2026-06-02 16:56:56,262][247478] Updated weights for policy 0, policy_version 45618 (0.0008) +[2026-06-02 16:56:56,449][247478] Updated weights for policy 0, policy_version 45628 (0.0009) +[2026-06-02 16:56:57,070][247478] Updated weights for policy 0, policy_version 45638 (0.0008) +[2026-06-02 16:56:57,248][247478] Updated weights for policy 0, policy_version 45648 (0.0009) +[2026-06-02 16:56:57,428][247478] Updated weights for policy 0, policy_version 45658 (0.0009) +[2026-06-02 16:56:57,611][247478] Updated weights for policy 0, policy_version 45668 (0.0009) +[2026-06-02 16:56:57,793][247478] Updated weights for policy 0, policy_version 45678 (0.0009) +[2026-06-02 16:56:58,015][247478] Updated weights for policy 0, policy_version 45690 (0.0009) +[2026-06-02 16:56:58,657][247478] Updated weights for policy 0, policy_version 45700 (0.0009) +[2026-06-02 16:56:58,830][247478] Updated weights for policy 0, policy_version 45710 (0.0009) +[2026-06-02 16:56:59,011][247478] Updated weights for policy 0, policy_version 45720 (0.0008) +[2026-06-02 16:56:59,197][247478] Updated weights for policy 0, policy_version 45730 (0.0009) +[2026-06-02 16:56:59,379][247478] Updated weights for policy 0, policy_version 45740 (0.0008) +[2026-06-02 16:56:59,553][247478] Updated weights for policy 0, policy_version 45750 (0.0008) +[2026-06-02 16:56:59,725][246448] Fps is (10 sec: 19660.7, 60 sec: 20206.9, 300 sec: 20216.2). Total num frames: 23396352. Throughput: 0: 20329.2. Samples: 23409280. Policy #0 lag: (min: 63.0, avg: 79.6, max: 127.0) +[2026-06-02 16:56:59,727][246448] Avg episode reward: [(0, '2039.775')] +[2026-06-02 16:56:59,743][247478] Updated weights for policy 0, policy_version 45760 (0.0009) +[2026-06-02 16:57:00,382][247478] Updated weights for policy 0, policy_version 45770 (0.0008) +[2026-06-02 16:57:00,552][247478] Updated weights for policy 0, policy_version 45780 (0.0009) +[2026-06-02 16:57:00,730][247478] Updated weights for policy 0, policy_version 45790 (0.0009) +[2026-06-02 16:57:00,917][247478] Updated weights for policy 0, policy_version 45800 (0.0009) +[2026-06-02 16:57:01,100][247478] Updated weights for policy 0, policy_version 45810 (0.0009) +[2026-06-02 16:57:01,284][247478] Updated weights for policy 0, policy_version 45820 (0.0008) +[2026-06-02 16:57:01,926][247478] Updated weights for policy 0, policy_version 45830 (0.0009) +[2026-06-02 16:57:02,101][247478] Updated weights for policy 0, policy_version 45840 (0.0008) +[2026-06-02 16:57:02,271][247478] Updated weights for policy 0, policy_version 45850 (0.0008) +[2026-06-02 16:57:02,455][247478] Updated weights for policy 0, policy_version 45860 (0.0008) +[2026-06-02 16:57:02,659][247478] Updated weights for policy 0, policy_version 45871 (0.0008) +[2026-06-02 16:57:02,844][247478] Updated weights for policy 0, policy_version 45881 (0.0008) +[2026-06-02 16:57:03,497][247478] Updated weights for policy 0, policy_version 45891 (0.0008) +[2026-06-02 16:57:03,668][247478] Updated weights for policy 0, policy_version 45901 (0.0008) +[2026-06-02 16:57:03,844][247478] Updated weights for policy 0, policy_version 45911 (0.0008) +[2026-06-02 16:57:04,033][247478] Updated weights for policy 0, policy_version 45921 (0.0008) +[2026-06-02 16:57:04,218][247478] Updated weights for policy 0, policy_version 45931 (0.0008) +[2026-06-02 16:57:04,397][247478] Updated weights for policy 0, policy_version 45941 (0.0008) +[2026-06-02 16:57:04,577][247478] Updated weights for policy 0, policy_version 45951 (0.0008) +[2026-06-02 16:57:04,725][246448] Fps is (10 sec: 22937.3, 60 sec: 20753.0, 300 sec: 20327.3). Total num frames: 23527424. Throughput: 0: 20349.1. Samples: 23535104. Policy #0 lag: (min: 63.0, avg: 79.3, max: 127.0) +[2026-06-02 16:57:04,727][246448] Avg episode reward: [(0, '2063.611')] +[2026-06-02 16:57:05,261][247478] Updated weights for policy 0, policy_version 45963 (0.0009) +[2026-06-02 16:57:05,450][247478] Updated weights for policy 0, policy_version 45974 (0.0009) +[2026-06-02 16:57:05,634][247478] Updated weights for policy 0, policy_version 45984 (0.0009) +[2026-06-02 16:57:05,803][247478] Updated weights for policy 0, policy_version 45994 (0.0008) +[2026-06-02 16:57:05,990][247478] Updated weights for policy 0, policy_version 46004 (0.0009) +[2026-06-02 16:57:06,171][247478] Updated weights for policy 0, policy_version 46014 (0.0009) +[2026-06-02 16:57:06,815][247478] Updated weights for policy 0, policy_version 46024 (0.0009) +[2026-06-02 16:57:06,987][247478] Updated weights for policy 0, policy_version 46034 (0.0008) +[2026-06-02 16:57:07,178][247478] Updated weights for policy 0, policy_version 46044 (0.0008) +[2026-06-02 16:57:07,350][247478] Updated weights for policy 0, policy_version 46054 (0.0009) +[2026-06-02 16:57:07,530][247478] Updated weights for policy 0, policy_version 46064 (0.0009) +[2026-06-02 16:57:07,723][247478] Updated weights for policy 0, policy_version 46074 (0.0009) +[2026-06-02 16:57:08,358][247478] Updated weights for policy 0, policy_version 46084 (0.0009) +[2026-06-02 16:57:08,519][247478] Updated weights for policy 0, policy_version 46094 (0.0009) +[2026-06-02 16:57:08,706][247478] Updated weights for policy 0, policy_version 46105 (0.0008) +[2026-06-02 16:57:08,879][247478] Updated weights for policy 0, policy_version 46115 (0.0007) +[2026-06-02 16:57:09,087][247478] Updated weights for policy 0, policy_version 46126 (0.0009) +[2026-06-02 16:57:09,259][247478] Updated weights for policy 0, policy_version 46136 (0.0009) +[2026-06-02 16:57:09,725][246448] Fps is (10 sec: 22937.7, 60 sec: 20206.9, 300 sec: 20327.3). Total num frames: 23625728. Throughput: 0: 20204.1. Samples: 23654016. Policy #0 lag: (min: 63.0, avg: 79.3, max: 127.0) +[2026-06-02 16:57:09,726][246448] Avg episode reward: [(0, '2086.438')] +[2026-06-02 16:57:09,731][247399] Saving new best policy, reward=2086.438! +[2026-06-02 16:57:09,972][247478] Updated weights for policy 0, policy_version 46147 (0.0009) +[2026-06-02 16:57:10,153][247478] Updated weights for policy 0, policy_version 46158 (0.0008) +[2026-06-02 16:57:10,322][247478] Updated weights for policy 0, policy_version 46168 (0.0009) +[2026-06-02 16:57:10,501][247478] Updated weights for policy 0, policy_version 46178 (0.0009) +[2026-06-02 16:57:10,695][247478] Updated weights for policy 0, policy_version 46189 (0.0008) +[2026-06-02 16:57:10,872][247478] Updated weights for policy 0, policy_version 46199 (0.0008) +[2026-06-02 16:57:11,523][247478] Updated weights for policy 0, policy_version 46209 (0.0008) +[2026-06-02 16:57:11,682][247478] Updated weights for policy 0, policy_version 46219 (0.0009) +[2026-06-02 16:57:11,870][247478] Updated weights for policy 0, policy_version 46230 (0.0009) +[2026-06-02 16:57:12,047][247478] Updated weights for policy 0, policy_version 46240 (0.0008) +[2026-06-02 16:57:12,223][247478] Updated weights for policy 0, policy_version 46250 (0.0009) +[2026-06-02 16:57:12,405][247478] Updated weights for policy 0, policy_version 46260 (0.0008) +[2026-06-02 16:57:12,585][247478] Updated weights for policy 0, policy_version 46270 (0.0008) +[2026-06-02 16:57:13,261][247478] Updated weights for policy 0, policy_version 46281 (0.0008) +[2026-06-02 16:57:13,452][247478] Updated weights for policy 0, policy_version 46292 (0.0009) +[2026-06-02 16:57:13,626][247478] Updated weights for policy 0, policy_version 46302 (0.0008) +[2026-06-02 16:57:13,824][247478] Updated weights for policy 0, policy_version 46313 (0.0008) +[2026-06-02 16:57:14,036][247478] Updated weights for policy 0, policy_version 46325 (0.0008) +[2026-06-02 16:57:14,222][247478] Updated weights for policy 0, policy_version 46336 (0.0009) +[2026-06-02 16:57:14,725][246448] Fps is (10 sec: 19661.0, 60 sec: 20206.9, 300 sec: 20327.3). Total num frames: 23724032. Throughput: 0: 20178.5. Samples: 23709824. Policy #0 lag: (min: 63.0, avg: 79.3, max: 127.0) +[2026-06-02 16:57:14,726][246448] Avg episode reward: [(0, '2103.686')] +[2026-06-02 16:57:14,912][247478] Updated weights for policy 0, policy_version 46347 (0.0009) +[2026-06-02 16:57:15,095][247478] Updated weights for policy 0, policy_version 46357 (0.0009) +[2026-06-02 16:57:15,279][247478] Updated weights for policy 0, policy_version 46367 (0.0009) +[2026-06-02 16:57:15,469][247478] Updated weights for policy 0, policy_version 46378 (0.0009) +[2026-06-02 16:57:15,670][247478] Updated weights for policy 0, policy_version 46389 (0.0009) +[2026-06-02 16:57:15,852][247478] Updated weights for policy 0, policy_version 46399 (0.0009) +[2026-06-02 16:57:15,869][247399] Saving new best policy, reward=2103.686! +[2026-06-02 16:57:16,508][247478] Updated weights for policy 0, policy_version 46409 (0.0008) +[2026-06-02 16:57:16,733][247478] Updated weights for policy 0, policy_version 46421 (0.0008) +[2026-06-02 16:57:16,918][247478] Updated weights for policy 0, policy_version 46431 (0.0009) +[2026-06-02 16:57:17,098][247478] Updated weights for policy 0, policy_version 46441 (0.0008) +[2026-06-02 16:57:17,273][247478] Updated weights for policy 0, policy_version 46451 (0.0008) +[2026-06-02 16:57:17,460][247478] Updated weights for policy 0, policy_version 46461 (0.0008) +[2026-06-02 16:57:18,106][247478] Updated weights for policy 0, policy_version 46472 (0.0009) +[2026-06-02 16:57:18,301][247478] Updated weights for policy 0, policy_version 46483 (0.0008) +[2026-06-02 16:57:18,484][247478] Updated weights for policy 0, policy_version 46493 (0.0008) +[2026-06-02 16:57:18,662][247478] Updated weights for policy 0, policy_version 46503 (0.0008) +[2026-06-02 16:57:18,842][247478] Updated weights for policy 0, policy_version 46513 (0.0008) +[2026-06-02 16:57:19,027][247478] Updated weights for policy 0, policy_version 46523 (0.0008) +[2026-06-02 16:57:19,679][247478] Updated weights for policy 0, policy_version 46533 (0.0008) +[2026-06-02 16:57:19,725][246448] Fps is (10 sec: 19660.6, 60 sec: 20206.9, 300 sec: 20327.3). Total num frames: 23822336. Throughput: 0: 20317.8. Samples: 23836160. Policy #0 lag: (min: 63.0, avg: 79.3, max: 127.0) +[2026-06-02 16:57:19,726][246448] Avg episode reward: [(0, '2111.008')] +[2026-06-02 16:57:19,877][247478] Updated weights for policy 0, policy_version 46544 (0.0009) +[2026-06-02 16:57:20,040][247478] Updated weights for policy 0, policy_version 46554 (0.0008) +[2026-06-02 16:57:20,241][247478] Updated weights for policy 0, policy_version 46565 (0.0008) +[2026-06-02 16:57:20,423][247478] Updated weights for policy 0, policy_version 46575 (0.0009) +[2026-06-02 16:57:20,604][247478] Updated weights for policy 0, policy_version 46585 (0.0009) +[2026-06-02 16:57:20,723][247399] Saving new best policy, reward=2111.008! +[2026-06-02 16:57:21,255][247478] Updated weights for policy 0, policy_version 46595 (0.0009) +[2026-06-02 16:57:21,422][247478] Updated weights for policy 0, policy_version 46605 (0.0008) +[2026-06-02 16:57:21,604][247478] Updated weights for policy 0, policy_version 46615 (0.0008) +[2026-06-02 16:57:21,782][247478] Updated weights for policy 0, policy_version 46625 (0.0008) +[2026-06-02 16:57:21,972][247478] Updated weights for policy 0, policy_version 46635 (0.0008) +[2026-06-02 16:57:22,148][247478] Updated weights for policy 0, policy_version 46645 (0.0008) +[2026-06-02 16:57:22,332][247478] Updated weights for policy 0, policy_version 46655 (0.0008) +[2026-06-02 16:57:22,980][247478] Updated weights for policy 0, policy_version 46665 (0.0008) +[2026-06-02 16:57:23,156][247478] Updated weights for policy 0, policy_version 46675 (0.0008) +[2026-06-02 16:57:23,333][247478] Updated weights for policy 0, policy_version 46685 (0.0008) +[2026-06-02 16:57:23,517][247478] Updated weights for policy 0, policy_version 46695 (0.0008) +[2026-06-02 16:57:23,700][247478] Updated weights for policy 0, policy_version 46705 (0.0008) +[2026-06-02 16:57:23,883][247478] Updated weights for policy 0, policy_version 46715 (0.0009) +[2026-06-02 16:57:24,512][247478] Updated weights for policy 0, policy_version 46725 (0.0009) +[2026-06-02 16:57:24,700][247478] Updated weights for policy 0, policy_version 46736 (0.0008) +[2026-06-02 16:57:24,725][246448] Fps is (10 sec: 19660.8, 60 sec: 20206.9, 300 sec: 20327.3). Total num frames: 23920640. Throughput: 0: 20317.9. Samples: 23961472. Policy #0 lag: (min: 63.0, avg: 79.3, max: 127.0) +[2026-06-02 16:57:24,727][246448] Avg episode reward: [(0, '2081.471')] +[2026-06-02 16:57:24,882][247478] Updated weights for policy 0, policy_version 46746 (0.0008) +[2026-06-02 16:57:25,054][247478] Updated weights for policy 0, policy_version 46756 (0.0008) +[2026-06-02 16:57:25,280][247478] Updated weights for policy 0, policy_version 46768 (0.0009) +[2026-06-02 16:57:25,464][247478] Updated weights for policy 0, policy_version 46778 (0.0009) +[2026-06-02 16:57:26,108][247478] Updated weights for policy 0, policy_version 46788 (0.0008) +[2026-06-02 16:57:26,280][247478] Updated weights for policy 0, policy_version 46798 (0.0008) +[2026-06-02 16:57:26,460][247478] Updated weights for policy 0, policy_version 46808 (0.0008) +[2026-06-02 16:57:26,670][247478] Updated weights for policy 0, policy_version 46820 (0.0008) +[2026-06-02 16:57:26,855][247478] Updated weights for policy 0, policy_version 46830 (0.0009) +[2026-06-02 16:57:27,029][247478] Updated weights for policy 0, policy_version 46840 (0.0008) +[2026-06-02 16:57:27,705][247478] Updated weights for policy 0, policy_version 46850 (0.0009) +[2026-06-02 16:57:27,884][247478] Updated weights for policy 0, policy_version 46861 (0.0007) +[2026-06-02 16:57:28,092][247478] Updated weights for policy 0, policy_version 46872 (0.0009) +[2026-06-02 16:57:28,259][247478] Updated weights for policy 0, policy_version 46882 (0.0009) +[2026-06-02 16:57:28,465][247478] Updated weights for policy 0, policy_version 46893 (0.0009) +[2026-06-02 16:57:28,648][247478] Updated weights for policy 0, policy_version 46903 (0.0009) +[2026-06-02 16:57:29,309][247478] Updated weights for policy 0, policy_version 46913 (0.0009) +[2026-06-02 16:57:29,475][247478] Updated weights for policy 0, policy_version 46923 (0.0009) +[2026-06-02 16:57:29,649][247478] Updated weights for policy 0, policy_version 46933 (0.0009) +[2026-06-02 16:57:29,725][246448] Fps is (10 sec: 19661.0, 60 sec: 20206.9, 300 sec: 20216.2). Total num frames: 24018944. Throughput: 0: 20334.9. Samples: 24024832. Policy #0 lag: (min: 63.0, avg: 79.3, max: 127.0) +[2026-06-02 16:57:29,726][246448] Avg episode reward: [(0, '2175.532')] +[2026-06-02 16:57:29,844][247478] Updated weights for policy 0, policy_version 46943 (0.0009) +[2026-06-02 16:57:30,019][247478] Updated weights for policy 0, policy_version 46953 (0.0008) +[2026-06-02 16:57:30,214][247478] Updated weights for policy 0, policy_version 46964 (0.0009) +[2026-06-02 16:57:30,423][247399] Saving new best policy, reward=2175.532! +[2026-06-02 16:57:30,425][247478] Updated weights for policy 0, policy_version 46976 (0.0009) +[2026-06-02 16:57:31,068][247478] Updated weights for policy 0, policy_version 46986 (0.0009) +[2026-06-02 16:57:31,244][247478] Updated weights for policy 0, policy_version 46996 (0.0009) +[2026-06-02 16:57:31,418][247478] Updated weights for policy 0, policy_version 47006 (0.0010) +[2026-06-02 16:57:31,606][247478] Updated weights for policy 0, policy_version 47016 (0.0009) +[2026-06-02 16:57:31,779][247478] Updated weights for policy 0, policy_version 47026 (0.0008) +[2026-06-02 16:57:31,968][247478] Updated weights for policy 0, policy_version 47036 (0.0008) +[2026-06-02 16:57:32,605][247478] Updated weights for policy 0, policy_version 47046 (0.0009) +[2026-06-02 16:57:32,779][247478] Updated weights for policy 0, policy_version 47056 (0.0009) +[2026-06-02 16:57:32,956][247478] Updated weights for policy 0, policy_version 47066 (0.0008) +[2026-06-02 16:57:33,155][247478] Updated weights for policy 0, policy_version 47077 (0.0009) +[2026-06-02 16:57:33,357][247478] Updated weights for policy 0, policy_version 47088 (0.0009) +[2026-06-02 16:57:33,542][247478] Updated weights for policy 0, policy_version 47098 (0.0009) +[2026-06-02 16:57:34,198][247478] Updated weights for policy 0, policy_version 47108 (0.0009) +[2026-06-02 16:57:34,370][247478] Updated weights for policy 0, policy_version 47118 (0.0008) +[2026-06-02 16:57:34,542][247478] Updated weights for policy 0, policy_version 47128 (0.0008) +[2026-06-02 16:57:34,724][247478] Updated weights for policy 0, policy_version 47138 (0.0008) +[2026-06-02 16:57:34,726][246448] Fps is (10 sec: 19660.4, 60 sec: 20206.9, 300 sec: 20216.2). Total num frames: 24117248. Throughput: 0: 20218.2. Samples: 24137216. Policy #0 lag: (min: 23.0, avg: 39.2, max: 87.0) +[2026-06-02 16:57:34,727][246448] Avg episode reward: [(0, '2133.625')] +[2026-06-02 16:57:34,915][247478] Updated weights for policy 0, policy_version 47148 (0.0008) +[2026-06-02 16:57:35,097][247478] Updated weights for policy 0, policy_version 47158 (0.0008) +[2026-06-02 16:57:35,273][247399] Saving results/checkpoints_factor_sweeps/flappy/context_window/flappy_frame_stack_fixed_l2_fs4_seed14/checkpoint_p0/checkpoint_000047168_24150016.pth... +[2026-06-02 16:57:35,275][247478] Updated weights for policy 0, policy_version 47168 (0.0009) +[2026-06-02 16:57:35,943][247478] Updated weights for policy 0, policy_version 47179 (0.0008) +[2026-06-02 16:57:36,120][247478] Updated weights for policy 0, policy_version 47189 (0.0008) +[2026-06-02 16:57:36,298][247478] Updated weights for policy 0, policy_version 47199 (0.0008) +[2026-06-02 16:57:36,485][247478] Updated weights for policy 0, policy_version 47209 (0.0008) +[2026-06-02 16:57:36,680][247478] Updated weights for policy 0, policy_version 47220 (0.0008) +[2026-06-02 16:57:36,860][247478] Updated weights for policy 0, policy_version 47230 (0.0009) +[2026-06-02 16:57:37,528][247478] Updated weights for policy 0, policy_version 47241 (0.0009) +[2026-06-02 16:57:37,693][247478] Updated weights for policy 0, policy_version 47251 (0.0008) +[2026-06-02 16:57:37,879][247478] Updated weights for policy 0, policy_version 47261 (0.0008) +[2026-06-02 16:57:38,077][247478] Updated weights for policy 0, policy_version 47272 (0.0008) +[2026-06-02 16:57:38,258][247478] Updated weights for policy 0, policy_version 47282 (0.0010) +[2026-06-02 16:57:38,435][247478] Updated weights for policy 0, policy_version 47292 (0.0008) +[2026-06-02 16:57:39,090][247478] Updated weights for policy 0, policy_version 47302 (0.0008) +[2026-06-02 16:57:39,262][247478] Updated weights for policy 0, policy_version 47312 (0.0009) +[2026-06-02 16:57:39,428][247478] Updated weights for policy 0, policy_version 47322 (0.0009) +[2026-06-02 16:57:39,610][247478] Updated weights for policy 0, policy_version 47332 (0.0008) +[2026-06-02 16:57:39,725][246448] Fps is (10 sec: 19660.9, 60 sec: 20206.9, 300 sec: 20216.2). Total num frames: 24215552. Throughput: 0: 20360.5. Samples: 24263168. Policy #0 lag: (min: 23.0, avg: 39.2, max: 87.0) +[2026-06-02 16:57:39,726][246448] Avg episode reward: [(0, '2153.544')] +[2026-06-02 16:57:39,786][247478] Updated weights for policy 0, policy_version 47342 (0.0008) +[2026-06-02 16:57:39,958][247478] Updated weights for policy 0, policy_version 47352 (0.0008) +[2026-06-02 16:57:40,658][247478] Updated weights for policy 0, policy_version 47363 (0.0009) +[2026-06-02 16:57:40,829][247478] Updated weights for policy 0, policy_version 47373 (0.0008) +[2026-06-02 16:57:41,020][247478] Updated weights for policy 0, policy_version 47384 (0.0009) +[2026-06-02 16:57:41,195][247478] Updated weights for policy 0, policy_version 47394 (0.0009) +[2026-06-02 16:57:41,363][247478] Updated weights for policy 0, policy_version 47404 (0.0008) +[2026-06-02 16:57:41,549][247478] Updated weights for policy 0, policy_version 47414 (0.0008) +[2026-06-02 16:57:41,718][247478] Updated weights for policy 0, policy_version 47424 (0.0008) +[2026-06-02 16:57:42,407][247478] Updated weights for policy 0, policy_version 47435 (0.0009) +[2026-06-02 16:57:42,612][247478] Updated weights for policy 0, policy_version 47447 (0.0009) +[2026-06-02 16:57:42,811][247478] Updated weights for policy 0, policy_version 47458 (0.0009) +[2026-06-02 16:57:42,987][247478] Updated weights for policy 0, policy_version 47468 (0.0009) +[2026-06-02 16:57:43,161][247478] Updated weights for policy 0, policy_version 47478 (0.0008) +[2026-06-02 16:57:43,867][247478] Updated weights for policy 0, policy_version 47489 (0.0008) +[2026-06-02 16:57:44,041][247478] Updated weights for policy 0, policy_version 47500 (0.0009) +[2026-06-02 16:57:44,220][247478] Updated weights for policy 0, policy_version 47510 (0.0009) +[2026-06-02 16:57:44,395][247478] Updated weights for policy 0, policy_version 47520 (0.0008) +[2026-06-02 16:57:44,591][247478] Updated weights for policy 0, policy_version 47531 (0.0008) +[2026-06-02 16:57:44,725][246448] Fps is (10 sec: 19661.2, 60 sec: 20206.9, 300 sec: 20216.2). Total num frames: 24313856. Throughput: 0: 20366.3. Samples: 24325760. Policy #0 lag: (min: 23.0, avg: 39.2, max: 87.0) +[2026-06-02 16:57:44,726][246448] Avg episode reward: [(0, '2153.544')] +[2026-06-02 16:57:44,769][247478] Updated weights for policy 0, policy_version 47541 (0.0008) +[2026-06-02 16:57:44,942][247478] Updated weights for policy 0, policy_version 47551 (0.0009) +[2026-06-02 16:57:45,652][247478] Updated weights for policy 0, policy_version 47564 (0.0009) +[2026-06-02 16:57:45,842][247478] Updated weights for policy 0, policy_version 47575 (0.0009) +[2026-06-02 16:57:46,030][247478] Updated weights for policy 0, policy_version 47585 (0.0009) +[2026-06-02 16:57:46,231][247478] Updated weights for policy 0, policy_version 47597 (0.0009) +[2026-06-02 16:57:46,417][247478] Updated weights for policy 0, policy_version 47607 (0.0009) +[2026-06-02 16:57:47,128][247478] Updated weights for policy 0, policy_version 47619 (0.0009) +[2026-06-02 16:57:47,310][247478] Updated weights for policy 0, policy_version 47630 (0.0008) +[2026-06-02 16:57:47,512][247478] Updated weights for policy 0, policy_version 47641 (0.0008) +[2026-06-02 16:57:47,689][247478] Updated weights for policy 0, policy_version 47651 (0.0009) +[2026-06-02 16:57:47,884][247478] Updated weights for policy 0, policy_version 47662 (0.0009) +[2026-06-02 16:57:48,091][247478] Updated weights for policy 0, policy_version 47674 (0.0008) +[2026-06-02 16:57:48,755][247478] Updated weights for policy 0, policy_version 47684 (0.0008) +[2026-06-02 16:57:48,926][247478] Updated weights for policy 0, policy_version 47694 (0.0008) +[2026-06-02 16:57:49,101][247478] Updated weights for policy 0, policy_version 47704 (0.0009) +[2026-06-02 16:57:49,304][247478] Updated weights for policy 0, policy_version 47716 (0.0009) +[2026-06-02 16:57:49,487][247478] Updated weights for policy 0, policy_version 47726 (0.0009) +[2026-06-02 16:57:49,673][247478] Updated weights for policy 0, policy_version 47737 (0.0008) +[2026-06-02 16:57:49,726][246448] Fps is (10 sec: 19660.3, 60 sec: 20206.9, 300 sec: 20216.2). Total num frames: 24412160. Throughput: 0: 20377.5. Samples: 24452096. Policy #0 lag: (min: 23.0, avg: 39.2, max: 87.0) +[2026-06-02 16:57:49,727][246448] Avg episode reward: [(0, '2251.791')] +[2026-06-02 16:57:49,798][247399] Saving new best policy, reward=2251.791! +[2026-06-02 16:57:50,344][247478] Updated weights for policy 0, policy_version 47747 (0.0008) +[2026-06-02 16:57:50,512][247478] Updated weights for policy 0, policy_version 47757 (0.0007) +[2026-06-02 16:57:50,695][247478] Updated weights for policy 0, policy_version 47768 (0.0008) +[2026-06-02 16:57:50,894][247478] Updated weights for policy 0, policy_version 47779 (0.0009) +[2026-06-02 16:57:51,070][247478] Updated weights for policy 0, policy_version 47789 (0.0009) +[2026-06-02 16:57:51,267][247478] Updated weights for policy 0, policy_version 47800 (0.0008) +[2026-06-02 16:57:51,949][247478] Updated weights for policy 0, policy_version 47810 (0.0009) +[2026-06-02 16:57:52,131][247478] Updated weights for policy 0, policy_version 47821 (0.0009) +[2026-06-02 16:57:52,308][247478] Updated weights for policy 0, policy_version 47831 (0.0008) +[2026-06-02 16:57:52,485][247478] Updated weights for policy 0, policy_version 47841 (0.0008) +[2026-06-02 16:57:52,659][247478] Updated weights for policy 0, policy_version 47851 (0.0008) +[2026-06-02 16:57:52,843][247478] Updated weights for policy 0, policy_version 47861 (0.0009) +[2026-06-02 16:57:53,577][247478] Updated weights for policy 0, policy_version 47874 (0.0009) +[2026-06-02 16:57:53,759][247478] Updated weights for policy 0, policy_version 47885 (0.0009) +[2026-06-02 16:57:53,944][247478] Updated weights for policy 0, policy_version 47895 (0.0008) +[2026-06-02 16:57:54,119][247478] Updated weights for policy 0, policy_version 47905 (0.0008) +[2026-06-02 16:57:54,288][247478] Updated weights for policy 0, policy_version 47915 (0.0008) +[2026-06-02 16:57:54,504][247478] Updated weights for policy 0, policy_version 47927 (0.0009) +[2026-06-02 16:57:54,725][246448] Fps is (10 sec: 22937.6, 60 sec: 20753.1, 300 sec: 20327.3). Total num frames: 24543232. Throughput: 0: 20232.5. Samples: 24564480. Policy #0 lag: (min: 23.0, avg: 39.2, max: 87.0) +[2026-06-02 16:57:54,726][246448] Avg episode reward: [(0, '2244.845')] +[2026-06-02 16:57:55,231][247478] Updated weights for policy 0, policy_version 47941 (0.0009) +[2026-06-02 16:57:55,414][247478] Updated weights for policy 0, policy_version 47951 (0.0008) +[2026-06-02 16:57:55,606][247478] Updated weights for policy 0, policy_version 47962 (0.0008) +[2026-06-02 16:57:55,779][247478] Updated weights for policy 0, policy_version 47972 (0.0009) +[2026-06-02 16:57:55,991][247478] Updated weights for policy 0, policy_version 47984 (0.0009) +[2026-06-02 16:57:56,185][247478] Updated weights for policy 0, policy_version 47995 (0.0009) +[2026-06-02 16:57:56,869][247478] Updated weights for policy 0, policy_version 48006 (0.0009) +[2026-06-02 16:57:57,035][247478] Updated weights for policy 0, policy_version 48016 (0.0008) +[2026-06-02 16:57:57,231][247478] Updated weights for policy 0, policy_version 48027 (0.0009) +[2026-06-02 16:57:57,401][247478] Updated weights for policy 0, policy_version 48037 (0.0009) +[2026-06-02 16:57:57,584][247478] Updated weights for policy 0, policy_version 48047 (0.0008) +[2026-06-02 16:57:57,756][247478] Updated weights for policy 0, policy_version 48057 (0.0008) +[2026-06-02 16:57:58,423][247478] Updated weights for policy 0, policy_version 48067 (0.0009) +[2026-06-02 16:57:58,596][247478] Updated weights for policy 0, policy_version 48077 (0.0008) +[2026-06-02 16:57:58,778][247478] Updated weights for policy 0, policy_version 48087 (0.0009) +[2026-06-02 16:57:58,946][247478] Updated weights for policy 0, policy_version 48097 (0.0008) +[2026-06-02 16:57:59,130][247478] Updated weights for policy 0, policy_version 48107 (0.0008) +[2026-06-02 16:57:59,322][247478] Updated weights for policy 0, policy_version 48117 (0.0008) +[2026-06-02 16:57:59,499][247478] Updated weights for policy 0, policy_version 48127 (0.0010) +[2026-06-02 16:57:59,725][246448] Fps is (10 sec: 22938.2, 60 sec: 20753.1, 300 sec: 20327.3). Total num frames: 24641536. Throughput: 0: 20380.4. Samples: 24626944. Policy #0 lag: (min: 23.0, avg: 39.2, max: 87.0) +[2026-06-02 16:57:59,726][246448] Avg episode reward: [(0, '2319.535')] +[2026-06-02 16:57:59,731][247399] Saving new best policy, reward=2319.535! +[2026-06-02 16:58:00,146][247478] Updated weights for policy 0, policy_version 48137 (0.0008) +[2026-06-02 16:58:00,334][247478] Updated weights for policy 0, policy_version 48148 (0.0008) +[2026-06-02 16:58:00,517][247478] Updated weights for policy 0, policy_version 48158 (0.0009) +[2026-06-02 16:58:00,695][247478] Updated weights for policy 0, policy_version 48168 (0.0008) +[2026-06-02 16:58:00,878][247478] Updated weights for policy 0, policy_version 48178 (0.0009) +[2026-06-02 16:58:01,064][247478] Updated weights for policy 0, policy_version 48188 (0.0008) +[2026-06-02 16:58:01,701][247478] Updated weights for policy 0, policy_version 48198 (0.0009) +[2026-06-02 16:58:01,881][247478] Updated weights for policy 0, policy_version 48208 (0.0008) +[2026-06-02 16:58:02,060][247478] Updated weights for policy 0, policy_version 48218 (0.0008) +[2026-06-02 16:58:02,263][247478] Updated weights for policy 0, policy_version 48229 (0.0008) +[2026-06-02 16:58:02,440][247478] Updated weights for policy 0, policy_version 48239 (0.0008) +[2026-06-02 16:58:02,629][247478] Updated weights for policy 0, policy_version 48249 (0.0008) +[2026-06-02 16:58:03,278][247478] Updated weights for policy 0, policy_version 48259 (0.0009) +[2026-06-02 16:58:03,468][247478] Updated weights for policy 0, policy_version 48270 (0.0008) +[2026-06-02 16:58:03,639][247478] Updated weights for policy 0, policy_version 48280 (0.0011) +[2026-06-02 16:58:03,834][247478] Updated weights for policy 0, policy_version 48290 (0.0010) +[2026-06-02 16:58:04,027][247478] Updated weights for policy 0, policy_version 48301 (0.0011) +[2026-06-02 16:58:04,220][247478] Updated weights for policy 0, policy_version 48311 (0.0012) +[2026-06-02 16:58:04,726][246448] Fps is (10 sec: 19660.4, 60 sec: 20206.9, 300 sec: 20327.3). Total num frames: 24739840. Throughput: 0: 20380.4. Samples: 24753280. Policy #0 lag: (min: 37.0, avg: 53.0, max: 101.0) +[2026-06-02 16:58:04,727][246448] Avg episode reward: [(0, '2318.783')] +[2026-06-02 16:58:04,867][247478] Updated weights for policy 0, policy_version 48321 (0.0010) +[2026-06-02 16:58:05,029][247478] Updated weights for policy 0, policy_version 48331 (0.0011) +[2026-06-02 16:58:05,215][247478] Updated weights for policy 0, policy_version 48341 (0.0010) +[2026-06-02 16:58:05,415][247478] Updated weights for policy 0, policy_version 48352 (0.0011) +[2026-06-02 16:58:05,599][247478] Updated weights for policy 0, policy_version 48362 (0.0012) +[2026-06-02 16:58:05,785][247478] Updated weights for policy 0, policy_version 48372 (0.0012) +[2026-06-02 16:58:05,968][247478] Updated weights for policy 0, policy_version 48382 (0.0011) +[2026-06-02 16:58:06,601][247478] Updated weights for policy 0, policy_version 48392 (0.0011) +[2026-06-02 16:58:06,787][247478] Updated weights for policy 0, policy_version 48402 (0.0011) +[2026-06-02 16:58:06,970][247478] Updated weights for policy 0, policy_version 48412 (0.0014) +[2026-06-02 16:58:07,148][247478] Updated weights for policy 0, policy_version 48422 (0.0006) +[2026-06-02 16:58:07,339][247478] Updated weights for policy 0, policy_version 48432 (0.0004) +[2026-06-02 16:58:07,514][247478] Updated weights for policy 0, policy_version 48442 (0.0004) +[2026-06-02 16:58:08,162][247478] Updated weights for policy 0, policy_version 48453 (0.0007) +[2026-06-02 16:58:08,333][247478] Updated weights for policy 0, policy_version 48463 (0.0009) +[2026-06-02 16:58:08,522][247478] Updated weights for policy 0, policy_version 48473 (0.0009) +[2026-06-02 16:58:08,703][247478] Updated weights for policy 0, policy_version 48483 (0.0009) +[2026-06-02 16:58:08,879][247478] Updated weights for policy 0, policy_version 48493 (0.0008) +[2026-06-02 16:58:09,064][247478] Updated weights for policy 0, policy_version 48503 (0.0009) +[2026-06-02 16:58:09,722][247478] Updated weights for policy 0, policy_version 48513 (0.0009) +[2026-06-02 16:58:09,725][246448] Fps is (10 sec: 19660.7, 60 sec: 20206.9, 300 sec: 20327.3). Total num frames: 24838144. Throughput: 0: 20400.3. Samples: 24879488. Policy #0 lag: (min: 37.0, avg: 53.0, max: 101.0) +[2026-06-02 16:58:09,726][246448] Avg episode reward: [(0, '2362.749')] +[2026-06-02 16:58:09,904][247478] Updated weights for policy 0, policy_version 48524 (0.0009) +[2026-06-02 16:58:10,092][247478] Updated weights for policy 0, policy_version 48534 (0.0010) +[2026-06-02 16:58:10,267][247478] Updated weights for policy 0, policy_version 48544 (0.0009) +[2026-06-02 16:58:10,446][247478] Updated weights for policy 0, policy_version 48554 (0.0009) +[2026-06-02 16:58:10,630][247478] Updated weights for policy 0, policy_version 48564 (0.0008) +[2026-06-02 16:58:10,815][247478] Updated weights for policy 0, policy_version 48574 (0.0008) +[2026-06-02 16:58:10,843][247399] Saving new best policy, reward=2362.749! +[2026-06-02 16:58:11,454][247478] Updated weights for policy 0, policy_version 48584 (0.0009) +[2026-06-02 16:58:11,645][247478] Updated weights for policy 0, policy_version 48595 (0.0009) +[2026-06-02 16:58:11,831][247478] Updated weights for policy 0, policy_version 48605 (0.0010) +[2026-06-02 16:58:12,030][247478] Updated weights for policy 0, policy_version 48616 (0.0009) +[2026-06-02 16:58:12,212][247478] Updated weights for policy 0, policy_version 48626 (0.0008) +[2026-06-02 16:58:12,388][247478] Updated weights for policy 0, policy_version 48636 (0.0007) +[2026-06-02 16:58:13,022][247478] Updated weights for policy 0, policy_version 48646 (0.0008) +[2026-06-02 16:58:13,193][247478] Updated weights for policy 0, policy_version 48656 (0.0008) +[2026-06-02 16:58:13,371][247478] Updated weights for policy 0, policy_version 48666 (0.0009) +[2026-06-02 16:58:13,555][247478] Updated weights for policy 0, policy_version 48676 (0.0008) +[2026-06-02 16:58:13,738][247478] Updated weights for policy 0, policy_version 48686 (0.0008) +[2026-06-02 16:58:13,919][247478] Updated weights for policy 0, policy_version 48696 (0.0009) +[2026-06-02 16:58:14,574][247478] Updated weights for policy 0, policy_version 48706 (0.0008) +[2026-06-02 16:58:14,725][246448] Fps is (10 sec: 19661.2, 60 sec: 20206.9, 300 sec: 20327.3). Total num frames: 24936448. Throughput: 0: 20201.3. Samples: 24933888. Policy #0 lag: (min: 37.0, avg: 53.0, max: 101.0) +[2026-06-02 16:58:14,726][246448] Avg episode reward: [(0, '2362.749')] +[2026-06-02 16:58:14,758][247478] Updated weights for policy 0, policy_version 48717 (0.0008) +[2026-06-02 16:58:14,948][247478] Updated weights for policy 0, policy_version 48728 (0.0009) +[2026-06-02 16:58:15,139][247478] Updated weights for policy 0, policy_version 48738 (0.0008) +[2026-06-02 16:58:15,320][247478] Updated weights for policy 0, policy_version 48748 (0.0008) +[2026-06-02 16:58:15,496][247478] Updated weights for policy 0, policy_version 48758 (0.0008) +[2026-06-02 16:58:15,669][247478] Updated weights for policy 0, policy_version 48768 (0.0008) +[2026-06-02 16:58:16,322][247478] Updated weights for policy 0, policy_version 48778 (0.0009) +[2026-06-02 16:58:16,499][247478] Updated weights for policy 0, policy_version 48788 (0.0009) +[2026-06-02 16:58:16,675][247478] Updated weights for policy 0, policy_version 48798 (0.0006) +[2026-06-02 16:58:16,852][247478] Updated weights for policy 0, policy_version 48808 (0.0006) +[2026-06-02 16:58:17,037][247478] Updated weights for policy 0, policy_version 48818 (0.0005) +[2026-06-02 16:58:17,219][247478] Updated weights for policy 0, policy_version 48828 (0.0008) +[2026-06-02 16:58:17,851][247478] Updated weights for policy 0, policy_version 48838 (0.0007) +[2026-06-02 16:58:18,017][247399] Early stopping after 2 epochs (16 sgd steps), loss delta 0.0000000 +[2026-06-02 16:58:18,018][247399] Saving results/checkpoints_factor_sweeps/flappy/context_window/flappy_frame_stack_fixed_l2_fs4_seed14/checkpoint_p0/checkpoint_000048848_25034752.pth... +[2026-06-02 16:58:18,019][247480] Stopping RolloutWorker_w1... +[2026-06-02 16:58:18,020][247480] Loop rollout_proc1_evt_loop terminating... +[2026-06-02 16:58:18,020][247399] Stopping Batcher_0... +[2026-06-02 16:58:18,020][246448] Component RolloutWorker_w1 stopped! +[2026-06-02 16:58:18,020][247478] Updated weights for policy 0, policy_version 48848 (0.0009) +[2026-06-02 16:58:18,020][247479] Stopping RolloutWorker_w0... +[2026-06-02 16:58:18,021][247399] Loop batcher_evt_loop terminating... +[2026-06-02 16:58:18,021][247479] Loop rollout_proc0_evt_loop terminating... +[2026-06-02 16:58:18,021][246448] Component Batcher_0 stopped! +[2026-06-02 16:58:18,022][246448] Component RolloutWorker_w0 stopped! +[2026-06-02 16:58:18,036][247399] Saving new best policy, reward=2387.653! +[2026-06-02 16:58:18,057][247399] Saving results/checkpoints_factor_sweeps/flappy/context_window/flappy_frame_stack_fixed_l2_fs4_seed14/checkpoint_p0/checkpoint_000048848_25034752.pth... +[2026-06-02 16:58:18,067][247478] Weights refcount: 2 0 +[2026-06-02 16:58:18,068][247478] Stopping InferenceWorker_p0-w0... +[2026-06-02 16:58:18,069][247478] Loop inference_proc0-0_evt_loop terminating... +[2026-06-02 16:58:18,069][246448] Component InferenceWorker_p0-w0 stopped! +[2026-06-02 16:58:18,078][247399] Stopping LearnerWorker_p0... +[2026-06-02 16:58:18,078][247399] Loop learner_proc0_evt_loop terminating... +[2026-06-02 16:58:18,078][246448] Component LearnerWorker_p0 stopped! +[2026-06-02 16:58:18,078][246448] Waiting for process learner_proc0 to stop... +[2026-06-02 16:58:18,863][246448] Waiting for process inference_proc0-0 to join... +[2026-06-02 16:58:18,864][246448] Waiting for process rollout_proc0 to join... +[2026-06-02 16:58:18,865][246448] Waiting for process rollout_proc1 to join... +[2026-06-02 16:58:18,866][246448] Batcher 0 profile tree view: +batching: 0.8053, releasing_batches: 0.0312 +[2026-06-02 16:58:18,866][246448] InferenceWorker_p0-w0 profile tree view: +wait_policy: 0.0000 + wait_policy_total: 774.8344 +update_model: 45.5311 + weight_update: 0.0009 +one_step: 0.0017 + handle_policy_step: 390.2147 + deserialize: 5.0499, stack: 0.3659, obs_to_device_normalize: 54.5072, forward: 144.8428, prepare_outputs: 159.5089, send_messages: 10.3232 +[2026-06-02 16:58:18,866][246448] Learner 0 profile tree view: +misc: 0.0044, prepare_batch: 100.0279 +train: 864.4256 + epoch_init: 0.0592, minibatch_init: 2.5355, losses_postprocess: 300.2584, kl_divergence: 24.5092, after_optimizer: 360.1264 + calculate_losses: 40.1523 + losses_init: 0.0789, forward_head: 13.6698, bptt_initial: 0.3554, bptt: 0.4364, tail: 8.9774, advantages_returns: 2.9138, losses: 10.8224 + update: 133.5046 + clip: 12.9603 +[2026-06-02 16:58:18,867][246448] RolloutWorker_w0 profile tree view: +wait_for_trajectories: 0.0325, enqueue_policy_requests: 135.4068, process_policy_outputs: 7.5546, env_step: 813.6807, finalize_trajectories: 0.1016, complete_rollouts: 0.0757 +post_env_step: 16.1597 + process_env_step: 4.6930 +[2026-06-02 16:58:18,868][246448] RolloutWorker_w1 profile tree view: +wait_for_trajectories: 0.0338, enqueue_policy_requests: 136.7938, process_policy_outputs: 7.3618, env_step: 817.7522, finalize_trajectories: 0.1039, complete_rollouts: 0.0797 +post_env_step: 15.9049 + process_env_step: 4.6271 +[2026-06-02 16:58:18,869][246448] Loop Runner_EvtLoop terminating... +[2026-06-02 16:58:18,870][246448] Runner profile tree view: +main_loop: 1239.6026 +[2026-06-02 16:58:18,870][246448] Collected {0: 25034752}, FPS: 20195.8