diff --git a/.gitattributes b/.gitattributes index 9d2d3440ad6a8804a841b061a2a883f6572171af..f75551501eea425da5229aeed55ce2ced7ab7798 100644 --- a/.gitattributes +++ b/.gitattributes @@ -46,3 +46,4 @@ factor_sweeps/flappy/observation_stride/train/factor_sweep:flappy:observation_st factor_sweeps/flappy/observation_stride/train/factor_sweep:flappy:observation_stride:fixed_l2:fs4:obs15:stride2:seed11/episode_metrics.jsonl filter=lfs diff=lfs merge=lfs -text factor_sweeps/flappy/observation_stride/train/factor_sweep:flappy:observation_stride:fixed_l2:fs4:obs30:stride1:seed14/episode_metrics.jsonl filter=lfs diff=lfs merge=lfs -text factor_sweeps/flappy/observation_stride/train/factor_sweep:flappy:observation_stride:fixed_l2:fs4:obs15:stride2:seed12/episode_metrics.jsonl filter=lfs diff=lfs merge=lfs -text +factor_sweeps/flappy/context_window/train/factor_sweep:flappy:frame_stack:fixed_l2:fs5:obs30:stride1:seed10/episode_metrics.jsonl filter=lfs diff=lfs merge=lfs -text diff --git a/factor_sweeps/flappy/context_window/train/factor_sweep:flappy:frame_stack:fixed_l2:fs5:obs30:stride1:seed10/checkpoint_p0/best_000048584_24936448_reward_2463.582.pth b/factor_sweeps/flappy/context_window/train/factor_sweep:flappy:frame_stack:fixed_l2:fs5:obs30:stride1:seed10/checkpoint_p0/best_000048584_24936448_reward_2463.582.pth new file mode 100644 index 0000000000000000000000000000000000000000..b32ffadc54bc352b27bc4889766d880bd57a6f7c --- /dev/null +++ b/factor_sweeps/flappy/context_window/train/factor_sweep:flappy:frame_stack:fixed_l2:fs5:obs30:stride1:seed10/checkpoint_p0/best_000048584_24936448_reward_2463.582.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:2498cc8a6798a15f097ed4b3ab7d50cf4120b42e258fa4e1544ed071b6ef78ac +size 22210361 diff --git a/factor_sweeps/flappy/context_window/train/factor_sweep:flappy:frame_stack:fixed_l2:fs5:obs30:stride1:seed10/checkpoint_p0/checkpoint_000022168_11403264.pth b/factor_sweeps/flappy/context_window/train/factor_sweep:flappy:frame_stack:fixed_l2:fs5:obs30:stride1:seed10/checkpoint_p0/checkpoint_000022168_11403264.pth new file mode 100644 index 0000000000000000000000000000000000000000..d55d032dc1e6eb9b34349734ac8f5ecfc44fb27e --- /dev/null +++ b/factor_sweeps/flappy/context_window/train/factor_sweep:flappy:frame_stack:fixed_l2:fs5:obs30:stride1:seed10/checkpoint_p0/checkpoint_000022168_11403264.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:829df0fc9ff58afcef0e0d461f934e4107da7c8e6b222c14e9dc761c56e000a5 +size 22210721 diff --git a/factor_sweeps/flappy/context_window/train/factor_sweep:flappy:frame_stack:fixed_l2:fs5:obs30:stride1:seed10/checkpoint_p0/checkpoint_000044936_23068672.pth b/factor_sweeps/flappy/context_window/train/factor_sweep:flappy:frame_stack:fixed_l2:fs5:obs30:stride1:seed10/checkpoint_p0/checkpoint_000044936_23068672.pth new file mode 100644 index 0000000000000000000000000000000000000000..e080079dafc9a1b9ae0ca0c0574ae0237049c146 --- /dev/null +++ b/factor_sweeps/flappy/context_window/train/factor_sweep:flappy:frame_stack:fixed_l2:fs5:obs30:stride1:seed10/checkpoint_p0/checkpoint_000044936_23068672.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:bc1475958a41bff83d266c0c255bf13584082ccc2c368d0e5beecaf878b8b456 +size 22210721 diff --git a/factor_sweeps/flappy/context_window/train/factor_sweep:flappy:frame_stack:fixed_l2:fs5:obs30:stride1:seed10/checkpoint_p0/checkpoint_000048776_25034752.pth b/factor_sweeps/flappy/context_window/train/factor_sweep:flappy:frame_stack:fixed_l2:fs5:obs30:stride1:seed10/checkpoint_p0/checkpoint_000048776_25034752.pth new file mode 100644 index 0000000000000000000000000000000000000000..174997124899bca1d2b01c8cfbab1b1742361d4f --- /dev/null +++ b/factor_sweeps/flappy/context_window/train/factor_sweep:flappy:frame_stack:fixed_l2:fs5:obs30:stride1:seed10/checkpoint_p0/checkpoint_000048776_25034752.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:47934e1ffa92951b361734a29063b0dd4633200bee154fc78e7b5817263d0fc0 +size 22210721 diff --git a/factor_sweeps/flappy/context_window/train/factor_sweep:flappy:frame_stack:fixed_l2:fs5:obs30:stride1:seed10/config.json b/factor_sweeps/flappy/context_window/train/factor_sweep:flappy:frame_stack:fixed_l2:fs5:obs30:stride1:seed10/config.json new file mode 100644 index 0000000000000000000000000000000000000000..152daf111c2956e4466a73ab45f83e475a10b3e0 --- /dev/null +++ b/factor_sweeps/flappy/context_window/train/factor_sweep:flappy:frame_stack:fixed_l2:fs5:obs30:stride1:seed10/config.json @@ -0,0 +1,266 @@ +{ + "help": false, + "algo": "APPO", + "env": "latency_flappy", + "experiment": "flappy_frame_stack_fixed_l2_fs5_seed10", + "train_dir": "results/checkpoints_factor_sweeps/flappy/context_window", + "restart_behavior": "resume", + "device": "gpu", + "seed": 10, + "num_policies": 1, + "async_rl": true, + "serial_mode": false, + "batched_sampling": true, + "num_batches_to_accumulate": 2, + "worker_num_splits": 1, + "policy_workers_per_policy": 1, + "max_policy_lag": 400, + "num_workers": 2, + "num_envs_per_worker": 1, + "batch_size": 4096, + "num_batches_per_epoch": 8, + "num_epochs": 8, + "rollout": 128, + "recurrence": 1, + "shuffle_minibatches": false, + "gamma": 0.99, + "reward_scale": 1.0, + "reward_clip": 1000.0, + "value_bootstrap": false, + "normalize_returns": true, + "exploration_loss_coeff": 0.003, + "value_loss_coeff": 0.5, + "kl_loss_coeff": 0.0, + "exploration_loss": "entropy", + "gae_lambda": 0.95, + "ppo_clip_ratio": 0.1, + "ppo_clip_value": 0.2, + "with_vtrace": false, + "vtrace_rho": 1.0, + "vtrace_c": 1.0, + "optimizer": "adam", + "adam_eps": 1e-05, + "adam_beta1": 0.9, + "adam_beta2": 0.999, + "max_grad_norm": 0.5, + "learning_rate": 0.00025, + "lr_schedule": "linear_decay", + "lr_schedule_kl_threshold": 0.008, + "lr_adaptive_min": 1e-06, + "lr_adaptive_max": 0.01, + "obs_subtract_mean": 0.0, + "obs_scale": 255.0, + "normalize_input": true, + "normalize_input_keys": null, + "decorrelate_experience_max_seconds": 0, + "decorrelate_envs_on_one_worker": true, + "actor_worker_gpus": [ + 0 + ], + "set_workers_cpu_affinity": true, + "force_envs_single_thread": false, + "default_niceness": 0, + "log_to_file": true, + "experiment_summaries_interval": 1, + "flush_summaries_interval": 30, + "stats_avg": 100, + "summaries_use_frameskip": true, + "heartbeat_interval": 20, + "heartbeat_reporting_interval": 180, + "train_for_env_steps": 25000000, + "train_for_seconds": 10000000000, + "save_every_sec": 600, + "keep_checkpoints": 5, + "load_checkpoint_kind": "latest", + "save_milestones_sec": -1, + "save_best_every_sec": 5, + "save_best_metric": "reward", + "save_best_after": 100000, + "benchmark": false, + "encoder_mlp_layers": [ + 512, + 512 + ], + "encoder_conv_architecture": "convnet_atari", + "encoder_conv_mlp_layers": [ + 512 + ], + "use_rnn": false, + "rnn_size": 512, + "rnn_type": "gru", + "rnn_num_layers": 1, + "decoder_mlp_layers": [], + "nonlinearity": "elu", + "policy_initialization": "orthogonal", + "policy_init_gain": 1.0, + "actor_critic_share_weights": true, + "adaptive_stddev": true, + "continuous_tanh_scale": 0.0, + "initial_stddev": 1.0, + "use_env_info_cache": false, + "env_gpu_actions": true, + "env_gpu_observations": true, + "env_frameskip": 1, + "env_framestack": 1, + "pixel_format": "CHW", + "use_record_episode_statistics": false, + "with_wandb": true, + "wandb_user": null, + "wandb_project": "latency-sensitive-bench", + "wandb_group": "flappy-fs5-fixed_l2", + "wandb_job_type": "sample_factory", + "wandb_tags": [ + "factor_sweep", + "flappy", + "frame_stack", + "fixed", + "fixed_l2", + "fs5", + "seed10" + ], + "with_pbt": false, + "pbt_mix_policies_in_one_env": true, + "pbt_period_env_steps": 5000000, + "pbt_start_mutation": 20000000, + "pbt_replace_fraction": 0.3, + "pbt_mutation_rate": 0.15, + "pbt_replace_reward_gap": 0.1, + "pbt_replace_reward_gap_absolute": 1e-06, + "pbt_optimize_gamma": false, + "pbt_target_objective": "true_objective", + "pbt_perturb_min": 1.1, + "pbt_perturb_max": 1.5, + "gym_id": "FlappyBird-v0", + "env_fps": 30.0, + "obs_fps": 30.0, + "use_lidar": false, + "normalize_obs": true, + "audio_on": false, + "screen_size": "", + "obs_resize": "84,84", + "use_gpu_render": true, + "simulator": "gpu", + "gpu_render_device": "auto", + "gpu_render_batch_size": 128, + "gpu_render_profile": false, + "gpu_render_profile_interval": 200, + "pipe_gap": 100, + "bird_color": "yellow", + "pipe_color": "green", + "background": "day", + "score_limit": -1, + "frame_stack": 5, + "debug": false, + "debug_timelimit_diagnostics": false, + "max_episode_steps": 0, + "mode": "train", + "latency_type": "fixed", + "fixed_latency_ms": 66.66666666666667, + "mean_latency_ms": null, + "std_latency_ms": null, + "min_latency_ms": null, + "max_latency_ms": null, + "latency_seed": null, + "add_latency_info": false, + "max_pending_actions": null, + "hold_policy": "one_frame_then_noop", + "ordering_policy": "latest_ready", + "eval_episodes": 100, + "eval_parallel_envs": 100, + "eval_latency_raw_frame_values": "0,1,2,3,4,5", + "eval_max_steps": 3600, + "eval_deterministic": true, + "eval_raw_reward": false, + "episode_metrics_path": "results/checkpoints_factor_sweeps/flappy/context_window/flappy_frame_stack_fixed_l2_fs5_seed10/episode_metrics.jsonl", + "command_line": "--mode train --algo APPO --env latency_flappy --experiment flappy_frame_stack_fixed_l2_fs5_seed10 --train_dir results/checkpoints_factor_sweeps/flappy/context_window --restart_behavior resume --device gpu --actor_worker_gpus 0 --env_gpu_observations True --env_gpu_actions True --gpu-render-batch-size 128 --seed 10 --episode_metrics_path results/checkpoints_factor_sweeps/flappy/context_window/flappy_frame_stack_fixed_l2_fs5_seed10/episode_metrics.jsonl --train_for_env_steps 25000000 --num_workers 2 --num_envs_per_worker 1 --num_policies 1 --batch_size 4096 --rollout 128 --recurrence 1 --num_epochs 8 --num_batches_per_epoch 8 --worker_num_splits 1 --max_policy_lag 400 --learning_rate 0.00025 --gamma 0.99 --gae_lambda 0.95 --ppo_clip_ratio 0.1 --ppo_clip_value 0.2 --value_loss_coeff 0.5 --max_grad_norm 0.5 --save_every_sec 600 --keep_checkpoints 5 --stats_avg 100 --experiment_summaries_interval 1 --batched_sampling True --async_rl True --use_rnn False --normalize_returns True --normalize_input True --latency-type fixed --fixed-latency-ms 66.66666666666667 --add-latency-info False --eval-episodes 100 --eval-parallel-envs 100 --eval-max-steps 3600 --eval-deterministic True --with_wandb True --wandb_project latency-sensitive-bench --wandb_group flappy-fs5-fixed_l2 --wandb_job_type sample_factory --wandb_tags factor_sweep flappy frame_stack fixed fixed_l2 fs5 seed10 --gym_id FlappyBird-v0 --env-fps 30 --obs-fps 30.0 --use_lidar False --normalize_obs True --audio_on False --obs_resize 84,84 --use-gpu-render True --simulator gpu --gpu-render-device auto --gpu-render-profile False --gpu-render-profile-interval 200 --pipe_gap 100 --bird_color yellow --pipe_color green --background day --frame_stack 5 --debug False --debug-timelimit-diagnostics False --hold-policy one_frame_then_noop --ordering-policy latest_ready", + "cli_args": { + "algo": "APPO", + "env": "latency_flappy", + "experiment": "flappy_frame_stack_fixed_l2_fs5_seed10", + "train_dir": "results/checkpoints_factor_sweeps/flappy/context_window", + "restart_behavior": "resume", + "device": "gpu", + "seed": 10, + "num_policies": 1, + "async_rl": true, + "batched_sampling": true, + "worker_num_splits": 1, + "max_policy_lag": 400, + "num_workers": 2, + "num_envs_per_worker": 1, + "batch_size": 4096, + "num_batches_per_epoch": 8, + "num_epochs": 8, + "rollout": 128, + "recurrence": 1, + "gamma": 0.99, + "normalize_returns": true, + "value_loss_coeff": 0.5, + "gae_lambda": 0.95, + "ppo_clip_ratio": 0.1, + "ppo_clip_value": 0.2, + "max_grad_norm": 0.5, + "learning_rate": 0.00025, + "normalize_input": true, + "actor_worker_gpus": [ + 0 + ], + "experiment_summaries_interval": 1, + "stats_avg": 100, + "train_for_env_steps": 25000000, + "save_every_sec": 600, + "keep_checkpoints": 5, + "use_rnn": false, + "env_gpu_actions": true, + "env_gpu_observations": true, + "with_wandb": true, + "wandb_project": "latency-sensitive-bench", + "wandb_group": "flappy-fs5-fixed_l2", + "wandb_job_type": "sample_factory", + "wandb_tags": [ + "factor_sweep", + "flappy", + "frame_stack", + "fixed", + "fixed_l2", + "fs5", + "seed10" + ], + "gym_id": "FlappyBird-v0", + "env_fps": 30.0, + "obs_fps": 30.0, + "use_lidar": false, + "normalize_obs": true, + "audio_on": false, + "obs_resize": "84,84", + "use_gpu_render": true, + "simulator": "gpu", + "gpu_render_device": "auto", + "gpu_render_batch_size": 128, + "gpu_render_profile": false, + "gpu_render_profile_interval": 200, + "pipe_gap": 100, + "bird_color": "yellow", + "pipe_color": "green", + "background": "day", + "frame_stack": 5, + "debug": false, + "debug_timelimit_diagnostics": false, + "mode": "train", + "latency_type": "fixed", + "fixed_latency_ms": 66.66666666666667, + "add_latency_info": false, + "hold_policy": "one_frame_then_noop", + "ordering_policy": "latest_ready", + "eval_episodes": 100, + "eval_parallel_envs": 100, + "eval_max_steps": 3600, + "eval_deterministic": true, + "episode_metrics_path": "results/checkpoints_factor_sweeps/flappy/context_window/flappy_frame_stack_fixed_l2_fs5_seed10/episode_metrics.jsonl" + }, + "git_hash": "284fe8ace24f0e8a40c03c5b559969abd7caeb29", + "git_repo_name": "git@github.com:ZihanWang314/latency-sensitive-bench.git", + "eval_env_frameskip": 1, + "output_dir": "outputs/factor_sweeps/flappy/context_window/train/frame_stack/fixed_l2/fs5/seed_10", + "wandb_unique_id": "flappy-fs5-fixed_l2-s10" +} \ No newline at end of file diff --git a/factor_sweeps/flappy/context_window/train/factor_sweep:flappy:frame_stack:fixed_l2:fs5:obs30:stride1:seed10/episode_metrics.jsonl b/factor_sweeps/flappy/context_window/train/factor_sweep:flappy:frame_stack:fixed_l2:fs5:obs30:stride1:seed10/episode_metrics.jsonl new file mode 100644 index 0000000000000000000000000000000000000000..e9075498de8676cc7a4d2131ea66a889205886d5 --- /dev/null +++ b/factor_sweeps/flappy/context_window/train/factor_sweep:flappy:frame_stack:fixed_l2:fs5:obs30:stride1:seed10/episode_metrics.jsonl @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:197078159fe17607fddd1d235742135ed047d6f09a00a013ac605c29c9831ee7 +size 24714307 diff --git a/factor_sweeps/flappy/context_window/train/factor_sweep:flappy:frame_stack:fixed_l2:fs5:obs30:stride1:seed10/git.diff b/factor_sweeps/flappy/context_window/train/factor_sweep:flappy:frame_stack:fixed_l2:fs5:obs30:stride1:seed10/git.diff new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/factor_sweeps/flappy/context_window/train/factor_sweep:flappy:frame_stack:fixed_l2:fs5:obs30:stride1:seed10/sf_log.txt b/factor_sweeps/flappy/context_window/train/factor_sweep:flappy:frame_stack:fixed_l2:fs5:obs30:stride1:seed10/sf_log.txt new file mode 100644 index 0000000000000000000000000000000000000000..cd0a074c692db4ee2948f15660471b6abe14d794 --- /dev/null +++ b/factor_sweeps/flappy/context_window/train/factor_sweep:flappy:frame_stack:fixed_l2:fs5:obs30:stride1:seed10/sf_log.txt @@ -0,0 +1,5582 @@ +[2026-06-02 16:45:53,531][253683] Saving configuration to results/checkpoints_factor_sweeps/flappy/context_window/flappy_frame_stack_fixed_l2_fs5_seed10/config.json... +[2026-06-02 16:45:53,596][253683] Using GPUs [0] for process 0 (actually maps to GPUs [4]) +[2026-06-02 16:45:53,597][253683] Rollout worker 0 uses device cuda:0 +[2026-06-02 16:45:53,597][253683] Using GPUs [0] for process 1 (actually maps to GPUs [4]) +[2026-06-02 16:45:53,598][253683] Rollout worker 1 uses device cuda:0 +[2026-06-02 16:45:55,442][253683] Using GPUs [0] for process 0 (actually maps to GPUs [4]) +[2026-06-02 16:45:55,443][253683] InferenceWorker_p0-w0: min num requests: 1 +[2026-06-02 16:45:55,447][253683] Using GPUs [0] for process 0 (actually maps to GPUs [4]) +[2026-06-02 16:45:55,451][253683] Using GPUs [0] for process 1 (actually maps to GPUs [4]) +[2026-06-02 16:45:55,451][253683] Starting all processes... +[2026-06-02 16:45:55,452][253683] Starting process learner_proc0 +[2026-06-02 16:45:56,677][253683] Starting all processes... +[2026-06-02 16:45:56,681][253683] Starting process inference_proc0-0 +[2026-06-02 16:45:56,681][253683] Starting process rollout_proc0 +[2026-06-02 16:45:56,682][253683] Starting process rollout_proc1 +[2026-06-02 16:45:57,009][255187] Using GPUs [0] for process 0 (actually maps to GPUs [4]) +[2026-06-02 16:45:57,009][255187] Set environment var CUDA_VISIBLE_DEVICES to '4' (GPU indices [0]) for learning process 0 +[2026-06-02 16:45:57,009][255187] Num visible devices: 1 +[2026-06-02 16:45:57,009][255187] Setting fixed seed 10 +[2026-06-02 16:45:57,010][255187] Using GPUs [0] for process 0 (actually maps to GPUs [4]) +[2026-06-02 16:45:57,010][255187] Initializing actor-critic model on device cuda:0 +[2026-06-02 16:45:57,011][255187] RunningMeanStd input shape: (15, 84, 84) +[2026-06-02 16:45:57,040][255187] RunningMeanStd input shape: (1,) +[2026-06-02 16:45:57,047][255187] ConvEncoder: input_channels=15 +[2026-06-02 16:45:57,108][255187] Conv encoder output size: 512 +[2026-06-02 16:45:57,109][255187] Created Actor Critic model with architecture: +[2026-06-02 16:45:57,109][255187] ActorCriticSharedWeights( + (obs_normalizer): ObservationNormalizer( + (running_mean_std): RunningMeanStdDictInPlace( + (running_mean_std): ModuleDict( + (obs): RunningMeanStdInPlace() + ) + ) + ) + (returns_normalizer): RecursiveScriptModule(original_name=RunningMeanStdInPlace) + (encoder): MultiInputEncoder( + (encoders): ModuleDict( + (obs): ConvEncoder( + (enc): RecursiveScriptModule( + original_name=ConvEncoderImpl + (conv_head): RecursiveScriptModule( + original_name=Sequential + (0): RecursiveScriptModule(original_name=Conv2d) + (1): RecursiveScriptModule(original_name=ELU) + (2): RecursiveScriptModule(original_name=Conv2d) + (3): RecursiveScriptModule(original_name=ELU) + (4): RecursiveScriptModule(original_name=Conv2d) + (5): RecursiveScriptModule(original_name=ELU) + ) + (mlp_layers): RecursiveScriptModule( + original_name=Sequential + (0): RecursiveScriptModule(original_name=Linear) + (1): RecursiveScriptModule(original_name=ELU) + ) + ) + ) + ) + ) + (core): ModelCoreIdentity() + (decoder): MlpDecoder( + (mlp): Identity() + ) + (critic_linear): Linear(in_features=512, out_features=1, bias=True) + (action_parameterization): ActionParameterizationDefault( + (distribution_linear): Linear(in_features=512, out_features=2, bias=True) + ) +) +[2026-06-02 16:45:57,112][255187] Using optimizer +[2026-06-02 16:45:57,789][255187] No checkpoints found +[2026-06-02 16:45:57,790][255187] Did not load from checkpoint, starting from scratch! +[2026-06-02 16:45:57,790][255187] Initialized policy 0 weights for model version 0 +[2026-06-02 16:45:57,791][255187] LearnerWorker_p0 finished initialization! +[2026-06-02 16:45:57,792][255187] Using GPUs [0] for process 0 (actually maps to GPUs [4]) +[2026-06-02 16:45:58,506][255280] Worker 0 uses CPU cores [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63, 64, 65, 66, 67, 68, 69, 70, 71, 72, 73, 74, 75, 76, 77, 78, 79, 80, 81, 82, 83, 84, 85, 86, 87, 88, 89, 90, 91, 92, 93, 94, 95, 96, 97, 98, 99, 100, 101, 102, 103, 104, 105, 106, 107, 108, 109, 110, 111, 112, 113, 114, 115, 116, 117, 118, 119, 120, 121, 122, 123, 124, 125, 126, 127, 128, 129, 130, 131, 132, 133, 134, 135, 136, 137, 138, 139, 140, 141, 142, 143, 144, 145, 146, 147, 148, 149, 150, 151, 152, 153, 154, 155, 156, 157, 158, 159, 160, 161, 162, 163, 164, 165, 166, 167, 168, 169, 170, 171, 172, 173, 174, 175, 176, 177, 178, 179, 180, 181, 182, 183, 184, 185, 186, 187, 188, 189, 190, 191] +[2026-06-02 16:45:58,506][255280] Using GPUs [0] for process 0 (actually maps to GPUs [4]) +[2026-06-02 16:45:58,506][255280] Set environment var CUDA_VISIBLE_DEVICES to '4' (GPU indices [0]) for actor process 0 +[2026-06-02 16:45:58,506][255280] Num visible devices: 1 +[2026-06-02 16:45:58,526][255279] Using GPUs [0] for process 0 (actually maps to GPUs [4]) +[2026-06-02 16:45:58,526][255279] Set environment var CUDA_VISIBLE_DEVICES to '4' (GPU indices [0]) for inference process 0 +[2026-06-02 16:45:58,526][255279] Num visible devices: 1 +[2026-06-02 16:45:58,555][255279] RunningMeanStd input shape: (15, 84, 84) +[2026-06-02 16:45:58,563][255279] RunningMeanStd input shape: (1,) +[2026-06-02 16:45:58,570][255279] ConvEncoder: input_channels=15 +[2026-06-02 16:45:58,630][255279] Conv encoder output size: 512 +[2026-06-02 16:45:58,634][253683] Inference worker 0-0 is ready! +[2026-06-02 16:45:58,635][253683] All inference workers are ready! Signal rollout workers to start! +[2026-06-02 16:45:58,636][255280] EnvRunner 0-0 uses policy 0 +[2026-06-02 16:45:58,652][255281] Worker 1 uses CPU cores [192, 193, 194, 195, 196, 197, 198, 199, 200, 201, 202, 203, 204, 205, 206, 207, 208, 209, 210, 211, 212, 213, 214, 215, 216, 217, 218, 219, 220, 221, 222, 223, 224, 225, 226, 227, 228, 229, 230, 231, 232, 233, 234, 235, 236, 237, 238, 239, 240, 241, 242, 243, 244, 245, 246, 247, 248, 249, 250, 251, 252, 253, 254, 255, 256, 257, 258, 259, 260, 261, 262, 263, 264, 265, 266, 267, 268, 269, 270, 271, 272, 273, 274, 275, 276, 277, 278, 279, 280, 281, 282, 283, 284, 285, 286, 287, 288, 289, 290, 291, 292, 293, 294, 295, 296, 297, 298, 299, 300, 301, 302, 303, 304, 305, 306, 307, 308, 309, 310, 311, 312, 313, 314, 315, 316, 317, 318, 319, 320, 321, 322, 323, 324, 325, 326, 327, 328, 329, 330, 331, 332, 333, 334, 335, 336, 337, 338, 339, 340, 341, 342, 343, 344, 345, 346, 347, 348, 349, 350, 351, 352, 353, 354, 355, 356, 357, 358, 359, 360, 361, 362, 363, 364, 365, 366, 367, 368, 369, 370, 371, 372, 373, 374, 375, 376, 377, 378, 379, 380, 381, 382, 383] +[2026-06-02 16:45:58,652][255281] Using GPUs [0] for process 1 (actually maps to GPUs [4]) +[2026-06-02 16:45:58,652][255281] Set environment var CUDA_VISIBLE_DEVICES to '4' (GPU indices [0]) for actor process 1 +[2026-06-02 16:45:58,653][255281] Num visible devices: 1 +[2026-06-02 16:45:58,653][255281] EnvRunner 1-0 uses policy 0 +[2026-06-02 16:46:00,502][253683] Fps is (10 sec: nan, 60 sec: nan, 300 sec: nan). Total num frames: 0. Throughput: 0: nan. Samples: 0. Policy #0 lag: (min: -1.0, avg: -1.0, max: -1.0) +[2026-06-02 16:46:00,503][253683] Avg episode reward: [(0, '-7.505')] +[2026-06-02 16:46:01,559][255187] Signal inference workers to stop experience collection... +[2026-06-02 16:46:01,564][255279] InferenceWorker_p0-w0: stopping experience collection +[2026-06-02 16:46:03,980][255187] Signal inference workers to resume experience collection... +[2026-06-02 16:46:03,981][255279] InferenceWorker_p0-w0: resuming experience collection +[2026-06-02 16:46:04,364][255279] Updated weights for policy 0, policy_version 77 (0.0060) +[2026-06-02 16:46:04,572][255279] Updated weights for policy 0, policy_version 87 (0.0004) +[2026-06-02 16:46:04,888][255279] Updated weights for policy 0, policy_version 104 (0.0007) +[2026-06-02 16:46:05,093][255279] Updated weights for policy 0, policy_version 114 (0.0007) +[2026-06-02 16:46:05,301][255279] Updated weights for policy 0, policy_version 124 (0.0007) +[2026-06-02 16:46:05,502][253683] Fps is (10 sec: 13107.2, 60 sec: 13107.2, 300 sec: 13107.2). Total num frames: 65536. Throughput: 0: 15334.4. Samples: 76672. Policy #0 lag: (min: 63.0, avg: 63.0, max: 63.0) +[2026-06-02 16:46:05,503][253683] Avg episode reward: [(0, '-7.031')] +[2026-06-02 16:46:05,962][255279] Updated weights for policy 0, policy_version 143 (0.0008) +[2026-06-02 16:46:06,169][255279] Updated weights for policy 0, policy_version 153 (0.0005) +[2026-06-02 16:46:06,491][255279] Updated weights for policy 0, policy_version 170 (0.0005) +[2026-06-02 16:46:06,689][255279] Updated weights for policy 0, policy_version 180 (0.0008) +[2026-06-02 16:46:06,899][255279] Updated weights for policy 0, policy_version 190 (0.0005) +[2026-06-02 16:46:07,406][255279] Updated weights for policy 0, policy_version 200 (0.0004) +[2026-06-02 16:46:07,637][255279] Updated weights for policy 0, policy_version 213 (0.0008) +[2026-06-02 16:46:07,842][255279] Updated weights for policy 0, policy_version 223 (0.0009) +[2026-06-02 16:46:08,054][255279] Updated weights for policy 0, policy_version 233 (0.0010) +[2026-06-02 16:46:08,268][255279] Updated weights for policy 0, policy_version 245 (0.0009) +[2026-06-02 16:46:08,478][255279] Updated weights for policy 0, policy_version 255 (0.0009) +[2026-06-02 16:46:08,898][255279] Updated weights for policy 0, policy_version 266 (0.0009) +[2026-06-02 16:46:09,100][255279] Updated weights for policy 0, policy_version 276 (0.0007) +[2026-06-02 16:46:09,306][255279] Updated weights for policy 0, policy_version 286 (0.0008) +[2026-06-02 16:46:09,506][255279] Updated weights for policy 0, policy_version 297 (0.0009) +[2026-06-02 16:46:09,714][255279] Updated weights for policy 0, policy_version 307 (0.0008) +[2026-06-02 16:46:09,920][255279] Updated weights for policy 0, policy_version 317 (0.0005) +[2026-06-02 16:46:10,416][255279] Updated weights for policy 0, policy_version 327 (0.0005) +[2026-06-02 16:46:10,502][253683] Fps is (10 sec: 16383.9, 60 sec: 16383.9, 300 sec: 16383.9). Total num frames: 163840. Throughput: 0: 14323.2. Samples: 143232. Policy #0 lag: (min: 5.0, avg: 33.3, max: 69.0) +[2026-06-02 16:46:10,503][253683] Avg episode reward: [(0, '-5.644')] +[2026-06-02 16:46:10,606][255279] Updated weights for policy 0, policy_version 338 (0.0009) +[2026-06-02 16:46:10,817][255279] Updated weights for policy 0, policy_version 348 (0.0008) +[2026-06-02 16:46:11,017][255279] Updated weights for policy 0, policy_version 358 (0.0005) +[2026-06-02 16:46:11,218][255279] Updated weights for policy 0, policy_version 369 (0.0008) +[2026-06-02 16:46:11,427][255279] Updated weights for policy 0, policy_version 379 (0.0008) +[2026-06-02 16:46:11,528][255187] Saving new best policy, reward=-5.644! +[2026-06-02 16:46:11,854][255279] Updated weights for policy 0, policy_version 391 (0.0008) +[2026-06-02 16:46:12,054][255279] Updated weights for policy 0, policy_version 401 (0.0009) +[2026-06-02 16:46:12,262][255279] Updated weights for policy 0, policy_version 411 (0.0006) +[2026-06-02 16:46:12,498][255279] Updated weights for policy 0, policy_version 424 (0.0007) +[2026-06-02 16:46:12,709][255279] Updated weights for policy 0, policy_version 434 (0.0007) +[2026-06-02 16:46:12,919][255279] Updated weights for policy 0, policy_version 444 (0.0004) +[2026-06-02 16:46:13,470][255279] Updated weights for policy 0, policy_version 458 (0.0006) +[2026-06-02 16:46:13,656][255279] Updated weights for policy 0, policy_version 468 (0.0009) +[2026-06-02 16:46:13,867][255279] Updated weights for policy 0, policy_version 478 (0.0009) +[2026-06-02 16:46:14,101][255279] Updated weights for policy 0, policy_version 490 (0.0007) +[2026-06-02 16:46:14,289][255279] Updated weights for policy 0, policy_version 500 (0.0004) +[2026-06-02 16:46:14,499][255279] Updated weights for policy 0, policy_version 510 (0.0004) +[2026-06-02 16:46:14,974][255279] Updated weights for policy 0, policy_version 520 (0.0006) +[2026-06-02 16:46:15,163][255279] Updated weights for policy 0, policy_version 530 (0.0008) +[2026-06-02 16:46:15,345][255279] Updated weights for policy 0, policy_version 540 (0.0008) +[2026-06-02 16:46:15,432][253683] Heartbeat connected on Batcher_0 +[2026-06-02 16:46:15,450][253683] Heartbeat connected on RolloutWorker_w0 +[2026-06-02 16:46:15,451][253683] Heartbeat connected on RolloutWorker_w1 +[2026-06-02 16:46:15,454][253683] Heartbeat connected on InferenceWorker_p0-w0 +[2026-06-02 16:46:15,502][253683] Fps is (10 sec: 19660.9, 60 sec: 17476.3, 300 sec: 17476.3). Total num frames: 262144. Throughput: 0: 18167.5. Samples: 272512. Policy #0 lag: (min: 63.0, avg: 88.7, max: 127.0) +[2026-06-02 16:46:15,502][253683] Avg episode reward: [(0, '-1.604')] +[2026-06-02 16:46:15,555][255279] Updated weights for policy 0, policy_version 550 (0.0009) +[2026-06-02 16:46:15,756][255279] Updated weights for policy 0, policy_version 560 (0.0007) +[2026-06-02 16:46:15,942][255279] Updated weights for policy 0, policy_version 570 (0.0008) +[2026-06-02 16:46:16,047][255187] Saving new best policy, reward=-1.604! +[2026-06-02 16:46:16,070][253683] Heartbeat connected on LearnerWorker_p0 +[2026-06-02 16:46:16,415][255279] Updated weights for policy 0, policy_version 581 (0.0009) +[2026-06-02 16:46:16,590][255279] Updated weights for policy 0, policy_version 591 (0.0009) +[2026-06-02 16:46:16,787][255279] Updated weights for policy 0, policy_version 601 (0.0009) +[2026-06-02 16:46:16,990][255279] Updated weights for policy 0, policy_version 611 (0.0007) +[2026-06-02 16:46:17,179][255279] Updated weights for policy 0, policy_version 621 (0.0008) +[2026-06-02 16:46:17,361][255279] Updated weights for policy 0, policy_version 631 (0.0008) +[2026-06-02 16:46:17,937][255279] Updated weights for policy 0, policy_version 641 (0.0009) +[2026-06-02 16:46:18,107][255279] Updated weights for policy 0, policy_version 651 (0.0008) +[2026-06-02 16:46:18,309][255279] Updated weights for policy 0, policy_version 662 (0.0008) +[2026-06-02 16:46:18,492][255279] Updated weights for policy 0, policy_version 672 (0.0008) +[2026-06-02 16:46:18,686][255279] Updated weights for policy 0, policy_version 682 (0.0008) +[2026-06-02 16:46:18,870][255279] Updated weights for policy 0, policy_version 692 (0.0008) +[2026-06-02 16:46:19,063][255279] Updated weights for policy 0, policy_version 702 (0.0008) +[2026-06-02 16:46:19,643][255279] Updated weights for policy 0, policy_version 714 (0.0010) +[2026-06-02 16:46:19,829][255279] Updated weights for policy 0, policy_version 724 (0.0008) +[2026-06-02 16:46:20,019][255279] Updated weights for policy 0, policy_version 734 (0.0008) +[2026-06-02 16:46:20,208][255279] Updated weights for policy 0, policy_version 744 (0.0008) +[2026-06-02 16:46:20,400][255279] Updated weights for policy 0, policy_version 754 (0.0009) +[2026-06-02 16:46:20,502][253683] Fps is (10 sec: 19660.3, 60 sec: 18022.1, 300 sec: 18022.1). Total num frames: 360448. Throughput: 0: 19954.9. Samples: 399104. Policy #0 lag: (min: 63.0, avg: 87.7, max: 127.0) +[2026-06-02 16:46:20,503][253683] Avg episode reward: [(0, '3.817')] +[2026-06-02 16:46:20,580][255279] Updated weights for policy 0, policy_version 764 (0.0008) +[2026-06-02 16:46:20,654][255187] Saving new best policy, reward=3.817! +[2026-06-02 16:46:21,231][255279] Updated weights for policy 0, policy_version 774 (0.0006) +[2026-06-02 16:46:21,407][255279] Updated weights for policy 0, policy_version 784 (0.0008) +[2026-06-02 16:46:21,592][255279] Updated weights for policy 0, policy_version 794 (0.0008) +[2026-06-02 16:46:21,775][255279] Updated weights for policy 0, policy_version 804 (0.0008) +[2026-06-02 16:46:21,965][255279] Updated weights for policy 0, policy_version 814 (0.0008) +[2026-06-02 16:46:22,145][255279] Updated weights for policy 0, policy_version 824 (0.0008) +[2026-06-02 16:46:22,774][255279] Updated weights for policy 0, policy_version 834 (0.0009) +[2026-06-02 16:46:22,946][255279] Updated weights for policy 0, policy_version 844 (0.0008) +[2026-06-02 16:46:23,131][255279] Updated weights for policy 0, policy_version 854 (0.0008) +[2026-06-02 16:46:23,307][255279] Updated weights for policy 0, policy_version 864 (0.0006) +[2026-06-02 16:46:23,495][255279] Updated weights for policy 0, policy_version 874 (0.0004) +[2026-06-02 16:46:23,682][255279] Updated weights for policy 0, policy_version 884 (0.0004) +[2026-06-02 16:46:23,878][255279] Updated weights for policy 0, policy_version 894 (0.0004) +[2026-06-02 16:46:24,522][255279] Updated weights for policy 0, policy_version 904 (0.0008) +[2026-06-02 16:46:24,706][255279] Updated weights for policy 0, policy_version 914 (0.0006) +[2026-06-02 16:46:24,884][255279] Updated weights for policy 0, policy_version 924 (0.0008) +[2026-06-02 16:46:25,093][255279] Updated weights for policy 0, policy_version 935 (0.0008) +[2026-06-02 16:46:25,281][255279] Updated weights for policy 0, policy_version 946 (0.0005) +[2026-06-02 16:46:25,470][255279] Updated weights for policy 0, policy_version 956 (0.0004) +[2026-06-02 16:46:25,501][253683] Fps is (10 sec: 19661.0, 60 sec: 18350.2, 300 sec: 18350.2). Total num frames: 458752. Throughput: 0: 18498.7. Samples: 462464. Policy #0 lag: (min: 63.0, avg: 84.3, max: 127.0) +[2026-06-02 16:46:25,502][253683] Avg episode reward: [(0, '3.923')] +[2026-06-02 16:46:25,544][255187] Saving new best policy, reward=3.923! +[2026-06-02 16:46:26,136][255279] Updated weights for policy 0, policy_version 966 (0.0007) +[2026-06-02 16:46:26,310][255279] Updated weights for policy 0, policy_version 976 (0.0008) +[2026-06-02 16:46:26,524][255279] Updated weights for policy 0, policy_version 988 (0.0008) +[2026-06-02 16:46:26,713][255279] Updated weights for policy 0, policy_version 998 (0.0008) +[2026-06-02 16:46:26,895][255279] Updated weights for policy 0, policy_version 1008 (0.0008) +[2026-06-02 16:46:27,077][255279] Updated weights for policy 0, policy_version 1018 (0.0007) +[2026-06-02 16:46:27,802][255279] Updated weights for policy 0, policy_version 1028 (0.0007) +[2026-06-02 16:46:28,015][255279] Updated weights for policy 0, policy_version 1039 (0.0010) +[2026-06-02 16:46:28,186][255279] Updated weights for policy 0, policy_version 1049 (0.0004) +[2026-06-02 16:46:28,369][255279] Updated weights for policy 0, policy_version 1059 (0.0008) +[2026-06-02 16:46:28,562][255279] Updated weights for policy 0, policy_version 1070 (0.0007) +[2026-06-02 16:46:28,753][255279] Updated weights for policy 0, policy_version 1080 (0.0007) +[2026-06-02 16:46:29,435][255279] Updated weights for policy 0, policy_version 1090 (0.0007) +[2026-06-02 16:46:29,619][255279] Updated weights for policy 0, policy_version 1100 (0.0008) +[2026-06-02 16:46:29,804][255279] Updated weights for policy 0, policy_version 1111 (0.0008) +[2026-06-02 16:46:29,983][255279] Updated weights for policy 0, policy_version 1121 (0.0008) +[2026-06-02 16:46:30,182][255279] Updated weights for policy 0, policy_version 1131 (0.0008) +[2026-06-02 16:46:30,371][255279] Updated weights for policy 0, policy_version 1142 (0.0007) +[2026-06-02 16:46:30,502][253683] Fps is (10 sec: 19661.5, 60 sec: 18568.6, 300 sec: 18568.6). Total num frames: 557056. Throughput: 0: 19191.5. Samples: 575744. Policy #0 lag: (min: 63.0, avg: 81.7, max: 127.0) +[2026-06-02 16:46:30,503][253683] Avg episode reward: [(0, '3.976')] +[2026-06-02 16:46:30,550][255187] Saving new best policy, reward=3.976! +[2026-06-02 16:46:30,554][255279] Updated weights for policy 0, policy_version 1152 (0.0006) +[2026-06-02 16:46:31,318][255279] Updated weights for policy 0, policy_version 1162 (0.0007) +[2026-06-02 16:46:31,499][255279] Updated weights for policy 0, policy_version 1172 (0.0008) +[2026-06-02 16:46:31,713][255279] Updated weights for policy 0, policy_version 1184 (0.0007) +[2026-06-02 16:46:31,892][255279] Updated weights for policy 0, policy_version 1194 (0.0008) +[2026-06-02 16:46:32,078][255279] Updated weights for policy 0, policy_version 1204 (0.0008) +[2026-06-02 16:46:32,283][255279] Updated weights for policy 0, policy_version 1215 (0.0008) +[2026-06-02 16:46:32,982][255279] Updated weights for policy 0, policy_version 1226 (0.0008) +[2026-06-02 16:46:33,170][255279] Updated weights for policy 0, policy_version 1237 (0.0008) +[2026-06-02 16:46:33,384][255279] Updated weights for policy 0, policy_version 1249 (0.0008) +[2026-06-02 16:46:33,567][255279] Updated weights for policy 0, policy_version 1259 (0.0006) +[2026-06-02 16:46:33,746][255279] Updated weights for policy 0, policy_version 1269 (0.0007) +[2026-06-02 16:46:33,943][255279] Updated weights for policy 0, policy_version 1280 (0.0005) +[2026-06-02 16:46:34,709][255279] Updated weights for policy 0, policy_version 1290 (0.0009) +[2026-06-02 16:46:34,894][255279] Updated weights for policy 0, policy_version 1300 (0.0008) +[2026-06-02 16:46:35,071][255279] Updated weights for policy 0, policy_version 1310 (0.0007) +[2026-06-02 16:46:35,254][255279] Updated weights for policy 0, policy_version 1320 (0.0006) +[2026-06-02 16:46:35,448][255279] Updated weights for policy 0, policy_version 1331 (0.0008) +[2026-06-02 16:46:35,502][253683] Fps is (10 sec: 19660.7, 60 sec: 18724.6, 300 sec: 18724.6). Total num frames: 655360. Throughput: 0: 19894.9. Samples: 696320. Policy #0 lag: (min: 63.0, avg: 81.9, max: 127.0) +[2026-06-02 16:46:35,502][253683] Avg episode reward: [(0, '4.067')] +[2026-06-02 16:46:35,640][255279] Updated weights for policy 0, policy_version 1341 (0.0008) +[2026-06-02 16:46:35,692][255187] Saving new best policy, reward=4.067! +[2026-06-02 16:46:36,384][255279] Updated weights for policy 0, policy_version 1351 (0.0007) +[2026-06-02 16:46:36,562][255279] Updated weights for policy 0, policy_version 1361 (0.0004) +[2026-06-02 16:46:36,749][255279] Updated weights for policy 0, policy_version 1371 (0.0006) +[2026-06-02 16:46:36,932][255279] Updated weights for policy 0, policy_version 1381 (0.0009) +[2026-06-02 16:46:37,109][255279] Updated weights for policy 0, policy_version 1391 (0.0009) +[2026-06-02 16:46:37,285][255279] Updated weights for policy 0, policy_version 1401 (0.0008) +[2026-06-02 16:46:38,072][255279] Updated weights for policy 0, policy_version 1411 (0.0009) +[2026-06-02 16:46:38,239][255279] Updated weights for policy 0, policy_version 1421 (0.0009) +[2026-06-02 16:46:38,426][255279] Updated weights for policy 0, policy_version 1431 (0.0009) +[2026-06-02 16:46:38,624][255279] Updated weights for policy 0, policy_version 1442 (0.0010) +[2026-06-02 16:46:38,815][255279] Updated weights for policy 0, policy_version 1452 (0.0009) +[2026-06-02 16:46:38,981][255279] Updated weights for policy 0, policy_version 1462 (0.0009) +[2026-06-02 16:46:39,174][255279] Updated weights for policy 0, policy_version 1472 (0.0009) +[2026-06-02 16:46:39,999][255279] Updated weights for policy 0, policy_version 1483 (0.0007) +[2026-06-02 16:46:40,166][255279] Updated weights for policy 0, policy_version 1493 (0.0004) +[2026-06-02 16:46:40,361][255279] Updated weights for policy 0, policy_version 1503 (0.0006) +[2026-06-02 16:46:40,502][253683] Fps is (10 sec: 19660.7, 60 sec: 18841.6, 300 sec: 18841.6). Total num frames: 753664. Throughput: 0: 18739.2. Samples: 749568. Policy #0 lag: (min: 63.0, avg: 78.5, max: 127.0) +[2026-06-02 16:46:40,503][253683] Avg episode reward: [(0, '4.447')] +[2026-06-02 16:46:40,556][255279] Updated weights for policy 0, policy_version 1514 (0.0005) +[2026-06-02 16:46:40,730][255279] Updated weights for policy 0, policy_version 1524 (0.0005) +[2026-06-02 16:46:40,917][255279] Updated weights for policy 0, policy_version 1534 (0.0008) +[2026-06-02 16:46:40,946][255187] Saving new best policy, reward=4.447! +[2026-06-02 16:46:41,694][255279] Updated weights for policy 0, policy_version 1545 (0.0007) +[2026-06-02 16:46:41,867][255279] Updated weights for policy 0, policy_version 1555 (0.0008) +[2026-06-02 16:46:42,054][255279] Updated weights for policy 0, policy_version 1565 (0.0009) +[2026-06-02 16:46:42,235][255279] Updated weights for policy 0, policy_version 1575 (0.0010) +[2026-06-02 16:46:42,420][255279] Updated weights for policy 0, policy_version 1585 (0.0010) +[2026-06-02 16:46:42,599][255279] Updated weights for policy 0, policy_version 1595 (0.0010) +[2026-06-02 16:46:43,393][255279] Updated weights for policy 0, policy_version 1605 (0.0009) +[2026-06-02 16:46:43,573][255279] Updated weights for policy 0, policy_version 1616 (0.0009) +[2026-06-02 16:46:43,785][255279] Updated weights for policy 0, policy_version 1628 (0.0006) +[2026-06-02 16:46:43,967][255279] Updated weights for policy 0, policy_version 1638 (0.0006) +[2026-06-02 16:46:44,165][255279] Updated weights for policy 0, policy_version 1648 (0.0005) +[2026-06-02 16:46:44,334][255279] Updated weights for policy 0, policy_version 1658 (0.0008) +[2026-06-02 16:46:45,165][255279] Updated weights for policy 0, policy_version 1669 (0.0009) +[2026-06-02 16:46:45,337][255279] Updated weights for policy 0, policy_version 1679 (0.0008) +[2026-06-02 16:46:45,502][253683] Fps is (10 sec: 19660.3, 60 sec: 18932.6, 300 sec: 18932.6). Total num frames: 851968. Throughput: 0: 19128.8. Samples: 860800. Policy #0 lag: (min: 58.0, avg: 89.0, max: 122.0) +[2026-06-02 16:46:45,503][253683] Avg episode reward: [(0, '5.156')] +[2026-06-02 16:46:45,519][255279] Updated weights for policy 0, policy_version 1689 (0.0008) +[2026-06-02 16:46:45,703][255279] Updated weights for policy 0, policy_version 1699 (0.0008) +[2026-06-02 16:46:45,883][255279] Updated weights for policy 0, policy_version 1709 (0.0008) +[2026-06-02 16:46:46,076][255279] Updated weights for policy 0, policy_version 1719 (0.0008) +[2026-06-02 16:46:46,221][255187] Saving new best policy, reward=5.156! +[2026-06-02 16:46:46,892][255279] Updated weights for policy 0, policy_version 1730 (0.0009) +[2026-06-02 16:46:47,084][255279] Updated weights for policy 0, policy_version 1741 (0.0008) +[2026-06-02 16:46:47,272][255279] Updated weights for policy 0, policy_version 1751 (0.0007) +[2026-06-02 16:46:47,451][255279] Updated weights for policy 0, policy_version 1761 (0.0009) +[2026-06-02 16:46:47,663][255279] Updated weights for policy 0, policy_version 1773 (0.0010) +[2026-06-02 16:46:47,833][255279] Updated weights for policy 0, policy_version 1783 (0.0011) +[2026-06-02 16:46:48,669][255279] Updated weights for policy 0, policy_version 1793 (0.0009) +[2026-06-02 16:46:48,850][255279] Updated weights for policy 0, policy_version 1804 (0.0008) +[2026-06-02 16:46:49,028][255279] Updated weights for policy 0, policy_version 1814 (0.0008) +[2026-06-02 16:46:49,219][255279] Updated weights for policy 0, policy_version 1824 (0.0008) +[2026-06-02 16:46:49,392][255279] Updated weights for policy 0, policy_version 1834 (0.0008) +[2026-06-02 16:46:49,620][255279] Updated weights for policy 0, policy_version 1846 (0.0009) +[2026-06-02 16:46:49,791][255279] Updated weights for policy 0, policy_version 1856 (0.0009) +[2026-06-02 16:46:50,502][253683] Fps is (10 sec: 19660.8, 60 sec: 19005.4, 300 sec: 19005.4). Total num frames: 950272. Throughput: 0: 19791.6. Samples: 967296. Policy #0 lag: (min: 28.0, avg: 42.9, max: 92.0) +[2026-06-02 16:46:50,503][253683] Avg episode reward: [(0, '5.377')] +[2026-06-02 16:46:50,606][255279] Updated weights for policy 0, policy_version 1866 (0.0006) +[2026-06-02 16:46:50,791][255279] Updated weights for policy 0, policy_version 1876 (0.0009) +[2026-06-02 16:46:50,972][255279] Updated weights for policy 0, policy_version 1886 (0.0009) +[2026-06-02 16:46:51,154][255279] Updated weights for policy 0, policy_version 1896 (0.0010) +[2026-06-02 16:46:51,334][255279] Updated weights for policy 0, policy_version 1906 (0.0010) +[2026-06-02 16:46:51,515][255279] Updated weights for policy 0, policy_version 1916 (0.0007) +[2026-06-02 16:46:51,578][255187] Saving new best policy, reward=5.377! +[2026-06-02 16:46:52,308][255279] Updated weights for policy 0, policy_version 1926 (0.0004) +[2026-06-02 16:46:52,469][255279] Updated weights for policy 0, policy_version 1936 (0.0004) +[2026-06-02 16:46:52,649][255279] Updated weights for policy 0, policy_version 1946 (0.0004) +[2026-06-02 16:46:52,855][255279] Updated weights for policy 0, policy_version 1957 (0.0006) +[2026-06-02 16:46:53,052][255279] Updated weights for policy 0, policy_version 1968 (0.0008) +[2026-06-02 16:46:53,240][255279] Updated weights for policy 0, policy_version 1979 (0.0008) +[2026-06-02 16:46:54,046][255279] Updated weights for policy 0, policy_version 1989 (0.0008) +[2026-06-02 16:46:54,209][255279] Updated weights for policy 0, policy_version 1999 (0.0008) +[2026-06-02 16:46:54,401][255279] Updated weights for policy 0, policy_version 2010 (0.0008) +[2026-06-02 16:46:54,594][255279] Updated weights for policy 0, policy_version 2021 (0.0008) +[2026-06-02 16:46:54,793][255279] Updated weights for policy 0, policy_version 2032 (0.0008) +[2026-06-02 16:46:55,014][255279] Updated weights for policy 0, policy_version 2044 (0.0008) +[2026-06-02 16:46:55,502][253683] Fps is (10 sec: 19660.6, 60 sec: 19064.9, 300 sec: 19064.9). Total num frames: 1048576. Throughput: 0: 19609.5. Samples: 1025664. Policy #0 lag: (min: 44.0, avg: 59.0, max: 108.0) +[2026-06-02 16:46:55,507][253683] Avg episode reward: [(0, '6.209')] +[2026-06-02 16:46:55,515][255187] Saving new best policy, reward=6.209! +[2026-06-02 16:46:55,835][255279] Updated weights for policy 0, policy_version 2054 (0.0008) +[2026-06-02 16:46:56,026][255279] Updated weights for policy 0, policy_version 2065 (0.0008) +[2026-06-02 16:46:56,207][255279] Updated weights for policy 0, policy_version 2075 (0.0009) +[2026-06-02 16:46:56,393][255279] Updated weights for policy 0, policy_version 2085 (0.0009) +[2026-06-02 16:46:56,570][255279] Updated weights for policy 0, policy_version 2095 (0.0008) +[2026-06-02 16:46:56,744][255279] Updated weights for policy 0, policy_version 2105 (0.0009) +[2026-06-02 16:46:57,600][255279] Updated weights for policy 0, policy_version 2115 (0.0009) +[2026-06-02 16:46:57,792][255279] Updated weights for policy 0, policy_version 2126 (0.0009) +[2026-06-02 16:46:57,992][255279] Updated weights for policy 0, policy_version 2137 (0.0009) +[2026-06-02 16:46:58,165][255279] Updated weights for policy 0, policy_version 2147 (0.0009) +[2026-06-02 16:46:58,347][255279] Updated weights for policy 0, policy_version 2157 (0.0009) +[2026-06-02 16:46:58,523][255279] Updated weights for policy 0, policy_version 2167 (0.0008) +[2026-06-02 16:46:59,371][255279] Updated weights for policy 0, policy_version 2177 (0.0009) +[2026-06-02 16:46:59,541][255279] Updated weights for policy 0, policy_version 2187 (0.0008) +[2026-06-02 16:46:59,721][255279] Updated weights for policy 0, policy_version 2197 (0.0008) +[2026-06-02 16:46:59,906][255279] Updated weights for policy 0, policy_version 2207 (0.0009) +[2026-06-02 16:47:00,094][255279] Updated weights for policy 0, policy_version 2217 (0.0008) +[2026-06-02 16:47:00,269][255279] Updated weights for policy 0, policy_version 2227 (0.0009) +[2026-06-02 16:47:00,459][255279] Updated weights for policy 0, policy_version 2237 (0.0009) +[2026-06-02 16:47:00,505][253683] Fps is (10 sec: 19654.4, 60 sec: 19113.6, 300 sec: 19113.6). Total num frames: 1146880. Throughput: 0: 19104.7. Samples: 1132288. Policy #0 lag: (min: 7.0, avg: 22.4, max: 71.0) +[2026-06-02 16:47:00,507][253683] Avg episode reward: [(0, '6.548')] +[2026-06-02 16:47:00,512][255187] Saving new best policy, reward=6.548! +[2026-06-02 16:47:01,253][255279] Updated weights for policy 0, policy_version 2247 (0.0009) +[2026-06-02 16:47:01,448][255279] Updated weights for policy 0, policy_version 2258 (0.0009) +[2026-06-02 16:47:01,628][255279] Updated weights for policy 0, policy_version 2268 (0.0008) +[2026-06-02 16:47:01,845][255279] Updated weights for policy 0, policy_version 2280 (0.0009) +[2026-06-02 16:47:02,039][255279] Updated weights for policy 0, policy_version 2291 (0.0008) +[2026-06-02 16:47:02,222][255279] Updated weights for policy 0, policy_version 2301 (0.0008) +[2026-06-02 16:47:03,051][255279] Updated weights for policy 0, policy_version 2311 (0.0006) +[2026-06-02 16:47:03,240][255279] Updated weights for policy 0, policy_version 2322 (0.0009) +[2026-06-02 16:47:03,427][255279] Updated weights for policy 0, policy_version 2332 (0.0009) +[2026-06-02 16:47:03,596][255279] Updated weights for policy 0, policy_version 2342 (0.0009) +[2026-06-02 16:47:03,773][255279] Updated weights for policy 0, policy_version 2352 (0.0009) +[2026-06-02 16:47:03,961][255279] Updated weights for policy 0, policy_version 2362 (0.0009) +[2026-06-02 16:47:04,814][255279] Updated weights for policy 0, policy_version 2372 (0.0009) +[2026-06-02 16:47:05,015][255279] Updated weights for policy 0, policy_version 2384 (0.0009) +[2026-06-02 16:47:05,205][255279] Updated weights for policy 0, policy_version 2395 (0.0008) +[2026-06-02 16:47:05,394][255279] Updated weights for policy 0, policy_version 2405 (0.0008) +[2026-06-02 16:47:05,502][253683] Fps is (10 sec: 16384.6, 60 sec: 19114.7, 300 sec: 18652.6). Total num frames: 1212416. Throughput: 0: 18910.0. Samples: 1250048. Policy #0 lag: (min: 63.0, avg: 77.1, max: 127.0) +[2026-06-02 16:47:05,503][253683] Avg episode reward: [(0, '7.491')] +[2026-06-02 16:47:05,593][255279] Updated weights for policy 0, policy_version 2416 (0.0009) +[2026-06-02 16:47:05,796][255279] Updated weights for policy 0, policy_version 2427 (0.0008) +[2026-06-02 16:47:05,880][255187] Saving new best policy, reward=7.491! +[2026-06-02 16:47:06,648][255279] Updated weights for policy 0, policy_version 2437 (0.0009) +[2026-06-02 16:47:06,839][255279] Updated weights for policy 0, policy_version 2448 (0.0008) +[2026-06-02 16:47:07,039][255279] Updated weights for policy 0, policy_version 2459 (0.0008) +[2026-06-02 16:47:07,221][255279] Updated weights for policy 0, policy_version 2469 (0.0008) +[2026-06-02 16:47:07,418][255279] Updated weights for policy 0, policy_version 2480 (0.0008) +[2026-06-02 16:47:07,601][255279] Updated weights for policy 0, policy_version 2490 (0.0008) +[2026-06-02 16:47:08,449][255279] Updated weights for policy 0, policy_version 2500 (0.0009) +[2026-06-02 16:47:08,624][255279] Updated weights for policy 0, policy_version 2510 (0.0009) +[2026-06-02 16:47:08,794][255279] Updated weights for policy 0, policy_version 2520 (0.0009) +[2026-06-02 16:47:08,982][255279] Updated weights for policy 0, policy_version 2530 (0.0008) +[2026-06-02 16:47:09,160][255279] Updated weights for policy 0, policy_version 2540 (0.0008) +[2026-06-02 16:47:09,343][255279] Updated weights for policy 0, policy_version 2550 (0.0010) +[2026-06-02 16:47:10,210][255279] Updated weights for policy 0, policy_version 2562 (0.0009) +[2026-06-02 16:47:10,390][255279] Updated weights for policy 0, policy_version 2573 (0.0008) +[2026-06-02 16:47:10,501][253683] Fps is (10 sec: 16389.6, 60 sec: 19114.7, 300 sec: 18724.6). Total num frames: 1310720. Throughput: 0: 18517.3. Samples: 1295744. Policy #0 lag: (min: 63.0, avg: 77.2, max: 127.0) +[2026-06-02 16:47:10,502][253683] Avg episode reward: [(0, '7.639')] +[2026-06-02 16:47:10,622][255279] Updated weights for policy 0, policy_version 2586 (0.0010) +[2026-06-02 16:47:10,805][255279] Updated weights for policy 0, policy_version 2596 (0.0009) +[2026-06-02 16:47:11,008][255279] Updated weights for policy 0, policy_version 2607 (0.0009) +[2026-06-02 16:47:11,191][255279] Updated weights for policy 0, policy_version 2617 (0.0008) +[2026-06-02 16:47:11,311][255187] Saving new best policy, reward=7.639! +[2026-06-02 16:47:12,020][255279] Updated weights for policy 0, policy_version 2627 (0.0009) +[2026-06-02 16:47:12,230][255279] Updated weights for policy 0, policy_version 2639 (0.0008) +[2026-06-02 16:47:12,407][255279] Updated weights for policy 0, policy_version 2649 (0.0008) +[2026-06-02 16:47:12,615][255279] Updated weights for policy 0, policy_version 2660 (0.0008) +[2026-06-02 16:47:12,832][255279] Updated weights for policy 0, policy_version 2672 (0.0008) +[2026-06-02 16:47:13,006][255279] Updated weights for policy 0, policy_version 2682 (0.0008) +[2026-06-02 16:47:13,858][255279] Updated weights for policy 0, policy_version 2692 (0.0008) +[2026-06-02 16:47:14,032][255279] Updated weights for policy 0, policy_version 2702 (0.0009) +[2026-06-02 16:47:14,214][255279] Updated weights for policy 0, policy_version 2712 (0.0008) +[2026-06-02 16:47:14,392][255279] Updated weights for policy 0, policy_version 2722 (0.0009) +[2026-06-02 16:47:14,597][255279] Updated weights for policy 0, policy_version 2733 (0.0008) +[2026-06-02 16:47:14,801][255279] Updated weights for policy 0, policy_version 2744 (0.0009) +[2026-06-02 16:47:15,502][253683] Fps is (10 sec: 19659.7, 60 sec: 19114.5, 300 sec: 18786.9). Total num frames: 1409024. Throughput: 0: 18659.4. Samples: 1415424. Policy #0 lag: (min: 63.0, avg: 77.6, max: 127.0) +[2026-06-02 16:47:15,504][253683] Avg episode reward: [(0, '8.279')] +[2026-06-02 16:47:15,618][255279] Updated weights for policy 0, policy_version 2754 (0.0010) +[2026-06-02 16:47:15,780][255279] Updated weights for policy 0, policy_version 2764 (0.0008) +[2026-06-02 16:47:15,964][255279] Updated weights for policy 0, policy_version 2774 (0.0008) +[2026-06-02 16:47:16,173][255279] Updated weights for policy 0, policy_version 2786 (0.0009) +[2026-06-02 16:47:16,368][255279] Updated weights for policy 0, policy_version 2797 (0.0008) +[2026-06-02 16:47:16,565][255279] Updated weights for policy 0, policy_version 2808 (0.0008) +[2026-06-02 16:47:16,695][255187] Saving new best policy, reward=8.279! +[2026-06-02 16:47:17,445][255279] Updated weights for policy 0, policy_version 2820 (0.0010) +[2026-06-02 16:47:17,642][255279] Updated weights for policy 0, policy_version 2832 (0.0007) +[2026-06-02 16:47:17,823][255279] Updated weights for policy 0, policy_version 2842 (0.0009) +[2026-06-02 16:47:18,025][255279] Updated weights for policy 0, policy_version 2853 (0.0008) +[2026-06-02 16:47:18,203][255279] Updated weights for policy 0, policy_version 2863 (0.0009) +[2026-06-02 16:47:18,390][255279] Updated weights for policy 0, policy_version 2873 (0.0008) +[2026-06-02 16:47:19,201][255279] Updated weights for policy 0, policy_version 2883 (0.0009) +[2026-06-02 16:47:19,374][255279] Updated weights for policy 0, policy_version 2893 (0.0010) +[2026-06-02 16:47:19,542][255279] Updated weights for policy 0, policy_version 2903 (0.0009) +[2026-06-02 16:47:19,726][255279] Updated weights for policy 0, policy_version 2913 (0.0007) +[2026-06-02 16:47:19,909][255279] Updated weights for policy 0, policy_version 2923 (0.0009) +[2026-06-02 16:47:20,081][255279] Updated weights for policy 0, policy_version 2933 (0.0009) +[2026-06-02 16:47:20,264][255279] Updated weights for policy 0, policy_version 2943 (0.0008) +[2026-06-02 16:47:20,502][253683] Fps is (10 sec: 19660.7, 60 sec: 19114.8, 300 sec: 18841.6). Total num frames: 1507328. Throughput: 0: 18306.8. Samples: 1520128. Policy #0 lag: (min: 23.0, avg: 37.0, max: 87.0) +[2026-06-02 16:47:20,503][253683] Avg episode reward: [(0, '8.863')] +[2026-06-02 16:47:20,508][255187] Saving new best policy, reward=8.863! +[2026-06-02 16:47:21,158][255279] Updated weights for policy 0, policy_version 2955 (0.0009) +[2026-06-02 16:47:21,349][255279] Updated weights for policy 0, policy_version 2966 (0.0009) +[2026-06-02 16:47:21,529][255279] Updated weights for policy 0, policy_version 2976 (0.0009) +[2026-06-02 16:47:21,715][255279] Updated weights for policy 0, policy_version 2986 (0.0009) +[2026-06-02 16:47:21,888][255279] Updated weights for policy 0, policy_version 2996 (0.0008) +[2026-06-02 16:47:22,076][255279] Updated weights for policy 0, policy_version 3006 (0.0009) +[2026-06-02 16:47:22,913][255279] Updated weights for policy 0, policy_version 3017 (0.0008) +[2026-06-02 16:47:23,107][255279] Updated weights for policy 0, policy_version 3028 (0.0009) +[2026-06-02 16:47:23,296][255279] Updated weights for policy 0, policy_version 3039 (0.0009) +[2026-06-02 16:47:23,485][255279] Updated weights for policy 0, policy_version 3049 (0.0009) +[2026-06-02 16:47:23,664][255279] Updated weights for policy 0, policy_version 3059 (0.0007) +[2026-06-02 16:47:23,881][255279] Updated weights for policy 0, policy_version 3071 (0.0009) +[2026-06-02 16:47:24,778][255279] Updated weights for policy 0, policy_version 3083 (0.0009) +[2026-06-02 16:47:24,967][255279] Updated weights for policy 0, policy_version 3094 (0.0009) +[2026-06-02 16:47:25,144][255279] Updated weights for policy 0, policy_version 3104 (0.0009) +[2026-06-02 16:47:25,323][255279] Updated weights for policy 0, policy_version 3114 (0.0010) +[2026-06-02 16:47:25,502][253683] Fps is (10 sec: 16384.9, 60 sec: 18568.5, 300 sec: 18504.3). Total num frames: 1572864. Throughput: 0: 18463.3. Samples: 1580416. Policy #0 lag: (min: 23.0, avg: 37.0, max: 87.0) +[2026-06-02 16:47:25,502][253683] Avg episode reward: [(0, '8.850')] +[2026-06-02 16:47:25,532][255279] Updated weights for policy 0, policy_version 3125 (0.0009) +[2026-06-02 16:47:25,716][255279] Updated weights for policy 0, policy_version 3135 (0.0009) +[2026-06-02 16:47:26,553][255279] Updated weights for policy 0, policy_version 3146 (0.0009) +[2026-06-02 16:47:26,727][255279] Updated weights for policy 0, policy_version 3156 (0.0008) +[2026-06-02 16:47:26,911][255279] Updated weights for policy 0, policy_version 3166 (0.0008) +[2026-06-02 16:47:27,098][255279] Updated weights for policy 0, policy_version 3176 (0.0008) +[2026-06-02 16:47:27,299][255279] Updated weights for policy 0, policy_version 3187 (0.0008) +[2026-06-02 16:47:27,476][255279] Updated weights for policy 0, policy_version 3197 (0.0008) +[2026-06-02 16:47:28,275][255279] Updated weights for policy 0, policy_version 3208 (0.0008) +[2026-06-02 16:47:28,468][255279] Updated weights for policy 0, policy_version 3219 (0.0008) +[2026-06-02 16:47:28,645][255279] Updated weights for policy 0, policy_version 3229 (0.0008) +[2026-06-02 16:47:28,826][255279] Updated weights for policy 0, policy_version 3239 (0.0008) +[2026-06-02 16:47:29,027][255279] Updated weights for policy 0, policy_version 3250 (0.0008) +[2026-06-02 16:47:29,209][255279] Updated weights for policy 0, policy_version 3260 (0.0008) +[2026-06-02 16:47:30,044][255279] Updated weights for policy 0, policy_version 3270 (0.0008) +[2026-06-02 16:47:30,218][255279] Updated weights for policy 0, policy_version 3280 (0.0008) +[2026-06-02 16:47:30,391][255279] Updated weights for policy 0, policy_version 3290 (0.0008) +[2026-06-02 16:47:30,502][253683] Fps is (10 sec: 16383.7, 60 sec: 18568.5, 300 sec: 18568.5). Total num frames: 1671168. Throughput: 0: 18329.6. Samples: 1685632. Policy #0 lag: (min: 10.0, avg: 50.7, max: 74.0) +[2026-06-02 16:47:30,503][253683] Avg episode reward: [(0, '11.085')] +[2026-06-02 16:47:30,605][255279] Updated weights for policy 0, policy_version 3302 (0.0009) +[2026-06-02 16:47:30,814][255279] Updated weights for policy 0, policy_version 3313 (0.0009) +[2026-06-02 16:47:31,019][255279] Updated weights for policy 0, policy_version 3324 (0.0008) +[2026-06-02 16:47:31,090][255187] Saving new best policy, reward=11.085! +[2026-06-02 16:47:31,835][255279] Updated weights for policy 0, policy_version 3334 (0.0008) +[2026-06-02 16:47:32,011][255279] Updated weights for policy 0, policy_version 3344 (0.0008) +[2026-06-02 16:47:32,226][255279] Updated weights for policy 0, policy_version 3356 (0.0008) +[2026-06-02 16:47:32,438][255279] Updated weights for policy 0, policy_version 3367 (0.0006) +[2026-06-02 16:47:32,628][255279] Updated weights for policy 0, policy_version 3378 (0.0007) +[2026-06-02 16:47:32,834][255279] Updated weights for policy 0, policy_version 3389 (0.0009) +[2026-06-02 16:47:33,723][255279] Updated weights for policy 0, policy_version 3404 (0.0008) +[2026-06-02 16:47:33,920][255279] Updated weights for policy 0, policy_version 3415 (0.0008) +[2026-06-02 16:47:34,099][255279] Updated weights for policy 0, policy_version 3425 (0.0008) +[2026-06-02 16:47:34,307][255279] Updated weights for policy 0, policy_version 3436 (0.0008) +[2026-06-02 16:47:34,524][255279] Updated weights for policy 0, policy_version 3448 (0.0009) +[2026-06-02 16:47:35,355][255279] Updated weights for policy 0, policy_version 3458 (0.0009) +[2026-06-02 16:47:35,502][253683] Fps is (10 sec: 19660.8, 60 sec: 18568.5, 300 sec: 18626.0). Total num frames: 1769472. Throughput: 0: 18301.2. Samples: 1790848. Policy #0 lag: (min: 63.0, avg: 76.1, max: 127.0) +[2026-06-02 16:47:35,502][253683] Avg episode reward: [(0, '10.769')] +[2026-06-02 16:47:35,540][255279] Updated weights for policy 0, policy_version 3469 (0.0008) +[2026-06-02 16:47:35,746][255279] Updated weights for policy 0, policy_version 3480 (0.0008) +[2026-06-02 16:47:35,927][255279] Updated weights for policy 0, policy_version 3490 (0.0008) +[2026-06-02 16:47:36,102][255279] Updated weights for policy 0, policy_version 3500 (0.0008) +[2026-06-02 16:47:36,309][255279] Updated weights for policy 0, policy_version 3511 (0.0008) +[2026-06-02 16:47:37,123][255279] Updated weights for policy 0, policy_version 3521 (0.0008) +[2026-06-02 16:47:37,290][255279] Updated weights for policy 0, policy_version 3531 (0.0008) +[2026-06-02 16:47:37,479][255279] Updated weights for policy 0, policy_version 3542 (0.0008) +[2026-06-02 16:47:37,666][255279] Updated weights for policy 0, policy_version 3552 (0.0008) +[2026-06-02 16:47:37,889][255279] Updated weights for policy 0, policy_version 3564 (0.0009) +[2026-06-02 16:47:38,078][255279] Updated weights for policy 0, policy_version 3574 (0.0008) +[2026-06-02 16:47:38,253][255279] Updated weights for policy 0, policy_version 3584 (0.0008) +[2026-06-02 16:47:39,064][255279] Updated weights for policy 0, policy_version 3594 (0.0008) +[2026-06-02 16:47:39,257][255279] Updated weights for policy 0, policy_version 3604 (0.0008) +[2026-06-02 16:47:39,428][255279] Updated weights for policy 0, policy_version 3614 (0.0008) +[2026-06-02 16:47:39,606][255279] Updated weights for policy 0, policy_version 3624 (0.0008) +[2026-06-02 16:47:39,803][255279] Updated weights for policy 0, policy_version 3634 (0.0008) +[2026-06-02 16:47:39,982][255279] Updated weights for policy 0, policy_version 3644 (0.0008) +[2026-06-02 16:47:40,501][253683] Fps is (10 sec: 19661.2, 60 sec: 18568.6, 300 sec: 18677.8). Total num frames: 1867776. Throughput: 0: 18309.8. Samples: 1849600. Policy #0 lag: (min: 12.0, avg: 26.9, max: 76.0) +[2026-06-02 16:47:40,502][253683] Avg episode reward: [(0, '11.795')] +[2026-06-02 16:47:40,797][255279] Updated weights for policy 0, policy_version 3656 (0.0008) +[2026-06-02 16:47:40,985][255279] Updated weights for policy 0, policy_version 3666 (0.0008) +[2026-06-02 16:47:41,154][255279] Updated weights for policy 0, policy_version 3676 (0.0009) +[2026-06-02 16:47:41,356][255279] Updated weights for policy 0, policy_version 3686 (0.0008) +[2026-06-02 16:47:41,530][255279] Updated weights for policy 0, policy_version 3696 (0.0008) +[2026-06-02 16:47:41,729][255279] Updated weights for policy 0, policy_version 3707 (0.0008) +[2026-06-02 16:47:41,817][255187] Saving new best policy, reward=11.795! +[2026-06-02 16:47:42,558][255279] Updated weights for policy 0, policy_version 3717 (0.0009) +[2026-06-02 16:47:42,734][255279] Updated weights for policy 0, policy_version 3727 (0.0008) +[2026-06-02 16:47:42,925][255279] Updated weights for policy 0, policy_version 3737 (0.0009) +[2026-06-02 16:47:43,108][255279] Updated weights for policy 0, policy_version 3747 (0.0009) +[2026-06-02 16:47:43,296][255279] Updated weights for policy 0, policy_version 3758 (0.0009) +[2026-06-02 16:47:43,476][255279] Updated weights for policy 0, policy_version 3768 (0.0009) +[2026-06-02 16:47:44,276][255279] Updated weights for policy 0, policy_version 3778 (0.0009) +[2026-06-02 16:47:44,480][255279] Updated weights for policy 0, policy_version 3790 (0.0008) +[2026-06-02 16:47:44,667][255279] Updated weights for policy 0, policy_version 3800 (0.0008) +[2026-06-02 16:47:44,870][255279] Updated weights for policy 0, policy_version 3811 (0.0009) +[2026-06-02 16:47:45,086][255279] Updated weights for policy 0, policy_version 3823 (0.0009) +[2026-06-02 16:47:45,297][255279] Updated weights for policy 0, policy_version 3834 (0.0009) +[2026-06-02 16:47:45,502][253683] Fps is (10 sec: 19660.6, 60 sec: 18568.6, 300 sec: 18724.6). Total num frames: 1966080. Throughput: 0: 18296.8. Samples: 1955584. Policy #0 lag: (min: 36.0, avg: 50.9, max: 100.0) +[2026-06-02 16:47:45,503][253683] Avg episode reward: [(0, '13.292')] +[2026-06-02 16:47:45,510][255187] Saving new best policy, reward=13.292! +[2026-06-02 16:47:46,098][255279] Updated weights for policy 0, policy_version 3844 (0.0009) +[2026-06-02 16:47:46,278][255279] Updated weights for policy 0, policy_version 3854 (0.0009) +[2026-06-02 16:47:46,452][255279] Updated weights for policy 0, policy_version 3864 (0.0009) +[2026-06-02 16:47:46,634][255279] Updated weights for policy 0, policy_version 3874 (0.0009) +[2026-06-02 16:47:46,852][255279] Updated weights for policy 0, policy_version 3886 (0.0010) +[2026-06-02 16:47:47,026][255279] Updated weights for policy 0, policy_version 3896 (0.0009) +[2026-06-02 16:47:47,903][255279] Updated weights for policy 0, policy_version 3908 (0.0009) +[2026-06-02 16:47:48,074][255279] Updated weights for policy 0, policy_version 3918 (0.0008) +[2026-06-02 16:47:48,254][255279] Updated weights for policy 0, policy_version 3928 (0.0009) +[2026-06-02 16:47:48,434][255279] Updated weights for policy 0, policy_version 3938 (0.0010) +[2026-06-02 16:47:48,640][255279] Updated weights for policy 0, policy_version 3949 (0.0009) +[2026-06-02 16:47:48,823][255279] Updated weights for policy 0, policy_version 3959 (0.0009) +[2026-06-02 16:47:49,667][255279] Updated weights for policy 0, policy_version 3969 (0.0009) +[2026-06-02 16:47:49,851][255279] Updated weights for policy 0, policy_version 3980 (0.0009) +[2026-06-02 16:47:50,035][255279] Updated weights for policy 0, policy_version 3990 (0.0008) +[2026-06-02 16:47:50,228][255279] Updated weights for policy 0, policy_version 4001 (0.0009) +[2026-06-02 16:47:50,411][255279] Updated weights for policy 0, policy_version 4011 (0.0008) +[2026-06-02 16:47:50,502][253683] Fps is (10 sec: 16384.0, 60 sec: 18022.4, 300 sec: 18469.3). Total num frames: 2031616. Throughput: 0: 18349.5. Samples: 2075776. Policy #0 lag: (min: 63.0, avg: 78.2, max: 127.0) +[2026-06-02 16:47:50,503][253683] Avg episode reward: [(0, '12.793')] +[2026-06-02 16:47:50,634][255279] Updated weights for policy 0, policy_version 4023 (0.0008) +[2026-06-02 16:47:51,449][255279] Updated weights for policy 0, policy_version 4033 (0.0008) +[2026-06-02 16:47:51,659][255279] Updated weights for policy 0, policy_version 4046 (0.0008) +[2026-06-02 16:47:51,865][255279] Updated weights for policy 0, policy_version 4057 (0.0008) +[2026-06-02 16:47:52,059][255279] Updated weights for policy 0, policy_version 4068 (0.0009) +[2026-06-02 16:47:52,271][255279] Updated weights for policy 0, policy_version 4080 (0.0008) +[2026-06-02 16:47:52,456][255279] Updated weights for policy 0, policy_version 4090 (0.0008) +[2026-06-02 16:47:53,275][255279] Updated weights for policy 0, policy_version 4100 (0.0008) +[2026-06-02 16:47:53,465][255279] Updated weights for policy 0, policy_version 4111 (0.0009) +[2026-06-02 16:47:53,662][255279] Updated weights for policy 0, policy_version 4122 (0.0009) +[2026-06-02 16:47:53,842][255279] Updated weights for policy 0, policy_version 4132 (0.0009) +[2026-06-02 16:47:54,059][255279] Updated weights for policy 0, policy_version 4144 (0.0008) +[2026-06-02 16:47:54,279][255279] Updated weights for policy 0, policy_version 4156 (0.0008) +[2026-06-02 16:47:55,103][255279] Updated weights for policy 0, policy_version 4166 (0.0009) +[2026-06-02 16:47:55,278][255279] Updated weights for policy 0, policy_version 4176 (0.0008) +[2026-06-02 16:47:55,497][255279] Updated weights for policy 0, policy_version 4188 (0.0009) +[2026-06-02 16:47:55,502][253683] Fps is (10 sec: 16384.1, 60 sec: 18022.5, 300 sec: 18521.1). Total num frames: 2129920. Throughput: 0: 18392.1. Samples: 2123392. Policy #0 lag: (min: 63.0, avg: 75.7, max: 127.0) +[2026-06-02 16:47:55,503][253683] Avg episode reward: [(0, '14.122')] +[2026-06-02 16:47:55,680][255279] Updated weights for policy 0, policy_version 4198 (0.0008) +[2026-06-02 16:47:55,880][255279] Updated weights for policy 0, policy_version 4209 (0.0008) +[2026-06-02 16:47:56,074][255279] Updated weights for policy 0, policy_version 4220 (0.0008) +[2026-06-02 16:47:56,146][255187] Saving new best policy, reward=14.122! +[2026-06-02 16:47:56,919][255279] Updated weights for policy 0, policy_version 4231 (0.0008) +[2026-06-02 16:47:57,132][255279] Updated weights for policy 0, policy_version 4243 (0.0009) +[2026-06-02 16:47:57,315][255279] Updated weights for policy 0, policy_version 4253 (0.0009) +[2026-06-02 16:47:57,490][255279] Updated weights for policy 0, policy_version 4263 (0.0009) +[2026-06-02 16:47:57,669][255279] Updated weights for policy 0, policy_version 4273 (0.0009) +[2026-06-02 16:47:57,861][255279] Updated weights for policy 0, policy_version 4283 (0.0009) +[2026-06-02 16:47:58,627][255279] Updated weights for policy 0, policy_version 4293 (0.0009) +[2026-06-02 16:47:58,797][255279] Updated weights for policy 0, policy_version 4303 (0.0008) +[2026-06-02 16:47:58,976][255279] Updated weights for policy 0, policy_version 4313 (0.0009) +[2026-06-02 16:47:59,194][255279] Updated weights for policy 0, policy_version 4325 (0.0009) +[2026-06-02 16:47:59,394][255279] Updated weights for policy 0, policy_version 4336 (0.0008) +[2026-06-02 16:47:59,574][255279] Updated weights for policy 0, policy_version 4346 (0.0008) +[2026-06-02 16:48:00,412][255279] Updated weights for policy 0, policy_version 4359 (0.0008) +[2026-06-02 16:48:00,502][253683] Fps is (10 sec: 19660.7, 60 sec: 18023.4, 300 sec: 18568.5). Total num frames: 2228224. Throughput: 0: 18383.9. Samples: 2242688. Policy #0 lag: (min: 63.0, avg: 75.7, max: 127.0) +[2026-06-02 16:48:00,503][253683] Avg episode reward: [(0, '17.049')] +[2026-06-02 16:48:00,590][255279] Updated weights for policy 0, policy_version 4369 (0.0008) +[2026-06-02 16:48:00,831][255279] Updated weights for policy 0, policy_version 4383 (0.0009) +[2026-06-02 16:48:01,012][255279] Updated weights for policy 0, policy_version 4393 (0.0008) +[2026-06-02 16:48:01,191][255279] Updated weights for policy 0, policy_version 4403 (0.0008) +[2026-06-02 16:48:01,380][255279] Updated weights for policy 0, policy_version 4414 (0.0010) +[2026-06-02 16:48:01,418][255187] Saving new best policy, reward=17.049! +[2026-06-02 16:48:02,203][255279] Updated weights for policy 0, policy_version 4425 (0.0009) +[2026-06-02 16:48:02,417][255279] Updated weights for policy 0, policy_version 4437 (0.0008) +[2026-06-02 16:48:02,616][255279] Updated weights for policy 0, policy_version 4448 (0.0008) +[2026-06-02 16:48:02,810][255279] Updated weights for policy 0, policy_version 4459 (0.0009) +[2026-06-02 16:48:02,994][255279] Updated weights for policy 0, policy_version 4469 (0.0008) +[2026-06-02 16:48:03,189][255279] Updated weights for policy 0, policy_version 4480 (0.0008) +[2026-06-02 16:48:03,989][255279] Updated weights for policy 0, policy_version 4491 (0.0009) +[2026-06-02 16:48:04,166][255279] Updated weights for policy 0, policy_version 4501 (0.0009) +[2026-06-02 16:48:04,390][255279] Updated weights for policy 0, policy_version 4513 (0.0009) +[2026-06-02 16:48:04,577][255279] Updated weights for policy 0, policy_version 4524 (0.0008) +[2026-06-02 16:48:04,765][255279] Updated weights for policy 0, policy_version 4534 (0.0009) +[2026-06-02 16:48:05,502][253683] Fps is (10 sec: 19660.7, 60 sec: 18568.5, 300 sec: 18612.2). Total num frames: 2326528. Throughput: 0: 18420.6. Samples: 2349056. Policy #0 lag: (min: 63.0, avg: 76.9, max: 127.0) +[2026-06-02 16:48:05,503][253683] Avg episode reward: [(0, '21.014')] +[2026-06-02 16:48:05,582][255279] Updated weights for policy 0, policy_version 4545 (0.0009) +[2026-06-02 16:48:05,765][255279] Updated weights for policy 0, policy_version 4556 (0.0009) +[2026-06-02 16:48:05,942][255279] Updated weights for policy 0, policy_version 4566 (0.0009) +[2026-06-02 16:48:06,137][255279] Updated weights for policy 0, policy_version 4577 (0.0009) +[2026-06-02 16:48:06,316][255279] Updated weights for policy 0, policy_version 4587 (0.0009) +[2026-06-02 16:48:06,508][255279] Updated weights for policy 0, policy_version 4598 (0.0009) +[2026-06-02 16:48:06,685][255187] Saving new best policy, reward=21.014! +[2026-06-02 16:48:07,338][255279] Updated weights for policy 0, policy_version 4610 (0.0008) +[2026-06-02 16:48:07,534][255279] Updated weights for policy 0, policy_version 4621 (0.0008) +[2026-06-02 16:48:07,706][255279] Updated weights for policy 0, policy_version 4631 (0.0009) +[2026-06-02 16:48:07,890][255279] Updated weights for policy 0, policy_version 4641 (0.0009) +[2026-06-02 16:48:08,087][255279] Updated weights for policy 0, policy_version 4652 (0.0009) +[2026-06-02 16:48:08,275][255279] Updated weights for policy 0, policy_version 4662 (0.0009) +[2026-06-02 16:48:08,461][255279] Updated weights for policy 0, policy_version 4672 (0.0008) +[2026-06-02 16:48:09,227][255279] Updated weights for policy 0, policy_version 4683 (0.0010) +[2026-06-02 16:48:09,396][255279] Updated weights for policy 0, policy_version 4693 (0.0009) +[2026-06-02 16:48:09,632][255279] Updated weights for policy 0, policy_version 4706 (0.0009) +[2026-06-02 16:48:09,826][255279] Updated weights for policy 0, policy_version 4717 (0.0009) +[2026-06-02 16:48:10,009][255279] Updated weights for policy 0, policy_version 4727 (0.0009) +[2026-06-02 16:48:10,501][253683] Fps is (10 sec: 19660.9, 60 sec: 18568.5, 300 sec: 18652.6). Total num frames: 2424832. Throughput: 0: 18432.0. Samples: 2409856. Policy #0 lag: (min: 63.0, avg: 77.4, max: 127.0) +[2026-06-02 16:48:10,502][253683] Avg episode reward: [(0, '26.358')] +[2026-06-02 16:48:10,507][255187] Saving new best policy, reward=26.358! +[2026-06-02 16:48:10,784][255279] Updated weights for policy 0, policy_version 4737 (0.0009) +[2026-06-02 16:48:10,955][255279] Updated weights for policy 0, policy_version 4747 (0.0009) +[2026-06-02 16:48:11,130][255279] Updated weights for policy 0, policy_version 4757 (0.0008) +[2026-06-02 16:48:11,310][255279] Updated weights for policy 0, policy_version 4767 (0.0008) +[2026-06-02 16:48:11,490][255279] Updated weights for policy 0, policy_version 4777 (0.0009) +[2026-06-02 16:48:11,679][255279] Updated weights for policy 0, policy_version 4787 (0.0009) +[2026-06-02 16:48:11,859][255279] Updated weights for policy 0, policy_version 4797 (0.0009) +[2026-06-02 16:48:12,645][255279] Updated weights for policy 0, policy_version 4808 (0.0008) +[2026-06-02 16:48:12,859][255279] Updated weights for policy 0, policy_version 4820 (0.0008) +[2026-06-02 16:48:13,064][255279] Updated weights for policy 0, policy_version 4831 (0.0008) +[2026-06-02 16:48:13,263][255279] Updated weights for policy 0, policy_version 4842 (0.0008) +[2026-06-02 16:48:13,493][255279] Updated weights for policy 0, policy_version 4854 (0.0009) +[2026-06-02 16:48:13,670][255279] Updated weights for policy 0, policy_version 4864 (0.0009) +[2026-06-02 16:48:14,424][255279] Updated weights for policy 0, policy_version 4877 (0.0009) +[2026-06-02 16:48:14,619][255279] Updated weights for policy 0, policy_version 4888 (0.0008) +[2026-06-02 16:48:14,828][255279] Updated weights for policy 0, policy_version 4899 (0.0009) +[2026-06-02 16:48:15,011][255279] Updated weights for policy 0, policy_version 4909 (0.0010) +[2026-06-02 16:48:15,195][255279] Updated weights for policy 0, policy_version 4919 (0.0012) +[2026-06-02 16:48:15,502][253683] Fps is (10 sec: 19661.0, 60 sec: 18568.7, 300 sec: 18689.9). Total num frames: 2523136. Throughput: 0: 18500.4. Samples: 2518144. Policy #0 lag: (min: 63.0, avg: 79.4, max: 127.0) +[2026-06-02 16:48:15,502][253683] Avg episode reward: [(0, '29.869')] +[2026-06-02 16:48:15,507][255187] Saving new best policy, reward=29.869! +[2026-06-02 16:48:15,955][255279] Updated weights for policy 0, policy_version 4929 (0.0011) +[2026-06-02 16:48:16,124][255279] Updated weights for policy 0, policy_version 4939 (0.0008) +[2026-06-02 16:48:16,304][255279] Updated weights for policy 0, policy_version 4949 (0.0008) +[2026-06-02 16:48:16,490][255279] Updated weights for policy 0, policy_version 4959 (0.0008) +[2026-06-02 16:48:16,678][255279] Updated weights for policy 0, policy_version 4969 (0.0008) +[2026-06-02 16:48:16,861][255279] Updated weights for policy 0, policy_version 4979 (0.0009) +[2026-06-02 16:48:17,089][255279] Updated weights for policy 0, policy_version 4991 (0.0009) +[2026-06-02 16:48:17,805][255279] Updated weights for policy 0, policy_version 5002 (0.0009) +[2026-06-02 16:48:17,991][255279] Updated weights for policy 0, policy_version 5012 (0.0009) +[2026-06-02 16:48:18,198][255279] Updated weights for policy 0, policy_version 5023 (0.0010) +[2026-06-02 16:48:18,396][255279] Updated weights for policy 0, policy_version 5034 (0.0010) +[2026-06-02 16:48:18,581][255279] Updated weights for policy 0, policy_version 5044 (0.0010) +[2026-06-02 16:48:18,770][255279] Updated weights for policy 0, policy_version 5054 (0.0007) +[2026-06-02 16:48:19,471][255279] Updated weights for policy 0, policy_version 5064 (0.0009) +[2026-06-02 16:48:19,658][255279] Updated weights for policy 0, policy_version 5074 (0.0011) +[2026-06-02 16:48:19,839][255279] Updated weights for policy 0, policy_version 5084 (0.0010) +[2026-06-02 16:48:20,036][255279] Updated weights for policy 0, policy_version 5095 (0.0010) +[2026-06-02 16:48:20,249][255279] Updated weights for policy 0, policy_version 5106 (0.0010) +[2026-06-02 16:48:20,432][255279] Updated weights for policy 0, policy_version 5116 (0.0011) +[2026-06-02 16:48:20,501][253683] Fps is (10 sec: 19660.9, 60 sec: 18568.5, 300 sec: 18724.6). Total num frames: 2621440. Throughput: 0: 18827.4. Samples: 2638080. Policy #0 lag: (min: 63.0, avg: 79.4, max: 127.0) +[2026-06-02 16:48:20,502][253683] Avg episode reward: [(0, '39.455')] +[2026-06-02 16:48:20,507][255187] Saving new best policy, reward=39.455! +[2026-06-02 16:48:21,171][255279] Updated weights for policy 0, policy_version 5126 (0.0009) +[2026-06-02 16:48:21,365][255279] Updated weights for policy 0, policy_version 5137 (0.0009) +[2026-06-02 16:48:21,556][255279] Updated weights for policy 0, policy_version 5148 (0.0009) +[2026-06-02 16:48:21,757][255279] Updated weights for policy 0, policy_version 5158 (0.0009) +[2026-06-02 16:48:21,934][255279] Updated weights for policy 0, policy_version 5168 (0.0009) +[2026-06-02 16:48:22,123][255279] Updated weights for policy 0, policy_version 5178 (0.0009) +[2026-06-02 16:48:22,846][255279] Updated weights for policy 0, policy_version 5188 (0.0009) +[2026-06-02 16:48:23,013][255279] Updated weights for policy 0, policy_version 5198 (0.0009) +[2026-06-02 16:48:23,201][255279] Updated weights for policy 0, policy_version 5208 (0.0009) +[2026-06-02 16:48:23,385][255279] Updated weights for policy 0, policy_version 5218 (0.0008) +[2026-06-02 16:48:23,588][255279] Updated weights for policy 0, policy_version 5229 (0.0008) +[2026-06-02 16:48:23,808][255279] Updated weights for policy 0, policy_version 5241 (0.0009) +[2026-06-02 16:48:24,545][255279] Updated weights for policy 0, policy_version 5251 (0.0009) +[2026-06-02 16:48:24,739][255279] Updated weights for policy 0, policy_version 5262 (0.0009) +[2026-06-02 16:48:24,922][255279] Updated weights for policy 0, policy_version 5272 (0.0009) +[2026-06-02 16:48:25,102][255279] Updated weights for policy 0, policy_version 5282 (0.0009) +[2026-06-02 16:48:25,294][255279] Updated weights for policy 0, policy_version 5292 (0.0009) +[2026-06-02 16:48:25,477][255279] Updated weights for policy 0, policy_version 5302 (0.0009) +[2026-06-02 16:48:25,501][253683] Fps is (10 sec: 16384.1, 60 sec: 18568.6, 300 sec: 18530.9). Total num frames: 2686976. Throughput: 0: 18884.3. Samples: 2699392. Policy #0 lag: (min: 36.0, avg: 51.8, max: 100.0) +[2026-06-02 16:48:25,502][253683] Avg episode reward: [(0, '45.190')] +[2026-06-02 16:48:25,653][255187] Saving new best policy, reward=45.190! +[2026-06-02 16:48:26,243][255279] Updated weights for policy 0, policy_version 5313 (0.0009) +[2026-06-02 16:48:26,412][255279] Updated weights for policy 0, policy_version 5323 (0.0008) +[2026-06-02 16:48:26,592][255279] Updated weights for policy 0, policy_version 5333 (0.0008) +[2026-06-02 16:48:26,782][255279] Updated weights for policy 0, policy_version 5343 (0.0009) +[2026-06-02 16:48:26,983][255279] Updated weights for policy 0, policy_version 5354 (0.0009) +[2026-06-02 16:48:27,169][255279] Updated weights for policy 0, policy_version 5364 (0.0009) +[2026-06-02 16:48:27,368][255279] Updated weights for policy 0, policy_version 5375 (0.0009) +[2026-06-02 16:48:28,080][255279] Updated weights for policy 0, policy_version 5385 (0.0008) +[2026-06-02 16:48:28,265][255279] Updated weights for policy 0, policy_version 5395 (0.0008) +[2026-06-02 16:48:28,470][255279] Updated weights for policy 0, policy_version 5406 (0.0008) +[2026-06-02 16:48:28,658][255279] Updated weights for policy 0, policy_version 5416 (0.0008) +[2026-06-02 16:48:28,846][255279] Updated weights for policy 0, policy_version 5426 (0.0008) +[2026-06-02 16:48:29,036][255279] Updated weights for policy 0, policy_version 5436 (0.0008) +[2026-06-02 16:48:29,768][255279] Updated weights for policy 0, policy_version 5448 (0.0008) +[2026-06-02 16:48:29,948][255279] Updated weights for policy 0, policy_version 5458 (0.0008) +[2026-06-02 16:48:30,129][255279] Updated weights for policy 0, policy_version 5468 (0.0008) +[2026-06-02 16:48:30,332][255279] Updated weights for policy 0, policy_version 5479 (0.0008) +[2026-06-02 16:48:30,501][253683] Fps is (10 sec: 16384.0, 60 sec: 18568.6, 300 sec: 18568.6). Total num frames: 2785280. Throughput: 0: 18935.5. Samples: 2807680. Policy #0 lag: (min: 14.0, avg: 30.9, max: 78.0) +[2026-06-02 16:48:30,502][253683] Avg episode reward: [(0, '48.287')] +[2026-06-02 16:48:30,511][255279] Updated weights for policy 0, policy_version 5489 (0.0009) +[2026-06-02 16:48:30,715][255279] Updated weights for policy 0, policy_version 5500 (0.0008) +[2026-06-02 16:48:30,788][255187] Saving new best policy, reward=48.287! +[2026-06-02 16:48:31,429][255279] Updated weights for policy 0, policy_version 5510 (0.0008) +[2026-06-02 16:48:31,610][255279] Updated weights for policy 0, policy_version 5520 (0.0008) +[2026-06-02 16:48:31,795][255279] Updated weights for policy 0, policy_version 5530 (0.0009) +[2026-06-02 16:48:31,990][255279] Updated weights for policy 0, policy_version 5541 (0.0008) +[2026-06-02 16:48:32,192][255279] Updated weights for policy 0, policy_version 5552 (0.0008) +[2026-06-02 16:48:32,383][255279] Updated weights for policy 0, policy_version 5562 (0.0009) +[2026-06-02 16:48:33,088][255279] Updated weights for policy 0, policy_version 5572 (0.0008) +[2026-06-02 16:48:33,268][255279] Updated weights for policy 0, policy_version 5582 (0.0009) +[2026-06-02 16:48:33,472][255279] Updated weights for policy 0, policy_version 5593 (0.0008) +[2026-06-02 16:48:33,648][255279] Updated weights for policy 0, policy_version 5603 (0.0008) +[2026-06-02 16:48:33,833][255279] Updated weights for policy 0, policy_version 5613 (0.0008) +[2026-06-02 16:48:34,031][255279] Updated weights for policy 0, policy_version 5623 (0.0009) +[2026-06-02 16:48:34,741][255279] Updated weights for policy 0, policy_version 5633 (0.0009) +[2026-06-02 16:48:34,915][255279] Updated weights for policy 0, policy_version 5643 (0.0009) +[2026-06-02 16:48:35,106][255279] Updated weights for policy 0, policy_version 5654 (0.0009) +[2026-06-02 16:48:35,315][255279] Updated weights for policy 0, policy_version 5665 (0.0009) +[2026-06-02 16:48:35,495][255279] Updated weights for policy 0, policy_version 5675 (0.0009) +[2026-06-02 16:48:35,501][253683] Fps is (10 sec: 19660.8, 60 sec: 18568.6, 300 sec: 18603.8). Total num frames: 2883584. Throughput: 0: 18927.0. Samples: 2927488. Policy #0 lag: (min: 14.0, avg: 30.9, max: 78.0) +[2026-06-02 16:48:35,502][253683] Avg episode reward: [(0, '51.761')] +[2026-06-02 16:48:35,685][255279] Updated weights for policy 0, policy_version 5685 (0.0009) +[2026-06-02 16:48:35,885][255279] Updated weights for policy 0, policy_version 5695 (0.0009) +[2026-06-02 16:48:35,889][255187] Saving new best policy, reward=51.761! +[2026-06-02 16:48:36,598][255279] Updated weights for policy 0, policy_version 5706 (0.0009) +[2026-06-02 16:48:36,784][255279] Updated weights for policy 0, policy_version 5716 (0.0009) +[2026-06-02 16:48:36,961][255279] Updated weights for policy 0, policy_version 5726 (0.0009) +[2026-06-02 16:48:37,148][255279] Updated weights for policy 0, policy_version 5736 (0.0009) +[2026-06-02 16:48:37,342][255279] Updated weights for policy 0, policy_version 5746 (0.0009) +[2026-06-02 16:48:37,538][255279] Updated weights for policy 0, policy_version 5757 (0.0008) +[2026-06-02 16:48:38,224][255279] Updated weights for policy 0, policy_version 5767 (0.0009) +[2026-06-02 16:48:38,413][255279] Updated weights for policy 0, policy_version 5777 (0.0009) +[2026-06-02 16:48:38,594][255279] Updated weights for policy 0, policy_version 5787 (0.0009) +[2026-06-02 16:48:38,781][255279] Updated weights for policy 0, policy_version 5797 (0.0007) +[2026-06-02 16:48:38,980][255279] Updated weights for policy 0, policy_version 5808 (0.0009) +[2026-06-02 16:48:39,162][255279] Updated weights for policy 0, policy_version 5818 (0.0008) +[2026-06-02 16:48:39,871][255279] Updated weights for policy 0, policy_version 5828 (0.0004) +[2026-06-02 16:48:40,046][255279] Updated weights for policy 0, policy_version 5838 (0.0005) +[2026-06-02 16:48:40,249][255279] Updated weights for policy 0, policy_version 5849 (0.0005) +[2026-06-02 16:48:40,437][255279] Updated weights for policy 0, policy_version 5859 (0.0008) +[2026-06-02 16:48:40,501][253683] Fps is (10 sec: 19660.8, 60 sec: 18568.5, 300 sec: 18636.8). Total num frames: 2981888. Throughput: 0: 18975.3. Samples: 2977280. Policy #0 lag: (min: 67.0, avg: 107.8, max: 133.0) +[2026-06-02 16:48:40,502][253683] Avg episode reward: [(0, '60.701')] +[2026-06-02 16:48:40,637][255279] Updated weights for policy 0, policy_version 5870 (0.0008) +[2026-06-02 16:48:40,828][255279] Updated weights for policy 0, policy_version 5880 (0.0009) +[2026-06-02 16:48:40,968][255187] Saving new best policy, reward=60.701! +[2026-06-02 16:48:41,541][255279] Updated weights for policy 0, policy_version 5890 (0.0009) +[2026-06-02 16:48:41,716][255279] Updated weights for policy 0, policy_version 5900 (0.0009) +[2026-06-02 16:48:41,905][255279] Updated weights for policy 0, policy_version 5910 (0.0008) +[2026-06-02 16:48:42,106][255279] Updated weights for policy 0, policy_version 5921 (0.0008) +[2026-06-02 16:48:42,293][255279] Updated weights for policy 0, policy_version 5931 (0.0008) +[2026-06-02 16:48:42,482][255279] Updated weights for policy 0, policy_version 5941 (0.0008) +[2026-06-02 16:48:42,666][255279] Updated weights for policy 0, policy_version 5951 (0.0009) +[2026-06-02 16:48:43,358][255279] Updated weights for policy 0, policy_version 5961 (0.0008) +[2026-06-02 16:48:43,558][255279] Updated weights for policy 0, policy_version 5972 (0.0008) +[2026-06-02 16:48:43,745][255279] Updated weights for policy 0, policy_version 5982 (0.0008) +[2026-06-02 16:48:43,928][255279] Updated weights for policy 0, policy_version 5992 (0.0008) +[2026-06-02 16:48:44,130][255279] Updated weights for policy 0, policy_version 6003 (0.0008) +[2026-06-02 16:48:44,319][255279] Updated weights for policy 0, policy_version 6013 (0.0008) +[2026-06-02 16:48:45,004][255279] Updated weights for policy 0, policy_version 6023 (0.0009) +[2026-06-02 16:48:45,197][255279] Updated weights for policy 0, policy_version 6033 (0.0009) +[2026-06-02 16:48:45,379][255279] Updated weights for policy 0, policy_version 6043 (0.0009) +[2026-06-02 16:48:45,502][253683] Fps is (10 sec: 19660.1, 60 sec: 18568.5, 300 sec: 18667.8). Total num frames: 3080192. Throughput: 0: 18992.2. Samples: 3097344. Policy #0 lag: (min: 63.0, avg: 79.1, max: 127.0) +[2026-06-02 16:48:45,503][253683] Avg episode reward: [(0, '64.448')] +[2026-06-02 16:48:45,562][255279] Updated weights for policy 0, policy_version 6053 (0.0009) +[2026-06-02 16:48:45,752][255279] Updated weights for policy 0, policy_version 6063 (0.0008) +[2026-06-02 16:48:45,936][255279] Updated weights for policy 0, policy_version 6073 (0.0008) +[2026-06-02 16:48:46,061][255187] Saving new best policy, reward=64.448! +[2026-06-02 16:48:46,623][255279] Updated weights for policy 0, policy_version 6083 (0.0008) +[2026-06-02 16:48:46,812][255279] Updated weights for policy 0, policy_version 6094 (0.0008) +[2026-06-02 16:48:47,004][255279] Updated weights for policy 0, policy_version 6104 (0.0008) +[2026-06-02 16:48:47,182][255279] Updated weights for policy 0, policy_version 6114 (0.0008) +[2026-06-02 16:48:47,381][255279] Updated weights for policy 0, policy_version 6124 (0.0009) +[2026-06-02 16:48:47,560][255279] Updated weights for policy 0, policy_version 6134 (0.0008) +[2026-06-02 16:48:47,742][255279] Updated weights for policy 0, policy_version 6144 (0.0008) +[2026-06-02 16:48:48,442][255279] Updated weights for policy 0, policy_version 6154 (0.0008) +[2026-06-02 16:48:48,646][255279] Updated weights for policy 0, policy_version 6165 (0.0009) +[2026-06-02 16:48:48,841][255279] Updated weights for policy 0, policy_version 6176 (0.0008) +[2026-06-02 16:48:49,021][255279] Updated weights for policy 0, policy_version 6186 (0.0008) +[2026-06-02 16:48:49,214][255279] Updated weights for policy 0, policy_version 6196 (0.0008) +[2026-06-02 16:48:49,400][255279] Updated weights for policy 0, policy_version 6206 (0.0009) +[2026-06-02 16:48:50,128][255279] Updated weights for policy 0, policy_version 6218 (0.0009) +[2026-06-02 16:48:50,309][255279] Updated weights for policy 0, policy_version 6228 (0.0008) +[2026-06-02 16:48:50,486][255279] Updated weights for policy 0, policy_version 6238 (0.0009) +[2026-06-02 16:48:50,502][253683] Fps is (10 sec: 19660.7, 60 sec: 19114.7, 300 sec: 18697.1). Total num frames: 3178496. Throughput: 0: 19177.3. Samples: 3212032. Policy #0 lag: (min: 63.0, avg: 79.2, max: 127.0) +[2026-06-02 16:48:50,502][253683] Avg episode reward: [(0, '74.192')] +[2026-06-02 16:48:50,680][255279] Updated weights for policy 0, policy_version 6248 (0.0008) +[2026-06-02 16:48:50,868][255279] Updated weights for policy 0, policy_version 6258 (0.0008) +[2026-06-02 16:48:51,051][255279] Updated weights for policy 0, policy_version 6268 (0.0008) +[2026-06-02 16:48:51,117][255187] Saving new best policy, reward=74.192! +[2026-06-02 16:48:51,752][255279] Updated weights for policy 0, policy_version 6278 (0.0009) +[2026-06-02 16:48:51,928][255279] Updated weights for policy 0, policy_version 6288 (0.0008) +[2026-06-02 16:48:52,107][255279] Updated weights for policy 0, policy_version 6298 (0.0008) +[2026-06-02 16:48:52,292][255279] Updated weights for policy 0, policy_version 6308 (0.0008) +[2026-06-02 16:48:52,479][255279] Updated weights for policy 0, policy_version 6318 (0.0008) +[2026-06-02 16:48:52,659][255279] Updated weights for policy 0, policy_version 6328 (0.0007) +[2026-06-02 16:48:53,365][255279] Updated weights for policy 0, policy_version 6338 (0.0008) +[2026-06-02 16:48:53,536][255279] Updated weights for policy 0, policy_version 6348 (0.0009) +[2026-06-02 16:48:53,723][255279] Updated weights for policy 0, policy_version 6358 (0.0008) +[2026-06-02 16:48:53,906][255279] Updated weights for policy 0, policy_version 6368 (0.0008) +[2026-06-02 16:48:54,084][255279] Updated weights for policy 0, policy_version 6378 (0.0009) +[2026-06-02 16:48:54,277][255279] Updated weights for policy 0, policy_version 6388 (0.0009) +[2026-06-02 16:48:54,462][255279] Updated weights for policy 0, policy_version 6398 (0.0009) +[2026-06-02 16:48:55,204][255279] Updated weights for policy 0, policy_version 6409 (0.0009) +[2026-06-02 16:48:55,402][255279] Updated weights for policy 0, policy_version 6420 (0.0008) +[2026-06-02 16:48:55,501][253683] Fps is (10 sec: 19661.5, 60 sec: 19114.7, 300 sec: 18724.6). Total num frames: 3276800. Throughput: 0: 18992.4. Samples: 3264512. Policy #0 lag: (min: 63.0, avg: 79.2, max: 127.0) +[2026-06-02 16:48:55,502][253683] Avg episode reward: [(0, '90.306')] +[2026-06-02 16:48:55,587][255279] Updated weights for policy 0, policy_version 6430 (0.0008) +[2026-06-02 16:48:55,771][255279] Updated weights for policy 0, policy_version 6440 (0.0008) +[2026-06-02 16:48:55,961][255279] Updated weights for policy 0, policy_version 6450 (0.0008) +[2026-06-02 16:48:56,143][255279] Updated weights for policy 0, policy_version 6460 (0.0008) +[2026-06-02 16:48:56,211][255187] Saving new best policy, reward=90.306! +[2026-06-02 16:48:56,868][255279] Updated weights for policy 0, policy_version 6471 (0.0008) +[2026-06-02 16:48:57,043][255279] Updated weights for policy 0, policy_version 6481 (0.0008) +[2026-06-02 16:48:57,238][255279] Updated weights for policy 0, policy_version 6491 (0.0008) +[2026-06-02 16:48:57,415][255279] Updated weights for policy 0, policy_version 6501 (0.0009) +[2026-06-02 16:48:57,626][255279] Updated weights for policy 0, policy_version 6512 (0.0008) +[2026-06-02 16:48:57,815][255279] Updated weights for policy 0, policy_version 6522 (0.0009) +[2026-06-02 16:48:58,474][255279] Updated weights for policy 0, policy_version 6532 (0.0008) +[2026-06-02 16:48:58,648][255279] Updated weights for policy 0, policy_version 6542 (0.0008) +[2026-06-02 16:48:58,833][255279] Updated weights for policy 0, policy_version 6552 (0.0008) +[2026-06-02 16:48:59,029][255279] Updated weights for policy 0, policy_version 6562 (0.0009) +[2026-06-02 16:48:59,216][255279] Updated weights for policy 0, policy_version 6572 (0.0008) +[2026-06-02 16:48:59,410][255279] Updated weights for policy 0, policy_version 6582 (0.0008) +[2026-06-02 16:48:59,583][255279] Updated weights for policy 0, policy_version 6592 (0.0009) +[2026-06-02 16:49:00,260][255279] Updated weights for policy 0, policy_version 6602 (0.0009) +[2026-06-02 16:49:00,448][255279] Updated weights for policy 0, policy_version 6612 (0.0009) +[2026-06-02 16:49:00,501][253683] Fps is (10 sec: 19660.9, 60 sec: 19114.7, 300 sec: 18750.6). Total num frames: 3375104. Throughput: 0: 19282.5. Samples: 3385856. Policy #0 lag: (min: 63.0, avg: 79.9, max: 127.0) +[2026-06-02 16:49:00,502][253683] Avg episode reward: [(0, '88.382')] +[2026-06-02 16:49:00,634][255279] Updated weights for policy 0, policy_version 6622 (0.0008) +[2026-06-02 16:49:00,814][255279] Updated weights for policy 0, policy_version 6632 (0.0009) +[2026-06-02 16:49:01,009][255279] Updated weights for policy 0, policy_version 6642 (0.0008) +[2026-06-02 16:49:01,198][255279] Updated weights for policy 0, policy_version 6652 (0.0008) +[2026-06-02 16:49:01,894][255279] Updated weights for policy 0, policy_version 6662 (0.0008) +[2026-06-02 16:49:02,070][255279] Updated weights for policy 0, policy_version 6672 (0.0008) +[2026-06-02 16:49:02,256][255279] Updated weights for policy 0, policy_version 6682 (0.0008) +[2026-06-02 16:49:02,446][255279] Updated weights for policy 0, policy_version 6692 (0.0008) +[2026-06-02 16:49:02,622][255279] Updated weights for policy 0, policy_version 6702 (0.0008) +[2026-06-02 16:49:02,805][255279] Updated weights for policy 0, policy_version 6712 (0.0008) +[2026-06-02 16:49:03,532][255279] Updated weights for policy 0, policy_version 6722 (0.0008) +[2026-06-02 16:49:03,704][255279] Updated weights for policy 0, policy_version 6732 (0.0008) +[2026-06-02 16:49:03,899][255279] Updated weights for policy 0, policy_version 6742 (0.0008) +[2026-06-02 16:49:04,103][255279] Updated weights for policy 0, policy_version 6753 (0.0008) +[2026-06-02 16:49:04,287][255279] Updated weights for policy 0, policy_version 6763 (0.0008) +[2026-06-02 16:49:04,468][255279] Updated weights for policy 0, policy_version 6773 (0.0008) +[2026-06-02 16:49:04,658][255279] Updated weights for policy 0, policy_version 6783 (0.0008) +[2026-06-02 16:49:05,347][255279] Updated weights for policy 0, policy_version 6793 (0.0009) +[2026-06-02 16:49:05,501][253683] Fps is (10 sec: 19660.8, 60 sec: 19114.7, 300 sec: 18775.2). Total num frames: 3473408. Throughput: 0: 19080.5. Samples: 3496704. Policy #0 lag: (min: 63.0, avg: 79.9, max: 127.0) +[2026-06-02 16:49:05,502][253683] Avg episode reward: [(0, '93.439')] +[2026-06-02 16:49:05,526][255279] Updated weights for policy 0, policy_version 6803 (0.0009) +[2026-06-02 16:49:05,750][255279] Updated weights for policy 0, policy_version 6815 (0.0008) +[2026-06-02 16:49:05,949][255279] Updated weights for policy 0, policy_version 6826 (0.0009) +[2026-06-02 16:49:06,142][255279] Updated weights for policy 0, policy_version 6836 (0.0008) +[2026-06-02 16:49:06,323][255279] Updated weights for policy 0, policy_version 6846 (0.0009) +[2026-06-02 16:49:06,366][255187] Saving new best policy, reward=93.439! +[2026-06-02 16:49:07,033][255279] Updated weights for policy 0, policy_version 6856 (0.0008) +[2026-06-02 16:49:07,225][255279] Updated weights for policy 0, policy_version 6867 (0.0008) +[2026-06-02 16:49:07,414][255279] Updated weights for policy 0, policy_version 6877 (0.0008) +[2026-06-02 16:49:07,619][255279] Updated weights for policy 0, policy_version 6888 (0.0008) +[2026-06-02 16:49:07,798][255279] Updated weights for policy 0, policy_version 6898 (0.0008) +[2026-06-02 16:49:08,002][255279] Updated weights for policy 0, policy_version 6909 (0.0009) +[2026-06-02 16:49:08,725][255279] Updated weights for policy 0, policy_version 6920 (0.0009) +[2026-06-02 16:49:08,911][255279] Updated weights for policy 0, policy_version 6930 (0.0008) +[2026-06-02 16:49:09,097][255279] Updated weights for policy 0, policy_version 6940 (0.0008) +[2026-06-02 16:49:09,285][255279] Updated weights for policy 0, policy_version 6950 (0.0008) +[2026-06-02 16:49:09,470][255279] Updated weights for policy 0, policy_version 6960 (0.0008) +[2026-06-02 16:49:09,660][255279] Updated weights for policy 0, policy_version 6970 (0.0008) +[2026-06-02 16:49:10,347][255279] Updated weights for policy 0, policy_version 6981 (0.0008) +[2026-06-02 16:49:10,502][253683] Fps is (10 sec: 19660.8, 60 sec: 19114.7, 300 sec: 18798.5). Total num frames: 3571712. Throughput: 0: 19080.5. Samples: 3558016. Policy #0 lag: (min: 63.0, avg: 80.4, max: 127.0) +[2026-06-02 16:49:10,503][253683] Avg episode reward: [(0, '124.403')] +[2026-06-02 16:49:10,534][255279] Updated weights for policy 0, policy_version 6991 (0.0008) +[2026-06-02 16:49:10,734][255279] Updated weights for policy 0, policy_version 7002 (0.0008) +[2026-06-02 16:49:10,919][255279] Updated weights for policy 0, policy_version 7012 (0.0008) +[2026-06-02 16:49:11,098][255279] Updated weights for policy 0, policy_version 7022 (0.0009) +[2026-06-02 16:49:11,304][255279] Updated weights for policy 0, policy_version 7033 (0.0008) +[2026-06-02 16:49:11,432][255187] Saving new best policy, reward=124.403! +[2026-06-02 16:49:12,029][255279] Updated weights for policy 0, policy_version 7044 (0.0008) +[2026-06-02 16:49:12,207][255279] Updated weights for policy 0, policy_version 7054 (0.0008) +[2026-06-02 16:49:12,386][255279] Updated weights for policy 0, policy_version 7064 (0.0008) +[2026-06-02 16:49:12,589][255279] Updated weights for policy 0, policy_version 7075 (0.0008) +[2026-06-02 16:49:12,775][255279] Updated weights for policy 0, policy_version 7085 (0.0008) +[2026-06-02 16:49:12,973][255279] Updated weights for policy 0, policy_version 7096 (0.0008) +[2026-06-02 16:49:13,711][255279] Updated weights for policy 0, policy_version 7107 (0.0008) +[2026-06-02 16:49:13,883][255279] Updated weights for policy 0, policy_version 7117 (0.0008) +[2026-06-02 16:49:14,067][255279] Updated weights for policy 0, policy_version 7127 (0.0008) +[2026-06-02 16:49:14,251][255279] Updated weights for policy 0, policy_version 7137 (0.0008) +[2026-06-02 16:49:14,440][255279] Updated weights for policy 0, policy_version 7147 (0.0007) +[2026-06-02 16:49:14,623][255279] Updated weights for policy 0, policy_version 7157 (0.0008) +[2026-06-02 16:49:14,809][255279] Updated weights for policy 0, policy_version 7167 (0.0009) +[2026-06-02 16:49:15,501][253683] Fps is (10 sec: 19660.7, 60 sec: 19114.7, 300 sec: 18820.6). Total num frames: 3670016. Throughput: 0: 19367.8. Samples: 3679232. Policy #0 lag: (min: 63.0, avg: 78.8, max: 127.0) +[2026-06-02 16:49:15,502][253683] Avg episode reward: [(0, '120.735')] +[2026-06-02 16:49:15,504][255279] Updated weights for policy 0, policy_version 7177 (0.0009) +[2026-06-02 16:49:15,683][255279] Updated weights for policy 0, policy_version 7187 (0.0008) +[2026-06-02 16:49:15,871][255279] Updated weights for policy 0, policy_version 7197 (0.0008) +[2026-06-02 16:49:16,052][255279] Updated weights for policy 0, policy_version 7207 (0.0008) +[2026-06-02 16:49:16,246][255279] Updated weights for policy 0, policy_version 7217 (0.0008) +[2026-06-02 16:49:16,430][255279] Updated weights for policy 0, policy_version 7227 (0.0008) +[2026-06-02 16:49:17,147][255279] Updated weights for policy 0, policy_version 7239 (0.0009) +[2026-06-02 16:49:17,330][255279] Updated weights for policy 0, policy_version 7249 (0.0008) +[2026-06-02 16:49:17,510][255279] Updated weights for policy 0, policy_version 7259 (0.0009) +[2026-06-02 16:49:17,701][255279] Updated weights for policy 0, policy_version 7269 (0.0008) +[2026-06-02 16:49:17,888][255279] Updated weights for policy 0, policy_version 7279 (0.0008) +[2026-06-02 16:49:18,081][255279] Updated weights for policy 0, policy_version 7289 (0.0008) +[2026-06-02 16:49:18,788][255279] Updated weights for policy 0, policy_version 7299 (0.0008) +[2026-06-02 16:49:18,960][255279] Updated weights for policy 0, policy_version 7309 (0.0008) +[2026-06-02 16:49:19,151][255279] Updated weights for policy 0, policy_version 7319 (0.0008) +[2026-06-02 16:49:19,332][255279] Updated weights for policy 0, policy_version 7329 (0.0008) +[2026-06-02 16:49:19,527][255279] Updated weights for policy 0, policy_version 7339 (0.0009) +[2026-06-02 16:49:19,714][255279] Updated weights for policy 0, policy_version 7349 (0.0008) +[2026-06-02 16:49:19,895][255279] Updated weights for policy 0, policy_version 7359 (0.0008) +[2026-06-02 16:49:20,502][253683] Fps is (10 sec: 19660.7, 60 sec: 19114.7, 300 sec: 18841.6). Total num frames: 3768320. Throughput: 0: 19160.2. Samples: 3789696. Policy #0 lag: (min: 63.0, avg: 78.8, max: 127.0) +[2026-06-02 16:49:20,502][253683] Avg episode reward: [(0, '123.360')] +[2026-06-02 16:49:20,577][255279] Updated weights for policy 0, policy_version 7369 (0.0008) +[2026-06-02 16:49:20,757][255279] Updated weights for policy 0, policy_version 7379 (0.0009) +[2026-06-02 16:49:20,944][255279] Updated weights for policy 0, policy_version 7389 (0.0008) +[2026-06-02 16:49:21,124][255279] Updated weights for policy 0, policy_version 7399 (0.0009) +[2026-06-02 16:49:21,335][255279] Updated weights for policy 0, policy_version 7410 (0.0009) +[2026-06-02 16:49:21,525][255279] Updated weights for policy 0, policy_version 7420 (0.0009) +[2026-06-02 16:49:22,189][255279] Updated weights for policy 0, policy_version 7430 (0.0006) +[2026-06-02 16:49:22,370][255279] Updated weights for policy 0, policy_version 7440 (0.0004) +[2026-06-02 16:49:22,555][255279] Updated weights for policy 0, policy_version 7450 (0.0004) +[2026-06-02 16:49:22,778][255279] Updated weights for policy 0, policy_version 7462 (0.0008) +[2026-06-02 16:49:22,963][255279] Updated weights for policy 0, policy_version 7472 (0.0009) +[2026-06-02 16:49:23,143][255279] Updated weights for policy 0, policy_version 7482 (0.0008) +[2026-06-02 16:49:23,860][255279] Updated weights for policy 0, policy_version 7493 (0.0008) +[2026-06-02 16:49:24,040][255279] Updated weights for policy 0, policy_version 7503 (0.0009) +[2026-06-02 16:49:24,225][255279] Updated weights for policy 0, policy_version 7513 (0.0008) +[2026-06-02 16:49:24,408][255279] Updated weights for policy 0, policy_version 7523 (0.0008) +[2026-06-02 16:49:24,592][255279] Updated weights for policy 0, policy_version 7533 (0.0009) +[2026-06-02 16:49:24,796][255279] Updated weights for policy 0, policy_version 7544 (0.0008) +[2026-06-02 16:49:25,501][253683] Fps is (10 sec: 19660.9, 60 sec: 19660.8, 300 sec: 18861.6). Total num frames: 3866624. Throughput: 0: 19387.7. Samples: 3849728. Policy #0 lag: (min: 63.0, avg: 79.6, max: 127.0) +[2026-06-02 16:49:25,502][253683] Avg episode reward: [(0, '153.340')] +[2026-06-02 16:49:25,505][255279] Updated weights for policy 0, policy_version 7555 (0.0008) +[2026-06-02 16:49:25,689][255279] Updated weights for policy 0, policy_version 7565 (0.0008) +[2026-06-02 16:49:25,872][255279] Updated weights for policy 0, policy_version 7575 (0.0009) +[2026-06-02 16:49:26,079][255279] Updated weights for policy 0, policy_version 7586 (0.0008) +[2026-06-02 16:49:26,265][255279] Updated weights for policy 0, policy_version 7596 (0.0009) +[2026-06-02 16:49:26,450][255279] Updated weights for policy 0, policy_version 7606 (0.0009) +[2026-06-02 16:49:26,626][255187] Saving new best policy, reward=153.340! +[2026-06-02 16:49:26,628][255279] Updated weights for policy 0, policy_version 7616 (0.0009) +[2026-06-02 16:49:27,323][255279] Updated weights for policy 0, policy_version 7626 (0.0009) +[2026-06-02 16:49:27,502][255279] Updated weights for policy 0, policy_version 7636 (0.0009) +[2026-06-02 16:49:27,689][255279] Updated weights for policy 0, policy_version 7646 (0.0009) +[2026-06-02 16:49:27,894][255279] Updated weights for policy 0, policy_version 7657 (0.0009) +[2026-06-02 16:49:28,073][255279] Updated weights for policy 0, policy_version 7667 (0.0009) +[2026-06-02 16:49:28,289][255279] Updated weights for policy 0, policy_version 7678 (0.0009) +[2026-06-02 16:49:28,970][255279] Updated weights for policy 0, policy_version 7689 (0.0008) +[2026-06-02 16:49:29,149][255279] Updated weights for policy 0, policy_version 7699 (0.0009) +[2026-06-02 16:49:29,337][255279] Updated weights for policy 0, policy_version 7709 (0.0009) +[2026-06-02 16:49:29,518][255279] Updated weights for policy 0, policy_version 7719 (0.0009) +[2026-06-02 16:49:29,721][255279] Updated weights for policy 0, policy_version 7730 (0.0008) +[2026-06-02 16:49:29,907][255279] Updated weights for policy 0, policy_version 7740 (0.0008) +[2026-06-02 16:49:30,502][253683] Fps is (10 sec: 19660.8, 60 sec: 19660.8, 300 sec: 18880.6). Total num frames: 3964928. Throughput: 0: 19402.1. Samples: 3970432. Policy #0 lag: (min: 63.0, avg: 79.6, max: 127.0) +[2026-06-02 16:49:30,502][253683] Avg episode reward: [(0, '144.199')] +[2026-06-02 16:49:30,599][255279] Updated weights for policy 0, policy_version 7750 (0.0008) +[2026-06-02 16:49:30,774][255279] Updated weights for policy 0, policy_version 7760 (0.0008) +[2026-06-02 16:49:30,958][255279] Updated weights for policy 0, policy_version 7770 (0.0008) +[2026-06-02 16:49:31,169][255279] Updated weights for policy 0, policy_version 7781 (0.0008) +[2026-06-02 16:49:31,371][255279] Updated weights for policy 0, policy_version 7792 (0.0008) +[2026-06-02 16:49:31,564][255279] Updated weights for policy 0, policy_version 7802 (0.0009) +[2026-06-02 16:49:32,245][255279] Updated weights for policy 0, policy_version 7812 (0.0009) +[2026-06-02 16:49:32,432][255279] Updated weights for policy 0, policy_version 7822 (0.0009) +[2026-06-02 16:49:32,611][255279] Updated weights for policy 0, policy_version 7832 (0.0009) +[2026-06-02 16:49:32,797][255279] Updated weights for policy 0, policy_version 7842 (0.0009) +[2026-06-02 16:49:32,993][255279] Updated weights for policy 0, policy_version 7852 (0.0008) +[2026-06-02 16:49:33,175][255279] Updated weights for policy 0, policy_version 7862 (0.0008) +[2026-06-02 16:49:33,353][255279] Updated weights for policy 0, policy_version 7872 (0.0008) +[2026-06-02 16:49:34,046][255279] Updated weights for policy 0, policy_version 7882 (0.0008) +[2026-06-02 16:49:34,227][255279] Updated weights for policy 0, policy_version 7892 (0.0009) +[2026-06-02 16:49:34,416][255279] Updated weights for policy 0, policy_version 7902 (0.0008) +[2026-06-02 16:49:34,590][255279] Updated weights for policy 0, policy_version 7912 (0.0008) +[2026-06-02 16:49:34,786][255279] Updated weights for policy 0, policy_version 7922 (0.0008) +[2026-06-02 16:49:34,965][255279] Updated weights for policy 0, policy_version 7932 (0.0008) +[2026-06-02 16:49:35,502][253683] Fps is (10 sec: 19660.7, 60 sec: 19660.8, 300 sec: 18898.8). Total num frames: 4063232. Throughput: 0: 19265.4. Samples: 4078976. Policy #0 lag: (min: 53.0, avg: 69.4, max: 117.0) +[2026-06-02 16:49:35,502][253683] Avg episode reward: [(0, '140.056')] +[2026-06-02 16:49:35,659][255279] Updated weights for policy 0, policy_version 7942 (0.0008) +[2026-06-02 16:49:35,841][255279] Updated weights for policy 0, policy_version 7952 (0.0006) +[2026-06-02 16:49:36,027][255279] Updated weights for policy 0, policy_version 7962 (0.0008) +[2026-06-02 16:49:36,228][255279] Updated weights for policy 0, policy_version 7973 (0.0009) +[2026-06-02 16:49:36,416][255279] Updated weights for policy 0, policy_version 7983 (0.0008) +[2026-06-02 16:49:36,627][255279] Updated weights for policy 0, policy_version 7994 (0.0009) +[2026-06-02 16:49:37,310][255279] Updated weights for policy 0, policy_version 8004 (0.0008) +[2026-06-02 16:49:37,488][255279] Updated weights for policy 0, policy_version 8014 (0.0008) +[2026-06-02 16:49:37,668][255279] Updated weights for policy 0, policy_version 8024 (0.0008) +[2026-06-02 16:49:37,861][255279] Updated weights for policy 0, policy_version 8034 (0.0009) +[2026-06-02 16:49:38,044][255279] Updated weights for policy 0, policy_version 8044 (0.0008) +[2026-06-02 16:49:38,239][255279] Updated weights for policy 0, policy_version 8054 (0.0008) +[2026-06-02 16:49:38,419][255279] Updated weights for policy 0, policy_version 8064 (0.0009) +[2026-06-02 16:49:39,125][255279] Updated weights for policy 0, policy_version 8074 (0.0009) +[2026-06-02 16:49:39,309][255279] Updated weights for policy 0, policy_version 8084 (0.0009) +[2026-06-02 16:49:39,498][255279] Updated weights for policy 0, policy_version 8094 (0.0008) +[2026-06-02 16:49:39,676][255279] Updated weights for policy 0, policy_version 8104 (0.0008) +[2026-06-02 16:49:39,860][255279] Updated weights for policy 0, policy_version 8114 (0.0008) +[2026-06-02 16:49:39,973][255187] Early stopping after 7 epochs (56 sgd steps), loss delta 0.0000006 +[2026-06-02 16:49:40,501][253683] Fps is (10 sec: 19660.8, 60 sec: 19660.8, 300 sec: 18916.1). Total num frames: 4161536. Throughput: 0: 19456.0. Samples: 4140032. Policy #0 lag: (min: 53.0, avg: 69.4, max: 117.0) +[2026-06-02 16:49:40,502][253683] Avg episode reward: [(0, '157.307')] +[2026-06-02 16:49:40,591][255279] Updated weights for policy 0, policy_version 8125 (0.0009) +[2026-06-02 16:49:40,778][255279] Updated weights for policy 0, policy_version 8135 (0.0008) +[2026-06-02 16:49:40,960][255279] Updated weights for policy 0, policy_version 8145 (0.0008) +[2026-06-02 16:49:41,138][255279] Updated weights for policy 0, policy_version 8155 (0.0008) +[2026-06-02 16:49:41,329][255279] Updated weights for policy 0, policy_version 8165 (0.0008) +[2026-06-02 16:49:41,522][255279] Updated weights for policy 0, policy_version 8175 (0.0009) +[2026-06-02 16:49:41,687][255187] Saving new best policy, reward=157.307! +[2026-06-02 16:49:42,227][255279] Updated weights for policy 0, policy_version 8185 (0.0008) +[2026-06-02 16:49:42,397][255279] Updated weights for policy 0, policy_version 8195 (0.0008) +[2026-06-02 16:49:42,584][255279] Updated weights for policy 0, policy_version 8205 (0.0008) +[2026-06-02 16:49:42,789][255279] Updated weights for policy 0, policy_version 8216 (0.0009) +[2026-06-02 16:49:42,969][255279] Updated weights for policy 0, policy_version 8226 (0.0008) +[2026-06-02 16:49:43,159][255279] Updated weights for policy 0, policy_version 8236 (0.0008) +[2026-06-02 16:49:43,350][255279] Updated weights for policy 0, policy_version 8246 (0.0008) +[2026-06-02 16:49:44,021][255279] Updated weights for policy 0, policy_version 8256 (0.0008) +[2026-06-02 16:49:44,208][255279] Updated weights for policy 0, policy_version 8266 (0.0008) +[2026-06-02 16:49:44,389][255279] Updated weights for policy 0, policy_version 8276 (0.0009) +[2026-06-02 16:49:44,578][255279] Updated weights for policy 0, policy_version 8286 (0.0008) +[2026-06-02 16:49:44,766][255279] Updated weights for policy 0, policy_version 8296 (0.0008) +[2026-06-02 16:49:44,970][255279] Updated weights for policy 0, policy_version 8307 (0.0008) +[2026-06-02 16:49:45,502][253683] Fps is (10 sec: 19660.8, 60 sec: 19660.9, 300 sec: 18932.6). Total num frames: 4259840. Throughput: 0: 19447.4. Samples: 4260992. Policy #0 lag: (min: 63.0, avg: 77.4, max: 119.0) +[2026-06-02 16:49:45,502][253683] Avg episode reward: [(0, '181.542')] +[2026-06-02 16:49:45,671][255279] Updated weights for policy 0, policy_version 8318 (0.0008) +[2026-06-02 16:49:45,853][255279] Updated weights for policy 0, policy_version 8328 (0.0009) +[2026-06-02 16:49:46,036][255279] Updated weights for policy 0, policy_version 8338 (0.0009) +[2026-06-02 16:49:46,232][255279] Updated weights for policy 0, policy_version 8348 (0.0008) +[2026-06-02 16:49:46,407][255279] Updated weights for policy 0, policy_version 8358 (0.0008) +[2026-06-02 16:49:46,598][255279] Updated weights for policy 0, policy_version 8368 (0.0008) +[2026-06-02 16:49:46,738][255187] Saving new best policy, reward=181.542! +[2026-06-02 16:49:47,295][255279] Updated weights for policy 0, policy_version 8378 (0.0008) +[2026-06-02 16:49:47,479][255279] Updated weights for policy 0, policy_version 8388 (0.0008) +[2026-06-02 16:49:47,661][255279] Updated weights for policy 0, policy_version 8398 (0.0008) +[2026-06-02 16:49:47,847][255279] Updated weights for policy 0, policy_version 8408 (0.0008) +[2026-06-02 16:49:48,031][255279] Updated weights for policy 0, policy_version 8418 (0.0008) +[2026-06-02 16:49:48,223][255279] Updated weights for policy 0, policy_version 8428 (0.0008) +[2026-06-02 16:49:48,407][255279] Updated weights for policy 0, policy_version 8438 (0.0009) +[2026-06-02 16:49:49,088][255279] Updated weights for policy 0, policy_version 8448 (0.0008) +[2026-06-02 16:49:49,278][255279] Updated weights for policy 0, policy_version 8458 (0.0009) +[2026-06-02 16:49:49,503][255279] Updated weights for policy 0, policy_version 8470 (0.0009) +[2026-06-02 16:49:49,687][255279] Updated weights for policy 0, policy_version 8480 (0.0009) +[2026-06-02 16:49:49,873][255279] Updated weights for policy 0, policy_version 8490 (0.0009) +[2026-06-02 16:49:50,058][255279] Updated weights for policy 0, policy_version 8500 (0.0008) +[2026-06-02 16:49:50,501][253683] Fps is (10 sec: 19660.8, 60 sec: 19660.8, 300 sec: 18948.5). Total num frames: 4358144. Throughput: 0: 19484.4. Samples: 4373504. Policy #0 lag: (min: 63.0, avg: 80.2, max: 127.0) +[2026-06-02 16:49:50,502][253683] Avg episode reward: [(0, '185.322')] +[2026-06-02 16:49:50,755][255279] Updated weights for policy 0, policy_version 8510 (0.0009) +[2026-06-02 16:49:50,932][255279] Updated weights for policy 0, policy_version 8520 (0.0008) +[2026-06-02 16:49:51,115][255279] Updated weights for policy 0, policy_version 8530 (0.0008) +[2026-06-02 16:49:51,295][255279] Updated weights for policy 0, policy_version 8540 (0.0009) +[2026-06-02 16:49:51,479][255279] Updated weights for policy 0, policy_version 8550 (0.0008) +[2026-06-02 16:49:51,666][255279] Updated weights for policy 0, policy_version 8560 (0.0008) +[2026-06-02 16:49:51,816][255187] Saving new best policy, reward=185.322! +[2026-06-02 16:49:52,373][255279] Updated weights for policy 0, policy_version 8570 (0.0009) +[2026-06-02 16:49:52,555][255279] Updated weights for policy 0, policy_version 8580 (0.0008) +[2026-06-02 16:49:52,734][255279] Updated weights for policy 0, policy_version 8590 (0.0009) +[2026-06-02 16:49:52,925][255279] Updated weights for policy 0, policy_version 8600 (0.0008) +[2026-06-02 16:49:53,109][255279] Updated weights for policy 0, policy_version 8610 (0.0008) +[2026-06-02 16:49:53,299][255279] Updated weights for policy 0, policy_version 8620 (0.0009) +[2026-06-02 16:49:53,503][255279] Updated weights for policy 0, policy_version 8631 (0.0008) +[2026-06-02 16:49:54,164][255279] Updated weights for policy 0, policy_version 8641 (0.0009) +[2026-06-02 16:49:54,343][255279] Updated weights for policy 0, policy_version 8651 (0.0009) +[2026-06-02 16:49:54,527][255279] Updated weights for policy 0, policy_version 8661 (0.0009) +[2026-06-02 16:49:54,707][255279] Updated weights for policy 0, policy_version 8671 (0.0008) +[2026-06-02 16:49:54,901][255279] Updated weights for policy 0, policy_version 8681 (0.0008) +[2026-06-02 16:49:55,078][255279] Updated weights for policy 0, policy_version 8691 (0.0008) +[2026-06-02 16:49:55,501][253683] Fps is (10 sec: 19660.9, 60 sec: 19660.8, 300 sec: 18963.6). Total num frames: 4456448. Throughput: 0: 19498.7. Samples: 4435456. Policy #0 lag: (min: 63.0, avg: 80.2, max: 127.0) +[2026-06-02 16:49:55,502][253683] Avg episode reward: [(0, '193.511')] +[2026-06-02 16:49:55,507][255187] Saving new best policy, reward=193.511! +[2026-06-02 16:49:55,815][255279] Updated weights for policy 0, policy_version 8701 (0.0008) +[2026-06-02 16:49:55,991][255279] Updated weights for policy 0, policy_version 8711 (0.0008) +[2026-06-02 16:49:56,203][255279] Updated weights for policy 0, policy_version 8722 (0.0008) +[2026-06-02 16:49:56,389][255279] Updated weights for policy 0, policy_version 8732 (0.0008) +[2026-06-02 16:49:56,568][255279] Updated weights for policy 0, policy_version 8742 (0.0008) +[2026-06-02 16:49:56,751][255279] Updated weights for policy 0, policy_version 8752 (0.0009) +[2026-06-02 16:49:57,405][255279] Updated weights for policy 0, policy_version 8762 (0.0009) +[2026-06-02 16:49:57,576][255279] Updated weights for policy 0, policy_version 8772 (0.0008) +[2026-06-02 16:49:57,778][255279] Updated weights for policy 0, policy_version 8783 (0.0008) +[2026-06-02 16:49:57,960][255279] Updated weights for policy 0, policy_version 8793 (0.0008) +[2026-06-02 16:49:58,142][255279] Updated weights for policy 0, policy_version 8803 (0.0009) +[2026-06-02 16:49:58,334][255279] Updated weights for policy 0, policy_version 8813 (0.0008) +[2026-06-02 16:49:58,522][255279] Updated weights for policy 0, policy_version 8823 (0.0009) +[2026-06-02 16:49:59,225][255279] Updated weights for policy 0, policy_version 8834 (0.0006) +[2026-06-02 16:49:59,415][255279] Updated weights for policy 0, policy_version 8844 (0.0005) +[2026-06-02 16:49:59,601][255279] Updated weights for policy 0, policy_version 8854 (0.0005) +[2026-06-02 16:49:59,789][255279] Updated weights for policy 0, policy_version 8864 (0.0005) +[2026-06-02 16:49:59,977][255279] Updated weights for policy 0, policy_version 8874 (0.0005) +[2026-06-02 16:50:00,172][255279] Updated weights for policy 0, policy_version 8884 (0.0007) +[2026-06-02 16:50:00,501][253683] Fps is (10 sec: 19660.9, 60 sec: 19660.8, 300 sec: 18978.1). Total num frames: 4554752. Throughput: 0: 19285.3. Samples: 4547072. Policy #0 lag: (min: 63.0, avg: 84.0, max: 127.0) +[2026-06-02 16:50:00,502][253683] Avg episode reward: [(0, '208.296')] +[2026-06-02 16:50:00,505][255187] Saving new best policy, reward=208.296! +[2026-06-02 16:50:00,862][255279] Updated weights for policy 0, policy_version 8894 (0.0009) +[2026-06-02 16:50:01,040][255279] Updated weights for policy 0, policy_version 8904 (0.0009) +[2026-06-02 16:50:01,233][255279] Updated weights for policy 0, policy_version 8914 (0.0006) +[2026-06-02 16:50:01,421][255279] Updated weights for policy 0, policy_version 8924 (0.0005) +[2026-06-02 16:50:01,485][255187] Early stopping after 5 epochs (40 sgd steps), loss delta 0.0000000 +[2026-06-02 16:50:02,211][255279] Updated weights for policy 0, policy_version 8934 (0.0007) +[2026-06-02 16:50:02,389][255279] Updated weights for policy 0, policy_version 8944 (0.0008) +[2026-06-02 16:50:02,583][255279] Updated weights for policy 0, policy_version 8954 (0.0009) +[2026-06-02 16:50:02,771][255279] Updated weights for policy 0, policy_version 8964 (0.0008) +[2026-06-02 16:50:02,957][255279] Updated weights for policy 0, policy_version 8974 (0.0010) +[2026-06-02 16:50:03,142][255279] Updated weights for policy 0, policy_version 8984 (0.0009) +[2026-06-02 16:50:03,853][255279] Updated weights for policy 0, policy_version 8995 (0.0008) +[2026-06-02 16:50:04,031][255279] Updated weights for policy 0, policy_version 9005 (0.0008) +[2026-06-02 16:50:04,211][255279] Updated weights for policy 0, policy_version 9015 (0.0009) +[2026-06-02 16:50:04,401][255279] Updated weights for policy 0, policy_version 9025 (0.0009) +[2026-06-02 16:50:04,587][255279] Updated weights for policy 0, policy_version 9035 (0.0009) +[2026-06-02 16:50:04,773][255279] Updated weights for policy 0, policy_version 9045 (0.0008) +[2026-06-02 16:50:04,971][255279] Updated weights for policy 0, policy_version 9055 (0.0009) +[2026-06-02 16:50:05,502][253683] Fps is (10 sec: 19660.7, 60 sec: 19660.8, 300 sec: 18992.1). Total num frames: 4653056. Throughput: 0: 19575.5. Samples: 4670592. Policy #0 lag: (min: 63.0, avg: 84.0, max: 127.0) +[2026-06-02 16:50:05,502][253683] Avg episode reward: [(0, '212.420')] +[2026-06-02 16:50:05,616][255279] Updated weights for policy 0, policy_version 9065 (0.0009) +[2026-06-02 16:50:05,794][255279] Updated weights for policy 0, policy_version 9075 (0.0008) +[2026-06-02 16:50:05,980][255279] Updated weights for policy 0, policy_version 9085 (0.0009) +[2026-06-02 16:50:06,164][255279] Updated weights for policy 0, policy_version 9095 (0.0009) +[2026-06-02 16:50:06,353][255279] Updated weights for policy 0, policy_version 9105 (0.0008) +[2026-06-02 16:50:06,546][255279] Updated weights for policy 0, policy_version 9115 (0.0008) +[2026-06-02 16:50:06,629][255187] Saving new best policy, reward=212.420! +[2026-06-02 16:50:07,257][255279] Updated weights for policy 0, policy_version 9125 (0.0007) +[2026-06-02 16:50:07,432][255279] Updated weights for policy 0, policy_version 9135 (0.0008) +[2026-06-02 16:50:07,619][255279] Updated weights for policy 0, policy_version 9145 (0.0008) +[2026-06-02 16:50:07,819][255279] Updated weights for policy 0, policy_version 9155 (0.0008) +[2026-06-02 16:50:08,003][255279] Updated weights for policy 0, policy_version 9165 (0.0009) +[2026-06-02 16:50:08,185][255279] Updated weights for policy 0, policy_version 9175 (0.0008) +[2026-06-02 16:50:08,861][255279] Updated weights for policy 0, policy_version 9185 (0.0008) +[2026-06-02 16:50:09,040][255279] Updated weights for policy 0, policy_version 9195 (0.0008) +[2026-06-02 16:50:09,220][255279] Updated weights for policy 0, policy_version 9205 (0.0009) +[2026-06-02 16:50:09,430][255279] Updated weights for policy 0, policy_version 9216 (0.0008) +[2026-06-02 16:50:09,611][255279] Updated weights for policy 0, policy_version 9226 (0.0009) +[2026-06-02 16:50:09,796][255279] Updated weights for policy 0, policy_version 9236 (0.0008) +[2026-06-02 16:50:09,985][255279] Updated weights for policy 0, policy_version 9246 (0.0008) +[2026-06-02 16:50:10,502][253683] Fps is (10 sec: 19660.7, 60 sec: 19660.8, 300 sec: 19005.4). Total num frames: 4751360. Throughput: 0: 19601.0. Samples: 4731776. Policy #0 lag: (min: 63.0, avg: 78.7, max: 127.0) +[2026-06-02 16:50:10,502][253683] Avg episode reward: [(0, '209.185')] +[2026-06-02 16:50:10,680][255279] Updated weights for policy 0, policy_version 9256 (0.0008) +[2026-06-02 16:50:10,866][255279] Updated weights for policy 0, policy_version 9266 (0.0008) +[2026-06-02 16:50:11,040][255279] Updated weights for policy 0, policy_version 9276 (0.0006) +[2026-06-02 16:50:11,229][255279] Updated weights for policy 0, policy_version 9286 (0.0009) +[2026-06-02 16:50:11,426][255279] Updated weights for policy 0, policy_version 9296 (0.0008) +[2026-06-02 16:50:11,601][255279] Updated weights for policy 0, policy_version 9306 (0.0008) +[2026-06-02 16:50:12,329][255279] Updated weights for policy 0, policy_version 9316 (0.0009) +[2026-06-02 16:50:12,517][255279] Updated weights for policy 0, policy_version 9326 (0.0009) +[2026-06-02 16:50:12,705][255279] Updated weights for policy 0, policy_version 9336 (0.0008) +[2026-06-02 16:50:12,889][255279] Updated weights for policy 0, policy_version 9346 (0.0008) +[2026-06-02 16:50:13,078][255279] Updated weights for policy 0, policy_version 9356 (0.0009) +[2026-06-02 16:50:13,261][255279] Updated weights for policy 0, policy_version 9366 (0.0009) +[2026-06-02 16:50:13,450][255279] Updated weights for policy 0, policy_version 9376 (0.0009) +[2026-06-02 16:50:14,138][255279] Updated weights for policy 0, policy_version 9387 (0.0009) +[2026-06-02 16:50:14,318][255279] Updated weights for policy 0, policy_version 9397 (0.0008) +[2026-06-02 16:50:14,509][255279] Updated weights for policy 0, policy_version 9407 (0.0008) +[2026-06-02 16:50:14,697][255279] Updated weights for policy 0, policy_version 9417 (0.0008) +[2026-06-02 16:50:14,878][255279] Updated weights for policy 0, policy_version 9427 (0.0009) +[2026-06-02 16:50:15,071][255279] Updated weights for policy 0, policy_version 9437 (0.0008) +[2026-06-02 16:50:15,502][253683] Fps is (10 sec: 19660.8, 60 sec: 19660.8, 300 sec: 19018.3). Total num frames: 4849664. Throughput: 0: 19524.3. Samples: 4849024. Policy #0 lag: (min: 63.0, avg: 78.7, max: 127.0) +[2026-06-02 16:50:15,502][253683] Avg episode reward: [(0, '211.864')] +[2026-06-02 16:50:15,743][255279] Updated weights for policy 0, policy_version 9447 (0.0008) +[2026-06-02 16:50:15,930][255279] Updated weights for policy 0, policy_version 9457 (0.0008) +[2026-06-02 16:50:16,111][255279] Updated weights for policy 0, policy_version 9467 (0.0009) +[2026-06-02 16:50:16,302][255279] Updated weights for policy 0, policy_version 9477 (0.0008) +[2026-06-02 16:50:16,475][255279] Updated weights for policy 0, policy_version 9487 (0.0008) +[2026-06-02 16:50:16,681][255279] Updated weights for policy 0, policy_version 9497 (0.0008) +[2026-06-02 16:50:17,354][255279] Updated weights for policy 0, policy_version 9507 (0.0009) +[2026-06-02 16:50:17,524][255279] Updated weights for policy 0, policy_version 9517 (0.0008) +[2026-06-02 16:50:17,715][255279] Updated weights for policy 0, policy_version 9527 (0.0008) +[2026-06-02 16:50:17,903][255279] Updated weights for policy 0, policy_version 9537 (0.0008) +[2026-06-02 16:50:18,096][255279] Updated weights for policy 0, policy_version 9547 (0.0008) +[2026-06-02 16:50:18,287][255279] Updated weights for policy 0, policy_version 9557 (0.0008) +[2026-06-02 16:50:18,473][255279] Updated weights for policy 0, policy_version 9567 (0.0008) +[2026-06-02 16:50:19,137][255279] Updated weights for policy 0, policy_version 9577 (0.0008) +[2026-06-02 16:50:19,320][255279] Updated weights for policy 0, policy_version 9587 (0.0008) +[2026-06-02 16:50:19,506][255279] Updated weights for policy 0, policy_version 9597 (0.0009) +[2026-06-02 16:50:19,701][255279] Updated weights for policy 0, policy_version 9607 (0.0008) +[2026-06-02 16:50:19,884][255279] Updated weights for policy 0, policy_version 9617 (0.0008) +[2026-06-02 16:50:20,075][255279] Updated weights for policy 0, policy_version 9627 (0.0008) +[2026-06-02 16:50:20,502][253683] Fps is (10 sec: 19660.7, 60 sec: 19660.8, 300 sec: 19030.6). Total num frames: 4947968. Throughput: 0: 19657.9. Samples: 4963584. Policy #0 lag: (min: 24.0, avg: 63.5, max: 88.0) +[2026-06-02 16:50:20,503][253683] Avg episode reward: [(0, '231.269')] +[2026-06-02 16:50:20,508][255187] Saving new best policy, reward=231.269! +[2026-06-02 16:50:20,812][255279] Updated weights for policy 0, policy_version 9637 (0.0008) +[2026-06-02 16:50:20,993][255279] Updated weights for policy 0, policy_version 9647 (0.0009) +[2026-06-02 16:50:21,165][255279] Updated weights for policy 0, policy_version 9657 (0.0008) +[2026-06-02 16:50:21,382][255279] Updated weights for policy 0, policy_version 9668 (0.0008) +[2026-06-02 16:50:21,574][255279] Updated weights for policy 0, policy_version 9678 (0.0009) +[2026-06-02 16:50:21,755][255279] Updated weights for policy 0, policy_version 9688 (0.0008) +[2026-06-02 16:50:22,395][255279] Updated weights for policy 0, policy_version 9698 (0.0008) +[2026-06-02 16:50:22,569][255279] Updated weights for policy 0, policy_version 9708 (0.0008) +[2026-06-02 16:50:22,768][255279] Updated weights for policy 0, policy_version 9719 (0.0008) +[2026-06-02 16:50:22,959][255279] Updated weights for policy 0, policy_version 9729 (0.0006) +[2026-06-02 16:50:23,150][255279] Updated weights for policy 0, policy_version 9739 (0.0009) +[2026-06-02 16:50:23,337][255279] Updated weights for policy 0, policy_version 9749 (0.0008) +[2026-06-02 16:50:23,523][255279] Updated weights for policy 0, policy_version 9759 (0.0008) +[2026-06-02 16:50:24,204][255279] Updated weights for policy 0, policy_version 9769 (0.0009) +[2026-06-02 16:50:24,381][255279] Updated weights for policy 0, policy_version 9779 (0.0007) +[2026-06-02 16:50:24,566][255279] Updated weights for policy 0, policy_version 9789 (0.0009) +[2026-06-02 16:50:24,761][255279] Updated weights for policy 0, policy_version 9799 (0.0009) +[2026-06-02 16:50:24,945][255279] Updated weights for policy 0, policy_version 9809 (0.0009) +[2026-06-02 16:50:25,135][255279] Updated weights for policy 0, policy_version 9819 (0.0008) +[2026-06-02 16:50:25,501][253683] Fps is (10 sec: 19660.8, 60 sec: 19660.8, 300 sec: 19042.5). Total num frames: 5046272. Throughput: 0: 19643.7. Samples: 5024000. Policy #0 lag: (min: 24.0, avg: 63.5, max: 88.0) +[2026-06-02 16:50:25,502][253683] Avg episode reward: [(0, '248.847')] +[2026-06-02 16:50:25,507][255187] Saving new best policy, reward=248.847! +[2026-06-02 16:50:25,841][255279] Updated weights for policy 0, policy_version 9829 (0.0009) +[2026-06-02 16:50:26,025][255279] Updated weights for policy 0, policy_version 9839 (0.0009) +[2026-06-02 16:50:26,203][255279] Updated weights for policy 0, policy_version 9849 (0.0008) +[2026-06-02 16:50:26,392][255279] Updated weights for policy 0, policy_version 9859 (0.0008) +[2026-06-02 16:50:26,584][255279] Updated weights for policy 0, policy_version 9869 (0.0009) +[2026-06-02 16:50:26,776][255279] Updated weights for policy 0, policy_version 9879 (0.0008) +[2026-06-02 16:50:27,444][255279] Updated weights for policy 0, policy_version 9889 (0.0009) +[2026-06-02 16:50:27,624][255279] Updated weights for policy 0, policy_version 9899 (0.0008) +[2026-06-02 16:50:27,812][255279] Updated weights for policy 0, policy_version 9909 (0.0009) +[2026-06-02 16:50:28,000][255279] Updated weights for policy 0, policy_version 9919 (0.0008) +[2026-06-02 16:50:28,193][255279] Updated weights for policy 0, policy_version 9929 (0.0008) +[2026-06-02 16:50:28,382][255279] Updated weights for policy 0, policy_version 9939 (0.0008) +[2026-06-02 16:50:28,566][255279] Updated weights for policy 0, policy_version 9949 (0.0008) +[2026-06-02 16:50:29,245][255279] Updated weights for policy 0, policy_version 9960 (0.0009) +[2026-06-02 16:50:29,430][255279] Updated weights for policy 0, policy_version 9970 (0.0009) +[2026-06-02 16:50:29,610][255279] Updated weights for policy 0, policy_version 9980 (0.0009) +[2026-06-02 16:50:29,807][255279] Updated weights for policy 0, policy_version 9990 (0.0008) +[2026-06-02 16:50:29,998][255279] Updated weights for policy 0, policy_version 10000 (0.0009) +[2026-06-02 16:50:30,178][255279] Updated weights for policy 0, policy_version 10010 (0.0008) +[2026-06-02 16:50:30,502][253683] Fps is (10 sec: 19660.9, 60 sec: 19660.8, 300 sec: 19054.0). Total num frames: 5144576. Throughput: 0: 19419.0. Samples: 5134848. Policy #0 lag: (min: 63.0, avg: 80.5, max: 127.0) +[2026-06-02 16:50:30,503][253683] Avg episode reward: [(0, '259.976')] +[2026-06-02 16:50:30,508][255187] Saving new best policy, reward=259.976! +[2026-06-02 16:50:30,865][255279] Updated weights for policy 0, policy_version 10020 (0.0009) +[2026-06-02 16:50:31,047][255279] Updated weights for policy 0, policy_version 10030 (0.0009) +[2026-06-02 16:50:31,229][255279] Updated weights for policy 0, policy_version 10040 (0.0008) +[2026-06-02 16:50:31,430][255279] Updated weights for policy 0, policy_version 10051 (0.0008) +[2026-06-02 16:50:31,637][255279] Updated weights for policy 0, policy_version 10062 (0.0008) +[2026-06-02 16:50:31,820][255279] Updated weights for policy 0, policy_version 10072 (0.0008) +[2026-06-02 16:50:32,522][255279] Updated weights for policy 0, policy_version 10082 (0.0008) +[2026-06-02 16:50:32,711][255279] Updated weights for policy 0, policy_version 10093 (0.0009) +[2026-06-02 16:50:32,892][255279] Updated weights for policy 0, policy_version 10103 (0.0009) +[2026-06-02 16:50:33,073][255279] Updated weights for policy 0, policy_version 10113 (0.0009) +[2026-06-02 16:50:33,275][255279] Updated weights for policy 0, policy_version 10124 (0.0009) +[2026-06-02 16:50:33,447][255279] Updated weights for policy 0, policy_version 10134 (0.0008) +[2026-06-02 16:50:33,638][255279] Updated weights for policy 0, policy_version 10144 (0.0009) +[2026-06-02 16:50:34,332][255279] Updated weights for policy 0, policy_version 10154 (0.0008) +[2026-06-02 16:50:34,518][255279] Updated weights for policy 0, policy_version 10164 (0.0009) +[2026-06-02 16:50:34,712][255279] Updated weights for policy 0, policy_version 10174 (0.0009) +[2026-06-02 16:50:34,896][255279] Updated weights for policy 0, policy_version 10184 (0.0008) +[2026-06-02 16:50:35,074][255279] Updated weights for policy 0, policy_version 10194 (0.0008) +[2026-06-02 16:50:35,288][255279] Updated weights for policy 0, policy_version 10205 (0.0008) +[2026-06-02 16:50:35,501][253683] Fps is (10 sec: 19660.9, 60 sec: 19660.8, 300 sec: 19065.0). Total num frames: 5242880. Throughput: 0: 19578.3. Samples: 5254528. Policy #0 lag: (min: 63.0, avg: 80.5, max: 127.0) +[2026-06-02 16:50:35,502][253683] Avg episode reward: [(0, '221.518')] +[2026-06-02 16:50:35,988][255279] Updated weights for policy 0, policy_version 10215 (0.0009) +[2026-06-02 16:50:36,167][255279] Updated weights for policy 0, policy_version 10225 (0.0009) +[2026-06-02 16:50:36,360][255279] Updated weights for policy 0, policy_version 10235 (0.0009) +[2026-06-02 16:50:36,546][255279] Updated weights for policy 0, policy_version 10245 (0.0008) +[2026-06-02 16:50:36,725][255279] Updated weights for policy 0, policy_version 10255 (0.0008) +[2026-06-02 16:50:36,922][255279] Updated weights for policy 0, policy_version 10265 (0.0009) +[2026-06-02 16:50:37,599][255279] Updated weights for policy 0, policy_version 10275 (0.0009) +[2026-06-02 16:50:37,778][255279] Updated weights for policy 0, policy_version 10285 (0.0008) +[2026-06-02 16:50:37,961][255279] Updated weights for policy 0, policy_version 10295 (0.0008) +[2026-06-02 16:50:38,144][255279] Updated weights for policy 0, policy_version 10305 (0.0008) +[2026-06-02 16:50:38,356][255279] Updated weights for policy 0, policy_version 10316 (0.0008) +[2026-06-02 16:50:38,552][255279] Updated weights for policy 0, policy_version 10327 (0.0009) +[2026-06-02 16:50:39,263][255279] Updated weights for policy 0, policy_version 10337 (0.0008) +[2026-06-02 16:50:39,442][255279] Updated weights for policy 0, policy_version 10347 (0.0008) +[2026-06-02 16:50:39,625][255279] Updated weights for policy 0, policy_version 10357 (0.0008) +[2026-06-02 16:50:39,829][255279] Updated weights for policy 0, policy_version 10368 (0.0008) +[2026-06-02 16:50:40,013][255279] Updated weights for policy 0, policy_version 10378 (0.0008) +[2026-06-02 16:50:40,195][255279] Updated weights for policy 0, policy_version 10388 (0.0008) +[2026-06-02 16:50:40,387][255279] Updated weights for policy 0, policy_version 10398 (0.0008) +[2026-06-02 16:50:40,502][253683] Fps is (10 sec: 19660.7, 60 sec: 19660.8, 300 sec: 19075.7). Total num frames: 5341184. Throughput: 0: 19535.6. Samples: 5314560. Policy #0 lag: (min: 57.0, avg: 89.8, max: 121.0) +[2026-06-02 16:50:40,503][253683] Avg episode reward: [(0, '175.105')] +[2026-06-02 16:50:41,106][255279] Updated weights for policy 0, policy_version 10409 (0.0008) +[2026-06-02 16:50:41,294][255279] Updated weights for policy 0, policy_version 10419 (0.0009) +[2026-06-02 16:50:41,478][255279] Updated weights for policy 0, policy_version 10429 (0.0008) +[2026-06-02 16:50:41,671][255279] Updated weights for policy 0, policy_version 10439 (0.0008) +[2026-06-02 16:50:41,859][255279] Updated weights for policy 0, policy_version 10449 (0.0009) +[2026-06-02 16:50:42,064][255279] Updated weights for policy 0, policy_version 10460 (0.0008) +[2026-06-02 16:50:42,741][255279] Updated weights for policy 0, policy_version 10470 (0.0008) +[2026-06-02 16:50:42,950][255279] Updated weights for policy 0, policy_version 10481 (0.0008) +[2026-06-02 16:50:43,146][255279] Updated weights for policy 0, policy_version 10492 (0.0009) +[2026-06-02 16:50:43,361][255279] Updated weights for policy 0, policy_version 10503 (0.0009) +[2026-06-02 16:50:43,554][255279] Updated weights for policy 0, policy_version 10513 (0.0008) +[2026-06-02 16:50:43,737][255279] Updated weights for policy 0, policy_version 10523 (0.0009) +[2026-06-02 16:50:44,424][255279] Updated weights for policy 0, policy_version 10533 (0.0008) +[2026-06-02 16:50:44,605][255279] Updated weights for policy 0, policy_version 10543 (0.0009) +[2026-06-02 16:50:44,788][255279] Updated weights for policy 0, policy_version 10553 (0.0009) +[2026-06-02 16:50:44,978][255279] Updated weights for policy 0, policy_version 10563 (0.0008) +[2026-06-02 16:50:45,166][255279] Updated weights for policy 0, policy_version 10573 (0.0008) +[2026-06-02 16:50:45,352][255279] Updated weights for policy 0, policy_version 10583 (0.0008) +[2026-06-02 16:50:45,501][253683] Fps is (10 sec: 16384.0, 60 sec: 19114.7, 300 sec: 18971.0). Total num frames: 5406720. Throughput: 0: 19510.0. Samples: 5425024. Policy #0 lag: (min: 57.0, avg: 89.8, max: 121.0) +[2026-06-02 16:50:45,502][253683] Avg episode reward: [(0, '169.064')] +[2026-06-02 16:50:46,031][255279] Updated weights for policy 0, policy_version 10593 (0.0008) +[2026-06-02 16:50:46,223][255279] Updated weights for policy 0, policy_version 10604 (0.0008) +[2026-06-02 16:50:46,404][255279] Updated weights for policy 0, policy_version 10614 (0.0008) +[2026-06-02 16:50:46,601][255279] Updated weights for policy 0, policy_version 10624 (0.0007) +[2026-06-02 16:50:46,789][255279] Updated weights for policy 0, policy_version 10634 (0.0005) +[2026-06-02 16:50:46,975][255279] Updated weights for policy 0, policy_version 10644 (0.0005) +[2026-06-02 16:50:47,167][255279] Updated weights for policy 0, policy_version 10654 (0.0005) +[2026-06-02 16:50:47,833][255279] Updated weights for policy 0, policy_version 10664 (0.0005) +[2026-06-02 16:50:48,014][255279] Updated weights for policy 0, policy_version 10674 (0.0005) +[2026-06-02 16:50:48,202][255279] Updated weights for policy 0, policy_version 10684 (0.0005) +[2026-06-02 16:50:48,390][255279] Updated weights for policy 0, policy_version 10694 (0.0005) +[2026-06-02 16:50:48,597][255279] Updated weights for policy 0, policy_version 10705 (0.0005) +[2026-06-02 16:50:48,784][255279] Updated weights for policy 0, policy_version 10715 (0.0005) +[2026-06-02 16:50:49,471][255279] Updated weights for policy 0, policy_version 10725 (0.0005) +[2026-06-02 16:50:49,651][255279] Updated weights for policy 0, policy_version 10735 (0.0005) +[2026-06-02 16:50:49,840][255279] Updated weights for policy 0, policy_version 10745 (0.0004) +[2026-06-02 16:50:50,026][255279] Updated weights for policy 0, policy_version 10755 (0.0005) +[2026-06-02 16:50:50,215][255279] Updated weights for policy 0, policy_version 10765 (0.0004) +[2026-06-02 16:50:50,399][255279] Updated weights for policy 0, policy_version 10775 (0.0005) +[2026-06-02 16:50:50,502][253683] Fps is (10 sec: 16384.2, 60 sec: 19114.7, 300 sec: 18982.9). Total num frames: 5505024. Throughput: 0: 19481.6. Samples: 5547264. Policy #0 lag: (min: 63.0, avg: 79.8, max: 127.0) +[2026-06-02 16:50:50,502][253683] Avg episode reward: [(0, '163.562')] +[2026-06-02 16:50:51,087][255279] Updated weights for policy 0, policy_version 10785 (0.0005) +[2026-06-02 16:50:51,257][255279] Updated weights for policy 0, policy_version 10795 (0.0008) +[2026-06-02 16:50:51,461][255279] Updated weights for policy 0, policy_version 10806 (0.0008) +[2026-06-02 16:50:51,649][255279] Updated weights for policy 0, policy_version 10816 (0.0009) +[2026-06-02 16:50:51,855][255279] Updated weights for policy 0, policy_version 10827 (0.0008) +[2026-06-02 16:50:52,044][255279] Updated weights for policy 0, policy_version 10837 (0.0008) +[2026-06-02 16:50:52,228][255279] Updated weights for policy 0, policy_version 10847 (0.0009) +[2026-06-02 16:50:52,896][255279] Updated weights for policy 0, policy_version 10857 (0.0009) +[2026-06-02 16:50:53,072][255279] Updated weights for policy 0, policy_version 10867 (0.0009) +[2026-06-02 16:50:53,267][255279] Updated weights for policy 0, policy_version 10877 (0.0008) +[2026-06-02 16:50:53,457][255279] Updated weights for policy 0, policy_version 10887 (0.0008) +[2026-06-02 16:50:53,642][255279] Updated weights for policy 0, policy_version 10897 (0.0009) +[2026-06-02 16:50:53,830][255279] Updated weights for policy 0, policy_version 10907 (0.0008) +[2026-06-02 16:50:54,519][255279] Updated weights for policy 0, policy_version 10917 (0.0008) +[2026-06-02 16:50:54,694][255279] Updated weights for policy 0, policy_version 10927 (0.0008) +[2026-06-02 16:50:54,888][255279] Updated weights for policy 0, policy_version 10937 (0.0009) +[2026-06-02 16:50:55,090][255279] Updated weights for policy 0, policy_version 10948 (0.0009) +[2026-06-02 16:50:55,273][255279] Updated weights for policy 0, policy_version 10958 (0.0009) +[2026-06-02 16:50:55,474][255279] Updated weights for policy 0, policy_version 10969 (0.0009) +[2026-06-02 16:50:55,502][253683] Fps is (10 sec: 19660.8, 60 sec: 19114.7, 300 sec: 18994.3). Total num frames: 5603328. Throughput: 0: 19475.9. Samples: 5608192. Policy #0 lag: (min: 63.0, avg: 79.8, max: 127.0) +[2026-06-02 16:50:55,502][253683] Avg episode reward: [(0, '194.938')] +[2026-06-02 16:50:56,183][255279] Updated weights for policy 0, policy_version 10979 (0.0009) +[2026-06-02 16:50:56,366][255279] Updated weights for policy 0, policy_version 10989 (0.0008) +[2026-06-02 16:50:56,563][255279] Updated weights for policy 0, policy_version 11000 (0.0008) +[2026-06-02 16:50:56,755][255279] Updated weights for policy 0, policy_version 11010 (0.0009) +[2026-06-02 16:50:56,945][255279] Updated weights for policy 0, policy_version 11020 (0.0009) +[2026-06-02 16:50:57,128][255279] Updated weights for policy 0, policy_version 11030 (0.0008) +[2026-06-02 16:50:57,313][255279] Updated weights for policy 0, policy_version 11040 (0.0008) +[2026-06-02 16:50:57,984][255279] Updated weights for policy 0, policy_version 11050 (0.0008) +[2026-06-02 16:50:58,171][255279] Updated weights for policy 0, policy_version 11060 (0.0009) +[2026-06-02 16:50:58,357][255279] Updated weights for policy 0, policy_version 11070 (0.0008) +[2026-06-02 16:50:58,537][255279] Updated weights for policy 0, policy_version 11080 (0.0008) +[2026-06-02 16:50:58,743][255279] Updated weights for policy 0, policy_version 11091 (0.0008) +[2026-06-02 16:50:58,938][255279] Updated weights for policy 0, policy_version 11101 (0.0008) +[2026-06-02 16:50:59,632][255279] Updated weights for policy 0, policy_version 11111 (0.0009) +[2026-06-02 16:50:59,824][255279] Updated weights for policy 0, policy_version 11122 (0.0008) +[2026-06-02 16:51:00,035][255279] Updated weights for policy 0, policy_version 11133 (0.0008) +[2026-06-02 16:51:00,223][255279] Updated weights for policy 0, policy_version 11143 (0.0009) +[2026-06-02 16:51:00,412][255279] Updated weights for policy 0, policy_version 11153 (0.0009) +[2026-06-02 16:51:00,502][253683] Fps is (10 sec: 19660.8, 60 sec: 19114.6, 300 sec: 19105.4). Total num frames: 5701632. Throughput: 0: 19296.7. Samples: 5717376. Policy #0 lag: (min: 63.0, avg: 80.1, max: 127.0) +[2026-06-02 16:51:00,502][253683] Avg episode reward: [(0, '192.710')] +[2026-06-02 16:51:00,597][255279] Updated weights for policy 0, policy_version 11163 (0.0008) +[2026-06-02 16:51:01,279][255279] Updated weights for policy 0, policy_version 11173 (0.0009) +[2026-06-02 16:51:01,461][255279] Updated weights for policy 0, policy_version 11183 (0.0008) +[2026-06-02 16:51:01,646][255279] Updated weights for policy 0, policy_version 11193 (0.0008) +[2026-06-02 16:51:01,831][255279] Updated weights for policy 0, policy_version 11203 (0.0008) +[2026-06-02 16:51:02,021][255279] Updated weights for policy 0, policy_version 11213 (0.0008) +[2026-06-02 16:51:02,205][255279] Updated weights for policy 0, policy_version 11223 (0.0009) +[2026-06-02 16:51:02,901][255279] Updated weights for policy 0, policy_version 11233 (0.0008) +[2026-06-02 16:51:03,077][255279] Updated weights for policy 0, policy_version 11243 (0.0008) +[2026-06-02 16:51:03,272][255279] Updated weights for policy 0, policy_version 11254 (0.0008) +[2026-06-02 16:51:03,471][255279] Updated weights for policy 0, policy_version 11264 (0.0008) +[2026-06-02 16:51:03,645][255279] Updated weights for policy 0, policy_version 11274 (0.0008) +[2026-06-02 16:51:03,840][255279] Updated weights for policy 0, policy_version 11284 (0.0008) +[2026-06-02 16:51:04,022][255279] Updated weights for policy 0, policy_version 11294 (0.0008) +[2026-06-02 16:51:04,713][255279] Updated weights for policy 0, policy_version 11304 (0.0008) +[2026-06-02 16:51:04,894][255279] Updated weights for policy 0, policy_version 11314 (0.0007) +[2026-06-02 16:51:05,080][255279] Updated weights for policy 0, policy_version 11324 (0.0008) +[2026-06-02 16:51:05,279][255279] Updated weights for policy 0, policy_version 11334 (0.0009) +[2026-06-02 16:51:05,477][255279] Updated weights for policy 0, policy_version 11345 (0.0008) +[2026-06-02 16:51:05,502][253683] Fps is (10 sec: 19660.8, 60 sec: 19114.7, 300 sec: 19105.4). Total num frames: 5799936. Throughput: 0: 19470.3. Samples: 5839744. Policy #0 lag: (min: 63.0, avg: 80.1, max: 127.0) +[2026-06-02 16:51:05,502][253683] Avg episode reward: [(0, '207.245')] +[2026-06-02 16:51:05,659][255279] Updated weights for policy 0, policy_version 11355 (0.0009) +[2026-06-02 16:51:06,357][255279] Updated weights for policy 0, policy_version 11365 (0.0008) +[2026-06-02 16:51:06,530][255279] Updated weights for policy 0, policy_version 11375 (0.0008) +[2026-06-02 16:51:06,728][255279] Updated weights for policy 0, policy_version 11385 (0.0008) +[2026-06-02 16:51:06,925][255279] Updated weights for policy 0, policy_version 11396 (0.0008) +[2026-06-02 16:51:07,119][255279] Updated weights for policy 0, policy_version 11406 (0.0008) +[2026-06-02 16:51:07,300][255279] Updated weights for policy 0, policy_version 11416 (0.0008) +[2026-06-02 16:51:07,989][255279] Updated weights for policy 0, policy_version 11426 (0.0009) +[2026-06-02 16:51:08,185][255279] Updated weights for policy 0, policy_version 11437 (0.0009) +[2026-06-02 16:51:08,388][255279] Updated weights for policy 0, policy_version 11448 (0.0008) +[2026-06-02 16:51:08,595][255279] Updated weights for policy 0, policy_version 11459 (0.0009) +[2026-06-02 16:51:08,789][255279] Updated weights for policy 0, policy_version 11469 (0.0009) +[2026-06-02 16:51:08,990][255279] Updated weights for policy 0, policy_version 11480 (0.0009) +[2026-06-02 16:51:09,681][255279] Updated weights for policy 0, policy_version 11490 (0.0008) +[2026-06-02 16:51:09,860][255279] Updated weights for policy 0, policy_version 11500 (0.0008) +[2026-06-02 16:51:10,051][255279] Updated weights for policy 0, policy_version 11510 (0.0009) +[2026-06-02 16:51:10,227][255279] Updated weights for policy 0, policy_version 11520 (0.0009) +[2026-06-02 16:51:10,416][255279] Updated weights for policy 0, policy_version 11530 (0.0009) +[2026-06-02 16:51:10,502][253683] Fps is (10 sec: 19660.8, 60 sec: 19114.7, 300 sec: 19105.4). Total num frames: 5898240. Throughput: 0: 19365.0. Samples: 5895424. Policy #0 lag: (min: 18.0, avg: 53.6, max: 82.0) +[2026-06-02 16:51:10,502][253683] Avg episode reward: [(0, '198.984')] +[2026-06-02 16:51:10,622][255279] Updated weights for policy 0, policy_version 11541 (0.0008) +[2026-06-02 16:51:10,808][255279] Updated weights for policy 0, policy_version 11551 (0.0008) +[2026-06-02 16:51:11,504][255279] Updated weights for policy 0, policy_version 11561 (0.0009) +[2026-06-02 16:51:11,682][255279] Updated weights for policy 0, policy_version 11571 (0.0008) +[2026-06-02 16:51:11,869][255279] Updated weights for policy 0, policy_version 11581 (0.0008) +[2026-06-02 16:51:12,073][255279] Updated weights for policy 0, policy_version 11592 (0.0008) +[2026-06-02 16:51:12,264][255279] Updated weights for policy 0, policy_version 11602 (0.0007) +[2026-06-02 16:51:12,470][255279] Updated weights for policy 0, policy_version 11613 (0.0009) +[2026-06-02 16:51:13,144][255279] Updated weights for policy 0, policy_version 11623 (0.0009) +[2026-06-02 16:51:13,347][255279] Updated weights for policy 0, policy_version 11634 (0.0009) +[2026-06-02 16:51:13,535][255279] Updated weights for policy 0, policy_version 11644 (0.0009) +[2026-06-02 16:51:13,724][255279] Updated weights for policy 0, policy_version 11654 (0.0008) +[2026-06-02 16:51:13,902][255279] Updated weights for policy 0, policy_version 11664 (0.0008) +[2026-06-02 16:51:14,092][255279] Updated weights for policy 0, policy_version 11674 (0.0008) +[2026-06-02 16:51:14,790][255279] Updated weights for policy 0, policy_version 11685 (0.0008) +[2026-06-02 16:51:14,976][255279] Updated weights for policy 0, policy_version 11695 (0.0009) +[2026-06-02 16:51:15,163][255279] Updated weights for policy 0, policy_version 11705 (0.0008) +[2026-06-02 16:51:15,342][255279] Updated weights for policy 0, policy_version 11715 (0.0008) +[2026-06-02 16:51:15,501][253683] Fps is (10 sec: 19660.8, 60 sec: 19114.7, 300 sec: 19105.4). Total num frames: 5996544. Throughput: 0: 19470.3. Samples: 6011008. Policy #0 lag: (min: 18.0, avg: 53.6, max: 82.0) +[2026-06-02 16:51:15,502][253683] Avg episode reward: [(0, '238.138')] +[2026-06-02 16:51:15,538][255279] Updated weights for policy 0, policy_version 11725 (0.0008) +[2026-06-02 16:51:15,729][255279] Updated weights for policy 0, policy_version 11735 (0.0008) +[2026-06-02 16:51:16,407][255279] Updated weights for policy 0, policy_version 11745 (0.0008) +[2026-06-02 16:51:16,579][255279] Updated weights for policy 0, policy_version 11755 (0.0008) +[2026-06-02 16:51:16,784][255279] Updated weights for policy 0, policy_version 11766 (0.0009) +[2026-06-02 16:51:16,968][255279] Updated weights for policy 0, policy_version 11776 (0.0008) +[2026-06-02 16:51:17,156][255279] Updated weights for policy 0, policy_version 11786 (0.0008) +[2026-06-02 16:51:17,340][255279] Updated weights for policy 0, policy_version 11796 (0.0008) +[2026-06-02 16:51:17,540][255279] Updated weights for policy 0, policy_version 11807 (0.0009) +[2026-06-02 16:51:18,223][255279] Updated weights for policy 0, policy_version 11817 (0.0008) +[2026-06-02 16:51:18,414][255279] Updated weights for policy 0, policy_version 11827 (0.0008) +[2026-06-02 16:51:18,593][255279] Updated weights for policy 0, policy_version 11837 (0.0008) +[2026-06-02 16:51:18,788][255279] Updated weights for policy 0, policy_version 11847 (0.0008) +[2026-06-02 16:51:18,965][255279] Updated weights for policy 0, policy_version 11857 (0.0009) +[2026-06-02 16:51:19,152][255279] Updated weights for policy 0, policy_version 11867 (0.0009) +[2026-06-02 16:51:19,842][255279] Updated weights for policy 0, policy_version 11877 (0.0006) +[2026-06-02 16:51:20,030][255279] Updated weights for policy 0, policy_version 11887 (0.0004) +[2026-06-02 16:51:20,210][255279] Updated weights for policy 0, policy_version 11897 (0.0004) +[2026-06-02 16:51:20,407][255279] Updated weights for policy 0, policy_version 11907 (0.0004) +[2026-06-02 16:51:20,502][253683] Fps is (10 sec: 19660.8, 60 sec: 19114.7, 300 sec: 19105.4). Total num frames: 6094848. Throughput: 0: 19507.2. Samples: 6132352. Policy #0 lag: (min: 63.0, avg: 80.0, max: 127.0) +[2026-06-02 16:51:20,502][253683] Avg episode reward: [(0, '241.680')] +[2026-06-02 16:51:20,598][255279] Updated weights for policy 0, policy_version 11917 (0.0004) +[2026-06-02 16:51:20,786][255279] Updated weights for policy 0, policy_version 11927 (0.0004) +[2026-06-02 16:51:21,454][255279] Updated weights for policy 0, policy_version 11937 (0.0004) +[2026-06-02 16:51:21,625][255279] Updated weights for policy 0, policy_version 11947 (0.0004) +[2026-06-02 16:51:21,811][255279] Updated weights for policy 0, policy_version 11957 (0.0004) +[2026-06-02 16:51:22,000][255279] Updated weights for policy 0, policy_version 11967 (0.0004) +[2026-06-02 16:51:22,192][255279] Updated weights for policy 0, policy_version 11977 (0.0008) +[2026-06-02 16:51:22,381][255279] Updated weights for policy 0, policy_version 11987 (0.0009) +[2026-06-02 16:51:22,582][255279] Updated weights for policy 0, policy_version 11998 (0.0008) +[2026-06-02 16:51:23,231][255279] Updated weights for policy 0, policy_version 12008 (0.0008) +[2026-06-02 16:51:23,427][255279] Updated weights for policy 0, policy_version 12018 (0.0009) +[2026-06-02 16:51:23,614][255279] Updated weights for policy 0, policy_version 12028 (0.0008) +[2026-06-02 16:51:23,801][255279] Updated weights for policy 0, policy_version 12038 (0.0008) +[2026-06-02 16:51:23,990][255279] Updated weights for policy 0, policy_version 12048 (0.0009) +[2026-06-02 16:51:24,180][255279] Updated weights for policy 0, policy_version 12058 (0.0008) +[2026-06-02 16:51:24,848][255279] Updated weights for policy 0, policy_version 12068 (0.0008) +[2026-06-02 16:51:25,021][255279] Updated weights for policy 0, policy_version 12078 (0.0008) +[2026-06-02 16:51:25,216][255279] Updated weights for policy 0, policy_version 12088 (0.0008) +[2026-06-02 16:51:25,400][255279] Updated weights for policy 0, policy_version 12098 (0.0008) +[2026-06-02 16:51:25,502][253683] Fps is (10 sec: 19660.7, 60 sec: 19114.7, 300 sec: 19105.4). Total num frames: 6193152. Throughput: 0: 19285.4. Samples: 6182400. Policy #0 lag: (min: 63.0, avg: 80.0, max: 127.0) +[2026-06-02 16:51:25,502][253683] Avg episode reward: [(0, '253.100')] +[2026-06-02 16:51:25,592][255279] Updated weights for policy 0, policy_version 12108 (0.0008) +[2026-06-02 16:51:25,777][255279] Updated weights for policy 0, policy_version 12118 (0.0008) +[2026-06-02 16:51:25,960][255279] Updated weights for policy 0, policy_version 12128 (0.0008) +[2026-06-02 16:51:26,671][255279] Updated weights for policy 0, policy_version 12139 (0.0009) +[2026-06-02 16:51:26,869][255279] Updated weights for policy 0, policy_version 12149 (0.0009) +[2026-06-02 16:51:27,053][255279] Updated weights for policy 0, policy_version 12159 (0.0008) +[2026-06-02 16:51:27,227][255279] Updated weights for policy 0, policy_version 12169 (0.0008) +[2026-06-02 16:51:27,421][255279] Updated weights for policy 0, policy_version 12179 (0.0008) +[2026-06-02 16:51:27,605][255279] Updated weights for policy 0, policy_version 12189 (0.0008) +[2026-06-02 16:51:28,293][255279] Updated weights for policy 0, policy_version 12199 (0.0009) +[2026-06-02 16:51:28,472][255279] Updated weights for policy 0, policy_version 12209 (0.0009) +[2026-06-02 16:51:28,664][255279] Updated weights for policy 0, policy_version 12219 (0.0008) +[2026-06-02 16:51:28,869][255279] Updated weights for policy 0, policy_version 12230 (0.0008) +[2026-06-02 16:51:29,058][255279] Updated weights for policy 0, policy_version 12240 (0.0008) +[2026-06-02 16:51:29,262][255279] Updated weights for policy 0, policy_version 12251 (0.0008) +[2026-06-02 16:51:29,987][255279] Updated weights for policy 0, policy_version 12263 (0.0009) +[2026-06-02 16:51:30,162][255279] Updated weights for policy 0, policy_version 12273 (0.0008) +[2026-06-02 16:51:30,352][255279] Updated weights for policy 0, policy_version 12283 (0.0008) +[2026-06-02 16:51:30,501][253683] Fps is (10 sec: 19660.8, 60 sec: 19114.7, 300 sec: 19105.4). Total num frames: 6291456. Throughput: 0: 19527.1. Samples: 6303744. Policy #0 lag: (min: 52.0, avg: 70.1, max: 116.0) +[2026-06-02 16:51:30,502][253683] Avg episode reward: [(0, '283.780')] +[2026-06-02 16:51:30,536][255279] Updated weights for policy 0, policy_version 12293 (0.0009) +[2026-06-02 16:51:30,723][255279] Updated weights for policy 0, policy_version 12303 (0.0008) +[2026-06-02 16:51:30,907][255279] Updated weights for policy 0, policy_version 12313 (0.0008) +[2026-06-02 16:51:31,030][255187] Saving new best policy, reward=283.780! +[2026-06-02 16:51:31,591][255279] Updated weights for policy 0, policy_version 12323 (0.0009) +[2026-06-02 16:51:31,762][255279] Updated weights for policy 0, policy_version 12333 (0.0008) +[2026-06-02 16:51:31,957][255279] Updated weights for policy 0, policy_version 12343 (0.0008) +[2026-06-02 16:51:32,142][255279] Updated weights for policy 0, policy_version 12353 (0.0009) +[2026-06-02 16:51:32,321][255279] Updated weights for policy 0, policy_version 12363 (0.0008) +[2026-06-02 16:51:32,507][255279] Updated weights for policy 0, policy_version 12373 (0.0009) +[2026-06-02 16:51:32,697][255279] Updated weights for policy 0, policy_version 12383 (0.0008) +[2026-06-02 16:51:33,384][255279] Updated weights for policy 0, policy_version 12393 (0.0008) +[2026-06-02 16:51:33,562][255279] Updated weights for policy 0, policy_version 12403 (0.0008) +[2026-06-02 16:51:33,750][255279] Updated weights for policy 0, policy_version 12413 (0.0008) +[2026-06-02 16:51:33,940][255279] Updated weights for policy 0, policy_version 12423 (0.0009) +[2026-06-02 16:51:34,125][255279] Updated weights for policy 0, policy_version 12433 (0.0008) +[2026-06-02 16:51:34,315][255279] Updated weights for policy 0, policy_version 12443 (0.0008) +[2026-06-02 16:51:35,003][255279] Updated weights for policy 0, policy_version 12453 (0.0008) +[2026-06-02 16:51:35,184][255279] Updated weights for policy 0, policy_version 12463 (0.0008) +[2026-06-02 16:51:35,370][255279] Updated weights for policy 0, policy_version 12473 (0.0009) +[2026-06-02 16:51:35,502][253683] Fps is (10 sec: 19660.8, 60 sec: 19114.6, 300 sec: 19105.4). Total num frames: 6389760. Throughput: 0: 19453.1. Samples: 6422656. Policy #0 lag: (min: 52.0, avg: 70.1, max: 116.0) +[2026-06-02 16:51:35,502][253683] Avg episode reward: [(0, '292.730')] +[2026-06-02 16:51:35,557][255279] Updated weights for policy 0, policy_version 12483 (0.0008) +[2026-06-02 16:51:35,742][255279] Updated weights for policy 0, policy_version 12493 (0.0008) +[2026-06-02 16:51:35,948][255279] Updated weights for policy 0, policy_version 12504 (0.0008) +[2026-06-02 16:51:36,087][255187] Saving new best policy, reward=292.730! +[2026-06-02 16:51:36,639][255279] Updated weights for policy 0, policy_version 12514 (0.0008) +[2026-06-02 16:51:36,816][255279] Updated weights for policy 0, policy_version 12524 (0.0008) +[2026-06-02 16:51:36,999][255279] Updated weights for policy 0, policy_version 12534 (0.0008) +[2026-06-02 16:51:37,182][255279] Updated weights for policy 0, policy_version 12544 (0.0009) +[2026-06-02 16:51:37,368][255279] Updated weights for policy 0, policy_version 12554 (0.0009) +[2026-06-02 16:51:37,554][255279] Updated weights for policy 0, policy_version 12564 (0.0008) +[2026-06-02 16:51:37,741][255279] Updated weights for policy 0, policy_version 12574 (0.0008) +[2026-06-02 16:51:38,455][255279] Updated weights for policy 0, policy_version 12585 (0.0008) +[2026-06-02 16:51:38,637][255279] Updated weights for policy 0, policy_version 12595 (0.0008) +[2026-06-02 16:51:38,825][255279] Updated weights for policy 0, policy_version 12605 (0.0008) +[2026-06-02 16:51:39,010][255279] Updated weights for policy 0, policy_version 12615 (0.0008) +[2026-06-02 16:51:39,201][255279] Updated weights for policy 0, policy_version 12625 (0.0008) +[2026-06-02 16:51:39,390][255279] Updated weights for policy 0, policy_version 12635 (0.0008) +[2026-06-02 16:51:40,081][255279] Updated weights for policy 0, policy_version 12645 (0.0008) +[2026-06-02 16:51:40,269][255279] Updated weights for policy 0, policy_version 12655 (0.0009) +[2026-06-02 16:51:40,461][255279] Updated weights for policy 0, policy_version 12665 (0.0009) +[2026-06-02 16:51:40,502][253683] Fps is (10 sec: 19660.7, 60 sec: 19114.7, 300 sec: 19105.4). Total num frames: 6488064. Throughput: 0: 19265.4. Samples: 6475136. Policy #0 lag: (min: 52.0, avg: 70.1, max: 116.0) +[2026-06-02 16:51:40,502][253683] Avg episode reward: [(0, '280.760')] +[2026-06-02 16:51:40,649][255279] Updated weights for policy 0, policy_version 12675 (0.0009) +[2026-06-02 16:51:40,826][255279] Updated weights for policy 0, policy_version 12685 (0.0009) +[2026-06-02 16:51:41,019][255279] Updated weights for policy 0, policy_version 12695 (0.0009) +[2026-06-02 16:51:41,675][255279] Updated weights for policy 0, policy_version 12705 (0.0008) +[2026-06-02 16:51:41,864][255279] Updated weights for policy 0, policy_version 12716 (0.0008) +[2026-06-02 16:51:42,047][255279] Updated weights for policy 0, policy_version 12726 (0.0008) +[2026-06-02 16:51:42,242][255279] Updated weights for policy 0, policy_version 12736 (0.0008) +[2026-06-02 16:51:42,424][255279] Updated weights for policy 0, policy_version 12746 (0.0008) +[2026-06-02 16:51:42,612][255279] Updated weights for policy 0, policy_version 12756 (0.0008) +[2026-06-02 16:51:42,804][255279] Updated weights for policy 0, policy_version 12766 (0.0009) +[2026-06-02 16:51:43,493][255279] Updated weights for policy 0, policy_version 12777 (0.0008) +[2026-06-02 16:51:43,673][255279] Updated weights for policy 0, policy_version 12787 (0.0008) +[2026-06-02 16:51:43,867][255279] Updated weights for policy 0, policy_version 12797 (0.0008) +[2026-06-02 16:51:44,043][255279] Updated weights for policy 0, policy_version 12807 (0.0008) +[2026-06-02 16:51:44,241][255279] Updated weights for policy 0, policy_version 12817 (0.0008) +[2026-06-02 16:51:44,433][255279] Updated weights for policy 0, policy_version 12827 (0.0008) +[2026-06-02 16:51:45,096][255279] Updated weights for policy 0, policy_version 12837 (0.0008) +[2026-06-02 16:51:45,279][255279] Updated weights for policy 0, policy_version 12847 (0.0008) +[2026-06-02 16:51:45,463][255279] Updated weights for policy 0, policy_version 12857 (0.0008) +[2026-06-02 16:51:45,502][253683] Fps is (10 sec: 19660.8, 60 sec: 19660.8, 300 sec: 19105.4). Total num frames: 6586368. Throughput: 0: 19547.0. Samples: 6596992. Policy #0 lag: (min: 13.0, avg: 29.9, max: 77.0) +[2026-06-02 16:51:45,502][253683] Avg episode reward: [(0, '289.989')] +[2026-06-02 16:51:45,646][255279] Updated weights for policy 0, policy_version 12867 (0.0008) +[2026-06-02 16:51:45,833][255279] Updated weights for policy 0, policy_version 12877 (0.0008) +[2026-06-02 16:51:46,023][255279] Updated weights for policy 0, policy_version 12887 (0.0008) +[2026-06-02 16:51:46,712][255279] Updated weights for policy 0, policy_version 12897 (0.0008) +[2026-06-02 16:51:46,887][255279] Updated weights for policy 0, policy_version 12907 (0.0008) +[2026-06-02 16:51:47,067][255279] Updated weights for policy 0, policy_version 12917 (0.0008) +[2026-06-02 16:51:47,254][255279] Updated weights for policy 0, policy_version 12927 (0.0008) +[2026-06-02 16:51:47,441][255279] Updated weights for policy 0, policy_version 12937 (0.0005) +[2026-06-02 16:51:47,625][255279] Updated weights for policy 0, policy_version 12947 (0.0005) +[2026-06-02 16:51:47,819][255279] Updated weights for policy 0, policy_version 12957 (0.0005) +[2026-06-02 16:51:48,493][255279] Updated weights for policy 0, policy_version 12967 (0.0005) +[2026-06-02 16:51:48,676][255279] Updated weights for policy 0, policy_version 12977 (0.0005) +[2026-06-02 16:51:48,858][255279] Updated weights for policy 0, policy_version 12987 (0.0004) +[2026-06-02 16:51:49,048][255279] Updated weights for policy 0, policy_version 12997 (0.0004) +[2026-06-02 16:51:49,253][255279] Updated weights for policy 0, policy_version 13008 (0.0004) +[2026-06-02 16:51:49,434][255279] Updated weights for policy 0, policy_version 13018 (0.0008) +[2026-06-02 16:51:50,121][255279] Updated weights for policy 0, policy_version 13028 (0.0008) +[2026-06-02 16:51:50,297][255279] Updated weights for policy 0, policy_version 13038 (0.0008) +[2026-06-02 16:51:50,491][255279] Updated weights for policy 0, policy_version 13048 (0.0009) +[2026-06-02 16:51:50,502][253683] Fps is (10 sec: 19660.7, 60 sec: 19660.8, 300 sec: 19105.4). Total num frames: 6684672. Throughput: 0: 19328.0. Samples: 6709504. Policy #0 lag: (min: 13.0, avg: 29.9, max: 77.0) +[2026-06-02 16:51:50,502][253683] Avg episode reward: [(0, '304.349')] +[2026-06-02 16:51:50,669][255279] Updated weights for policy 0, policy_version 13058 (0.0009) +[2026-06-02 16:51:50,857][255279] Updated weights for policy 0, policy_version 13068 (0.0007) +[2026-06-02 16:51:51,065][255279] Updated weights for policy 0, policy_version 13079 (0.0009) +[2026-06-02 16:51:51,247][255187] Saving new best policy, reward=304.349! +[2026-06-02 16:51:51,753][255279] Updated weights for policy 0, policy_version 13089 (0.0009) +[2026-06-02 16:51:51,933][255279] Updated weights for policy 0, policy_version 13099 (0.0009) +[2026-06-02 16:51:52,116][255279] Updated weights for policy 0, policy_version 13109 (0.0009) +[2026-06-02 16:51:52,297][255279] Updated weights for policy 0, policy_version 13119 (0.0009) +[2026-06-02 16:51:52,490][255279] Updated weights for policy 0, policy_version 13129 (0.0009) +[2026-06-02 16:51:52,676][255279] Updated weights for policy 0, policy_version 13139 (0.0009) +[2026-06-02 16:51:52,867][255279] Updated weights for policy 0, policy_version 13149 (0.0008) +[2026-06-02 16:51:53,546][255279] Updated weights for policy 0, policy_version 13159 (0.0008) +[2026-06-02 16:51:53,751][255279] Updated weights for policy 0, policy_version 13170 (0.0009) +[2026-06-02 16:51:53,929][255279] Updated weights for policy 0, policy_version 13180 (0.0009) +[2026-06-02 16:51:54,128][255279] Updated weights for policy 0, policy_version 13190 (0.0008) +[2026-06-02 16:51:54,317][255279] Updated weights for policy 0, policy_version 13200 (0.0008) +[2026-06-02 16:51:54,502][255279] Updated weights for policy 0, policy_version 13210 (0.0008) +[2026-06-02 16:51:55,169][255279] Updated weights for policy 0, policy_version 13220 (0.0008) +[2026-06-02 16:51:55,354][255279] Updated weights for policy 0, policy_version 13230 (0.0008) +[2026-06-02 16:51:55,502][253683] Fps is (10 sec: 19660.8, 60 sec: 19660.8, 300 sec: 19105.6). Total num frames: 6782976. Throughput: 0: 19373.5. Samples: 6767232. Policy #0 lag: (min: 63.0, avg: 80.1, max: 127.0) +[2026-06-02 16:51:55,503][253683] Avg episode reward: [(0, '330.101')] +[2026-06-02 16:51:55,539][255279] Updated weights for policy 0, policy_version 13240 (0.0008) +[2026-06-02 16:51:55,726][255279] Updated weights for policy 0, policy_version 13250 (0.0008) +[2026-06-02 16:51:55,914][255279] Updated weights for policy 0, policy_version 13260 (0.0008) +[2026-06-02 16:51:56,101][255279] Updated weights for policy 0, policy_version 13270 (0.0008) +[2026-06-02 16:51:56,279][255187] Saving new best policy, reward=330.101! +[2026-06-02 16:51:56,281][255279] Updated weights for policy 0, policy_version 13280 (0.0008) +[2026-06-02 16:51:56,974][255279] Updated weights for policy 0, policy_version 13290 (0.0008) +[2026-06-02 16:51:57,162][255279] Updated weights for policy 0, policy_version 13300 (0.0006) +[2026-06-02 16:51:57,353][255279] Updated weights for policy 0, policy_version 13310 (0.0006) +[2026-06-02 16:51:57,540][255279] Updated weights for policy 0, policy_version 13320 (0.0006) +[2026-06-02 16:51:57,723][255279] Updated weights for policy 0, policy_version 13330 (0.0005) +[2026-06-02 16:51:57,923][255279] Updated weights for policy 0, policy_version 13340 (0.0004) +[2026-06-02 16:51:58,568][255279] Updated weights for policy 0, policy_version 13350 (0.0007) +[2026-06-02 16:51:58,740][255279] Updated weights for policy 0, policy_version 13360 (0.0008) +[2026-06-02 16:51:58,934][255279] Updated weights for policy 0, policy_version 13370 (0.0008) +[2026-06-02 16:51:59,118][255279] Updated weights for policy 0, policy_version 13380 (0.0008) +[2026-06-02 16:51:59,303][255279] Updated weights for policy 0, policy_version 13390 (0.0008) +[2026-06-02 16:51:59,495][255279] Updated weights for policy 0, policy_version 13400 (0.0009) +[2026-06-02 16:52:00,185][255279] Updated weights for policy 0, policy_version 13410 (0.0009) +[2026-06-02 16:52:00,370][255279] Updated weights for policy 0, policy_version 13420 (0.0008) +[2026-06-02 16:52:00,502][253683] Fps is (10 sec: 19660.9, 60 sec: 19660.8, 300 sec: 19216.5). Total num frames: 6881280. Throughput: 0: 19527.1. Samples: 6889728. Policy #0 lag: (min: 63.0, avg: 80.1, max: 127.0) +[2026-06-02 16:52:00,502][253683] Avg episode reward: [(0, '347.208')] +[2026-06-02 16:52:00,557][255279] Updated weights for policy 0, policy_version 13430 (0.0008) +[2026-06-02 16:52:00,749][255279] Updated weights for policy 0, policy_version 13440 (0.0008) +[2026-06-02 16:52:00,937][255279] Updated weights for policy 0, policy_version 13450 (0.0009) +[2026-06-02 16:52:01,129][255279] Updated weights for policy 0, policy_version 13460 (0.0008) +[2026-06-02 16:52:01,329][255279] Updated weights for policy 0, policy_version 13470 (0.0008) +[2026-06-02 16:52:01,357][255187] Saving new best policy, reward=347.208! +[2026-06-02 16:52:01,959][255279] Updated weights for policy 0, policy_version 13480 (0.0008) +[2026-06-02 16:52:02,151][255279] Updated weights for policy 0, policy_version 13490 (0.0008) +[2026-06-02 16:52:02,347][255279] Updated weights for policy 0, policy_version 13500 (0.0009) +[2026-06-02 16:52:02,536][255279] Updated weights for policy 0, policy_version 13510 (0.0009) +[2026-06-02 16:52:02,719][255279] Updated weights for policy 0, policy_version 13520 (0.0009) +[2026-06-02 16:52:02,909][255279] Updated weights for policy 0, policy_version 13530 (0.0009) +[2026-06-02 16:52:03,592][255279] Updated weights for policy 0, policy_version 13540 (0.0009) +[2026-06-02 16:52:03,766][255279] Updated weights for policy 0, policy_version 13550 (0.0009) +[2026-06-02 16:52:03,961][255279] Updated weights for policy 0, policy_version 13560 (0.0009) +[2026-06-02 16:52:04,148][255279] Updated weights for policy 0, policy_version 13570 (0.0009) +[2026-06-02 16:52:04,334][255279] Updated weights for policy 0, policy_version 13580 (0.0009) +[2026-06-02 16:52:04,521][255279] Updated weights for policy 0, policy_version 13590 (0.0009) +[2026-06-02 16:52:04,700][255279] Updated weights for policy 0, policy_version 13600 (0.0009) +[2026-06-02 16:52:05,380][255279] Updated weights for policy 0, policy_version 13610 (0.0008) +[2026-06-02 16:52:05,501][253683] Fps is (10 sec: 19660.9, 60 sec: 19660.8, 300 sec: 19216.5). Total num frames: 6979584. Throughput: 0: 19276.8. Samples: 6999808. Policy #0 lag: (min: 24.0, avg: 59.9, max: 88.0) +[2026-06-02 16:52:05,502][253683] Avg episode reward: [(0, '350.609')] +[2026-06-02 16:52:05,566][255279] Updated weights for policy 0, policy_version 13620 (0.0008) +[2026-06-02 16:52:05,754][255279] Updated weights for policy 0, policy_version 13630 (0.0008) +[2026-06-02 16:52:05,942][255279] Updated weights for policy 0, policy_version 13640 (0.0008) +[2026-06-02 16:52:06,125][255279] Updated weights for policy 0, policy_version 13650 (0.0008) +[2026-06-02 16:52:06,319][255279] Updated weights for policy 0, policy_version 13660 (0.0009) +[2026-06-02 16:52:06,392][255187] Saving new best policy, reward=350.609! +[2026-06-02 16:52:07,007][255279] Updated weights for policy 0, policy_version 13670 (0.0008) +[2026-06-02 16:52:07,178][255279] Updated weights for policy 0, policy_version 13680 (0.0009) +[2026-06-02 16:52:07,377][255279] Updated weights for policy 0, policy_version 13690 (0.0009) +[2026-06-02 16:52:07,583][255279] Updated weights for policy 0, policy_version 13701 (0.0009) +[2026-06-02 16:52:07,765][255279] Updated weights for policy 0, policy_version 13711 (0.0009) +[2026-06-02 16:52:07,956][255279] Updated weights for policy 0, policy_version 13721 (0.0008) +[2026-06-02 16:52:08,637][255279] Updated weights for policy 0, policy_version 13731 (0.0008) +[2026-06-02 16:52:08,815][255279] Updated weights for policy 0, policy_version 13741 (0.0008) +[2026-06-02 16:52:09,001][255279] Updated weights for policy 0, policy_version 13751 (0.0008) +[2026-06-02 16:52:09,183][255279] Updated weights for policy 0, policy_version 13761 (0.0009) +[2026-06-02 16:52:09,364][255279] Updated weights for policy 0, policy_version 13771 (0.0008) +[2026-06-02 16:52:09,562][255279] Updated weights for policy 0, policy_version 13781 (0.0008) +[2026-06-02 16:52:09,750][255279] Updated weights for policy 0, policy_version 13791 (0.0009) +[2026-06-02 16:52:10,424][255279] Updated weights for policy 0, policy_version 13801 (0.0009) +[2026-06-02 16:52:10,502][253683] Fps is (10 sec: 19660.8, 60 sec: 19660.8, 300 sec: 19216.5). Total num frames: 7077888. Throughput: 0: 19530.0. Samples: 7061248. Policy #0 lag: (min: 24.0, avg: 59.9, max: 88.0) +[2026-06-02 16:52:10,502][253683] Avg episode reward: [(0, '376.080')] +[2026-06-02 16:52:10,617][255279] Updated weights for policy 0, policy_version 13811 (0.0008) +[2026-06-02 16:52:10,800][255279] Updated weights for policy 0, policy_version 13821 (0.0008) +[2026-06-02 16:52:10,991][255279] Updated weights for policy 0, policy_version 13831 (0.0009) +[2026-06-02 16:52:11,177][255279] Updated weights for policy 0, policy_version 13841 (0.0009) +[2026-06-02 16:52:11,353][255279] Updated weights for policy 0, policy_version 13851 (0.0009) +[2026-06-02 16:52:11,449][255187] Saving new best policy, reward=376.080! +[2026-06-02 16:52:12,044][255279] Updated weights for policy 0, policy_version 13861 (0.0008) +[2026-06-02 16:52:12,232][255279] Updated weights for policy 0, policy_version 13871 (0.0008) +[2026-06-02 16:52:12,417][255279] Updated weights for policy 0, policy_version 13881 (0.0008) +[2026-06-02 16:52:12,627][255279] Updated weights for policy 0, policy_version 13892 (0.0008) +[2026-06-02 16:52:12,813][255279] Updated weights for policy 0, policy_version 13902 (0.0008) +[2026-06-02 16:52:12,997][255279] Updated weights for policy 0, policy_version 13912 (0.0008) +[2026-06-02 16:52:13,683][255279] Updated weights for policy 0, policy_version 13922 (0.0009) +[2026-06-02 16:52:13,858][255279] Updated weights for policy 0, policy_version 13932 (0.0008) +[2026-06-02 16:52:14,043][255279] Updated weights for policy 0, policy_version 13942 (0.0008) +[2026-06-02 16:52:14,226][255279] Updated weights for policy 0, policy_version 13952 (0.0008) +[2026-06-02 16:52:14,412][255279] Updated weights for policy 0, policy_version 13962 (0.0008) +[2026-06-02 16:52:14,607][255279] Updated weights for policy 0, policy_version 13972 (0.0009) +[2026-06-02 16:52:14,791][255279] Updated weights for policy 0, policy_version 13982 (0.0008) +[2026-06-02 16:52:15,490][255279] Updated weights for policy 0, policy_version 13993 (0.0009) +[2026-06-02 16:52:15,502][253683] Fps is (10 sec: 19660.5, 60 sec: 19660.7, 300 sec: 19216.5). Total num frames: 7176192. Throughput: 0: 19529.9. Samples: 7182592. Policy #0 lag: (min: 0.0, avg: 38.1, max: 64.0) +[2026-06-02 16:52:15,503][253683] Avg episode reward: [(0, '385.032')] +[2026-06-02 16:52:15,607][255187] Early stopping after 2 epochs (16 sgd steps), loss delta 0.0000010 +[2026-06-02 16:52:15,608][255187] Saving new best policy, reward=385.032! +[2026-06-02 16:52:16,390][255279] Updated weights for policy 0, policy_version 14003 (0.0009) +[2026-06-02 16:52:16,576][255279] Updated weights for policy 0, policy_version 14013 (0.0008) +[2026-06-02 16:52:16,761][255279] Updated weights for policy 0, policy_version 14023 (0.0008) +[2026-06-02 16:52:16,949][255279] Updated weights for policy 0, policy_version 14033 (0.0008) +[2026-06-02 16:52:17,132][255279] Updated weights for policy 0, policy_version 14043 (0.0008) +[2026-06-02 16:52:17,319][255279] Updated weights for policy 0, policy_version 14053 (0.0008) +[2026-06-02 16:52:17,507][255279] Updated weights for policy 0, policy_version 14063 (0.0008) +[2026-06-02 16:52:18,183][255279] Updated weights for policy 0, policy_version 14073 (0.0008) +[2026-06-02 16:52:18,361][255279] Updated weights for policy 0, policy_version 14083 (0.0008) +[2026-06-02 16:52:18,564][255279] Updated weights for policy 0, policy_version 14094 (0.0008) +[2026-06-02 16:52:18,744][255279] Updated weights for policy 0, policy_version 14104 (0.0008) +[2026-06-02 16:52:18,937][255279] Updated weights for policy 0, policy_version 14114 (0.0008) +[2026-06-02 16:52:19,124][255279] Updated weights for policy 0, policy_version 14124 (0.0008) +[2026-06-02 16:52:19,820][255279] Updated weights for policy 0, policy_version 14134 (0.0008) +[2026-06-02 16:52:19,995][255279] Updated weights for policy 0, policy_version 14144 (0.0008) +[2026-06-02 16:52:20,175][255279] Updated weights for policy 0, policy_version 14154 (0.0008) +[2026-06-02 16:52:20,358][255279] Updated weights for policy 0, policy_version 14164 (0.0009) +[2026-06-02 16:52:20,502][253683] Fps is (10 sec: 19660.8, 60 sec: 19660.8, 300 sec: 19327.6). Total num frames: 7274496. Throughput: 0: 19788.8. Samples: 7313152. Policy #0 lag: (min: 0.0, avg: 38.1, max: 64.0) +[2026-06-02 16:52:20,502][253683] Avg episode reward: [(0, '421.901')] +[2026-06-02 16:52:20,547][255279] Updated weights for policy 0, policy_version 14174 (0.0009) +[2026-06-02 16:52:20,741][255279] Updated weights for policy 0, policy_version 14184 (0.0009) +[2026-06-02 16:52:20,885][255187] Saving new best policy, reward=421.901! +[2026-06-02 16:52:21,431][255279] Updated weights for policy 0, policy_version 14194 (0.0009) +[2026-06-02 16:52:21,601][255279] Updated weights for policy 0, policy_version 14204 (0.0009) +[2026-06-02 16:52:21,798][255279] Updated weights for policy 0, policy_version 14214 (0.0009) +[2026-06-02 16:52:21,987][255279] Updated weights for policy 0, policy_version 14224 (0.0009) +[2026-06-02 16:52:22,170][255279] Updated weights for policy 0, policy_version 14234 (0.0009) +[2026-06-02 16:52:22,368][255279] Updated weights for policy 0, policy_version 14244 (0.0008) +[2026-06-02 16:52:22,549][255279] Updated weights for policy 0, policy_version 14254 (0.0009) +[2026-06-02 16:52:23,216][255279] Updated weights for policy 0, policy_version 14264 (0.0009) +[2026-06-02 16:52:23,397][255279] Updated weights for policy 0, policy_version 14274 (0.0008) +[2026-06-02 16:52:23,579][255279] Updated weights for policy 0, policy_version 14284 (0.0009) +[2026-06-02 16:52:23,773][255279] Updated weights for policy 0, policy_version 14294 (0.0009) +[2026-06-02 16:52:23,957][255279] Updated weights for policy 0, policy_version 14304 (0.0009) +[2026-06-02 16:52:24,142][255279] Updated weights for policy 0, policy_version 14314 (0.0008) +[2026-06-02 16:52:24,819][255279] Updated weights for policy 0, policy_version 14324 (0.0009) +[2026-06-02 16:52:24,995][255279] Updated weights for policy 0, policy_version 14334 (0.0010) +[2026-06-02 16:52:25,191][255279] Updated weights for policy 0, policy_version 14344 (0.0010) +[2026-06-02 16:52:25,376][255279] Updated weights for policy 0, policy_version 14354 (0.0008) +[2026-06-02 16:52:25,502][253683] Fps is (10 sec: 19660.9, 60 sec: 19660.8, 300 sec: 19327.6). Total num frames: 7372800. Throughput: 0: 19763.2. Samples: 7364480. Policy #0 lag: (min: 0.0, avg: 38.1, max: 64.0) +[2026-06-02 16:52:25,502][253683] Avg episode reward: [(0, '435.014')] +[2026-06-02 16:52:25,563][255279] Updated weights for policy 0, policy_version 14364 (0.0008) +[2026-06-02 16:52:25,753][255279] Updated weights for policy 0, policy_version 14374 (0.0009) +[2026-06-02 16:52:25,944][255187] Saving new best policy, reward=435.014! +[2026-06-02 16:52:25,945][255279] Updated weights for policy 0, policy_version 14384 (0.0008) +[2026-06-02 16:52:26,627][255279] Updated weights for policy 0, policy_version 14394 (0.0009) +[2026-06-02 16:52:26,814][255279] Updated weights for policy 0, policy_version 14404 (0.0009) +[2026-06-02 16:52:27,033][255279] Updated weights for policy 0, policy_version 14415 (0.0008) +[2026-06-02 16:52:27,216][255279] Updated weights for policy 0, policy_version 14425 (0.0009) +[2026-06-02 16:52:27,397][255279] Updated weights for policy 0, policy_version 14435 (0.0009) +[2026-06-02 16:52:27,587][255279] Updated weights for policy 0, policy_version 14445 (0.0009) +[2026-06-02 16:52:28,223][255279] Updated weights for policy 0, policy_version 14455 (0.0008) +[2026-06-02 16:52:28,414][255279] Updated weights for policy 0, policy_version 14465 (0.0009) +[2026-06-02 16:52:28,594][255279] Updated weights for policy 0, policy_version 14475 (0.0007) +[2026-06-02 16:52:28,779][255279] Updated weights for policy 0, policy_version 14485 (0.0009) +[2026-06-02 16:52:28,967][255279] Updated weights for policy 0, policy_version 14495 (0.0009) +[2026-06-02 16:52:29,152][255279] Updated weights for policy 0, policy_version 14505 (0.0009) +[2026-06-02 16:52:29,836][255279] Updated weights for policy 0, policy_version 14515 (0.0009) +[2026-06-02 16:52:30,015][255279] Updated weights for policy 0, policy_version 14525 (0.0008) +[2026-06-02 16:52:30,194][255279] Updated weights for policy 0, policy_version 14535 (0.0008) +[2026-06-02 16:52:30,388][255279] Updated weights for policy 0, policy_version 14545 (0.0009) +[2026-06-02 16:52:30,502][253683] Fps is (10 sec: 19660.8, 60 sec: 19660.8, 300 sec: 19327.6). Total num frames: 7471104. Throughput: 0: 19692.1. Samples: 7483136. Policy #0 lag: (min: 63.0, avg: 80.1, max: 127.0) +[2026-06-02 16:52:30,502][253683] Avg episode reward: [(0, '470.326')] +[2026-06-02 16:52:30,566][255279] Updated weights for policy 0, policy_version 14555 (0.0009) +[2026-06-02 16:52:30,760][255279] Updated weights for policy 0, policy_version 14565 (0.0009) +[2026-06-02 16:52:30,947][255279] Updated weights for policy 0, policy_version 14575 (0.0008) +[2026-06-02 16:52:30,957][255187] Saving new best policy, reward=470.326! +[2026-06-02 16:52:31,630][255279] Updated weights for policy 0, policy_version 14585 (0.0008) +[2026-06-02 16:52:31,810][255279] Updated weights for policy 0, policy_version 14595 (0.0008) +[2026-06-02 16:52:31,999][255279] Updated weights for policy 0, policy_version 14605 (0.0008) +[2026-06-02 16:52:32,188][255279] Updated weights for policy 0, policy_version 14615 (0.0009) +[2026-06-02 16:52:32,372][255279] Updated weights for policy 0, policy_version 14625 (0.0008) +[2026-06-02 16:52:32,560][255279] Updated weights for policy 0, policy_version 14635 (0.0008) +[2026-06-02 16:52:33,254][255279] Updated weights for policy 0, policy_version 14645 (0.0008) +[2026-06-02 16:52:33,428][255279] Updated weights for policy 0, policy_version 14655 (0.0008) +[2026-06-02 16:52:33,624][255279] Updated weights for policy 0, policy_version 14665 (0.0008) +[2026-06-02 16:52:33,803][255279] Updated weights for policy 0, policy_version 14675 (0.0008) +[2026-06-02 16:52:33,986][255279] Updated weights for policy 0, policy_version 14685 (0.0008) +[2026-06-02 16:52:34,176][255279] Updated weights for policy 0, policy_version 14695 (0.0008) +[2026-06-02 16:52:34,862][255279] Updated weights for policy 0, policy_version 14705 (0.0009) +[2026-06-02 16:52:35,040][255279] Updated weights for policy 0, policy_version 14715 (0.0008) +[2026-06-02 16:52:35,220][255279] Updated weights for policy 0, policy_version 14725 (0.0008) +[2026-06-02 16:52:35,417][255279] Updated weights for policy 0, policy_version 14736 (0.0008) +[2026-06-02 16:52:35,502][253683] Fps is (10 sec: 19661.0, 60 sec: 19660.8, 300 sec: 19327.6). Total num frames: 7569408. Throughput: 0: 19874.2. Samples: 7603840. Policy #0 lag: (min: 63.0, avg: 80.1, max: 127.0) +[2026-06-02 16:52:35,502][253683] Avg episode reward: [(0, '482.445')] +[2026-06-02 16:52:35,614][255279] Updated weights for policy 0, policy_version 14746 (0.0008) +[2026-06-02 16:52:35,804][255279] Updated weights for policy 0, policy_version 14756 (0.0008) +[2026-06-02 16:52:36,016][255279] Updated weights for policy 0, policy_version 14767 (0.0008) +[2026-06-02 16:52:36,024][255187] Saving new best policy, reward=482.445! +[2026-06-02 16:52:36,689][255279] Updated weights for policy 0, policy_version 14777 (0.0008) +[2026-06-02 16:52:36,872][255279] Updated weights for policy 0, policy_version 14787 (0.0008) +[2026-06-02 16:52:37,069][255279] Updated weights for policy 0, policy_version 14797 (0.0008) +[2026-06-02 16:52:37,259][255279] Updated weights for policy 0, policy_version 14807 (0.0008) +[2026-06-02 16:52:37,442][255279] Updated weights for policy 0, policy_version 14817 (0.0008) +[2026-06-02 16:52:37,639][255279] Updated weights for policy 0, policy_version 14827 (0.0009) +[2026-06-02 16:52:38,296][255279] Updated weights for policy 0, policy_version 14837 (0.0009) +[2026-06-02 16:52:38,492][255279] Updated weights for policy 0, policy_version 14847 (0.0009) +[2026-06-02 16:52:38,677][255279] Updated weights for policy 0, policy_version 14857 (0.0008) +[2026-06-02 16:52:38,856][255279] Updated weights for policy 0, policy_version 14867 (0.0009) +[2026-06-02 16:52:39,056][255279] Updated weights for policy 0, policy_version 14877 (0.0009) +[2026-06-02 16:52:39,241][255279] Updated weights for policy 0, policy_version 14887 (0.0009) +[2026-06-02 16:52:39,917][255279] Updated weights for policy 0, policy_version 14897 (0.0006) +[2026-06-02 16:52:40,096][255279] Updated weights for policy 0, policy_version 14907 (0.0009) +[2026-06-02 16:52:40,276][255279] Updated weights for policy 0, policy_version 14917 (0.0009) +[2026-06-02 16:52:40,489][255279] Updated weights for policy 0, policy_version 14928 (0.0009) +[2026-06-02 16:52:40,502][253683] Fps is (10 sec: 19660.8, 60 sec: 19660.8, 300 sec: 19327.6). Total num frames: 7667712. Throughput: 0: 19723.4. Samples: 7654784. Policy #0 lag: (min: 63.0, avg: 79.6, max: 127.0) +[2026-06-02 16:52:40,502][253683] Avg episode reward: [(0, '478.837')] +[2026-06-02 16:52:40,677][255279] Updated weights for policy 0, policy_version 14938 (0.0009) +[2026-06-02 16:52:40,865][255279] Updated weights for policy 0, policy_version 14948 (0.0007) +[2026-06-02 16:52:41,050][255279] Updated weights for policy 0, policy_version 14958 (0.0009) +[2026-06-02 16:52:41,728][255279] Updated weights for policy 0, policy_version 14968 (0.0006) +[2026-06-02 16:52:41,916][255279] Updated weights for policy 0, policy_version 14978 (0.0006) +[2026-06-02 16:52:42,105][255279] Updated weights for policy 0, policy_version 14988 (0.0012) +[2026-06-02 16:52:42,289][255279] Updated weights for policy 0, policy_version 14998 (0.0008) +[2026-06-02 16:52:42,482][255279] Updated weights for policy 0, policy_version 15008 (0.0008) +[2026-06-02 16:52:42,668][255279] Updated weights for policy 0, policy_version 15018 (0.0008) +[2026-06-02 16:52:43,334][255279] Updated weights for policy 0, policy_version 15028 (0.0007) +[2026-06-02 16:52:43,513][255279] Updated weights for policy 0, policy_version 15038 (0.0008) +[2026-06-02 16:52:43,689][255279] Updated weights for policy 0, policy_version 15048 (0.0009) +[2026-06-02 16:52:43,882][255279] Updated weights for policy 0, policy_version 15058 (0.0008) +[2026-06-02 16:52:44,087][255279] Updated weights for policy 0, policy_version 15069 (0.0008) +[2026-06-02 16:52:44,274][255279] Updated weights for policy 0, policy_version 15079 (0.0009) +[2026-06-02 16:52:44,962][255279] Updated weights for policy 0, policy_version 15089 (0.0009) +[2026-06-02 16:52:45,138][255279] Updated weights for policy 0, policy_version 15099 (0.0009) +[2026-06-02 16:52:45,326][255279] Updated weights for policy 0, policy_version 15109 (0.0009) +[2026-06-02 16:52:45,501][253683] Fps is (10 sec: 19660.9, 60 sec: 19660.8, 300 sec: 19438.6). Total num frames: 7766016. Throughput: 0: 19697.8. Samples: 7776128. Policy #0 lag: (min: 63.0, avg: 79.6, max: 127.0) +[2026-06-02 16:52:45,502][253683] Avg episode reward: [(0, '490.919')] +[2026-06-02 16:52:45,510][255279] Updated weights for policy 0, policy_version 15119 (0.0008) +[2026-06-02 16:52:45,718][255279] Updated weights for policy 0, policy_version 15130 (0.0009) +[2026-06-02 16:52:45,905][255279] Updated weights for policy 0, policy_version 15140 (0.0009) +[2026-06-02 16:52:46,084][255279] Updated weights for policy 0, policy_version 15150 (0.0008) +[2026-06-02 16:52:46,117][255187] Saving new best policy, reward=490.919! +[2026-06-02 16:52:46,794][255279] Updated weights for policy 0, policy_version 15161 (0.0009) +[2026-06-02 16:52:46,977][255279] Updated weights for policy 0, policy_version 15171 (0.0009) +[2026-06-02 16:52:47,164][255279] Updated weights for policy 0, policy_version 15181 (0.0009) +[2026-06-02 16:52:47,345][255279] Updated weights for policy 0, policy_version 15191 (0.0008) +[2026-06-02 16:52:47,556][255279] Updated weights for policy 0, policy_version 15202 (0.0008) +[2026-06-02 16:52:47,754][255279] Updated weights for policy 0, policy_version 15212 (0.0009) +[2026-06-02 16:52:48,412][255279] Updated weights for policy 0, policy_version 15222 (0.0009) +[2026-06-02 16:52:48,598][255279] Updated weights for policy 0, policy_version 15232 (0.0009) +[2026-06-02 16:52:48,810][255279] Updated weights for policy 0, policy_version 15243 (0.0008) +[2026-06-02 16:52:49,011][255279] Updated weights for policy 0, policy_version 15254 (0.0009) +[2026-06-02 16:52:49,201][255279] Updated weights for policy 0, policy_version 15264 (0.0009) +[2026-06-02 16:52:49,405][255279] Updated weights for policy 0, policy_version 15275 (0.0008) +[2026-06-02 16:52:50,091][255279] Updated weights for policy 0, policy_version 15286 (0.0009) +[2026-06-02 16:52:50,281][255279] Updated weights for policy 0, policy_version 15296 (0.0009) +[2026-06-02 16:52:50,462][255279] Updated weights for policy 0, policy_version 15306 (0.0009) +[2026-06-02 16:52:50,502][253683] Fps is (10 sec: 19660.8, 60 sec: 19660.8, 300 sec: 19438.6). Total num frames: 7864320. Throughput: 0: 19808.7. Samples: 7891200. Policy #0 lag: (min: 63.0, avg: 79.6, max: 127.0) +[2026-06-02 16:52:50,502][253683] Avg episode reward: [(0, '498.452')] +[2026-06-02 16:52:50,677][255279] Updated weights for policy 0, policy_version 15317 (0.0009) +[2026-06-02 16:52:50,859][255279] Updated weights for policy 0, policy_version 15327 (0.0009) +[2026-06-02 16:52:51,075][255279] Updated weights for policy 0, policy_version 15338 (0.0009) +[2026-06-02 16:52:51,184][255187] Saving new best policy, reward=498.452! +[2026-06-02 16:52:51,755][255279] Updated weights for policy 0, policy_version 15348 (0.0009) +[2026-06-02 16:52:51,930][255279] Updated weights for policy 0, policy_version 15358 (0.0008) +[2026-06-02 16:52:52,112][255279] Updated weights for policy 0, policy_version 15368 (0.0008) +[2026-06-02 16:52:52,324][255279] Updated weights for policy 0, policy_version 15379 (0.0009) +[2026-06-02 16:52:52,510][255279] Updated weights for policy 0, policy_version 15389 (0.0009) +[2026-06-02 16:52:52,723][255279] Updated weights for policy 0, policy_version 15400 (0.0009) +[2026-06-02 16:52:53,436][255279] Updated weights for policy 0, policy_version 15411 (0.0009) +[2026-06-02 16:52:53,635][255279] Updated weights for policy 0, policy_version 15422 (0.0009) +[2026-06-02 16:52:53,817][255279] Updated weights for policy 0, policy_version 15432 (0.0009) +[2026-06-02 16:52:54,015][255279] Updated weights for policy 0, policy_version 15442 (0.0009) +[2026-06-02 16:52:54,203][255279] Updated weights for policy 0, policy_version 15452 (0.0009) +[2026-06-02 16:52:54,379][255279] Updated weights for policy 0, policy_version 15462 (0.0008) +[2026-06-02 16:52:55,088][255279] Updated weights for policy 0, policy_version 15473 (0.0009) +[2026-06-02 16:52:55,264][255279] Updated weights for policy 0, policy_version 15483 (0.0009) +[2026-06-02 16:52:55,452][255279] Updated weights for policy 0, policy_version 15493 (0.0009) +[2026-06-02 16:52:55,502][253683] Fps is (10 sec: 19660.7, 60 sec: 19660.8, 300 sec: 19438.6). Total num frames: 7962624. Throughput: 0: 19686.4. Samples: 7947136. Policy #0 lag: (min: 63.0, avg: 80.3, max: 127.0) +[2026-06-02 16:52:55,502][253683] Avg episode reward: [(0, '539.342')] +[2026-06-02 16:52:55,644][255279] Updated weights for policy 0, policy_version 15503 (0.0009) +[2026-06-02 16:52:55,824][255279] Updated weights for policy 0, policy_version 15513 (0.0009) +[2026-06-02 16:52:56,037][255279] Updated weights for policy 0, policy_version 15524 (0.0009) +[2026-06-02 16:52:56,228][255279] Updated weights for policy 0, policy_version 15534 (0.0008) +[2026-06-02 16:52:56,258][255187] Saving new best policy, reward=539.342! +[2026-06-02 16:52:56,932][255279] Updated weights for policy 0, policy_version 15545 (0.0009) +[2026-06-02 16:52:57,101][255279] Updated weights for policy 0, policy_version 15555 (0.0008) +[2026-06-02 16:52:57,292][255279] Updated weights for policy 0, policy_version 15565 (0.0008) +[2026-06-02 16:52:57,478][255279] Updated weights for policy 0, policy_version 15575 (0.0008) +[2026-06-02 16:52:57,672][255279] Updated weights for policy 0, policy_version 15585 (0.0008) +[2026-06-02 16:52:57,853][255279] Updated weights for policy 0, policy_version 15595 (0.0008) +[2026-06-02 16:52:58,529][255279] Updated weights for policy 0, policy_version 15605 (0.0008) +[2026-06-02 16:52:58,706][255279] Updated weights for policy 0, policy_version 15615 (0.0008) +[2026-06-02 16:52:58,884][255279] Updated weights for policy 0, policy_version 15625 (0.0008) +[2026-06-02 16:52:59,078][255279] Updated weights for policy 0, policy_version 15635 (0.0008) +[2026-06-02 16:52:59,279][255279] Updated weights for policy 0, policy_version 15646 (0.0008) +[2026-06-02 16:52:59,469][255279] Updated weights for policy 0, policy_version 15656 (0.0009) +[2026-06-02 16:53:00,177][255279] Updated weights for policy 0, policy_version 15666 (0.0009) +[2026-06-02 16:53:00,351][255279] Updated weights for policy 0, policy_version 15676 (0.0008) +[2026-06-02 16:53:00,502][253683] Fps is (10 sec: 19660.6, 60 sec: 19660.8, 300 sec: 19438.6). Total num frames: 8060928. Throughput: 0: 19692.1. Samples: 8068736. Policy #0 lag: (min: 63.0, avg: 80.3, max: 127.0) +[2026-06-02 16:53:00,503][253683] Avg episode reward: [(0, '532.962')] +[2026-06-02 16:53:00,535][255279] Updated weights for policy 0, policy_version 15686 (0.0009) +[2026-06-02 16:53:00,713][255279] Updated weights for policy 0, policy_version 15696 (0.0009) +[2026-06-02 16:53:00,908][255279] Updated weights for policy 0, policy_version 15706 (0.0009) +[2026-06-02 16:53:01,093][255279] Updated weights for policy 0, policy_version 15716 (0.0009) +[2026-06-02 16:53:01,282][255279] Updated weights for policy 0, policy_version 15726 (0.0008) +[2026-06-02 16:53:02,019][255279] Updated weights for policy 0, policy_version 15737 (0.0009) +[2026-06-02 16:53:02,193][255279] Updated weights for policy 0, policy_version 15747 (0.0009) +[2026-06-02 16:53:02,379][255279] Updated weights for policy 0, policy_version 15757 (0.0009) +[2026-06-02 16:53:02,567][255279] Updated weights for policy 0, policy_version 15767 (0.0009) +[2026-06-02 16:53:02,749][255279] Updated weights for policy 0, policy_version 15777 (0.0009) +[2026-06-02 16:53:02,939][255279] Updated weights for policy 0, policy_version 15787 (0.0009) +[2026-06-02 16:53:03,625][255279] Updated weights for policy 0, policy_version 15797 (0.0008) +[2026-06-02 16:53:03,800][255279] Updated weights for policy 0, policy_version 15807 (0.0008) +[2026-06-02 16:53:03,985][255279] Updated weights for policy 0, policy_version 15817 (0.0009) +[2026-06-02 16:53:04,179][255279] Updated weights for policy 0, policy_version 15827 (0.0009) +[2026-06-02 16:53:04,365][255279] Updated weights for policy 0, policy_version 15837 (0.0008) +[2026-06-02 16:53:04,582][255279] Updated weights for policy 0, policy_version 15848 (0.0009) +[2026-06-02 16:53:05,254][255279] Updated weights for policy 0, policy_version 15858 (0.0008) +[2026-06-02 16:53:05,434][255279] Updated weights for policy 0, policy_version 15868 (0.0008) +[2026-06-02 16:53:05,502][253683] Fps is (10 sec: 19660.4, 60 sec: 19660.7, 300 sec: 19438.6). Total num frames: 8159232. Throughput: 0: 19276.7. Samples: 8180608. Policy #0 lag: (min: 63.0, avg: 80.1, max: 127.0) +[2026-06-02 16:53:05,503][253683] Avg episode reward: [(0, '535.101')] +[2026-06-02 16:53:05,621][255279] Updated weights for policy 0, policy_version 15878 (0.0009) +[2026-06-02 16:53:05,802][255279] Updated weights for policy 0, policy_version 15888 (0.0009) +[2026-06-02 16:53:05,994][255279] Updated weights for policy 0, policy_version 15898 (0.0009) +[2026-06-02 16:53:06,179][255279] Updated weights for policy 0, policy_version 15908 (0.0009) +[2026-06-02 16:53:06,360][255279] Updated weights for policy 0, policy_version 15918 (0.0009) +[2026-06-02 16:53:07,043][255279] Updated weights for policy 0, policy_version 15928 (0.0009) +[2026-06-02 16:53:07,221][255279] Updated weights for policy 0, policy_version 15938 (0.0009) +[2026-06-02 16:53:07,417][255279] Updated weights for policy 0, policy_version 15948 (0.0008) +[2026-06-02 16:53:07,628][255279] Updated weights for policy 0, policy_version 15959 (0.0008) +[2026-06-02 16:53:07,826][255279] Updated weights for policy 0, policy_version 15970 (0.0009) +[2026-06-02 16:53:08,013][255279] Updated weights for policy 0, policy_version 15980 (0.0009) +[2026-06-02 16:53:08,684][255279] Updated weights for policy 0, policy_version 15990 (0.0009) +[2026-06-02 16:53:08,893][255279] Updated weights for policy 0, policy_version 16001 (0.0008) +[2026-06-02 16:53:09,077][255279] Updated weights for policy 0, policy_version 16011 (0.0008) +[2026-06-02 16:53:09,267][255279] Updated weights for policy 0, policy_version 16021 (0.0008) +[2026-06-02 16:53:09,466][255279] Updated weights for policy 0, policy_version 16032 (0.0008) +[2026-06-02 16:53:09,662][255279] Updated weights for policy 0, policy_version 16042 (0.0008) +[2026-06-02 16:53:10,332][255279] Updated weights for policy 0, policy_version 16052 (0.0008) +[2026-06-02 16:53:10,502][253683] Fps is (10 sec: 19661.0, 60 sec: 19660.8, 300 sec: 19438.6). Total num frames: 8257536. Throughput: 0: 19478.8. Samples: 8241024. Policy #0 lag: (min: 63.0, avg: 80.1, max: 127.0) +[2026-06-02 16:53:10,502][253683] Avg episode reward: [(0, '530.397')] +[2026-06-02 16:53:10,527][255279] Updated weights for policy 0, policy_version 16063 (0.0008) +[2026-06-02 16:53:10,712][255279] Updated weights for policy 0, policy_version 16073 (0.0008) +[2026-06-02 16:53:10,895][255279] Updated weights for policy 0, policy_version 16083 (0.0008) +[2026-06-02 16:53:11,083][255279] Updated weights for policy 0, policy_version 16093 (0.0009) +[2026-06-02 16:53:11,269][255279] Updated weights for policy 0, policy_version 16103 (0.0008) +[2026-06-02 16:53:11,966][255279] Updated weights for policy 0, policy_version 16113 (0.0008) +[2026-06-02 16:53:12,142][255279] Updated weights for policy 0, policy_version 16123 (0.0008) +[2026-06-02 16:53:12,329][255279] Updated weights for policy 0, policy_version 16133 (0.0008) +[2026-06-02 16:53:12,509][255279] Updated weights for policy 0, policy_version 16143 (0.0009) +[2026-06-02 16:53:12,705][255279] Updated weights for policy 0, policy_version 16153 (0.0008) +[2026-06-02 16:53:12,899][255279] Updated weights for policy 0, policy_version 16163 (0.0009) +[2026-06-02 16:53:13,085][255279] Updated weights for policy 0, policy_version 16173 (0.0009) +[2026-06-02 16:53:13,743][255279] Updated weights for policy 0, policy_version 16183 (0.0009) +[2026-06-02 16:53:13,953][255279] Updated weights for policy 0, policy_version 16194 (0.0009) +[2026-06-02 16:53:14,154][255279] Updated weights for policy 0, policy_version 16204 (0.0009) +[2026-06-02 16:53:14,331][255279] Updated weights for policy 0, policy_version 16214 (0.0009) +[2026-06-02 16:53:14,516][255279] Updated weights for policy 0, policy_version 16224 (0.0009) +[2026-06-02 16:53:14,717][255279] Updated weights for policy 0, policy_version 16234 (0.0009) +[2026-06-02 16:53:15,374][255279] Updated weights for policy 0, policy_version 16244 (0.0009) +[2026-06-02 16:53:15,501][253683] Fps is (10 sec: 19661.2, 60 sec: 19660.9, 300 sec: 19438.6). Total num frames: 8355840. Throughput: 0: 19527.1. Samples: 8361856. Policy #0 lag: (min: 63.0, avg: 80.1, max: 127.0) +[2026-06-02 16:53:15,502][253683] Avg episode reward: [(0, '531.027')] +[2026-06-02 16:53:15,563][255279] Updated weights for policy 0, policy_version 16254 (0.0009) +[2026-06-02 16:53:15,742][255279] Updated weights for policy 0, policy_version 16264 (0.0009) +[2026-06-02 16:53:15,922][255279] Updated weights for policy 0, policy_version 16274 (0.0009) +[2026-06-02 16:53:16,125][255279] Updated weights for policy 0, policy_version 16284 (0.0008) +[2026-06-02 16:53:16,309][255279] Updated weights for policy 0, policy_version 16294 (0.0009) +[2026-06-02 16:53:16,494][255279] Updated weights for policy 0, policy_version 16304 (0.0009) +[2026-06-02 16:53:17,160][255279] Updated weights for policy 0, policy_version 16314 (0.0008) +[2026-06-02 16:53:17,376][255279] Updated weights for policy 0, policy_version 16325 (0.0009) +[2026-06-02 16:53:17,562][255279] Updated weights for policy 0, policy_version 16335 (0.0008) +[2026-06-02 16:53:17,766][255279] Updated weights for policy 0, policy_version 16346 (0.0008) +[2026-06-02 16:53:17,948][255279] Updated weights for policy 0, policy_version 16356 (0.0008) +[2026-06-02 16:53:18,177][255279] Updated weights for policy 0, policy_version 16367 (0.0008) +[2026-06-02 16:53:18,842][255279] Updated weights for policy 0, policy_version 16378 (0.0009) +[2026-06-02 16:53:19,035][255279] Updated weights for policy 0, policy_version 16388 (0.0008) +[2026-06-02 16:53:19,220][255279] Updated weights for policy 0, policy_version 16398 (0.0008) +[2026-06-02 16:53:19,403][255279] Updated weights for policy 0, policy_version 16408 (0.0008) +[2026-06-02 16:53:19,613][255279] Updated weights for policy 0, policy_version 16419 (0.0009) +[2026-06-02 16:53:19,801][255279] Updated weights for policy 0, policy_version 16429 (0.0008) +[2026-06-02 16:53:20,472][255279] Updated weights for policy 0, policy_version 16439 (0.0008) +[2026-06-02 16:53:20,502][253683] Fps is (10 sec: 19660.7, 60 sec: 19660.8, 300 sec: 19549.7). Total num frames: 8454144. Throughput: 0: 19291.0. Samples: 8471936. Policy #0 lag: (min: 61.0, avg: 77.6, max: 125.0) +[2026-06-02 16:53:20,502][253683] Avg episode reward: [(0, '512.647')] +[2026-06-02 16:53:20,665][255279] Updated weights for policy 0, policy_version 16449 (0.0008) +[2026-06-02 16:53:20,862][255279] Updated weights for policy 0, policy_version 16460 (0.0008) +[2026-06-02 16:53:21,059][255279] Updated weights for policy 0, policy_version 16470 (0.0008) +[2026-06-02 16:53:21,249][255279] Updated weights for policy 0, policy_version 16480 (0.0008) +[2026-06-02 16:53:21,433][255279] Updated weights for policy 0, policy_version 16490 (0.0008) +[2026-06-02 16:53:22,108][255279] Updated weights for policy 0, policy_version 16500 (0.0008) +[2026-06-02 16:53:22,291][255279] Updated weights for policy 0, policy_version 16510 (0.0008) +[2026-06-02 16:53:22,480][255279] Updated weights for policy 0, policy_version 16520 (0.0008) +[2026-06-02 16:53:22,682][255279] Updated weights for policy 0, policy_version 16531 (0.0008) +[2026-06-02 16:53:22,869][255279] Updated weights for policy 0, policy_version 16541 (0.0008) +[2026-06-02 16:53:23,056][255279] Updated weights for policy 0, policy_version 16551 (0.0008) +[2026-06-02 16:53:23,739][255279] Updated weights for policy 0, policy_version 16561 (0.0008) +[2026-06-02 16:53:23,916][255279] Updated weights for policy 0, policy_version 16571 (0.0008) +[2026-06-02 16:53:24,103][255279] Updated weights for policy 0, policy_version 16581 (0.0008) +[2026-06-02 16:53:24,286][255279] Updated weights for policy 0, policy_version 16591 (0.0008) +[2026-06-02 16:53:24,478][255279] Updated weights for policy 0, policy_version 16601 (0.0008) +[2026-06-02 16:53:24,670][255279] Updated weights for policy 0, policy_version 16611 (0.0008) +[2026-06-02 16:53:24,868][255279] Updated weights for policy 0, policy_version 16622 (0.0009) +[2026-06-02 16:53:25,502][253683] Fps is (10 sec: 19660.7, 60 sec: 19660.8, 300 sec: 19549.7). Total num frames: 8552448. Throughput: 0: 19518.6. Samples: 8533120. Policy #0 lag: (min: 61.0, avg: 77.6, max: 125.0) +[2026-06-02 16:53:25,502][253683] Avg episode reward: [(0, '508.418')] +[2026-06-02 16:53:25,557][255279] Updated weights for policy 0, policy_version 16632 (0.0009) +[2026-06-02 16:53:25,755][255279] Updated weights for policy 0, policy_version 16643 (0.0008) +[2026-06-02 16:53:25,949][255279] Updated weights for policy 0, policy_version 16653 (0.0008) +[2026-06-02 16:53:26,132][255279] Updated weights for policy 0, policy_version 16663 (0.0009) +[2026-06-02 16:53:26,326][255279] Updated weights for policy 0, policy_version 16673 (0.0008) +[2026-06-02 16:53:26,516][255279] Updated weights for policy 0, policy_version 16683 (0.0008) +[2026-06-02 16:53:27,209][255279] Updated weights for policy 0, policy_version 16693 (0.0008) +[2026-06-02 16:53:27,390][255279] Updated weights for policy 0, policy_version 16703 (0.0008) +[2026-06-02 16:53:27,601][255279] Updated weights for policy 0, policy_version 16714 (0.0008) +[2026-06-02 16:53:27,789][255279] Updated weights for policy 0, policy_version 16724 (0.0009) +[2026-06-02 16:53:27,981][255279] Updated weights for policy 0, policy_version 16734 (0.0008) +[2026-06-02 16:53:28,162][255279] Updated weights for policy 0, policy_version 16744 (0.0008) +[2026-06-02 16:53:28,844][255279] Updated weights for policy 0, policy_version 16754 (0.0008) +[2026-06-02 16:53:29,034][255279] Updated weights for policy 0, policy_version 16765 (0.0009) +[2026-06-02 16:53:29,215][255279] Updated weights for policy 0, policy_version 16775 (0.0009) +[2026-06-02 16:53:29,406][255279] Updated weights for policy 0, policy_version 16785 (0.0008) +[2026-06-02 16:53:29,594][255279] Updated weights for policy 0, policy_version 16795 (0.0009) +[2026-06-02 16:53:29,788][255279] Updated weights for policy 0, policy_version 16805 (0.0009) +[2026-06-02 16:53:29,972][255279] Updated weights for policy 0, policy_version 16815 (0.0009) +[2026-06-02 16:53:30,502][253683] Fps is (10 sec: 19660.9, 60 sec: 19660.8, 300 sec: 19549.7). Total num frames: 8650752. Throughput: 0: 19552.7. Samples: 8656000. Policy #0 lag: (min: 61.0, avg: 77.6, max: 125.0) +[2026-06-02 16:53:30,502][253683] Avg episode reward: [(0, '517.944')] +[2026-06-02 16:53:30,637][255279] Updated weights for policy 0, policy_version 16825 (0.0008) +[2026-06-02 16:53:30,824][255279] Updated weights for policy 0, policy_version 16835 (0.0009) +[2026-06-02 16:53:31,013][255279] Updated weights for policy 0, policy_version 16845 (0.0008) +[2026-06-02 16:53:31,203][255279] Updated weights for policy 0, policy_version 16855 (0.0008) +[2026-06-02 16:53:31,411][255279] Updated weights for policy 0, policy_version 16866 (0.0008) +[2026-06-02 16:53:31,596][255279] Updated weights for policy 0, policy_version 16876 (0.0008) +[2026-06-02 16:53:32,272][255279] Updated weights for policy 0, policy_version 16886 (0.0008) +[2026-06-02 16:53:32,462][255279] Updated weights for policy 0, policy_version 16896 (0.0009) +[2026-06-02 16:53:32,644][255279] Updated weights for policy 0, policy_version 16906 (0.0008) +[2026-06-02 16:53:32,854][255279] Updated weights for policy 0, policy_version 16917 (0.0009) +[2026-06-02 16:53:33,048][255279] Updated weights for policy 0, policy_version 16927 (0.0009) +[2026-06-02 16:53:33,230][255279] Updated weights for policy 0, policy_version 16937 (0.0008) +[2026-06-02 16:53:33,903][255279] Updated weights for policy 0, policy_version 16947 (0.0007) +[2026-06-02 16:53:34,077][255279] Updated weights for policy 0, policy_version 16957 (0.0008) +[2026-06-02 16:53:34,279][255279] Updated weights for policy 0, policy_version 16968 (0.0008) +[2026-06-02 16:53:34,468][255279] Updated weights for policy 0, policy_version 16978 (0.0008) +[2026-06-02 16:53:34,648][255279] Updated weights for policy 0, policy_version 16988 (0.0008) +[2026-06-02 16:53:34,852][255279] Updated weights for policy 0, policy_version 16999 (0.0008) +[2026-06-02 16:53:35,502][253683] Fps is (10 sec: 19660.8, 60 sec: 19660.8, 300 sec: 19549.7). Total num frames: 8749056. Throughput: 0: 19427.5. Samples: 8765440. Policy #0 lag: (min: 16.0, avg: 52.6, max: 80.0) +[2026-06-02 16:53:35,502][253683] Avg episode reward: [(0, '499.702')] +[2026-06-02 16:53:35,543][255279] Updated weights for policy 0, policy_version 17009 (0.0008) +[2026-06-02 16:53:35,724][255279] Updated weights for policy 0, policy_version 17019 (0.0008) +[2026-06-02 16:53:35,906][255279] Updated weights for policy 0, policy_version 17029 (0.0008) +[2026-06-02 16:53:36,092][255279] Updated weights for policy 0, policy_version 17039 (0.0008) +[2026-06-02 16:53:36,273][255279] Updated weights for policy 0, policy_version 17049 (0.0008) +[2026-06-02 16:53:36,467][255279] Updated weights for policy 0, policy_version 17059 (0.0009) +[2026-06-02 16:53:36,655][255279] Updated weights for policy 0, policy_version 17069 (0.0009) +[2026-06-02 16:53:37,380][255279] Updated weights for policy 0, policy_version 17081 (0.0008) +[2026-06-02 16:53:37,552][255279] Updated weights for policy 0, policy_version 17091 (0.0008) +[2026-06-02 16:53:37,747][255279] Updated weights for policy 0, policy_version 17101 (0.0008) +[2026-06-02 16:53:37,926][255279] Updated weights for policy 0, policy_version 17111 (0.0008) +[2026-06-02 16:53:38,132][255279] Updated weights for policy 0, policy_version 17122 (0.0008) +[2026-06-02 16:53:38,320][255279] Updated weights for policy 0, policy_version 17132 (0.0009) +[2026-06-02 16:53:39,000][255279] Updated weights for policy 0, policy_version 17142 (0.0008) +[2026-06-02 16:53:39,180][255279] Updated weights for policy 0, policy_version 17152 (0.0008) +[2026-06-02 16:53:39,362][255279] Updated weights for policy 0, policy_version 17162 (0.0009) +[2026-06-02 16:53:39,551][255279] Updated weights for policy 0, policy_version 17172 (0.0008) +[2026-06-02 16:53:39,739][255279] Updated weights for policy 0, policy_version 17182 (0.0008) +[2026-06-02 16:53:39,926][255279] Updated weights for policy 0, policy_version 17192 (0.0008) +[2026-06-02 16:53:40,502][253683] Fps is (10 sec: 19660.8, 60 sec: 19660.8, 300 sec: 19549.7). Total num frames: 8847360. Throughput: 0: 19538.5. Samples: 8826368. Policy #0 lag: (min: 16.0, avg: 52.6, max: 80.0) +[2026-06-02 16:53:40,502][253683] Avg episode reward: [(0, '494.132')] +[2026-06-02 16:53:40,626][255279] Updated weights for policy 0, policy_version 17202 (0.0009) +[2026-06-02 16:53:40,797][255279] Updated weights for policy 0, policy_version 17212 (0.0008) +[2026-06-02 16:53:40,973][255279] Updated weights for policy 0, policy_version 17222 (0.0009) +[2026-06-02 16:53:41,166][255279] Updated weights for policy 0, policy_version 17232 (0.0008) +[2026-06-02 16:53:41,356][255279] Updated weights for policy 0, policy_version 17242 (0.0009) +[2026-06-02 16:53:41,561][255279] Updated weights for policy 0, policy_version 17253 (0.0008) +[2026-06-02 16:53:41,752][255279] Updated weights for policy 0, policy_version 17263 (0.0008) +[2026-06-02 16:53:42,433][255279] Updated weights for policy 0, policy_version 17274 (0.0008) +[2026-06-02 16:53:42,626][255279] Updated weights for policy 0, policy_version 17284 (0.0006) +[2026-06-02 16:53:42,811][255279] Updated weights for policy 0, policy_version 17294 (0.0005) +[2026-06-02 16:53:42,992][255279] Updated weights for policy 0, policy_version 17304 (0.0005) +[2026-06-02 16:53:43,180][255279] Updated weights for policy 0, policy_version 17314 (0.0005) +[2026-06-02 16:53:43,369][255279] Updated weights for policy 0, policy_version 17324 (0.0005) +[2026-06-02 16:53:44,066][255279] Updated weights for policy 0, policy_version 17334 (0.0007) +[2026-06-02 16:53:44,265][255279] Updated weights for policy 0, policy_version 17345 (0.0008) +[2026-06-02 16:53:44,473][255279] Updated weights for policy 0, policy_version 17356 (0.0007) +[2026-06-02 16:53:44,650][255279] Updated weights for policy 0, policy_version 17366 (0.0004) +[2026-06-02 16:53:44,843][255279] Updated weights for policy 0, policy_version 17376 (0.0004) +[2026-06-02 16:53:45,028][255279] Updated weights for policy 0, policy_version 17386 (0.0004) +[2026-06-02 16:53:45,502][253683] Fps is (10 sec: 19660.9, 60 sec: 19660.8, 300 sec: 19549.7). Total num frames: 8945664. Throughput: 0: 19436.1. Samples: 8943360. Policy #0 lag: (min: 60.0, avg: 76.7, max: 124.0) +[2026-06-02 16:53:45,502][253683] Avg episode reward: [(0, '501.333')] +[2026-06-02 16:53:45,704][255279] Updated weights for policy 0, policy_version 17396 (0.0004) +[2026-06-02 16:53:45,893][255279] Updated weights for policy 0, policy_version 17406 (0.0004) +[2026-06-02 16:53:46,072][255279] Updated weights for policy 0, policy_version 17416 (0.0005) +[2026-06-02 16:53:46,271][255279] Updated weights for policy 0, policy_version 17426 (0.0009) +[2026-06-02 16:53:46,459][255279] Updated weights for policy 0, policy_version 17436 (0.0008) +[2026-06-02 16:53:46,636][255279] Updated weights for policy 0, policy_version 17446 (0.0008) +[2026-06-02 16:53:46,822][255279] Updated weights for policy 0, policy_version 17456 (0.0008) +[2026-06-02 16:53:47,488][255279] Updated weights for policy 0, policy_version 17466 (0.0009) +[2026-06-02 16:53:47,664][255279] Updated weights for policy 0, policy_version 17476 (0.0008) +[2026-06-02 16:53:47,859][255279] Updated weights for policy 0, policy_version 17486 (0.0008) +[2026-06-02 16:53:48,036][255279] Updated weights for policy 0, policy_version 17496 (0.0008) +[2026-06-02 16:53:48,231][255279] Updated weights for policy 0, policy_version 17506 (0.0008) +[2026-06-02 16:53:48,415][255279] Updated weights for policy 0, policy_version 17516 (0.0008) +[2026-06-02 16:53:49,110][255279] Updated weights for policy 0, policy_version 17526 (0.0008) +[2026-06-02 16:53:49,288][255279] Updated weights for policy 0, policy_version 17536 (0.0008) +[2026-06-02 16:53:49,479][255279] Updated weights for policy 0, policy_version 17546 (0.0009) +[2026-06-02 16:53:49,663][255279] Updated weights for policy 0, policy_version 17556 (0.0008) +[2026-06-02 16:53:49,852][255279] Updated weights for policy 0, policy_version 17566 (0.0008) +[2026-06-02 16:53:50,037][255279] Updated weights for policy 0, policy_version 17576 (0.0008) +[2026-06-02 16:53:50,502][253683] Fps is (10 sec: 19660.7, 60 sec: 19660.8, 300 sec: 19549.7). Total num frames: 9043968. Throughput: 0: 19512.9. Samples: 9058688. Policy #0 lag: (min: 60.0, avg: 76.7, max: 124.0) +[2026-06-02 16:53:50,503][253683] Avg episode reward: [(0, '520.563')] +[2026-06-02 16:53:50,729][255279] Updated weights for policy 0, policy_version 17586 (0.0008) +[2026-06-02 16:53:50,908][255279] Updated weights for policy 0, policy_version 17596 (0.0009) +[2026-06-02 16:53:51,097][255279] Updated weights for policy 0, policy_version 17606 (0.0009) +[2026-06-02 16:53:51,283][255279] Updated weights for policy 0, policy_version 17616 (0.0006) +[2026-06-02 16:53:51,480][255279] Updated weights for policy 0, policy_version 17627 (0.0008) +[2026-06-02 16:53:51,673][255279] Updated weights for policy 0, policy_version 17637 (0.0009) +[2026-06-02 16:53:51,860][255279] Updated weights for policy 0, policy_version 17647 (0.0009) +[2026-06-02 16:53:52,543][255279] Updated weights for policy 0, policy_version 17657 (0.0009) +[2026-06-02 16:53:52,720][255279] Updated weights for policy 0, policy_version 17667 (0.0008) +[2026-06-02 16:53:52,913][255279] Updated weights for policy 0, policy_version 17677 (0.0009) +[2026-06-02 16:53:53,098][255279] Updated weights for policy 0, policy_version 17687 (0.0008) +[2026-06-02 16:53:53,284][255279] Updated weights for policy 0, policy_version 17697 (0.0009) +[2026-06-02 16:53:53,470][255279] Updated weights for policy 0, policy_version 17707 (0.0008) +[2026-06-02 16:53:54,166][255279] Updated weights for policy 0, policy_version 17717 (0.0008) +[2026-06-02 16:53:54,353][255279] Updated weights for policy 0, policy_version 17727 (0.0009) +[2026-06-02 16:53:54,533][255279] Updated weights for policy 0, policy_version 17737 (0.0009) +[2026-06-02 16:53:54,725][255279] Updated weights for policy 0, policy_version 17747 (0.0009) +[2026-06-02 16:53:54,930][255279] Updated weights for policy 0, policy_version 17758 (0.0008) +[2026-06-02 16:53:55,121][255279] Updated weights for policy 0, policy_version 17768 (0.0009) +[2026-06-02 16:53:55,502][253683] Fps is (10 sec: 19660.7, 60 sec: 19660.8, 300 sec: 19549.7). Total num frames: 9142272. Throughput: 0: 19521.4. Samples: 9119488. Policy #0 lag: (min: 60.0, avg: 76.7, max: 124.0) +[2026-06-02 16:53:55,503][253683] Avg episode reward: [(0, '550.926')] +[2026-06-02 16:53:55,507][255187] Saving new best policy, reward=550.926! +[2026-06-02 16:53:55,820][255279] Updated weights for policy 0, policy_version 17778 (0.0009) +[2026-06-02 16:53:55,995][255279] Updated weights for policy 0, policy_version 17788 (0.0009) +[2026-06-02 16:53:56,199][255279] Updated weights for policy 0, policy_version 17799 (0.0009) +[2026-06-02 16:53:56,392][255279] Updated weights for policy 0, policy_version 17809 (0.0008) +[2026-06-02 16:53:56,567][255279] Updated weights for policy 0, policy_version 17819 (0.0008) +[2026-06-02 16:53:56,776][255279] Updated weights for policy 0, policy_version 17829 (0.0009) +[2026-06-02 16:53:56,956][255279] Updated weights for policy 0, policy_version 17839 (0.0008) +[2026-06-02 16:53:57,617][255279] Updated weights for policy 0, policy_version 17849 (0.0008) +[2026-06-02 16:53:57,808][255279] Updated weights for policy 0, policy_version 17859 (0.0008) +[2026-06-02 16:53:57,993][255279] Updated weights for policy 0, policy_version 17869 (0.0008) +[2026-06-02 16:53:58,182][255279] Updated weights for policy 0, policy_version 17879 (0.0008) +[2026-06-02 16:53:58,372][255279] Updated weights for policy 0, policy_version 17889 (0.0008) +[2026-06-02 16:53:58,556][255279] Updated weights for policy 0, policy_version 17899 (0.0008) +[2026-06-02 16:53:59,248][255279] Updated weights for policy 0, policy_version 17909 (0.0009) +[2026-06-02 16:53:59,431][255279] Updated weights for policy 0, policy_version 17919 (0.0008) +[2026-06-02 16:53:59,618][255279] Updated weights for policy 0, policy_version 17929 (0.0008) +[2026-06-02 16:53:59,807][255279] Updated weights for policy 0, policy_version 17939 (0.0009) +[2026-06-02 16:53:59,993][255279] Updated weights for policy 0, policy_version 17949 (0.0009) +[2026-06-02 16:54:00,185][255279] Updated weights for policy 0, policy_version 17959 (0.0008) +[2026-06-02 16:54:00,502][253683] Fps is (10 sec: 19661.0, 60 sec: 19660.8, 300 sec: 19549.7). Total num frames: 9240576. Throughput: 0: 19291.0. Samples: 9229952. Policy #0 lag: (min: 63.0, avg: 81.3, max: 127.0) +[2026-06-02 16:54:00,502][253683] Avg episode reward: [(0, '599.731')] +[2026-06-02 16:54:00,507][255187] Saving new best policy, reward=599.731! +[2026-06-02 16:54:00,897][255279] Updated weights for policy 0, policy_version 17969 (0.0008) +[2026-06-02 16:54:01,072][255279] Updated weights for policy 0, policy_version 17979 (0.0008) +[2026-06-02 16:54:01,252][255279] Updated weights for policy 0, policy_version 17989 (0.0008) +[2026-06-02 16:54:01,445][255279] Updated weights for policy 0, policy_version 17999 (0.0009) +[2026-06-02 16:54:01,632][255279] Updated weights for policy 0, policy_version 18009 (0.0009) +[2026-06-02 16:54:01,820][255279] Updated weights for policy 0, policy_version 18019 (0.0009) +[2026-06-02 16:54:02,005][255279] Updated weights for policy 0, policy_version 18029 (0.0009) +[2026-06-02 16:54:02,706][255279] Updated weights for policy 0, policy_version 18040 (0.0008) +[2026-06-02 16:54:02,893][255279] Updated weights for policy 0, policy_version 18050 (0.0008) +[2026-06-02 16:54:03,096][255279] Updated weights for policy 0, policy_version 18061 (0.0009) +[2026-06-02 16:54:03,284][255279] Updated weights for policy 0, policy_version 18071 (0.0008) +[2026-06-02 16:54:03,474][255279] Updated weights for policy 0, policy_version 18081 (0.0008) +[2026-06-02 16:54:03,656][255279] Updated weights for policy 0, policy_version 18091 (0.0009) +[2026-06-02 16:54:04,333][255279] Updated weights for policy 0, policy_version 18101 (0.0008) +[2026-06-02 16:54:04,502][255279] Updated weights for policy 0, policy_version 18111 (0.0008) +[2026-06-02 16:54:04,701][255279] Updated weights for policy 0, policy_version 18121 (0.0008) +[2026-06-02 16:54:04,880][255279] Updated weights for policy 0, policy_version 18131 (0.0009) +[2026-06-02 16:54:05,066][255279] Updated weights for policy 0, policy_version 18141 (0.0008) +[2026-06-02 16:54:05,253][255279] Updated weights for policy 0, policy_version 18151 (0.0008) +[2026-06-02 16:54:05,501][253683] Fps is (10 sec: 19660.9, 60 sec: 19660.9, 300 sec: 19549.7). Total num frames: 9338880. Throughput: 0: 19558.4. Samples: 9352064. Policy #0 lag: (min: 63.0, avg: 81.3, max: 127.0) +[2026-06-02 16:54:05,502][253683] Avg episode reward: [(0, '621.531')] +[2026-06-02 16:54:05,507][255187] Saving new best policy, reward=621.531! +[2026-06-02 16:54:05,957][255279] Updated weights for policy 0, policy_version 18161 (0.0009) +[2026-06-02 16:54:06,130][255279] Updated weights for policy 0, policy_version 18171 (0.0012) +[2026-06-02 16:54:06,308][255279] Updated weights for policy 0, policy_version 18181 (0.0009) +[2026-06-02 16:54:06,484][255279] Updated weights for policy 0, policy_version 18191 (0.0008) +[2026-06-02 16:54:06,683][255279] Updated weights for policy 0, policy_version 18201 (0.0009) +[2026-06-02 16:54:06,869][255279] Updated weights for policy 0, policy_version 18211 (0.0009) +[2026-06-02 16:54:07,059][255279] Updated weights for policy 0, policy_version 18221 (0.0009) +[2026-06-02 16:54:07,752][255279] Updated weights for policy 0, policy_version 18231 (0.0008) +[2026-06-02 16:54:07,929][255279] Updated weights for policy 0, policy_version 18241 (0.0008) +[2026-06-02 16:54:08,114][255279] Updated weights for policy 0, policy_version 18251 (0.0008) +[2026-06-02 16:54:08,314][255279] Updated weights for policy 0, policy_version 18261 (0.0009) +[2026-06-02 16:54:08,497][255279] Updated weights for policy 0, policy_version 18271 (0.0008) +[2026-06-02 16:54:08,692][255279] Updated weights for policy 0, policy_version 18281 (0.0009) +[2026-06-02 16:54:09,374][255279] Updated weights for policy 0, policy_version 18291 (0.0008) +[2026-06-02 16:54:09,562][255279] Updated weights for policy 0, policy_version 18302 (0.0008) +[2026-06-02 16:54:09,784][255279] Updated weights for policy 0, policy_version 18314 (0.0008) +[2026-06-02 16:54:09,978][255279] Updated weights for policy 0, policy_version 18324 (0.0008) +[2026-06-02 16:54:10,168][255279] Updated weights for policy 0, policy_version 18334 (0.0008) +[2026-06-02 16:54:10,357][255279] Updated weights for policy 0, policy_version 18344 (0.0008) +[2026-06-02 16:54:10,501][253683] Fps is (10 sec: 19660.8, 60 sec: 19660.8, 300 sec: 19549.7). Total num frames: 9437184. Throughput: 0: 19549.9. Samples: 9412864. Policy #0 lag: (min: 63.0, avg: 81.3, max: 127.0) +[2026-06-02 16:54:10,502][253683] Avg episode reward: [(0, '619.935')] +[2026-06-02 16:54:11,052][255279] Updated weights for policy 0, policy_version 18354 (0.0008) +[2026-06-02 16:54:11,240][255279] Updated weights for policy 0, policy_version 18365 (0.0009) +[2026-06-02 16:54:11,433][255279] Updated weights for policy 0, policy_version 18375 (0.0008) +[2026-06-02 16:54:11,621][255279] Updated weights for policy 0, policy_version 18385 (0.0008) +[2026-06-02 16:54:11,806][255279] Updated weights for policy 0, policy_version 18395 (0.0008) +[2026-06-02 16:54:11,988][255279] Updated weights for policy 0, policy_version 18405 (0.0008) +[2026-06-02 16:54:12,180][255279] Updated weights for policy 0, policy_version 18415 (0.0009) +[2026-06-02 16:54:12,853][255279] Updated weights for policy 0, policy_version 18425 (0.0008) +[2026-06-02 16:54:13,043][255279] Updated weights for policy 0, policy_version 18435 (0.0009) +[2026-06-02 16:54:13,237][255279] Updated weights for policy 0, policy_version 18445 (0.0008) +[2026-06-02 16:54:13,427][255279] Updated weights for policy 0, policy_version 18455 (0.0008) +[2026-06-02 16:54:13,620][255279] Updated weights for policy 0, policy_version 18465 (0.0008) +[2026-06-02 16:54:13,807][255279] Updated weights for policy 0, policy_version 18475 (0.0009) +[2026-06-02 16:54:14,477][255279] Updated weights for policy 0, policy_version 18485 (0.0008) +[2026-06-02 16:54:14,649][255279] Updated weights for policy 0, policy_version 18495 (0.0008) +[2026-06-02 16:54:14,848][255279] Updated weights for policy 0, policy_version 18505 (0.0009) +[2026-06-02 16:54:15,026][255279] Updated weights for policy 0, policy_version 18515 (0.0008) +[2026-06-02 16:54:15,221][255279] Updated weights for policy 0, policy_version 18525 (0.0008) +[2026-06-02 16:54:15,403][255279] Updated weights for policy 0, policy_version 18535 (0.0009) +[2026-06-02 16:54:15,502][253683] Fps is (10 sec: 16383.9, 60 sec: 19114.6, 300 sec: 19438.6). Total num frames: 9502720. Throughput: 0: 19293.9. Samples: 9524224. Policy #0 lag: (min: 42.0, avg: 88.8, max: 106.0) +[2026-06-02 16:54:15,502][253683] Avg episode reward: [(0, '613.660')] +[2026-06-02 16:54:16,087][255279] Updated weights for policy 0, policy_version 18545 (0.0008) +[2026-06-02 16:54:16,271][255279] Updated weights for policy 0, policy_version 18555 (0.0008) +[2026-06-02 16:54:16,454][255279] Updated weights for policy 0, policy_version 18565 (0.0008) +[2026-06-02 16:54:16,641][255279] Updated weights for policy 0, policy_version 18575 (0.0009) +[2026-06-02 16:54:16,822][255279] Updated weights for policy 0, policy_version 18585 (0.0008) +[2026-06-02 16:54:17,014][255279] Updated weights for policy 0, policy_version 18595 (0.0008) +[2026-06-02 16:54:17,203][255279] Updated weights for policy 0, policy_version 18605 (0.0008) +[2026-06-02 16:54:17,887][255279] Updated weights for policy 0, policy_version 18615 (0.0009) +[2026-06-02 16:54:18,070][255279] Updated weights for policy 0, policy_version 18625 (0.0008) +[2026-06-02 16:54:18,274][255279] Updated weights for policy 0, policy_version 18636 (0.0008) +[2026-06-02 16:54:18,460][255279] Updated weights for policy 0, policy_version 18646 (0.0008) +[2026-06-02 16:54:18,649][255279] Updated weights for policy 0, policy_version 18656 (0.0009) +[2026-06-02 16:54:18,845][255279] Updated weights for policy 0, policy_version 18666 (0.0008) +[2026-06-02 16:54:19,528][255279] Updated weights for policy 0, policy_version 18676 (0.0009) +[2026-06-02 16:54:19,731][255279] Updated weights for policy 0, policy_version 18688 (0.0009) +[2026-06-02 16:54:19,926][255279] Updated weights for policy 0, policy_version 18698 (0.0008) +[2026-06-02 16:54:20,104][255279] Updated weights for policy 0, policy_version 18708 (0.0009) +[2026-06-02 16:54:20,289][255279] Updated weights for policy 0, policy_version 18718 (0.0008) +[2026-06-02 16:54:20,478][255279] Updated weights for policy 0, policy_version 18728 (0.0008) +[2026-06-02 16:54:20,502][253683] Fps is (10 sec: 16383.9, 60 sec: 19114.7, 300 sec: 19438.6). Total num frames: 9601024. Throughput: 0: 19561.3. Samples: 9645696. Policy #0 lag: (min: 42.0, avg: 88.8, max: 106.0) +[2026-06-02 16:54:20,502][253683] Avg episode reward: [(0, '623.094')] +[2026-06-02 16:54:20,617][255187] Saving new best policy, reward=623.094! +[2026-06-02 16:54:21,170][255279] Updated weights for policy 0, policy_version 18738 (0.0009) +[2026-06-02 16:54:21,353][255279] Updated weights for policy 0, policy_version 18748 (0.0008) +[2026-06-02 16:54:21,545][255279] Updated weights for policy 0, policy_version 18758 (0.0008) +[2026-06-02 16:54:21,730][255279] Updated weights for policy 0, policy_version 18768 (0.0008) +[2026-06-02 16:54:21,911][255279] Updated weights for policy 0, policy_version 18778 (0.0009) +[2026-06-02 16:54:22,106][255279] Updated weights for policy 0, policy_version 18788 (0.0009) +[2026-06-02 16:54:22,300][255279] Updated weights for policy 0, policy_version 18798 (0.0009) +[2026-06-02 16:54:22,971][255279] Updated weights for policy 0, policy_version 18808 (0.0008) +[2026-06-02 16:54:23,164][255279] Updated weights for policy 0, policy_version 18819 (0.0008) +[2026-06-02 16:54:23,360][255279] Updated weights for policy 0, policy_version 18829 (0.0008) +[2026-06-02 16:54:23,545][255279] Updated weights for policy 0, policy_version 18839 (0.0008) +[2026-06-02 16:54:23,737][255279] Updated weights for policy 0, policy_version 18849 (0.0008) +[2026-06-02 16:54:23,942][255279] Updated weights for policy 0, policy_version 18860 (0.0008) +[2026-06-02 16:54:24,626][255279] Updated weights for policy 0, policy_version 18870 (0.0009) +[2026-06-02 16:54:24,811][255279] Updated weights for policy 0, policy_version 18880 (0.0009) +[2026-06-02 16:54:24,992][255279] Updated weights for policy 0, policy_version 18890 (0.0008) +[2026-06-02 16:54:25,182][255279] Updated weights for policy 0, policy_version 18900 (0.0008) +[2026-06-02 16:54:25,382][255279] Updated weights for policy 0, policy_version 18911 (0.0008) +[2026-06-02 16:54:25,502][253683] Fps is (10 sec: 19660.9, 60 sec: 19114.7, 300 sec: 19438.6). Total num frames: 9699328. Throughput: 0: 19555.6. Samples: 9706368. Policy #0 lag: (min: 42.0, avg: 88.8, max: 106.0) +[2026-06-02 16:54:25,502][253683] Avg episode reward: [(0, '637.347')] +[2026-06-02 16:54:25,574][255279] Updated weights for policy 0, policy_version 18921 (0.0009) +[2026-06-02 16:54:25,700][255187] Saving new best policy, reward=637.347! +[2026-06-02 16:54:26,285][255279] Updated weights for policy 0, policy_version 18931 (0.0009) +[2026-06-02 16:54:26,464][255279] Updated weights for policy 0, policy_version 18941 (0.0008) +[2026-06-02 16:54:26,645][255279] Updated weights for policy 0, policy_version 18951 (0.0009) +[2026-06-02 16:54:26,841][255279] Updated weights for policy 0, policy_version 18961 (0.0009) +[2026-06-02 16:54:27,021][255279] Updated weights for policy 0, policy_version 18971 (0.0008) +[2026-06-02 16:54:27,220][255279] Updated weights for policy 0, policy_version 18981 (0.0009) +[2026-06-02 16:54:27,410][255279] Updated weights for policy 0, policy_version 18991 (0.0009) +[2026-06-02 16:54:28,085][255279] Updated weights for policy 0, policy_version 19002 (0.0008) +[2026-06-02 16:54:28,266][255279] Updated weights for policy 0, policy_version 19012 (0.0009) +[2026-06-02 16:54:28,445][255279] Updated weights for policy 0, policy_version 19022 (0.0009) +[2026-06-02 16:54:28,651][255279] Updated weights for policy 0, policy_version 19033 (0.0008) +[2026-06-02 16:54:28,839][255279] Updated weights for policy 0, policy_version 19043 (0.0009) +[2026-06-02 16:54:29,022][255279] Updated weights for policy 0, policy_version 19053 (0.0009) +[2026-06-02 16:54:29,703][255279] Updated weights for policy 0, policy_version 19063 (0.0008) +[2026-06-02 16:54:29,892][255279] Updated weights for policy 0, policy_version 19073 (0.0008) +[2026-06-02 16:54:30,073][255279] Updated weights for policy 0, policy_version 19083 (0.0008) +[2026-06-02 16:54:30,261][255279] Updated weights for policy 0, policy_version 19093 (0.0008) +[2026-06-02 16:54:30,304][255187] Early stopping after 5 epochs (40 sgd steps), loss delta 0.0000005 +[2026-06-02 16:54:30,502][253683] Fps is (10 sec: 22937.6, 60 sec: 19660.8, 300 sec: 19549.7). Total num frames: 9830400. Throughput: 0: 19407.6. Samples: 9816704. Policy #0 lag: (min: 63.0, avg: 82.4, max: 127.0) +[2026-06-02 16:54:30,502][253683] Avg episode reward: [(0, '664.893')] +[2026-06-02 16:54:30,507][255187] Saving new best policy, reward=664.893! +[2026-06-02 16:54:31,076][255279] Updated weights for policy 0, policy_version 19104 (0.0009) +[2026-06-02 16:54:31,264][255279] Updated weights for policy 0, policy_version 19114 (0.0009) +[2026-06-02 16:54:31,434][255279] Updated weights for policy 0, policy_version 19124 (0.0009) +[2026-06-02 16:54:31,619][255279] Updated weights for policy 0, policy_version 19134 (0.0009) +[2026-06-02 16:54:31,808][255279] Updated weights for policy 0, policy_version 19144 (0.0008) +[2026-06-02 16:54:32,006][255279] Updated weights for policy 0, policy_version 19155 (0.0008) +[2026-06-02 16:54:32,761][255279] Updated weights for policy 0, policy_version 19166 (0.0008) +[2026-06-02 16:54:32,958][255279] Updated weights for policy 0, policy_version 19177 (0.0009) +[2026-06-02 16:54:33,151][255279] Updated weights for policy 0, policy_version 19187 (0.0008) +[2026-06-02 16:54:33,330][255279] Updated weights for policy 0, policy_version 19197 (0.0009) +[2026-06-02 16:54:33,509][255279] Updated weights for policy 0, policy_version 19207 (0.0009) +[2026-06-02 16:54:33,703][255279] Updated weights for policy 0, policy_version 19217 (0.0009) +[2026-06-02 16:54:34,397][255279] Updated weights for policy 0, policy_version 19227 (0.0009) +[2026-06-02 16:54:34,586][255279] Updated weights for policy 0, policy_version 19237 (0.0008) +[2026-06-02 16:54:34,765][255279] Updated weights for policy 0, policy_version 19247 (0.0009) +[2026-06-02 16:54:34,986][255279] Updated weights for policy 0, policy_version 19259 (0.0009) +[2026-06-02 16:54:35,180][255279] Updated weights for policy 0, policy_version 19270 (0.0008) +[2026-06-02 16:54:35,376][255279] Updated weights for policy 0, policy_version 19280 (0.0009) +[2026-06-02 16:54:35,501][253683] Fps is (10 sec: 19660.9, 60 sec: 19114.7, 300 sec: 19438.6). Total num frames: 9895936. Throughput: 0: 19675.1. Samples: 9944064. Policy #0 lag: (min: 63.0, avg: 82.4, max: 127.0) +[2026-06-02 16:54:35,502][253683] Avg episode reward: [(0, '627.697')] +[2026-06-02 16:54:36,069][255279] Updated weights for policy 0, policy_version 19290 (0.0009) +[2026-06-02 16:54:36,235][255279] Updated weights for policy 0, policy_version 19300 (0.0008) +[2026-06-02 16:54:36,448][255279] Updated weights for policy 0, policy_version 19311 (0.0008) +[2026-06-02 16:54:36,634][255279] Updated weights for policy 0, policy_version 19321 (0.0008) +[2026-06-02 16:54:36,810][255279] Updated weights for policy 0, policy_version 19331 (0.0009) +[2026-06-02 16:54:37,022][255279] Updated weights for policy 0, policy_version 19342 (0.0009) +[2026-06-02 16:54:37,200][255279] Updated weights for policy 0, policy_version 19352 (0.0008) +[2026-06-02 16:54:37,875][255279] Updated weights for policy 0, policy_version 19362 (0.0008) +[2026-06-02 16:54:38,061][255279] Updated weights for policy 0, policy_version 19372 (0.0009) +[2026-06-02 16:54:38,253][255279] Updated weights for policy 0, policy_version 19382 (0.0008) +[2026-06-02 16:54:38,449][255279] Updated weights for policy 0, policy_version 19393 (0.0008) +[2026-06-02 16:54:38,637][255279] Updated weights for policy 0, policy_version 19403 (0.0008) +[2026-06-02 16:54:38,851][255279] Updated weights for policy 0, policy_version 19414 (0.0009) +[2026-06-02 16:54:39,535][255279] Updated weights for policy 0, policy_version 19424 (0.0009) +[2026-06-02 16:54:39,715][255279] Updated weights for policy 0, policy_version 19434 (0.0009) +[2026-06-02 16:54:39,911][255279] Updated weights for policy 0, policy_version 19445 (0.0008) +[2026-06-02 16:54:40,105][255279] Updated weights for policy 0, policy_version 19455 (0.0009) +[2026-06-02 16:54:40,312][255279] Updated weights for policy 0, policy_version 19466 (0.0009) +[2026-06-02 16:54:40,490][255279] Updated weights for policy 0, policy_version 19476 (0.0009) +[2026-06-02 16:54:40,502][253683] Fps is (10 sec: 16383.6, 60 sec: 19114.6, 300 sec: 19438.6). Total num frames: 9994240. Throughput: 0: 19674.9. Samples: 10004864. Policy #0 lag: (min: 63.0, avg: 82.4, max: 127.0) +[2026-06-02 16:54:40,503][253683] Avg episode reward: [(0, '641.852')] +[2026-06-02 16:54:41,182][255279] Updated weights for policy 0, policy_version 19486 (0.0009) +[2026-06-02 16:54:41,355][255279] Updated weights for policy 0, policy_version 19496 (0.0009) +[2026-06-02 16:54:41,544][255279] Updated weights for policy 0, policy_version 19506 (0.0009) +[2026-06-02 16:54:41,734][255279] Updated weights for policy 0, policy_version 19516 (0.0008) +[2026-06-02 16:54:41,923][255279] Updated weights for policy 0, policy_version 19526 (0.0009) +[2026-06-02 16:54:42,128][255279] Updated weights for policy 0, policy_version 19537 (0.0009) +[2026-06-02 16:54:42,804][255279] Updated weights for policy 0, policy_version 19547 (0.0009) +[2026-06-02 16:54:43,002][255279] Updated weights for policy 0, policy_version 19558 (0.0007) +[2026-06-02 16:54:43,202][255279] Updated weights for policy 0, policy_version 19569 (0.0009) +[2026-06-02 16:54:43,393][255279] Updated weights for policy 0, policy_version 19579 (0.0009) +[2026-06-02 16:54:43,582][255279] Updated weights for policy 0, policy_version 19589 (0.0009) +[2026-06-02 16:54:43,776][255279] Updated weights for policy 0, policy_version 19599 (0.0009) +[2026-06-02 16:54:44,452][255279] Updated weights for policy 0, policy_version 19609 (0.0009) +[2026-06-02 16:54:44,634][255279] Updated weights for policy 0, policy_version 19619 (0.0009) +[2026-06-02 16:54:44,817][255279] Updated weights for policy 0, policy_version 19629 (0.0008) +[2026-06-02 16:54:45,006][255279] Updated weights for policy 0, policy_version 19639 (0.0009) +[2026-06-02 16:54:45,193][255279] Updated weights for policy 0, policy_version 19649 (0.0009) +[2026-06-02 16:54:45,404][255279] Updated weights for policy 0, policy_version 19660 (0.0009) +[2026-06-02 16:54:45,502][253683] Fps is (10 sec: 19660.8, 60 sec: 19114.7, 300 sec: 19438.6). Total num frames: 10092544. Throughput: 0: 19669.3. Samples: 10115072. Policy #0 lag: (min: 22.0, avg: 39.1, max: 86.0) +[2026-06-02 16:54:45,502][253683] Avg episode reward: [(0, '622.513')] +[2026-06-02 16:54:45,590][255279] Updated weights for policy 0, policy_version 19670 (0.0009) +[2026-06-02 16:54:46,250][255279] Updated weights for policy 0, policy_version 19680 (0.0009) +[2026-06-02 16:54:46,439][255279] Updated weights for policy 0, policy_version 19690 (0.0009) +[2026-06-02 16:54:46,627][255279] Updated weights for policy 0, policy_version 19700 (0.0007) +[2026-06-02 16:54:46,819][255279] Updated weights for policy 0, policy_version 19710 (0.0009) +[2026-06-02 16:54:46,999][255279] Updated weights for policy 0, policy_version 19720 (0.0009) +[2026-06-02 16:54:47,186][255279] Updated weights for policy 0, policy_version 19730 (0.0009) +[2026-06-02 16:54:47,869][255279] Updated weights for policy 0, policy_version 19741 (0.0008) +[2026-06-02 16:54:48,051][255279] Updated weights for policy 0, policy_version 19751 (0.0008) +[2026-06-02 16:54:48,246][255279] Updated weights for policy 0, policy_version 19761 (0.0008) +[2026-06-02 16:54:48,431][255279] Updated weights for policy 0, policy_version 19771 (0.0008) +[2026-06-02 16:54:48,618][255279] Updated weights for policy 0, policy_version 19781 (0.0008) +[2026-06-02 16:54:48,813][255279] Updated weights for policy 0, policy_version 19791 (0.0009) +[2026-06-02 16:54:49,487][255279] Updated weights for policy 0, policy_version 19801 (0.0008) +[2026-06-02 16:54:49,657][255279] Updated weights for policy 0, policy_version 19811 (0.0008) +[2026-06-02 16:54:49,842][255279] Updated weights for policy 0, policy_version 19821 (0.0008) +[2026-06-02 16:54:50,037][255279] Updated weights for policy 0, policy_version 19831 (0.0008) +[2026-06-02 16:54:50,221][255279] Updated weights for policy 0, policy_version 19841 (0.0008) +[2026-06-02 16:54:50,440][255279] Updated weights for policy 0, policy_version 19852 (0.0008) +[2026-06-02 16:54:50,502][253683] Fps is (10 sec: 19660.6, 60 sec: 19114.6, 300 sec: 19438.6). Total num frames: 10190848. Throughput: 0: 19649.3. Samples: 10236288. Policy #0 lag: (min: 22.0, avg: 39.1, max: 86.0) +[2026-06-02 16:54:50,503][253683] Avg episode reward: [(0, '655.150')] +[2026-06-02 16:54:50,627][255279] Updated weights for policy 0, policy_version 19862 (0.0009) +[2026-06-02 16:54:51,287][255279] Updated weights for policy 0, policy_version 19872 (0.0008) +[2026-06-02 16:54:51,481][255279] Updated weights for policy 0, policy_version 19882 (0.0008) +[2026-06-02 16:54:51,674][255279] Updated weights for policy 0, policy_version 19893 (0.0008) +[2026-06-02 16:54:51,874][255279] Updated weights for policy 0, policy_version 19903 (0.0008) +[2026-06-02 16:54:52,066][255279] Updated weights for policy 0, policy_version 19913 (0.0009) +[2026-06-02 16:54:52,252][255279] Updated weights for policy 0, policy_version 19923 (0.0008) +[2026-06-02 16:54:52,933][255279] Updated weights for policy 0, policy_version 19933 (0.0008) +[2026-06-02 16:54:53,110][255279] Updated weights for policy 0, policy_version 19943 (0.0008) +[2026-06-02 16:54:53,298][255279] Updated weights for policy 0, policy_version 19953 (0.0008) +[2026-06-02 16:54:53,488][255279] Updated weights for policy 0, policy_version 19963 (0.0008) +[2026-06-02 16:54:53,675][255279] Updated weights for policy 0, policy_version 19973 (0.0008) +[2026-06-02 16:54:53,881][255279] Updated weights for policy 0, policy_version 19984 (0.0008) +[2026-06-02 16:54:54,570][255279] Updated weights for policy 0, policy_version 19994 (0.0008) +[2026-06-02 16:54:54,756][255279] Updated weights for policy 0, policy_version 20004 (0.0008) +[2026-06-02 16:54:54,935][255279] Updated weights for policy 0, policy_version 20014 (0.0008) +[2026-06-02 16:54:55,115][255279] Updated weights for policy 0, policy_version 20024 (0.0008) +[2026-06-02 16:54:55,310][255279] Updated weights for policy 0, policy_version 20034 (0.0008) +[2026-06-02 16:54:55,491][255279] Updated weights for policy 0, policy_version 20044 (0.0008) +[2026-06-02 16:54:55,501][253683] Fps is (10 sec: 19660.8, 60 sec: 19114.7, 300 sec: 19438.6). Total num frames: 10289152. Throughput: 0: 19638.0. Samples: 10296576. Policy #0 lag: (min: 22.0, avg: 39.1, max: 86.0) +[2026-06-02 16:54:55,502][253683] Avg episode reward: [(0, '666.547')] +[2026-06-02 16:54:55,691][255279] Updated weights for policy 0, policy_version 20054 (0.0008) +[2026-06-02 16:54:55,719][255187] Saving new best policy, reward=666.547! +[2026-06-02 16:54:56,393][255279] Updated weights for policy 0, policy_version 20064 (0.0009) +[2026-06-02 16:54:56,577][255279] Updated weights for policy 0, policy_version 20074 (0.0009) +[2026-06-02 16:54:56,762][255279] Updated weights for policy 0, policy_version 20084 (0.0009) +[2026-06-02 16:54:56,950][255279] Updated weights for policy 0, policy_version 20094 (0.0009) +[2026-06-02 16:54:57,138][255279] Updated weights for policy 0, policy_version 20104 (0.0008) +[2026-06-02 16:54:57,325][255279] Updated weights for policy 0, policy_version 20114 (0.0009) +[2026-06-02 16:54:57,977][255279] Updated weights for policy 0, policy_version 20124 (0.0008) +[2026-06-02 16:54:58,154][255279] Updated weights for policy 0, policy_version 20134 (0.0008) +[2026-06-02 16:54:58,348][255279] Updated weights for policy 0, policy_version 20144 (0.0008) +[2026-06-02 16:54:58,538][255279] Updated weights for policy 0, policy_version 20154 (0.0009) +[2026-06-02 16:54:58,732][255279] Updated weights for policy 0, policy_version 20164 (0.0008) +[2026-06-02 16:54:58,925][255279] Updated weights for policy 0, policy_version 20175 (0.0008) +[2026-06-02 16:54:59,638][255279] Updated weights for policy 0, policy_version 20185 (0.0009) +[2026-06-02 16:54:59,803][255279] Updated weights for policy 0, policy_version 20195 (0.0009) +[2026-06-02 16:54:59,990][255279] Updated weights for policy 0, policy_version 20205 (0.0009) +[2026-06-02 16:55:00,186][255279] Updated weights for policy 0, policy_version 20215 (0.0009) +[2026-06-02 16:55:00,367][255279] Updated weights for policy 0, policy_version 20225 (0.0009) +[2026-06-02 16:55:00,502][253683] Fps is (10 sec: 19661.4, 60 sec: 19114.7, 300 sec: 19438.6). Total num frames: 10387456. Throughput: 0: 19640.9. Samples: 10408064. Policy #0 lag: (min: 38.0, avg: 57.3, max: 102.0) +[2026-06-02 16:55:00,503][253683] Avg episode reward: [(0, '668.170')] +[2026-06-02 16:55:00,557][255279] Updated weights for policy 0, policy_version 20235 (0.0009) +[2026-06-02 16:55:00,741][255279] Updated weights for policy 0, policy_version 20245 (0.0009) +[2026-06-02 16:55:00,799][255187] Saving new best policy, reward=668.170! +[2026-06-02 16:55:01,428][255279] Updated weights for policy 0, policy_version 20255 (0.0008) +[2026-06-02 16:55:01,612][255279] Updated weights for policy 0, policy_version 20265 (0.0008) +[2026-06-02 16:55:01,805][255279] Updated weights for policy 0, policy_version 20275 (0.0008) +[2026-06-02 16:55:01,994][255279] Updated weights for policy 0, policy_version 20285 (0.0009) +[2026-06-02 16:55:02,185][255279] Updated weights for policy 0, policy_version 20295 (0.0009) +[2026-06-02 16:55:02,378][255279] Updated weights for policy 0, policy_version 20305 (0.0009) +[2026-06-02 16:55:03,048][255279] Updated weights for policy 0, policy_version 20315 (0.0009) +[2026-06-02 16:55:03,232][255279] Updated weights for policy 0, policy_version 20325 (0.0008) +[2026-06-02 16:55:03,428][255279] Updated weights for policy 0, policy_version 20335 (0.0009) +[2026-06-02 16:55:03,618][255279] Updated weights for policy 0, policy_version 20345 (0.0008) +[2026-06-02 16:55:03,807][255279] Updated weights for policy 0, policy_version 20355 (0.0009) +[2026-06-02 16:55:03,992][255279] Updated weights for policy 0, policy_version 20365 (0.0009) +[2026-06-02 16:55:04,194][255279] Updated weights for policy 0, policy_version 20375 (0.0008) +[2026-06-02 16:55:04,851][255279] Updated weights for policy 0, policy_version 20385 (0.0009) +[2026-06-02 16:55:05,038][255279] Updated weights for policy 0, policy_version 20395 (0.0009) +[2026-06-02 16:55:05,224][255279] Updated weights for policy 0, policy_version 20405 (0.0009) +[2026-06-02 16:55:05,408][255279] Updated weights for policy 0, policy_version 20415 (0.0009) +[2026-06-02 16:55:05,502][253683] Fps is (10 sec: 19660.7, 60 sec: 19114.6, 300 sec: 19438.6). Total num frames: 10485760. Throughput: 0: 19655.1. Samples: 10530176. Policy #0 lag: (min: 38.0, avg: 57.3, max: 102.0) +[2026-06-02 16:55:05,502][253683] Avg episode reward: [(0, '682.107')] +[2026-06-02 16:55:05,605][255279] Updated weights for policy 0, policy_version 20425 (0.0007) +[2026-06-02 16:55:05,818][255279] Updated weights for policy 0, policy_version 20436 (0.0006) +[2026-06-02 16:55:05,880][255187] Saving new best policy, reward=682.107! +[2026-06-02 16:55:06,499][255279] Updated weights for policy 0, policy_version 20447 (0.0008) +[2026-06-02 16:55:06,681][255279] Updated weights for policy 0, policy_version 20457 (0.0004) +[2026-06-02 16:55:06,870][255279] Updated weights for policy 0, policy_version 20467 (0.0004) +[2026-06-02 16:55:07,062][255279] Updated weights for policy 0, policy_version 20477 (0.0004) +[2026-06-02 16:55:07,252][255279] Updated weights for policy 0, policy_version 20487 (0.0004) +[2026-06-02 16:55:07,446][255279] Updated weights for policy 0, policy_version 20497 (0.0005) +[2026-06-02 16:55:08,122][255279] Updated weights for policy 0, policy_version 20507 (0.0009) +[2026-06-02 16:55:08,300][255279] Updated weights for policy 0, policy_version 20517 (0.0008) +[2026-06-02 16:55:08,477][255279] Updated weights for policy 0, policy_version 20527 (0.0008) +[2026-06-02 16:55:08,671][255279] Updated weights for policy 0, policy_version 20537 (0.0008) +[2026-06-02 16:55:08,861][255279] Updated weights for policy 0, policy_version 20547 (0.0008) +[2026-06-02 16:55:09,052][255279] Updated weights for policy 0, policy_version 20557 (0.0008) +[2026-06-02 16:55:09,238][255279] Updated weights for policy 0, policy_version 20567 (0.0008) +[2026-06-02 16:55:09,906][255279] Updated weights for policy 0, policy_version 20577 (0.0008) +[2026-06-02 16:55:10,084][255279] Updated weights for policy 0, policy_version 20587 (0.0008) +[2026-06-02 16:55:10,281][255279] Updated weights for policy 0, policy_version 20597 (0.0008) +[2026-06-02 16:55:10,466][255279] Updated weights for policy 0, policy_version 20607 (0.0008) +[2026-06-02 16:55:10,502][253683] Fps is (10 sec: 19660.3, 60 sec: 19114.6, 300 sec: 19438.6). Total num frames: 10584064. Throughput: 0: 19464.4. Samples: 10582272. Policy #0 lag: (min: 38.0, avg: 57.3, max: 102.0) +[2026-06-02 16:55:10,503][253683] Avg episode reward: [(0, '709.130')] +[2026-06-02 16:55:10,662][255279] Updated weights for policy 0, policy_version 20617 (0.0008) +[2026-06-02 16:55:10,849][255279] Updated weights for policy 0, policy_version 20627 (0.0009) +[2026-06-02 16:55:10,933][255187] Saving new best policy, reward=709.130! +[2026-06-02 16:55:11,539][255279] Updated weights for policy 0, policy_version 20637 (0.0009) +[2026-06-02 16:55:11,714][255279] Updated weights for policy 0, policy_version 20647 (0.0009) +[2026-06-02 16:55:11,903][255279] Updated weights for policy 0, policy_version 20657 (0.0009) +[2026-06-02 16:55:12,099][255279] Updated weights for policy 0, policy_version 20667 (0.0009) +[2026-06-02 16:55:12,283][255279] Updated weights for policy 0, policy_version 20677 (0.0009) +[2026-06-02 16:55:12,486][255279] Updated weights for policy 0, policy_version 20688 (0.0009) +[2026-06-02 16:55:13,184][255279] Updated weights for policy 0, policy_version 20698 (0.0009) +[2026-06-02 16:55:13,357][255279] Updated weights for policy 0, policy_version 20708 (0.0008) +[2026-06-02 16:55:13,544][255279] Updated weights for policy 0, policy_version 20718 (0.0009) +[2026-06-02 16:55:13,732][255279] Updated weights for policy 0, policy_version 20728 (0.0009) +[2026-06-02 16:55:13,917][255279] Updated weights for policy 0, policy_version 20738 (0.0008) +[2026-06-02 16:55:14,109][255279] Updated weights for policy 0, policy_version 20748 (0.0009) +[2026-06-02 16:55:14,298][255279] Updated weights for policy 0, policy_version 20758 (0.0008) +[2026-06-02 16:55:14,957][255279] Updated weights for policy 0, policy_version 20768 (0.0009) +[2026-06-02 16:55:15,138][255279] Updated weights for policy 0, policy_version 20778 (0.0009) +[2026-06-02 16:55:15,324][255279] Updated weights for policy 0, policy_version 20788 (0.0008) +[2026-06-02 16:55:15,502][253683] Fps is (10 sec: 19660.8, 60 sec: 19660.8, 300 sec: 19438.6). Total num frames: 10682368. Throughput: 0: 19669.3. Samples: 10701824. Policy #0 lag: (min: 63.0, avg: 80.2, max: 127.0) +[2026-06-02 16:55:15,502][253683] Avg episode reward: [(0, '690.312')] +[2026-06-02 16:55:15,512][255279] Updated weights for policy 0, policy_version 20798 (0.0008) +[2026-06-02 16:55:15,697][255279] Updated weights for policy 0, policy_version 20808 (0.0008) +[2026-06-02 16:55:15,883][255279] Updated weights for policy 0, policy_version 20818 (0.0008) +[2026-06-02 16:55:16,568][255279] Updated weights for policy 0, policy_version 20828 (0.0008) +[2026-06-02 16:55:16,753][255279] Updated weights for policy 0, policy_version 20838 (0.0008) +[2026-06-02 16:55:16,929][255279] Updated weights for policy 0, policy_version 20848 (0.0008) +[2026-06-02 16:55:17,139][255279] Updated weights for policy 0, policy_version 20859 (0.0008) +[2026-06-02 16:55:17,325][255279] Updated weights for policy 0, policy_version 20869 (0.0008) +[2026-06-02 16:55:17,521][255279] Updated weights for policy 0, policy_version 20879 (0.0008) +[2026-06-02 16:55:18,199][255279] Updated weights for policy 0, policy_version 20889 (0.0009) +[2026-06-02 16:55:18,378][255279] Updated weights for policy 0, policy_version 20899 (0.0008) +[2026-06-02 16:55:18,580][255279] Updated weights for policy 0, policy_version 20910 (0.0010) +[2026-06-02 16:55:18,767][255279] Updated weights for policy 0, policy_version 20920 (0.0009) +[2026-06-02 16:55:18,954][255279] Updated weights for policy 0, policy_version 20930 (0.0010) +[2026-06-02 16:55:19,156][255279] Updated weights for policy 0, policy_version 20941 (0.0009) +[2026-06-02 16:55:19,352][255279] Updated weights for policy 0, policy_version 20951 (0.0009) +[2026-06-02 16:55:20,019][255279] Updated weights for policy 0, policy_version 20961 (0.0007) +[2026-06-02 16:55:20,207][255279] Updated weights for policy 0, policy_version 20971 (0.0011) +[2026-06-02 16:55:20,394][255279] Updated weights for policy 0, policy_version 20981 (0.0009) +[2026-06-02 16:55:20,502][253683] Fps is (10 sec: 19661.3, 60 sec: 19660.8, 300 sec: 19438.6). Total num frames: 10780672. Throughput: 0: 19493.0. Samples: 10821248. Policy #0 lag: (min: 63.0, avg: 80.2, max: 127.0) +[2026-06-02 16:55:20,502][253683] Avg episode reward: [(0, '738.894')] +[2026-06-02 16:55:20,589][255279] Updated weights for policy 0, policy_version 20991 (0.0013) +[2026-06-02 16:55:20,787][255279] Updated weights for policy 0, policy_version 21002 (0.0012) +[2026-06-02 16:55:20,989][255279] Updated weights for policy 0, policy_version 21012 (0.0010) +[2026-06-02 16:55:21,052][255187] Saving new best policy, reward=738.894! +[2026-06-02 16:55:21,656][255279] Updated weights for policy 0, policy_version 21022 (0.0009) +[2026-06-02 16:55:21,841][255279] Updated weights for policy 0, policy_version 21032 (0.0009) +[2026-06-02 16:55:22,031][255279] Updated weights for policy 0, policy_version 21042 (0.0008) +[2026-06-02 16:55:22,221][255279] Updated weights for policy 0, policy_version 21052 (0.0009) +[2026-06-02 16:55:22,416][255279] Updated weights for policy 0, policy_version 21062 (0.0008) +[2026-06-02 16:55:22,598][255279] Updated weights for policy 0, policy_version 21072 (0.0007) +[2026-06-02 16:55:23,256][255279] Updated weights for policy 0, policy_version 21082 (0.0009) +[2026-06-02 16:55:23,451][255279] Updated weights for policy 0, policy_version 21093 (0.0009) +[2026-06-02 16:55:23,643][255279] Updated weights for policy 0, policy_version 21103 (0.0009) +[2026-06-02 16:55:23,830][255279] Updated weights for policy 0, policy_version 21113 (0.0009) +[2026-06-02 16:55:24,013][255279] Updated weights for policy 0, policy_version 21123 (0.0009) +[2026-06-02 16:55:24,198][255279] Updated weights for policy 0, policy_version 21133 (0.0008) +[2026-06-02 16:55:24,385][255279] Updated weights for policy 0, policy_version 21143 (0.0009) +[2026-06-02 16:55:25,078][255279] Updated weights for policy 0, policy_version 21153 (0.0008) +[2026-06-02 16:55:25,261][255279] Updated weights for policy 0, policy_version 21163 (0.0009) +[2026-06-02 16:55:25,449][255279] Updated weights for policy 0, policy_version 21173 (0.0008) +[2026-06-02 16:55:25,502][253683] Fps is (10 sec: 19660.9, 60 sec: 19660.8, 300 sec: 19438.6). Total num frames: 10878976. Throughput: 0: 19276.9. Samples: 10872320. Policy #0 lag: (min: 63.0, avg: 80.2, max: 127.0) +[2026-06-02 16:55:25,502][253683] Avg episode reward: [(0, '727.471')] +[2026-06-02 16:55:25,629][255279] Updated weights for policy 0, policy_version 21183 (0.0008) +[2026-06-02 16:55:25,828][255279] Updated weights for policy 0, policy_version 21193 (0.0009) +[2026-06-02 16:55:26,019][255279] Updated weights for policy 0, policy_version 21203 (0.0008) +[2026-06-02 16:55:26,695][255279] Updated weights for policy 0, policy_version 21213 (0.0008) +[2026-06-02 16:55:26,872][255279] Updated weights for policy 0, policy_version 21223 (0.0008) +[2026-06-02 16:55:27,060][255279] Updated weights for policy 0, policy_version 21233 (0.0008) +[2026-06-02 16:55:27,261][255279] Updated weights for policy 0, policy_version 21244 (0.0008) +[2026-06-02 16:55:27,452][255279] Updated weights for policy 0, policy_version 21254 (0.0008) +[2026-06-02 16:55:27,648][255279] Updated weights for policy 0, policy_version 21264 (0.0009) +[2026-06-02 16:55:28,304][255279] Updated weights for policy 0, policy_version 21274 (0.0008) +[2026-06-02 16:55:28,488][255279] Updated weights for policy 0, policy_version 21284 (0.0008) +[2026-06-02 16:55:28,671][255279] Updated weights for policy 0, policy_version 21294 (0.0008) +[2026-06-02 16:55:28,866][255279] Updated weights for policy 0, policy_version 21304 (0.0009) +[2026-06-02 16:55:29,055][255279] Updated weights for policy 0, policy_version 21314 (0.0008) +[2026-06-02 16:55:29,242][255279] Updated weights for policy 0, policy_version 21324 (0.0008) +[2026-06-02 16:55:29,439][255279] Updated weights for policy 0, policy_version 21334 (0.0008) +[2026-06-02 16:55:30,107][255279] Updated weights for policy 0, policy_version 21344 (0.0008) +[2026-06-02 16:55:30,287][255279] Updated weights for policy 0, policy_version 21354 (0.0008) +[2026-06-02 16:55:30,467][255279] Updated weights for policy 0, policy_version 21364 (0.0008) +[2026-06-02 16:55:30,502][253683] Fps is (10 sec: 19660.7, 60 sec: 19114.6, 300 sec: 19438.6). Total num frames: 10977280. Throughput: 0: 19512.9. Samples: 10993152. Policy #0 lag: (min: 63.0, avg: 80.2, max: 127.0) +[2026-06-02 16:55:30,502][253683] Avg episode reward: [(0, '705.370')] +[2026-06-02 16:55:30,654][255279] Updated weights for policy 0, policy_version 21374 (0.0008) +[2026-06-02 16:55:30,845][255279] Updated weights for policy 0, policy_version 21384 (0.0008) +[2026-06-02 16:55:31,028][255279] Updated weights for policy 0, policy_version 21394 (0.0008) +[2026-06-02 16:55:31,708][255279] Updated weights for policy 0, policy_version 21404 (0.0008) +[2026-06-02 16:55:31,883][255279] Updated weights for policy 0, policy_version 21414 (0.0008) +[2026-06-02 16:55:32,084][255279] Updated weights for policy 0, policy_version 21424 (0.0008) +[2026-06-02 16:55:32,276][255279] Updated weights for policy 0, policy_version 21434 (0.0008) +[2026-06-02 16:55:32,462][255279] Updated weights for policy 0, policy_version 21444 (0.0008) +[2026-06-02 16:55:32,650][255279] Updated weights for policy 0, policy_version 21454 (0.0009) +[2026-06-02 16:55:32,835][255279] Updated weights for policy 0, policy_version 21464 (0.0008) +[2026-06-02 16:55:33,514][255279] Updated weights for policy 0, policy_version 21474 (0.0008) +[2026-06-02 16:55:33,696][255279] Updated weights for policy 0, policy_version 21484 (0.0008) +[2026-06-02 16:55:33,885][255279] Updated weights for policy 0, policy_version 21494 (0.0008) +[2026-06-02 16:55:34,074][255279] Updated weights for policy 0, policy_version 21504 (0.0008) +[2026-06-02 16:55:34,261][255279] Updated weights for policy 0, policy_version 21514 (0.0008) +[2026-06-02 16:55:34,454][255279] Updated weights for policy 0, policy_version 21524 (0.0008) +[2026-06-02 16:55:35,118][255279] Updated weights for policy 0, policy_version 21534 (0.0008) +[2026-06-02 16:55:35,308][255279] Updated weights for policy 0, policy_version 21544 (0.0008) +[2026-06-02 16:55:35,488][255279] Updated weights for policy 0, policy_version 21554 (0.0008) +[2026-06-02 16:55:35,502][253683] Fps is (10 sec: 19660.8, 60 sec: 19660.8, 300 sec: 19438.7). Total num frames: 11075584. Throughput: 0: 19365.1. Samples: 11107712. Policy #0 lag: (min: 63.0, avg: 80.7, max: 127.0) +[2026-06-02 16:55:35,502][253683] Avg episode reward: [(0, '705.263')] +[2026-06-02 16:55:35,680][255279] Updated weights for policy 0, policy_version 21564 (0.0008) +[2026-06-02 16:55:35,883][255279] Updated weights for policy 0, policy_version 21574 (0.0009) +[2026-06-02 16:55:36,065][255279] Updated weights for policy 0, policy_version 21584 (0.0008) +[2026-06-02 16:55:36,734][255279] Updated weights for policy 0, policy_version 21594 (0.0009) +[2026-06-02 16:55:36,916][255279] Updated weights for policy 0, policy_version 21604 (0.0008) +[2026-06-02 16:55:37,102][255279] Updated weights for policy 0, policy_version 21614 (0.0008) +[2026-06-02 16:55:37,284][255279] Updated weights for policy 0, policy_version 21624 (0.0008) +[2026-06-02 16:55:37,469][255279] Updated weights for policy 0, policy_version 21634 (0.0008) +[2026-06-02 16:55:37,672][255279] Updated weights for policy 0, policy_version 21644 (0.0008) +[2026-06-02 16:55:37,851][255279] Updated weights for policy 0, policy_version 21654 (0.0008) +[2026-06-02 16:55:38,530][255279] Updated weights for policy 0, policy_version 21664 (0.0008) +[2026-06-02 16:55:38,708][255279] Updated weights for policy 0, policy_version 21674 (0.0008) +[2026-06-02 16:55:38,921][255279] Updated weights for policy 0, policy_version 21685 (0.0008) +[2026-06-02 16:55:39,115][255279] Updated weights for policy 0, policy_version 21695 (0.0008) +[2026-06-02 16:55:39,308][255279] Updated weights for policy 0, policy_version 21705 (0.0009) +[2026-06-02 16:55:39,502][255279] Updated weights for policy 0, policy_version 21715 (0.0008) +[2026-06-02 16:55:40,169][255279] Updated weights for policy 0, policy_version 21726 (0.0008) +[2026-06-02 16:55:40,344][255279] Updated weights for policy 0, policy_version 21736 (0.0008) +[2026-06-02 16:55:40,502][253683] Fps is (10 sec: 19660.6, 60 sec: 19660.8, 300 sec: 19549.7). Total num frames: 11173888. Throughput: 0: 19288.1. Samples: 11164544. Policy #0 lag: (min: 63.0, avg: 80.7, max: 127.0) +[2026-06-02 16:55:40,503][253683] Avg episode reward: [(0, '675.484')] +[2026-06-02 16:55:40,538][255279] Updated weights for policy 0, policy_version 21746 (0.0008) +[2026-06-02 16:55:40,728][255279] Updated weights for policy 0, policy_version 21756 (0.0008) +[2026-06-02 16:55:40,931][255279] Updated weights for policy 0, policy_version 21766 (0.0009) +[2026-06-02 16:55:41,112][255279] Updated weights for policy 0, policy_version 21776 (0.0008) +[2026-06-02 16:55:41,795][255279] Updated weights for policy 0, policy_version 21786 (0.0008) +[2026-06-02 16:55:41,981][255279] Updated weights for policy 0, policy_version 21796 (0.0008) +[2026-06-02 16:55:42,172][255279] Updated weights for policy 0, policy_version 21806 (0.0008) +[2026-06-02 16:55:42,360][255279] Updated weights for policy 0, policy_version 21816 (0.0009) +[2026-06-02 16:55:42,542][255279] Updated weights for policy 0, policy_version 21826 (0.0008) +[2026-06-02 16:55:42,732][255279] Updated weights for policy 0, policy_version 21836 (0.0009) +[2026-06-02 16:55:42,926][255279] Updated weights for policy 0, policy_version 21846 (0.0008) +[2026-06-02 16:55:43,592][255279] Updated weights for policy 0, policy_version 21856 (0.0008) +[2026-06-02 16:55:43,784][255279] Updated weights for policy 0, policy_version 21866 (0.0008) +[2026-06-02 16:55:43,970][255279] Updated weights for policy 0, policy_version 21876 (0.0009) +[2026-06-02 16:55:44,148][255279] Updated weights for policy 0, policy_version 21886 (0.0009) +[2026-06-02 16:55:44,346][255279] Updated weights for policy 0, policy_version 21896 (0.0009) +[2026-06-02 16:55:44,534][255279] Updated weights for policy 0, policy_version 21906 (0.0008) +[2026-06-02 16:55:45,201][255279] Updated weights for policy 0, policy_version 21916 (0.0008) +[2026-06-02 16:55:45,392][255279] Updated weights for policy 0, policy_version 21926 (0.0009) +[2026-06-02 16:55:45,501][253683] Fps is (10 sec: 19660.9, 60 sec: 19660.8, 300 sec: 19549.7). Total num frames: 11272192. Throughput: 0: 19527.1. Samples: 11286784. Policy #0 lag: (min: 63.0, avg: 80.7, max: 127.0) +[2026-06-02 16:55:45,502][253683] Avg episode reward: [(0, '738.068')] +[2026-06-02 16:55:45,568][255279] Updated weights for policy 0, policy_version 21936 (0.0008) +[2026-06-02 16:55:45,769][255279] Updated weights for policy 0, policy_version 21946 (0.0009) +[2026-06-02 16:55:45,956][255279] Updated weights for policy 0, policy_version 21956 (0.0009) +[2026-06-02 16:55:46,146][255279] Updated weights for policy 0, policy_version 21966 (0.0008) +[2026-06-02 16:55:46,331][255279] Updated weights for policy 0, policy_version 21976 (0.0008) +[2026-06-02 16:55:47,012][255279] Updated weights for policy 0, policy_version 21986 (0.0008) +[2026-06-02 16:55:47,197][255279] Updated weights for policy 0, policy_version 21996 (0.0008) +[2026-06-02 16:55:47,375][255279] Updated weights for policy 0, policy_version 22006 (0.0009) +[2026-06-02 16:55:47,597][255279] Updated weights for policy 0, policy_version 22017 (0.0009) +[2026-06-02 16:55:47,784][255279] Updated weights for policy 0, policy_version 22027 (0.0009) +[2026-06-02 16:55:47,968][255279] Updated weights for policy 0, policy_version 22037 (0.0009) +[2026-06-02 16:55:48,650][255279] Updated weights for policy 0, policy_version 22047 (0.0009) +[2026-06-02 16:55:48,832][255279] Updated weights for policy 0, policy_version 22057 (0.0009) +[2026-06-02 16:55:49,037][255279] Updated weights for policy 0, policy_version 22068 (0.0009) +[2026-06-02 16:55:49,229][255279] Updated weights for policy 0, policy_version 22078 (0.0008) +[2026-06-02 16:55:49,415][255279] Updated weights for policy 0, policy_version 22088 (0.0009) +[2026-06-02 16:55:49,604][255279] Updated weights for policy 0, policy_version 22098 (0.0009) +[2026-06-02 16:55:50,272][255279] Updated weights for policy 0, policy_version 22108 (0.0009) +[2026-06-02 16:55:50,475][255279] Updated weights for policy 0, policy_version 22119 (0.0009) +[2026-06-02 16:55:50,502][253683] Fps is (10 sec: 19661.0, 60 sec: 19660.9, 300 sec: 19549.7). Total num frames: 11370496. Throughput: 0: 19288.2. Samples: 11398144. Policy #0 lag: (min: 30.0, avg: 47.7, max: 94.0) +[2026-06-02 16:55:50,503][253683] Avg episode reward: [(0, '740.080')] +[2026-06-02 16:55:50,661][255279] Updated weights for policy 0, policy_version 22129 (0.0009) +[2026-06-02 16:55:50,865][255279] Updated weights for policy 0, policy_version 22140 (0.0009) +[2026-06-02 16:55:51,075][255279] Updated weights for policy 0, policy_version 22151 (0.0009) +[2026-06-02 16:55:51,257][255279] Updated weights for policy 0, policy_version 22161 (0.0008) +[2026-06-02 16:55:51,390][255187] Saving results/checkpoints_factor_sweeps/flappy/context_window/flappy_frame_stack_fixed_l2_fs5_seed10/checkpoint_p0/checkpoint_000022168_11403264.pth... +[2026-06-02 16:55:51,410][255187] Saving new best policy, reward=740.080! +[2026-06-02 16:55:51,938][255279] Updated weights for policy 0, policy_version 22171 (0.0009) +[2026-06-02 16:55:52,116][255279] Updated weights for policy 0, policy_version 22181 (0.0008) +[2026-06-02 16:55:52,304][255279] Updated weights for policy 0, policy_version 22191 (0.0008) +[2026-06-02 16:55:52,491][255279] Updated weights for policy 0, policy_version 22201 (0.0009) +[2026-06-02 16:55:52,686][255279] Updated weights for policy 0, policy_version 22211 (0.0009) +[2026-06-02 16:55:52,870][255279] Updated weights for policy 0, policy_version 22221 (0.0008) +[2026-06-02 16:55:53,064][255279] Updated weights for policy 0, policy_version 22231 (0.0008) +[2026-06-02 16:55:53,727][255279] Updated weights for policy 0, policy_version 22241 (0.0008) +[2026-06-02 16:55:53,912][255279] Updated weights for policy 0, policy_version 22251 (0.0008) +[2026-06-02 16:55:54,101][255279] Updated weights for policy 0, policy_version 22261 (0.0008) +[2026-06-02 16:55:54,303][255279] Updated weights for policy 0, policy_version 22272 (0.0009) +[2026-06-02 16:55:54,458][255187] Early stopping after 6 epochs (48 sgd steps), loss delta 0.0000009 +[2026-06-02 16:55:55,068][255279] Updated weights for policy 0, policy_version 22282 (0.0009) +[2026-06-02 16:55:55,231][255279] Updated weights for policy 0, policy_version 22292 (0.0009) +[2026-06-02 16:55:55,419][255279] Updated weights for policy 0, policy_version 22302 (0.0009) +[2026-06-02 16:55:55,501][253683] Fps is (10 sec: 19660.8, 60 sec: 19660.8, 300 sec: 19549.7). Total num frames: 11468800. Throughput: 0: 19481.7. Samples: 11458944. Policy #0 lag: (min: 30.0, avg: 47.7, max: 94.0) +[2026-06-02 16:55:55,502][253683] Avg episode reward: [(0, '738.049')] +[2026-06-02 16:55:55,635][255279] Updated weights for policy 0, policy_version 22313 (0.0009) +[2026-06-02 16:55:55,823][255279] Updated weights for policy 0, policy_version 22323 (0.0009) +[2026-06-02 16:55:56,012][255279] Updated weights for policy 0, policy_version 22333 (0.0009) +[2026-06-02 16:55:56,198][255279] Updated weights for policy 0, policy_version 22343 (0.0009) +[2026-06-02 16:55:56,879][255279] Updated weights for policy 0, policy_version 22353 (0.0009) +[2026-06-02 16:55:57,068][255279] Updated weights for policy 0, policy_version 22363 (0.0009) +[2026-06-02 16:55:57,254][255279] Updated weights for policy 0, policy_version 22373 (0.0008) +[2026-06-02 16:55:57,442][255279] Updated weights for policy 0, policy_version 22383 (0.0009) +[2026-06-02 16:55:57,630][255279] Updated weights for policy 0, policy_version 22393 (0.0008) +[2026-06-02 16:55:57,809][255279] Updated weights for policy 0, policy_version 22403 (0.0008) +[2026-06-02 16:55:58,496][255279] Updated weights for policy 0, policy_version 22413 (0.0009) +[2026-06-02 16:55:58,702][255279] Updated weights for policy 0, policy_version 22424 (0.0009) +[2026-06-02 16:55:58,895][255279] Updated weights for policy 0, policy_version 22434 (0.0009) +[2026-06-02 16:55:59,098][255279] Updated weights for policy 0, policy_version 22445 (0.0008) +[2026-06-02 16:55:59,288][255279] Updated weights for policy 0, policy_version 22455 (0.0008) +[2026-06-02 16:55:59,480][255279] Updated weights for policy 0, policy_version 22465 (0.0009) +[2026-06-02 16:56:00,148][255279] Updated weights for policy 0, policy_version 22475 (0.0009) +[2026-06-02 16:56:00,345][255279] Updated weights for policy 0, policy_version 22486 (0.0009) +[2026-06-02 16:56:00,502][253683] Fps is (10 sec: 19660.9, 60 sec: 19660.8, 300 sec: 19549.7). Total num frames: 11567104. Throughput: 0: 19598.2. Samples: 11583744. Policy #0 lag: (min: 30.0, avg: 47.7, max: 94.0) +[2026-06-02 16:56:00,503][253683] Avg episode reward: [(0, '744.428')] +[2026-06-02 16:56:00,528][255279] Updated weights for policy 0, policy_version 22496 (0.0009) +[2026-06-02 16:56:00,742][255279] Updated weights for policy 0, policy_version 22507 (0.0008) +[2026-06-02 16:56:00,949][255279] Updated weights for policy 0, policy_version 22518 (0.0009) +[2026-06-02 16:56:01,131][255279] Updated weights for policy 0, policy_version 22528 (0.0009) +[2026-06-02 16:56:01,275][255187] Saving new best policy, reward=744.428! +[2026-06-02 16:56:01,827][255279] Updated weights for policy 0, policy_version 22539 (0.0009) +[2026-06-02 16:56:02,002][255279] Updated weights for policy 0, policy_version 22549 (0.0008) +[2026-06-02 16:56:02,195][255279] Updated weights for policy 0, policy_version 22559 (0.0009) +[2026-06-02 16:56:02,377][255279] Updated weights for policy 0, policy_version 22569 (0.0008) +[2026-06-02 16:56:02,570][255279] Updated weights for policy 0, policy_version 22579 (0.0008) +[2026-06-02 16:56:02,756][255279] Updated weights for policy 0, policy_version 22589 (0.0008) +[2026-06-02 16:56:02,949][255279] Updated weights for policy 0, policy_version 22599 (0.0008) +[2026-06-02 16:56:03,613][255279] Updated weights for policy 0, policy_version 22609 (0.0008) +[2026-06-02 16:56:03,793][255279] Updated weights for policy 0, policy_version 22619 (0.0008) +[2026-06-02 16:56:03,978][255279] Updated weights for policy 0, policy_version 22629 (0.0008) +[2026-06-02 16:56:04,171][255279] Updated weights for policy 0, policy_version 22639 (0.0008) +[2026-06-02 16:56:04,362][255279] Updated weights for policy 0, policy_version 22649 (0.0008) +[2026-06-02 16:56:04,552][255279] Updated weights for policy 0, policy_version 22659 (0.0008) +[2026-06-02 16:56:05,235][255279] Updated weights for policy 0, policy_version 22669 (0.0008) +[2026-06-02 16:56:05,414][255279] Updated weights for policy 0, policy_version 22679 (0.0008) +[2026-06-02 16:56:05,502][253683] Fps is (10 sec: 19660.7, 60 sec: 19660.8, 300 sec: 19549.7). Total num frames: 11665408. Throughput: 0: 19379.2. Samples: 11693312. Policy #0 lag: (min: 63.0, avg: 80.4, max: 127.0) +[2026-06-02 16:56:05,502][253683] Avg episode reward: [(0, '754.354')] +[2026-06-02 16:56:05,594][255279] Updated weights for policy 0, policy_version 22689 (0.0008) +[2026-06-02 16:56:05,779][255279] Updated weights for policy 0, policy_version 22699 (0.0008) +[2026-06-02 16:56:05,972][255279] Updated weights for policy 0, policy_version 22709 (0.0009) +[2026-06-02 16:56:06,162][255279] Updated weights for policy 0, policy_version 22719 (0.0008) +[2026-06-02 16:56:06,320][255187] Saving new best policy, reward=754.354! +[2026-06-02 16:56:06,864][255279] Updated weights for policy 0, policy_version 22729 (0.0008) +[2026-06-02 16:56:07,031][255279] Updated weights for policy 0, policy_version 22739 (0.0009) +[2026-06-02 16:56:07,214][255279] Updated weights for policy 0, policy_version 22749 (0.0009) +[2026-06-02 16:56:07,407][255279] Updated weights for policy 0, policy_version 22759 (0.0009) +[2026-06-02 16:56:07,612][255279] Updated weights for policy 0, policy_version 22770 (0.0009) +[2026-06-02 16:56:07,801][255279] Updated weights for policy 0, policy_version 22780 (0.0009) +[2026-06-02 16:56:07,990][255279] Updated weights for policy 0, policy_version 22790 (0.0008) +[2026-06-02 16:56:08,680][255279] Updated weights for policy 0, policy_version 22800 (0.0008) +[2026-06-02 16:56:08,877][255279] Updated weights for policy 0, policy_version 22811 (0.0009) +[2026-06-02 16:56:09,066][255279] Updated weights for policy 0, policy_version 22821 (0.0008) +[2026-06-02 16:56:09,265][255279] Updated weights for policy 0, policy_version 22831 (0.0008) +[2026-06-02 16:56:09,443][255279] Updated weights for policy 0, policy_version 22841 (0.0008) +[2026-06-02 16:56:09,660][255279] Updated weights for policy 0, policy_version 22852 (0.0008) +[2026-06-02 16:56:10,338][255279] Updated weights for policy 0, policy_version 22862 (0.0008) +[2026-06-02 16:56:10,502][253683] Fps is (10 sec: 19660.8, 60 sec: 19660.9, 300 sec: 19549.7). Total num frames: 11763712. Throughput: 0: 19601.0. Samples: 11754368. Policy #0 lag: (min: 63.0, avg: 80.4, max: 127.0) +[2026-06-02 16:56:10,503][253683] Avg episode reward: [(0, '751.555')] +[2026-06-02 16:56:10,525][255279] Updated weights for policy 0, policy_version 22872 (0.0008) +[2026-06-02 16:56:10,705][255279] Updated weights for policy 0, policy_version 22882 (0.0008) +[2026-06-02 16:56:10,901][255279] Updated weights for policy 0, policy_version 22892 (0.0008) +[2026-06-02 16:56:11,079][255279] Updated weights for policy 0, policy_version 22902 (0.0008) +[2026-06-02 16:56:11,278][255279] Updated weights for policy 0, policy_version 22912 (0.0008) +[2026-06-02 16:56:11,963][255279] Updated weights for policy 0, policy_version 22922 (0.0008) +[2026-06-02 16:56:12,131][255279] Updated weights for policy 0, policy_version 22932 (0.0008) +[2026-06-02 16:56:12,322][255279] Updated weights for policy 0, policy_version 22942 (0.0008) +[2026-06-02 16:56:12,507][255279] Updated weights for policy 0, policy_version 22952 (0.0008) +[2026-06-02 16:56:12,703][255279] Updated weights for policy 0, policy_version 22962 (0.0008) +[2026-06-02 16:56:12,890][255279] Updated weights for policy 0, policy_version 22972 (0.0009) +[2026-06-02 16:56:13,083][255279] Updated weights for policy 0, policy_version 22982 (0.0009) +[2026-06-02 16:56:13,756][255279] Updated weights for policy 0, policy_version 22992 (0.0009) +[2026-06-02 16:56:13,942][255279] Updated weights for policy 0, policy_version 23002 (0.0008) +[2026-06-02 16:56:14,147][255279] Updated weights for policy 0, policy_version 23013 (0.0008) +[2026-06-02 16:56:14,327][255279] Updated weights for policy 0, policy_version 23023 (0.0009) +[2026-06-02 16:56:14,519][255279] Updated weights for policy 0, policy_version 23033 (0.0008) +[2026-06-02 16:56:14,718][255279] Updated weights for policy 0, policy_version 23044 (0.0008) +[2026-06-02 16:56:15,421][255279] Updated weights for policy 0, policy_version 23055 (0.0008) +[2026-06-02 16:56:15,502][253683] Fps is (10 sec: 19660.8, 60 sec: 19660.8, 300 sec: 19549.7). Total num frames: 11862016. Throughput: 0: 19658.0. Samples: 11877760. Policy #0 lag: (min: 63.0, avg: 80.4, max: 127.0) +[2026-06-02 16:56:15,502][253683] Avg episode reward: [(0, '756.000')] +[2026-06-02 16:56:15,628][255279] Updated weights for policy 0, policy_version 23066 (0.0008) +[2026-06-02 16:56:15,804][255279] Updated weights for policy 0, policy_version 23076 (0.0008) +[2026-06-02 16:56:16,015][255279] Updated weights for policy 0, policy_version 23087 (0.0008) +[2026-06-02 16:56:16,202][255279] Updated weights for policy 0, policy_version 23097 (0.0008) +[2026-06-02 16:56:16,395][255279] Updated weights for policy 0, policy_version 23107 (0.0009) +[2026-06-02 16:56:16,487][255187] Saving new best policy, reward=756.000! +[2026-06-02 16:56:17,052][255279] Updated weights for policy 0, policy_version 23117 (0.0009) +[2026-06-02 16:56:17,234][255279] Updated weights for policy 0, policy_version 23127 (0.0009) +[2026-06-02 16:56:17,423][255279] Updated weights for policy 0, policy_version 23137 (0.0008) +[2026-06-02 16:56:17,601][255279] Updated weights for policy 0, policy_version 23147 (0.0008) +[2026-06-02 16:56:17,789][255279] Updated weights for policy 0, policy_version 23157 (0.0009) +[2026-06-02 16:56:17,979][255279] Updated weights for policy 0, policy_version 23167 (0.0008) +[2026-06-02 16:56:18,675][255279] Updated weights for policy 0, policy_version 23177 (0.0009) +[2026-06-02 16:56:18,849][255279] Updated weights for policy 0, policy_version 23187 (0.0008) +[2026-06-02 16:56:19,040][255279] Updated weights for policy 0, policy_version 23197 (0.0009) +[2026-06-02 16:56:19,218][255279] Updated weights for policy 0, policy_version 23207 (0.0009) +[2026-06-02 16:56:19,404][255279] Updated weights for policy 0, policy_version 23217 (0.0009) +[2026-06-02 16:56:19,589][255279] Updated weights for policy 0, policy_version 23227 (0.0009) +[2026-06-02 16:56:19,780][255279] Updated weights for policy 0, policy_version 23237 (0.0008) +[2026-06-02 16:56:20,462][255279] Updated weights for policy 0, policy_version 23247 (0.0010) +[2026-06-02 16:56:20,502][253683] Fps is (10 sec: 19660.8, 60 sec: 19660.8, 300 sec: 19549.7). Total num frames: 11960320. Throughput: 0: 19552.7. Samples: 11987584. Policy #0 lag: (min: 63.0, avg: 80.4, max: 127.0) +[2026-06-02 16:56:20,502][253683] Avg episode reward: [(0, '786.036')] +[2026-06-02 16:56:20,649][255279] Updated weights for policy 0, policy_version 23257 (0.0010) +[2026-06-02 16:56:20,831][255279] Updated weights for policy 0, policy_version 23267 (0.0011) +[2026-06-02 16:56:21,022][255279] Updated weights for policy 0, policy_version 23277 (0.0011) +[2026-06-02 16:56:21,207][255279] Updated weights for policy 0, policy_version 23287 (0.0009) +[2026-06-02 16:56:21,392][255279] Updated weights for policy 0, policy_version 23297 (0.0008) +[2026-06-02 16:56:21,530][255187] Saving new best policy, reward=786.036! +[2026-06-02 16:56:22,093][255279] Updated weights for policy 0, policy_version 23307 (0.0008) +[2026-06-02 16:56:22,267][255279] Updated weights for policy 0, policy_version 23317 (0.0009) +[2026-06-02 16:56:22,458][255279] Updated weights for policy 0, policy_version 23327 (0.0009) +[2026-06-02 16:56:22,640][255279] Updated weights for policy 0, policy_version 23337 (0.0009) +[2026-06-02 16:56:22,837][255279] Updated weights for policy 0, policy_version 23347 (0.0009) +[2026-06-02 16:56:23,019][255279] Updated weights for policy 0, policy_version 23357 (0.0008) +[2026-06-02 16:56:23,227][255279] Updated weights for policy 0, policy_version 23368 (0.0009) +[2026-06-02 16:56:23,885][255279] Updated weights for policy 0, policy_version 23378 (0.0008) +[2026-06-02 16:56:24,067][255279] Updated weights for policy 0, policy_version 23388 (0.0008) +[2026-06-02 16:56:24,257][255279] Updated weights for policy 0, policy_version 23398 (0.0008) +[2026-06-02 16:56:24,440][255279] Updated weights for policy 0, policy_version 23408 (0.0008) +[2026-06-02 16:56:24,634][255279] Updated weights for policy 0, policy_version 23418 (0.0009) +[2026-06-02 16:56:24,853][255279] Updated weights for policy 0, policy_version 23429 (0.0009) +[2026-06-02 16:56:25,502][253683] Fps is (10 sec: 19660.7, 60 sec: 19660.8, 300 sec: 19549.7). Total num frames: 12058624. Throughput: 0: 19655.2. Samples: 12049024. Policy #0 lag: (min: 63.0, avg: 80.4, max: 127.0) +[2026-06-02 16:56:25,502][253683] Avg episode reward: [(0, '794.716')] +[2026-06-02 16:56:25,534][255279] Updated weights for policy 0, policy_version 23440 (0.0009) +[2026-06-02 16:56:25,735][255279] Updated weights for policy 0, policy_version 23450 (0.0009) +[2026-06-02 16:56:25,909][255279] Updated weights for policy 0, policy_version 23460 (0.0008) +[2026-06-02 16:56:26,103][255279] Updated weights for policy 0, policy_version 23470 (0.0009) +[2026-06-02 16:56:26,297][255279] Updated weights for policy 0, policy_version 23480 (0.0009) +[2026-06-02 16:56:26,498][255279] Updated weights for policy 0, policy_version 23491 (0.0009) +[2026-06-02 16:56:26,591][255187] Saving new best policy, reward=794.716! +[2026-06-02 16:56:27,195][255279] Updated weights for policy 0, policy_version 23501 (0.0009) +[2026-06-02 16:56:27,368][255279] Updated weights for policy 0, policy_version 23511 (0.0008) +[2026-06-02 16:56:27,580][255279] Updated weights for policy 0, policy_version 23522 (0.0009) +[2026-06-02 16:56:27,761][255279] Updated weights for policy 0, policy_version 23532 (0.0008) +[2026-06-02 16:56:27,957][255279] Updated weights for policy 0, policy_version 23542 (0.0008) +[2026-06-02 16:56:28,137][255279] Updated weights for policy 0, policy_version 23552 (0.0008) +[2026-06-02 16:56:28,803][255279] Updated weights for policy 0, policy_version 23562 (0.0008) +[2026-06-02 16:56:28,998][255279] Updated weights for policy 0, policy_version 23573 (0.0008) +[2026-06-02 16:56:29,179][255279] Updated weights for policy 0, policy_version 23583 (0.0008) +[2026-06-02 16:56:29,386][255279] Updated weights for policy 0, policy_version 23594 (0.0008) +[2026-06-02 16:56:29,577][255279] Updated weights for policy 0, policy_version 23604 (0.0008) +[2026-06-02 16:56:29,765][255279] Updated weights for policy 0, policy_version 23614 (0.0009) +[2026-06-02 16:56:29,947][255279] Updated weights for policy 0, policy_version 23624 (0.0008) +[2026-06-02 16:56:30,502][253683] Fps is (10 sec: 19660.8, 60 sec: 19660.8, 300 sec: 19549.7). Total num frames: 12156928. Throughput: 0: 19646.6. Samples: 12170880. Policy #0 lag: (min: 63.0, avg: 80.4, max: 127.0) +[2026-06-02 16:56:30,502][253683] Avg episode reward: [(0, '810.292')] +[2026-06-02 16:56:30,644][255279] Updated weights for policy 0, policy_version 23635 (0.0008) +[2026-06-02 16:56:30,842][255279] Updated weights for policy 0, policy_version 23646 (0.0008) +[2026-06-02 16:56:31,036][255279] Updated weights for policy 0, policy_version 23656 (0.0008) +[2026-06-02 16:56:31,235][255279] Updated weights for policy 0, policy_version 23666 (0.0009) +[2026-06-02 16:56:31,419][255279] Updated weights for policy 0, policy_version 23676 (0.0008) +[2026-06-02 16:56:31,597][255279] Updated weights for policy 0, policy_version 23686 (0.0008) +[2026-06-02 16:56:31,635][255187] Saving new best policy, reward=810.292! +[2026-06-02 16:56:32,287][255279] Updated weights for policy 0, policy_version 23697 (0.0009) +[2026-06-02 16:56:32,484][255279] Updated weights for policy 0, policy_version 23707 (0.0009) +[2026-06-02 16:56:32,662][255279] Updated weights for policy 0, policy_version 23717 (0.0009) +[2026-06-02 16:56:32,861][255279] Updated weights for policy 0, policy_version 23727 (0.0008) +[2026-06-02 16:56:33,040][255279] Updated weights for policy 0, policy_version 23737 (0.0008) +[2026-06-02 16:56:33,254][255279] Updated weights for policy 0, policy_version 23748 (0.0009) +[2026-06-02 16:56:33,925][255279] Updated weights for policy 0, policy_version 23758 (0.0008) +[2026-06-02 16:56:34,104][255279] Updated weights for policy 0, policy_version 23768 (0.0008) +[2026-06-02 16:56:34,298][255279] Updated weights for policy 0, policy_version 23778 (0.0008) +[2026-06-02 16:56:34,487][255279] Updated weights for policy 0, policy_version 23788 (0.0008) +[2026-06-02 16:56:34,674][255279] Updated weights for policy 0, policy_version 23798 (0.0008) +[2026-06-02 16:56:34,861][255279] Updated weights for policy 0, policy_version 23808 (0.0009) +[2026-06-02 16:56:35,502][253683] Fps is (10 sec: 19660.9, 60 sec: 19660.8, 300 sec: 19549.7). Total num frames: 12255232. Throughput: 0: 19626.7. Samples: 12281344. Policy #0 lag: (min: 63.0, avg: 80.4, max: 127.0) +[2026-06-02 16:56:35,502][253683] Avg episode reward: [(0, '819.243')] +[2026-06-02 16:56:35,552][255279] Updated weights for policy 0, policy_version 23819 (0.0009) +[2026-06-02 16:56:35,743][255279] Updated weights for policy 0, policy_version 23829 (0.0009) +[2026-06-02 16:56:35,925][255279] Updated weights for policy 0, policy_version 23839 (0.0008) +[2026-06-02 16:56:36,109][255279] Updated weights for policy 0, policy_version 23849 (0.0008) +[2026-06-02 16:56:36,305][255279] Updated weights for policy 0, policy_version 23859 (0.0008) +[2026-06-02 16:56:36,491][255279] Updated weights for policy 0, policy_version 23869 (0.0008) +[2026-06-02 16:56:36,686][255279] Updated weights for policy 0, policy_version 23879 (0.0008) +[2026-06-02 16:56:36,696][255187] Saving new best policy, reward=819.243! +[2026-06-02 16:56:37,363][255279] Updated weights for policy 0, policy_version 23890 (0.0008) +[2026-06-02 16:56:37,548][255279] Updated weights for policy 0, policy_version 23900 (0.0008) +[2026-06-02 16:56:37,736][255279] Updated weights for policy 0, policy_version 23910 (0.0008) +[2026-06-02 16:56:37,933][255279] Updated weights for policy 0, policy_version 23920 (0.0008) +[2026-06-02 16:56:38,114][255279] Updated weights for policy 0, policy_version 23930 (0.0008) +[2026-06-02 16:56:38,309][255279] Updated weights for policy 0, policy_version 23940 (0.0009) +[2026-06-02 16:56:38,969][255279] Updated weights for policy 0, policy_version 23950 (0.0007) +[2026-06-02 16:56:39,150][255279] Updated weights for policy 0, policy_version 23960 (0.0009) +[2026-06-02 16:56:39,329][255279] Updated weights for policy 0, policy_version 23970 (0.0008) +[2026-06-02 16:56:39,526][255279] Updated weights for policy 0, policy_version 23980 (0.0008) +[2026-06-02 16:56:39,731][255279] Updated weights for policy 0, policy_version 23990 (0.0009) +[2026-06-02 16:56:39,933][255279] Updated weights for policy 0, policy_version 24001 (0.0008) +[2026-06-02 16:56:40,502][253683] Fps is (10 sec: 19660.8, 60 sec: 19660.9, 300 sec: 19549.7). Total num frames: 12353536. Throughput: 0: 19632.3. Samples: 12342400. Policy #0 lag: (min: 29.0, avg: 47.1, max: 93.0) +[2026-06-02 16:56:40,502][253683] Avg episode reward: [(0, '836.585')] +[2026-06-02 16:56:40,605][255279] Updated weights for policy 0, policy_version 24011 (0.0008) +[2026-06-02 16:56:40,788][255279] Updated weights for policy 0, policy_version 24021 (0.0008) +[2026-06-02 16:56:40,973][255279] Updated weights for policy 0, policy_version 24031 (0.0008) +[2026-06-02 16:56:41,161][255279] Updated weights for policy 0, policy_version 24041 (0.0009) +[2026-06-02 16:56:41,348][255279] Updated weights for policy 0, policy_version 24051 (0.0008) +[2026-06-02 16:56:41,534][255279] Updated weights for policy 0, policy_version 24061 (0.0008) +[2026-06-02 16:56:41,720][255279] Updated weights for policy 0, policy_version 24071 (0.0009) +[2026-06-02 16:56:41,728][255187] Saving new best policy, reward=836.585! +[2026-06-02 16:56:42,390][255279] Updated weights for policy 0, policy_version 24081 (0.0009) +[2026-06-02 16:56:42,575][255279] Updated weights for policy 0, policy_version 24091 (0.0009) +[2026-06-02 16:56:42,751][255279] Updated weights for policy 0, policy_version 24101 (0.0008) +[2026-06-02 16:56:42,941][255279] Updated weights for policy 0, policy_version 24111 (0.0008) +[2026-06-02 16:56:43,151][255279] Updated weights for policy 0, policy_version 24122 (0.0008) +[2026-06-02 16:56:43,339][255279] Updated weights for policy 0, policy_version 24132 (0.0009) +[2026-06-02 16:56:44,037][255279] Updated weights for policy 0, policy_version 24142 (0.0008) +[2026-06-02 16:56:44,221][255279] Updated weights for policy 0, policy_version 24152 (0.0008) +[2026-06-02 16:56:44,407][255279] Updated weights for policy 0, policy_version 24162 (0.0008) +[2026-06-02 16:56:44,617][255279] Updated weights for policy 0, policy_version 24173 (0.0008) +[2026-06-02 16:56:44,798][255279] Updated weights for policy 0, policy_version 24183 (0.0008) +[2026-06-02 16:56:44,987][255279] Updated weights for policy 0, policy_version 24193 (0.0008) +[2026-06-02 16:56:45,502][253683] Fps is (10 sec: 19660.6, 60 sec: 19660.8, 300 sec: 19549.7). Total num frames: 12451840. Throughput: 0: 19490.1. Samples: 12460800. Policy #0 lag: (min: 29.0, avg: 47.1, max: 93.0) +[2026-06-02 16:56:45,503][253683] Avg episode reward: [(0, '851.589')] +[2026-06-02 16:56:45,671][255279] Updated weights for policy 0, policy_version 24203 (0.0008) +[2026-06-02 16:56:45,852][255279] Updated weights for policy 0, policy_version 24213 (0.0008) +[2026-06-02 16:56:46,034][255279] Updated weights for policy 0, policy_version 24223 (0.0008) +[2026-06-02 16:56:46,229][255279] Updated weights for policy 0, policy_version 24233 (0.0008) +[2026-06-02 16:56:46,416][255279] Updated weights for policy 0, policy_version 24243 (0.0007) +[2026-06-02 16:56:46,615][255279] Updated weights for policy 0, policy_version 24254 (0.0008) +[2026-06-02 16:56:46,807][255187] Saving new best policy, reward=851.589! +[2026-06-02 16:56:46,809][255279] Updated weights for policy 0, policy_version 24264 (0.0008) +[2026-06-02 16:56:47,459][255279] Updated weights for policy 0, policy_version 24274 (0.0009) +[2026-06-02 16:56:47,650][255279] Updated weights for policy 0, policy_version 24284 (0.0008) +[2026-06-02 16:56:47,833][255279] Updated weights for policy 0, policy_version 24294 (0.0008) +[2026-06-02 16:56:48,021][255279] Updated weights for policy 0, policy_version 24304 (0.0008) +[2026-06-02 16:56:48,219][255279] Updated weights for policy 0, policy_version 24314 (0.0009) +[2026-06-02 16:56:48,418][255279] Updated weights for policy 0, policy_version 24325 (0.0008) +[2026-06-02 16:56:49,098][255279] Updated weights for policy 0, policy_version 24335 (0.0009) +[2026-06-02 16:56:49,277][255279] Updated weights for policy 0, policy_version 24345 (0.0008) +[2026-06-02 16:56:49,459][255279] Updated weights for policy 0, policy_version 24355 (0.0008) +[2026-06-02 16:56:49,656][255279] Updated weights for policy 0, policy_version 24365 (0.0009) +[2026-06-02 16:56:49,838][255279] Updated weights for policy 0, policy_version 24375 (0.0008) +[2026-06-02 16:56:50,030][255279] Updated weights for policy 0, policy_version 24385 (0.0008) +[2026-06-02 16:56:50,502][253683] Fps is (10 sec: 19660.8, 60 sec: 19660.8, 300 sec: 19549.7). Total num frames: 12550144. Throughput: 0: 19595.4. Samples: 12575104. Policy #0 lag: (min: 29.0, avg: 47.1, max: 93.0) +[2026-06-02 16:56:50,502][253683] Avg episode reward: [(0, '870.766')] +[2026-06-02 16:56:50,707][255279] Updated weights for policy 0, policy_version 24395 (0.0008) +[2026-06-02 16:56:50,897][255279] Updated weights for policy 0, policy_version 24405 (0.0008) +[2026-06-02 16:56:51,076][255279] Updated weights for policy 0, policy_version 24415 (0.0008) +[2026-06-02 16:56:51,265][255279] Updated weights for policy 0, policy_version 24425 (0.0008) +[2026-06-02 16:56:51,459][255279] Updated weights for policy 0, policy_version 24435 (0.0008) +[2026-06-02 16:56:51,642][255279] Updated weights for policy 0, policy_version 24445 (0.0009) +[2026-06-02 16:56:51,837][255279] Updated weights for policy 0, policy_version 24455 (0.0008) +[2026-06-02 16:56:51,847][255187] Saving new best policy, reward=870.766! +[2026-06-02 16:56:52,506][255279] Updated weights for policy 0, policy_version 24465 (0.0008) +[2026-06-02 16:56:52,689][255279] Updated weights for policy 0, policy_version 24475 (0.0008) +[2026-06-02 16:56:52,865][255279] Updated weights for policy 0, policy_version 24485 (0.0008) +[2026-06-02 16:56:53,061][255279] Updated weights for policy 0, policy_version 24495 (0.0008) +[2026-06-02 16:56:53,251][255279] Updated weights for policy 0, policy_version 24505 (0.0009) +[2026-06-02 16:56:53,439][255279] Updated weights for policy 0, policy_version 24515 (0.0008) +[2026-06-02 16:56:54,120][255279] Updated weights for policy 0, policy_version 24525 (0.0009) +[2026-06-02 16:56:54,302][255279] Updated weights for policy 0, policy_version 24535 (0.0009) +[2026-06-02 16:56:54,485][255279] Updated weights for policy 0, policy_version 24545 (0.0008) +[2026-06-02 16:56:54,676][255279] Updated weights for policy 0, policy_version 24555 (0.0008) +[2026-06-02 16:56:54,879][255279] Updated weights for policy 0, policy_version 24566 (0.0008) +[2026-06-02 16:56:55,060][255279] Updated weights for policy 0, policy_version 24576 (0.0008) +[2026-06-02 16:56:55,502][253683] Fps is (10 sec: 19660.9, 60 sec: 19660.8, 300 sec: 19549.7). Total num frames: 12648448. Throughput: 0: 19601.1. Samples: 12636416. Policy #0 lag: (min: 29.0, avg: 47.1, max: 93.0) +[2026-06-02 16:56:55,502][253683] Avg episode reward: [(0, '854.598')] +[2026-06-02 16:56:55,767][255279] Updated weights for policy 0, policy_version 24586 (0.0008) +[2026-06-02 16:56:55,952][255279] Updated weights for policy 0, policy_version 24596 (0.0009) +[2026-06-02 16:56:56,164][255279] Updated weights for policy 0, policy_version 24607 (0.0009) +[2026-06-02 16:56:56,365][255279] Updated weights for policy 0, policy_version 24618 (0.0009) +[2026-06-02 16:56:56,564][255279] Updated weights for policy 0, policy_version 24628 (0.0008) +[2026-06-02 16:56:56,750][255279] Updated weights for policy 0, policy_version 24638 (0.0008) +[2026-06-02 16:56:56,930][255279] Updated weights for policy 0, policy_version 24648 (0.0006) +[2026-06-02 16:56:57,585][255279] Updated weights for policy 0, policy_version 24658 (0.0008) +[2026-06-02 16:56:57,758][255279] Updated weights for policy 0, policy_version 24668 (0.0009) +[2026-06-02 16:56:57,957][255279] Updated weights for policy 0, policy_version 24678 (0.0008) +[2026-06-02 16:56:58,136][255279] Updated weights for policy 0, policy_version 24688 (0.0008) +[2026-06-02 16:56:58,328][255279] Updated weights for policy 0, policy_version 24698 (0.0008) +[2026-06-02 16:56:58,528][255279] Updated weights for policy 0, policy_version 24709 (0.0008) +[2026-06-02 16:56:59,215][255279] Updated weights for policy 0, policy_version 24719 (0.0008) +[2026-06-02 16:56:59,395][255279] Updated weights for policy 0, policy_version 24729 (0.0009) +[2026-06-02 16:56:59,578][255279] Updated weights for policy 0, policy_version 24739 (0.0008) +[2026-06-02 16:56:59,772][255279] Updated weights for policy 0, policy_version 24749 (0.0009) +[2026-06-02 16:56:59,954][255279] Updated weights for policy 0, policy_version 24759 (0.0009) +[2026-06-02 16:57:00,153][255279] Updated weights for policy 0, policy_version 24769 (0.0009) +[2026-06-02 16:57:00,501][253683] Fps is (10 sec: 19660.8, 60 sec: 19660.8, 300 sec: 19549.7). Total num frames: 12746752. Throughput: 0: 19345.1. Samples: 12748288. Policy #0 lag: (min: 12.0, avg: 29.2, max: 76.0) +[2026-06-02 16:57:00,502][253683] Avg episode reward: [(0, '848.262')] +[2026-06-02 16:57:00,826][255279] Updated weights for policy 0, policy_version 24779 (0.0009) +[2026-06-02 16:57:01,013][255279] Updated weights for policy 0, policy_version 24789 (0.0008) +[2026-06-02 16:57:01,193][255279] Updated weights for policy 0, policy_version 24799 (0.0008) +[2026-06-02 16:57:01,371][255279] Updated weights for policy 0, policy_version 24809 (0.0008) +[2026-06-02 16:57:01,559][255279] Updated weights for policy 0, policy_version 24819 (0.0008) +[2026-06-02 16:57:01,756][255279] Updated weights for policy 0, policy_version 24829 (0.0008) +[2026-06-02 16:57:01,942][255279] Updated weights for policy 0, policy_version 24839 (0.0008) +[2026-06-02 16:57:02,617][255279] Updated weights for policy 0, policy_version 24849 (0.0008) +[2026-06-02 16:57:02,810][255279] Updated weights for policy 0, policy_version 24859 (0.0008) +[2026-06-02 16:57:02,987][255279] Updated weights for policy 0, policy_version 24869 (0.0008) +[2026-06-02 16:57:03,199][255279] Updated weights for policy 0, policy_version 24880 (0.0008) +[2026-06-02 16:57:03,392][255279] Updated weights for policy 0, policy_version 24890 (0.0008) +[2026-06-02 16:57:03,576][255279] Updated weights for policy 0, policy_version 24900 (0.0008) +[2026-06-02 16:57:04,235][255279] Updated weights for policy 0, policy_version 24910 (0.0008) +[2026-06-02 16:57:04,414][255279] Updated weights for policy 0, policy_version 24920 (0.0008) +[2026-06-02 16:57:04,625][255279] Updated weights for policy 0, policy_version 24931 (0.0008) +[2026-06-02 16:57:04,806][255279] Updated weights for policy 0, policy_version 24941 (0.0008) +[2026-06-02 16:57:04,995][255279] Updated weights for policy 0, policy_version 24951 (0.0009) +[2026-06-02 16:57:05,184][255279] Updated weights for policy 0, policy_version 24961 (0.0009) +[2026-06-02 16:57:05,502][253683] Fps is (10 sec: 19660.9, 60 sec: 19660.8, 300 sec: 19549.7). Total num frames: 12845056. Throughput: 0: 19581.2. Samples: 12868736. Policy #0 lag: (min: 12.0, avg: 29.2, max: 76.0) +[2026-06-02 16:57:05,502][253683] Avg episode reward: [(0, '839.694')] +[2026-06-02 16:57:05,891][255279] Updated weights for policy 0, policy_version 24972 (0.0008) +[2026-06-02 16:57:06,074][255279] Updated weights for policy 0, policy_version 24982 (0.0009) +[2026-06-02 16:57:06,259][255279] Updated weights for policy 0, policy_version 24992 (0.0009) +[2026-06-02 16:57:06,446][255279] Updated weights for policy 0, policy_version 25002 (0.0009) +[2026-06-02 16:57:06,633][255279] Updated weights for policy 0, policy_version 25012 (0.0009) +[2026-06-02 16:57:06,822][255279] Updated weights for policy 0, policy_version 25022 (0.0008) +[2026-06-02 16:57:07,002][255279] Updated weights for policy 0, policy_version 25032 (0.0008) +[2026-06-02 16:57:07,680][255279] Updated weights for policy 0, policy_version 25042 (0.0008) +[2026-06-02 16:57:07,875][255279] Updated weights for policy 0, policy_version 25052 (0.0009) +[2026-06-02 16:57:08,052][255279] Updated weights for policy 0, policy_version 25062 (0.0008) +[2026-06-02 16:57:08,240][255279] Updated weights for policy 0, policy_version 25072 (0.0008) +[2026-06-02 16:57:08,440][255279] Updated weights for policy 0, policy_version 25082 (0.0008) +[2026-06-02 16:57:08,622][255279] Updated weights for policy 0, policy_version 25092 (0.0009) +[2026-06-02 16:57:09,304][255279] Updated weights for policy 0, policy_version 25102 (0.0008) +[2026-06-02 16:57:09,500][255279] Updated weights for policy 0, policy_version 25113 (0.0008) +[2026-06-02 16:57:09,694][255279] Updated weights for policy 0, policy_version 25123 (0.0009) +[2026-06-02 16:57:09,882][255279] Updated weights for policy 0, policy_version 25133 (0.0009) +[2026-06-02 16:57:10,072][255279] Updated weights for policy 0, policy_version 25143 (0.0008) +[2026-06-02 16:57:10,259][255279] Updated weights for policy 0, policy_version 25153 (0.0008) +[2026-06-02 16:57:10,501][253683] Fps is (10 sec: 19660.8, 60 sec: 19660.8, 300 sec: 19549.7). Total num frames: 12943360. Throughput: 0: 19569.8. Samples: 12929664. Policy #0 lag: (min: 12.0, avg: 29.2, max: 76.0) +[2026-06-02 16:57:10,502][253683] Avg episode reward: [(0, '825.599')] +[2026-06-02 16:57:10,922][255279] Updated weights for policy 0, policy_version 25163 (0.0008) +[2026-06-02 16:57:11,108][255279] Updated weights for policy 0, policy_version 25173 (0.0009) +[2026-06-02 16:57:11,292][255279] Updated weights for policy 0, policy_version 25183 (0.0007) +[2026-06-02 16:57:11,470][255279] Updated weights for policy 0, policy_version 25193 (0.0009) +[2026-06-02 16:57:11,665][255279] Updated weights for policy 0, policy_version 25203 (0.0008) +[2026-06-02 16:57:11,854][255279] Updated weights for policy 0, policy_version 25213 (0.0008) +[2026-06-02 16:57:12,040][255279] Updated weights for policy 0, policy_version 25223 (0.0008) +[2026-06-02 16:57:12,713][255279] Updated weights for policy 0, policy_version 25233 (0.0008) +[2026-06-02 16:57:12,913][255279] Updated weights for policy 0, policy_version 25244 (0.0008) +[2026-06-02 16:57:13,119][255279] Updated weights for policy 0, policy_version 25255 (0.0008) +[2026-06-02 16:57:13,316][255279] Updated weights for policy 0, policy_version 25266 (0.0008) +[2026-06-02 16:57:13,514][255279] Updated weights for policy 0, policy_version 25276 (0.0008) +[2026-06-02 16:57:13,695][255279] Updated weights for policy 0, policy_version 25286 (0.0009) +[2026-06-02 16:57:14,361][255279] Updated weights for policy 0, policy_version 25296 (0.0008) +[2026-06-02 16:57:14,552][255279] Updated weights for policy 0, policy_version 25306 (0.0008) +[2026-06-02 16:57:14,735][255279] Updated weights for policy 0, policy_version 25316 (0.0008) +[2026-06-02 16:57:14,922][255279] Updated weights for policy 0, policy_version 25326 (0.0008) +[2026-06-02 16:57:15,112][255279] Updated weights for policy 0, policy_version 25336 (0.0008) +[2026-06-02 16:57:15,302][255279] Updated weights for policy 0, policy_version 25346 (0.0008) +[2026-06-02 16:57:15,502][253683] Fps is (10 sec: 19660.8, 60 sec: 19660.8, 300 sec: 19549.7). Total num frames: 13041664. Throughput: 0: 19308.1. Samples: 13039744. Policy #0 lag: (min: 63.0, avg: 80.9, max: 127.0) +[2026-06-02 16:57:15,502][253683] Avg episode reward: [(0, '829.142')] +[2026-06-02 16:57:15,985][255279] Updated weights for policy 0, policy_version 25356 (0.0008) +[2026-06-02 16:57:16,172][255279] Updated weights for policy 0, policy_version 25366 (0.0008) +[2026-06-02 16:57:16,358][255279] Updated weights for policy 0, policy_version 25376 (0.0008) +[2026-06-02 16:57:16,564][255279] Updated weights for policy 0, policy_version 25387 (0.0008) +[2026-06-02 16:57:16,746][255279] Updated weights for policy 0, policy_version 25397 (0.0009) +[2026-06-02 16:57:16,936][255279] Updated weights for policy 0, policy_version 25407 (0.0009) +[2026-06-02 16:57:17,613][255279] Updated weights for policy 0, policy_version 25417 (0.0008) +[2026-06-02 16:57:17,788][255279] Updated weights for policy 0, policy_version 25427 (0.0008) +[2026-06-02 16:57:17,971][255279] Updated weights for policy 0, policy_version 25437 (0.0008) +[2026-06-02 16:57:18,167][255279] Updated weights for policy 0, policy_version 25447 (0.0009) +[2026-06-02 16:57:18,350][255279] Updated weights for policy 0, policy_version 25457 (0.0008) +[2026-06-02 16:57:18,536][255279] Updated weights for policy 0, policy_version 25467 (0.0008) +[2026-06-02 16:57:18,725][255279] Updated weights for policy 0, policy_version 25477 (0.0008) +[2026-06-02 16:57:19,389][255279] Updated weights for policy 0, policy_version 25487 (0.0008) +[2026-06-02 16:57:19,567][255279] Updated weights for policy 0, policy_version 25497 (0.0008) +[2026-06-02 16:57:19,761][255279] Updated weights for policy 0, policy_version 25507 (0.0008) +[2026-06-02 16:57:19,946][255279] Updated weights for policy 0, policy_version 25517 (0.0008) +[2026-06-02 16:57:20,126][255279] Updated weights for policy 0, policy_version 25527 (0.0008) +[2026-06-02 16:57:20,322][255279] Updated weights for policy 0, policy_version 25537 (0.0008) +[2026-06-02 16:57:20,502][253683] Fps is (10 sec: 19660.7, 60 sec: 19660.8, 300 sec: 19549.7). Total num frames: 13139968. Throughput: 0: 19561.2. Samples: 13161600. Policy #0 lag: (min: 63.0, avg: 80.9, max: 127.0) +[2026-06-02 16:57:20,503][253683] Avg episode reward: [(0, '835.247')] +[2026-06-02 16:57:21,008][255279] Updated weights for policy 0, policy_version 25547 (0.0008) +[2026-06-02 16:57:21,181][255279] Updated weights for policy 0, policy_version 25557 (0.0009) +[2026-06-02 16:57:21,370][255279] Updated weights for policy 0, policy_version 25567 (0.0009) +[2026-06-02 16:57:21,552][255279] Updated weights for policy 0, policy_version 25577 (0.0008) +[2026-06-02 16:57:21,738][255279] Updated weights for policy 0, policy_version 25587 (0.0008) +[2026-06-02 16:57:21,961][255279] Updated weights for policy 0, policy_version 25599 (0.0009) +[2026-06-02 16:57:22,651][255279] Updated weights for policy 0, policy_version 25609 (0.0008) +[2026-06-02 16:57:22,838][255279] Updated weights for policy 0, policy_version 25620 (0.0008) +[2026-06-02 16:57:23,031][255279] Updated weights for policy 0, policy_version 25630 (0.0009) +[2026-06-02 16:57:23,215][255279] Updated weights for policy 0, policy_version 25640 (0.0008) +[2026-06-02 16:57:23,401][255279] Updated weights for policy 0, policy_version 25650 (0.0008) +[2026-06-02 16:57:23,605][255279] Updated weights for policy 0, policy_version 25661 (0.0008) +[2026-06-02 16:57:23,793][255279] Updated weights for policy 0, policy_version 25671 (0.0008) +[2026-06-02 16:57:24,473][255279] Updated weights for policy 0, policy_version 25682 (0.0008) +[2026-06-02 16:57:24,660][255279] Updated weights for policy 0, policy_version 25692 (0.0008) +[2026-06-02 16:57:24,837][255279] Updated weights for policy 0, policy_version 25702 (0.0008) +[2026-06-02 16:57:25,019][255279] Updated weights for policy 0, policy_version 25712 (0.0009) +[2026-06-02 16:57:25,211][255279] Updated weights for policy 0, policy_version 25722 (0.0008) +[2026-06-02 16:57:25,399][255279] Updated weights for policy 0, policy_version 25732 (0.0008) +[2026-06-02 16:57:25,502][253683] Fps is (10 sec: 19660.8, 60 sec: 19660.8, 300 sec: 19549.7). Total num frames: 13238272. Throughput: 0: 19564.1. Samples: 13222784. Policy #0 lag: (min: 63.0, avg: 80.9, max: 127.0) +[2026-06-02 16:57:25,502][253683] Avg episode reward: [(0, '868.767')] +[2026-06-02 16:57:26,111][255279] Updated weights for policy 0, policy_version 25742 (0.0008) +[2026-06-02 16:57:26,314][255279] Updated weights for policy 0, policy_version 25753 (0.0008) +[2026-06-02 16:57:26,499][255279] Updated weights for policy 0, policy_version 25763 (0.0008) +[2026-06-02 16:57:26,697][255279] Updated weights for policy 0, policy_version 25774 (0.0009) +[2026-06-02 16:57:26,883][255279] Updated weights for policy 0, policy_version 25784 (0.0008) +[2026-06-02 16:57:27,070][255279] Updated weights for policy 0, policy_version 25794 (0.0008) +[2026-06-02 16:57:27,737][255279] Updated weights for policy 0, policy_version 25804 (0.0009) +[2026-06-02 16:57:27,936][255279] Updated weights for policy 0, policy_version 25815 (0.0008) +[2026-06-02 16:57:28,118][255279] Updated weights for policy 0, policy_version 25825 (0.0008) +[2026-06-02 16:57:28,295][255279] Updated weights for policy 0, policy_version 25835 (0.0009) +[2026-06-02 16:57:28,488][255279] Updated weights for policy 0, policy_version 25845 (0.0008) +[2026-06-02 16:57:28,686][255279] Updated weights for policy 0, policy_version 25856 (0.0008) +[2026-06-02 16:57:29,401][255279] Updated weights for policy 0, policy_version 25866 (0.0009) +[2026-06-02 16:57:29,564][255279] Updated weights for policy 0, policy_version 25876 (0.0008) +[2026-06-02 16:57:29,748][255279] Updated weights for policy 0, policy_version 25886 (0.0008) +[2026-06-02 16:57:29,931][255279] Updated weights for policy 0, policy_version 25896 (0.0008) +[2026-06-02 16:57:30,119][255279] Updated weights for policy 0, policy_version 25906 (0.0008) +[2026-06-02 16:57:30,305][255279] Updated weights for policy 0, policy_version 25916 (0.0008) +[2026-06-02 16:57:30,502][253683] Fps is (10 sec: 16384.1, 60 sec: 19114.7, 300 sec: 19438.6). Total num frames: 13303808. Throughput: 0: 19362.2. Samples: 13332096. Policy #0 lag: (min: 63.0, avg: 80.9, max: 127.0) +[2026-06-02 16:57:30,502][253683] Avg episode reward: [(0, '885.439')] +[2026-06-02 16:57:30,505][255279] Updated weights for policy 0, policy_version 25927 (0.0008) +[2026-06-02 16:57:30,520][255187] Saving new best policy, reward=885.439! +[2026-06-02 16:57:31,182][255279] Updated weights for policy 0, policy_version 25937 (0.0008) +[2026-06-02 16:57:31,366][255279] Updated weights for policy 0, policy_version 25947 (0.0008) +[2026-06-02 16:57:31,549][255279] Updated weights for policy 0, policy_version 25957 (0.0008) +[2026-06-02 16:57:31,735][255279] Updated weights for policy 0, policy_version 25967 (0.0008) +[2026-06-02 16:57:31,916][255279] Updated weights for policy 0, policy_version 25977 (0.0009) +[2026-06-02 16:57:32,125][255279] Updated weights for policy 0, policy_version 25988 (0.0008) +[2026-06-02 16:57:32,824][255279] Updated weights for policy 0, policy_version 25998 (0.0008) +[2026-06-02 16:57:33,014][255279] Updated weights for policy 0, policy_version 26009 (0.0009) +[2026-06-02 16:57:33,199][255279] Updated weights for policy 0, policy_version 26019 (0.0008) +[2026-06-02 16:57:33,391][255279] Updated weights for policy 0, policy_version 26029 (0.0008) +[2026-06-02 16:57:33,572][255279] Updated weights for policy 0, policy_version 26039 (0.0008) +[2026-06-02 16:57:33,763][255279] Updated weights for policy 0, policy_version 26049 (0.0008) +[2026-06-02 16:57:34,453][255279] Updated weights for policy 0, policy_version 26059 (0.0009) +[2026-06-02 16:57:34,642][255279] Updated weights for policy 0, policy_version 26070 (0.0009) +[2026-06-02 16:57:34,829][255279] Updated weights for policy 0, policy_version 26080 (0.0009) +[2026-06-02 16:57:35,014][255279] Updated weights for policy 0, policy_version 26090 (0.0008) +[2026-06-02 16:57:35,193][255279] Updated weights for policy 0, policy_version 26100 (0.0008) +[2026-06-02 16:57:35,406][255279] Updated weights for policy 0, policy_version 26111 (0.0008) +[2026-06-02 16:57:35,502][253683] Fps is (10 sec: 16383.8, 60 sec: 19114.6, 300 sec: 19438.6). Total num frames: 13402112. Throughput: 0: 19552.6. Samples: 13454976. Policy #0 lag: (min: 63.0, avg: 79.6, max: 127.0) +[2026-06-02 16:57:35,503][253683] Avg episode reward: [(0, '912.822')] +[2026-06-02 16:57:35,564][255187] Saving new best policy, reward=912.822! +[2026-06-02 16:57:36,099][255279] Updated weights for policy 0, policy_version 26121 (0.0009) +[2026-06-02 16:57:36,276][255279] Updated weights for policy 0, policy_version 26131 (0.0008) +[2026-06-02 16:57:36,456][255279] Updated weights for policy 0, policy_version 26141 (0.0008) +[2026-06-02 16:57:36,657][255279] Updated weights for policy 0, policy_version 26152 (0.0009) +[2026-06-02 16:57:36,844][255279] Updated weights for policy 0, policy_version 26162 (0.0008) +[2026-06-02 16:57:37,081][255279] Updated weights for policy 0, policy_version 26175 (0.0009) +[2026-06-02 16:57:37,778][255279] Updated weights for policy 0, policy_version 26185 (0.0009) +[2026-06-02 16:57:37,955][255279] Updated weights for policy 0, policy_version 26195 (0.0008) +[2026-06-02 16:57:38,135][255279] Updated weights for policy 0, policy_version 26205 (0.0008) +[2026-06-02 16:57:38,313][255279] Updated weights for policy 0, policy_version 26215 (0.0008) +[2026-06-02 16:57:38,499][255279] Updated weights for policy 0, policy_version 26225 (0.0009) +[2026-06-02 16:57:38,691][255279] Updated weights for policy 0, policy_version 26235 (0.0008) +[2026-06-02 16:57:38,892][255279] Updated weights for policy 0, policy_version 26246 (0.0008) +[2026-06-02 16:57:39,585][255279] Updated weights for policy 0, policy_version 26257 (0.0009) +[2026-06-02 16:57:39,775][255279] Updated weights for policy 0, policy_version 26267 (0.0009) +[2026-06-02 16:57:39,957][255279] Updated weights for policy 0, policy_version 26277 (0.0009) +[2026-06-02 16:57:40,131][255279] Updated weights for policy 0, policy_version 26287 (0.0009) +[2026-06-02 16:57:40,325][255279] Updated weights for policy 0, policy_version 26297 (0.0009) +[2026-06-02 16:57:40,502][253683] Fps is (10 sec: 19660.8, 60 sec: 19114.7, 300 sec: 19438.6). Total num frames: 13500416. Throughput: 0: 19561.2. Samples: 13516672. Policy #0 lag: (min: 63.0, avg: 79.6, max: 127.0) +[2026-06-02 16:57:40,502][253683] Avg episode reward: [(0, '906.115')] +[2026-06-02 16:57:40,542][255279] Updated weights for policy 0, policy_version 26309 (0.0009) +[2026-06-02 16:57:41,224][255279] Updated weights for policy 0, policy_version 26319 (0.0009) +[2026-06-02 16:57:41,410][255279] Updated weights for policy 0, policy_version 26329 (0.0009) +[2026-06-02 16:57:41,592][255279] Updated weights for policy 0, policy_version 26339 (0.0009) +[2026-06-02 16:57:41,780][255279] Updated weights for policy 0, policy_version 26349 (0.0009) +[2026-06-02 16:57:41,966][255279] Updated weights for policy 0, policy_version 26359 (0.0009) +[2026-06-02 16:57:42,169][255279] Updated weights for policy 0, policy_version 26370 (0.0009) +[2026-06-02 16:57:42,875][255279] Updated weights for policy 0, policy_version 26381 (0.0009) +[2026-06-02 16:57:43,059][255279] Updated weights for policy 0, policy_version 26391 (0.0009) +[2026-06-02 16:57:43,264][255279] Updated weights for policy 0, policy_version 26402 (0.0009) +[2026-06-02 16:57:43,441][255279] Updated weights for policy 0, policy_version 26412 (0.0008) +[2026-06-02 16:57:43,636][255279] Updated weights for policy 0, policy_version 26422 (0.0009) +[2026-06-02 16:57:43,825][255279] Updated weights for policy 0, policy_version 26432 (0.0008) +[2026-06-02 16:57:44,504][255279] Updated weights for policy 0, policy_version 26442 (0.0009) +[2026-06-02 16:57:44,673][255279] Updated weights for policy 0, policy_version 26452 (0.0009) +[2026-06-02 16:57:44,867][255279] Updated weights for policy 0, policy_version 26462 (0.0008) +[2026-06-02 16:57:45,071][255279] Updated weights for policy 0, policy_version 26473 (0.0008) +[2026-06-02 16:57:45,258][255279] Updated weights for policy 0, policy_version 26483 (0.0008) +[2026-06-02 16:57:45,466][255279] Updated weights for policy 0, policy_version 26494 (0.0008) +[2026-06-02 16:57:45,502][253683] Fps is (10 sec: 19661.1, 60 sec: 19114.7, 300 sec: 19438.6). Total num frames: 13598720. Throughput: 0: 19493.0. Samples: 13625472. Policy #0 lag: (min: 63.0, avg: 79.6, max: 127.0) +[2026-06-02 16:57:45,502][253683] Avg episode reward: [(0, '932.920')] +[2026-06-02 16:57:45,646][255187] Saving new best policy, reward=932.920! +[2026-06-02 16:57:45,648][255279] Updated weights for policy 0, policy_version 26504 (0.0009) +[2026-06-02 16:57:46,308][255279] Updated weights for policy 0, policy_version 26514 (0.0008) +[2026-06-02 16:57:46,518][255279] Updated weights for policy 0, policy_version 26525 (0.0008) +[2026-06-02 16:57:46,705][255279] Updated weights for policy 0, policy_version 26535 (0.0009) +[2026-06-02 16:57:46,883][255279] Updated weights for policy 0, policy_version 26545 (0.0008) +[2026-06-02 16:57:47,074][255279] Updated weights for policy 0, policy_version 26555 (0.0008) +[2026-06-02 16:57:47,268][255279] Updated weights for policy 0, policy_version 26565 (0.0008) +[2026-06-02 16:57:47,941][255279] Updated weights for policy 0, policy_version 26575 (0.0008) +[2026-06-02 16:57:48,121][255279] Updated weights for policy 0, policy_version 26585 (0.0008) +[2026-06-02 16:57:48,317][255279] Updated weights for policy 0, policy_version 26595 (0.0009) +[2026-06-02 16:57:48,506][255279] Updated weights for policy 0, policy_version 26605 (0.0008) +[2026-06-02 16:57:48,690][255279] Updated weights for policy 0, policy_version 26615 (0.0008) +[2026-06-02 16:57:48,874][255279] Updated weights for policy 0, policy_version 26625 (0.0008) +[2026-06-02 16:57:49,537][255279] Updated weights for policy 0, policy_version 26635 (0.0008) +[2026-06-02 16:57:49,718][255279] Updated weights for policy 0, policy_version 26645 (0.0009) +[2026-06-02 16:57:49,901][255279] Updated weights for policy 0, policy_version 26655 (0.0008) +[2026-06-02 16:57:50,072][255279] Updated weights for policy 0, policy_version 26665 (0.0008) +[2026-06-02 16:57:50,265][255279] Updated weights for policy 0, policy_version 26675 (0.0008) +[2026-06-02 16:57:50,450][255279] Updated weights for policy 0, policy_version 26685 (0.0008) +[2026-06-02 16:57:50,502][253683] Fps is (10 sec: 19660.7, 60 sec: 19114.6, 300 sec: 19438.6). Total num frames: 13697024. Throughput: 0: 19501.5. Samples: 13746304. Policy #0 lag: (min: 63.0, avg: 79.6, max: 127.0) +[2026-06-02 16:57:50,502][253683] Avg episode reward: [(0, '912.875')] +[2026-06-02 16:57:50,636][255279] Updated weights for policy 0, policy_version 26695 (0.0008) +[2026-06-02 16:57:51,314][255279] Updated weights for policy 0, policy_version 26705 (0.0008) +[2026-06-02 16:57:51,504][255279] Updated weights for policy 0, policy_version 26715 (0.0009) +[2026-06-02 16:57:51,682][255279] Updated weights for policy 0, policy_version 26725 (0.0008) +[2026-06-02 16:57:51,877][255279] Updated weights for policy 0, policy_version 26735 (0.0009) +[2026-06-02 16:57:52,061][255279] Updated weights for policy 0, policy_version 26745 (0.0008) +[2026-06-02 16:57:52,248][255279] Updated weights for policy 0, policy_version 26755 (0.0008) +[2026-06-02 16:57:52,930][255279] Updated weights for policy 0, policy_version 26765 (0.0008) +[2026-06-02 16:57:53,117][255279] Updated weights for policy 0, policy_version 26775 (0.0008) +[2026-06-02 16:57:53,300][255279] Updated weights for policy 0, policy_version 26785 (0.0008) +[2026-06-02 16:57:53,491][255279] Updated weights for policy 0, policy_version 26795 (0.0009) +[2026-06-02 16:57:53,677][255279] Updated weights for policy 0, policy_version 26805 (0.0008) +[2026-06-02 16:57:53,870][255279] Updated weights for policy 0, policy_version 26815 (0.0009) +[2026-06-02 16:57:54,535][255279] Updated weights for policy 0, policy_version 26825 (0.0009) +[2026-06-02 16:57:54,714][255279] Updated weights for policy 0, policy_version 26835 (0.0008) +[2026-06-02 16:57:54,897][255279] Updated weights for policy 0, policy_version 26845 (0.0008) +[2026-06-02 16:57:55,095][255279] Updated weights for policy 0, policy_version 26856 (0.0008) +[2026-06-02 16:57:55,284][255279] Updated weights for policy 0, policy_version 26866 (0.0008) +[2026-06-02 16:57:55,480][255279] Updated weights for policy 0, policy_version 26876 (0.0009) +[2026-06-02 16:57:55,501][253683] Fps is (10 sec: 19660.9, 60 sec: 19114.7, 300 sec: 19438.7). Total num frames: 13795328. Throughput: 0: 19475.9. Samples: 13806080. Policy #0 lag: (min: 50.0, avg: 77.7, max: 114.0) +[2026-06-02 16:57:55,502][253683] Avg episode reward: [(0, '911.388')] +[2026-06-02 16:57:55,662][255279] Updated weights for policy 0, policy_version 26886 (0.0008) +[2026-06-02 16:57:56,371][255279] Updated weights for policy 0, policy_version 26896 (0.0009) +[2026-06-02 16:57:56,565][255279] Updated weights for policy 0, policy_version 26906 (0.0009) +[2026-06-02 16:57:56,746][255279] Updated weights for policy 0, policy_version 26916 (0.0008) +[2026-06-02 16:57:56,934][255279] Updated weights for policy 0, policy_version 26926 (0.0009) +[2026-06-02 16:57:57,127][255279] Updated weights for policy 0, policy_version 26936 (0.0009) +[2026-06-02 16:57:57,317][255279] Updated weights for policy 0, policy_version 26946 (0.0009) +[2026-06-02 16:57:57,984][255279] Updated weights for policy 0, policy_version 26956 (0.0009) +[2026-06-02 16:57:58,158][255279] Updated weights for policy 0, policy_version 26966 (0.0009) +[2026-06-02 16:57:58,347][255279] Updated weights for policy 0, policy_version 26976 (0.0008) +[2026-06-02 16:57:58,534][255279] Updated weights for policy 0, policy_version 26986 (0.0009) +[2026-06-02 16:57:58,727][255279] Updated weights for policy 0, policy_version 26996 (0.0009) +[2026-06-02 16:57:58,935][255279] Updated weights for policy 0, policy_version 27007 (0.0009) +[2026-06-02 16:57:59,616][255279] Updated weights for policy 0, policy_version 27017 (0.0009) +[2026-06-02 16:57:59,797][255279] Updated weights for policy 0, policy_version 27027 (0.0009) +[2026-06-02 16:57:59,976][255279] Updated weights for policy 0, policy_version 27037 (0.0009) +[2026-06-02 16:58:00,167][255279] Updated weights for policy 0, policy_version 27047 (0.0009) +[2026-06-02 16:58:00,353][255279] Updated weights for policy 0, policy_version 27057 (0.0009) +[2026-06-02 16:58:00,502][253683] Fps is (10 sec: 19661.0, 60 sec: 19114.7, 300 sec: 19438.7). Total num frames: 13893632. Throughput: 0: 19512.9. Samples: 13917824. Policy #0 lag: (min: 50.0, avg: 77.7, max: 114.0) +[2026-06-02 16:58:00,502][253683] Avg episode reward: [(0, '954.661')] +[2026-06-02 16:58:00,552][255279] Updated weights for policy 0, policy_version 27068 (0.0009) +[2026-06-02 16:58:00,739][255279] Updated weights for policy 0, policy_version 27078 (0.0009) +[2026-06-02 16:58:00,773][255187] Saving new best policy, reward=954.661! +[2026-06-02 16:58:01,399][255279] Updated weights for policy 0, policy_version 27088 (0.0009) +[2026-06-02 16:58:01,602][255279] Updated weights for policy 0, policy_version 27098 (0.0009) +[2026-06-02 16:58:01,771][255279] Updated weights for policy 0, policy_version 27108 (0.0009) +[2026-06-02 16:58:01,971][255279] Updated weights for policy 0, policy_version 27118 (0.0008) +[2026-06-02 16:58:02,166][255279] Updated weights for policy 0, policy_version 27129 (0.0008) +[2026-06-02 16:58:02,349][255279] Updated weights for policy 0, policy_version 27139 (0.0008) +[2026-06-02 16:58:03,053][255279] Updated weights for policy 0, policy_version 27150 (0.0009) +[2026-06-02 16:58:03,244][255279] Updated weights for policy 0, policy_version 27160 (0.0009) +[2026-06-02 16:58:03,443][255279] Updated weights for policy 0, policy_version 27171 (0.0008) +[2026-06-02 16:58:03,618][255279] Updated weights for policy 0, policy_version 27181 (0.0008) +[2026-06-02 16:58:03,805][255279] Updated weights for policy 0, policy_version 27191 (0.0009) +[2026-06-02 16:58:03,996][255279] Updated weights for policy 0, policy_version 27201 (0.0008) +[2026-06-02 16:58:04,689][255279] Updated weights for policy 0, policy_version 27211 (0.0008) +[2026-06-02 16:58:04,890][255279] Updated weights for policy 0, policy_version 27222 (0.0008) +[2026-06-02 16:58:05,074][255279] Updated weights for policy 0, policy_version 27232 (0.0008) +[2026-06-02 16:58:05,286][255279] Updated weights for policy 0, policy_version 27243 (0.0008) +[2026-06-02 16:58:05,471][255279] Updated weights for policy 0, policy_version 27253 (0.0008) +[2026-06-02 16:58:05,502][253683] Fps is (10 sec: 19660.6, 60 sec: 19114.7, 300 sec: 19438.6). Total num frames: 13991936. Throughput: 0: 19507.2. Samples: 14039424. Policy #0 lag: (min: 50.0, avg: 77.7, max: 114.0) +[2026-06-02 16:58:05,502][253683] Avg episode reward: [(0, '968.833')] +[2026-06-02 16:58:05,661][255279] Updated weights for policy 0, policy_version 27263 (0.0009) +[2026-06-02 16:58:05,823][255187] Saving new best policy, reward=968.833! +[2026-06-02 16:58:06,339][255279] Updated weights for policy 0, policy_version 27273 (0.0008) +[2026-06-02 16:58:06,527][255279] Updated weights for policy 0, policy_version 27284 (0.0008) +[2026-06-02 16:58:06,706][255279] Updated weights for policy 0, policy_version 27294 (0.0008) +[2026-06-02 16:58:06,885][255279] Updated weights for policy 0, policy_version 27304 (0.0008) +[2026-06-02 16:58:07,073][255279] Updated weights for policy 0, policy_version 27314 (0.0008) +[2026-06-02 16:58:07,267][255279] Updated weights for policy 0, policy_version 27324 (0.0008) +[2026-06-02 16:58:07,446][255279] Updated weights for policy 0, policy_version 27334 (0.0008) +[2026-06-02 16:58:08,131][255279] Updated weights for policy 0, policy_version 27344 (0.0008) +[2026-06-02 16:58:08,338][255279] Updated weights for policy 0, policy_version 27355 (0.0008) +[2026-06-02 16:58:08,516][255279] Updated weights for policy 0, policy_version 27365 (0.0008) +[2026-06-02 16:58:08,700][255279] Updated weights for policy 0, policy_version 27375 (0.0009) +[2026-06-02 16:58:08,890][255279] Updated weights for policy 0, policy_version 27385 (0.0008) +[2026-06-02 16:58:09,075][255279] Updated weights for policy 0, policy_version 27395 (0.0008) +[2026-06-02 16:58:09,773][255279] Updated weights for policy 0, policy_version 27406 (0.0009) +[2026-06-02 16:58:09,953][255279] Updated weights for policy 0, policy_version 27416 (0.0008) +[2026-06-02 16:58:10,155][255279] Updated weights for policy 0, policy_version 27427 (0.0009) +[2026-06-02 16:58:10,352][255279] Updated weights for policy 0, policy_version 27437 (0.0008) +[2026-06-02 16:58:10,502][253683] Fps is (10 sec: 19660.8, 60 sec: 19114.7, 300 sec: 19438.6). Total num frames: 14090240. Throughput: 0: 19350.8. Samples: 14093568. Policy #0 lag: (min: 50.0, avg: 77.7, max: 114.0) +[2026-06-02 16:58:10,502][253683] Avg episode reward: [(0, '987.375')] +[2026-06-02 16:58:10,535][255279] Updated weights for policy 0, policy_version 27447 (0.0008) +[2026-06-02 16:58:10,730][255279] Updated weights for policy 0, policy_version 27457 (0.0008) +[2026-06-02 16:58:10,853][255187] Saving new best policy, reward=987.375! +[2026-06-02 16:58:11,420][255279] Updated weights for policy 0, policy_version 27467 (0.0008) +[2026-06-02 16:58:11,617][255279] Updated weights for policy 0, policy_version 27478 (0.0009) +[2026-06-02 16:58:11,796][255279] Updated weights for policy 0, policy_version 27488 (0.0009) +[2026-06-02 16:58:12,001][255279] Updated weights for policy 0, policy_version 27498 (0.0008) +[2026-06-02 16:58:12,193][255279] Updated weights for policy 0, policy_version 27508 (0.0008) +[2026-06-02 16:58:12,381][255279] Updated weights for policy 0, policy_version 27518 (0.0008) +[2026-06-02 16:58:12,568][255279] Updated weights for policy 0, policy_version 27528 (0.0008) +[2026-06-02 16:58:13,226][255279] Updated weights for policy 0, policy_version 27538 (0.0008) +[2026-06-02 16:58:13,408][255279] Updated weights for policy 0, policy_version 27548 (0.0007) +[2026-06-02 16:58:13,591][255279] Updated weights for policy 0, policy_version 27558 (0.0007) +[2026-06-02 16:58:13,770][255279] Updated weights for policy 0, policy_version 27568 (0.0007) +[2026-06-02 16:58:13,965][255279] Updated weights for policy 0, policy_version 27578 (0.0009) +[2026-06-02 16:58:14,168][255279] Updated weights for policy 0, policy_version 27588 (0.0009) +[2026-06-02 16:58:14,832][255279] Updated weights for policy 0, policy_version 27598 (0.0009) +[2026-06-02 16:58:15,014][255279] Updated weights for policy 0, policy_version 27608 (0.0008) +[2026-06-02 16:58:15,204][255279] Updated weights for policy 0, policy_version 27618 (0.0008) +[2026-06-02 16:58:15,398][255279] Updated weights for policy 0, policy_version 27628 (0.0009) +[2026-06-02 16:58:15,502][253683] Fps is (10 sec: 19660.9, 60 sec: 19114.7, 300 sec: 19438.6). Total num frames: 14188544. Throughput: 0: 19510.1. Samples: 14210048. Policy #0 lag: (min: 28.0, avg: 44.4, max: 92.0) +[2026-06-02 16:58:15,503][253683] Avg episode reward: [(0, '995.261')] +[2026-06-02 16:58:15,584][255279] Updated weights for policy 0, policy_version 27638 (0.0006) +[2026-06-02 16:58:15,774][255279] Updated weights for policy 0, policy_version 27648 (0.0006) +[2026-06-02 16:58:15,915][255187] Saving new best policy, reward=995.261! +[2026-06-02 16:58:16,435][255279] Updated weights for policy 0, policy_version 27658 (0.0008) +[2026-06-02 16:58:16,611][255279] Updated weights for policy 0, policy_version 27668 (0.0008) +[2026-06-02 16:58:16,808][255279] Updated weights for policy 0, policy_version 27679 (0.0008) +[2026-06-02 16:58:17,002][255279] Updated weights for policy 0, policy_version 27689 (0.0008) +[2026-06-02 16:58:17,192][255279] Updated weights for policy 0, policy_version 27699 (0.0008) +[2026-06-02 16:58:17,381][255279] Updated weights for policy 0, policy_version 27709 (0.0008) +[2026-06-02 16:58:17,568][255279] Updated weights for policy 0, policy_version 27719 (0.0008) +[2026-06-02 16:58:18,222][255279] Updated weights for policy 0, policy_version 27729 (0.0008) +[2026-06-02 16:58:18,420][255279] Updated weights for policy 0, policy_version 27740 (0.0008) +[2026-06-02 16:58:18,604][255279] Updated weights for policy 0, policy_version 27750 (0.0008) +[2026-06-02 16:58:18,796][255279] Updated weights for policy 0, policy_version 27760 (0.0009) +[2026-06-02 16:58:18,988][255279] Updated weights for policy 0, policy_version 27770 (0.0008) +[2026-06-02 16:58:19,174][255279] Updated weights for policy 0, policy_version 27780 (0.0008) +[2026-06-02 16:58:19,846][255279] Updated weights for policy 0, policy_version 27790 (0.0009) +[2026-06-02 16:58:20,022][255279] Updated weights for policy 0, policy_version 27800 (0.0009) +[2026-06-02 16:58:20,228][255279] Updated weights for policy 0, policy_version 27811 (0.0010) +[2026-06-02 16:58:20,408][255279] Updated weights for policy 0, policy_version 27821 (0.0008) +[2026-06-02 16:58:20,502][253683] Fps is (10 sec: 19660.8, 60 sec: 19114.7, 300 sec: 19438.6). Total num frames: 14286848. Throughput: 0: 19476.0. Samples: 14331392. Policy #0 lag: (min: 28.0, avg: 44.4, max: 92.0) +[2026-06-02 16:58:20,502][253683] Avg episode reward: [(0, '1062.492')] +[2026-06-02 16:58:20,602][255279] Updated weights for policy 0, policy_version 27831 (0.0009) +[2026-06-02 16:58:20,793][255279] Updated weights for policy 0, policy_version 27841 (0.0009) +[2026-06-02 16:58:20,914][255187] Saving new best policy, reward=1062.492! +[2026-06-02 16:58:21,505][255279] Updated weights for policy 0, policy_version 27852 (0.0009) +[2026-06-02 16:58:21,712][255279] Updated weights for policy 0, policy_version 27864 (0.0009) +[2026-06-02 16:58:21,904][255279] Updated weights for policy 0, policy_version 27874 (0.0009) +[2026-06-02 16:58:22,093][255279] Updated weights for policy 0, policy_version 27884 (0.0009) +[2026-06-02 16:58:22,279][255279] Updated weights for policy 0, policy_version 27894 (0.0009) +[2026-06-02 16:58:22,454][255279] Updated weights for policy 0, policy_version 27904 (0.0009) +[2026-06-02 16:58:23,153][255279] Updated weights for policy 0, policy_version 27914 (0.0008) +[2026-06-02 16:58:23,357][255279] Updated weights for policy 0, policy_version 27925 (0.0009) +[2026-06-02 16:58:23,538][255279] Updated weights for policy 0, policy_version 27935 (0.0009) +[2026-06-02 16:58:23,729][255279] Updated weights for policy 0, policy_version 27945 (0.0008) +[2026-06-02 16:58:23,910][255279] Updated weights for policy 0, policy_version 27955 (0.0008) +[2026-06-02 16:58:24,111][255279] Updated weights for policy 0, policy_version 27966 (0.0009) +[2026-06-02 16:58:24,295][255279] Updated weights for policy 0, policy_version 27976 (0.0008) +[2026-06-02 16:58:24,982][255279] Updated weights for policy 0, policy_version 27986 (0.0008) +[2026-06-02 16:58:25,169][255279] Updated weights for policy 0, policy_version 27996 (0.0008) +[2026-06-02 16:58:25,349][255279] Updated weights for policy 0, policy_version 28006 (0.0009) +[2026-06-02 16:58:25,502][253683] Fps is (10 sec: 19660.8, 60 sec: 19114.7, 300 sec: 19438.6). Total num frames: 14385152. Throughput: 0: 19200.0. Samples: 14380672. Policy #0 lag: (min: 28.0, avg: 44.4, max: 92.0) +[2026-06-02 16:58:25,502][253683] Avg episode reward: [(0, '1057.599')] +[2026-06-02 16:58:25,531][255279] Updated weights for policy 0, policy_version 28016 (0.0008) +[2026-06-02 16:58:25,728][255279] Updated weights for policy 0, policy_version 28026 (0.0009) +[2026-06-02 16:58:25,903][255279] Updated weights for policy 0, policy_version 28036 (0.0008) +[2026-06-02 16:58:26,601][255279] Updated weights for policy 0, policy_version 28046 (0.0009) +[2026-06-02 16:58:26,775][255279] Updated weights for policy 0, policy_version 28056 (0.0009) +[2026-06-02 16:58:26,964][255279] Updated weights for policy 0, policy_version 28066 (0.0008) +[2026-06-02 16:58:27,150][255279] Updated weights for policy 0, policy_version 28076 (0.0009) +[2026-06-02 16:58:27,335][255279] Updated weights for policy 0, policy_version 28086 (0.0008) +[2026-06-02 16:58:27,530][255279] Updated weights for policy 0, policy_version 28096 (0.0009) +[2026-06-02 16:58:28,215][255279] Updated weights for policy 0, policy_version 28106 (0.0008) +[2026-06-02 16:58:28,383][255279] Updated weights for policy 0, policy_version 28116 (0.0009) +[2026-06-02 16:58:28,565][255279] Updated weights for policy 0, policy_version 28126 (0.0008) +[2026-06-02 16:58:28,762][255279] Updated weights for policy 0, policy_version 28137 (0.0008) +[2026-06-02 16:58:28,952][255279] Updated weights for policy 0, policy_version 28147 (0.0009) +[2026-06-02 16:58:29,156][255279] Updated weights for policy 0, policy_version 28158 (0.0009) +[2026-06-02 16:58:29,341][255279] Updated weights for policy 0, policy_version 28168 (0.0008) +[2026-06-02 16:58:30,019][255279] Updated weights for policy 0, policy_version 28179 (0.0008) +[2026-06-02 16:58:30,203][255279] Updated weights for policy 0, policy_version 28189 (0.0009) +[2026-06-02 16:58:30,392][255279] Updated weights for policy 0, policy_version 28199 (0.0008) +[2026-06-02 16:58:30,502][253683] Fps is (10 sec: 19660.8, 60 sec: 19660.8, 300 sec: 19438.6). Total num frames: 14483456. Throughput: 0: 19481.6. Samples: 14502144. Policy #0 lag: (min: 28.0, avg: 44.4, max: 92.0) +[2026-06-02 16:58:30,502][253683] Avg episode reward: [(0, '1091.419')] +[2026-06-02 16:58:30,589][255279] Updated weights for policy 0, policy_version 28209 (0.0009) +[2026-06-02 16:58:30,772][255279] Updated weights for policy 0, policy_version 28219 (0.0008) +[2026-06-02 16:58:30,954][255279] Updated weights for policy 0, policy_version 28229 (0.0008) +[2026-06-02 16:58:31,010][255187] Saving new best policy, reward=1091.419! +[2026-06-02 16:58:31,645][255279] Updated weights for policy 0, policy_version 28239 (0.0009) +[2026-06-02 16:58:31,823][255279] Updated weights for policy 0, policy_version 28249 (0.0009) +[2026-06-02 16:58:32,016][255279] Updated weights for policy 0, policy_version 28259 (0.0009) +[2026-06-02 16:58:32,230][255279] Updated weights for policy 0, policy_version 28271 (0.0009) +[2026-06-02 16:58:32,425][255279] Updated weights for policy 0, policy_version 28281 (0.0009) +[2026-06-02 16:58:32,609][255279] Updated weights for policy 0, policy_version 28291 (0.0009) +[2026-06-02 16:58:33,282][255279] Updated weights for policy 0, policy_version 28301 (0.0009) +[2026-06-02 16:58:33,465][255279] Updated weights for policy 0, policy_version 28311 (0.0009) +[2026-06-02 16:58:33,641][255279] Updated weights for policy 0, policy_version 28321 (0.0009) +[2026-06-02 16:58:33,833][255279] Updated weights for policy 0, policy_version 28331 (0.0009) +[2026-06-02 16:58:34,025][255279] Updated weights for policy 0, policy_version 28341 (0.0009) +[2026-06-02 16:58:34,211][255279] Updated weights for policy 0, policy_version 28351 (0.0008) +[2026-06-02 16:58:34,883][255279] Updated weights for policy 0, policy_version 28361 (0.0009) +[2026-06-02 16:58:35,055][255279] Updated weights for policy 0, policy_version 28371 (0.0008) +[2026-06-02 16:58:35,242][255279] Updated weights for policy 0, policy_version 28381 (0.0009) +[2026-06-02 16:58:35,431][255279] Updated weights for policy 0, policy_version 28391 (0.0009) +[2026-06-02 16:58:35,502][253683] Fps is (10 sec: 19660.8, 60 sec: 19660.9, 300 sec: 19438.6). Total num frames: 14581760. Throughput: 0: 19447.5. Samples: 14621440. Policy #0 lag: (min: 47.0, avg: 85.4, max: 115.0) +[2026-06-02 16:58:35,502][253683] Avg episode reward: [(0, '1090.574')] +[2026-06-02 16:58:35,616][255279] Updated weights for policy 0, policy_version 28401 (0.0008) +[2026-06-02 16:58:35,809][255279] Updated weights for policy 0, policy_version 28411 (0.0008) +[2026-06-02 16:58:35,996][255279] Updated weights for policy 0, policy_version 28421 (0.0009) +[2026-06-02 16:58:36,677][255279] Updated weights for policy 0, policy_version 28431 (0.0009) +[2026-06-02 16:58:36,860][255279] Updated weights for policy 0, policy_version 28441 (0.0008) +[2026-06-02 16:58:37,049][255279] Updated weights for policy 0, policy_version 28451 (0.0008) +[2026-06-02 16:58:37,238][255279] Updated weights for policy 0, policy_version 28461 (0.0009) +[2026-06-02 16:58:37,428][255279] Updated weights for policy 0, policy_version 28471 (0.0009) +[2026-06-02 16:58:37,616][255279] Updated weights for policy 0, policy_version 28481 (0.0008) +[2026-06-02 16:58:38,288][255279] Updated weights for policy 0, policy_version 28491 (0.0008) +[2026-06-02 16:58:38,470][255279] Updated weights for policy 0, policy_version 28501 (0.0008) +[2026-06-02 16:58:38,653][255279] Updated weights for policy 0, policy_version 28511 (0.0008) +[2026-06-02 16:58:38,843][255279] Updated weights for policy 0, policy_version 28521 (0.0008) +[2026-06-02 16:58:39,030][255279] Updated weights for policy 0, policy_version 28531 (0.0008) +[2026-06-02 16:58:39,216][255279] Updated weights for policy 0, policy_version 28541 (0.0008) +[2026-06-02 16:58:39,397][255279] Updated weights for policy 0, policy_version 28551 (0.0008) +[2026-06-02 16:58:40,066][255279] Updated weights for policy 0, policy_version 28561 (0.0008) +[2026-06-02 16:58:40,253][255279] Updated weights for policy 0, policy_version 28571 (0.0008) +[2026-06-02 16:58:40,434][255279] Updated weights for policy 0, policy_version 28581 (0.0008) +[2026-06-02 16:58:40,502][253683] Fps is (10 sec: 19660.7, 60 sec: 19660.8, 300 sec: 19438.6). Total num frames: 14680064. Throughput: 0: 19262.5. Samples: 14672896. Policy #0 lag: (min: 47.0, avg: 85.4, max: 115.0) +[2026-06-02 16:58:40,502][253683] Avg episode reward: [(0, '1082.566')] +[2026-06-02 16:58:40,632][255279] Updated weights for policy 0, policy_version 28591 (0.0008) +[2026-06-02 16:58:40,821][255279] Updated weights for policy 0, policy_version 28601 (0.0009) +[2026-06-02 16:58:41,011][255279] Updated weights for policy 0, policy_version 28611 (0.0008) +[2026-06-02 16:58:41,670][255279] Updated weights for policy 0, policy_version 28621 (0.0009) +[2026-06-02 16:58:41,852][255279] Updated weights for policy 0, policy_version 28631 (0.0009) +[2026-06-02 16:58:42,031][255279] Updated weights for policy 0, policy_version 28641 (0.0009) +[2026-06-02 16:58:42,222][255279] Updated weights for policy 0, policy_version 28651 (0.0008) +[2026-06-02 16:58:42,418][255279] Updated weights for policy 0, policy_version 28661 (0.0008) +[2026-06-02 16:58:42,601][255279] Updated weights for policy 0, policy_version 28671 (0.0009) +[2026-06-02 16:58:43,275][255279] Updated weights for policy 0, policy_version 28681 (0.0009) +[2026-06-02 16:58:43,445][255279] Updated weights for policy 0, policy_version 28691 (0.0008) +[2026-06-02 16:58:43,635][255279] Updated weights for policy 0, policy_version 28701 (0.0008) +[2026-06-02 16:58:43,829][255279] Updated weights for policy 0, policy_version 28711 (0.0009) +[2026-06-02 16:58:44,007][255279] Updated weights for policy 0, policy_version 28721 (0.0009) +[2026-06-02 16:58:44,201][255279] Updated weights for policy 0, policy_version 28731 (0.0008) +[2026-06-02 16:58:44,409][255279] Updated weights for policy 0, policy_version 28742 (0.0008) +[2026-06-02 16:58:45,075][255279] Updated weights for policy 0, policy_version 28752 (0.0008) +[2026-06-02 16:58:45,261][255279] Updated weights for policy 0, policy_version 28762 (0.0008) +[2026-06-02 16:58:45,441][255279] Updated weights for policy 0, policy_version 28772 (0.0008) +[2026-06-02 16:58:45,502][253683] Fps is (10 sec: 19660.7, 60 sec: 19660.8, 300 sec: 19438.6). Total num frames: 14778368. Throughput: 0: 19461.7. Samples: 14793600. Policy #0 lag: (min: 47.0, avg: 85.4, max: 115.0) +[2026-06-02 16:58:45,503][253683] Avg episode reward: [(0, '1083.013')] +[2026-06-02 16:58:45,629][255279] Updated weights for policy 0, policy_version 28782 (0.0008) +[2026-06-02 16:58:45,828][255279] Updated weights for policy 0, policy_version 28792 (0.0008) +[2026-06-02 16:58:46,030][255279] Updated weights for policy 0, policy_version 28803 (0.0008) +[2026-06-02 16:58:46,710][255279] Updated weights for policy 0, policy_version 28813 (0.0009) +[2026-06-02 16:58:46,881][255279] Updated weights for policy 0, policy_version 28823 (0.0008) +[2026-06-02 16:58:47,072][255279] Updated weights for policy 0, policy_version 28833 (0.0008) +[2026-06-02 16:58:47,273][255279] Updated weights for policy 0, policy_version 28844 (0.0008) +[2026-06-02 16:58:47,486][255279] Updated weights for policy 0, policy_version 28855 (0.0009) +[2026-06-02 16:58:47,672][255279] Updated weights for policy 0, policy_version 28865 (0.0008) +[2026-06-02 16:58:48,349][255279] Updated weights for policy 0, policy_version 28875 (0.0009) +[2026-06-02 16:58:48,519][255279] Updated weights for policy 0, policy_version 28885 (0.0008) +[2026-06-02 16:58:48,717][255279] Updated weights for policy 0, policy_version 28895 (0.0008) +[2026-06-02 16:58:48,902][255279] Updated weights for policy 0, policy_version 28905 (0.0008) +[2026-06-02 16:58:49,080][255279] Updated weights for policy 0, policy_version 28915 (0.0008) +[2026-06-02 16:58:49,274][255279] Updated weights for policy 0, policy_version 28925 (0.0008) +[2026-06-02 16:58:49,469][255279] Updated weights for policy 0, policy_version 28935 (0.0008) +[2026-06-02 16:58:50,137][255279] Updated weights for policy 0, policy_version 28945 (0.0009) +[2026-06-02 16:58:50,313][255279] Updated weights for policy 0, policy_version 28955 (0.0009) +[2026-06-02 16:58:50,502][253683] Fps is (10 sec: 19660.7, 60 sec: 19660.8, 300 sec: 19438.6). Total num frames: 14876672. Throughput: 0: 19333.7. Samples: 14909440. Policy #0 lag: (min: 47.0, avg: 85.4, max: 115.0) +[2026-06-02 16:58:50,503][253683] Avg episode reward: [(0, '1101.432')] +[2026-06-02 16:58:50,507][255279] Updated weights for policy 0, policy_version 28965 (0.0008) +[2026-06-02 16:58:50,695][255279] Updated weights for policy 0, policy_version 28975 (0.0008) +[2026-06-02 16:58:50,888][255279] Updated weights for policy 0, policy_version 28985 (0.0009) +[2026-06-02 16:58:51,075][255279] Updated weights for policy 0, policy_version 28995 (0.0008) +[2026-06-02 16:58:51,159][255187] Saving new best policy, reward=1101.432! +[2026-06-02 16:58:51,743][255279] Updated weights for policy 0, policy_version 29005 (0.0009) +[2026-06-02 16:58:51,923][255279] Updated weights for policy 0, policy_version 29015 (0.0009) +[2026-06-02 16:58:52,108][255279] Updated weights for policy 0, policy_version 29025 (0.0008) +[2026-06-02 16:58:52,294][255279] Updated weights for policy 0, policy_version 29035 (0.0009) +[2026-06-02 16:58:52,491][255279] Updated weights for policy 0, policy_version 29045 (0.0008) +[2026-06-02 16:58:52,675][255279] Updated weights for policy 0, policy_version 29055 (0.0008) +[2026-06-02 16:58:53,369][255279] Updated weights for policy 0, policy_version 29065 (0.0008) +[2026-06-02 16:58:53,548][255279] Updated weights for policy 0, policy_version 29075 (0.0008) +[2026-06-02 16:58:53,729][255279] Updated weights for policy 0, policy_version 29085 (0.0009) +[2026-06-02 16:58:53,922][255279] Updated weights for policy 0, policy_version 29095 (0.0009) +[2026-06-02 16:58:54,110][255279] Updated weights for policy 0, policy_version 29105 (0.0009) +[2026-06-02 16:58:54,294][255279] Updated weights for policy 0, policy_version 29115 (0.0009) +[2026-06-02 16:58:54,490][255279] Updated weights for policy 0, policy_version 29125 (0.0009) +[2026-06-02 16:58:55,148][255279] Updated weights for policy 0, policy_version 29135 (0.0009) +[2026-06-02 16:58:55,331][255279] Updated weights for policy 0, policy_version 29145 (0.0008) +[2026-06-02 16:58:55,502][253683] Fps is (10 sec: 19660.8, 60 sec: 19660.8, 300 sec: 19438.6). Total num frames: 14974976. Throughput: 0: 19356.4. Samples: 14964608. Policy #0 lag: (min: 63.0, avg: 81.0, max: 127.0) +[2026-06-02 16:58:55,502][253683] Avg episode reward: [(0, '1092.024')] +[2026-06-02 16:58:55,519][255279] Updated weights for policy 0, policy_version 29155 (0.0008) +[2026-06-02 16:58:55,708][255279] Updated weights for policy 0, policy_version 29165 (0.0009) +[2026-06-02 16:58:55,892][255279] Updated weights for policy 0, policy_version 29175 (0.0008) +[2026-06-02 16:58:56,087][255279] Updated weights for policy 0, policy_version 29185 (0.0009) +[2026-06-02 16:58:56,788][255279] Updated weights for policy 0, policy_version 29195 (0.0009) +[2026-06-02 16:58:56,978][255279] Updated weights for policy 0, policy_version 29206 (0.0008) +[2026-06-02 16:58:57,166][255279] Updated weights for policy 0, policy_version 29216 (0.0008) +[2026-06-02 16:58:57,349][255279] Updated weights for policy 0, policy_version 29226 (0.0008) +[2026-06-02 16:58:57,543][255279] Updated weights for policy 0, policy_version 29236 (0.0008) +[2026-06-02 16:58:57,727][255279] Updated weights for policy 0, policy_version 29246 (0.0008) +[2026-06-02 16:58:57,913][255279] Updated weights for policy 0, policy_version 29256 (0.0008) +[2026-06-02 16:58:58,579][255279] Updated weights for policy 0, policy_version 29266 (0.0008) +[2026-06-02 16:58:58,767][255279] Updated weights for policy 0, policy_version 29276 (0.0009) +[2026-06-02 16:58:58,962][255279] Updated weights for policy 0, policy_version 29286 (0.0009) +[2026-06-02 16:58:59,154][255279] Updated weights for policy 0, policy_version 29297 (0.0009) +[2026-06-02 16:58:59,349][255279] Updated weights for policy 0, policy_version 29307 (0.0008) +[2026-06-02 16:58:59,544][255279] Updated weights for policy 0, policy_version 29317 (0.0009) +[2026-06-02 16:59:00,197][255279] Updated weights for policy 0, policy_version 29327 (0.0009) +[2026-06-02 16:59:00,382][255279] Updated weights for policy 0, policy_version 29337 (0.0009) +[2026-06-02 16:59:00,502][253683] Fps is (10 sec: 19660.9, 60 sec: 19660.8, 300 sec: 19438.6). Total num frames: 15073280. Throughput: 0: 19470.2. Samples: 15086208. Policy #0 lag: (min: 63.0, avg: 81.0, max: 127.0) +[2026-06-02 16:59:00,502][253683] Avg episode reward: [(0, '1126.838')] +[2026-06-02 16:59:00,582][255279] Updated weights for policy 0, policy_version 29348 (0.0009) +[2026-06-02 16:59:00,790][255279] Updated weights for policy 0, policy_version 29359 (0.0009) +[2026-06-02 16:59:00,985][255279] Updated weights for policy 0, policy_version 29369 (0.0009) +[2026-06-02 16:59:01,172][255279] Updated weights for policy 0, policy_version 29379 (0.0009) +[2026-06-02 16:59:01,256][255187] Saving new best policy, reward=1126.838! +[2026-06-02 16:59:01,834][255279] Updated weights for policy 0, policy_version 29389 (0.0009) +[2026-06-02 16:59:02,027][255279] Updated weights for policy 0, policy_version 29399 (0.0009) +[2026-06-02 16:59:02,201][255279] Updated weights for policy 0, policy_version 29409 (0.0009) +[2026-06-02 16:59:02,397][255279] Updated weights for policy 0, policy_version 29419 (0.0009) +[2026-06-02 16:59:02,589][255279] Updated weights for policy 0, policy_version 29429 (0.0009) +[2026-06-02 16:59:02,777][255279] Updated weights for policy 0, policy_version 29439 (0.0009) +[2026-06-02 16:59:03,461][255279] Updated weights for policy 0, policy_version 29449 (0.0009) +[2026-06-02 16:59:03,631][255279] Updated weights for policy 0, policy_version 29459 (0.0008) +[2026-06-02 16:59:03,818][255279] Updated weights for policy 0, policy_version 29469 (0.0008) +[2026-06-02 16:59:04,007][255279] Updated weights for policy 0, policy_version 29479 (0.0009) +[2026-06-02 16:59:04,194][255279] Updated weights for policy 0, policy_version 29489 (0.0009) +[2026-06-02 16:59:04,375][255279] Updated weights for policy 0, policy_version 29499 (0.0008) +[2026-06-02 16:59:04,569][255279] Updated weights for policy 0, policy_version 29509 (0.0011) +[2026-06-02 16:59:05,237][255279] Updated weights for policy 0, policy_version 29519 (0.0009) +[2026-06-02 16:59:05,417][255279] Updated weights for policy 0, policy_version 29529 (0.0011) +[2026-06-02 16:59:05,502][253683] Fps is (10 sec: 19660.8, 60 sec: 19660.8, 300 sec: 19438.6). Total num frames: 15171584. Throughput: 0: 19214.2. Samples: 15196032. Policy #0 lag: (min: 63.0, avg: 81.0, max: 127.0) +[2026-06-02 16:59:05,502][253683] Avg episode reward: [(0, '1179.140')] +[2026-06-02 16:59:05,608][255279] Updated weights for policy 0, policy_version 29539 (0.0010) +[2026-06-02 16:59:05,814][255279] Updated weights for policy 0, policy_version 29550 (0.0006) +[2026-06-02 16:59:06,001][255279] Updated weights for policy 0, policy_version 29560 (0.0010) +[2026-06-02 16:59:06,194][255279] Updated weights for policy 0, policy_version 29570 (0.0012) +[2026-06-02 16:59:06,296][255187] Saving new best policy, reward=1179.140! +[2026-06-02 16:59:06,870][255279] Updated weights for policy 0, policy_version 29580 (0.0010) +[2026-06-02 16:59:07,047][255279] Updated weights for policy 0, policy_version 29590 (0.0009) +[2026-06-02 16:59:07,233][255279] Updated weights for policy 0, policy_version 29600 (0.0009) +[2026-06-02 16:59:07,430][255279] Updated weights for policy 0, policy_version 29610 (0.0009) +[2026-06-02 16:59:07,612][255279] Updated weights for policy 0, policy_version 29620 (0.0009) +[2026-06-02 16:59:07,805][255279] Updated weights for policy 0, policy_version 29630 (0.0013) +[2026-06-02 16:59:07,986][255279] Updated weights for policy 0, policy_version 29640 (0.0010) +[2026-06-02 16:59:08,663][255279] Updated weights for policy 0, policy_version 29650 (0.0009) +[2026-06-02 16:59:08,844][255279] Updated weights for policy 0, policy_version 29660 (0.0008) +[2026-06-02 16:59:09,035][255279] Updated weights for policy 0, policy_version 29670 (0.0008) +[2026-06-02 16:59:09,221][255279] Updated weights for policy 0, policy_version 29680 (0.0009) +[2026-06-02 16:59:09,410][255279] Updated weights for policy 0, policy_version 29690 (0.0008) +[2026-06-02 16:59:09,602][255279] Updated weights for policy 0, policy_version 29700 (0.0009) +[2026-06-02 16:59:10,271][255279] Updated weights for policy 0, policy_version 29710 (0.0009) +[2026-06-02 16:59:10,451][255279] Updated weights for policy 0, policy_version 29720 (0.0009) +[2026-06-02 16:59:10,502][253683] Fps is (10 sec: 19660.8, 60 sec: 19660.8, 300 sec: 19549.7). Total num frames: 15269888. Throughput: 0: 19470.2. Samples: 15256832. Policy #0 lag: (min: 63.0, avg: 81.0, max: 127.0) +[2026-06-02 16:59:10,502][253683] Avg episode reward: [(0, '1198.613')] +[2026-06-02 16:59:10,637][255279] Updated weights for policy 0, policy_version 29730 (0.0009) +[2026-06-02 16:59:10,820][255279] Updated weights for policy 0, policy_version 29740 (0.0008) +[2026-06-02 16:59:11,019][255279] Updated weights for policy 0, policy_version 29750 (0.0008) +[2026-06-02 16:59:11,227][255279] Updated weights for policy 0, policy_version 29761 (0.0008) +[2026-06-02 16:59:11,349][255187] Saving new best policy, reward=1198.613! +[2026-06-02 16:59:11,900][255279] Updated weights for policy 0, policy_version 29771 (0.0009) +[2026-06-02 16:59:12,099][255279] Updated weights for policy 0, policy_version 29782 (0.0009) +[2026-06-02 16:59:12,283][255279] Updated weights for policy 0, policy_version 29792 (0.0008) +[2026-06-02 16:59:12,467][255279] Updated weights for policy 0, policy_version 29802 (0.0008) +[2026-06-02 16:59:12,665][255279] Updated weights for policy 0, policy_version 29812 (0.0008) +[2026-06-02 16:59:12,862][255279] Updated weights for policy 0, policy_version 29822 (0.0009) +[2026-06-02 16:59:13,045][255279] Updated weights for policy 0, policy_version 29832 (0.0008) +[2026-06-02 16:59:13,706][255279] Updated weights for policy 0, policy_version 29842 (0.0008) +[2026-06-02 16:59:13,904][255279] Updated weights for policy 0, policy_version 29852 (0.0009) +[2026-06-02 16:59:14,086][255279] Updated weights for policy 0, policy_version 29862 (0.0009) +[2026-06-02 16:59:14,283][255279] Updated weights for policy 0, policy_version 29872 (0.0008) +[2026-06-02 16:59:14,479][255279] Updated weights for policy 0, policy_version 29882 (0.0008) +[2026-06-02 16:59:14,672][255279] Updated weights for policy 0, policy_version 29892 (0.0009) +[2026-06-02 16:59:15,329][255279] Updated weights for policy 0, policy_version 29902 (0.0009) +[2026-06-02 16:59:15,502][253683] Fps is (10 sec: 19660.9, 60 sec: 19660.8, 300 sec: 19549.7). Total num frames: 15368192. Throughput: 0: 19470.2. Samples: 15378304. Policy #0 lag: (min: 14.0, avg: 31.6, max: 78.0) +[2026-06-02 16:59:15,502][253683] Avg episode reward: [(0, '1248.704')] +[2026-06-02 16:59:15,513][255279] Updated weights for policy 0, policy_version 29912 (0.0009) +[2026-06-02 16:59:15,694][255279] Updated weights for policy 0, policy_version 29922 (0.0009) +[2026-06-02 16:59:15,889][255279] Updated weights for policy 0, policy_version 29932 (0.0008) +[2026-06-02 16:59:16,079][255279] Updated weights for policy 0, policy_version 29942 (0.0008) +[2026-06-02 16:59:16,261][255279] Updated weights for policy 0, policy_version 29952 (0.0007) +[2026-06-02 16:59:16,407][255187] Saving new best policy, reward=1248.704! +[2026-06-02 16:59:16,927][255279] Updated weights for policy 0, policy_version 29962 (0.0009) +[2026-06-02 16:59:17,097][255279] Updated weights for policy 0, policy_version 29972 (0.0008) +[2026-06-02 16:59:17,297][255279] Updated weights for policy 0, policy_version 29982 (0.0008) +[2026-06-02 16:59:17,486][255279] Updated weights for policy 0, policy_version 29992 (0.0008) +[2026-06-02 16:59:17,674][255279] Updated weights for policy 0, policy_version 30002 (0.0008) +[2026-06-02 16:59:17,865][255279] Updated weights for policy 0, policy_version 30012 (0.0009) +[2026-06-02 16:59:18,057][255279] Updated weights for policy 0, policy_version 30022 (0.0009) +[2026-06-02 16:59:18,733][255279] Updated weights for policy 0, policy_version 30032 (0.0008) +[2026-06-02 16:59:18,919][255279] Updated weights for policy 0, policy_version 30042 (0.0008) +[2026-06-02 16:59:19,109][255279] Updated weights for policy 0, policy_version 30052 (0.0009) +[2026-06-02 16:59:19,297][255279] Updated weights for policy 0, policy_version 30062 (0.0009) +[2026-06-02 16:59:19,476][255279] Updated weights for policy 0, policy_version 30072 (0.0008) +[2026-06-02 16:59:19,670][255279] Updated weights for policy 0, policy_version 30082 (0.0008) +[2026-06-02 16:59:20,351][255279] Updated weights for policy 0, policy_version 30092 (0.0009) +[2026-06-02 16:59:20,502][253683] Fps is (10 sec: 19660.7, 60 sec: 19660.8, 300 sec: 19549.7). Total num frames: 15466496. Throughput: 0: 19288.1. Samples: 15489408. Policy #0 lag: (min: 14.0, avg: 31.6, max: 78.0) +[2026-06-02 16:59:20,502][253683] Avg episode reward: [(0, '1265.521')] +[2026-06-02 16:59:20,532][255279] Updated weights for policy 0, policy_version 30102 (0.0008) +[2026-06-02 16:59:20,730][255279] Updated weights for policy 0, policy_version 30112 (0.0009) +[2026-06-02 16:59:20,911][255279] Updated weights for policy 0, policy_version 30122 (0.0008) +[2026-06-02 16:59:21,102][255279] Updated weights for policy 0, policy_version 30132 (0.0008) +[2026-06-02 16:59:21,294][255279] Updated weights for policy 0, policy_version 30142 (0.0009) +[2026-06-02 16:59:21,473][255187] Saving new best policy, reward=1265.521! +[2026-06-02 16:59:21,475][255279] Updated weights for policy 0, policy_version 30152 (0.0008) +[2026-06-02 16:59:22,152][255279] Updated weights for policy 0, policy_version 30162 (0.0009) +[2026-06-02 16:59:22,340][255279] Updated weights for policy 0, policy_version 30172 (0.0008) +[2026-06-02 16:59:22,521][255279] Updated weights for policy 0, policy_version 30182 (0.0008) +[2026-06-02 16:59:22,719][255279] Updated weights for policy 0, policy_version 30192 (0.0009) +[2026-06-02 16:59:22,921][255279] Updated weights for policy 0, policy_version 30203 (0.0009) +[2026-06-02 16:59:23,115][255279] Updated weights for policy 0, policy_version 30213 (0.0009) +[2026-06-02 16:59:23,770][255279] Updated weights for policy 0, policy_version 30223 (0.0009) +[2026-06-02 16:59:23,961][255279] Updated weights for policy 0, policy_version 30233 (0.0009) +[2026-06-02 16:59:24,144][255279] Updated weights for policy 0, policy_version 30243 (0.0009) +[2026-06-02 16:59:24,356][255279] Updated weights for policy 0, policy_version 30254 (0.0009) +[2026-06-02 16:59:24,546][255279] Updated weights for policy 0, policy_version 30264 (0.0008) +[2026-06-02 16:59:24,742][255279] Updated weights for policy 0, policy_version 30274 (0.0009) +[2026-06-02 16:59:25,399][255279] Updated weights for policy 0, policy_version 30284 (0.0009) +[2026-06-02 16:59:25,502][253683] Fps is (10 sec: 19660.8, 60 sec: 19660.8, 300 sec: 19438.6). Total num frames: 15564800. Throughput: 0: 19510.1. Samples: 15550848. Policy #0 lag: (min: 14.0, avg: 31.6, max: 78.0) +[2026-06-02 16:59:25,502][253683] Avg episode reward: [(0, '1296.705')] +[2026-06-02 16:59:25,567][255279] Updated weights for policy 0, policy_version 30294 (0.0009) +[2026-06-02 16:59:25,790][255279] Updated weights for policy 0, policy_version 30306 (0.0008) +[2026-06-02 16:59:25,979][255279] Updated weights for policy 0, policy_version 30316 (0.0009) +[2026-06-02 16:59:26,167][255279] Updated weights for policy 0, policy_version 30326 (0.0008) +[2026-06-02 16:59:26,364][255279] Updated weights for policy 0, policy_version 30337 (0.0008) +[2026-06-02 16:59:26,491][255187] Saving new best policy, reward=1296.705! +[2026-06-02 16:59:27,086][255279] Updated weights for policy 0, policy_version 30347 (0.0009) +[2026-06-02 16:59:27,267][255279] Updated weights for policy 0, policy_version 30357 (0.0008) +[2026-06-02 16:59:27,466][255279] Updated weights for policy 0, policy_version 30368 (0.0009) +[2026-06-02 16:59:27,676][255279] Updated weights for policy 0, policy_version 30379 (0.0009) +[2026-06-02 16:59:27,859][255279] Updated weights for policy 0, policy_version 30389 (0.0009) +[2026-06-02 16:59:28,057][255279] Updated weights for policy 0, policy_version 30400 (0.0009) +[2026-06-02 16:59:28,730][255279] Updated weights for policy 0, policy_version 30410 (0.0009) +[2026-06-02 16:59:28,903][255279] Updated weights for policy 0, policy_version 30420 (0.0009) +[2026-06-02 16:59:29,100][255279] Updated weights for policy 0, policy_version 30430 (0.0009) +[2026-06-02 16:59:29,279][255279] Updated weights for policy 0, policy_version 30440 (0.0009) +[2026-06-02 16:59:29,466][255279] Updated weights for policy 0, policy_version 30450 (0.0009) +[2026-06-02 16:59:29,662][255279] Updated weights for policy 0, policy_version 30460 (0.0009) +[2026-06-02 16:59:29,841][255279] Updated weights for policy 0, policy_version 30470 (0.0009) +[2026-06-02 16:59:30,502][253683] Fps is (10 sec: 19660.9, 60 sec: 19660.8, 300 sec: 19549.7). Total num frames: 15663104. Throughput: 0: 19518.6. Samples: 15671936. Policy #0 lag: (min: 14.0, avg: 31.6, max: 78.0) +[2026-06-02 16:59:30,502][253683] Avg episode reward: [(0, '1271.609')] +[2026-06-02 16:59:30,511][255279] Updated weights for policy 0, policy_version 30480 (0.0009) +[2026-06-02 16:59:30,701][255279] Updated weights for policy 0, policy_version 30490 (0.0009) +[2026-06-02 16:59:30,880][255279] Updated weights for policy 0, policy_version 30500 (0.0008) +[2026-06-02 16:59:31,063][255279] Updated weights for policy 0, policy_version 30510 (0.0009) +[2026-06-02 16:59:31,291][255279] Updated weights for policy 0, policy_version 30522 (0.0009) +[2026-06-02 16:59:31,488][255279] Updated weights for policy 0, policy_version 30532 (0.0009) +[2026-06-02 16:59:32,184][255279] Updated weights for policy 0, policy_version 30543 (0.0009) +[2026-06-02 16:59:32,393][255279] Updated weights for policy 0, policy_version 30554 (0.0009) +[2026-06-02 16:59:32,571][255279] Updated weights for policy 0, policy_version 30564 (0.0009) +[2026-06-02 16:59:32,761][255279] Updated weights for policy 0, policy_version 30574 (0.0009) +[2026-06-02 16:59:32,952][255279] Updated weights for policy 0, policy_version 30584 (0.0009) +[2026-06-02 16:59:33,138][255279] Updated weights for policy 0, policy_version 30594 (0.0009) +[2026-06-02 16:59:33,857][255279] Updated weights for policy 0, policy_version 30606 (0.0009) +[2026-06-02 16:59:34,052][255279] Updated weights for policy 0, policy_version 30617 (0.0009) +[2026-06-02 16:59:34,282][255279] Updated weights for policy 0, policy_version 30629 (0.0009) +[2026-06-02 16:59:34,455][255279] Updated weights for policy 0, policy_version 30639 (0.0009) +[2026-06-02 16:59:34,660][255279] Updated weights for policy 0, policy_version 30650 (0.0009) +[2026-06-02 16:59:34,854][255279] Updated weights for policy 0, policy_version 30660 (0.0009) +[2026-06-02 16:59:35,502][253683] Fps is (10 sec: 19660.8, 60 sec: 19660.8, 300 sec: 19549.7). Total num frames: 15761408. Throughput: 0: 19382.1. Samples: 15781632. Policy #0 lag: (min: 16.0, avg: 64.0, max: 80.0) +[2026-06-02 16:59:35,502][253683] Avg episode reward: [(0, '1347.911')] +[2026-06-02 16:59:35,552][255279] Updated weights for policy 0, policy_version 30670 (0.0009) +[2026-06-02 16:59:35,736][255279] Updated weights for policy 0, policy_version 30680 (0.0009) +[2026-06-02 16:59:35,908][255279] Updated weights for policy 0, policy_version 30690 (0.0009) +[2026-06-02 16:59:36,112][255279] Updated weights for policy 0, policy_version 30701 (0.0009) +[2026-06-02 16:59:36,316][255279] Updated weights for policy 0, policy_version 30712 (0.0009) +[2026-06-02 16:59:36,504][255279] Updated weights for policy 0, policy_version 30722 (0.0009) +[2026-06-02 16:59:36,605][255187] Saving new best policy, reward=1347.911! +[2026-06-02 16:59:37,215][255279] Updated weights for policy 0, policy_version 30733 (0.0008) +[2026-06-02 16:59:37,406][255279] Updated weights for policy 0, policy_version 30744 (0.0009) +[2026-06-02 16:59:37,596][255279] Updated weights for policy 0, policy_version 30754 (0.0009) +[2026-06-02 16:59:37,772][255279] Updated weights for policy 0, policy_version 30764 (0.0009) +[2026-06-02 16:59:37,966][255279] Updated weights for policy 0, policy_version 30774 (0.0009) +[2026-06-02 16:59:38,154][255279] Updated weights for policy 0, policy_version 30784 (0.0009) +[2026-06-02 16:59:38,843][255279] Updated weights for policy 0, policy_version 30794 (0.0009) +[2026-06-02 16:59:39,024][255279] Updated weights for policy 0, policy_version 30804 (0.0009) +[2026-06-02 16:59:39,209][255279] Updated weights for policy 0, policy_version 30814 (0.0009) +[2026-06-02 16:59:39,410][255279] Updated weights for policy 0, policy_version 30825 (0.0009) +[2026-06-02 16:59:39,609][255279] Updated weights for policy 0, policy_version 30836 (0.0009) +[2026-06-02 16:59:39,818][255279] Updated weights for policy 0, policy_version 30847 (0.0009) +[2026-06-02 16:59:40,501][253683] Fps is (10 sec: 19660.9, 60 sec: 19660.8, 300 sec: 19549.7). Total num frames: 15859712. Throughput: 0: 19515.8. Samples: 15842816. Policy #0 lag: (min: 16.0, avg: 64.0, max: 80.0) +[2026-06-02 16:59:40,502][253683] Avg episode reward: [(0, '1356.775')] +[2026-06-02 16:59:40,521][255279] Updated weights for policy 0, policy_version 30857 (0.0009) +[2026-06-02 16:59:40,698][255279] Updated weights for policy 0, policy_version 30867 (0.0008) +[2026-06-02 16:59:40,884][255279] Updated weights for policy 0, policy_version 30877 (0.0009) +[2026-06-02 16:59:41,069][255279] Updated weights for policy 0, policy_version 30887 (0.0008) +[2026-06-02 16:59:41,254][255279] Updated weights for policy 0, policy_version 30897 (0.0008) +[2026-06-02 16:59:41,451][255279] Updated weights for policy 0, policy_version 30907 (0.0009) +[2026-06-02 16:59:41,629][255279] Updated weights for policy 0, policy_version 30917 (0.0009) +[2026-06-02 16:59:41,685][255187] Saving new best policy, reward=1356.775! +[2026-06-02 16:59:42,314][255279] Updated weights for policy 0, policy_version 30927 (0.0009) +[2026-06-02 16:59:42,494][255279] Updated weights for policy 0, policy_version 30937 (0.0009) +[2026-06-02 16:59:42,679][255279] Updated weights for policy 0, policy_version 30947 (0.0008) +[2026-06-02 16:59:42,879][255279] Updated weights for policy 0, policy_version 30957 (0.0008) +[2026-06-02 16:59:43,060][255279] Updated weights for policy 0, policy_version 30967 (0.0009) +[2026-06-02 16:59:43,269][255279] Updated weights for policy 0, policy_version 30978 (0.0008) +[2026-06-02 16:59:43,959][255279] Updated weights for policy 0, policy_version 30988 (0.0009) +[2026-06-02 16:59:44,142][255279] Updated weights for policy 0, policy_version 30998 (0.0008) +[2026-06-02 16:59:44,328][255279] Updated weights for policy 0, policy_version 31008 (0.0009) +[2026-06-02 16:59:44,524][255279] Updated weights for policy 0, policy_version 31018 (0.0009) +[2026-06-02 16:59:44,712][255279] Updated weights for policy 0, policy_version 31028 (0.0008) +[2026-06-02 16:59:44,899][255279] Updated weights for policy 0, policy_version 31038 (0.0008) +[2026-06-02 16:59:45,085][255279] Updated weights for policy 0, policy_version 31048 (0.0009) +[2026-06-02 16:59:45,502][253683] Fps is (10 sec: 19660.8, 60 sec: 19660.8, 300 sec: 19549.7). Total num frames: 15958016. Throughput: 0: 19490.2. Samples: 15963264. Policy #0 lag: (min: 16.0, avg: 64.0, max: 80.0) +[2026-06-02 16:59:45,502][253683] Avg episode reward: [(0, '1412.830')] +[2026-06-02 16:59:45,736][255279] Updated weights for policy 0, policy_version 31058 (0.0008) +[2026-06-02 16:59:45,925][255279] Updated weights for policy 0, policy_version 31068 (0.0008) +[2026-06-02 16:59:46,124][255279] Updated weights for policy 0, policy_version 31078 (0.0008) +[2026-06-02 16:59:46,316][255279] Updated weights for policy 0, policy_version 31088 (0.0008) +[2026-06-02 16:59:46,501][255279] Updated weights for policy 0, policy_version 31098 (0.0008) +[2026-06-02 16:59:46,684][255279] Updated weights for policy 0, policy_version 31108 (0.0008) +[2026-06-02 16:59:46,754][255187] Saving new best policy, reward=1412.830! +[2026-06-02 16:59:47,350][255279] Updated weights for policy 0, policy_version 31118 (0.0009) +[2026-06-02 16:59:47,541][255279] Updated weights for policy 0, policy_version 31128 (0.0008) +[2026-06-02 16:59:47,727][255279] Updated weights for policy 0, policy_version 31138 (0.0008) +[2026-06-02 16:59:47,920][255279] Updated weights for policy 0, policy_version 31148 (0.0008) +[2026-06-02 16:59:48,109][255279] Updated weights for policy 0, policy_version 31158 (0.0008) +[2026-06-02 16:59:48,311][255279] Updated weights for policy 0, policy_version 31169 (0.0008) +[2026-06-02 16:59:48,983][255279] Updated weights for policy 0, policy_version 31179 (0.0009) +[2026-06-02 16:59:49,160][255279] Updated weights for policy 0, policy_version 31189 (0.0008) +[2026-06-02 16:59:49,345][255279] Updated weights for policy 0, policy_version 31199 (0.0008) +[2026-06-02 16:59:49,537][255279] Updated weights for policy 0, policy_version 31209 (0.0008) +[2026-06-02 16:59:49,728][255279] Updated weights for policy 0, policy_version 31219 (0.0009) +[2026-06-02 16:59:49,935][255279] Updated weights for policy 0, policy_version 31230 (0.0008) +[2026-06-02 16:59:50,118][255279] Updated weights for policy 0, policy_version 31240 (0.0009) +[2026-06-02 16:59:50,502][253683] Fps is (10 sec: 19660.7, 60 sec: 19660.8, 300 sec: 19549.7). Total num frames: 16056320. Throughput: 0: 19558.4. Samples: 16076160. Policy #0 lag: (min: 16.0, avg: 64.0, max: 80.0) +[2026-06-02 16:59:50,502][253683] Avg episode reward: [(0, '1411.561')] +[2026-06-02 16:59:50,783][255279] Updated weights for policy 0, policy_version 31250 (0.0008) +[2026-06-02 16:59:50,971][255279] Updated weights for policy 0, policy_version 31260 (0.0008) +[2026-06-02 16:59:51,163][255279] Updated weights for policy 0, policy_version 31270 (0.0008) +[2026-06-02 16:59:51,350][255279] Updated weights for policy 0, policy_version 31280 (0.0009) +[2026-06-02 16:59:51,544][255279] Updated weights for policy 0, policy_version 31290 (0.0009) +[2026-06-02 16:59:51,731][255279] Updated weights for policy 0, policy_version 31300 (0.0009) +[2026-06-02 16:59:52,377][255279] Updated weights for policy 0, policy_version 31310 (0.0009) +[2026-06-02 16:59:52,564][255279] Updated weights for policy 0, policy_version 31320 (0.0008) +[2026-06-02 16:59:52,747][255279] Updated weights for policy 0, policy_version 31330 (0.0008) +[2026-06-02 16:59:52,936][255279] Updated weights for policy 0, policy_version 31340 (0.0009) +[2026-06-02 16:59:53,124][255279] Updated weights for policy 0, policy_version 31350 (0.0009) +[2026-06-02 16:59:53,317][255279] Updated weights for policy 0, policy_version 31360 (0.0009) +[2026-06-02 16:59:53,980][255279] Updated weights for policy 0, policy_version 31370 (0.0009) +[2026-06-02 16:59:54,156][255279] Updated weights for policy 0, policy_version 31380 (0.0009) +[2026-06-02 16:59:54,365][255279] Updated weights for policy 0, policy_version 31391 (0.0009) +[2026-06-02 16:59:54,552][255279] Updated weights for policy 0, policy_version 31401 (0.0008) +[2026-06-02 16:59:54,743][255279] Updated weights for policy 0, policy_version 31411 (0.0009) +[2026-06-02 16:59:54,929][255279] Updated weights for policy 0, policy_version 31421 (0.0009) +[2026-06-02 16:59:55,123][255279] Updated weights for policy 0, policy_version 31431 (0.0009) +[2026-06-02 16:59:55,502][253683] Fps is (10 sec: 19660.8, 60 sec: 19660.8, 300 sec: 19549.7). Total num frames: 16154624. Throughput: 0: 19552.7. Samples: 16136704. Policy #0 lag: (min: 16.0, avg: 64.0, max: 80.0) +[2026-06-02 16:59:55,503][253683] Avg episode reward: [(0, '1441.020')] +[2026-06-02 16:59:55,804][255279] Updated weights for policy 0, policy_version 31441 (0.0008) +[2026-06-02 16:59:55,990][255279] Updated weights for policy 0, policy_version 31451 (0.0009) +[2026-06-02 16:59:56,179][255279] Updated weights for policy 0, policy_version 31461 (0.0009) +[2026-06-02 16:59:56,360][255279] Updated weights for policy 0, policy_version 31471 (0.0009) +[2026-06-02 16:59:56,566][255279] Updated weights for policy 0, policy_version 31481 (0.0009) +[2026-06-02 16:59:56,755][255279] Updated weights for policy 0, policy_version 31491 (0.0008) +[2026-06-02 16:59:56,838][255187] Saving new best policy, reward=1441.020! +[2026-06-02 16:59:57,396][255279] Updated weights for policy 0, policy_version 31501 (0.0008) +[2026-06-02 16:59:57,579][255279] Updated weights for policy 0, policy_version 31511 (0.0009) +[2026-06-02 16:59:57,760][255279] Updated weights for policy 0, policy_version 31521 (0.0009) +[2026-06-02 16:59:57,959][255279] Updated weights for policy 0, policy_version 31531 (0.0008) +[2026-06-02 16:59:58,146][255279] Updated weights for policy 0, policy_version 31541 (0.0008) +[2026-06-02 16:59:58,335][255279] Updated weights for policy 0, policy_version 31551 (0.0009) +[2026-06-02 16:59:59,003][255279] Updated weights for policy 0, policy_version 31561 (0.0008) +[2026-06-02 16:59:59,194][255279] Updated weights for policy 0, policy_version 31572 (0.0005) +[2026-06-02 16:59:59,384][255279] Updated weights for policy 0, policy_version 31582 (0.0005) +[2026-06-02 16:59:59,570][255279] Updated weights for policy 0, policy_version 31592 (0.0007) +[2026-06-02 16:59:59,756][255279] Updated weights for policy 0, policy_version 31602 (0.0008) +[2026-06-02 16:59:59,937][255279] Updated weights for policy 0, policy_version 31612 (0.0009) +[2026-06-02 17:00:00,141][255279] Updated weights for policy 0, policy_version 31622 (0.0008) +[2026-06-02 17:00:00,502][253683] Fps is (10 sec: 19660.8, 60 sec: 19660.8, 300 sec: 19549.7). Total num frames: 16252928. Throughput: 0: 19404.8. Samples: 16251520. Policy #0 lag: (min: 45.0, avg: 63.4, max: 109.0) +[2026-06-02 17:00:00,502][253683] Avg episode reward: [(0, '1464.211')] +[2026-06-02 17:00:00,800][255279] Updated weights for policy 0, policy_version 31632 (0.0008) +[2026-06-02 17:00:00,983][255279] Updated weights for policy 0, policy_version 31642 (0.0008) +[2026-06-02 17:00:01,171][255279] Updated weights for policy 0, policy_version 31652 (0.0008) +[2026-06-02 17:00:01,359][255279] Updated weights for policy 0, policy_version 31662 (0.0008) +[2026-06-02 17:00:01,542][255279] Updated weights for policy 0, policy_version 31672 (0.0009) +[2026-06-02 17:00:01,741][255279] Updated weights for policy 0, policy_version 31682 (0.0008) +[2026-06-02 17:00:01,844][255187] Saving new best policy, reward=1464.211! +[2026-06-02 17:00:02,410][255279] Updated weights for policy 0, policy_version 31692 (0.0008) +[2026-06-02 17:00:02,600][255279] Updated weights for policy 0, policy_version 31703 (0.0009) +[2026-06-02 17:00:02,803][255279] Updated weights for policy 0, policy_version 31713 (0.0009) +[2026-06-02 17:00:02,989][255279] Updated weights for policy 0, policy_version 31723 (0.0008) +[2026-06-02 17:00:03,177][255279] Updated weights for policy 0, policy_version 31733 (0.0008) +[2026-06-02 17:00:03,355][255279] Updated weights for policy 0, policy_version 31743 (0.0009) +[2026-06-02 17:00:04,043][255279] Updated weights for policy 0, policy_version 31753 (0.0009) +[2026-06-02 17:00:04,222][255279] Updated weights for policy 0, policy_version 31763 (0.0008) +[2026-06-02 17:00:04,426][255279] Updated weights for policy 0, policy_version 31774 (0.0008) +[2026-06-02 17:00:04,613][255279] Updated weights for policy 0, policy_version 31784 (0.0008) +[2026-06-02 17:00:04,806][255279] Updated weights for policy 0, policy_version 31794 (0.0009) +[2026-06-02 17:00:04,996][255279] Updated weights for policy 0, policy_version 31804 (0.0008) +[2026-06-02 17:00:05,176][255279] Updated weights for policy 0, policy_version 31814 (0.0008) +[2026-06-02 17:00:05,502][253683] Fps is (10 sec: 19660.8, 60 sec: 19660.8, 300 sec: 19549.7). Total num frames: 16351232. Throughput: 0: 19530.0. Samples: 16368256. Policy #0 lag: (min: 45.0, avg: 63.4, max: 109.0) +[2026-06-02 17:00:05,502][253683] Avg episode reward: [(0, '1519.524')] +[2026-06-02 17:00:05,508][255187] Saving new best policy, reward=1519.524! +[2026-06-02 17:00:05,860][255279] Updated weights for policy 0, policy_version 31824 (0.0009) +[2026-06-02 17:00:06,052][255279] Updated weights for policy 0, policy_version 31834 (0.0008) +[2026-06-02 17:00:06,238][255279] Updated weights for policy 0, policy_version 31844 (0.0008) +[2026-06-02 17:00:06,429][255279] Updated weights for policy 0, policy_version 31854 (0.0009) +[2026-06-02 17:00:06,620][255279] Updated weights for policy 0, policy_version 31864 (0.0008) +[2026-06-02 17:00:06,801][255279] Updated weights for policy 0, policy_version 31874 (0.0009) +[2026-06-02 17:00:07,478][255279] Updated weights for policy 0, policy_version 31885 (0.0008) +[2026-06-02 17:00:07,656][255279] Updated weights for policy 0, policy_version 31895 (0.0008) +[2026-06-02 17:00:07,843][255279] Updated weights for policy 0, policy_version 31905 (0.0008) +[2026-06-02 17:00:08,041][255279] Updated weights for policy 0, policy_version 31915 (0.0008) +[2026-06-02 17:00:08,241][255279] Updated weights for policy 0, policy_version 31925 (0.0008) +[2026-06-02 17:00:08,428][255279] Updated weights for policy 0, policy_version 31935 (0.0009) +[2026-06-02 17:00:09,104][255279] Updated weights for policy 0, policy_version 31945 (0.0008) +[2026-06-02 17:00:09,273][255279] Updated weights for policy 0, policy_version 31955 (0.0008) +[2026-06-02 17:00:09,467][255279] Updated weights for policy 0, policy_version 31965 (0.0009) +[2026-06-02 17:00:09,651][255279] Updated weights for policy 0, policy_version 31975 (0.0008) +[2026-06-02 17:00:09,834][255279] Updated weights for policy 0, policy_version 31985 (0.0008) +[2026-06-02 17:00:10,022][255279] Updated weights for policy 0, policy_version 31995 (0.0009) +[2026-06-02 17:00:10,210][255279] Updated weights for policy 0, policy_version 32005 (0.0008) +[2026-06-02 17:00:10,502][253683] Fps is (10 sec: 19660.8, 60 sec: 19660.8, 300 sec: 19549.7). Total num frames: 16449536. Throughput: 0: 19512.9. Samples: 16428928. Policy #0 lag: (min: 45.0, avg: 63.4, max: 109.0) +[2026-06-02 17:00:10,502][253683] Avg episode reward: [(0, '1499.327')] +[2026-06-02 17:00:10,888][255279] Updated weights for policy 0, policy_version 32015 (0.0009) +[2026-06-02 17:00:11,090][255279] Updated weights for policy 0, policy_version 32026 (0.0008) +[2026-06-02 17:00:11,271][255279] Updated weights for policy 0, policy_version 32036 (0.0009) +[2026-06-02 17:00:11,463][255279] Updated weights for policy 0, policy_version 32046 (0.0009) +[2026-06-02 17:00:11,652][255279] Updated weights for policy 0, policy_version 32056 (0.0008) +[2026-06-02 17:00:11,861][255279] Updated weights for policy 0, policy_version 32067 (0.0008) +[2026-06-02 17:00:12,529][255279] Updated weights for policy 0, policy_version 32077 (0.0008) +[2026-06-02 17:00:12,713][255279] Updated weights for policy 0, policy_version 32087 (0.0008) +[2026-06-02 17:00:12,899][255279] Updated weights for policy 0, policy_version 32097 (0.0008) +[2026-06-02 17:00:13,089][255279] Updated weights for policy 0, policy_version 32107 (0.0009) +[2026-06-02 17:00:13,284][255279] Updated weights for policy 0, policy_version 32117 (0.0008) +[2026-06-02 17:00:13,479][255279] Updated weights for policy 0, policy_version 32127 (0.0008) +[2026-06-02 17:00:14,145][255279] Updated weights for policy 0, policy_version 32137 (0.0008) +[2026-06-02 17:00:14,319][255279] Updated weights for policy 0, policy_version 32147 (0.0008) +[2026-06-02 17:00:14,505][255279] Updated weights for policy 0, policy_version 32157 (0.0008) +[2026-06-02 17:00:14,688][255279] Updated weights for policy 0, policy_version 32167 (0.0008) +[2026-06-02 17:00:14,874][255279] Updated weights for policy 0, policy_version 32177 (0.0009) +[2026-06-02 17:00:15,070][255279] Updated weights for policy 0, policy_version 32187 (0.0008) +[2026-06-02 17:00:15,280][255279] Updated weights for policy 0, policy_version 32199 (0.0009) +[2026-06-02 17:00:15,502][253683] Fps is (10 sec: 19660.8, 60 sec: 19660.8, 300 sec: 19549.7). Total num frames: 16547840. Throughput: 0: 19288.2. Samples: 16539904. Policy #0 lag: (min: 45.0, avg: 63.4, max: 109.0) +[2026-06-02 17:00:15,502][253683] Avg episode reward: [(0, '1507.244')] +[2026-06-02 17:00:15,953][255279] Updated weights for policy 0, policy_version 32209 (0.0008) +[2026-06-02 17:00:16,133][255279] Updated weights for policy 0, policy_version 32219 (0.0008) +[2026-06-02 17:00:16,315][255279] Updated weights for policy 0, policy_version 32229 (0.0009) +[2026-06-02 17:00:16,506][255279] Updated weights for policy 0, policy_version 32239 (0.0009) +[2026-06-02 17:00:16,689][255279] Updated weights for policy 0, policy_version 32249 (0.0008) +[2026-06-02 17:00:16,882][255279] Updated weights for policy 0, policy_version 32259 (0.0008) +[2026-06-02 17:00:17,560][255279] Updated weights for policy 0, policy_version 32269 (0.0009) +[2026-06-02 17:00:17,740][255279] Updated weights for policy 0, policy_version 32279 (0.0008) +[2026-06-02 17:00:17,934][255279] Updated weights for policy 0, policy_version 32289 (0.0009) +[2026-06-02 17:00:18,122][255279] Updated weights for policy 0, policy_version 32299 (0.0008) +[2026-06-02 17:00:18,305][255279] Updated weights for policy 0, policy_version 32309 (0.0008) +[2026-06-02 17:00:18,491][255279] Updated weights for policy 0, policy_version 32319 (0.0008) +[2026-06-02 17:00:19,177][255279] Updated weights for policy 0, policy_version 32329 (0.0008) +[2026-06-02 17:00:19,355][255279] Updated weights for policy 0, policy_version 32339 (0.0007) +[2026-06-02 17:00:19,531][255279] Updated weights for policy 0, policy_version 32349 (0.0009) +[2026-06-02 17:00:19,721][255279] Updated weights for policy 0, policy_version 32359 (0.0009) +[2026-06-02 17:00:19,915][255279] Updated weights for policy 0, policy_version 32369 (0.0009) +[2026-06-02 17:00:20,100][255279] Updated weights for policy 0, policy_version 32379 (0.0009) +[2026-06-02 17:00:20,287][255279] Updated weights for policy 0, policy_version 32389 (0.0009) +[2026-06-02 17:00:20,501][253683] Fps is (10 sec: 19660.8, 60 sec: 19660.8, 300 sec: 19549.7). Total num frames: 16646144. Throughput: 0: 19535.6. Samples: 16660736. Policy #0 lag: (min: 63.0, avg: 79.7, max: 127.0) +[2026-06-02 17:00:20,502][253683] Avg episode reward: [(0, '1559.234')] +[2026-06-02 17:00:20,507][255187] Saving new best policy, reward=1559.234! +[2026-06-02 17:00:20,952][255279] Updated weights for policy 0, policy_version 32399 (0.0009) +[2026-06-02 17:00:21,137][255279] Updated weights for policy 0, policy_version 32409 (0.0009) +[2026-06-02 17:00:21,326][255279] Updated weights for policy 0, policy_version 32419 (0.0009) +[2026-06-02 17:00:21,509][255279] Updated weights for policy 0, policy_version 32429 (0.0009) +[2026-06-02 17:00:21,697][255279] Updated weights for policy 0, policy_version 32439 (0.0009) +[2026-06-02 17:00:21,888][255279] Updated weights for policy 0, policy_version 32449 (0.0009) +[2026-06-02 17:00:22,559][255279] Updated weights for policy 0, policy_version 32459 (0.0009) +[2026-06-02 17:00:22,744][255279] Updated weights for policy 0, policy_version 32469 (0.0007) +[2026-06-02 17:00:22,925][255279] Updated weights for policy 0, policy_version 32479 (0.0009) +[2026-06-02 17:00:23,117][255279] Updated weights for policy 0, policy_version 32489 (0.0009) +[2026-06-02 17:00:23,300][255279] Updated weights for policy 0, policy_version 32499 (0.0009) +[2026-06-02 17:00:23,511][255279] Updated weights for policy 0, policy_version 32510 (0.0008) +[2026-06-02 17:00:23,690][255187] Early stopping after 8 epochs (64 sgd steps), loss delta 0.0000002 +[2026-06-02 17:00:23,694][255279] Updated weights for policy 0, policy_version 32520 (0.0009) +[2026-06-02 17:00:24,361][255279] Updated weights for policy 0, policy_version 32530 (0.0008) +[2026-06-02 17:00:24,565][255279] Updated weights for policy 0, policy_version 32541 (0.0009) +[2026-06-02 17:00:24,750][255279] Updated weights for policy 0, policy_version 32551 (0.0008) +[2026-06-02 17:00:24,934][255279] Updated weights for policy 0, policy_version 32561 (0.0008) +[2026-06-02 17:00:25,134][255279] Updated weights for policy 0, policy_version 32571 (0.0009) +[2026-06-02 17:00:25,318][255279] Updated weights for policy 0, policy_version 32581 (0.0009) +[2026-06-02 17:00:25,502][253683] Fps is (10 sec: 19660.8, 60 sec: 19660.8, 300 sec: 19549.7). Total num frames: 16744448. Throughput: 0: 19524.3. Samples: 16721408. Policy #0 lag: (min: 63.0, avg: 79.7, max: 127.0) +[2026-06-02 17:00:25,502][253683] Avg episode reward: [(0, '1566.492')] +[2026-06-02 17:00:25,507][255187] Saving new best policy, reward=1566.492! +[2026-06-02 17:00:26,005][255279] Updated weights for policy 0, policy_version 32591 (0.0009) +[2026-06-02 17:00:26,194][255279] Updated weights for policy 0, policy_version 32601 (0.0009) +[2026-06-02 17:00:26,393][255279] Updated weights for policy 0, policy_version 32611 (0.0009) +[2026-06-02 17:00:26,574][255279] Updated weights for policy 0, policy_version 32621 (0.0009) +[2026-06-02 17:00:26,767][255279] Updated weights for policy 0, policy_version 32631 (0.0009) +[2026-06-02 17:00:26,956][255279] Updated weights for policy 0, policy_version 32641 (0.0009) +[2026-06-02 17:00:27,605][255279] Updated weights for policy 0, policy_version 32651 (0.0009) +[2026-06-02 17:00:27,777][255279] Updated weights for policy 0, policy_version 32661 (0.0009) +[2026-06-02 17:00:27,969][255279] Updated weights for policy 0, policy_version 32671 (0.0006) +[2026-06-02 17:00:28,148][255279] Updated weights for policy 0, policy_version 32681 (0.0008) +[2026-06-02 17:00:28,345][255279] Updated weights for policy 0, policy_version 32691 (0.0009) +[2026-06-02 17:00:28,535][255279] Updated weights for policy 0, policy_version 32701 (0.0009) +[2026-06-02 17:00:28,716][255279] Updated weights for policy 0, policy_version 32711 (0.0009) +[2026-06-02 17:00:29,380][255279] Updated weights for policy 0, policy_version 32721 (0.0009) +[2026-06-02 17:00:29,561][255279] Updated weights for policy 0, policy_version 32731 (0.0008) +[2026-06-02 17:00:29,757][255279] Updated weights for policy 0, policy_version 32741 (0.0008) +[2026-06-02 17:00:29,957][255279] Updated weights for policy 0, policy_version 32752 (0.0008) +[2026-06-02 17:00:30,155][255279] Updated weights for policy 0, policy_version 32762 (0.0008) +[2026-06-02 17:00:30,333][255279] Updated weights for policy 0, policy_version 32772 (0.0008) +[2026-06-02 17:00:30,502][253683] Fps is (10 sec: 19660.8, 60 sec: 19660.8, 300 sec: 19549.7). Total num frames: 16842752. Throughput: 0: 19282.5. Samples: 16830976. Policy #0 lag: (min: 63.0, avg: 79.7, max: 127.0) +[2026-06-02 17:00:30,502][253683] Avg episode reward: [(0, '1586.455')] +[2026-06-02 17:00:30,509][255187] Saving new best policy, reward=1586.455! +[2026-06-02 17:00:31,009][255279] Updated weights for policy 0, policy_version 32782 (0.0008) +[2026-06-02 17:00:31,184][255279] Updated weights for policy 0, policy_version 32792 (0.0008) +[2026-06-02 17:00:31,387][255279] Updated weights for policy 0, policy_version 32803 (0.0008) +[2026-06-02 17:00:31,586][255279] Updated weights for policy 0, policy_version 32813 (0.0008) +[2026-06-02 17:00:31,766][255279] Updated weights for policy 0, policy_version 32823 (0.0009) +[2026-06-02 17:00:31,958][255279] Updated weights for policy 0, policy_version 32833 (0.0008) +[2026-06-02 17:00:32,637][255279] Updated weights for policy 0, policy_version 32843 (0.0009) +[2026-06-02 17:00:32,809][255279] Updated weights for policy 0, policy_version 32853 (0.0008) +[2026-06-02 17:00:32,997][255279] Updated weights for policy 0, policy_version 32863 (0.0008) +[2026-06-02 17:00:33,209][255279] Updated weights for policy 0, policy_version 32874 (0.0008) +[2026-06-02 17:00:33,387][255279] Updated weights for policy 0, policy_version 32884 (0.0008) +[2026-06-02 17:00:33,582][255279] Updated weights for policy 0, policy_version 32894 (0.0008) +[2026-06-02 17:00:33,766][255279] Updated weights for policy 0, policy_version 32904 (0.0008) +[2026-06-02 17:00:34,437][255279] Updated weights for policy 0, policy_version 32914 (0.0008) +[2026-06-02 17:00:34,622][255279] Updated weights for policy 0, policy_version 32924 (0.0008) +[2026-06-02 17:00:34,814][255279] Updated weights for policy 0, policy_version 32934 (0.0008) +[2026-06-02 17:00:34,995][255279] Updated weights for policy 0, policy_version 32944 (0.0008) +[2026-06-02 17:00:35,185][255279] Updated weights for policy 0, policy_version 32954 (0.0009) +[2026-06-02 17:00:35,365][255279] Updated weights for policy 0, policy_version 32964 (0.0009) +[2026-06-02 17:00:35,502][253683] Fps is (10 sec: 19660.8, 60 sec: 19660.8, 300 sec: 19549.7). Total num frames: 16941056. Throughput: 0: 19473.1. Samples: 16952448. Policy #0 lag: (min: 63.0, avg: 79.7, max: 127.0) +[2026-06-02 17:00:35,502][253683] Avg episode reward: [(0, '1652.666')] +[2026-06-02 17:00:35,508][255187] Saving new best policy, reward=1652.666! +[2026-06-02 17:00:36,038][255279] Updated weights for policy 0, policy_version 32974 (0.0008) +[2026-06-02 17:00:36,229][255279] Updated weights for policy 0, policy_version 32984 (0.0008) +[2026-06-02 17:00:36,418][255279] Updated weights for policy 0, policy_version 32994 (0.0008) +[2026-06-02 17:00:36,597][255279] Updated weights for policy 0, policy_version 33004 (0.0008) +[2026-06-02 17:00:36,794][255279] Updated weights for policy 0, policy_version 33014 (0.0009) +[2026-06-02 17:00:36,982][255279] Updated weights for policy 0, policy_version 33024 (0.0009) +[2026-06-02 17:00:37,652][255279] Updated weights for policy 0, policy_version 33034 (0.0008) +[2026-06-02 17:00:37,832][255279] Updated weights for policy 0, policy_version 33044 (0.0009) +[2026-06-02 17:00:38,010][255279] Updated weights for policy 0, policy_version 33054 (0.0008) +[2026-06-02 17:00:38,211][255279] Updated weights for policy 0, policy_version 33064 (0.0008) +[2026-06-02 17:00:38,390][255279] Updated weights for policy 0, policy_version 33074 (0.0008) +[2026-06-02 17:00:38,583][255279] Updated weights for policy 0, policy_version 33084 (0.0008) +[2026-06-02 17:00:38,774][255279] Updated weights for policy 0, policy_version 33094 (0.0008) +[2026-06-02 17:00:39,433][255279] Updated weights for policy 0, policy_version 33104 (0.0008) +[2026-06-02 17:00:39,628][255279] Updated weights for policy 0, policy_version 33114 (0.0008) +[2026-06-02 17:00:39,809][255279] Updated weights for policy 0, policy_version 33124 (0.0008) +[2026-06-02 17:00:39,998][255279] Updated weights for policy 0, policy_version 33134 (0.0008) +[2026-06-02 17:00:40,183][255279] Updated weights for policy 0, policy_version 33144 (0.0008) +[2026-06-02 17:00:40,372][255279] Updated weights for policy 0, policy_version 33154 (0.0008) +[2026-06-02 17:00:40,501][253683] Fps is (10 sec: 19660.8, 60 sec: 19660.8, 300 sec: 19549.7). Total num frames: 17039360. Throughput: 0: 19475.9. Samples: 17013120. Policy #0 lag: (min: 63.0, avg: 79.7, max: 127.0) +[2026-06-02 17:00:40,502][253683] Avg episode reward: [(0, '1650.914')] +[2026-06-02 17:00:41,079][255279] Updated weights for policy 0, policy_version 33165 (0.0008) +[2026-06-02 17:00:41,260][255279] Updated weights for policy 0, policy_version 33175 (0.0008) +[2026-06-02 17:00:41,449][255279] Updated weights for policy 0, policy_version 33185 (0.0007) +[2026-06-02 17:00:41,638][255279] Updated weights for policy 0, policy_version 33195 (0.0008) +[2026-06-02 17:00:41,840][255279] Updated weights for policy 0, policy_version 33206 (0.0009) +[2026-06-02 17:00:42,030][255279] Updated weights for policy 0, policy_version 33216 (0.0008) +[2026-06-02 17:00:42,698][255279] Updated weights for policy 0, policy_version 33226 (0.0008) +[2026-06-02 17:00:42,869][255279] Updated weights for policy 0, policy_version 33236 (0.0009) +[2026-06-02 17:00:43,057][255279] Updated weights for policy 0, policy_version 33246 (0.0009) +[2026-06-02 17:00:43,237][255279] Updated weights for policy 0, policy_version 33256 (0.0009) +[2026-06-02 17:00:43,429][255279] Updated weights for policy 0, policy_version 33266 (0.0009) +[2026-06-02 17:00:43,621][255279] Updated weights for policy 0, policy_version 33276 (0.0009) +[2026-06-02 17:00:43,806][255279] Updated weights for policy 0, policy_version 33286 (0.0009) +[2026-06-02 17:00:44,488][255279] Updated weights for policy 0, policy_version 33296 (0.0009) +[2026-06-02 17:00:44,661][255279] Updated weights for policy 0, policy_version 33306 (0.0008) +[2026-06-02 17:00:44,854][255279] Updated weights for policy 0, policy_version 33316 (0.0008) +[2026-06-02 17:00:45,034][255279] Updated weights for policy 0, policy_version 33326 (0.0008) +[2026-06-02 17:00:45,224][255279] Updated weights for policy 0, policy_version 33336 (0.0009) +[2026-06-02 17:00:45,405][255279] Updated weights for policy 0, policy_version 33346 (0.0008) +[2026-06-02 17:00:45,502][253683] Fps is (10 sec: 16384.0, 60 sec: 19114.7, 300 sec: 19438.6). Total num frames: 17104896. Throughput: 0: 19370.7. Samples: 17123200. Policy #0 lag: (min: 61.0, avg: 78.9, max: 125.0) +[2026-06-02 17:00:45,502][253683] Avg episode reward: [(0, '1677.110')] +[2026-06-02 17:00:45,514][255187] Saving new best policy, reward=1677.110! +[2026-06-02 17:00:46,090][255279] Updated weights for policy 0, policy_version 33356 (0.0009) +[2026-06-02 17:00:46,275][255279] Updated weights for policy 0, policy_version 33366 (0.0008) +[2026-06-02 17:00:46,459][255279] Updated weights for policy 0, policy_version 33376 (0.0008) +[2026-06-02 17:00:46,655][255279] Updated weights for policy 0, policy_version 33386 (0.0008) +[2026-06-02 17:00:46,841][255279] Updated weights for policy 0, policy_version 33396 (0.0008) +[2026-06-02 17:00:47,024][255279] Updated weights for policy 0, policy_version 33406 (0.0009) +[2026-06-02 17:00:47,210][255279] Updated weights for policy 0, policy_version 33416 (0.0008) +[2026-06-02 17:00:47,892][255279] Updated weights for policy 0, policy_version 33426 (0.0009) +[2026-06-02 17:00:48,064][255279] Updated weights for policy 0, policy_version 33436 (0.0009) +[2026-06-02 17:00:48,263][255279] Updated weights for policy 0, policy_version 33446 (0.0008) +[2026-06-02 17:00:48,450][255279] Updated weights for policy 0, policy_version 33456 (0.0008) +[2026-06-02 17:00:48,638][255279] Updated weights for policy 0, policy_version 33466 (0.0008) +[2026-06-02 17:00:48,842][255279] Updated weights for policy 0, policy_version 33476 (0.0008) +[2026-06-02 17:00:49,524][255279] Updated weights for policy 0, policy_version 33487 (0.0008) +[2026-06-02 17:00:49,706][255279] Updated weights for policy 0, policy_version 33497 (0.0008) +[2026-06-02 17:00:49,896][255279] Updated weights for policy 0, policy_version 33507 (0.0008) +[2026-06-02 17:00:50,084][255279] Updated weights for policy 0, policy_version 33517 (0.0008) +[2026-06-02 17:00:50,271][255279] Updated weights for policy 0, policy_version 33527 (0.0008) +[2026-06-02 17:00:50,482][255279] Updated weights for policy 0, policy_version 33538 (0.0008) +[2026-06-02 17:00:50,502][253683] Fps is (10 sec: 16383.9, 60 sec: 19114.7, 300 sec: 19438.6). Total num frames: 17203200. Throughput: 0: 19495.8. Samples: 17245568. Policy #0 lag: (min: 61.0, avg: 78.9, max: 125.0) +[2026-06-02 17:00:50,502][253683] Avg episode reward: [(0, '1696.674')] +[2026-06-02 17:00:50,585][255187] Saving new best policy, reward=1696.674! +[2026-06-02 17:00:51,146][255279] Updated weights for policy 0, policy_version 33548 (0.0008) +[2026-06-02 17:00:51,327][255279] Updated weights for policy 0, policy_version 33558 (0.0009) +[2026-06-02 17:00:51,517][255279] Updated weights for policy 0, policy_version 33568 (0.0009) +[2026-06-02 17:00:51,711][255279] Updated weights for policy 0, policy_version 33578 (0.0009) +[2026-06-02 17:00:51,898][255279] Updated weights for policy 0, policy_version 33588 (0.0009) +[2026-06-02 17:00:52,087][255279] Updated weights for policy 0, policy_version 33598 (0.0009) +[2026-06-02 17:00:52,269][255279] Updated weights for policy 0, policy_version 33608 (0.0009) +[2026-06-02 17:00:52,940][255279] Updated weights for policy 0, policy_version 33618 (0.0009) +[2026-06-02 17:00:53,121][255279] Updated weights for policy 0, policy_version 33628 (0.0008) +[2026-06-02 17:00:53,302][255279] Updated weights for policy 0, policy_version 33638 (0.0008) +[2026-06-02 17:00:53,503][255279] Updated weights for policy 0, policy_version 33648 (0.0009) +[2026-06-02 17:00:53,688][255279] Updated weights for policy 0, policy_version 33658 (0.0008) +[2026-06-02 17:00:53,876][255279] Updated weights for policy 0, policy_version 33668 (0.0008) +[2026-06-02 17:00:54,558][255279] Updated weights for policy 0, policy_version 33679 (0.0008) +[2026-06-02 17:00:54,738][255279] Updated weights for policy 0, policy_version 33689 (0.0008) +[2026-06-02 17:00:54,924][255279] Updated weights for policy 0, policy_version 33699 (0.0008) +[2026-06-02 17:00:55,112][255279] Updated weights for policy 0, policy_version 33709 (0.0009) +[2026-06-02 17:00:55,299][255279] Updated weights for policy 0, policy_version 33719 (0.0008) +[2026-06-02 17:00:55,484][255279] Updated weights for policy 0, policy_version 33729 (0.0008) +[2026-06-02 17:00:55,501][253683] Fps is (10 sec: 19660.8, 60 sec: 19114.7, 300 sec: 19438.6). Total num frames: 17301504. Throughput: 0: 19518.6. Samples: 17307264. Policy #0 lag: (min: 61.0, avg: 78.9, max: 125.0) +[2026-06-02 17:00:55,502][253683] Avg episode reward: [(0, '1688.192')] +[2026-06-02 17:00:56,185][255279] Updated weights for policy 0, policy_version 33739 (0.0008) +[2026-06-02 17:00:56,363][255279] Updated weights for policy 0, policy_version 33749 (0.0008) +[2026-06-02 17:00:56,537][255279] Updated weights for policy 0, policy_version 33759 (0.0009) +[2026-06-02 17:00:56,732][255279] Updated weights for policy 0, policy_version 33769 (0.0008) +[2026-06-02 17:00:56,930][255279] Updated weights for policy 0, policy_version 33779 (0.0009) +[2026-06-02 17:00:57,109][255279] Updated weights for policy 0, policy_version 33789 (0.0008) +[2026-06-02 17:00:57,290][255279] Updated weights for policy 0, policy_version 33799 (0.0008) +[2026-06-02 17:00:57,956][255279] Updated weights for policy 0, policy_version 33809 (0.0008) +[2026-06-02 17:00:58,139][255279] Updated weights for policy 0, policy_version 33819 (0.0008) +[2026-06-02 17:00:58,324][255279] Updated weights for policy 0, policy_version 33829 (0.0009) +[2026-06-02 17:00:58,513][255279] Updated weights for policy 0, policy_version 33839 (0.0008) +[2026-06-02 17:00:58,705][255279] Updated weights for policy 0, policy_version 33849 (0.0008) +[2026-06-02 17:00:58,898][255279] Updated weights for policy 0, policy_version 33859 (0.0008) +[2026-06-02 17:00:59,570][255279] Updated weights for policy 0, policy_version 33869 (0.0008) +[2026-06-02 17:00:59,745][255279] Updated weights for policy 0, policy_version 33879 (0.0009) +[2026-06-02 17:00:59,943][255279] Updated weights for policy 0, policy_version 33889 (0.0008) +[2026-06-02 17:01:00,117][255279] Updated weights for policy 0, policy_version 33899 (0.0008) +[2026-06-02 17:01:00,308][255279] Updated weights for policy 0, policy_version 33909 (0.0009) +[2026-06-02 17:01:00,502][253683] Fps is (10 sec: 19660.9, 60 sec: 19114.7, 300 sec: 19438.6). Total num frames: 17399808. Throughput: 0: 19475.9. Samples: 17416320. Policy #0 lag: (min: 61.0, avg: 78.9, max: 125.0) +[2026-06-02 17:01:00,502][253683] Avg episode reward: [(0, '1683.949')] +[2026-06-02 17:01:00,506][255279] Updated weights for policy 0, policy_version 33919 (0.0008) +[2026-06-02 17:01:01,175][255279] Updated weights for policy 0, policy_version 33929 (0.0009) +[2026-06-02 17:01:01,349][255279] Updated weights for policy 0, policy_version 33939 (0.0007) +[2026-06-02 17:01:01,538][255279] Updated weights for policy 0, policy_version 33949 (0.0004) +[2026-06-02 17:01:01,733][255279] Updated weights for policy 0, policy_version 33959 (0.0004) +[2026-06-02 17:01:01,922][255279] Updated weights for policy 0, policy_version 33969 (0.0004) +[2026-06-02 17:01:02,113][255279] Updated weights for policy 0, policy_version 33979 (0.0004) +[2026-06-02 17:01:02,301][255279] Updated weights for policy 0, policy_version 33989 (0.0006) +[2026-06-02 17:01:02,958][255279] Updated weights for policy 0, policy_version 33999 (0.0008) +[2026-06-02 17:01:03,142][255279] Updated weights for policy 0, policy_version 34009 (0.0008) +[2026-06-02 17:01:03,320][255279] Updated weights for policy 0, policy_version 34019 (0.0008) +[2026-06-02 17:01:03,515][255279] Updated weights for policy 0, policy_version 34029 (0.0009) +[2026-06-02 17:01:03,704][255279] Updated weights for policy 0, policy_version 34039 (0.0009) +[2026-06-02 17:01:03,891][255279] Updated weights for policy 0, policy_version 34049 (0.0008) +[2026-06-02 17:01:04,558][255279] Updated weights for policy 0, policy_version 34059 (0.0008) +[2026-06-02 17:01:04,747][255279] Updated weights for policy 0, policy_version 34069 (0.0004) +[2026-06-02 17:01:04,932][255279] Updated weights for policy 0, policy_version 34079 (0.0006) +[2026-06-02 17:01:05,133][255279] Updated weights for policy 0, policy_version 34089 (0.0006) +[2026-06-02 17:01:05,312][255279] Updated weights for policy 0, policy_version 34099 (0.0008) +[2026-06-02 17:01:05,502][253683] Fps is (10 sec: 19660.7, 60 sec: 19114.7, 300 sec: 19438.6). Total num frames: 17498112. Throughput: 0: 19478.7. Samples: 17537280. Policy #0 lag: (min: 61.0, avg: 78.9, max: 125.0) +[2026-06-02 17:01:05,503][253683] Avg episode reward: [(0, '1702.839')] +[2026-06-02 17:01:05,511][255279] Updated weights for policy 0, policy_version 34109 (0.0009) +[2026-06-02 17:01:05,704][255187] Saving new best policy, reward=1702.839! +[2026-06-02 17:01:05,706][255279] Updated weights for policy 0, policy_version 34120 (0.0009) +[2026-06-02 17:01:06,372][255279] Updated weights for policy 0, policy_version 34130 (0.0008) +[2026-06-02 17:01:06,556][255279] Updated weights for policy 0, policy_version 34140 (0.0009) +[2026-06-02 17:01:06,744][255279] Updated weights for policy 0, policy_version 34150 (0.0008) +[2026-06-02 17:01:06,933][255279] Updated weights for policy 0, policy_version 34160 (0.0008) +[2026-06-02 17:01:07,109][255279] Updated weights for policy 0, policy_version 34170 (0.0008) +[2026-06-02 17:01:07,321][255279] Updated weights for policy 0, policy_version 34181 (0.0008) +[2026-06-02 17:01:07,997][255279] Updated weights for policy 0, policy_version 34191 (0.0008) +[2026-06-02 17:01:08,192][255279] Updated weights for policy 0, policy_version 34201 (0.0008) +[2026-06-02 17:01:08,399][255279] Updated weights for policy 0, policy_version 34212 (0.0009) +[2026-06-02 17:01:08,606][255279] Updated weights for policy 0, policy_version 34223 (0.0009) +[2026-06-02 17:01:08,796][255279] Updated weights for policy 0, policy_version 34233 (0.0006) +[2026-06-02 17:01:08,982][255279] Updated weights for policy 0, policy_version 34243 (0.0006) +[2026-06-02 17:01:09,631][255279] Updated weights for policy 0, policy_version 34253 (0.0007) +[2026-06-02 17:01:09,820][255279] Updated weights for policy 0, policy_version 34263 (0.0005) +[2026-06-02 17:01:10,005][255279] Updated weights for policy 0, policy_version 34273 (0.0004) +[2026-06-02 17:01:10,197][255279] Updated weights for policy 0, policy_version 34283 (0.0004) +[2026-06-02 17:01:10,374][255279] Updated weights for policy 0, policy_version 34293 (0.0004) +[2026-06-02 17:01:10,502][253683] Fps is (10 sec: 19660.8, 60 sec: 19114.7, 300 sec: 19438.6). Total num frames: 17596416. Throughput: 0: 19436.1. Samples: 17596032. Policy #0 lag: (min: 25.0, avg: 56.6, max: 89.0) +[2026-06-02 17:01:10,502][253683] Avg episode reward: [(0, '1686.241')] +[2026-06-02 17:01:10,559][255279] Updated weights for policy 0, policy_version 34303 (0.0004) +[2026-06-02 17:01:11,251][255279] Updated weights for policy 0, policy_version 34313 (0.0006) +[2026-06-02 17:01:11,425][255279] Updated weights for policy 0, policy_version 34323 (0.0008) +[2026-06-02 17:01:11,616][255279] Updated weights for policy 0, policy_version 34333 (0.0008) +[2026-06-02 17:01:11,811][255279] Updated weights for policy 0, policy_version 34344 (0.0006) +[2026-06-02 17:01:12,001][255279] Updated weights for policy 0, policy_version 34354 (0.0008) +[2026-06-02 17:01:12,181][255279] Updated weights for policy 0, policy_version 34364 (0.0009) +[2026-06-02 17:01:12,376][255279] Updated weights for policy 0, policy_version 34374 (0.0009) +[2026-06-02 17:01:13,111][255279] Updated weights for policy 0, policy_version 34385 (0.0009) +[2026-06-02 17:01:13,284][255279] Updated weights for policy 0, policy_version 34395 (0.0005) +[2026-06-02 17:01:13,477][255279] Updated weights for policy 0, policy_version 34405 (0.0004) +[2026-06-02 17:01:13,662][255279] Updated weights for policy 0, policy_version 34415 (0.0004) +[2026-06-02 17:01:13,854][255279] Updated weights for policy 0, policy_version 34425 (0.0004) +[2026-06-02 17:01:14,039][255279] Updated weights for policy 0, policy_version 34435 (0.0004) +[2026-06-02 17:01:14,714][255279] Updated weights for policy 0, policy_version 34445 (0.0004) +[2026-06-02 17:01:14,885][255279] Updated weights for policy 0, policy_version 34455 (0.0007) +[2026-06-02 17:01:15,081][255279] Updated weights for policy 0, policy_version 34465 (0.0004) +[2026-06-02 17:01:15,280][255279] Updated weights for policy 0, policy_version 34476 (0.0006) +[2026-06-02 17:01:15,472][255279] Updated weights for policy 0, policy_version 34486 (0.0008) +[2026-06-02 17:01:15,502][253683] Fps is (10 sec: 19660.8, 60 sec: 19114.6, 300 sec: 19438.6). Total num frames: 17694720. Throughput: 0: 19512.9. Samples: 17709056. Policy #0 lag: (min: 25.0, avg: 56.6, max: 89.0) +[2026-06-02 17:01:15,502][253683] Avg episode reward: [(0, '1713.214')] +[2026-06-02 17:01:15,657][255279] Updated weights for policy 0, policy_version 34496 (0.0009) +[2026-06-02 17:01:15,794][255187] Saving new best policy, reward=1713.214! +[2026-06-02 17:01:16,358][255279] Updated weights for policy 0, policy_version 34507 (0.0008) +[2026-06-02 17:01:16,569][255279] Updated weights for policy 0, policy_version 34519 (0.0009) +[2026-06-02 17:01:16,760][255279] Updated weights for policy 0, policy_version 34529 (0.0008) +[2026-06-02 17:01:16,942][255279] Updated weights for policy 0, policy_version 34539 (0.0008) +[2026-06-02 17:01:17,137][255279] Updated weights for policy 0, policy_version 34550 (0.0009) +[2026-06-02 17:01:17,360][255279] Updated weights for policy 0, policy_version 34562 (0.0008) +[2026-06-02 17:01:18,054][255279] Updated weights for policy 0, policy_version 34572 (0.0009) +[2026-06-02 17:01:18,230][255279] Updated weights for policy 0, policy_version 34582 (0.0008) +[2026-06-02 17:01:18,427][255279] Updated weights for policy 0, policy_version 34593 (0.0009) +[2026-06-02 17:01:18,627][255279] Updated weights for policy 0, policy_version 34604 (0.0009) +[2026-06-02 17:01:18,824][255279] Updated weights for policy 0, policy_version 34614 (0.0009) +[2026-06-02 17:01:19,008][255279] Updated weights for policy 0, policy_version 34624 (0.0009) +[2026-06-02 17:01:19,707][255279] Updated weights for policy 0, policy_version 34634 (0.0009) +[2026-06-02 17:01:19,894][255279] Updated weights for policy 0, policy_version 34645 (0.0008) +[2026-06-02 17:01:20,082][255279] Updated weights for policy 0, policy_version 34655 (0.0008) +[2026-06-02 17:01:20,263][255279] Updated weights for policy 0, policy_version 34665 (0.0009) +[2026-06-02 17:01:20,452][255279] Updated weights for policy 0, policy_version 34675 (0.0009) +[2026-06-02 17:01:20,502][253683] Fps is (10 sec: 19660.7, 60 sec: 19114.6, 300 sec: 19438.6). Total num frames: 17793024. Throughput: 0: 19524.2. Samples: 17831040. Policy #0 lag: (min: 25.0, avg: 56.6, max: 89.0) +[2026-06-02 17:01:20,502][253683] Avg episode reward: [(0, '1697.078')] +[2026-06-02 17:01:20,638][255279] Updated weights for policy 0, policy_version 34685 (0.0008) +[2026-06-02 17:01:20,834][255279] Updated weights for policy 0, policy_version 34695 (0.0009) +[2026-06-02 17:01:21,522][255279] Updated weights for policy 0, policy_version 34705 (0.0009) +[2026-06-02 17:01:21,724][255279] Updated weights for policy 0, policy_version 34716 (0.0009) +[2026-06-02 17:01:21,909][255279] Updated weights for policy 0, policy_version 34726 (0.0008) +[2026-06-02 17:01:22,103][255279] Updated weights for policy 0, policy_version 34736 (0.0009) +[2026-06-02 17:01:22,288][255279] Updated weights for policy 0, policy_version 34746 (0.0009) +[2026-06-02 17:01:22,466][255279] Updated weights for policy 0, policy_version 34756 (0.0008) +[2026-06-02 17:01:23,135][255279] Updated weights for policy 0, policy_version 34766 (0.0009) +[2026-06-02 17:01:23,315][255279] Updated weights for policy 0, policy_version 34776 (0.0009) +[2026-06-02 17:01:23,507][255279] Updated weights for policy 0, policy_version 34786 (0.0008) +[2026-06-02 17:01:23,711][255279] Updated weights for policy 0, policy_version 34797 (0.0009) +[2026-06-02 17:01:23,891][255279] Updated weights for policy 0, policy_version 34807 (0.0008) +[2026-06-02 17:01:24,084][255279] Updated weights for policy 0, policy_version 34817 (0.0008) +[2026-06-02 17:01:24,776][255279] Updated weights for policy 0, policy_version 34827 (0.0008) +[2026-06-02 17:01:24,955][255279] Updated weights for policy 0, policy_version 34837 (0.0009) +[2026-06-02 17:01:25,139][255279] Updated weights for policy 0, policy_version 34847 (0.0008) +[2026-06-02 17:01:25,327][255279] Updated weights for policy 0, policy_version 34857 (0.0008) +[2026-06-02 17:01:25,502][253683] Fps is (10 sec: 19660.8, 60 sec: 19114.7, 300 sec: 19438.6). Total num frames: 17891328. Throughput: 0: 19350.7. Samples: 17883904. Policy #0 lag: (min: 25.0, avg: 56.6, max: 89.0) +[2026-06-02 17:01:25,502][253683] Avg episode reward: [(0, '1709.697')] +[2026-06-02 17:01:25,514][255279] Updated weights for policy 0, policy_version 34867 (0.0008) +[2026-06-02 17:01:25,701][255279] Updated weights for policy 0, policy_version 34877 (0.0008) +[2026-06-02 17:01:25,886][255279] Updated weights for policy 0, policy_version 34887 (0.0009) +[2026-06-02 17:01:26,576][255279] Updated weights for policy 0, policy_version 34897 (0.0009) +[2026-06-02 17:01:26,754][255279] Updated weights for policy 0, policy_version 34907 (0.0009) +[2026-06-02 17:01:26,948][255279] Updated weights for policy 0, policy_version 34917 (0.0009) +[2026-06-02 17:01:27,138][255279] Updated weights for policy 0, policy_version 34927 (0.0009) +[2026-06-02 17:01:27,325][255279] Updated weights for policy 0, policy_version 34937 (0.0009) +[2026-06-02 17:01:27,509][255279] Updated weights for policy 0, policy_version 34947 (0.0009) +[2026-06-02 17:01:28,183][255279] Updated weights for policy 0, policy_version 34957 (0.0009) +[2026-06-02 17:01:28,361][255279] Updated weights for policy 0, policy_version 34967 (0.0008) +[2026-06-02 17:01:28,554][255279] Updated weights for policy 0, policy_version 34977 (0.0009) +[2026-06-02 17:01:28,747][255279] Updated weights for policy 0, policy_version 34987 (0.0008) +[2026-06-02 17:01:28,925][255279] Updated weights for policy 0, policy_version 34997 (0.0008) +[2026-06-02 17:01:29,117][255279] Updated weights for policy 0, policy_version 35007 (0.0009) +[2026-06-02 17:01:29,797][255279] Updated weights for policy 0, policy_version 35017 (0.0008) +[2026-06-02 17:01:29,966][255279] Updated weights for policy 0, policy_version 35027 (0.0008) +[2026-06-02 17:01:30,177][255279] Updated weights for policy 0, policy_version 35038 (0.0009) +[2026-06-02 17:01:30,363][255279] Updated weights for policy 0, policy_version 35048 (0.0009) +[2026-06-02 17:01:30,502][253683] Fps is (10 sec: 19660.9, 60 sec: 19114.7, 300 sec: 19438.6). Total num frames: 17989632. Throughput: 0: 19532.8. Samples: 18002176. Policy #0 lag: (min: 63.0, avg: 81.1, max: 127.0) +[2026-06-02 17:01:30,502][253683] Avg episode reward: [(0, '1734.228')] +[2026-06-02 17:01:30,551][255279] Updated weights for policy 0, policy_version 35058 (0.0008) +[2026-06-02 17:01:30,743][255279] Updated weights for policy 0, policy_version 35068 (0.0008) +[2026-06-02 17:01:30,925][255279] Updated weights for policy 0, policy_version 35078 (0.0008) +[2026-06-02 17:01:30,954][255187] Saving new best policy, reward=1734.228! +[2026-06-02 17:01:31,593][255279] Updated weights for policy 0, policy_version 35088 (0.0009) +[2026-06-02 17:01:31,787][255279] Updated weights for policy 0, policy_version 35098 (0.0009) +[2026-06-02 17:01:31,971][255279] Updated weights for policy 0, policy_version 35108 (0.0008) +[2026-06-02 17:01:32,155][255279] Updated weights for policy 0, policy_version 35118 (0.0008) +[2026-06-02 17:01:32,348][255279] Updated weights for policy 0, policy_version 35128 (0.0008) +[2026-06-02 17:01:32,530][255279] Updated weights for policy 0, policy_version 35138 (0.0008) +[2026-06-02 17:01:33,206][255279] Updated weights for policy 0, policy_version 35148 (0.0008) +[2026-06-02 17:01:33,400][255279] Updated weights for policy 0, policy_version 35159 (0.0009) +[2026-06-02 17:01:33,590][255279] Updated weights for policy 0, policy_version 35169 (0.0008) +[2026-06-02 17:01:33,788][255279] Updated weights for policy 0, policy_version 35180 (0.0008) +[2026-06-02 17:01:33,982][255279] Updated weights for policy 0, policy_version 35190 (0.0009) +[2026-06-02 17:01:34,161][255279] Updated weights for policy 0, policy_version 35200 (0.0008) +[2026-06-02 17:01:34,854][255279] Updated weights for policy 0, policy_version 35210 (0.0009) +[2026-06-02 17:01:35,028][255279] Updated weights for policy 0, policy_version 35220 (0.0008) +[2026-06-02 17:01:35,211][255279] Updated weights for policy 0, policy_version 35230 (0.0009) +[2026-06-02 17:01:35,404][255279] Updated weights for policy 0, policy_version 35240 (0.0008) +[2026-06-02 17:01:35,502][253683] Fps is (10 sec: 19660.8, 60 sec: 19114.7, 300 sec: 19438.6). Total num frames: 18087936. Throughput: 0: 19515.7. Samples: 18123776. Policy #0 lag: (min: 63.0, avg: 81.1, max: 127.0) +[2026-06-02 17:01:35,502][253683] Avg episode reward: [(0, '1718.699')] +[2026-06-02 17:01:35,577][255279] Updated weights for policy 0, policy_version 35250 (0.0008) +[2026-06-02 17:01:35,767][255279] Updated weights for policy 0, policy_version 35260 (0.0008) +[2026-06-02 17:01:35,964][255279] Updated weights for policy 0, policy_version 35271 (0.0008) +[2026-06-02 17:01:36,661][255279] Updated weights for policy 0, policy_version 35281 (0.0005) +[2026-06-02 17:01:36,847][255279] Updated weights for policy 0, policy_version 35291 (0.0005) +[2026-06-02 17:01:37,036][255279] Updated weights for policy 0, policy_version 35301 (0.0005) +[2026-06-02 17:01:37,233][255279] Updated weights for policy 0, policy_version 35311 (0.0005) +[2026-06-02 17:01:37,406][255279] Updated weights for policy 0, policy_version 35321 (0.0006) +[2026-06-02 17:01:37,594][255279] Updated weights for policy 0, policy_version 35331 (0.0008) +[2026-06-02 17:01:38,269][255279] Updated weights for policy 0, policy_version 35341 (0.0008) +[2026-06-02 17:01:38,464][255279] Updated weights for policy 0, policy_version 35352 (0.0009) +[2026-06-02 17:01:38,652][255279] Updated weights for policy 0, policy_version 35362 (0.0008) +[2026-06-02 17:01:38,836][255279] Updated weights for policy 0, policy_version 35372 (0.0008) +[2026-06-02 17:01:39,035][255279] Updated weights for policy 0, policy_version 35382 (0.0008) +[2026-06-02 17:01:39,224][255279] Updated weights for policy 0, policy_version 35392 (0.0009) +[2026-06-02 17:01:39,899][255279] Updated weights for policy 0, policy_version 35402 (0.0008) +[2026-06-02 17:01:40,075][255279] Updated weights for policy 0, policy_version 35412 (0.0008) +[2026-06-02 17:01:40,256][255279] Updated weights for policy 0, policy_version 35422 (0.0009) +[2026-06-02 17:01:40,452][255279] Updated weights for policy 0, policy_version 35432 (0.0008) +[2026-06-02 17:01:40,501][253683] Fps is (10 sec: 19661.0, 60 sec: 19114.7, 300 sec: 19438.7). Total num frames: 18186240. Throughput: 0: 19237.0. Samples: 18172928. Policy #0 lag: (min: 63.0, avg: 81.1, max: 127.0) +[2026-06-02 17:01:40,502][253683] Avg episode reward: [(0, '1725.908')] +[2026-06-02 17:01:40,639][255279] Updated weights for policy 0, policy_version 35442 (0.0008) +[2026-06-02 17:01:40,837][255279] Updated weights for policy 0, policy_version 35453 (0.0008) +[2026-06-02 17:01:41,031][255279] Updated weights for policy 0, policy_version 35463 (0.0008) +[2026-06-02 17:01:41,708][255279] Updated weights for policy 0, policy_version 35473 (0.0008) +[2026-06-02 17:01:41,892][255279] Updated weights for policy 0, policy_version 35483 (0.0008) +[2026-06-02 17:01:42,098][255279] Updated weights for policy 0, policy_version 35494 (0.0008) +[2026-06-02 17:01:42,284][255279] Updated weights for policy 0, policy_version 35504 (0.0008) +[2026-06-02 17:01:42,468][255279] Updated weights for policy 0, policy_version 35514 (0.0008) +[2026-06-02 17:01:42,654][255279] Updated weights for policy 0, policy_version 35524 (0.0008) +[2026-06-02 17:01:43,322][255279] Updated weights for policy 0, policy_version 35534 (0.0009) +[2026-06-02 17:01:43,533][255279] Updated weights for policy 0, policy_version 35546 (0.0008) +[2026-06-02 17:01:43,739][255279] Updated weights for policy 0, policy_version 35557 (0.0008) +[2026-06-02 17:01:43,925][255279] Updated weights for policy 0, policy_version 35567 (0.0008) +[2026-06-02 17:01:44,108][255279] Updated weights for policy 0, policy_version 35577 (0.0008) +[2026-06-02 17:01:44,298][255279] Updated weights for policy 0, policy_version 35587 (0.0009) +[2026-06-02 17:01:44,991][255279] Updated weights for policy 0, policy_version 35597 (0.0008) +[2026-06-02 17:01:45,190][255279] Updated weights for policy 0, policy_version 35608 (0.0008) +[2026-06-02 17:01:45,373][255279] Updated weights for policy 0, policy_version 35618 (0.0009) +[2026-06-02 17:01:45,502][253683] Fps is (10 sec: 19660.8, 60 sec: 19660.8, 300 sec: 19438.6). Total num frames: 18284544. Throughput: 0: 19501.5. Samples: 18293888. Policy #0 lag: (min: 63.0, avg: 81.1, max: 127.0) +[2026-06-02 17:01:45,502][253683] Avg episode reward: [(0, '1686.634')] +[2026-06-02 17:01:45,557][255279] Updated weights for policy 0, policy_version 35628 (0.0009) +[2026-06-02 17:01:45,741][255279] Updated weights for policy 0, policy_version 35638 (0.0008) +[2026-06-02 17:01:45,946][255279] Updated weights for policy 0, policy_version 35649 (0.0009) +[2026-06-02 17:01:46,694][255279] Updated weights for policy 0, policy_version 35662 (0.0009) +[2026-06-02 17:01:46,894][255279] Updated weights for policy 0, policy_version 35673 (0.0009) +[2026-06-02 17:01:47,079][255279] Updated weights for policy 0, policy_version 35683 (0.0008) +[2026-06-02 17:01:47,266][255279] Updated weights for policy 0, policy_version 35693 (0.0008) +[2026-06-02 17:01:47,459][255279] Updated weights for policy 0, policy_version 35703 (0.0008) +[2026-06-02 17:01:47,629][255279] Updated weights for policy 0, policy_version 35713 (0.0009) +[2026-06-02 17:01:48,328][255279] Updated weights for policy 0, policy_version 35723 (0.0008) +[2026-06-02 17:01:48,512][255279] Updated weights for policy 0, policy_version 35734 (0.0008) +[2026-06-02 17:01:48,699][255279] Updated weights for policy 0, policy_version 35744 (0.0008) +[2026-06-02 17:01:48,898][255279] Updated weights for policy 0, policy_version 35754 (0.0008) +[2026-06-02 17:01:49,085][255279] Updated weights for policy 0, policy_version 35764 (0.0008) +[2026-06-02 17:01:49,292][255279] Updated weights for policy 0, policy_version 35775 (0.0008) +[2026-06-02 17:01:49,979][255279] Updated weights for policy 0, policy_version 35785 (0.0009) +[2026-06-02 17:01:50,164][255279] Updated weights for policy 0, policy_version 35795 (0.0009) +[2026-06-02 17:01:50,369][255279] Updated weights for policy 0, policy_version 35806 (0.0008) +[2026-06-02 17:01:50,502][253683] Fps is (10 sec: 19660.6, 60 sec: 19660.8, 300 sec: 19438.6). Total num frames: 18382848. Throughput: 0: 19410.5. Samples: 18410752. Policy #0 lag: (min: 63.0, avg: 81.1, max: 127.0) +[2026-06-02 17:01:50,502][253683] Avg episode reward: [(0, '1673.779')] +[2026-06-02 17:01:50,572][255279] Updated weights for policy 0, policy_version 35817 (0.0008) +[2026-06-02 17:01:50,759][255279] Updated weights for policy 0, policy_version 35827 (0.0009) +[2026-06-02 17:01:50,945][255279] Updated weights for policy 0, policy_version 35837 (0.0008) +[2026-06-02 17:01:51,136][255279] Updated weights for policy 0, policy_version 35847 (0.0009) +[2026-06-02 17:01:51,831][255279] Updated weights for policy 0, policy_version 35859 (0.0008) +[2026-06-02 17:01:52,016][255279] Updated weights for policy 0, policy_version 35869 (0.0008) +[2026-06-02 17:01:52,208][255279] Updated weights for policy 0, policy_version 35879 (0.0008) +[2026-06-02 17:01:52,395][255279] Updated weights for policy 0, policy_version 35889 (0.0008) +[2026-06-02 17:01:52,585][255279] Updated weights for policy 0, policy_version 35899 (0.0008) +[2026-06-02 17:01:52,774][255279] Updated weights for policy 0, policy_version 35909 (0.0008) +[2026-06-02 17:01:53,443][255279] Updated weights for policy 0, policy_version 35919 (0.0008) +[2026-06-02 17:01:53,629][255279] Updated weights for policy 0, policy_version 35929 (0.0008) +[2026-06-02 17:01:53,819][255279] Updated weights for policy 0, policy_version 35939 (0.0008) +[2026-06-02 17:01:54,004][255279] Updated weights for policy 0, policy_version 35949 (0.0008) +[2026-06-02 17:01:54,195][255279] Updated weights for policy 0, policy_version 35959 (0.0008) +[2026-06-02 17:01:54,382][255279] Updated weights for policy 0, policy_version 35969 (0.0007) +[2026-06-02 17:01:55,056][255279] Updated weights for policy 0, policy_version 35979 (0.0007) +[2026-06-02 17:01:55,237][255279] Updated weights for policy 0, policy_version 35989 (0.0009) +[2026-06-02 17:01:55,422][255279] Updated weights for policy 0, policy_version 35999 (0.0009) +[2026-06-02 17:01:55,502][253683] Fps is (10 sec: 19660.9, 60 sec: 19660.8, 300 sec: 19438.6). Total num frames: 18481152. Throughput: 0: 19322.3. Samples: 18465536. Policy #0 lag: (min: 63.0, avg: 79.9, max: 127.0) +[2026-06-02 17:01:55,502][253683] Avg episode reward: [(0, '1733.461')] +[2026-06-02 17:01:55,607][255279] Updated weights for policy 0, policy_version 36009 (0.0008) +[2026-06-02 17:01:55,805][255279] Updated weights for policy 0, policy_version 36019 (0.0009) +[2026-06-02 17:01:56,006][255279] Updated weights for policy 0, policy_version 36030 (0.0009) +[2026-06-02 17:01:56,189][255279] Updated weights for policy 0, policy_version 36040 (0.0008) +[2026-06-02 17:01:56,889][255279] Updated weights for policy 0, policy_version 36051 (0.0009) +[2026-06-02 17:01:57,077][255279] Updated weights for policy 0, policy_version 36061 (0.0008) +[2026-06-02 17:01:57,263][255279] Updated weights for policy 0, policy_version 36071 (0.0008) +[2026-06-02 17:01:57,447][255279] Updated weights for policy 0, policy_version 36081 (0.0008) +[2026-06-02 17:01:57,636][255279] Updated weights for policy 0, policy_version 36091 (0.0008) +[2026-06-02 17:01:57,823][255279] Updated weights for policy 0, policy_version 36101 (0.0008) +[2026-06-02 17:01:58,488][255279] Updated weights for policy 0, policy_version 36111 (0.0008) +[2026-06-02 17:01:58,674][255279] Updated weights for policy 0, policy_version 36121 (0.0008) +[2026-06-02 17:01:58,858][255279] Updated weights for policy 0, policy_version 36131 (0.0008) +[2026-06-02 17:01:59,040][255279] Updated weights for policy 0, policy_version 36141 (0.0008) +[2026-06-02 17:01:59,232][255279] Updated weights for policy 0, policy_version 36151 (0.0008) +[2026-06-02 17:01:59,424][255279] Updated weights for policy 0, policy_version 36161 (0.0008) +[2026-06-02 17:02:00,103][255279] Updated weights for policy 0, policy_version 36171 (0.0008) +[2026-06-02 17:02:00,279][255279] Updated weights for policy 0, policy_version 36181 (0.0009) +[2026-06-02 17:02:00,461][255279] Updated weights for policy 0, policy_version 36191 (0.0008) +[2026-06-02 17:02:00,502][253683] Fps is (10 sec: 19660.8, 60 sec: 19660.8, 300 sec: 19438.6). Total num frames: 18579456. Throughput: 0: 19527.1. Samples: 18587776. Policy #0 lag: (min: 63.0, avg: 79.9, max: 127.0) +[2026-06-02 17:02:00,502][253683] Avg episode reward: [(0, '1813.605')] +[2026-06-02 17:02:00,643][255279] Updated weights for policy 0, policy_version 36201 (0.0008) +[2026-06-02 17:02:00,866][255279] Updated weights for policy 0, policy_version 36213 (0.0009) +[2026-06-02 17:02:01,051][255279] Updated weights for policy 0, policy_version 36223 (0.0009) +[2026-06-02 17:02:01,210][255187] Saving new best policy, reward=1813.605! +[2026-06-02 17:02:01,753][255279] Updated weights for policy 0, policy_version 36233 (0.0009) +[2026-06-02 17:02:01,923][255279] Updated weights for policy 0, policy_version 36243 (0.0009) +[2026-06-02 17:02:02,115][255279] Updated weights for policy 0, policy_version 36253 (0.0009) +[2026-06-02 17:02:02,297][255279] Updated weights for policy 0, policy_version 36263 (0.0008) +[2026-06-02 17:02:02,476][255279] Updated weights for policy 0, policy_version 36273 (0.0009) +[2026-06-02 17:02:02,683][255279] Updated weights for policy 0, policy_version 36284 (0.0009) +[2026-06-02 17:02:02,877][255279] Updated weights for policy 0, policy_version 36294 (0.0009) +[2026-06-02 17:02:03,563][255279] Updated weights for policy 0, policy_version 36304 (0.0008) +[2026-06-02 17:02:03,744][255279] Updated weights for policy 0, policy_version 36314 (0.0008) +[2026-06-02 17:02:03,937][255279] Updated weights for policy 0, policy_version 36324 (0.0008) +[2026-06-02 17:02:04,110][255279] Updated weights for policy 0, policy_version 36334 (0.0008) +[2026-06-02 17:02:04,302][255279] Updated weights for policy 0, policy_version 36344 (0.0009) +[2026-06-02 17:02:04,492][255279] Updated weights for policy 0, policy_version 36354 (0.0008) +[2026-06-02 17:02:05,176][255279] Updated weights for policy 0, policy_version 36364 (0.0008) +[2026-06-02 17:02:05,376][255279] Updated weights for policy 0, policy_version 36375 (0.0008) +[2026-06-02 17:02:05,501][253683] Fps is (10 sec: 19660.9, 60 sec: 19660.8, 300 sec: 19438.6). Total num frames: 18677760. Throughput: 0: 19274.0. Samples: 18698368. Policy #0 lag: (min: 63.0, avg: 79.9, max: 127.0) +[2026-06-02 17:02:05,502][253683] Avg episode reward: [(0, '1806.099')] +[2026-06-02 17:02:05,567][255279] Updated weights for policy 0, policy_version 36385 (0.0008) +[2026-06-02 17:02:05,751][255279] Updated weights for policy 0, policy_version 36395 (0.0008) +[2026-06-02 17:02:05,941][255279] Updated weights for policy 0, policy_version 36405 (0.0009) +[2026-06-02 17:02:06,127][255279] Updated weights for policy 0, policy_version 36415 (0.0008) +[2026-06-02 17:02:06,816][255279] Updated weights for policy 0, policy_version 36425 (0.0008) +[2026-06-02 17:02:06,997][255279] Updated weights for policy 0, policy_version 36435 (0.0009) +[2026-06-02 17:02:07,182][255279] Updated weights for policy 0, policy_version 36445 (0.0008) +[2026-06-02 17:02:07,367][255279] Updated weights for policy 0, policy_version 36455 (0.0008) +[2026-06-02 17:02:07,553][255279] Updated weights for policy 0, policy_version 36465 (0.0008) +[2026-06-02 17:02:07,740][255279] Updated weights for policy 0, policy_version 36475 (0.0008) +[2026-06-02 17:02:07,944][255279] Updated weights for policy 0, policy_version 36486 (0.0009) +[2026-06-02 17:02:08,602][255279] Updated weights for policy 0, policy_version 36496 (0.0009) +[2026-06-02 17:02:08,792][255279] Updated weights for policy 0, policy_version 36506 (0.0009) +[2026-06-02 17:02:08,974][255279] Updated weights for policy 0, policy_version 36516 (0.0009) +[2026-06-02 17:02:09,164][255279] Updated weights for policy 0, policy_version 36526 (0.0009) +[2026-06-02 17:02:09,353][255279] Updated weights for policy 0, policy_version 36536 (0.0009) +[2026-06-02 17:02:09,540][255279] Updated weights for policy 0, policy_version 36546 (0.0008) +[2026-06-02 17:02:10,233][255279] Updated weights for policy 0, policy_version 36557 (0.0009) +[2026-06-02 17:02:10,418][255279] Updated weights for policy 0, policy_version 36567 (0.0009) +[2026-06-02 17:02:10,501][253683] Fps is (10 sec: 19660.9, 60 sec: 19660.8, 300 sec: 19438.6). Total num frames: 18776064. Throughput: 0: 19441.8. Samples: 18758784. Policy #0 lag: (min: 63.0, avg: 79.9, max: 127.0) +[2026-06-02 17:02:10,502][253683] Avg episode reward: [(0, '1825.394')] +[2026-06-02 17:02:10,595][255279] Updated weights for policy 0, policy_version 36577 (0.0009) +[2026-06-02 17:02:10,790][255279] Updated weights for policy 0, policy_version 36587 (0.0008) +[2026-06-02 17:02:10,975][255279] Updated weights for policy 0, policy_version 36597 (0.0009) +[2026-06-02 17:02:11,169][255279] Updated weights for policy 0, policy_version 36607 (0.0009) +[2026-06-02 17:02:11,330][255187] Saving new best policy, reward=1825.394! +[2026-06-02 17:02:11,865][255279] Updated weights for policy 0, policy_version 36617 (0.0009) +[2026-06-02 17:02:12,033][255279] Updated weights for policy 0, policy_version 36627 (0.0008) +[2026-06-02 17:02:12,227][255279] Updated weights for policy 0, policy_version 36637 (0.0009) +[2026-06-02 17:02:12,415][255279] Updated weights for policy 0, policy_version 36647 (0.0009) +[2026-06-02 17:02:12,601][255279] Updated weights for policy 0, policy_version 36657 (0.0009) +[2026-06-02 17:02:12,789][255279] Updated weights for policy 0, policy_version 36667 (0.0008) +[2026-06-02 17:02:12,967][255279] Updated weights for policy 0, policy_version 36677 (0.0008) +[2026-06-02 17:02:13,651][255279] Updated weights for policy 0, policy_version 36687 (0.0008) +[2026-06-02 17:02:13,851][255279] Updated weights for policy 0, policy_version 36698 (0.0008) +[2026-06-02 17:02:14,034][255279] Updated weights for policy 0, policy_version 36708 (0.0008) +[2026-06-02 17:02:14,226][255279] Updated weights for policy 0, policy_version 36718 (0.0008) +[2026-06-02 17:02:14,405][255279] Updated weights for policy 0, policy_version 36728 (0.0008) +[2026-06-02 17:02:14,597][255279] Updated weights for policy 0, policy_version 36738 (0.0008) +[2026-06-02 17:02:15,277][255279] Updated weights for policy 0, policy_version 36748 (0.0009) +[2026-06-02 17:02:15,465][255279] Updated weights for policy 0, policy_version 36758 (0.0008) +[2026-06-02 17:02:15,501][253683] Fps is (10 sec: 19660.8, 60 sec: 19660.8, 300 sec: 19438.6). Total num frames: 18874368. Throughput: 0: 19507.2. Samples: 18880000. Policy #0 lag: (min: 63.0, avg: 79.9, max: 127.0) +[2026-06-02 17:02:15,502][253683] Avg episode reward: [(0, '1816.587')] +[2026-06-02 17:02:15,648][255279] Updated weights for policy 0, policy_version 36768 (0.0008) +[2026-06-02 17:02:15,835][255279] Updated weights for policy 0, policy_version 36778 (0.0009) +[2026-06-02 17:02:16,020][255279] Updated weights for policy 0, policy_version 36788 (0.0008) +[2026-06-02 17:02:16,204][255279] Updated weights for policy 0, policy_version 36798 (0.0008) +[2026-06-02 17:02:16,392][255279] Updated weights for policy 0, policy_version 36808 (0.0008) +[2026-06-02 17:02:17,066][255279] Updated weights for policy 0, policy_version 36818 (0.0008) +[2026-06-02 17:02:17,249][255279] Updated weights for policy 0, policy_version 36828 (0.0008) +[2026-06-02 17:02:17,436][255279] Updated weights for policy 0, policy_version 36838 (0.0008) +[2026-06-02 17:02:17,620][255279] Updated weights for policy 0, policy_version 36848 (0.0008) +[2026-06-02 17:02:17,818][255279] Updated weights for policy 0, policy_version 36858 (0.0008) +[2026-06-02 17:02:18,008][255279] Updated weights for policy 0, policy_version 36868 (0.0008) +[2026-06-02 17:02:18,698][255279] Updated weights for policy 0, policy_version 36878 (0.0008) +[2026-06-02 17:02:18,876][255279] Updated weights for policy 0, policy_version 36888 (0.0008) +[2026-06-02 17:02:19,056][255279] Updated weights for policy 0, policy_version 36898 (0.0008) +[2026-06-02 17:02:19,246][255279] Updated weights for policy 0, policy_version 36908 (0.0008) +[2026-06-02 17:02:19,460][255279] Updated weights for policy 0, policy_version 36919 (0.0008) +[2026-06-02 17:02:19,647][255279] Updated weights for policy 0, policy_version 36929 (0.0008) +[2026-06-02 17:02:20,321][255279] Updated weights for policy 0, policy_version 36939 (0.0009) +[2026-06-02 17:02:20,491][255279] Updated weights for policy 0, policy_version 36949 (0.0009) +[2026-06-02 17:02:20,502][253683] Fps is (10 sec: 19660.7, 60 sec: 19660.8, 300 sec: 19438.6). Total num frames: 18972672. Throughput: 0: 19273.9. Samples: 18991104. Policy #0 lag: (min: 51.0, avg: 77.9, max: 115.0) +[2026-06-02 17:02:20,502][253683] Avg episode reward: [(0, '1779.754')] +[2026-06-02 17:02:20,693][255279] Updated weights for policy 0, policy_version 36959 (0.0009) +[2026-06-02 17:02:20,889][255279] Updated weights for policy 0, policy_version 36969 (0.0008) +[2026-06-02 17:02:21,069][255279] Updated weights for policy 0, policy_version 36979 (0.0008) +[2026-06-02 17:02:21,248][255279] Updated weights for policy 0, policy_version 36989 (0.0008) +[2026-06-02 17:02:21,439][255279] Updated weights for policy 0, policy_version 36999 (0.0008) +[2026-06-02 17:02:22,105][255279] Updated weights for policy 0, policy_version 37009 (0.0009) +[2026-06-02 17:02:22,290][255279] Updated weights for policy 0, policy_version 37019 (0.0008) +[2026-06-02 17:02:22,487][255279] Updated weights for policy 0, policy_version 37030 (0.0008) +[2026-06-02 17:02:22,678][255279] Updated weights for policy 0, policy_version 37040 (0.0009) +[2026-06-02 17:02:22,864][255279] Updated weights for policy 0, policy_version 37050 (0.0008) +[2026-06-02 17:02:23,060][255279] Updated weights for policy 0, policy_version 37060 (0.0008) +[2026-06-02 17:02:23,741][255279] Updated weights for policy 0, policy_version 37070 (0.0008) +[2026-06-02 17:02:23,917][255279] Updated weights for policy 0, policy_version 37080 (0.0008) +[2026-06-02 17:02:24,130][255279] Updated weights for policy 0, policy_version 37091 (0.0008) +[2026-06-02 17:02:24,321][255279] Updated weights for policy 0, policy_version 37101 (0.0008) +[2026-06-02 17:02:24,507][255279] Updated weights for policy 0, policy_version 37111 (0.0008) +[2026-06-02 17:02:24,694][255279] Updated weights for policy 0, policy_version 37121 (0.0009) +[2026-06-02 17:02:25,380][255279] Updated weights for policy 0, policy_version 37132 (0.0008) +[2026-06-02 17:02:25,502][253683] Fps is (10 sec: 19660.8, 60 sec: 19660.8, 300 sec: 19549.7). Total num frames: 19070976. Throughput: 0: 19518.5. Samples: 19051264. Policy #0 lag: (min: 51.0, avg: 77.9, max: 115.0) +[2026-06-02 17:02:25,502][253683] Avg episode reward: [(0, '1766.141')] +[2026-06-02 17:02:25,563][255279] Updated weights for policy 0, policy_version 37142 (0.0008) +[2026-06-02 17:02:25,772][255279] Updated weights for policy 0, policy_version 37153 (0.0008) +[2026-06-02 17:02:25,950][255279] Updated weights for policy 0, policy_version 37163 (0.0008) +[2026-06-02 17:02:26,139][255279] Updated weights for policy 0, policy_version 37173 (0.0008) +[2026-06-02 17:02:26,329][255279] Updated weights for policy 0, policy_version 37183 (0.0008) +[2026-06-02 17:02:27,041][255279] Updated weights for policy 0, policy_version 37193 (0.0008) +[2026-06-02 17:02:27,212][255279] Updated weights for policy 0, policy_version 37203 (0.0008) +[2026-06-02 17:02:27,395][255279] Updated weights for policy 0, policy_version 37213 (0.0008) +[2026-06-02 17:02:27,583][255279] Updated weights for policy 0, policy_version 37223 (0.0008) +[2026-06-02 17:02:27,770][255279] Updated weights for policy 0, policy_version 37233 (0.0008) +[2026-06-02 17:02:27,957][255279] Updated weights for policy 0, policy_version 37243 (0.0008) +[2026-06-02 17:02:28,144][255279] Updated weights for policy 0, policy_version 37253 (0.0008) +[2026-06-02 17:02:28,853][255279] Updated weights for policy 0, policy_version 37265 (0.0009) +[2026-06-02 17:02:29,036][255279] Updated weights for policy 0, policy_version 37275 (0.0008) +[2026-06-02 17:02:29,223][255279] Updated weights for policy 0, policy_version 37285 (0.0009) +[2026-06-02 17:02:29,415][255279] Updated weights for policy 0, policy_version 37295 (0.0009) +[2026-06-02 17:02:29,612][255279] Updated weights for policy 0, policy_version 37306 (0.0009) +[2026-06-02 17:02:29,801][255279] Updated weights for policy 0, policy_version 37316 (0.0009) +[2026-06-02 17:02:30,473][255279] Updated weights for policy 0, policy_version 37326 (0.0009) +[2026-06-02 17:02:30,502][253683] Fps is (10 sec: 19660.9, 60 sec: 19660.8, 300 sec: 19549.7). Total num frames: 19169280. Throughput: 0: 19555.6. Samples: 19173888. Policy #0 lag: (min: 51.0, avg: 77.9, max: 115.0) +[2026-06-02 17:02:30,502][253683] Avg episode reward: [(0, '1768.113')] +[2026-06-02 17:02:30,655][255279] Updated weights for policy 0, policy_version 37336 (0.0008) +[2026-06-02 17:02:30,868][255279] Updated weights for policy 0, policy_version 37347 (0.0009) +[2026-06-02 17:02:31,050][255279] Updated weights for policy 0, policy_version 37357 (0.0008) +[2026-06-02 17:02:31,247][255279] Updated weights for policy 0, policy_version 37367 (0.0009) +[2026-06-02 17:02:31,429][255279] Updated weights for policy 0, policy_version 37377 (0.0008) +[2026-06-02 17:02:32,096][255279] Updated weights for policy 0, policy_version 37387 (0.0008) +[2026-06-02 17:02:32,297][255279] Updated weights for policy 0, policy_version 37398 (0.0008) +[2026-06-02 17:02:32,503][255279] Updated weights for policy 0, policy_version 37409 (0.0009) +[2026-06-02 17:02:32,696][255279] Updated weights for policy 0, policy_version 37419 (0.0009) +[2026-06-02 17:02:32,867][255279] Updated weights for policy 0, policy_version 37429 (0.0008) +[2026-06-02 17:02:33,065][255279] Updated weights for policy 0, policy_version 37439 (0.0009) +[2026-06-02 17:02:33,747][255279] Updated weights for policy 0, policy_version 37449 (0.0008) +[2026-06-02 17:02:33,912][255279] Updated weights for policy 0, policy_version 37459 (0.0008) +[2026-06-02 17:02:34,100][255279] Updated weights for policy 0, policy_version 37469 (0.0008) +[2026-06-02 17:02:34,292][255279] Updated weights for policy 0, policy_version 37479 (0.0008) +[2026-06-02 17:02:34,480][255279] Updated weights for policy 0, policy_version 37489 (0.0008) +[2026-06-02 17:02:34,686][255279] Updated weights for policy 0, policy_version 37500 (0.0009) +[2026-06-02 17:02:34,870][255279] Updated weights for policy 0, policy_version 37510 (0.0008) +[2026-06-02 17:02:35,502][253683] Fps is (10 sec: 19660.8, 60 sec: 19660.8, 300 sec: 19549.7). Total num frames: 19267584. Throughput: 0: 19387.7. Samples: 19283200. Policy #0 lag: (min: 51.0, avg: 77.9, max: 115.0) +[2026-06-02 17:02:35,502][253683] Avg episode reward: [(0, '1740.434')] +[2026-06-02 17:02:35,568][255279] Updated weights for policy 0, policy_version 37521 (0.0008) +[2026-06-02 17:02:35,750][255279] Updated weights for policy 0, policy_version 37531 (0.0008) +[2026-06-02 17:02:35,960][255279] Updated weights for policy 0, policy_version 37542 (0.0009) +[2026-06-02 17:02:36,136][255279] Updated weights for policy 0, policy_version 37552 (0.0009) +[2026-06-02 17:02:36,323][255279] Updated weights for policy 0, policy_version 37562 (0.0008) +[2026-06-02 17:02:36,509][255279] Updated weights for policy 0, policy_version 37572 (0.0008) +[2026-06-02 17:02:37,201][255279] Updated weights for policy 0, policy_version 37582 (0.0006) +[2026-06-02 17:02:37,385][255279] Updated weights for policy 0, policy_version 37592 (0.0004) +[2026-06-02 17:02:37,571][255279] Updated weights for policy 0, policy_version 37602 (0.0004) +[2026-06-02 17:02:37,766][255279] Updated weights for policy 0, policy_version 37612 (0.0007) +[2026-06-02 17:02:37,950][255279] Updated weights for policy 0, policy_version 37622 (0.0009) +[2026-06-02 17:02:38,141][255279] Updated weights for policy 0, policy_version 37632 (0.0008) +[2026-06-02 17:02:38,802][255279] Updated weights for policy 0, policy_version 37642 (0.0008) +[2026-06-02 17:02:38,980][255279] Updated weights for policy 0, policy_version 37652 (0.0008) +[2026-06-02 17:02:39,163][255279] Updated weights for policy 0, policy_version 37662 (0.0008) +[2026-06-02 17:02:39,350][255279] Updated weights for policy 0, policy_version 37672 (0.0008) +[2026-06-02 17:02:39,532][255279] Updated weights for policy 0, policy_version 37682 (0.0008) +[2026-06-02 17:02:39,718][255279] Updated weights for policy 0, policy_version 37692 (0.0008) +[2026-06-02 17:02:39,903][255279] Updated weights for policy 0, policy_version 37702 (0.0008) +[2026-06-02 17:02:40,502][253683] Fps is (10 sec: 19660.8, 60 sec: 19660.8, 300 sec: 19549.7). Total num frames: 19365888. Throughput: 0: 19538.5. Samples: 19344768. Policy #0 lag: (min: 51.0, avg: 77.9, max: 115.0) +[2026-06-02 17:02:40,502][253683] Avg episode reward: [(0, '1805.183')] +[2026-06-02 17:02:40,615][255279] Updated weights for policy 0, policy_version 37712 (0.0009) +[2026-06-02 17:02:40,791][255279] Updated weights for policy 0, policy_version 37722 (0.0008) +[2026-06-02 17:02:40,984][255279] Updated weights for policy 0, policy_version 37732 (0.0008) +[2026-06-02 17:02:41,171][255279] Updated weights for policy 0, policy_version 37742 (0.0008) +[2026-06-02 17:02:41,360][255279] Updated weights for policy 0, policy_version 37752 (0.0009) +[2026-06-02 17:02:41,548][255279] Updated weights for policy 0, policy_version 37762 (0.0008) +[2026-06-02 17:02:42,248][255279] Updated weights for policy 0, policy_version 37774 (0.0008) +[2026-06-02 17:02:42,430][255279] Updated weights for policy 0, policy_version 37784 (0.0008) +[2026-06-02 17:02:42,634][255279] Updated weights for policy 0, policy_version 37795 (0.0008) +[2026-06-02 17:02:42,819][255279] Updated weights for policy 0, policy_version 37805 (0.0008) +[2026-06-02 17:02:43,003][255279] Updated weights for policy 0, policy_version 37815 (0.0009) +[2026-06-02 17:02:43,189][255279] Updated weights for policy 0, policy_version 37825 (0.0008) +[2026-06-02 17:02:43,894][255279] Updated weights for policy 0, policy_version 37835 (0.0009) +[2026-06-02 17:02:44,074][255279] Updated weights for policy 0, policy_version 37845 (0.0008) +[2026-06-02 17:02:44,258][255279] Updated weights for policy 0, policy_version 37855 (0.0009) +[2026-06-02 17:02:44,437][255279] Updated weights for policy 0, policy_version 37865 (0.0008) +[2026-06-02 17:02:44,669][255279] Updated weights for policy 0, policy_version 37877 (0.0008) +[2026-06-02 17:02:44,861][255279] Updated weights for policy 0, policy_version 37887 (0.0008) +[2026-06-02 17:02:45,501][253683] Fps is (10 sec: 19660.9, 60 sec: 19660.8, 300 sec: 19549.7). Total num frames: 19464192. Throughput: 0: 19518.6. Samples: 19466112. Policy #0 lag: (min: 63.0, avg: 78.9, max: 127.0) +[2026-06-02 17:02:45,502][253683] Avg episode reward: [(0, '1814.015')] +[2026-06-02 17:02:45,540][255279] Updated weights for policy 0, policy_version 37897 (0.0009) +[2026-06-02 17:02:45,734][255279] Updated weights for policy 0, policy_version 37908 (0.0008) +[2026-06-02 17:02:45,921][255279] Updated weights for policy 0, policy_version 37918 (0.0008) +[2026-06-02 17:02:46,107][255279] Updated weights for policy 0, policy_version 37928 (0.0008) +[2026-06-02 17:02:46,296][255279] Updated weights for policy 0, policy_version 37938 (0.0008) +[2026-06-02 17:02:46,478][255279] Updated weights for policy 0, policy_version 37948 (0.0008) +[2026-06-02 17:02:46,660][255279] Updated weights for policy 0, policy_version 37958 (0.0008) +[2026-06-02 17:02:47,341][255279] Updated weights for policy 0, policy_version 37968 (0.0008) +[2026-06-02 17:02:47,525][255279] Updated weights for policy 0, policy_version 37978 (0.0008) +[2026-06-02 17:02:47,718][255279] Updated weights for policy 0, policy_version 37988 (0.0008) +[2026-06-02 17:02:47,909][255279] Updated weights for policy 0, policy_version 37998 (0.0008) +[2026-06-02 17:02:48,111][255279] Updated weights for policy 0, policy_version 38009 (0.0008) +[2026-06-02 17:02:48,310][255279] Updated weights for policy 0, policy_version 38019 (0.0008) +[2026-06-02 17:02:48,965][255279] Updated weights for policy 0, policy_version 38029 (0.0009) +[2026-06-02 17:02:49,149][255279] Updated weights for policy 0, policy_version 38039 (0.0008) +[2026-06-02 17:02:49,338][255279] Updated weights for policy 0, policy_version 38049 (0.0008) +[2026-06-02 17:02:49,527][255279] Updated weights for policy 0, policy_version 38059 (0.0008) +[2026-06-02 17:02:49,716][255279] Updated weights for policy 0, policy_version 38069 (0.0009) +[2026-06-02 17:02:49,904][255279] Updated weights for policy 0, policy_version 38079 (0.0009) +[2026-06-02 17:02:50,502][253683] Fps is (10 sec: 19660.6, 60 sec: 19660.8, 300 sec: 19549.7). Total num frames: 19562496. Throughput: 0: 19512.8. Samples: 19576448. Policy #0 lag: (min: 63.0, avg: 78.9, max: 127.0) +[2026-06-02 17:02:50,502][253683] Avg episode reward: [(0, '1779.886')] +[2026-06-02 17:02:50,602][255279] Updated weights for policy 0, policy_version 38090 (0.0009) +[2026-06-02 17:02:50,777][255279] Updated weights for policy 0, policy_version 38100 (0.0008) +[2026-06-02 17:02:50,961][255279] Updated weights for policy 0, policy_version 38110 (0.0008) +[2026-06-02 17:02:51,160][255279] Updated weights for policy 0, policy_version 38121 (0.0009) +[2026-06-02 17:02:51,355][255279] Updated weights for policy 0, policy_version 38131 (0.0009) +[2026-06-02 17:02:51,534][255279] Updated weights for policy 0, policy_version 38141 (0.0008) +[2026-06-02 17:02:51,732][255279] Updated weights for policy 0, policy_version 38151 (0.0008) +[2026-06-02 17:02:52,392][255279] Updated weights for policy 0, policy_version 38161 (0.0008) +[2026-06-02 17:02:52,582][255279] Updated weights for policy 0, policy_version 38171 (0.0008) +[2026-06-02 17:02:52,768][255279] Updated weights for policy 0, policy_version 38181 (0.0009) +[2026-06-02 17:02:52,953][255279] Updated weights for policy 0, policy_version 38191 (0.0008) +[2026-06-02 17:02:53,141][255279] Updated weights for policy 0, policy_version 38201 (0.0008) +[2026-06-02 17:02:53,332][255279] Updated weights for policy 0, policy_version 38211 (0.0009) +[2026-06-02 17:02:54,031][255279] Updated weights for policy 0, policy_version 38221 (0.0008) +[2026-06-02 17:02:54,210][255279] Updated weights for policy 0, policy_version 38231 (0.0008) +[2026-06-02 17:02:54,389][255279] Updated weights for policy 0, policy_version 38241 (0.0008) +[2026-06-02 17:02:54,578][255279] Updated weights for policy 0, policy_version 38251 (0.0008) +[2026-06-02 17:02:54,761][255279] Updated weights for policy 0, policy_version 38261 (0.0008) +[2026-06-02 17:02:54,950][255279] Updated weights for policy 0, policy_version 38271 (0.0008) +[2026-06-02 17:02:55,502][253683] Fps is (10 sec: 19660.6, 60 sec: 19660.8, 300 sec: 19549.7). Total num frames: 19660800. Throughput: 0: 19515.7. Samples: 19636992. Policy #0 lag: (min: 63.0, avg: 78.9, max: 127.0) +[2026-06-02 17:02:55,503][253683] Avg episode reward: [(0, '1723.840')] +[2026-06-02 17:02:55,669][255279] Updated weights for policy 0, policy_version 38281 (0.0009) +[2026-06-02 17:02:55,837][255279] Updated weights for policy 0, policy_version 38291 (0.0008) +[2026-06-02 17:02:56,021][255279] Updated weights for policy 0, policy_version 38301 (0.0008) +[2026-06-02 17:02:56,214][255279] Updated weights for policy 0, policy_version 38311 (0.0009) +[2026-06-02 17:02:56,401][255279] Updated weights for policy 0, policy_version 38321 (0.0009) +[2026-06-02 17:02:56,593][255279] Updated weights for policy 0, policy_version 38331 (0.0009) +[2026-06-02 17:02:56,776][255279] Updated weights for policy 0, policy_version 38341 (0.0009) +[2026-06-02 17:02:57,444][255279] Updated weights for policy 0, policy_version 38351 (0.0008) +[2026-06-02 17:02:57,627][255279] Updated weights for policy 0, policy_version 38361 (0.0009) +[2026-06-02 17:02:57,811][255279] Updated weights for policy 0, policy_version 38371 (0.0009) +[2026-06-02 17:02:58,006][255279] Updated weights for policy 0, policy_version 38381 (0.0009) +[2026-06-02 17:02:58,187][255279] Updated weights for policy 0, policy_version 38391 (0.0009) +[2026-06-02 17:02:58,372][255279] Updated weights for policy 0, policy_version 38401 (0.0009) +[2026-06-02 17:02:59,053][255279] Updated weights for policy 0, policy_version 38411 (0.0009) +[2026-06-02 17:02:59,227][255279] Updated weights for policy 0, policy_version 38421 (0.0009) +[2026-06-02 17:02:59,418][255279] Updated weights for policy 0, policy_version 38431 (0.0009) +[2026-06-02 17:02:59,603][255279] Updated weights for policy 0, policy_version 38441 (0.0008) +[2026-06-02 17:02:59,796][255279] Updated weights for policy 0, policy_version 38451 (0.0008) +[2026-06-02 17:02:59,982][255279] Updated weights for policy 0, policy_version 38461 (0.0009) +[2026-06-02 17:03:00,166][255279] Updated weights for policy 0, policy_version 38471 (0.0008) +[2026-06-02 17:03:00,502][253683] Fps is (10 sec: 19661.0, 60 sec: 19660.8, 300 sec: 19549.7). Total num frames: 19759104. Throughput: 0: 19393.4. Samples: 19752704. Policy #0 lag: (min: 63.0, avg: 78.9, max: 127.0) +[2026-06-02 17:03:00,502][253683] Avg episode reward: [(0, '1732.121')] +[2026-06-02 17:03:00,930][255279] Updated weights for policy 0, policy_version 38488 (0.0009) +[2026-06-02 17:03:01,116][255279] Updated weights for policy 0, policy_version 38498 (0.0009) +[2026-06-02 17:03:01,293][255279] Updated weights for policy 0, policy_version 38508 (0.0008) +[2026-06-02 17:03:01,489][255279] Updated weights for policy 0, policy_version 38518 (0.0009) +[2026-06-02 17:03:01,675][255279] Updated weights for policy 0, policy_version 38528 (0.0009) +[2026-06-02 17:03:02,402][255279] Updated weights for policy 0, policy_version 38538 (0.0008) +[2026-06-02 17:03:02,580][255279] Updated weights for policy 0, policy_version 38548 (0.0008) +[2026-06-02 17:03:02,762][255279] Updated weights for policy 0, policy_version 38558 (0.0009) +[2026-06-02 17:03:02,949][255279] Updated weights for policy 0, policy_version 38568 (0.0009) +[2026-06-02 17:03:03,131][255279] Updated weights for policy 0, policy_version 38578 (0.0008) +[2026-06-02 17:03:03,320][255279] Updated weights for policy 0, policy_version 38588 (0.0008) +[2026-06-02 17:03:03,506][255279] Updated weights for policy 0, policy_version 38598 (0.0006) +[2026-06-02 17:03:04,184][255279] Updated weights for policy 0, policy_version 38608 (0.0004) +[2026-06-02 17:03:04,357][255279] Updated weights for policy 0, policy_version 38618 (0.0004) +[2026-06-02 17:03:04,554][255279] Updated weights for policy 0, policy_version 38628 (0.0006) +[2026-06-02 17:03:04,754][255279] Updated weights for policy 0, policy_version 38639 (0.0008) +[2026-06-02 17:03:04,949][255279] Updated weights for policy 0, policy_version 38649 (0.0009) +[2026-06-02 17:03:05,139][255279] Updated weights for policy 0, policy_version 38659 (0.0008) +[2026-06-02 17:03:05,502][253683] Fps is (10 sec: 19660.8, 60 sec: 19660.8, 300 sec: 19549.7). Total num frames: 19857408. Throughput: 0: 19527.1. Samples: 19869824. Policy #0 lag: (min: 63.0, avg: 78.9, max: 127.0) +[2026-06-02 17:03:05,502][253683] Avg episode reward: [(0, '1709.706')] +[2026-06-02 17:03:05,813][255279] Updated weights for policy 0, policy_version 38669 (0.0009) +[2026-06-02 17:03:05,991][255279] Updated weights for policy 0, policy_version 38679 (0.0008) +[2026-06-02 17:03:06,167][255279] Updated weights for policy 0, policy_version 38689 (0.0008) +[2026-06-02 17:03:06,361][255279] Updated weights for policy 0, policy_version 38699 (0.0008) +[2026-06-02 17:03:06,585][255279] Updated weights for policy 0, policy_version 38711 (0.0008) +[2026-06-02 17:03:06,769][255279] Updated weights for policy 0, policy_version 38721 (0.0008) +[2026-06-02 17:03:07,455][255279] Updated weights for policy 0, policy_version 38731 (0.0008) +[2026-06-02 17:03:07,650][255279] Updated weights for policy 0, policy_version 38742 (0.0008) +[2026-06-02 17:03:07,870][255279] Updated weights for policy 0, policy_version 38754 (0.0009) +[2026-06-02 17:03:08,055][255279] Updated weights for policy 0, policy_version 38764 (0.0007) +[2026-06-02 17:03:08,237][255279] Updated weights for policy 0, policy_version 38774 (0.0008) +[2026-06-02 17:03:08,429][255279] Updated weights for policy 0, policy_version 38784 (0.0009) +[2026-06-02 17:03:09,138][255279] Updated weights for policy 0, policy_version 38795 (0.0009) +[2026-06-02 17:03:09,321][255279] Updated weights for policy 0, policy_version 38805 (0.0008) +[2026-06-02 17:03:09,505][255279] Updated weights for policy 0, policy_version 38815 (0.0009) +[2026-06-02 17:03:09,710][255279] Updated weights for policy 0, policy_version 38826 (0.0009) +[2026-06-02 17:03:09,899][255279] Updated weights for policy 0, policy_version 38836 (0.0008) +[2026-06-02 17:03:10,082][255279] Updated weights for policy 0, policy_version 38846 (0.0009) +[2026-06-02 17:03:10,269][255279] Updated weights for policy 0, policy_version 38856 (0.0008) +[2026-06-02 17:03:10,502][253683] Fps is (10 sec: 19660.8, 60 sec: 19660.8, 300 sec: 19549.7). Total num frames: 19955712. Throughput: 0: 19538.5. Samples: 19930496. Policy #0 lag: (min: 63.0, avg: 78.9, max: 127.0) +[2026-06-02 17:03:10,502][253683] Avg episode reward: [(0, '1713.873')] +[2026-06-02 17:03:10,961][255279] Updated weights for policy 0, policy_version 38866 (0.0009) +[2026-06-02 17:03:11,146][255279] Updated weights for policy 0, policy_version 38876 (0.0009) +[2026-06-02 17:03:11,339][255279] Updated weights for policy 0, policy_version 38886 (0.0008) +[2026-06-02 17:03:11,521][255279] Updated weights for policy 0, policy_version 38896 (0.0008) +[2026-06-02 17:03:11,718][255279] Updated weights for policy 0, policy_version 38906 (0.0009) +[2026-06-02 17:03:11,903][255279] Updated weights for policy 0, policy_version 38916 (0.0008) +[2026-06-02 17:03:12,578][255279] Updated weights for policy 0, policy_version 38926 (0.0008) +[2026-06-02 17:03:12,762][255279] Updated weights for policy 0, policy_version 38936 (0.0008) +[2026-06-02 17:03:12,973][255279] Updated weights for policy 0, policy_version 38947 (0.0009) +[2026-06-02 17:03:13,171][255279] Updated weights for policy 0, policy_version 38958 (0.0009) +[2026-06-02 17:03:13,365][255279] Updated weights for policy 0, policy_version 38968 (0.0009) +[2026-06-02 17:03:13,558][255279] Updated weights for policy 0, policy_version 38978 (0.0008) +[2026-06-02 17:03:14,214][255279] Updated weights for policy 0, policy_version 38988 (0.0008) +[2026-06-02 17:03:14,392][255279] Updated weights for policy 0, policy_version 38998 (0.0009) +[2026-06-02 17:03:14,577][255279] Updated weights for policy 0, policy_version 39008 (0.0008) +[2026-06-02 17:03:14,757][255279] Updated weights for policy 0, policy_version 39018 (0.0008) +[2026-06-02 17:03:14,953][255279] Updated weights for policy 0, policy_version 39028 (0.0009) +[2026-06-02 17:03:15,132][255279] Updated weights for policy 0, policy_version 39038 (0.0008) +[2026-06-02 17:03:15,317][255279] Updated weights for policy 0, policy_version 39048 (0.0008) +[2026-06-02 17:03:15,501][253683] Fps is (10 sec: 19660.9, 60 sec: 19660.8, 300 sec: 19549.7). Total num frames: 20054016. Throughput: 0: 19279.7. Samples: 20041472. Policy #0 lag: (min: 16.0, avg: 46.5, max: 80.0) +[2026-06-02 17:03:15,502][253683] Avg episode reward: [(0, '1709.937')] +[2026-06-02 17:03:15,990][255279] Updated weights for policy 0, policy_version 39058 (0.0009) +[2026-06-02 17:03:16,178][255279] Updated weights for policy 0, policy_version 39068 (0.0008) +[2026-06-02 17:03:16,363][255279] Updated weights for policy 0, policy_version 39078 (0.0008) +[2026-06-02 17:03:16,551][255279] Updated weights for policy 0, policy_version 39088 (0.0008) +[2026-06-02 17:03:16,732][255279] Updated weights for policy 0, policy_version 39098 (0.0009) +[2026-06-02 17:03:16,924][255279] Updated weights for policy 0, policy_version 39108 (0.0009) +[2026-06-02 17:03:17,631][255279] Updated weights for policy 0, policy_version 39119 (0.0009) +[2026-06-02 17:03:17,803][255279] Updated weights for policy 0, policy_version 39129 (0.0008) +[2026-06-02 17:03:17,986][255279] Updated weights for policy 0, policy_version 39139 (0.0008) +[2026-06-02 17:03:18,176][255279] Updated weights for policy 0, policy_version 39149 (0.0008) +[2026-06-02 17:03:18,360][255279] Updated weights for policy 0, policy_version 39159 (0.0008) +[2026-06-02 17:03:18,560][255279] Updated weights for policy 0, policy_version 39170 (0.0009) +[2026-06-02 17:03:19,264][255279] Updated weights for policy 0, policy_version 39180 (0.0008) +[2026-06-02 17:03:19,462][255279] Updated weights for policy 0, policy_version 39191 (0.0005) +[2026-06-02 17:03:19,653][255279] Updated weights for policy 0, policy_version 39201 (0.0004) +[2026-06-02 17:03:19,826][255279] Updated weights for policy 0, policy_version 39211 (0.0005) +[2026-06-02 17:03:20,020][255279] Updated weights for policy 0, policy_version 39221 (0.0005) +[2026-06-02 17:03:20,202][255279] Updated weights for policy 0, policy_version 39231 (0.0005) +[2026-06-02 17:03:20,502][253683] Fps is (10 sec: 19660.7, 60 sec: 19660.8, 300 sec: 19549.7). Total num frames: 20152320. Throughput: 0: 19549.8. Samples: 20162944. Policy #0 lag: (min: 16.0, avg: 46.5, max: 80.0) +[2026-06-02 17:03:20,503][253683] Avg episode reward: [(0, '1728.198')] +[2026-06-02 17:03:20,899][255279] Updated weights for policy 0, policy_version 39241 (0.0005) +[2026-06-02 17:03:21,067][255279] Updated weights for policy 0, policy_version 39251 (0.0008) +[2026-06-02 17:03:21,256][255279] Updated weights for policy 0, policy_version 39262 (0.0009) +[2026-06-02 17:03:21,453][255279] Updated weights for policy 0, policy_version 39272 (0.0008) +[2026-06-02 17:03:21,650][255279] Updated weights for policy 0, policy_version 39283 (0.0008) +[2026-06-02 17:03:21,857][255279] Updated weights for policy 0, policy_version 39294 (0.0009) +[2026-06-02 17:03:22,584][255279] Updated weights for policy 0, policy_version 39305 (0.0008) +[2026-06-02 17:03:22,752][255279] Updated weights for policy 0, policy_version 39315 (0.0009) +[2026-06-02 17:03:22,927][255279] Updated weights for policy 0, policy_version 39325 (0.0008) +[2026-06-02 17:03:23,157][255279] Updated weights for policy 0, policy_version 39337 (0.0008) +[2026-06-02 17:03:23,358][255279] Updated weights for policy 0, policy_version 39348 (0.0008) +[2026-06-02 17:03:23,545][255279] Updated weights for policy 0, policy_version 39358 (0.0008) +[2026-06-02 17:03:24,263][255279] Updated weights for policy 0, policy_version 39370 (0.0008) +[2026-06-02 17:03:24,461][255279] Updated weights for policy 0, policy_version 39381 (0.0008) +[2026-06-02 17:03:24,646][255279] Updated weights for policy 0, policy_version 39391 (0.0008) +[2026-06-02 17:03:24,825][255279] Updated weights for policy 0, policy_version 39401 (0.0008) +[2026-06-02 17:03:25,011][255279] Updated weights for policy 0, policy_version 39411 (0.0008) +[2026-06-02 17:03:25,218][255279] Updated weights for policy 0, policy_version 39422 (0.0008) +[2026-06-02 17:03:25,394][255279] Updated weights for policy 0, policy_version 39432 (0.0009) +[2026-06-02 17:03:25,502][253683] Fps is (10 sec: 19660.8, 60 sec: 19660.8, 300 sec: 19549.7). Total num frames: 20250624. Throughput: 0: 19541.3. Samples: 20224128. Policy #0 lag: (min: 16.0, avg: 46.5, max: 80.0) +[2026-06-02 17:03:25,502][253683] Avg episode reward: [(0, '1726.863')] +[2026-06-02 17:03:26,126][255279] Updated weights for policy 0, policy_version 39443 (0.0009) +[2026-06-02 17:03:26,301][255279] Updated weights for policy 0, policy_version 39453 (0.0008) +[2026-06-02 17:03:26,507][255279] Updated weights for policy 0, policy_version 39464 (0.0008) +[2026-06-02 17:03:26,690][255279] Updated weights for policy 0, policy_version 39474 (0.0008) +[2026-06-02 17:03:26,873][255279] Updated weights for policy 0, policy_version 39484 (0.0008) +[2026-06-02 17:03:27,061][255279] Updated weights for policy 0, policy_version 39494 (0.0009) +[2026-06-02 17:03:27,716][255279] Updated weights for policy 0, policy_version 39504 (0.0008) +[2026-06-02 17:03:27,909][255279] Updated weights for policy 0, policy_version 39515 (0.0008) +[2026-06-02 17:03:28,120][255279] Updated weights for policy 0, policy_version 39526 (0.0008) +[2026-06-02 17:03:28,331][255279] Updated weights for policy 0, policy_version 39538 (0.0008) +[2026-06-02 17:03:28,521][255279] Updated weights for policy 0, policy_version 39548 (0.0008) +[2026-06-02 17:03:28,734][255279] Updated weights for policy 0, policy_version 39559 (0.0008) +[2026-06-02 17:03:29,413][255279] Updated weights for policy 0, policy_version 39569 (0.0008) +[2026-06-02 17:03:29,596][255279] Updated weights for policy 0, policy_version 39579 (0.0008) +[2026-06-02 17:03:29,782][255279] Updated weights for policy 0, policy_version 39589 (0.0009) +[2026-06-02 17:03:29,990][255279] Updated weights for policy 0, policy_version 39600 (0.0009) +[2026-06-02 17:03:30,171][255279] Updated weights for policy 0, policy_version 39610 (0.0009) +[2026-06-02 17:03:30,360][255279] Updated weights for policy 0, policy_version 39620 (0.0008) +[2026-06-02 17:03:30,502][253683] Fps is (10 sec: 19660.9, 60 sec: 19660.8, 300 sec: 19549.7). Total num frames: 20348928. Throughput: 0: 19262.6. Samples: 20332928. Policy #0 lag: (min: 16.0, avg: 46.5, max: 80.0) +[2026-06-02 17:03:30,502][253683] Avg episode reward: [(0, '1746.509')] +[2026-06-02 17:03:31,030][255279] Updated weights for policy 0, policy_version 39630 (0.0007) +[2026-06-02 17:03:31,213][255279] Updated weights for policy 0, policy_version 39640 (0.0007) +[2026-06-02 17:03:31,400][255279] Updated weights for policy 0, policy_version 39650 (0.0007) +[2026-06-02 17:03:31,600][255279] Updated weights for policy 0, policy_version 39661 (0.0008) +[2026-06-02 17:03:31,781][255279] Updated weights for policy 0, policy_version 39671 (0.0005) +[2026-06-02 17:03:31,972][255279] Updated weights for policy 0, policy_version 39681 (0.0010) +[2026-06-02 17:03:32,668][255279] Updated weights for policy 0, policy_version 39692 (0.0009) +[2026-06-02 17:03:32,840][255279] Updated weights for policy 0, policy_version 39702 (0.0004) +[2026-06-02 17:03:33,039][255279] Updated weights for policy 0, policy_version 39713 (0.0004) +[2026-06-02 17:03:33,234][255279] Updated weights for policy 0, policy_version 39723 (0.0004) +[2026-06-02 17:03:33,433][255279] Updated weights for policy 0, policy_version 39734 (0.0004) +[2026-06-02 17:03:33,609][255279] Updated weights for policy 0, policy_version 39744 (0.0004) +[2026-06-02 17:03:34,323][255279] Updated weights for policy 0, policy_version 39755 (0.0004) +[2026-06-02 17:03:34,495][255279] Updated weights for policy 0, policy_version 39765 (0.0004) +[2026-06-02 17:03:34,679][255279] Updated weights for policy 0, policy_version 39775 (0.0004) +[2026-06-02 17:03:34,856][255279] Updated weights for policy 0, policy_version 39785 (0.0004) +[2026-06-02 17:03:35,049][255279] Updated weights for policy 0, policy_version 39795 (0.0008) +[2026-06-02 17:03:35,251][255279] Updated weights for policy 0, policy_version 39806 (0.0008) +[2026-06-02 17:03:35,428][255279] Updated weights for policy 0, policy_version 39816 (0.0009) +[2026-06-02 17:03:35,502][253683] Fps is (10 sec: 19660.6, 60 sec: 19660.8, 300 sec: 19549.7). Total num frames: 20447232. Throughput: 0: 19524.3. Samples: 20455040. Policy #0 lag: (min: 16.0, avg: 46.5, max: 80.0) +[2026-06-02 17:03:35,503][253683] Avg episode reward: [(0, '1722.259')] +[2026-06-02 17:03:36,126][255279] Updated weights for policy 0, policy_version 39826 (0.0008) +[2026-06-02 17:03:36,299][255279] Updated weights for policy 0, policy_version 39836 (0.0008) +[2026-06-02 17:03:36,510][255279] Updated weights for policy 0, policy_version 39847 (0.0008) +[2026-06-02 17:03:36,693][255279] Updated weights for policy 0, policy_version 39857 (0.0008) +[2026-06-02 17:03:36,875][255279] Updated weights for policy 0, policy_version 39867 (0.0009) +[2026-06-02 17:03:37,065][255279] Updated weights for policy 0, policy_version 39877 (0.0009) +[2026-06-02 17:03:37,745][255279] Updated weights for policy 0, policy_version 39887 (0.0009) +[2026-06-02 17:03:37,917][255279] Updated weights for policy 0, policy_version 39897 (0.0008) +[2026-06-02 17:03:38,109][255279] Updated weights for policy 0, policy_version 39907 (0.0008) +[2026-06-02 17:03:38,296][255279] Updated weights for policy 0, policy_version 39917 (0.0009) +[2026-06-02 17:03:38,496][255279] Updated weights for policy 0, policy_version 39927 (0.0009) +[2026-06-02 17:03:38,715][255279] Updated weights for policy 0, policy_version 39939 (0.0008) +[2026-06-02 17:03:39,395][255279] Updated weights for policy 0, policy_version 39950 (0.0009) +[2026-06-02 17:03:39,578][255279] Updated weights for policy 0, policy_version 39960 (0.0008) +[2026-06-02 17:03:39,762][255279] Updated weights for policy 0, policy_version 39970 (0.0008) +[2026-06-02 17:03:39,966][255279] Updated weights for policy 0, policy_version 39981 (0.0009) +[2026-06-02 17:03:40,142][255279] Updated weights for policy 0, policy_version 39991 (0.0008) +[2026-06-02 17:03:40,339][255279] Updated weights for policy 0, policy_version 40001 (0.0008) +[2026-06-02 17:03:40,501][253683] Fps is (10 sec: 19660.9, 60 sec: 19660.8, 300 sec: 19549.7). Total num frames: 20545536. Throughput: 0: 19535.7. Samples: 20516096. Policy #0 lag: (min: 29.0, avg: 45.9, max: 93.0) +[2026-06-02 17:03:40,502][253683] Avg episode reward: [(0, '1722.259')] +[2026-06-02 17:03:41,020][255279] Updated weights for policy 0, policy_version 40011 (0.0009) +[2026-06-02 17:03:41,199][255279] Updated weights for policy 0, policy_version 40021 (0.0009) +[2026-06-02 17:03:41,370][255279] Updated weights for policy 0, policy_version 40031 (0.0008) +[2026-06-02 17:03:41,554][255279] Updated weights for policy 0, policy_version 40041 (0.0008) +[2026-06-02 17:03:41,746][255279] Updated weights for policy 0, policy_version 40051 (0.0008) +[2026-06-02 17:03:41,933][255279] Updated weights for policy 0, policy_version 40061 (0.0008) +[2026-06-02 17:03:42,110][255279] Updated weights for policy 0, policy_version 40071 (0.0009) +[2026-06-02 17:03:42,800][255279] Updated weights for policy 0, policy_version 40081 (0.0005) +[2026-06-02 17:03:42,992][255279] Updated weights for policy 0, policy_version 40091 (0.0004) +[2026-06-02 17:03:43,198][255279] Updated weights for policy 0, policy_version 40102 (0.0005) +[2026-06-02 17:03:43,403][255279] Updated weights for policy 0, policy_version 40113 (0.0004) +[2026-06-02 17:03:43,592][255279] Updated weights for policy 0, policy_version 40123 (0.0005) +[2026-06-02 17:03:43,780][255279] Updated weights for policy 0, policy_version 40133 (0.0005) +[2026-06-02 17:03:44,444][255279] Updated weights for policy 0, policy_version 40143 (0.0005) +[2026-06-02 17:03:44,625][255279] Updated weights for policy 0, policy_version 40153 (0.0004) +[2026-06-02 17:03:44,814][255279] Updated weights for policy 0, policy_version 40163 (0.0005) +[2026-06-02 17:03:45,007][255279] Updated weights for policy 0, policy_version 40173 (0.0005) +[2026-06-02 17:03:45,184][255279] Updated weights for policy 0, policy_version 40183 (0.0005) +[2026-06-02 17:03:45,381][255279] Updated weights for policy 0, policy_version 40193 (0.0005) +[2026-06-02 17:03:45,502][253683] Fps is (10 sec: 16384.2, 60 sec: 19114.7, 300 sec: 19438.7). Total num frames: 20611072. Throughput: 0: 19399.1. Samples: 20625664. Policy #0 lag: (min: 29.0, avg: 45.9, max: 93.0) +[2026-06-02 17:03:45,502][253683] Avg episode reward: [(0, '1671.539')] +[2026-06-02 17:03:46,057][255279] Updated weights for policy 0, policy_version 40203 (0.0006) +[2026-06-02 17:03:46,262][255279] Updated weights for policy 0, policy_version 40215 (0.0008) +[2026-06-02 17:03:46,449][255279] Updated weights for policy 0, policy_version 40225 (0.0009) +[2026-06-02 17:03:46,634][255279] Updated weights for policy 0, policy_version 40235 (0.0009) +[2026-06-02 17:03:46,822][255279] Updated weights for policy 0, policy_version 40245 (0.0008) +[2026-06-02 17:03:47,007][255279] Updated weights for policy 0, policy_version 40255 (0.0008) +[2026-06-02 17:03:47,696][255279] Updated weights for policy 0, policy_version 40265 (0.0008) +[2026-06-02 17:03:47,871][255279] Updated weights for policy 0, policy_version 40275 (0.0008) +[2026-06-02 17:03:48,052][255279] Updated weights for policy 0, policy_version 40285 (0.0009) +[2026-06-02 17:03:48,246][255279] Updated weights for policy 0, policy_version 40295 (0.0008) +[2026-06-02 17:03:48,429][255279] Updated weights for policy 0, policy_version 40305 (0.0008) +[2026-06-02 17:03:48,621][255279] Updated weights for policy 0, policy_version 40315 (0.0009) +[2026-06-02 17:03:48,810][255279] Updated weights for policy 0, policy_version 40325 (0.0009) +[2026-06-02 17:03:49,488][255279] Updated weights for policy 0, policy_version 40335 (0.0008) +[2026-06-02 17:03:49,664][255279] Updated weights for policy 0, policy_version 40345 (0.0008) +[2026-06-02 17:03:49,854][255279] Updated weights for policy 0, policy_version 40355 (0.0008) +[2026-06-02 17:03:50,048][255279] Updated weights for policy 0, policy_version 40365 (0.0008) +[2026-06-02 17:03:50,229][255279] Updated weights for policy 0, policy_version 40375 (0.0008) +[2026-06-02 17:03:50,418][255279] Updated weights for policy 0, policy_version 40385 (0.0008) +[2026-06-02 17:03:50,502][253683] Fps is (10 sec: 16383.9, 60 sec: 19114.7, 300 sec: 19438.6). Total num frames: 20709376. Throughput: 0: 19498.7. Samples: 20747264. Policy #0 lag: (min: 29.0, avg: 45.9, max: 93.0) +[2026-06-02 17:03:50,502][253683] Avg episode reward: [(0, '1640.397')] +[2026-06-02 17:03:51,090][255279] Updated weights for policy 0, policy_version 40395 (0.0009) +[2026-06-02 17:03:51,274][255279] Updated weights for policy 0, policy_version 40405 (0.0009) +[2026-06-02 17:03:51,460][255279] Updated weights for policy 0, policy_version 40415 (0.0009) +[2026-06-02 17:03:51,647][255279] Updated weights for policy 0, policy_version 40425 (0.0009) +[2026-06-02 17:03:51,851][255279] Updated weights for policy 0, policy_version 40436 (0.0009) +[2026-06-02 17:03:52,041][255279] Updated weights for policy 0, policy_version 40446 (0.0009) +[2026-06-02 17:03:52,221][255279] Updated weights for policy 0, policy_version 40456 (0.0009) +[2026-06-02 17:03:52,911][255279] Updated weights for policy 0, policy_version 40466 (0.0009) +[2026-06-02 17:03:53,094][255279] Updated weights for policy 0, policy_version 40476 (0.0009) +[2026-06-02 17:03:53,285][255279] Updated weights for policy 0, policy_version 40486 (0.0009) +[2026-06-02 17:03:53,487][255279] Updated weights for policy 0, policy_version 40497 (0.0009) +[2026-06-02 17:03:53,676][255279] Updated weights for policy 0, policy_version 40507 (0.0008) +[2026-06-02 17:03:53,859][255279] Updated weights for policy 0, policy_version 40517 (0.0009) +[2026-06-02 17:03:54,556][255279] Updated weights for policy 0, policy_version 40527 (0.0008) +[2026-06-02 17:03:54,732][255279] Updated weights for policy 0, policy_version 40537 (0.0009) +[2026-06-02 17:03:54,917][255279] Updated weights for policy 0, policy_version 40547 (0.0009) +[2026-06-02 17:03:55,106][255279] Updated weights for policy 0, policy_version 40557 (0.0009) +[2026-06-02 17:03:55,316][255279] Updated weights for policy 0, policy_version 40568 (0.0008) +[2026-06-02 17:03:55,499][255279] Updated weights for policy 0, policy_version 40578 (0.0008) +[2026-06-02 17:03:55,502][253683] Fps is (10 sec: 19660.8, 60 sec: 19114.7, 300 sec: 19438.6). Total num frames: 20807680. Throughput: 0: 19521.4. Samples: 20808960. Policy #0 lag: (min: 29.0, avg: 45.9, max: 93.0) +[2026-06-02 17:03:55,503][253683] Avg episode reward: [(0, '1693.165')] +[2026-06-02 17:03:56,193][255279] Updated weights for policy 0, policy_version 40588 (0.0008) +[2026-06-02 17:03:56,384][255279] Updated weights for policy 0, policy_version 40598 (0.0008) +[2026-06-02 17:03:56,586][255279] Updated weights for policy 0, policy_version 40609 (0.0008) +[2026-06-02 17:03:56,773][255279] Updated weights for policy 0, policy_version 40619 (0.0008) +[2026-06-02 17:03:56,958][255279] Updated weights for policy 0, policy_version 40629 (0.0008) +[2026-06-02 17:03:57,137][255279] Updated weights for policy 0, policy_version 40639 (0.0008) +[2026-06-02 17:03:57,826][255279] Updated weights for policy 0, policy_version 40649 (0.0009) +[2026-06-02 17:03:58,001][255279] Updated weights for policy 0, policy_version 40659 (0.0008) +[2026-06-02 17:03:58,183][255279] Updated weights for policy 0, policy_version 40669 (0.0009) +[2026-06-02 17:03:58,368][255279] Updated weights for policy 0, policy_version 40679 (0.0009) +[2026-06-02 17:03:58,551][255279] Updated weights for policy 0, policy_version 40689 (0.0009) +[2026-06-02 17:03:58,744][255279] Updated weights for policy 0, policy_version 40699 (0.0008) +[2026-06-02 17:03:58,936][255279] Updated weights for policy 0, policy_version 40709 (0.0010) +[2026-06-02 17:03:59,603][255279] Updated weights for policy 0, policy_version 40719 (0.0009) +[2026-06-02 17:03:59,790][255279] Updated weights for policy 0, policy_version 40729 (0.0009) +[2026-06-02 17:03:59,975][255279] Updated weights for policy 0, policy_version 40739 (0.0009) +[2026-06-02 17:04:00,163][255279] Updated weights for policy 0, policy_version 40749 (0.0009) +[2026-06-02 17:04:00,352][255279] Updated weights for policy 0, policy_version 40759 (0.0008) +[2026-06-02 17:04:00,502][253683] Fps is (10 sec: 19660.8, 60 sec: 19114.7, 300 sec: 19438.6). Total num frames: 20905984. Throughput: 0: 19524.2. Samples: 20920064. Policy #0 lag: (min: 29.0, avg: 45.9, max: 93.0) +[2026-06-02 17:04:00,502][253683] Avg episode reward: [(0, '1753.454')] +[2026-06-02 17:04:00,538][255279] Updated weights for policy 0, policy_version 40769 (0.0008) +[2026-06-02 17:04:01,229][255279] Updated weights for policy 0, policy_version 40779 (0.0009) +[2026-06-02 17:04:01,404][255279] Updated weights for policy 0, policy_version 40789 (0.0010) +[2026-06-02 17:04:01,588][255279] Updated weights for policy 0, policy_version 40799 (0.0009) +[2026-06-02 17:04:01,775][255279] Updated weights for policy 0, policy_version 40809 (0.0009) +[2026-06-02 17:04:01,947][255279] Updated weights for policy 0, policy_version 40819 (0.0008) +[2026-06-02 17:04:02,149][255279] Updated weights for policy 0, policy_version 40829 (0.0009) +[2026-06-02 17:04:02,339][255279] Updated weights for policy 0, policy_version 40839 (0.0009) +[2026-06-02 17:04:03,012][255279] Updated weights for policy 0, policy_version 40849 (0.0008) +[2026-06-02 17:04:03,194][255279] Updated weights for policy 0, policy_version 40859 (0.0008) +[2026-06-02 17:04:03,373][255279] Updated weights for policy 0, policy_version 40869 (0.0009) +[2026-06-02 17:04:03,562][255279] Updated weights for policy 0, policy_version 40879 (0.0009) +[2026-06-02 17:04:03,754][255279] Updated weights for policy 0, policy_version 40889 (0.0009) +[2026-06-02 17:04:03,944][255279] Updated weights for policy 0, policy_version 40899 (0.0010) +[2026-06-02 17:04:04,629][255279] Updated weights for policy 0, policy_version 40909 (0.0009) +[2026-06-02 17:04:04,813][255279] Updated weights for policy 0, policy_version 40919 (0.0008) +[2026-06-02 17:04:04,996][255279] Updated weights for policy 0, policy_version 40929 (0.0008) +[2026-06-02 17:04:05,183][255279] Updated weights for policy 0, policy_version 40939 (0.0008) +[2026-06-02 17:04:05,359][255279] Updated weights for policy 0, policy_version 40949 (0.0008) +[2026-06-02 17:04:05,502][253683] Fps is (10 sec: 19660.5, 60 sec: 19114.6, 300 sec: 19438.6). Total num frames: 21004288. Throughput: 0: 19524.2. Samples: 21041536. Policy #0 lag: (min: 29.0, avg: 45.9, max: 93.0) +[2026-06-02 17:04:05,503][253683] Avg episode reward: [(0, '1766.425')] +[2026-06-02 17:04:05,557][255279] Updated weights for policy 0, policy_version 40959 (0.0008) +[2026-06-02 17:04:06,240][255279] Updated weights for policy 0, policy_version 40969 (0.0008) +[2026-06-02 17:04:06,431][255279] Updated weights for policy 0, policy_version 40980 (0.0007) +[2026-06-02 17:04:06,639][255279] Updated weights for policy 0, policy_version 40991 (0.0009) +[2026-06-02 17:04:06,840][255279] Updated weights for policy 0, policy_version 41002 (0.0009) +[2026-06-02 17:04:07,022][255279] Updated weights for policy 0, policy_version 41012 (0.0008) +[2026-06-02 17:04:07,200][255279] Updated weights for policy 0, policy_version 41022 (0.0008) +[2026-06-02 17:04:07,919][255279] Updated weights for policy 0, policy_version 41033 (0.0008) +[2026-06-02 17:04:08,092][255279] Updated weights for policy 0, policy_version 41043 (0.0009) +[2026-06-02 17:04:08,298][255279] Updated weights for policy 0, policy_version 41054 (0.0009) +[2026-06-02 17:04:08,467][255279] Updated weights for policy 0, policy_version 41064 (0.0008) +[2026-06-02 17:04:08,672][255279] Updated weights for policy 0, policy_version 41075 (0.0008) +[2026-06-02 17:04:08,859][255279] Updated weights for policy 0, policy_version 41085 (0.0008) +[2026-06-02 17:04:09,061][255279] Updated weights for policy 0, policy_version 41095 (0.0008) +[2026-06-02 17:04:09,733][255279] Updated weights for policy 0, policy_version 41105 (0.0009) +[2026-06-02 17:04:09,915][255279] Updated weights for policy 0, policy_version 41115 (0.0009) +[2026-06-02 17:04:10,095][255279] Updated weights for policy 0, policy_version 41125 (0.0009) +[2026-06-02 17:04:10,276][255279] Updated weights for policy 0, policy_version 41135 (0.0008) +[2026-06-02 17:04:10,497][255279] Updated weights for policy 0, policy_version 41146 (0.0008) +[2026-06-02 17:04:10,502][253683] Fps is (10 sec: 19660.8, 60 sec: 19114.7, 300 sec: 19438.6). Total num frames: 21102592. Throughput: 0: 19438.9. Samples: 21098880. Policy #0 lag: (min: 61.0, avg: 77.5, max: 125.0) +[2026-06-02 17:04:10,502][253683] Avg episode reward: [(0, '1781.570')] +[2026-06-02 17:04:10,696][255279] Updated weights for policy 0, policy_version 41156 (0.0009) +[2026-06-02 17:04:11,391][255279] Updated weights for policy 0, policy_version 41166 (0.0009) +[2026-06-02 17:04:11,565][255279] Updated weights for policy 0, policy_version 41176 (0.0009) +[2026-06-02 17:04:11,759][255279] Updated weights for policy 0, policy_version 41186 (0.0009) +[2026-06-02 17:04:11,943][255279] Updated weights for policy 0, policy_version 41196 (0.0009) +[2026-06-02 17:04:12,130][255279] Updated weights for policy 0, policy_version 41206 (0.0009) +[2026-06-02 17:04:12,308][255279] Updated weights for policy 0, policy_version 41216 (0.0009) +[2026-06-02 17:04:13,009][255279] Updated weights for policy 0, policy_version 41226 (0.0009) +[2026-06-02 17:04:13,193][255279] Updated weights for policy 0, policy_version 41237 (0.0008) +[2026-06-02 17:04:13,382][255279] Updated weights for policy 0, policy_version 41247 (0.0009) +[2026-06-02 17:04:13,586][255279] Updated weights for policy 0, policy_version 41258 (0.0008) +[2026-06-02 17:04:13,770][255279] Updated weights for policy 0, policy_version 41268 (0.0008) +[2026-06-02 17:04:13,981][255279] Updated weights for policy 0, policy_version 41279 (0.0009) +[2026-06-02 17:04:14,674][255279] Updated weights for policy 0, policy_version 41289 (0.0008) +[2026-06-02 17:04:14,848][255279] Updated weights for policy 0, policy_version 41299 (0.0008) +[2026-06-02 17:04:15,030][255279] Updated weights for policy 0, policy_version 41309 (0.0008) +[2026-06-02 17:04:15,216][255279] Updated weights for policy 0, policy_version 41319 (0.0008) +[2026-06-02 17:04:15,402][255279] Updated weights for policy 0, policy_version 41329 (0.0009) +[2026-06-02 17:04:15,501][253683] Fps is (10 sec: 19661.3, 60 sec: 19114.7, 300 sec: 19438.7). Total num frames: 21200896. Throughput: 0: 19541.4. Samples: 21212288. Policy #0 lag: (min: 61.0, avg: 77.5, max: 125.0) +[2026-06-02 17:04:15,502][253683] Avg episode reward: [(0, '1783.250')] +[2026-06-02 17:04:15,581][255279] Updated weights for policy 0, policy_version 41339 (0.0008) +[2026-06-02 17:04:15,797][255279] Updated weights for policy 0, policy_version 41350 (0.0009) +[2026-06-02 17:04:16,483][255279] Updated weights for policy 0, policy_version 41360 (0.0009) +[2026-06-02 17:04:16,670][255279] Updated weights for policy 0, policy_version 41370 (0.0008) +[2026-06-02 17:04:16,854][255279] Updated weights for policy 0, policy_version 41380 (0.0008) +[2026-06-02 17:04:17,047][255279] Updated weights for policy 0, policy_version 41390 (0.0008) +[2026-06-02 17:04:17,249][255279] Updated weights for policy 0, policy_version 41401 (0.0008) +[2026-06-02 17:04:17,437][255279] Updated weights for policy 0, policy_version 41411 (0.0009) +[2026-06-02 17:04:18,111][255279] Updated weights for policy 0, policy_version 41421 (0.0008) +[2026-06-02 17:04:18,321][255279] Updated weights for policy 0, policy_version 41432 (0.0009) +[2026-06-02 17:04:18,521][255279] Updated weights for policy 0, policy_version 41443 (0.0008) +[2026-06-02 17:04:18,730][255279] Updated weights for policy 0, policy_version 41454 (0.0009) +[2026-06-02 17:04:18,912][255279] Updated weights for policy 0, policy_version 41464 (0.0009) +[2026-06-02 17:04:19,103][255279] Updated weights for policy 0, policy_version 41474 (0.0008) +[2026-06-02 17:04:19,766][255279] Updated weights for policy 0, policy_version 41484 (0.0009) +[2026-06-02 17:04:19,952][255279] Updated weights for policy 0, policy_version 41494 (0.0009) +[2026-06-02 17:04:20,136][255279] Updated weights for policy 0, policy_version 41504 (0.0009) +[2026-06-02 17:04:20,314][255279] Updated weights for policy 0, policy_version 41514 (0.0008) +[2026-06-02 17:04:20,502][253683] Fps is (10 sec: 19660.8, 60 sec: 19114.7, 300 sec: 19438.6). Total num frames: 21299200. Throughput: 0: 19541.4. Samples: 21334400. Policy #0 lag: (min: 61.0, avg: 77.5, max: 125.0) +[2026-06-02 17:04:20,502][253683] Avg episode reward: [(0, '1754.083')] +[2026-06-02 17:04:20,506][255279] Updated weights for policy 0, policy_version 41524 (0.0008) +[2026-06-02 17:04:20,684][255279] Updated weights for policy 0, policy_version 41534 (0.0005) +[2026-06-02 17:04:20,874][255279] Updated weights for policy 0, policy_version 41544 (0.0005) +[2026-06-02 17:04:21,551][255279] Updated weights for policy 0, policy_version 41554 (0.0009) +[2026-06-02 17:04:21,738][255279] Updated weights for policy 0, policy_version 41564 (0.0009) +[2026-06-02 17:04:21,917][255279] Updated weights for policy 0, policy_version 41574 (0.0008) +[2026-06-02 17:04:22,115][255279] Updated weights for policy 0, policy_version 41584 (0.0008) +[2026-06-02 17:04:22,293][255279] Updated weights for policy 0, policy_version 41594 (0.0008) +[2026-06-02 17:04:22,473][255279] Updated weights for policy 0, policy_version 41604 (0.0008) +[2026-06-02 17:04:23,176][255279] Updated weights for policy 0, policy_version 41614 (0.0009) +[2026-06-02 17:04:23,357][255279] Updated weights for policy 0, policy_version 41624 (0.0008) +[2026-06-02 17:04:23,546][255279] Updated weights for policy 0, policy_version 41634 (0.0008) +[2026-06-02 17:04:23,734][255279] Updated weights for policy 0, policy_version 41644 (0.0008) +[2026-06-02 17:04:23,919][255279] Updated weights for policy 0, policy_version 41654 (0.0008) +[2026-06-02 17:04:24,121][255279] Updated weights for policy 0, policy_version 41665 (0.0008) +[2026-06-02 17:04:24,813][255279] Updated weights for policy 0, policy_version 41675 (0.0009) +[2026-06-02 17:04:24,993][255279] Updated weights for policy 0, policy_version 41685 (0.0009) +[2026-06-02 17:04:25,172][255279] Updated weights for policy 0, policy_version 41695 (0.0009) +[2026-06-02 17:04:25,369][255279] Updated weights for policy 0, policy_version 41705 (0.0009) +[2026-06-02 17:04:25,502][253683] Fps is (10 sec: 19660.5, 60 sec: 19114.7, 300 sec: 19438.6). Total num frames: 21397504. Throughput: 0: 19339.4. Samples: 21386368. Policy #0 lag: (min: 61.0, avg: 77.5, max: 125.0) +[2026-06-02 17:04:25,503][253683] Avg episode reward: [(0, '1805.362')] +[2026-06-02 17:04:25,557][255279] Updated weights for policy 0, policy_version 41715 (0.0009) +[2026-06-02 17:04:25,742][255279] Updated weights for policy 0, policy_version 41725 (0.0009) +[2026-06-02 17:04:25,925][255279] Updated weights for policy 0, policy_version 41735 (0.0009) +[2026-06-02 17:04:26,641][255279] Updated weights for policy 0, policy_version 41745 (0.0009) +[2026-06-02 17:04:26,822][255279] Updated weights for policy 0, policy_version 41755 (0.0008) +[2026-06-02 17:04:27,011][255279] Updated weights for policy 0, policy_version 41765 (0.0007) +[2026-06-02 17:04:27,193][255279] Updated weights for policy 0, policy_version 41775 (0.0009) +[2026-06-02 17:04:27,390][255279] Updated weights for policy 0, policy_version 41785 (0.0009) +[2026-06-02 17:04:27,597][255279] Updated weights for policy 0, policy_version 41796 (0.0009) +[2026-06-02 17:04:28,282][255279] Updated weights for policy 0, policy_version 41807 (0.0009) +[2026-06-02 17:04:28,486][255279] Updated weights for policy 0, policy_version 41818 (0.0009) +[2026-06-02 17:04:28,666][255279] Updated weights for policy 0, policy_version 41828 (0.0008) +[2026-06-02 17:04:28,860][255279] Updated weights for policy 0, policy_version 41838 (0.0008) +[2026-06-02 17:04:29,052][255279] Updated weights for policy 0, policy_version 41848 (0.0008) +[2026-06-02 17:04:29,236][255279] Updated weights for policy 0, policy_version 41858 (0.0008) +[2026-06-02 17:04:29,905][255279] Updated weights for policy 0, policy_version 41868 (0.0008) +[2026-06-02 17:04:30,114][255279] Updated weights for policy 0, policy_version 41879 (0.0008) +[2026-06-02 17:04:30,293][255279] Updated weights for policy 0, policy_version 41889 (0.0008) +[2026-06-02 17:04:30,483][255279] Updated weights for policy 0, policy_version 41899 (0.0009) +[2026-06-02 17:04:30,502][253683] Fps is (10 sec: 19660.8, 60 sec: 19114.7, 300 sec: 19438.6). Total num frames: 21495808. Throughput: 0: 19575.5. Samples: 21506560. Policy #0 lag: (min: 61.0, avg: 77.5, max: 125.0) +[2026-06-02 17:04:30,502][253683] Avg episode reward: [(0, '1809.536')] +[2026-06-02 17:04:30,661][255279] Updated weights for policy 0, policy_version 41909 (0.0008) +[2026-06-02 17:04:30,855][255279] Updated weights for policy 0, policy_version 41919 (0.0008) +[2026-06-02 17:04:31,548][255279] Updated weights for policy 0, policy_version 41929 (0.0008) +[2026-06-02 17:04:31,714][255279] Updated weights for policy 0, policy_version 41939 (0.0008) +[2026-06-02 17:04:31,901][255279] Updated weights for policy 0, policy_version 41949 (0.0008) +[2026-06-02 17:04:32,091][255279] Updated weights for policy 0, policy_version 41959 (0.0009) +[2026-06-02 17:04:32,297][255279] Updated weights for policy 0, policy_version 41970 (0.0008) +[2026-06-02 17:04:32,499][255279] Updated weights for policy 0, policy_version 41981 (0.0009) +[2026-06-02 17:04:32,683][255279] Updated weights for policy 0, policy_version 41991 (0.0008) +[2026-06-02 17:04:33,357][255279] Updated weights for policy 0, policy_version 42001 (0.0008) +[2026-06-02 17:04:33,578][255279] Updated weights for policy 0, policy_version 42012 (0.0008) +[2026-06-02 17:04:33,754][255279] Updated weights for policy 0, policy_version 42022 (0.0008) +[2026-06-02 17:04:33,947][255279] Updated weights for policy 0, policy_version 42032 (0.0009) +[2026-06-02 17:04:34,133][255279] Updated weights for policy 0, policy_version 42042 (0.0009) +[2026-06-02 17:04:34,327][255279] Updated weights for policy 0, policy_version 42052 (0.0009) +[2026-06-02 17:04:34,999][255279] Updated weights for policy 0, policy_version 42062 (0.0008) +[2026-06-02 17:04:35,186][255279] Updated weights for policy 0, policy_version 42072 (0.0009) +[2026-06-02 17:04:35,366][255279] Updated weights for policy 0, policy_version 42082 (0.0009) +[2026-06-02 17:04:35,502][253683] Fps is (10 sec: 19660.7, 60 sec: 19114.7, 300 sec: 19438.6). Total num frames: 21594112. Throughput: 0: 19521.4. Samples: 21625728. Policy #0 lag: (min: 63.0, avg: 80.7, max: 127.0) +[2026-06-02 17:04:35,502][253683] Avg episode reward: [(0, '1820.360')] +[2026-06-02 17:04:35,553][255279] Updated weights for policy 0, policy_version 42092 (0.0009) +[2026-06-02 17:04:35,741][255279] Updated weights for policy 0, policy_version 42102 (0.0009) +[2026-06-02 17:04:35,922][255279] Updated weights for policy 0, policy_version 42112 (0.0009) +[2026-06-02 17:04:36,067][255187] Early stopping after 8 epochs (64 sgd steps), loss delta 0.0000006 +[2026-06-02 17:04:36,642][255279] Updated weights for policy 0, policy_version 42123 (0.0009) +[2026-06-02 17:04:36,818][255279] Updated weights for policy 0, policy_version 42133 (0.0009) +[2026-06-02 17:04:36,995][255279] Updated weights for policy 0, policy_version 42143 (0.0009) +[2026-06-02 17:04:37,206][255279] Updated weights for policy 0, policy_version 42154 (0.0009) +[2026-06-02 17:04:37,394][255279] Updated weights for policy 0, policy_version 42164 (0.0009) +[2026-06-02 17:04:37,575][255279] Updated weights for policy 0, policy_version 42174 (0.0009) +[2026-06-02 17:04:37,759][255279] Updated weights for policy 0, policy_version 42184 (0.0009) +[2026-06-02 17:04:38,459][255279] Updated weights for policy 0, policy_version 42195 (0.0009) +[2026-06-02 17:04:38,643][255279] Updated weights for policy 0, policy_version 42205 (0.0009) +[2026-06-02 17:04:38,828][255279] Updated weights for policy 0, policy_version 42215 (0.0009) +[2026-06-02 17:04:39,012][255279] Updated weights for policy 0, policy_version 42225 (0.0009) +[2026-06-02 17:04:39,201][255279] Updated weights for policy 0, policy_version 42235 (0.0009) +[2026-06-02 17:04:39,391][255279] Updated weights for policy 0, policy_version 42245 (0.0009) +[2026-06-02 17:04:40,084][255279] Updated weights for policy 0, policy_version 42255 (0.0009) +[2026-06-02 17:04:40,278][255279] Updated weights for policy 0, policy_version 42266 (0.0009) +[2026-06-02 17:04:40,463][255279] Updated weights for policy 0, policy_version 42276 (0.0009) +[2026-06-02 17:04:40,502][253683] Fps is (10 sec: 19660.3, 60 sec: 19114.6, 300 sec: 19438.6). Total num frames: 21692416. Throughput: 0: 19299.4. Samples: 21677440. Policy #0 lag: (min: 63.0, avg: 80.7, max: 127.0) +[2026-06-02 17:04:40,504][253683] Avg episode reward: [(0, '1815.797')] +[2026-06-02 17:04:40,656][255279] Updated weights for policy 0, policy_version 42286 (0.0009) +[2026-06-02 17:04:40,842][255279] Updated weights for policy 0, policy_version 42296 (0.0009) +[2026-06-02 17:04:41,027][255279] Updated weights for policy 0, policy_version 42306 (0.0009) +[2026-06-02 17:04:41,717][255279] Updated weights for policy 0, policy_version 42316 (0.0009) +[2026-06-02 17:04:41,899][255279] Updated weights for policy 0, policy_version 42326 (0.0008) +[2026-06-02 17:04:42,086][255279] Updated weights for policy 0, policy_version 42336 (0.0008) +[2026-06-02 17:04:42,272][255279] Updated weights for policy 0, policy_version 42346 (0.0008) +[2026-06-02 17:04:42,471][255279] Updated weights for policy 0, policy_version 42356 (0.0009) +[2026-06-02 17:04:42,658][255279] Updated weights for policy 0, policy_version 42366 (0.0009) +[2026-06-02 17:04:42,838][255279] Updated weights for policy 0, policy_version 42376 (0.0008) +[2026-06-02 17:04:43,527][255279] Updated weights for policy 0, policy_version 42386 (0.0009) +[2026-06-02 17:04:43,706][255279] Updated weights for policy 0, policy_version 42396 (0.0008) +[2026-06-02 17:04:43,891][255279] Updated weights for policy 0, policy_version 42406 (0.0008) +[2026-06-02 17:04:44,077][255279] Updated weights for policy 0, policy_version 42416 (0.0008) +[2026-06-02 17:04:44,256][255279] Updated weights for policy 0, policy_version 42426 (0.0009) +[2026-06-02 17:04:44,450][255279] Updated weights for policy 0, policy_version 42436 (0.0008) +[2026-06-02 17:04:45,125][255279] Updated weights for policy 0, policy_version 42446 (0.0008) +[2026-06-02 17:04:45,311][255279] Updated weights for policy 0, policy_version 42456 (0.0008) +[2026-06-02 17:04:45,488][255279] Updated weights for policy 0, policy_version 42466 (0.0008) +[2026-06-02 17:04:45,502][253683] Fps is (10 sec: 19660.9, 60 sec: 19660.8, 300 sec: 19438.6). Total num frames: 21790720. Throughput: 0: 19561.3. Samples: 21800320. Policy #0 lag: (min: 63.0, avg: 80.7, max: 127.0) +[2026-06-02 17:04:45,502][253683] Avg episode reward: [(0, '1753.934')] +[2026-06-02 17:04:45,679][255279] Updated weights for policy 0, policy_version 42476 (0.0008) +[2026-06-02 17:04:45,869][255279] Updated weights for policy 0, policy_version 42486 (0.0009) +[2026-06-02 17:04:46,056][255279] Updated weights for policy 0, policy_version 42496 (0.0008) +[2026-06-02 17:04:46,752][255279] Updated weights for policy 0, policy_version 42506 (0.0009) +[2026-06-02 17:04:46,924][255279] Updated weights for policy 0, policy_version 42516 (0.0008) +[2026-06-02 17:04:47,118][255279] Updated weights for policy 0, policy_version 42526 (0.0008) +[2026-06-02 17:04:47,299][255279] Updated weights for policy 0, policy_version 42536 (0.0008) +[2026-06-02 17:04:47,494][255279] Updated weights for policy 0, policy_version 42546 (0.0008) +[2026-06-02 17:04:47,676][255279] Updated weights for policy 0, policy_version 42556 (0.0009) +[2026-06-02 17:04:47,868][255279] Updated weights for policy 0, policy_version 42566 (0.0009) +[2026-06-02 17:04:48,541][255279] Updated weights for policy 0, policy_version 42576 (0.0008) +[2026-06-02 17:04:48,726][255279] Updated weights for policy 0, policy_version 42586 (0.0008) +[2026-06-02 17:04:48,936][255279] Updated weights for policy 0, policy_version 42597 (0.0009) +[2026-06-02 17:04:49,126][255279] Updated weights for policy 0, policy_version 42607 (0.0009) +[2026-06-02 17:04:49,315][255279] Updated weights for policy 0, policy_version 42617 (0.0009) +[2026-06-02 17:04:49,500][255279] Updated weights for policy 0, policy_version 42627 (0.0008) +[2026-06-02 17:04:50,181][255279] Updated weights for policy 0, policy_version 42637 (0.0009) +[2026-06-02 17:04:50,356][255279] Updated weights for policy 0, policy_version 42647 (0.0009) +[2026-06-02 17:04:50,502][253683] Fps is (10 sec: 19661.1, 60 sec: 19660.8, 300 sec: 19438.6). Total num frames: 21889024. Throughput: 0: 19350.8. Samples: 21912320. Policy #0 lag: (min: 63.0, avg: 80.7, max: 127.0) +[2026-06-02 17:04:50,502][253683] Avg episode reward: [(0, '1756.457')] +[2026-06-02 17:04:50,537][255279] Updated weights for policy 0, policy_version 42657 (0.0009) +[2026-06-02 17:04:50,726][255279] Updated weights for policy 0, policy_version 42667 (0.0008) +[2026-06-02 17:04:50,912][255279] Updated weights for policy 0, policy_version 42677 (0.0008) +[2026-06-02 17:04:51,101][255279] Updated weights for policy 0, policy_version 42687 (0.0008) +[2026-06-02 17:04:51,812][255279] Updated weights for policy 0, policy_version 42697 (0.0008) +[2026-06-02 17:04:51,985][255279] Updated weights for policy 0, policy_version 42707 (0.0008) +[2026-06-02 17:04:52,168][255279] Updated weights for policy 0, policy_version 42717 (0.0008) +[2026-06-02 17:04:52,349][255279] Updated weights for policy 0, policy_version 42727 (0.0008) +[2026-06-02 17:04:52,539][255279] Updated weights for policy 0, policy_version 42737 (0.0008) +[2026-06-02 17:04:52,744][255279] Updated weights for policy 0, policy_version 42748 (0.0008) +[2026-06-02 17:04:52,938][255279] Updated weights for policy 0, policy_version 42758 (0.0009) +[2026-06-02 17:04:53,615][255279] Updated weights for policy 0, policy_version 42768 (0.0008) +[2026-06-02 17:04:53,808][255279] Updated weights for policy 0, policy_version 42778 (0.0008) +[2026-06-02 17:04:53,989][255279] Updated weights for policy 0, policy_version 42788 (0.0009) +[2026-06-02 17:04:54,178][255279] Updated weights for policy 0, policy_version 42798 (0.0008) +[2026-06-02 17:04:54,357][255279] Updated weights for policy 0, policy_version 42808 (0.0008) +[2026-06-02 17:04:54,553][255279] Updated weights for policy 0, policy_version 42818 (0.0008) +[2026-06-02 17:04:55,236][255279] Updated weights for policy 0, policy_version 42828 (0.0009) +[2026-06-02 17:04:55,413][255279] Updated weights for policy 0, policy_version 42838 (0.0008) +[2026-06-02 17:04:55,501][253683] Fps is (10 sec: 19660.9, 60 sec: 19660.8, 300 sec: 19438.6). Total num frames: 21987328. Throughput: 0: 19410.5. Samples: 21972352. Policy #0 lag: (min: 63.0, avg: 80.7, max: 127.0) +[2026-06-02 17:04:55,502][253683] Avg episode reward: [(0, '1734.522')] +[2026-06-02 17:04:55,600][255279] Updated weights for policy 0, policy_version 42848 (0.0008) +[2026-06-02 17:04:55,784][255279] Updated weights for policy 0, policy_version 42858 (0.0008) +[2026-06-02 17:04:55,985][255279] Updated weights for policy 0, policy_version 42868 (0.0008) +[2026-06-02 17:04:56,172][255279] Updated weights for policy 0, policy_version 42878 (0.0008) +[2026-06-02 17:04:56,351][255279] Updated weights for policy 0, policy_version 42888 (0.0009) +[2026-06-02 17:04:57,067][255279] Updated weights for policy 0, policy_version 42899 (0.0009) +[2026-06-02 17:04:57,251][255279] Updated weights for policy 0, policy_version 42909 (0.0008) +[2026-06-02 17:04:57,429][255279] Updated weights for policy 0, policy_version 42919 (0.0008) +[2026-06-02 17:04:57,619][255279] Updated weights for policy 0, policy_version 42929 (0.0009) +[2026-06-02 17:04:57,812][255279] Updated weights for policy 0, policy_version 42939 (0.0008) +[2026-06-02 17:04:57,998][255279] Updated weights for policy 0, policy_version 42949 (0.0008) +[2026-06-02 17:04:58,655][255279] Updated weights for policy 0, policy_version 42959 (0.0009) +[2026-06-02 17:04:58,832][255279] Updated weights for policy 0, policy_version 42969 (0.0009) +[2026-06-02 17:04:59,020][255279] Updated weights for policy 0, policy_version 42979 (0.0008) +[2026-06-02 17:04:59,200][255279] Updated weights for policy 0, policy_version 42989 (0.0008) +[2026-06-02 17:04:59,385][255279] Updated weights for policy 0, policy_version 42999 (0.0005) +[2026-06-02 17:04:59,577][255279] Updated weights for policy 0, policy_version 43009 (0.0005) +[2026-06-02 17:05:00,249][255279] Updated weights for policy 0, policy_version 43019 (0.0006) +[2026-06-02 17:05:00,434][255279] Updated weights for policy 0, policy_version 43029 (0.0006) +[2026-06-02 17:05:00,502][253683] Fps is (10 sec: 19661.0, 60 sec: 19660.8, 300 sec: 19438.6). Total num frames: 22085632. Throughput: 0: 19601.0. Samples: 22094336. Policy #0 lag: (min: 63.0, avg: 80.7, max: 127.0) +[2026-06-02 17:05:00,502][253683] Avg episode reward: [(0, '1794.426')] +[2026-06-02 17:05:00,612][255279] Updated weights for policy 0, policy_version 43039 (0.0005) +[2026-06-02 17:05:00,803][255279] Updated weights for policy 0, policy_version 43049 (0.0005) +[2026-06-02 17:05:00,993][255279] Updated weights for policy 0, policy_version 43059 (0.0005) +[2026-06-02 17:05:01,179][255279] Updated weights for policy 0, policy_version 43069 (0.0006) +[2026-06-02 17:05:01,370][255279] Updated weights for policy 0, policy_version 43079 (0.0006) +[2026-06-02 17:05:02,024][255279] Updated weights for policy 0, policy_version 43089 (0.0005) +[2026-06-02 17:05:02,198][255279] Updated weights for policy 0, policy_version 43099 (0.0006) +[2026-06-02 17:05:02,390][255279] Updated weights for policy 0, policy_version 43109 (0.0005) +[2026-06-02 17:05:02,579][255279] Updated weights for policy 0, policy_version 43119 (0.0009) +[2026-06-02 17:05:02,760][255279] Updated weights for policy 0, policy_version 43129 (0.0008) +[2026-06-02 17:05:02,950][255279] Updated weights for policy 0, policy_version 43139 (0.0008) +[2026-06-02 17:05:03,659][255279] Updated weights for policy 0, policy_version 43150 (0.0008) +[2026-06-02 17:05:03,841][255279] Updated weights for policy 0, policy_version 43160 (0.0008) +[2026-06-02 17:05:04,023][255279] Updated weights for policy 0, policy_version 43170 (0.0009) +[2026-06-02 17:05:04,246][255279] Updated weights for policy 0, policy_version 43181 (0.0008) +[2026-06-02 17:05:04,425][255279] Updated weights for policy 0, policy_version 43191 (0.0008) +[2026-06-02 17:05:04,627][255279] Updated weights for policy 0, policy_version 43202 (0.0008) +[2026-06-02 17:05:05,315][255279] Updated weights for policy 0, policy_version 43212 (0.0008) +[2026-06-02 17:05:05,498][255279] Updated weights for policy 0, policy_version 43222 (0.0009) +[2026-06-02 17:05:05,502][253683] Fps is (10 sec: 19660.5, 60 sec: 19660.8, 300 sec: 19438.6). Total num frames: 22183936. Throughput: 0: 19347.9. Samples: 22205056. Policy #0 lag: (min: 63.0, avg: 79.8, max: 127.0) +[2026-06-02 17:05:05,503][253683] Avg episode reward: [(0, '1800.867')] +[2026-06-02 17:05:05,678][255279] Updated weights for policy 0, policy_version 43232 (0.0008) +[2026-06-02 17:05:05,866][255279] Updated weights for policy 0, policy_version 43242 (0.0008) +[2026-06-02 17:05:06,051][255279] Updated weights for policy 0, policy_version 43252 (0.0008) +[2026-06-02 17:05:06,231][255279] Updated weights for policy 0, policy_version 43262 (0.0008) +[2026-06-02 17:05:06,418][255279] Updated weights for policy 0, policy_version 43272 (0.0008) +[2026-06-02 17:05:07,104][255279] Updated weights for policy 0, policy_version 43282 (0.0008) +[2026-06-02 17:05:07,281][255279] Updated weights for policy 0, policy_version 43292 (0.0008) +[2026-06-02 17:05:07,470][255279] Updated weights for policy 0, policy_version 43302 (0.0008) +[2026-06-02 17:05:07,655][255279] Updated weights for policy 0, policy_version 43312 (0.0008) +[2026-06-02 17:05:07,842][255279] Updated weights for policy 0, policy_version 43322 (0.0008) +[2026-06-02 17:05:08,027][255279] Updated weights for policy 0, policy_version 43332 (0.0008) +[2026-06-02 17:05:08,713][255279] Updated weights for policy 0, policy_version 43342 (0.0008) +[2026-06-02 17:05:08,897][255279] Updated weights for policy 0, policy_version 43352 (0.0008) +[2026-06-02 17:05:09,083][255279] Updated weights for policy 0, policy_version 43362 (0.0008) +[2026-06-02 17:05:09,267][255279] Updated weights for policy 0, policy_version 43372 (0.0009) +[2026-06-02 17:05:09,460][255279] Updated weights for policy 0, policy_version 43382 (0.0008) +[2026-06-02 17:05:09,644][255279] Updated weights for policy 0, policy_version 43392 (0.0008) +[2026-06-02 17:05:10,347][255279] Updated weights for policy 0, policy_version 43403 (0.0009) +[2026-06-02 17:05:10,502][253683] Fps is (10 sec: 19660.8, 60 sec: 19660.8, 300 sec: 19438.6). Total num frames: 22282240. Throughput: 0: 19541.3. Samples: 22265728. Policy #0 lag: (min: 63.0, avg: 79.8, max: 127.0) +[2026-06-02 17:05:10,502][253683] Avg episode reward: [(0, '1833.228')] +[2026-06-02 17:05:10,528][255279] Updated weights for policy 0, policy_version 43413 (0.0009) +[2026-06-02 17:05:10,719][255279] Updated weights for policy 0, policy_version 43423 (0.0009) +[2026-06-02 17:05:10,897][255279] Updated weights for policy 0, policy_version 43433 (0.0009) +[2026-06-02 17:05:11,091][255279] Updated weights for policy 0, policy_version 43443 (0.0009) +[2026-06-02 17:05:11,297][255279] Updated weights for policy 0, policy_version 43454 (0.0009) +[2026-06-02 17:05:11,474][255187] Saving new best policy, reward=1833.228! +[2026-06-02 17:05:12,013][255279] Updated weights for policy 0, policy_version 43465 (0.0008) +[2026-06-02 17:05:12,182][255279] Updated weights for policy 0, policy_version 43475 (0.0008) +[2026-06-02 17:05:12,362][255279] Updated weights for policy 0, policy_version 43485 (0.0008) +[2026-06-02 17:05:12,561][255279] Updated weights for policy 0, policy_version 43496 (0.0009) +[2026-06-02 17:05:12,756][255279] Updated weights for policy 0, policy_version 43506 (0.0009) +[2026-06-02 17:05:12,954][255279] Updated weights for policy 0, policy_version 43517 (0.0008) +[2026-06-02 17:05:13,154][255279] Updated weights for policy 0, policy_version 43528 (0.0008) +[2026-06-02 17:05:13,850][255279] Updated weights for policy 0, policy_version 43538 (0.0008) +[2026-06-02 17:05:14,042][255279] Updated weights for policy 0, policy_version 43548 (0.0008) +[2026-06-02 17:05:14,222][255279] Updated weights for policy 0, policy_version 43558 (0.0008) +[2026-06-02 17:05:14,413][255279] Updated weights for policy 0, policy_version 43568 (0.0008) +[2026-06-02 17:05:14,607][255279] Updated weights for policy 0, policy_version 43578 (0.0009) +[2026-06-02 17:05:14,782][255279] Updated weights for policy 0, policy_version 43588 (0.0008) +[2026-06-02 17:05:15,462][255279] Updated weights for policy 0, policy_version 43598 (0.0008) +[2026-06-02 17:05:15,501][253683] Fps is (10 sec: 19661.1, 60 sec: 19660.8, 300 sec: 19438.6). Total num frames: 22380544. Throughput: 0: 19589.7. Samples: 22388096. Policy #0 lag: (min: 63.0, avg: 79.8, max: 127.0) +[2026-06-02 17:05:15,502][253683] Avg episode reward: [(0, '1866.884')] +[2026-06-02 17:05:15,642][255279] Updated weights for policy 0, policy_version 43608 (0.0008) +[2026-06-02 17:05:15,844][255279] Updated weights for policy 0, policy_version 43619 (0.0008) +[2026-06-02 17:05:16,033][255279] Updated weights for policy 0, policy_version 43629 (0.0008) +[2026-06-02 17:05:16,227][255279] Updated weights for policy 0, policy_version 43639 (0.0008) +[2026-06-02 17:05:16,414][255279] Updated weights for policy 0, policy_version 43649 (0.0008) +[2026-06-02 17:05:16,536][255187] Saving new best policy, reward=1866.884! +[2026-06-02 17:05:17,109][255279] Updated weights for policy 0, policy_version 43659 (0.0009) +[2026-06-02 17:05:17,287][255279] Updated weights for policy 0, policy_version 43669 (0.0009) +[2026-06-02 17:05:17,466][255279] Updated weights for policy 0, policy_version 43679 (0.0009) +[2026-06-02 17:05:17,656][255279] Updated weights for policy 0, policy_version 43689 (0.0009) +[2026-06-02 17:05:17,839][255279] Updated weights for policy 0, policy_version 43699 (0.0009) +[2026-06-02 17:05:18,023][255279] Updated weights for policy 0, policy_version 43709 (0.0008) +[2026-06-02 17:05:18,207][255279] Updated weights for policy 0, policy_version 43719 (0.0008) +[2026-06-02 17:05:18,896][255279] Updated weights for policy 0, policy_version 43729 (0.0008) +[2026-06-02 17:05:19,075][255279] Updated weights for policy 0, policy_version 43739 (0.0008) +[2026-06-02 17:05:19,254][255279] Updated weights for policy 0, policy_version 43749 (0.0008) +[2026-06-02 17:05:19,444][255279] Updated weights for policy 0, policy_version 43759 (0.0008) +[2026-06-02 17:05:19,644][255279] Updated weights for policy 0, policy_version 43770 (0.0008) +[2026-06-02 17:05:19,838][255279] Updated weights for policy 0, policy_version 43780 (0.0008) +[2026-06-02 17:05:20,502][253683] Fps is (10 sec: 19660.4, 60 sec: 19660.7, 300 sec: 19438.6). Total num frames: 22478848. Throughput: 0: 19379.2. Samples: 22497792. Policy #0 lag: (min: 63.0, avg: 79.8, max: 127.0) +[2026-06-02 17:05:20,503][253683] Avg episode reward: [(0, '1953.031')] +[2026-06-02 17:05:20,536][255279] Updated weights for policy 0, policy_version 43790 (0.0009) +[2026-06-02 17:05:20,712][255279] Updated weights for policy 0, policy_version 43800 (0.0008) +[2026-06-02 17:05:20,902][255279] Updated weights for policy 0, policy_version 43810 (0.0008) +[2026-06-02 17:05:21,082][255279] Updated weights for policy 0, policy_version 43820 (0.0008) +[2026-06-02 17:05:21,296][255279] Updated weights for policy 0, policy_version 43831 (0.0008) +[2026-06-02 17:05:21,498][255279] Updated weights for policy 0, policy_version 43842 (0.0008) +[2026-06-02 17:05:21,604][255187] Saving new best policy, reward=1953.031! +[2026-06-02 17:05:22,186][255279] Updated weights for policy 0, policy_version 43852 (0.0008) +[2026-06-02 17:05:22,382][255279] Updated weights for policy 0, policy_version 43863 (0.0008) +[2026-06-02 17:05:22,566][255279] Updated weights for policy 0, policy_version 43873 (0.0008) +[2026-06-02 17:05:22,755][255279] Updated weights for policy 0, policy_version 43883 (0.0008) +[2026-06-02 17:05:22,940][255279] Updated weights for policy 0, policy_version 43893 (0.0008) +[2026-06-02 17:05:23,137][255279] Updated weights for policy 0, policy_version 43903 (0.0008) +[2026-06-02 17:05:23,822][255279] Updated weights for policy 0, policy_version 43913 (0.0008) +[2026-06-02 17:05:23,991][255279] Updated weights for policy 0, policy_version 43923 (0.0008) +[2026-06-02 17:05:24,173][255279] Updated weights for policy 0, policy_version 43933 (0.0008) +[2026-06-02 17:05:24,366][255279] Updated weights for policy 0, policy_version 43943 (0.0008) +[2026-06-02 17:05:24,557][255279] Updated weights for policy 0, policy_version 43953 (0.0008) +[2026-06-02 17:05:24,743][255279] Updated weights for policy 0, policy_version 43963 (0.0008) +[2026-06-02 17:05:24,939][255279] Updated weights for policy 0, policy_version 43974 (0.0008) +[2026-06-02 17:05:25,502][253683] Fps is (10 sec: 19660.7, 60 sec: 19660.8, 300 sec: 19438.6). Total num frames: 22577152. Throughput: 0: 19598.3. Samples: 22559360. Policy #0 lag: (min: 63.0, avg: 79.8, max: 127.0) +[2026-06-02 17:05:25,502][253683] Avg episode reward: [(0, '2004.241')] +[2026-06-02 17:05:25,624][255279] Updated weights for policy 0, policy_version 43984 (0.0009) +[2026-06-02 17:05:25,796][255279] Updated weights for policy 0, policy_version 43994 (0.0008) +[2026-06-02 17:05:25,985][255279] Updated weights for policy 0, policy_version 44004 (0.0008) +[2026-06-02 17:05:26,181][255279] Updated weights for policy 0, policy_version 44014 (0.0008) +[2026-06-02 17:05:26,361][255279] Updated weights for policy 0, policy_version 44024 (0.0008) +[2026-06-02 17:05:26,550][255279] Updated weights for policy 0, policy_version 44034 (0.0008) +[2026-06-02 17:05:26,652][255187] Saving new best policy, reward=2004.241! +[2026-06-02 17:05:27,253][255279] Updated weights for policy 0, policy_version 44044 (0.0008) +[2026-06-02 17:05:27,456][255279] Updated weights for policy 0, policy_version 44055 (0.0008) +[2026-06-02 17:05:27,639][255279] Updated weights for policy 0, policy_version 44065 (0.0008) +[2026-06-02 17:05:27,850][255279] Updated weights for policy 0, policy_version 44076 (0.0008) +[2026-06-02 17:05:28,034][255279] Updated weights for policy 0, policy_version 44086 (0.0008) +[2026-06-02 17:05:28,216][255279] Updated weights for policy 0, policy_version 44096 (0.0010) +[2026-06-02 17:05:28,895][255279] Updated weights for policy 0, policy_version 44106 (0.0008) +[2026-06-02 17:05:29,072][255279] Updated weights for policy 0, policy_version 44116 (0.0009) +[2026-06-02 17:05:29,277][255279] Updated weights for policy 0, policy_version 44127 (0.0009) +[2026-06-02 17:05:29,462][255279] Updated weights for policy 0, policy_version 44137 (0.0009) +[2026-06-02 17:05:29,645][255279] Updated weights for policy 0, policy_version 44147 (0.0009) +[2026-06-02 17:05:29,846][255279] Updated weights for policy 0, policy_version 44158 (0.0008) +[2026-06-02 17:05:30,502][253683] Fps is (10 sec: 19661.1, 60 sec: 19660.8, 300 sec: 19438.6). Total num frames: 22675456. Throughput: 0: 19538.5. Samples: 22679552. Policy #0 lag: (min: 63.0, avg: 79.8, max: 127.0) +[2026-06-02 17:05:30,503][253683] Avg episode reward: [(0, '1961.381')] +[2026-06-02 17:05:30,561][255279] Updated weights for policy 0, policy_version 44169 (0.0008) +[2026-06-02 17:05:30,736][255279] Updated weights for policy 0, policy_version 44179 (0.0008) +[2026-06-02 17:05:30,921][255279] Updated weights for policy 0, policy_version 44189 (0.0008) +[2026-06-02 17:05:31,106][255279] Updated weights for policy 0, policy_version 44199 (0.0008) +[2026-06-02 17:05:31,328][255279] Updated weights for policy 0, policy_version 44211 (0.0009) +[2026-06-02 17:05:31,530][255279] Updated weights for policy 0, policy_version 44222 (0.0007) +[2026-06-02 17:05:32,245][255279] Updated weights for policy 0, policy_version 44233 (0.0007) +[2026-06-02 17:05:32,428][255279] Updated weights for policy 0, policy_version 44243 (0.0008) +[2026-06-02 17:05:32,605][255279] Updated weights for policy 0, policy_version 44253 (0.0009) +[2026-06-02 17:05:32,789][255279] Updated weights for policy 0, policy_version 44263 (0.0008) +[2026-06-02 17:05:32,981][255279] Updated weights for policy 0, policy_version 44273 (0.0008) +[2026-06-02 17:05:33,171][255279] Updated weights for policy 0, policy_version 44283 (0.0008) +[2026-06-02 17:05:33,355][255279] Updated weights for policy 0, policy_version 44293 (0.0009) +[2026-06-02 17:05:34,043][255279] Updated weights for policy 0, policy_version 44303 (0.0009) +[2026-06-02 17:05:34,229][255279] Updated weights for policy 0, policy_version 44313 (0.0008) +[2026-06-02 17:05:34,413][255279] Updated weights for policy 0, policy_version 44323 (0.0008) +[2026-06-02 17:05:34,598][255279] Updated weights for policy 0, policy_version 44333 (0.0008) +[2026-06-02 17:05:34,780][255279] Updated weights for policy 0, policy_version 44343 (0.0008) +[2026-06-02 17:05:34,967][255279] Updated weights for policy 0, policy_version 44353 (0.0008) +[2026-06-02 17:05:35,502][253683] Fps is (10 sec: 19660.8, 60 sec: 19660.8, 300 sec: 19438.6). Total num frames: 22773760. Throughput: 0: 19538.5. Samples: 22791552. Policy #0 lag: (min: 50.0, avg: 93.8, max: 114.0) +[2026-06-02 17:05:35,503][253683] Avg episode reward: [(0, '1984.082')] +[2026-06-02 17:05:35,674][255279] Updated weights for policy 0, policy_version 44363 (0.0008) +[2026-06-02 17:05:35,849][255279] Updated weights for policy 0, policy_version 44373 (0.0008) +[2026-06-02 17:05:36,053][255279] Updated weights for policy 0, policy_version 44384 (0.0008) +[2026-06-02 17:05:36,257][255279] Updated weights for policy 0, policy_version 44395 (0.0009) +[2026-06-02 17:05:36,444][255279] Updated weights for policy 0, policy_version 44405 (0.0008) +[2026-06-02 17:05:36,631][255279] Updated weights for policy 0, policy_version 44415 (0.0009) +[2026-06-02 17:05:37,350][255279] Updated weights for policy 0, policy_version 44426 (0.0009) +[2026-06-02 17:05:37,525][255279] Updated weights for policy 0, policy_version 44436 (0.0009) +[2026-06-02 17:05:37,710][255279] Updated weights for policy 0, policy_version 44446 (0.0008) +[2026-06-02 17:05:37,885][255279] Updated weights for policy 0, policy_version 44456 (0.0008) +[2026-06-02 17:05:38,089][255279] Updated weights for policy 0, policy_version 44466 (0.0008) +[2026-06-02 17:05:38,263][255279] Updated weights for policy 0, policy_version 44476 (0.0009) +[2026-06-02 17:05:38,447][255279] Updated weights for policy 0, policy_version 44486 (0.0008) +[2026-06-02 17:05:39,146][255279] Updated weights for policy 0, policy_version 44496 (0.0009) +[2026-06-02 17:05:39,333][255279] Updated weights for policy 0, policy_version 44506 (0.0009) +[2026-06-02 17:05:39,513][255279] Updated weights for policy 0, policy_version 44516 (0.0009) +[2026-06-02 17:05:39,705][255279] Updated weights for policy 0, policy_version 44526 (0.0009) +[2026-06-02 17:05:39,895][255279] Updated weights for policy 0, policy_version 44536 (0.0008) +[2026-06-02 17:05:40,099][255279] Updated weights for policy 0, policy_version 44547 (0.0008) +[2026-06-02 17:05:40,501][253683] Fps is (10 sec: 19661.0, 60 sec: 19660.9, 300 sec: 19549.7). Total num frames: 22872064. Throughput: 0: 19581.2. Samples: 22853504. Policy #0 lag: (min: 50.0, avg: 93.8, max: 114.0) +[2026-06-02 17:05:40,502][253683] Avg episode reward: [(0, '2036.850')] +[2026-06-02 17:05:40,506][255187] Saving new best policy, reward=2036.850! +[2026-06-02 17:05:40,801][255279] Updated weights for policy 0, policy_version 44557 (0.0009) +[2026-06-02 17:05:40,982][255279] Updated weights for policy 0, policy_version 44567 (0.0008) +[2026-06-02 17:05:41,171][255279] Updated weights for policy 0, policy_version 44577 (0.0009) +[2026-06-02 17:05:41,349][255279] Updated weights for policy 0, policy_version 44587 (0.0008) +[2026-06-02 17:05:41,538][255279] Updated weights for policy 0, policy_version 44597 (0.0009) +[2026-06-02 17:05:41,735][255279] Updated weights for policy 0, policy_version 44607 (0.0008) +[2026-06-02 17:05:42,392][255279] Updated weights for policy 0, policy_version 44617 (0.0009) +[2026-06-02 17:05:42,589][255279] Updated weights for policy 0, policy_version 44628 (0.0009) +[2026-06-02 17:05:42,774][255279] Updated weights for policy 0, policy_version 44638 (0.0009) +[2026-06-02 17:05:42,961][255279] Updated weights for policy 0, policy_version 44648 (0.0009) +[2026-06-02 17:05:43,145][255279] Updated weights for policy 0, policy_version 44658 (0.0008) +[2026-06-02 17:05:43,339][255279] Updated weights for policy 0, policy_version 44668 (0.0008) +[2026-06-02 17:05:43,515][255279] Updated weights for policy 0, policy_version 44678 (0.0009) +[2026-06-02 17:05:44,206][255279] Updated weights for policy 0, policy_version 44689 (0.0009) +[2026-06-02 17:05:44,403][255279] Updated weights for policy 0, policy_version 44699 (0.0009) +[2026-06-02 17:05:44,600][255279] Updated weights for policy 0, policy_version 44710 (0.0008) +[2026-06-02 17:05:44,785][255279] Updated weights for policy 0, policy_version 44720 (0.0007) +[2026-06-02 17:05:44,975][255279] Updated weights for policy 0, policy_version 44730 (0.0009) +[2026-06-02 17:05:45,158][255279] Updated weights for policy 0, policy_version 44740 (0.0008) +[2026-06-02 17:05:45,502][253683] Fps is (10 sec: 19660.8, 60 sec: 19660.8, 300 sec: 19549.7). Total num frames: 22970368. Throughput: 0: 19390.6. Samples: 22966912. Policy #0 lag: (min: 50.0, avg: 93.8, max: 114.0) +[2026-06-02 17:05:45,502][253683] Avg episode reward: [(0, '2078.716')] +[2026-06-02 17:05:45,507][255187] Saving new best policy, reward=2078.716! +[2026-06-02 17:05:45,850][255279] Updated weights for policy 0, policy_version 44750 (0.0008) +[2026-06-02 17:05:46,029][255279] Updated weights for policy 0, policy_version 44760 (0.0008) +[2026-06-02 17:05:46,211][255279] Updated weights for policy 0, policy_version 44770 (0.0008) +[2026-06-02 17:05:46,397][255279] Updated weights for policy 0, policy_version 44780 (0.0008) +[2026-06-02 17:05:46,583][255279] Updated weights for policy 0, policy_version 44790 (0.0008) +[2026-06-02 17:05:46,767][255279] Updated weights for policy 0, policy_version 44800 (0.0008) +[2026-06-02 17:05:47,467][255279] Updated weights for policy 0, policy_version 44810 (0.0008) +[2026-06-02 17:05:47,640][255279] Updated weights for policy 0, policy_version 44820 (0.0009) +[2026-06-02 17:05:47,842][255279] Updated weights for policy 0, policy_version 44831 (0.0008) +[2026-06-02 17:05:48,025][255279] Updated weights for policy 0, policy_version 44841 (0.0008) +[2026-06-02 17:05:48,224][255279] Updated weights for policy 0, policy_version 44852 (0.0008) +[2026-06-02 17:05:48,413][255279] Updated weights for policy 0, policy_version 44862 (0.0008) +[2026-06-02 17:05:48,594][255279] Updated weights for policy 0, policy_version 44872 (0.0008) +[2026-06-02 17:05:49,299][255279] Updated weights for policy 0, policy_version 44882 (0.0009) +[2026-06-02 17:05:49,477][255279] Updated weights for policy 0, policy_version 44892 (0.0008) +[2026-06-02 17:05:49,675][255279] Updated weights for policy 0, policy_version 44903 (0.0009) +[2026-06-02 17:05:49,886][255279] Updated weights for policy 0, policy_version 44915 (0.0008) +[2026-06-02 17:05:50,068][255279] Updated weights for policy 0, policy_version 44925 (0.0009) +[2026-06-02 17:05:50,258][255279] Updated weights for policy 0, policy_version 44935 (0.0009) +[2026-06-02 17:05:50,502][253683] Fps is (10 sec: 19660.6, 60 sec: 19660.8, 300 sec: 19549.7). Total num frames: 23068672. Throughput: 0: 19581.2. Samples: 23086208. Policy #0 lag: (min: 50.0, avg: 93.8, max: 114.0) +[2026-06-02 17:05:50,503][253683] Avg episode reward: [(0, '2160.860')] +[2026-06-02 17:05:50,509][255187] Saving results/checkpoints_factor_sweeps/flappy/context_window/flappy_frame_stack_fixed_l2_fs5_seed10/checkpoint_p0/checkpoint_000044936_23068672.pth... +[2026-06-02 17:05:50,541][255187] Saving new best policy, reward=2160.860! +[2026-06-02 17:05:50,969][255279] Updated weights for policy 0, policy_version 44945 (0.0008) +[2026-06-02 17:05:51,167][255279] Updated weights for policy 0, policy_version 44956 (0.0009) +[2026-06-02 17:05:51,382][255279] Updated weights for policy 0, policy_version 44967 (0.0009) +[2026-06-02 17:05:51,565][255279] Updated weights for policy 0, policy_version 44977 (0.0009) +[2026-06-02 17:05:51,761][255279] Updated weights for policy 0, policy_version 44988 (0.0009) +[2026-06-02 17:05:51,983][255279] Updated weights for policy 0, policy_version 45000 (0.0009) +[2026-06-02 17:05:52,677][255279] Updated weights for policy 0, policy_version 45010 (0.0008) +[2026-06-02 17:05:52,859][255279] Updated weights for policy 0, policy_version 45020 (0.0008) +[2026-06-02 17:05:53,049][255279] Updated weights for policy 0, policy_version 45030 (0.0009) +[2026-06-02 17:05:53,247][255279] Updated weights for policy 0, policy_version 45040 (0.0008) +[2026-06-02 17:05:53,449][255279] Updated weights for policy 0, policy_version 45051 (0.0008) +[2026-06-02 17:05:53,643][255279] Updated weights for policy 0, policy_version 45061 (0.0009) +[2026-06-02 17:05:54,322][255279] Updated weights for policy 0, policy_version 45071 (0.0009) +[2026-06-02 17:05:54,539][255279] Updated weights for policy 0, policy_version 45083 (0.0009) +[2026-06-02 17:05:54,716][255279] Updated weights for policy 0, policy_version 45093 (0.0008) +[2026-06-02 17:05:54,904][255279] Updated weights for policy 0, policy_version 45103 (0.0008) +[2026-06-02 17:05:55,100][255279] Updated weights for policy 0, policy_version 45113 (0.0008) +[2026-06-02 17:05:55,275][255279] Updated weights for policy 0, policy_version 45123 (0.0008) +[2026-06-02 17:05:55,502][253683] Fps is (10 sec: 19660.7, 60 sec: 19660.8, 300 sec: 19549.7). Total num frames: 23166976. Throughput: 0: 19603.9. Samples: 23147904. Policy #0 lag: (min: 50.0, avg: 93.8, max: 114.0) +[2026-06-02 17:05:55,502][253683] Avg episode reward: [(0, '2157.667')] +[2026-06-02 17:05:56,001][255279] Updated weights for policy 0, policy_version 45133 (0.0009) +[2026-06-02 17:05:56,180][255279] Updated weights for policy 0, policy_version 45143 (0.0008) +[2026-06-02 17:05:56,369][255279] Updated weights for policy 0, policy_version 45153 (0.0008) +[2026-06-02 17:05:56,556][255279] Updated weights for policy 0, policy_version 45163 (0.0009) +[2026-06-02 17:05:56,739][255279] Updated weights for policy 0, policy_version 45173 (0.0009) +[2026-06-02 17:05:56,948][255279] Updated weights for policy 0, policy_version 45184 (0.0009) +[2026-06-02 17:05:57,606][255279] Updated weights for policy 0, policy_version 45194 (0.0008) +[2026-06-02 17:05:57,799][255279] Updated weights for policy 0, policy_version 45205 (0.0008) +[2026-06-02 17:05:57,981][255279] Updated weights for policy 0, policy_version 45215 (0.0008) +[2026-06-02 17:05:58,167][255279] Updated weights for policy 0, policy_version 45225 (0.0009) +[2026-06-02 17:05:58,354][255279] Updated weights for policy 0, policy_version 45235 (0.0008) +[2026-06-02 17:05:58,531][255279] Updated weights for policy 0, policy_version 45245 (0.0008) +[2026-06-02 17:05:58,721][255279] Updated weights for policy 0, policy_version 45255 (0.0008) +[2026-06-02 17:05:59,430][255279] Updated weights for policy 0, policy_version 45266 (0.0009) +[2026-06-02 17:05:59,612][255279] Updated weights for policy 0, policy_version 45276 (0.0008) +[2026-06-02 17:05:59,810][255279] Updated weights for policy 0, policy_version 45287 (0.0008) +[2026-06-02 17:06:00,003][255279] Updated weights for policy 0, policy_version 45297 (0.0009) +[2026-06-02 17:06:00,187][255279] Updated weights for policy 0, policy_version 45307 (0.0008) +[2026-06-02 17:06:00,372][255279] Updated weights for policy 0, policy_version 45317 (0.0008) +[2026-06-02 17:06:00,501][253683] Fps is (10 sec: 19660.9, 60 sec: 19660.8, 300 sec: 19549.7). Total num frames: 23265280. Throughput: 0: 19319.5. Samples: 23257472. Policy #0 lag: (min: 50.0, avg: 93.8, max: 114.0) +[2026-06-02 17:06:00,502][253683] Avg episode reward: [(0, '2157.667')] +[2026-06-02 17:06:01,102][255279] Updated weights for policy 0, policy_version 45328 (0.0008) +[2026-06-02 17:06:01,281][255279] Updated weights for policy 0, policy_version 45338 (0.0008) +[2026-06-02 17:06:01,463][255279] Updated weights for policy 0, policy_version 45348 (0.0008) +[2026-06-02 17:06:01,649][255279] Updated weights for policy 0, policy_version 45358 (0.0008) +[2026-06-02 17:06:01,853][255279] Updated weights for policy 0, policy_version 45369 (0.0008) +[2026-06-02 17:06:02,039][255279] Updated weights for policy 0, policy_version 45379 (0.0008) +[2026-06-02 17:06:02,743][255279] Updated weights for policy 0, policy_version 45389 (0.0008) +[2026-06-02 17:06:02,922][255279] Updated weights for policy 0, policy_version 45399 (0.0008) +[2026-06-02 17:06:03,102][255279] Updated weights for policy 0, policy_version 45409 (0.0009) +[2026-06-02 17:06:03,314][255279] Updated weights for policy 0, policy_version 45420 (0.0008) +[2026-06-02 17:06:03,494][255279] Updated weights for policy 0, policy_version 45430 (0.0008) +[2026-06-02 17:06:03,698][255279] Updated weights for policy 0, policy_version 45441 (0.0009) +[2026-06-02 17:06:04,373][255279] Updated weights for policy 0, policy_version 45451 (0.0009) +[2026-06-02 17:06:04,554][255279] Updated weights for policy 0, policy_version 45461 (0.0007) +[2026-06-02 17:06:04,734][255279] Updated weights for policy 0, policy_version 45471 (0.0009) +[2026-06-02 17:06:04,915][255279] Updated weights for policy 0, policy_version 45481 (0.0009) +[2026-06-02 17:06:05,123][255279] Updated weights for policy 0, policy_version 45492 (0.0009) +[2026-06-02 17:06:05,313][255279] Updated weights for policy 0, policy_version 45502 (0.0009) +[2026-06-02 17:06:05,493][255279] Updated weights for policy 0, policy_version 45512 (0.0009) +[2026-06-02 17:06:05,501][253683] Fps is (10 sec: 19661.1, 60 sec: 19660.8, 300 sec: 19549.7). Total num frames: 23363584. Throughput: 0: 19615.4. Samples: 23380480. Policy #0 lag: (min: 27.0, avg: 61.6, max: 86.0) +[2026-06-02 17:06:05,502][253683] Avg episode reward: [(0, '2151.343')] +[2026-06-02 17:06:06,190][255279] Updated weights for policy 0, policy_version 45522 (0.0004) +[2026-06-02 17:06:06,383][255279] Updated weights for policy 0, policy_version 45533 (0.0004) +[2026-06-02 17:06:06,601][255279] Updated weights for policy 0, policy_version 45545 (0.0004) +[2026-06-02 17:06:06,796][255279] Updated weights for policy 0, policy_version 45556 (0.0004) +[2026-06-02 17:06:06,981][255279] Updated weights for policy 0, policy_version 45566 (0.0004) +[2026-06-02 17:06:07,163][255279] Updated weights for policy 0, policy_version 45576 (0.0004) +[2026-06-02 17:06:07,880][255279] Updated weights for policy 0, policy_version 45587 (0.0007) +[2026-06-02 17:06:08,059][255279] Updated weights for policy 0, policy_version 45597 (0.0004) +[2026-06-02 17:06:08,248][255279] Updated weights for policy 0, policy_version 45607 (0.0004) +[2026-06-02 17:06:08,441][255279] Updated weights for policy 0, policy_version 45617 (0.0004) +[2026-06-02 17:06:08,619][255279] Updated weights for policy 0, policy_version 45627 (0.0004) +[2026-06-02 17:06:08,801][255279] Updated weights for policy 0, policy_version 45637 (0.0004) +[2026-06-02 17:06:09,483][255279] Updated weights for policy 0, policy_version 45647 (0.0005) +[2026-06-02 17:06:09,664][255279] Updated weights for policy 0, policy_version 45657 (0.0006) +[2026-06-02 17:06:09,854][255279] Updated weights for policy 0, policy_version 45667 (0.0005) +[2026-06-02 17:06:10,036][255279] Updated weights for policy 0, policy_version 45677 (0.0005) +[2026-06-02 17:06:10,236][255279] Updated weights for policy 0, policy_version 45687 (0.0006) +[2026-06-02 17:06:10,419][255279] Updated weights for policy 0, policy_version 45697 (0.0005) +[2026-06-02 17:06:10,502][253683] Fps is (10 sec: 16383.9, 60 sec: 19114.7, 300 sec: 19438.6). Total num frames: 23429120. Throughput: 0: 19609.6. Samples: 23441792. Policy #0 lag: (min: 27.0, avg: 61.6, max: 86.0) +[2026-06-02 17:06:10,502][253683] Avg episode reward: [(0, '2161.224')] +[2026-06-02 17:06:10,539][255187] Saving new best policy, reward=2161.224! +[2026-06-02 17:06:11,111][255279] Updated weights for policy 0, policy_version 45708 (0.0005) +[2026-06-02 17:06:11,303][255279] Updated weights for policy 0, policy_version 45719 (0.0005) +[2026-06-02 17:06:11,498][255279] Updated weights for policy 0, policy_version 45729 (0.0005) +[2026-06-02 17:06:11,675][255279] Updated weights for policy 0, policy_version 45739 (0.0005) +[2026-06-02 17:06:11,862][255279] Updated weights for policy 0, policy_version 45749 (0.0004) +[2026-06-02 17:06:12,064][255279] Updated weights for policy 0, policy_version 45760 (0.0005) +[2026-06-02 17:06:12,760][255279] Updated weights for policy 0, policy_version 45770 (0.0008) +[2026-06-02 17:06:12,934][255279] Updated weights for policy 0, policy_version 45780 (0.0008) +[2026-06-02 17:06:13,112][255279] Updated weights for policy 0, policy_version 45790 (0.0008) +[2026-06-02 17:06:13,290][255279] Updated weights for policy 0, policy_version 45800 (0.0009) +[2026-06-02 17:06:13,480][255279] Updated weights for policy 0, policy_version 45810 (0.0009) +[2026-06-02 17:06:13,673][255279] Updated weights for policy 0, policy_version 45820 (0.0009) +[2026-06-02 17:06:13,858][255279] Updated weights for policy 0, policy_version 45830 (0.0009) +[2026-06-02 17:06:14,546][255279] Updated weights for policy 0, policy_version 45840 (0.0008) +[2026-06-02 17:06:14,737][255279] Updated weights for policy 0, policy_version 45850 (0.0008) +[2026-06-02 17:06:14,914][255279] Updated weights for policy 0, policy_version 45860 (0.0008) +[2026-06-02 17:06:15,100][255279] Updated weights for policy 0, policy_version 45870 (0.0008) +[2026-06-02 17:06:15,288][255279] Updated weights for policy 0, policy_version 45880 (0.0007) +[2026-06-02 17:06:15,472][255279] Updated weights for policy 0, policy_version 45890 (0.0004) +[2026-06-02 17:06:15,502][253683] Fps is (10 sec: 16384.0, 60 sec: 19114.7, 300 sec: 19438.6). Total num frames: 23527424. Throughput: 0: 19384.9. Samples: 23551872. Policy #0 lag: (min: 27.0, avg: 61.6, max: 86.0) +[2026-06-02 17:06:15,502][253683] Avg episode reward: [(0, '2188.077')] +[2026-06-02 17:06:15,582][255187] Saving new best policy, reward=2188.077! +[2026-06-02 17:06:16,137][255279] Updated weights for policy 0, policy_version 45900 (0.0004) +[2026-06-02 17:06:16,320][255279] Updated weights for policy 0, policy_version 45910 (0.0004) +[2026-06-02 17:06:16,508][255279] Updated weights for policy 0, policy_version 45920 (0.0004) +[2026-06-02 17:06:16,690][255279] Updated weights for policy 0, policy_version 45930 (0.0008) +[2026-06-02 17:06:16,885][255279] Updated weights for policy 0, policy_version 45940 (0.0008) +[2026-06-02 17:06:17,071][255279] Updated weights for policy 0, policy_version 45950 (0.0008) +[2026-06-02 17:06:17,258][255279] Updated weights for policy 0, policy_version 45960 (0.0009) +[2026-06-02 17:06:17,946][255279] Updated weights for policy 0, policy_version 45971 (0.0009) +[2026-06-02 17:06:18,128][255279] Updated weights for policy 0, policy_version 45981 (0.0008) +[2026-06-02 17:06:18,318][255279] Updated weights for policy 0, policy_version 45991 (0.0009) +[2026-06-02 17:06:18,523][255279] Updated weights for policy 0, policy_version 46002 (0.0008) +[2026-06-02 17:06:18,716][255279] Updated weights for policy 0, policy_version 46012 (0.0008) +[2026-06-02 17:06:18,902][255279] Updated weights for policy 0, policy_version 46022 (0.0008) +[2026-06-02 17:06:19,604][255279] Updated weights for policy 0, policy_version 46033 (0.0008) +[2026-06-02 17:06:19,786][255279] Updated weights for policy 0, policy_version 46043 (0.0009) +[2026-06-02 17:06:19,991][255279] Updated weights for policy 0, policy_version 46054 (0.0009) +[2026-06-02 17:06:20,179][255279] Updated weights for policy 0, policy_version 46064 (0.0008) +[2026-06-02 17:06:20,377][255279] Updated weights for policy 0, policy_version 46074 (0.0008) +[2026-06-02 17:06:20,502][253683] Fps is (10 sec: 19660.7, 60 sec: 19114.7, 300 sec: 19438.6). Total num frames: 23625728. Throughput: 0: 19618.1. Samples: 23674368. Policy #0 lag: (min: 27.0, avg: 61.6, max: 86.0) +[2026-06-02 17:06:20,503][253683] Avg episode reward: [(0, '2242.386')] +[2026-06-02 17:06:20,550][255279] Updated weights for policy 0, policy_version 46084 (0.0008) +[2026-06-02 17:06:20,627][255187] Saving new best policy, reward=2242.386! +[2026-06-02 17:06:21,229][255279] Updated weights for policy 0, policy_version 46094 (0.0008) +[2026-06-02 17:06:21,415][255279] Updated weights for policy 0, policy_version 46104 (0.0009) +[2026-06-02 17:06:21,603][255279] Updated weights for policy 0, policy_version 46114 (0.0008) +[2026-06-02 17:06:21,781][255279] Updated weights for policy 0, policy_version 46124 (0.0009) +[2026-06-02 17:06:21,975][255279] Updated weights for policy 0, policy_version 46134 (0.0008) +[2026-06-02 17:06:22,160][255279] Updated weights for policy 0, policy_version 46144 (0.0008) +[2026-06-02 17:06:22,837][255279] Updated weights for policy 0, policy_version 46154 (0.0009) +[2026-06-02 17:06:23,015][255279] Updated weights for policy 0, policy_version 46164 (0.0008) +[2026-06-02 17:06:23,194][255279] Updated weights for policy 0, policy_version 46174 (0.0008) +[2026-06-02 17:06:23,382][255279] Updated weights for policy 0, policy_version 46184 (0.0008) +[2026-06-02 17:06:23,563][255279] Updated weights for policy 0, policy_version 46194 (0.0009) +[2026-06-02 17:06:23,756][255279] Updated weights for policy 0, policy_version 46204 (0.0008) +[2026-06-02 17:06:23,935][255279] Updated weights for policy 0, policy_version 46214 (0.0008) +[2026-06-02 17:06:24,615][255279] Updated weights for policy 0, policy_version 46224 (0.0009) +[2026-06-02 17:06:24,796][255279] Updated weights for policy 0, policy_version 46234 (0.0009) +[2026-06-02 17:06:24,997][255279] Updated weights for policy 0, policy_version 46245 (0.0009) +[2026-06-02 17:06:25,181][255279] Updated weights for policy 0, policy_version 46255 (0.0009) +[2026-06-02 17:06:25,358][255279] Updated weights for policy 0, policy_version 46265 (0.0009) +[2026-06-02 17:06:25,502][253683] Fps is (10 sec: 19660.8, 60 sec: 19114.7, 300 sec: 19438.6). Total num frames: 23724032. Throughput: 0: 19601.0. Samples: 23735552. Policy #0 lag: (min: 27.0, avg: 61.6, max: 86.0) +[2026-06-02 17:06:25,502][253683] Avg episode reward: [(0, '2268.312')] +[2026-06-02 17:06:25,554][255279] Updated weights for policy 0, policy_version 46275 (0.0009) +[2026-06-02 17:06:25,637][255187] Saving new best policy, reward=2268.312! +[2026-06-02 17:06:26,264][255279] Updated weights for policy 0, policy_version 46285 (0.0009) +[2026-06-02 17:06:26,452][255279] Updated weights for policy 0, policy_version 46295 (0.0009) +[2026-06-02 17:06:26,627][255279] Updated weights for policy 0, policy_version 46305 (0.0009) +[2026-06-02 17:06:26,814][255279] Updated weights for policy 0, policy_version 46315 (0.0007) +[2026-06-02 17:06:27,023][255279] Updated weights for policy 0, policy_version 46326 (0.0008) +[2026-06-02 17:06:27,212][255279] Updated weights for policy 0, policy_version 46336 (0.0008) +[2026-06-02 17:06:27,894][255279] Updated weights for policy 0, policy_version 46346 (0.0008) +[2026-06-02 17:06:28,068][255279] Updated weights for policy 0, policy_version 46356 (0.0008) +[2026-06-02 17:06:28,281][255279] Updated weights for policy 0, policy_version 46367 (0.0008) +[2026-06-02 17:06:28,459][255279] Updated weights for policy 0, policy_version 46377 (0.0008) +[2026-06-02 17:06:28,643][255279] Updated weights for policy 0, policy_version 46387 (0.0008) +[2026-06-02 17:06:28,833][255279] Updated weights for policy 0, policy_version 46397 (0.0009) +[2026-06-02 17:06:29,019][255279] Updated weights for policy 0, policy_version 46407 (0.0008) +[2026-06-02 17:06:29,714][255279] Updated weights for policy 0, policy_version 46418 (0.0008) +[2026-06-02 17:06:29,893][255279] Updated weights for policy 0, policy_version 46428 (0.0008) +[2026-06-02 17:06:30,079][255279] Updated weights for policy 0, policy_version 46438 (0.0008) +[2026-06-02 17:06:30,270][255279] Updated weights for policy 0, policy_version 46448 (0.0008) +[2026-06-02 17:06:30,458][255279] Updated weights for policy 0, policy_version 46458 (0.0008) +[2026-06-02 17:06:30,502][253683] Fps is (10 sec: 19661.0, 60 sec: 19114.7, 300 sec: 19438.6). Total num frames: 23822336. Throughput: 0: 19518.6. Samples: 23845248. Policy #0 lag: (min: 27.0, avg: 61.6, max: 86.0) +[2026-06-02 17:06:30,502][253683] Avg episode reward: [(0, '2281.696')] +[2026-06-02 17:06:30,664][255279] Updated weights for policy 0, policy_version 46469 (0.0009) +[2026-06-02 17:06:30,710][255187] Saving new best policy, reward=2281.696! +[2026-06-02 17:06:31,346][255279] Updated weights for policy 0, policy_version 46479 (0.0008) +[2026-06-02 17:06:31,529][255279] Updated weights for policy 0, policy_version 46489 (0.0008) +[2026-06-02 17:06:31,726][255279] Updated weights for policy 0, policy_version 46500 (0.0008) +[2026-06-02 17:06:31,917][255279] Updated weights for policy 0, policy_version 46510 (0.0008) +[2026-06-02 17:06:32,109][255279] Updated weights for policy 0, policy_version 46520 (0.0008) +[2026-06-02 17:06:32,296][255279] Updated weights for policy 0, policy_version 46530 (0.0008) +[2026-06-02 17:06:32,981][255279] Updated weights for policy 0, policy_version 46540 (0.0008) +[2026-06-02 17:06:33,160][255279] Updated weights for policy 0, policy_version 46550 (0.0008) +[2026-06-02 17:06:33,344][255279] Updated weights for policy 0, policy_version 46560 (0.0008) +[2026-06-02 17:06:33,530][255279] Updated weights for policy 0, policy_version 46570 (0.0009) +[2026-06-02 17:06:33,720][255279] Updated weights for policy 0, policy_version 46580 (0.0008) +[2026-06-02 17:06:33,916][255279] Updated weights for policy 0, policy_version 46591 (0.0009) +[2026-06-02 17:06:34,611][255279] Updated weights for policy 0, policy_version 46601 (0.0009) +[2026-06-02 17:06:34,768][255279] Updated weights for policy 0, policy_version 46611 (0.0009) +[2026-06-02 17:06:34,957][255279] Updated weights for policy 0, policy_version 46621 (0.0009) +[2026-06-02 17:06:35,140][255279] Updated weights for policy 0, policy_version 46631 (0.0009) +[2026-06-02 17:06:35,329][255279] Updated weights for policy 0, policy_version 46641 (0.0009) +[2026-06-02 17:06:35,501][253683] Fps is (10 sec: 19660.9, 60 sec: 19114.7, 300 sec: 19438.6). Total num frames: 23920640. Throughput: 0: 19572.6. Samples: 23966976. Policy #0 lag: (min: 39.0, avg: 71.9, max: 100.0) +[2026-06-02 17:06:35,502][253683] Avg episode reward: [(0, '2281.696')] +[2026-06-02 17:06:35,522][255279] Updated weights for policy 0, policy_version 46652 (0.0008) +[2026-06-02 17:06:35,723][255279] Updated weights for policy 0, policy_version 46663 (0.0008) +[2026-06-02 17:06:36,428][255279] Updated weights for policy 0, policy_version 46673 (0.0008) +[2026-06-02 17:06:36,613][255279] Updated weights for policy 0, policy_version 46684 (0.0008) +[2026-06-02 17:06:36,793][255279] Updated weights for policy 0, policy_version 46694 (0.0009) +[2026-06-02 17:06:36,993][255279] Updated weights for policy 0, policy_version 46705 (0.0008) +[2026-06-02 17:06:37,197][255279] Updated weights for policy 0, policy_version 46716 (0.0009) +[2026-06-02 17:06:37,405][255279] Updated weights for policy 0, policy_version 46728 (0.0009) +[2026-06-02 17:06:38,114][255279] Updated weights for policy 0, policy_version 46739 (0.0008) +[2026-06-02 17:06:38,306][255279] Updated weights for policy 0, policy_version 46750 (0.0008) +[2026-06-02 17:06:38,522][255279] Updated weights for policy 0, policy_version 46761 (0.0008) +[2026-06-02 17:06:38,720][255279] Updated weights for policy 0, policy_version 46772 (0.0009) +[2026-06-02 17:06:38,900][255279] Updated weights for policy 0, policy_version 46782 (0.0008) +[2026-06-02 17:06:39,632][255279] Updated weights for policy 0, policy_version 46793 (0.0008) +[2026-06-02 17:06:39,800][255279] Updated weights for policy 0, policy_version 46803 (0.0008) +[2026-06-02 17:06:39,996][255279] Updated weights for policy 0, policy_version 46814 (0.0009) +[2026-06-02 17:06:40,198][255279] Updated weights for policy 0, policy_version 46825 (0.0009) +[2026-06-02 17:06:40,404][255279] Updated weights for policy 0, policy_version 46836 (0.0008) +[2026-06-02 17:06:40,502][253683] Fps is (10 sec: 19660.8, 60 sec: 19114.6, 300 sec: 19438.6). Total num frames: 24018944. Throughput: 0: 19444.7. Samples: 24022912. Policy #0 lag: (min: 39.0, avg: 71.9, max: 100.0) +[2026-06-02 17:06:40,502][253683] Avg episode reward: [(0, '2281.696')] +[2026-06-02 17:06:40,611][255279] Updated weights for policy 0, policy_version 46847 (0.0008) +[2026-06-02 17:06:41,310][255279] Updated weights for policy 0, policy_version 46857 (0.0008) +[2026-06-02 17:06:41,483][255279] Updated weights for policy 0, policy_version 46867 (0.0009) +[2026-06-02 17:06:41,694][255279] Updated weights for policy 0, policy_version 46878 (0.0008) +[2026-06-02 17:06:41,872][255279] Updated weights for policy 0, policy_version 46888 (0.0008) +[2026-06-02 17:06:42,060][255279] Updated weights for policy 0, policy_version 46898 (0.0008) +[2026-06-02 17:06:42,262][255279] Updated weights for policy 0, policy_version 46909 (0.0008) +[2026-06-02 17:06:42,470][255279] Updated weights for policy 0, policy_version 46920 (0.0008) +[2026-06-02 17:06:43,156][255279] Updated weights for policy 0, policy_version 46930 (0.0009) +[2026-06-02 17:06:43,335][255279] Updated weights for policy 0, policy_version 46940 (0.0008) +[2026-06-02 17:06:43,517][255279] Updated weights for policy 0, policy_version 46950 (0.0008) +[2026-06-02 17:06:43,710][255279] Updated weights for policy 0, policy_version 46960 (0.0008) +[2026-06-02 17:06:43,904][255279] Updated weights for policy 0, policy_version 46970 (0.0009) +[2026-06-02 17:06:44,095][255279] Updated weights for policy 0, policy_version 46980 (0.0009) +[2026-06-02 17:06:44,760][255279] Updated weights for policy 0, policy_version 46990 (0.0008) +[2026-06-02 17:06:44,945][255279] Updated weights for policy 0, policy_version 47000 (0.0008) +[2026-06-02 17:06:45,129][255279] Updated weights for policy 0, policy_version 47010 (0.0009) +[2026-06-02 17:06:45,332][255279] Updated weights for policy 0, policy_version 47021 (0.0009) +[2026-06-02 17:06:45,502][253683] Fps is (10 sec: 19660.8, 60 sec: 19114.7, 300 sec: 19438.6). Total num frames: 24117248. Throughput: 0: 19558.4. Samples: 24137600. Policy #0 lag: (min: 39.0, avg: 71.9, max: 100.0) +[2026-06-02 17:06:45,502][253683] Avg episode reward: [(0, '2281.696')] +[2026-06-02 17:06:45,523][255279] Updated weights for policy 0, policy_version 47031 (0.0008) +[2026-06-02 17:06:45,708][255279] Updated weights for policy 0, policy_version 47041 (0.0008) +[2026-06-02 17:06:46,415][255279] Updated weights for policy 0, policy_version 47053 (0.0009) +[2026-06-02 17:06:46,593][255279] Updated weights for policy 0, policy_version 47063 (0.0008) +[2026-06-02 17:06:46,804][255279] Updated weights for policy 0, policy_version 47075 (0.0008) +[2026-06-02 17:06:46,991][255279] Updated weights for policy 0, policy_version 47085 (0.0009) +[2026-06-02 17:06:47,165][255279] Updated weights for policy 0, policy_version 47095 (0.0008) +[2026-06-02 17:06:47,376][255279] Updated weights for policy 0, policy_version 47106 (0.0009) +[2026-06-02 17:06:48,102][255279] Updated weights for policy 0, policy_version 47118 (0.0009) +[2026-06-02 17:06:48,309][255279] Updated weights for policy 0, policy_version 47129 (0.0008) +[2026-06-02 17:06:48,508][255279] Updated weights for policy 0, policy_version 47140 (0.0009) +[2026-06-02 17:06:48,687][255279] Updated weights for policy 0, policy_version 47150 (0.0008) +[2026-06-02 17:06:48,867][255279] Updated weights for policy 0, policy_version 47160 (0.0009) +[2026-06-02 17:06:49,050][255279] Updated weights for policy 0, policy_version 47170 (0.0008) +[2026-06-02 17:06:49,748][255279] Updated weights for policy 0, policy_version 47180 (0.0008) +[2026-06-02 17:06:49,943][255279] Updated weights for policy 0, policy_version 47191 (0.0008) +[2026-06-02 17:06:50,126][255279] Updated weights for policy 0, policy_version 47201 (0.0009) +[2026-06-02 17:06:50,325][255279] Updated weights for policy 0, policy_version 47212 (0.0008) +[2026-06-02 17:06:50,502][253683] Fps is (10 sec: 19660.8, 60 sec: 19114.7, 300 sec: 19438.6). Total num frames: 24215552. Throughput: 0: 19524.2. Samples: 24259072. Policy #0 lag: (min: 39.0, avg: 71.9, max: 100.0) +[2026-06-02 17:06:50,502][253683] Avg episode reward: [(0, '2281.696')] +[2026-06-02 17:06:50,522][255279] Updated weights for policy 0, policy_version 47223 (0.0008) +[2026-06-02 17:06:50,727][255279] Updated weights for policy 0, policy_version 47234 (0.0009) +[2026-06-02 17:06:51,428][255279] Updated weights for policy 0, policy_version 47245 (0.0009) +[2026-06-02 17:06:51,616][255279] Updated weights for policy 0, policy_version 47255 (0.0008) +[2026-06-02 17:06:51,802][255279] Updated weights for policy 0, policy_version 47265 (0.0009) +[2026-06-02 17:06:51,991][255279] Updated weights for policy 0, policy_version 47275 (0.0009) +[2026-06-02 17:06:52,176][255279] Updated weights for policy 0, policy_version 47285 (0.0008) +[2026-06-02 17:06:52,386][255279] Updated weights for policy 0, policy_version 47296 (0.0009) +[2026-06-02 17:06:53,091][255279] Updated weights for policy 0, policy_version 47307 (0.0008) +[2026-06-02 17:06:53,250][255279] Updated weights for policy 0, policy_version 47317 (0.0008) +[2026-06-02 17:06:53,438][255279] Updated weights for policy 0, policy_version 47327 (0.0008) +[2026-06-02 17:06:53,637][255279] Updated weights for policy 0, policy_version 47338 (0.0009) +[2026-06-02 17:06:53,862][255279] Updated weights for policy 0, policy_version 47351 (0.0009) +[2026-06-02 17:06:54,068][255279] Updated weights for policy 0, policy_version 47362 (0.0008) +[2026-06-02 17:06:54,778][255279] Updated weights for policy 0, policy_version 47373 (0.0009) +[2026-06-02 17:06:54,964][255279] Updated weights for policy 0, policy_version 47383 (0.0009) +[2026-06-02 17:06:55,186][255279] Updated weights for policy 0, policy_version 47396 (0.0009) +[2026-06-02 17:06:55,384][255279] Updated weights for policy 0, policy_version 47406 (0.0009) +[2026-06-02 17:06:55,502][253683] Fps is (10 sec: 19660.8, 60 sec: 19114.7, 300 sec: 19438.6). Total num frames: 24313856. Throughput: 0: 19328.0. Samples: 24311552. Policy #0 lag: (min: 39.0, avg: 71.9, max: 100.0) +[2026-06-02 17:06:55,502][253683] Avg episode reward: [(0, '2281.696')] +[2026-06-02 17:06:55,564][255279] Updated weights for policy 0, policy_version 47416 (0.0009) +[2026-06-02 17:06:55,763][255279] Updated weights for policy 0, policy_version 47426 (0.0009) +[2026-06-02 17:06:56,482][255279] Updated weights for policy 0, policy_version 47436 (0.0008) +[2026-06-02 17:06:56,659][255279] Updated weights for policy 0, policy_version 47446 (0.0008) +[2026-06-02 17:06:56,855][255279] Updated weights for policy 0, policy_version 47456 (0.0008) +[2026-06-02 17:06:57,043][255279] Updated weights for policy 0, policy_version 47466 (0.0008) +[2026-06-02 17:06:57,228][255279] Updated weights for policy 0, policy_version 47476 (0.0008) +[2026-06-02 17:06:57,433][255279] Updated weights for policy 0, policy_version 47487 (0.0008) +[2026-06-02 17:06:58,074][255279] Updated weights for policy 0, policy_version 47497 (0.0008) +[2026-06-02 17:06:58,242][255279] Updated weights for policy 0, policy_version 47507 (0.0008) +[2026-06-02 17:06:58,415][255279] Updated weights for policy 0, policy_version 47517 (0.0009) +[2026-06-02 17:06:58,603][255279] Updated weights for policy 0, policy_version 47527 (0.0008) +[2026-06-02 17:06:58,797][255279] Updated weights for policy 0, policy_version 47537 (0.0009) +[2026-06-02 17:06:58,999][255279] Updated weights for policy 0, policy_version 47548 (0.0009) +[2026-06-02 17:06:59,196][255279] Updated weights for policy 0, policy_version 47558 (0.0009) +[2026-06-02 17:06:59,873][255279] Updated weights for policy 0, policy_version 47568 (0.0009) +[2026-06-02 17:07:00,050][255279] Updated weights for policy 0, policy_version 47578 (0.0008) +[2026-06-02 17:07:00,242][255279] Updated weights for policy 0, policy_version 47588 (0.0009) +[2026-06-02 17:07:00,429][255279] Updated weights for policy 0, policy_version 47598 (0.0008) +[2026-06-02 17:07:00,502][253683] Fps is (10 sec: 19660.8, 60 sec: 19114.7, 300 sec: 19438.6). Total num frames: 24412160. Throughput: 0: 19478.7. Samples: 24428416. Policy #0 lag: (min: 39.0, avg: 71.9, max: 100.0) +[2026-06-02 17:07:00,502][253683] Avg episode reward: [(0, '2275.351')] +[2026-06-02 17:07:00,603][255279] Updated weights for policy 0, policy_version 47608 (0.0008) +[2026-06-02 17:07:00,796][255279] Updated weights for policy 0, policy_version 47618 (0.0009) +[2026-06-02 17:07:01,475][255279] Updated weights for policy 0, policy_version 47628 (0.0008) +[2026-06-02 17:07:01,665][255279] Updated weights for policy 0, policy_version 47639 (0.0008) +[2026-06-02 17:07:01,851][255279] Updated weights for policy 0, policy_version 47649 (0.0008) +[2026-06-02 17:07:02,061][255279] Updated weights for policy 0, policy_version 47660 (0.0008) +[2026-06-02 17:07:02,246][255279] Updated weights for policy 0, policy_version 47670 (0.0009) +[2026-06-02 17:07:02,431][255279] Updated weights for policy 0, policy_version 47680 (0.0009) +[2026-06-02 17:07:03,113][255279] Updated weights for policy 0, policy_version 47690 (0.0009) +[2026-06-02 17:07:03,288][255279] Updated weights for policy 0, policy_version 47700 (0.0009) +[2026-06-02 17:07:03,462][255279] Updated weights for policy 0, policy_version 47710 (0.0009) +[2026-06-02 17:07:03,647][255279] Updated weights for policy 0, policy_version 47720 (0.0009) +[2026-06-02 17:07:03,840][255279] Updated weights for policy 0, policy_version 47730 (0.0009) +[2026-06-02 17:07:04,041][255279] Updated weights for policy 0, policy_version 47741 (0.0009) +[2026-06-02 17:07:04,234][255279] Updated weights for policy 0, policy_version 47751 (0.0009) +[2026-06-02 17:07:04,920][255279] Updated weights for policy 0, policy_version 47761 (0.0009) +[2026-06-02 17:07:05,104][255279] Updated weights for policy 0, policy_version 47771 (0.0008) +[2026-06-02 17:07:05,289][255279] Updated weights for policy 0, policy_version 47781 (0.0008) +[2026-06-02 17:07:05,470][255279] Updated weights for policy 0, policy_version 47791 (0.0009) +[2026-06-02 17:07:05,502][253683] Fps is (10 sec: 19660.8, 60 sec: 19114.7, 300 sec: 19438.6). Total num frames: 24510464. Throughput: 0: 19473.1. Samples: 24550656. Policy #0 lag: (min: 59.0, avg: 103.8, max: 123.0) +[2026-06-02 17:07:05,502][253683] Avg episode reward: [(0, '2332.110')] +[2026-06-02 17:07:05,664][255279] Updated weights for policy 0, policy_version 47801 (0.0009) +[2026-06-02 17:07:05,853][255279] Updated weights for policy 0, policy_version 47811 (0.0009) +[2026-06-02 17:07:05,937][255187] Saving new best policy, reward=2332.110! +[2026-06-02 17:07:06,551][255279] Updated weights for policy 0, policy_version 47822 (0.0009) +[2026-06-02 17:07:06,729][255279] Updated weights for policy 0, policy_version 47832 (0.0009) +[2026-06-02 17:07:06,927][255279] Updated weights for policy 0, policy_version 47843 (0.0009) +[2026-06-02 17:07:07,120][255279] Updated weights for policy 0, policy_version 47853 (0.0009) +[2026-06-02 17:07:07,311][255279] Updated weights for policy 0, policy_version 47863 (0.0009) +[2026-06-02 17:07:07,504][255279] Updated weights for policy 0, policy_version 47873 (0.0009) +[2026-06-02 17:07:08,187][255279] Updated weights for policy 0, policy_version 47883 (0.0009) +[2026-06-02 17:07:08,387][255279] Updated weights for policy 0, policy_version 47894 (0.0009) +[2026-06-02 17:07:08,574][255279] Updated weights for policy 0, policy_version 47904 (0.0008) +[2026-06-02 17:07:08,758][255279] Updated weights for policy 0, policy_version 47914 (0.0009) +[2026-06-02 17:07:08,949][255279] Updated weights for policy 0, policy_version 47924 (0.0009) +[2026-06-02 17:07:09,132][255279] Updated weights for policy 0, policy_version 47934 (0.0008) +[2026-06-02 17:07:09,322][255279] Updated weights for policy 0, policy_version 47944 (0.0008) +[2026-06-02 17:07:09,990][255279] Updated weights for policy 0, policy_version 47954 (0.0009) +[2026-06-02 17:07:10,169][255279] Updated weights for policy 0, policy_version 47964 (0.0009) +[2026-06-02 17:07:10,343][255279] Updated weights for policy 0, policy_version 47974 (0.0008) +[2026-06-02 17:07:10,502][253683] Fps is (10 sec: 19660.7, 60 sec: 19660.8, 300 sec: 19438.6). Total num frames: 24608768. Throughput: 0: 19202.8. Samples: 24599680. Policy #0 lag: (min: 59.0, avg: 103.8, max: 123.0) +[2026-06-02 17:07:10,503][253683] Avg episode reward: [(0, '2332.110')] +[2026-06-02 17:07:10,537][255279] Updated weights for policy 0, policy_version 47984 (0.0008) +[2026-06-02 17:07:10,726][255279] Updated weights for policy 0, policy_version 47994 (0.0009) +[2026-06-02 17:07:10,907][255279] Updated weights for policy 0, policy_version 48004 (0.0009) +[2026-06-02 17:07:11,603][255279] Updated weights for policy 0, policy_version 48015 (0.0009) +[2026-06-02 17:07:11,783][255279] Updated weights for policy 0, policy_version 48025 (0.0009) +[2026-06-02 17:07:11,979][255279] Updated weights for policy 0, policy_version 48035 (0.0009) +[2026-06-02 17:07:12,176][255279] Updated weights for policy 0, policy_version 48046 (0.0009) +[2026-06-02 17:07:12,373][255279] Updated weights for policy 0, policy_version 48057 (0.0009) +[2026-06-02 17:07:12,556][255279] Updated weights for policy 0, policy_version 48067 (0.0009) +[2026-06-02 17:07:13,258][255279] Updated weights for policy 0, policy_version 48077 (0.0009) +[2026-06-02 17:07:13,439][255279] Updated weights for policy 0, policy_version 48087 (0.0009) +[2026-06-02 17:07:13,620][255279] Updated weights for policy 0, policy_version 48097 (0.0009) +[2026-06-02 17:07:13,821][255279] Updated weights for policy 0, policy_version 48108 (0.0009) +[2026-06-02 17:07:14,044][255279] Updated weights for policy 0, policy_version 48120 (0.0009) +[2026-06-02 17:07:14,240][255279] Updated weights for policy 0, policy_version 48131 (0.0009) +[2026-06-02 17:07:14,946][255279] Updated weights for policy 0, policy_version 48142 (0.0008) +[2026-06-02 17:07:15,139][255279] Updated weights for policy 0, policy_version 48153 (0.0008) +[2026-06-02 17:07:15,321][255279] Updated weights for policy 0, policy_version 48163 (0.0008) +[2026-06-02 17:07:15,502][253683] Fps is (10 sec: 19660.7, 60 sec: 19660.8, 300 sec: 19438.6). Total num frames: 24707072. Throughput: 0: 19473.1. Samples: 24721536. Policy #0 lag: (min: 59.0, avg: 103.8, max: 123.0) +[2026-06-02 17:07:15,503][253683] Avg episode reward: [(0, '2332.110')] +[2026-06-02 17:07:15,526][255279] Updated weights for policy 0, policy_version 48174 (0.0008) +[2026-06-02 17:07:15,699][255279] Updated weights for policy 0, policy_version 48184 (0.0008) +[2026-06-02 17:07:15,904][255279] Updated weights for policy 0, policy_version 48195 (0.0008) +[2026-06-02 17:07:16,619][255279] Updated weights for policy 0, policy_version 48205 (0.0009) +[2026-06-02 17:07:16,789][255279] Updated weights for policy 0, policy_version 48215 (0.0009) +[2026-06-02 17:07:16,979][255279] Updated weights for policy 0, policy_version 48225 (0.0008) +[2026-06-02 17:07:17,155][255279] Updated weights for policy 0, policy_version 48235 (0.0009) +[2026-06-02 17:07:17,379][255279] Updated weights for policy 0, policy_version 48247 (0.0009) +[2026-06-02 17:07:17,575][255279] Updated weights for policy 0, policy_version 48257 (0.0009) +[2026-06-02 17:07:18,264][255279] Updated weights for policy 0, policy_version 48267 (0.0009) +[2026-06-02 17:07:18,445][255279] Updated weights for policy 0, policy_version 48277 (0.0009) +[2026-06-02 17:07:18,621][255279] Updated weights for policy 0, policy_version 48287 (0.0009) +[2026-06-02 17:07:18,818][255279] Updated weights for policy 0, policy_version 48297 (0.0008) +[2026-06-02 17:07:19,002][255279] Updated weights for policy 0, policy_version 48307 (0.0009) +[2026-06-02 17:07:19,185][255279] Updated weights for policy 0, policy_version 48317 (0.0009) +[2026-06-02 17:07:19,382][255279] Updated weights for policy 0, policy_version 48327 (0.0008) +[2026-06-02 17:07:20,040][255279] Updated weights for policy 0, policy_version 48337 (0.0009) +[2026-06-02 17:07:20,226][255279] Updated weights for policy 0, policy_version 48347 (0.0009) +[2026-06-02 17:07:20,420][255279] Updated weights for policy 0, policy_version 48357 (0.0009) +[2026-06-02 17:07:20,502][253683] Fps is (10 sec: 19660.8, 60 sec: 19660.8, 300 sec: 19438.6). Total num frames: 24805376. Throughput: 0: 19419.0. Samples: 24840832. Policy #0 lag: (min: 59.0, avg: 103.8, max: 123.0) +[2026-06-02 17:07:20,503][253683] Avg episode reward: [(0, '2374.705')] +[2026-06-02 17:07:20,601][255279] Updated weights for policy 0, policy_version 48367 (0.0009) +[2026-06-02 17:07:20,808][255279] Updated weights for policy 0, policy_version 48378 (0.0009) +[2026-06-02 17:07:21,021][255279] Updated weights for policy 0, policy_version 48389 (0.0009) +[2026-06-02 17:07:21,069][255187] Saving new best policy, reward=2374.705! +[2026-06-02 17:07:21,694][255279] Updated weights for policy 0, policy_version 48399 (0.0009) +[2026-06-02 17:07:21,878][255279] Updated weights for policy 0, policy_version 48409 (0.0009) +[2026-06-02 17:07:22,071][255279] Updated weights for policy 0, policy_version 48419 (0.0009) +[2026-06-02 17:07:22,257][255279] Updated weights for policy 0, policy_version 48429 (0.0009) +[2026-06-02 17:07:22,444][255279] Updated weights for policy 0, policy_version 48439 (0.0009) +[2026-06-02 17:07:22,631][255279] Updated weights for policy 0, policy_version 48449 (0.0009) +[2026-06-02 17:07:23,301][255279] Updated weights for policy 0, policy_version 48459 (0.0009) +[2026-06-02 17:07:23,476][255279] Updated weights for policy 0, policy_version 48469 (0.0009) +[2026-06-02 17:07:23,659][255279] Updated weights for policy 0, policy_version 48479 (0.0009) +[2026-06-02 17:07:23,857][255279] Updated weights for policy 0, policy_version 48489 (0.0009) +[2026-06-02 17:07:24,043][255279] Updated weights for policy 0, policy_version 48499 (0.0009) +[2026-06-02 17:07:24,228][255279] Updated weights for policy 0, policy_version 48509 (0.0009) +[2026-06-02 17:07:24,418][255279] Updated weights for policy 0, policy_version 48519 (0.0009) +[2026-06-02 17:07:25,086][255279] Updated weights for policy 0, policy_version 48529 (0.0009) +[2026-06-02 17:07:25,271][255279] Updated weights for policy 0, policy_version 48539 (0.0009) +[2026-06-02 17:07:25,465][255279] Updated weights for policy 0, policy_version 48549 (0.0009) +[2026-06-02 17:07:25,502][253683] Fps is (10 sec: 19660.8, 60 sec: 19660.8, 300 sec: 19438.6). Total num frames: 24903680. Throughput: 0: 19333.7. Samples: 24892928. Policy #0 lag: (min: 59.0, avg: 103.8, max: 123.0) +[2026-06-02 17:07:25,503][253683] Avg episode reward: [(0, '2463.582')] +[2026-06-02 17:07:25,669][255279] Updated weights for policy 0, policy_version 48560 (0.0009) +[2026-06-02 17:07:25,857][255279] Updated weights for policy 0, policy_version 48570 (0.0009) +[2026-06-02 17:07:26,045][255279] Updated weights for policy 0, policy_version 48580 (0.0009) +[2026-06-02 17:07:26,109][255187] Saving new best policy, reward=2463.582! +[2026-06-02 17:07:26,763][255279] Updated weights for policy 0, policy_version 48591 (0.0009) +[2026-06-02 17:07:26,940][255279] Updated weights for policy 0, policy_version 48601 (0.0009) +[2026-06-02 17:07:27,143][255279] Updated weights for policy 0, policy_version 48612 (0.0009) +[2026-06-02 17:07:27,326][255279] Updated weights for policy 0, policy_version 48622 (0.0008) +[2026-06-02 17:07:27,539][255279] Updated weights for policy 0, policy_version 48633 (0.0009) +[2026-06-02 17:07:27,721][255279] Updated weights for policy 0, policy_version 48643 (0.0008) +[2026-06-02 17:07:28,398][255279] Updated weights for policy 0, policy_version 48654 (0.0009) +[2026-06-02 17:07:28,579][255279] Updated weights for policy 0, policy_version 48664 (0.0008) +[2026-06-02 17:07:28,766][255279] Updated weights for policy 0, policy_version 48674 (0.0008) +[2026-06-02 17:07:28,951][255279] Updated weights for policy 0, policy_version 48684 (0.0009) +[2026-06-02 17:07:29,133][255279] Updated weights for policy 0, policy_version 48694 (0.0009) +[2026-06-02 17:07:29,325][255279] Updated weights for policy 0, policy_version 48704 (0.0009) +[2026-06-02 17:07:30,009][255279] Updated weights for policy 0, policy_version 48714 (0.0009) +[2026-06-02 17:07:30,184][255279] Updated weights for policy 0, policy_version 48724 (0.0008) +[2026-06-02 17:07:30,371][255279] Updated weights for policy 0, policy_version 48734 (0.0008) +[2026-06-02 17:07:30,502][253683] Fps is (10 sec: 19660.9, 60 sec: 19660.8, 300 sec: 19438.6). Total num frames: 25001984. Throughput: 0: 19490.1. Samples: 25014656. Policy #0 lag: (min: 59.0, avg: 103.8, max: 123.0) +[2026-06-02 17:07:30,502][253683] Avg episode reward: [(0, '2458.795')] +[2026-06-02 17:07:30,548][255279] Updated weights for policy 0, policy_version 48744 (0.0008) +[2026-06-02 17:07:30,735][255279] Updated weights for policy 0, policy_version 48754 (0.0008) +[2026-06-02 17:07:30,925][255279] Updated weights for policy 0, policy_version 48764 (0.0008) +[2026-06-02 17:07:31,122][255279] Updated weights for policy 0, policy_version 48774 (0.0008) +[2026-06-02 17:07:31,149][253683] Component Batcher_0 stopped! +[2026-06-02 17:07:31,150][255280] Stopping RolloutWorker_w0... +[2026-06-02 17:07:31,150][255280] Loop rollout_proc0_evt_loop terminating... +[2026-06-02 17:07:31,150][255187] Stopping Batcher_0... +[2026-06-02 17:07:31,150][253683] Component RolloutWorker_w0 stopped! +[2026-06-02 17:07:31,151][255187] Saving results/checkpoints_factor_sweeps/flappy/context_window/flappy_frame_stack_fixed_l2_fs5_seed10/checkpoint_p0/checkpoint_000048776_25034752.pth... +[2026-06-02 17:07:31,151][253683] Component RolloutWorker_w1 stopped! +[2026-06-02 17:07:31,151][255281] Stopping RolloutWorker_w1... +[2026-06-02 17:07:31,152][255281] Loop rollout_proc1_evt_loop terminating... +[2026-06-02 17:07:31,151][255187] Loop batcher_evt_loop terminating... +[2026-06-02 17:07:31,172][255187] Saving results/checkpoints_factor_sweeps/flappy/context_window/flappy_frame_stack_fixed_l2_fs5_seed10/checkpoint_p0/checkpoint_000048776_25034752.pth... +[2026-06-02 17:07:31,193][255187] Stopping LearnerWorker_p0... +[2026-06-02 17:07:31,193][255187] Loop learner_proc0_evt_loop terminating... +[2026-06-02 17:07:31,193][253683] Component LearnerWorker_p0 stopped! +[2026-06-02 17:07:31,194][255279] Weights refcount: 2 0 +[2026-06-02 17:07:31,208][255279] Stopping InferenceWorker_p0-w0... +[2026-06-02 17:07:31,208][255279] Loop inference_proc0-0_evt_loop terminating... +[2026-06-02 17:07:31,208][253683] Component InferenceWorker_p0-w0 stopped! +[2026-06-02 17:07:31,208][253683] Waiting for process learner_proc0 to stop... +[2026-06-02 17:07:32,126][253683] Waiting for process inference_proc0-0 to join... +[2026-06-02 17:07:32,127][253683] Waiting for process rollout_proc0 to join... +[2026-06-02 17:07:32,128][253683] Waiting for process rollout_proc1 to join... +[2026-06-02 17:07:32,128][253683] Batcher 0 profile tree view: +batching: 0.9992, releasing_batches: 0.0388 +[2026-06-02 17:07:32,129][253683] InferenceWorker_p0-w0 profile tree view: +wait_policy: 0.0000 + wait_policy_total: 820.2797 +update_model: 45.4586 + weight_update: 0.0008 +one_step: 0.0016 + handle_policy_step: 401.2821 + deserialize: 5.1073, stack: 0.3936, obs_to_device_normalize: 55.0495, forward: 142.4774, prepare_outputs: 171.8168, send_messages: 10.2843 +[2026-06-02 17:07:32,129][253683] Learner 0 profile tree view: +misc: 0.0061, prepare_batch: 125.2118 +train: 899.3348 + epoch_init: 0.0646, minibatch_init: 2.8769, losses_postprocess: 311.6543, kl_divergence: 24.6198, after_optimizer: 370.7084 + calculate_losses: 42.1841 + losses_init: 0.0864, forward_head: 14.3053, bptt_initial: 0.3672, bptt: 0.3883, tail: 9.4117, advantages_returns: 3.0697, losses: 11.3653 + update: 143.8053 + clip: 14.7442 +[2026-06-02 17:07:32,130][253683] RolloutWorker_w0 profile tree view: +wait_for_trajectories: 0.0334, enqueue_policy_requests: 137.8895, process_policy_outputs: 7.6031, env_step: 866.0987, finalize_trajectories: 0.1064, complete_rollouts: 0.0765 +post_env_step: 16.4799 + process_env_step: 5.0203 +[2026-06-02 17:07:32,130][253683] RolloutWorker_w1 profile tree view: +wait_for_trajectories: 0.0326, enqueue_policy_requests: 135.0599, process_policy_outputs: 7.2947, env_step: 861.4513, finalize_trajectories: 0.1025, complete_rollouts: 0.0766 +post_env_step: 16.1455 + process_env_step: 4.9010 +[2026-06-02 17:07:32,131][253683] Loop Runner_EvtLoop terminating... +[2026-06-02 17:07:32,132][253683] Runner profile tree view: +main_loop: 1296.6805 +[2026-06-02 17:07:32,132][253683] Collected {0: 25034752}, FPS: 19306.8