diff --git a/.gitattributes b/.gitattributes index ce079f6f5f601eac59718503dd6688940d247d9a..d07e5844e8dbd45eb17b6bb6d9405cb9d649c187 100644 --- a/.gitattributes +++ b/.gitattributes @@ -293,3 +293,4 @@ factor_sweeps/flappy/context_window/train/factor_sweep:flappy:frame_stack:fixed_ factor_sweeps/flappy/context_window/train/factor_sweep:flappy:frame_stack:fixed_l2:fs2:obs30:stride1:seed13/episode_metrics.jsonl filter=lfs diff=lfs merge=lfs -text factor_sweeps/flappy/context_window/train/factor_sweep:flappy:frame_stack:fixed_l2:fs2:obs30:stride1:seed14/episode_metrics.jsonl filter=lfs diff=lfs merge=lfs -text factor_sweeps/flappy/context_window/train/factor_sweep:flappy:frame_stack:uniform_u1_3:fs1:obs30:stride1:seed10/episode_metrics.jsonl filter=lfs diff=lfs merge=lfs -text +factor_sweeps/flappy/context_window/train/factor_sweep:flappy:frame_stack:fixed_l2:fs3:obs30:stride1:seed11/episode_metrics.jsonl filter=lfs diff=lfs merge=lfs -text diff --git a/factor_sweeps/flappy/context_window/train/factor_sweep:flappy:frame_stack:fixed_l2:fs3:obs30:stride1:seed11/checkpoint_p0/best_000048816_25001984_reward_2113.825.pth b/factor_sweeps/flappy/context_window/train/factor_sweep:flappy:frame_stack:fixed_l2:fs3:obs30:stride1:seed11/checkpoint_p0/best_000048816_25001984_reward_2113.825.pth new file mode 100644 index 0000000000000000000000000000000000000000..ceb611f08b70daa96c6c173f170be046d975277f --- /dev/null +++ b/factor_sweeps/flappy/context_window/train/factor_sweep:flappy:frame_stack:fixed_l2:fs3:obs30:stride1:seed11/checkpoint_p0/best_000048816_25001984_reward_2113.825.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d81a7258367fe5793a75265c6f1fa6474677d5981795ba5e4fd312e08e5bee3b +size 21385529 diff --git a/factor_sweeps/flappy/context_window/train/factor_sweep:flappy:frame_stack:fixed_l2:fs3:obs30:stride1:seed11/checkpoint_p0/checkpoint_000026160_13402112.pth b/factor_sweeps/flappy/context_window/train/factor_sweep:flappy:frame_stack:fixed_l2:fs3:obs30:stride1:seed11/checkpoint_p0/checkpoint_000026160_13402112.pth new file mode 100644 index 0000000000000000000000000000000000000000..53923a5638fc3c55d87b9160392d9f9dcbe681f3 --- /dev/null +++ b/factor_sweeps/flappy/context_window/train/factor_sweep:flappy:frame_stack:fixed_l2:fs3:obs30:stride1:seed11/checkpoint_p0/checkpoint_000026160_13402112.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9ea0b75bd50a7983d73c18a02923ab3f123345dd542d14605cb6386f34c95a4d +size 21385889 diff --git a/factor_sweeps/flappy/context_window/train/factor_sweep:flappy:frame_stack:fixed_l2:fs3:obs30:stride1:seed11/checkpoint_p0/checkpoint_000048832_25034752.pth b/factor_sweeps/flappy/context_window/train/factor_sweep:flappy:frame_stack:fixed_l2:fs3:obs30:stride1:seed11/checkpoint_p0/checkpoint_000048832_25034752.pth new file mode 100644 index 0000000000000000000000000000000000000000..d1987045a7819be4747cc29f13b6b087172a335b --- /dev/null +++ b/factor_sweeps/flappy/context_window/train/factor_sweep:flappy:frame_stack:fixed_l2:fs3:obs30:stride1:seed11/checkpoint_p0/checkpoint_000048832_25034752.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:699cead3ce2e887eb0c99b832aea52b9d1c31d2f69540c0599cadfe9aed8ba06 +size 21385889 diff --git a/factor_sweeps/flappy/context_window/train/factor_sweep:flappy:frame_stack:fixed_l2:fs3:obs30:stride1:seed11/config.json b/factor_sweeps/flappy/context_window/train/factor_sweep:flappy:frame_stack:fixed_l2:fs3:obs30:stride1:seed11/config.json new file mode 100644 index 0000000000000000000000000000000000000000..f2be0d3cf7ad6c51b8c146e5314c7e4f7cf15c74 --- /dev/null +++ b/factor_sweeps/flappy/context_window/train/factor_sweep:flappy:frame_stack:fixed_l2:fs3:obs30:stride1:seed11/config.json @@ -0,0 +1,266 @@ +{ + "help": false, + "algo": "APPO", + "env": "latency_flappy", + "experiment": "flappy_frame_stack_fixed_l2_fs3_seed11", + "train_dir": "results/checkpoints_factor_sweeps/flappy/context_window", + "restart_behavior": "resume", + "device": "gpu", + "seed": 11, + "num_policies": 1, + "async_rl": true, + "serial_mode": false, + "batched_sampling": true, + "num_batches_to_accumulate": 2, + "worker_num_splits": 1, + "policy_workers_per_policy": 1, + "max_policy_lag": 400, + "num_workers": 2, + "num_envs_per_worker": 1, + "batch_size": 4096, + "num_batches_per_epoch": 8, + "num_epochs": 8, + "rollout": 128, + "recurrence": 1, + "shuffle_minibatches": false, + "gamma": 0.99, + "reward_scale": 1.0, + "reward_clip": 1000.0, + "value_bootstrap": false, + "normalize_returns": true, + "exploration_loss_coeff": 0.003, + "value_loss_coeff": 0.5, + "kl_loss_coeff": 0.0, + "exploration_loss": "entropy", + "gae_lambda": 0.95, + "ppo_clip_ratio": 0.1, + "ppo_clip_value": 0.2, + "with_vtrace": false, + "vtrace_rho": 1.0, + "vtrace_c": 1.0, + "optimizer": "adam", + "adam_eps": 1e-05, + "adam_beta1": 0.9, + "adam_beta2": 0.999, + "max_grad_norm": 0.5, + "learning_rate": 0.00025, + "lr_schedule": "linear_decay", + "lr_schedule_kl_threshold": 0.008, + "lr_adaptive_min": 1e-06, + "lr_adaptive_max": 0.01, + "obs_subtract_mean": 0.0, + "obs_scale": 255.0, + "normalize_input": true, + "normalize_input_keys": null, + "decorrelate_experience_max_seconds": 0, + "decorrelate_envs_on_one_worker": true, + "actor_worker_gpus": [ + 0 + ], + "set_workers_cpu_affinity": true, + "force_envs_single_thread": false, + "default_niceness": 0, + "log_to_file": true, + "experiment_summaries_interval": 1, + "flush_summaries_interval": 30, + "stats_avg": 100, + "summaries_use_frameskip": true, + "heartbeat_interval": 20, + "heartbeat_reporting_interval": 180, + "train_for_env_steps": 25000000, + "train_for_seconds": 10000000000, + "save_every_sec": 600, + "keep_checkpoints": 5, + "load_checkpoint_kind": "latest", + "save_milestones_sec": -1, + "save_best_every_sec": 5, + "save_best_metric": "reward", + "save_best_after": 100000, + "benchmark": false, + "encoder_mlp_layers": [ + 512, + 512 + ], + "encoder_conv_architecture": "convnet_atari", + "encoder_conv_mlp_layers": [ + 512 + ], + "use_rnn": false, + "rnn_size": 512, + "rnn_type": "gru", + "rnn_num_layers": 1, + "decoder_mlp_layers": [], + "nonlinearity": "elu", + "policy_initialization": "orthogonal", + "policy_init_gain": 1.0, + "actor_critic_share_weights": true, + "adaptive_stddev": true, + "continuous_tanh_scale": 0.0, + "initial_stddev": 1.0, + "use_env_info_cache": false, + "env_gpu_actions": true, + "env_gpu_observations": true, + "env_frameskip": 1, + "env_framestack": 1, + "pixel_format": "CHW", + "use_record_episode_statistics": false, + "with_wandb": true, + "wandb_user": null, + "wandb_project": "latency-sensitive-bench", + "wandb_group": "flappy-fs3-fixed_l2", + "wandb_job_type": "sample_factory", + "wandb_tags": [ + "factor_sweep", + "flappy", + "frame_stack", + "fixed", + "fixed_l2", + "fs3", + "seed11" + ], + "with_pbt": false, + "pbt_mix_policies_in_one_env": true, + "pbt_period_env_steps": 5000000, + "pbt_start_mutation": 20000000, + "pbt_replace_fraction": 0.3, + "pbt_mutation_rate": 0.15, + "pbt_replace_reward_gap": 0.1, + "pbt_replace_reward_gap_absolute": 1e-06, + "pbt_optimize_gamma": false, + "pbt_target_objective": "true_objective", + "pbt_perturb_min": 1.1, + "pbt_perturb_max": 1.5, + "gym_id": "FlappyBird-v0", + "env_fps": 30.0, + "obs_fps": 30.0, + "use_lidar": false, + "normalize_obs": true, + "audio_on": false, + "screen_size": "", + "obs_resize": "84,84", + "use_gpu_render": true, + "simulator": "gpu", + "gpu_render_device": "auto", + "gpu_render_batch_size": 128, + "gpu_render_profile": false, + "gpu_render_profile_interval": 200, + "pipe_gap": 100, + "bird_color": "yellow", + "pipe_color": "green", + "background": "day", + "score_limit": -1, + "frame_stack": 3, + "debug": false, + "debug_timelimit_diagnostics": false, + "max_episode_steps": 0, + "mode": "train", + "latency_type": "fixed", + "fixed_latency_ms": 66.66666666666667, + "mean_latency_ms": null, + "std_latency_ms": null, + "min_latency_ms": null, + "max_latency_ms": null, + "latency_seed": null, + "add_latency_info": false, + "max_pending_actions": null, + "hold_policy": "one_frame_then_noop", + "ordering_policy": "latest_ready", + "eval_episodes": 100, + "eval_parallel_envs": 100, + "eval_latency_raw_frame_values": "0,1,2,3,4,5", + "eval_max_steps": 3600, + "eval_deterministic": true, + "eval_raw_reward": false, + "episode_metrics_path": "results/checkpoints_factor_sweeps/flappy/context_window/flappy_frame_stack_fixed_l2_fs3_seed11/episode_metrics.jsonl", + "command_line": "--mode train --algo APPO --env latency_flappy --experiment flappy_frame_stack_fixed_l2_fs3_seed11 --train_dir results/checkpoints_factor_sweeps/flappy/context_window --restart_behavior resume --device gpu --actor_worker_gpus 0 --env_gpu_observations True --env_gpu_actions True --gpu-render-batch-size 128 --seed 11 --episode_metrics_path results/checkpoints_factor_sweeps/flappy/context_window/flappy_frame_stack_fixed_l2_fs3_seed11/episode_metrics.jsonl --train_for_env_steps 25000000 --num_workers 2 --num_envs_per_worker 1 --num_policies 1 --batch_size 4096 --rollout 128 --recurrence 1 --num_epochs 8 --num_batches_per_epoch 8 --worker_num_splits 1 --max_policy_lag 400 --learning_rate 0.00025 --gamma 0.99 --gae_lambda 0.95 --ppo_clip_ratio 0.1 --ppo_clip_value 0.2 --value_loss_coeff 0.5 --max_grad_norm 0.5 --save_every_sec 600 --keep_checkpoints 5 --stats_avg 100 --experiment_summaries_interval 1 --batched_sampling True --async_rl True --use_rnn False --normalize_returns True --normalize_input True --latency-type fixed --fixed-latency-ms 66.66666666666667 --add-latency-info False --eval-episodes 100 --eval-parallel-envs 100 --eval-max-steps 3600 --eval-deterministic True --with_wandb True --wandb_project latency-sensitive-bench --wandb_group flappy-fs3-fixed_l2 --wandb_job_type sample_factory --wandb_tags factor_sweep flappy frame_stack fixed fixed_l2 fs3 seed11 --gym_id FlappyBird-v0 --env-fps 30 --obs-fps 30.0 --use_lidar False --normalize_obs True --audio_on False --obs_resize 84,84 --use-gpu-render True --simulator gpu --gpu-render-device auto --gpu-render-profile False --gpu-render-profile-interval 200 --pipe_gap 100 --bird_color yellow --pipe_color green --background day --frame_stack 3 --debug False --debug-timelimit-diagnostics False --hold-policy one_frame_then_noop --ordering-policy latest_ready", + "cli_args": { + "algo": "APPO", + "env": "latency_flappy", + "experiment": "flappy_frame_stack_fixed_l2_fs3_seed11", + "train_dir": "results/checkpoints_factor_sweeps/flappy/context_window", + "restart_behavior": "resume", + "device": "gpu", + "seed": 11, + "num_policies": 1, + "async_rl": true, + "batched_sampling": true, + "worker_num_splits": 1, + "max_policy_lag": 400, + "num_workers": 2, + "num_envs_per_worker": 1, + "batch_size": 4096, + "num_batches_per_epoch": 8, + "num_epochs": 8, + "rollout": 128, + "recurrence": 1, + "gamma": 0.99, + "normalize_returns": true, + "value_loss_coeff": 0.5, + "gae_lambda": 0.95, + "ppo_clip_ratio": 0.1, + "ppo_clip_value": 0.2, + "max_grad_norm": 0.5, + "learning_rate": 0.00025, + "normalize_input": true, + "actor_worker_gpus": [ + 0 + ], + "experiment_summaries_interval": 1, + "stats_avg": 100, + "train_for_env_steps": 25000000, + "save_every_sec": 600, + "keep_checkpoints": 5, + "use_rnn": false, + "env_gpu_actions": true, + "env_gpu_observations": true, + "with_wandb": true, + "wandb_project": "latency-sensitive-bench", + "wandb_group": "flappy-fs3-fixed_l2", + "wandb_job_type": "sample_factory", + "wandb_tags": [ + "factor_sweep", + "flappy", + "frame_stack", + "fixed", + "fixed_l2", + "fs3", + "seed11" + ], + "gym_id": "FlappyBird-v0", + "env_fps": 30.0, + "obs_fps": 30.0, + "use_lidar": false, + "normalize_obs": true, + "audio_on": false, + "obs_resize": "84,84", + "use_gpu_render": true, + "simulator": "gpu", + "gpu_render_device": "auto", + "gpu_render_batch_size": 128, + "gpu_render_profile": false, + "gpu_render_profile_interval": 200, + "pipe_gap": 100, + "bird_color": "yellow", + "pipe_color": "green", + "background": "day", + "frame_stack": 3, + "debug": false, + "debug_timelimit_diagnostics": false, + "mode": "train", + "latency_type": "fixed", + "fixed_latency_ms": 66.66666666666667, + "add_latency_info": false, + "hold_policy": "one_frame_then_noop", + "ordering_policy": "latest_ready", + "eval_episodes": 100, + "eval_parallel_envs": 100, + "eval_max_steps": 3600, + "eval_deterministic": true, + "episode_metrics_path": "results/checkpoints_factor_sweeps/flappy/context_window/flappy_frame_stack_fixed_l2_fs3_seed11/episode_metrics.jsonl" + }, + "git_hash": "eb3a2e1efbd2aa03a60d7f44f5e18d8fdd0f5a2d", + "git_repo_name": "git@github.com:ZihanWang314/latency-sensitive-bench.git", + "eval_env_frameskip": 1, + "output_dir": "outputs/factor_sweeps/flappy/context_window/train/frame_stack/fixed_l2/fs3/seed_11", + "wandb_unique_id": "flappy-fs3-fixed_l2-s11" +} \ No newline at end of file diff --git a/factor_sweeps/flappy/context_window/train/factor_sweep:flappy:frame_stack:fixed_l2:fs3:obs30:stride1:seed11/episode_metrics.jsonl b/factor_sweeps/flappy/context_window/train/factor_sweep:flappy:frame_stack:fixed_l2:fs3:obs30:stride1:seed11/episode_metrics.jsonl new file mode 100644 index 0000000000000000000000000000000000000000..d7c11d6bfbd54d244ff7b71f9682c70c71aa3ee2 --- /dev/null +++ b/factor_sweeps/flappy/context_window/train/factor_sweep:flappy:frame_stack:fixed_l2:fs3:obs30:stride1:seed11/episode_metrics.jsonl @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:824fbd8bd716e41696e273f6e1fdbdc7eb79ceef86477b017cf81f3b4df393db +size 21869951 diff --git a/factor_sweeps/flappy/context_window/train/factor_sweep:flappy:frame_stack:fixed_l2:fs3:obs30:stride1:seed11/git.diff b/factor_sweeps/flappy/context_window/train/factor_sweep:flappy:frame_stack:fixed_l2:fs3:obs30:stride1:seed11/git.diff new file mode 100644 index 0000000000000000000000000000000000000000..466fb8b9b61c2e47b54ca5d7f5f930e28515b107 --- /dev/null +++ b/factor_sweeps/flappy/context_window/train/factor_sweep:flappy:frame_stack:fixed_l2:fs3:obs30:stride1:seed11/git.diff @@ -0,0 +1,33 @@ +diff --git a/latency_bench/run.py b/latency_bench/run.py +index 163ca1b..e9016f3 100644 +--- a/latency_bench/run.py ++++ b/latency_bench/run.py +@@ -152,6 +152,8 @@ def _record_stratified_replay_videos( + video_cfg = config["logging"]["video"] + if not video_cfg["enabled"]: + return ++ if not config["logging"].get("save_step_records", False): ++ return + if ExecutorMode(config["executor"]["mode"]) == ExecutorMode.REALTIME: + return + selections = select_episode_return_stratified( +diff --git a/scripts/experiment_runner/run_manifest_jobs.py b/scripts/experiment_runner/run_manifest_jobs.py +index 18376d9..646fe8f 100644 +--- a/scripts/experiment_runner/run_manifest_jobs.py ++++ b/scripts/experiment_runner/run_manifest_jobs.py +@@ -192,6 +192,8 @@ def _resume_stage(job: dict[str, str], states: dict[str, str]) -> str: + return "upload" + if stage == "train_succeeded": + return "eval" ++ if stage == "eval_failed": ++ return "eval" + return "train" + + +diff --git a/starVLA b/starVLA +index ab3380d..9d8c567 160000 +--- a/starVLA ++++ b/starVLA +@@ -1 +1 @@ +-Subproject commit ab3380dfbd1de9649c15d154cc41b97788674537 ++Subproject commit 9d8c567188a3aa2a825296016cf17f3977101d8f diff --git a/factor_sweeps/flappy/context_window/train/factor_sweep:flappy:frame_stack:fixed_l2:fs3:obs30:stride1:seed11/sf_log.txt b/factor_sweeps/flappy/context_window/train/factor_sweep:flappy:frame_stack:fixed_l2:fs3:obs30:stride1:seed11/sf_log.txt new file mode 100644 index 0000000000000000000000000000000000000000..0a75ce8b26c36cd333877847fa839f5cb4b0849a --- /dev/null +++ b/factor_sweeps/flappy/context_window/train/factor_sweep:flappy:frame_stack:fixed_l2:fs3:obs30:stride1:seed11/sf_log.txt @@ -0,0 +1,5348 @@ +[2026-06-07 03:03:12,586][492660] Saving configuration to results/checkpoints_factor_sweeps/flappy/context_window/flappy_frame_stack_fixed_l2_fs3_seed11/config.json... +[2026-06-07 03:03:12,651][492660] Using GPUs [0] for process 0 (actually maps to GPUs [0]) +[2026-06-07 03:03:12,653][492660] Rollout worker 0 uses device cuda:0 +[2026-06-07 03:03:12,653][492660] Using GPUs [0] for process 1 (actually maps to GPUs [0]) +[2026-06-07 03:03:12,653][492660] Rollout worker 1 uses device cuda:0 +[2026-06-07 03:03:14,555][492660] Using GPUs [0] for process 0 (actually maps to GPUs [0]) +[2026-06-07 03:03:14,555][492660] InferenceWorker_p0-w0: min num requests: 1 +[2026-06-07 03:03:14,560][492660] Using GPUs [0] for process 0 (actually maps to GPUs [0]) +[2026-06-07 03:03:14,564][492660] Using GPUs [0] for process 1 (actually maps to GPUs [0]) +[2026-06-07 03:03:14,565][492660] Starting all processes... +[2026-06-07 03:03:14,565][492660] Starting process learner_proc0 +[2026-06-07 03:03:15,827][492660] Starting all processes... +[2026-06-07 03:03:15,831][492660] Starting process inference_proc0-0 +[2026-06-07 03:03:15,831][492660] Starting process rollout_proc0 +[2026-06-07 03:03:15,831][492660] Starting process rollout_proc1 +[2026-06-07 03:03:16,404][495570] Using GPUs [0] for process 0 (actually maps to GPUs [0]) +[2026-06-07 03:03:16,404][495570] Set environment var CUDA_VISIBLE_DEVICES to '0' (GPU indices [0]) for learning process 0 +[2026-06-07 03:03:16,404][495570] Num visible devices: 1 +[2026-06-07 03:03:16,405][495570] Setting fixed seed 11 +[2026-06-07 03:03:16,406][495570] Using GPUs [0] for process 0 (actually maps to GPUs [0]) +[2026-06-07 03:03:16,407][495570] Initializing actor-critic model on device cuda:0 +[2026-06-07 03:03:16,407][495570] RunningMeanStd input shape: (9, 84, 84) +[2026-06-07 03:03:16,442][495570] RunningMeanStd input shape: (1,) +[2026-06-07 03:03:16,455][495570] ConvEncoder: input_channels=9 +[2026-06-07 03:03:16,544][495570] Conv encoder output size: 512 +[2026-06-07 03:03:16,545][495570] Created Actor Critic model with architecture: +[2026-06-07 03:03:16,545][495570] ActorCriticSharedWeights( + (obs_normalizer): ObservationNormalizer( + (running_mean_std): RunningMeanStdDictInPlace( + (running_mean_std): ModuleDict( + (obs): RunningMeanStdInPlace() + ) + ) + ) + (returns_normalizer): RecursiveScriptModule(original_name=RunningMeanStdInPlace) + (encoder): MultiInputEncoder( + (encoders): ModuleDict( + (obs): ConvEncoder( + (enc): RecursiveScriptModule( + original_name=ConvEncoderImpl + (conv_head): RecursiveScriptModule( + original_name=Sequential + (0): RecursiveScriptModule(original_name=Conv2d) + (1): RecursiveScriptModule(original_name=ELU) + (2): RecursiveScriptModule(original_name=Conv2d) + (3): RecursiveScriptModule(original_name=ELU) + (4): RecursiveScriptModule(original_name=Conv2d) + (5): RecursiveScriptModule(original_name=ELU) + ) + (mlp_layers): RecursiveScriptModule( + original_name=Sequential + (0): RecursiveScriptModule(original_name=Linear) + (1): RecursiveScriptModule(original_name=ELU) + ) + ) + ) + ) + ) + (core): ModelCoreIdentity() + (decoder): MlpDecoder( + (mlp): Identity() + ) + (critic_linear): Linear(in_features=512, out_features=1, bias=True) + (action_parameterization): ActionParameterizationDefault( + (distribution_linear): Linear(in_features=512, out_features=2, bias=True) + ) +) +[2026-06-07 03:03:16,553][495570] Using optimizer +[2026-06-07 03:03:17,353][495570] No checkpoints found +[2026-06-07 03:03:17,354][495570] Did not load from checkpoint, starting from scratch! +[2026-06-07 03:03:17,354][495570] Initialized policy 0 weights for model version 0 +[2026-06-07 03:03:17,359][495570] LearnerWorker_p0 finished initialization! +[2026-06-07 03:03:17,359][495570] Using GPUs [0] for process 0 (actually maps to GPUs [0]) +[2026-06-07 03:03:18,909][492660] Fps is (10 sec: nan, 60 sec: nan, 300 sec: nan). Total num frames: 0. Throughput: 0: nan. Samples: 0. Policy #0 lag: (min: -1.0, avg: -1.0, max: -1.0) +[2026-06-07 03:03:19,077][495928] Worker 1 uses CPU cores [192, 193, 194, 195, 196, 197, 198, 199, 200, 201, 202, 203, 204, 205, 206, 207, 208, 209, 210, 211, 212, 213, 214, 215, 216, 217, 218, 219, 220, 221, 222, 223, 224, 225, 226, 227, 228, 229, 230, 231, 232, 233, 234, 235, 236, 237, 238, 239, 240, 241, 242, 243, 244, 245, 246, 247, 248, 249, 250, 251, 252, 253, 254, 255, 256, 257, 258, 259, 260, 261, 262, 263, 264, 265, 266, 267, 268, 269, 270, 271, 272, 273, 274, 275, 276, 277, 278, 279, 280, 281, 282, 283, 284, 285, 286, 287, 288, 289, 290, 291, 292, 293, 294, 295, 296, 297, 298, 299, 300, 301, 302, 303, 304, 305, 306, 307, 308, 309, 310, 311, 312, 313, 314, 315, 316, 317, 318, 319, 320, 321, 322, 323, 324, 325, 326, 327, 328, 329, 330, 331, 332, 333, 334, 335, 336, 337, 338, 339, 340, 341, 342, 343, 344, 345, 346, 347, 348, 349, 350, 351, 352, 353, 354, 355, 356, 357, 358, 359, 360, 361, 362, 363, 364, 365, 366, 367, 368, 369, 370, 371, 372, 373, 374, 375, 376, 377, 378, 379, 380, 381, 382, 383] +[2026-06-07 03:03:19,078][495928] Using GPUs [0] for process 1 (actually maps to GPUs [0]) +[2026-06-07 03:03:19,078][495928] Set environment var CUDA_VISIBLE_DEVICES to '0' (GPU indices [0]) for actor process 1 +[2026-06-07 03:03:19,078][495928] Num visible devices: 1 +[2026-06-07 03:03:19,099][495927] Using GPUs [0] for process 0 (actually maps to GPUs [0]) +[2026-06-07 03:03:19,100][495927] Set environment var CUDA_VISIBLE_DEVICES to '0' (GPU indices [0]) for inference process 0 +[2026-06-07 03:03:19,100][495927] Num visible devices: 1 +[2026-06-07 03:03:19,105][495927] RunningMeanStd input shape: (9, 84, 84) +[2026-06-07 03:03:19,142][495927] RunningMeanStd input shape: (1,) +[2026-06-07 03:03:19,157][495927] ConvEncoder: input_channels=9 +[2026-06-07 03:03:19,164][495929] Worker 0 uses CPU cores [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63, 64, 65, 66, 67, 68, 69, 70, 71, 72, 73, 74, 75, 76, 77, 78, 79, 80, 81, 82, 83, 84, 85, 86, 87, 88, 89, 90, 91, 92, 93, 94, 95, 96, 97, 98, 99, 100, 101, 102, 103, 104, 105, 106, 107, 108, 109, 110, 111, 112, 113, 114, 115, 116, 117, 118, 119, 120, 121, 122, 123, 124, 125, 126, 127, 128, 129, 130, 131, 132, 133, 134, 135, 136, 137, 138, 139, 140, 141, 142, 143, 144, 145, 146, 147, 148, 149, 150, 151, 152, 153, 154, 155, 156, 157, 158, 159, 160, 161, 162, 163, 164, 165, 166, 167, 168, 169, 170, 171, 172, 173, 174, 175, 176, 177, 178, 179, 180, 181, 182, 183, 184, 185, 186, 187, 188, 189, 190, 191] +[2026-06-07 03:03:19,164][495929] Using GPUs [0] for process 0 (actually maps to GPUs [0]) +[2026-06-07 03:03:19,164][495929] Set environment var CUDA_VISIBLE_DEVICES to '0' (GPU indices [0]) for actor process 0 +[2026-06-07 03:03:19,165][495929] Num visible devices: 1 +[2026-06-07 03:03:19,219][495927] Conv encoder output size: 512 +[2026-06-07 03:03:19,233][492660] Inference worker 0-0 is ready! +[2026-06-07 03:03:19,233][492660] All inference workers are ready! Signal rollout workers to start! +[2026-06-07 03:03:19,234][495929] EnvRunner 0-0 uses policy 0 +[2026-06-07 03:03:19,234][495928] EnvRunner 1-0 uses policy 0 +[2026-06-07 03:03:22,072][495570] Signal inference workers to stop experience collection... +[2026-06-07 03:03:22,076][495927] InferenceWorker_p0-w0: stopping experience collection +[2026-06-07 03:03:23,699][495570] Signal inference workers to resume experience collection... +[2026-06-07 03:03:23,700][495927] InferenceWorker_p0-w0: resuming experience collection +[2026-06-07 03:03:23,909][492660] Fps is (10 sec: 6553.5, 60 sec: 6553.5, 300 sec: 6553.5). Total num frames: 32768. Throughput: 0: 13465.4. Samples: 67328. Policy #0 lag: (min: 3.0, avg: 3.0, max: 3.0) +[2026-06-07 03:03:23,910][492660] Avg episode reward: [(0, '-7.402')] +[2026-06-07 03:03:23,951][495927] Updated weights for policy 0, policy_version 73 (0.0040) +[2026-06-07 03:03:24,103][495927] Updated weights for policy 0, policy_version 83 (0.0008) +[2026-06-07 03:03:24,261][495927] Updated weights for policy 0, policy_version 94 (0.0008) +[2026-06-07 03:03:24,416][495927] Updated weights for policy 0, policy_version 104 (0.0009) +[2026-06-07 03:03:24,576][495927] Updated weights for policy 0, policy_version 114 (0.0008) +[2026-06-07 03:03:24,727][495927] Updated weights for policy 0, policy_version 124 (0.0009) +[2026-06-07 03:03:25,174][495927] Updated weights for policy 0, policy_version 134 (0.0008) +[2026-06-07 03:03:25,321][495927] Updated weights for policy 0, policy_version 144 (0.0008) +[2026-06-07 03:03:25,485][495927] Updated weights for policy 0, policy_version 154 (0.0008) +[2026-06-07 03:03:25,638][495927] Updated weights for policy 0, policy_version 164 (0.0008) +[2026-06-07 03:03:25,791][495927] Updated weights for policy 0, policy_version 174 (0.0008) +[2026-06-07 03:03:25,944][495927] Updated weights for policy 0, policy_version 184 (0.0009) +[2026-06-07 03:03:26,455][495927] Updated weights for policy 0, policy_version 194 (0.0009) +[2026-06-07 03:03:26,606][495927] Updated weights for policy 0, policy_version 204 (0.0009) +[2026-06-07 03:03:26,765][495927] Updated weights for policy 0, policy_version 214 (0.0009) +[2026-06-07 03:03:26,967][495927] Updated weights for policy 0, policy_version 228 (0.0008) +[2026-06-07 03:03:27,119][495927] Updated weights for policy 0, policy_version 238 (0.0008) +[2026-06-07 03:03:27,281][495927] Updated weights for policy 0, policy_version 248 (0.0009) +[2026-06-07 03:03:27,807][495927] Updated weights for policy 0, policy_version 260 (0.0009) +[2026-06-07 03:03:27,952][495927] Updated weights for policy 0, policy_version 270 (0.0008) +[2026-06-07 03:03:28,110][495927] Updated weights for policy 0, policy_version 280 (0.0009) +[2026-06-07 03:03:28,262][495927] Updated weights for policy 0, policy_version 290 (0.0009) +[2026-06-07 03:03:28,416][495927] Updated weights for policy 0, policy_version 300 (0.0009) +[2026-06-07 03:03:28,575][495927] Updated weights for policy 0, policy_version 310 (0.0007) +[2026-06-07 03:03:28,736][495927] Updated weights for policy 0, policy_version 320 (0.0011) +[2026-06-07 03:03:28,909][492660] Fps is (10 sec: 16383.4, 60 sec: 16383.4, 300 sec: 16383.4). Total num frames: 163840. Throughput: 0: 18239.3. Samples: 182400. Policy #0 lag: (min: 63.0, avg: 84.3, max: 127.0) +[2026-06-07 03:03:28,911][492660] Avg episode reward: [(0, '-5.958')] +[2026-06-07 03:03:28,919][495570] Saving new best policy, reward=-5.958! +[2026-06-07 03:03:29,241][495927] Updated weights for policy 0, policy_version 330 (0.0009) +[2026-06-07 03:03:29,399][495927] Updated weights for policy 0, policy_version 340 (0.0009) +[2026-06-07 03:03:29,545][495927] Updated weights for policy 0, policy_version 350 (0.0009) +[2026-06-07 03:03:29,692][495927] Updated weights for policy 0, policy_version 360 (0.0009) +[2026-06-07 03:03:29,848][495927] Updated weights for policy 0, policy_version 370 (0.0008) +[2026-06-07 03:03:30,011][495927] Updated weights for policy 0, policy_version 380 (0.0008) +[2026-06-07 03:03:30,525][495927] Updated weights for policy 0, policy_version 392 (0.0008) +[2026-06-07 03:03:30,680][495927] Updated weights for policy 0, policy_version 402 (0.0008) +[2026-06-07 03:03:30,838][495927] Updated weights for policy 0, policy_version 412 (0.0008) +[2026-06-07 03:03:30,997][495927] Updated weights for policy 0, policy_version 422 (0.0007) +[2026-06-07 03:03:31,171][495927] Updated weights for policy 0, policy_version 434 (0.0008) +[2026-06-07 03:03:31,327][495927] Updated weights for policy 0, policy_version 444 (0.0008) +[2026-06-07 03:03:31,843][495927] Updated weights for policy 0, policy_version 454 (0.0007) +[2026-06-07 03:03:31,992][495927] Updated weights for policy 0, policy_version 464 (0.0009) +[2026-06-07 03:03:32,149][495927] Updated weights for policy 0, policy_version 474 (0.0008) +[2026-06-07 03:03:32,304][495927] Updated weights for policy 0, policy_version 484 (0.0012) +[2026-06-07 03:03:32,456][495927] Updated weights for policy 0, policy_version 494 (0.0009) +[2026-06-07 03:03:32,618][495927] Updated weights for policy 0, policy_version 504 (0.0008) +[2026-06-07 03:03:33,082][495927] Updated weights for policy 0, policy_version 514 (0.0008) +[2026-06-07 03:03:33,220][495927] Updated weights for policy 0, policy_version 524 (0.0009) +[2026-06-07 03:03:33,371][495927] Updated weights for policy 0, policy_version 534 (0.0008) +[2026-06-07 03:03:33,531][495927] Updated weights for policy 0, policy_version 544 (0.0008) +[2026-06-07 03:03:33,694][495927] Updated weights for policy 0, policy_version 554 (0.0009) +[2026-06-07 03:03:33,909][492660] Fps is (10 sec: 22937.7, 60 sec: 17476.2, 300 sec: 17476.2). Total num frames: 262144. Throughput: 0: 17237.3. Samples: 258560. Policy #0 lag: (min: 21.0, avg: 39.5, max: 85.0) +[2026-06-07 03:03:33,910][492660] Avg episode reward: [(0, '-3.272')] +[2026-06-07 03:03:33,919][495927] Updated weights for policy 0, policy_version 570 (0.0009) +[2026-06-07 03:03:34,012][495570] Saving new best policy, reward=-3.272! +[2026-06-07 03:03:34,348][495927] Updated weights for policy 0, policy_version 580 (0.0008) +[2026-06-07 03:03:34,501][495927] Updated weights for policy 0, policy_version 590 (0.0008) +[2026-06-07 03:03:34,544][492660] Heartbeat connected on Batcher_0 +[2026-06-07 03:03:34,565][492660] Heartbeat connected on RolloutWorker_w1 +[2026-06-07 03:03:34,565][492660] Heartbeat connected on RolloutWorker_w0 +[2026-06-07 03:03:34,568][492660] Heartbeat connected on InferenceWorker_p0-w0 +[2026-06-07 03:03:34,655][495927] Updated weights for policy 0, policy_version 600 (0.0008) +[2026-06-07 03:03:34,812][495927] Updated weights for policy 0, policy_version 610 (0.0008) +[2026-06-07 03:03:34,969][495927] Updated weights for policy 0, policy_version 620 (0.0008) +[2026-06-07 03:03:35,115][495927] Updated weights for policy 0, policy_version 630 (0.0008) +[2026-06-07 03:03:35,263][492660] Heartbeat connected on LearnerWorker_p0 +[2026-06-07 03:03:35,265][495927] Updated weights for policy 0, policy_version 640 (0.0008) +[2026-06-07 03:03:35,689][495927] Updated weights for policy 0, policy_version 652 (0.0008) +[2026-06-07 03:03:35,830][495927] Updated weights for policy 0, policy_version 662 (0.0009) +[2026-06-07 03:03:35,994][495927] Updated weights for policy 0, policy_version 672 (0.0008) +[2026-06-07 03:03:36,156][495927] Updated weights for policy 0, policy_version 682 (0.0008) +[2026-06-07 03:03:36,393][495927] Updated weights for policy 0, policy_version 699 (0.0009) +[2026-06-07 03:03:36,897][495927] Updated weights for policy 0, policy_version 709 (0.0006) +[2026-06-07 03:03:37,059][495927] Updated weights for policy 0, policy_version 719 (0.0004) +[2026-06-07 03:03:37,273][495927] Updated weights for policy 0, policy_version 734 (0.0006) +[2026-06-07 03:03:37,428][495927] Updated weights for policy 0, policy_version 745 (0.0008) +[2026-06-07 03:03:37,589][495927] Updated weights for policy 0, policy_version 755 (0.0008) +[2026-06-07 03:03:37,751][495927] Updated weights for policy 0, policy_version 765 (0.0008) +[2026-06-07 03:03:38,197][495927] Updated weights for policy 0, policy_version 775 (0.0008) +[2026-06-07 03:03:38,371][495927] Updated weights for policy 0, policy_version 787 (0.0008) +[2026-06-07 03:03:38,494][495927] Updated weights for policy 0, policy_version 797 (0.0004) +[2026-06-07 03:03:38,654][495927] Updated weights for policy 0, policy_version 807 (0.0005) +[2026-06-07 03:03:38,814][495927] Updated weights for policy 0, policy_version 817 (0.0008) +[2026-06-07 03:03:38,909][492660] Fps is (10 sec: 22938.1, 60 sec: 19660.7, 300 sec: 19660.7). Total num frames: 393216. Throughput: 0: 20505.4. Samples: 410112. Policy #0 lag: (min: 30.0, avg: 56.4, max: 94.0) +[2026-06-07 03:03:38,912][492660] Avg episode reward: [(0, '2.673')] +[2026-06-07 03:03:38,967][495927] Updated weights for policy 0, policy_version 827 (0.0008) +[2026-06-07 03:03:39,040][495570] Saving new best policy, reward=2.673! +[2026-06-07 03:03:39,548][495927] Updated weights for policy 0, policy_version 837 (0.0006) +[2026-06-07 03:03:39,704][495927] Updated weights for policy 0, policy_version 847 (0.0007) +[2026-06-07 03:03:39,862][495927] Updated weights for policy 0, policy_version 858 (0.0004) +[2026-06-07 03:03:40,034][495927] Updated weights for policy 0, policy_version 870 (0.0004) +[2026-06-07 03:03:40,167][495927] Updated weights for policy 0, policy_version 880 (0.0004) +[2026-06-07 03:03:40,324][495927] Updated weights for policy 0, policy_version 890 (0.0004) +[2026-06-07 03:03:40,868][495927] Updated weights for policy 0, policy_version 900 (0.0006) +[2026-06-07 03:03:41,019][495927] Updated weights for policy 0, policy_version 911 (0.0008) +[2026-06-07 03:03:41,178][495927] Updated weights for policy 0, policy_version 923 (0.0008) +[2026-06-07 03:03:41,320][495927] Updated weights for policy 0, policy_version 933 (0.0010) +[2026-06-07 03:03:41,467][495927] Updated weights for policy 0, policy_version 943 (0.0008) +[2026-06-07 03:03:41,610][495927] Updated weights for policy 0, policy_version 953 (0.0009) +[2026-06-07 03:03:42,356][495927] Updated weights for policy 0, policy_version 963 (0.0008) +[2026-06-07 03:03:42,514][495927] Updated weights for policy 0, policy_version 974 (0.0008) +[2026-06-07 03:03:42,669][495927] Updated weights for policy 0, policy_version 984 (0.0008) +[2026-06-07 03:03:42,830][495927] Updated weights for policy 0, policy_version 996 (0.0009) +[2026-06-07 03:03:42,965][495927] Updated weights for policy 0, policy_version 1006 (0.0008) +[2026-06-07 03:03:43,105][495927] Updated weights for policy 0, policy_version 1016 (0.0009) +[2026-06-07 03:03:43,839][495927] Updated weights for policy 0, policy_version 1026 (0.0008) +[2026-06-07 03:03:43,909][492660] Fps is (10 sec: 26214.4, 60 sec: 20971.5, 300 sec: 20971.5). Total num frames: 524288. Throughput: 0: 21775.3. Samples: 544384. Policy #0 lag: (min: 63.0, avg: 78.4, max: 127.0) +[2026-06-07 03:03:43,910][492660] Avg episode reward: [(0, '4.083')] +[2026-06-07 03:03:43,967][495927] Updated weights for policy 0, policy_version 1036 (0.0008) +[2026-06-07 03:03:44,093][495927] Updated weights for policy 0, policy_version 1046 (0.0007) +[2026-06-07 03:03:44,244][495927] Updated weights for policy 0, policy_version 1056 (0.0008) +[2026-06-07 03:03:44,380][495927] Updated weights for policy 0, policy_version 1066 (0.0008) +[2026-06-07 03:03:44,526][495927] Updated weights for policy 0, policy_version 1076 (0.0008) +[2026-06-07 03:03:44,685][495927] Updated weights for policy 0, policy_version 1087 (0.0007) +[2026-06-07 03:03:44,700][495570] Saving new best policy, reward=4.083! +[2026-06-07 03:03:45,481][495927] Updated weights for policy 0, policy_version 1098 (0.0009) +[2026-06-07 03:03:45,627][495927] Updated weights for policy 0, policy_version 1108 (0.0009) +[2026-06-07 03:03:45,769][495927] Updated weights for policy 0, policy_version 1118 (0.0008) +[2026-06-07 03:03:45,947][495927] Updated weights for policy 0, policy_version 1131 (0.0008) +[2026-06-07 03:03:46,104][495927] Updated weights for policy 0, policy_version 1142 (0.0008) +[2026-06-07 03:03:46,245][495927] Updated weights for policy 0, policy_version 1152 (0.0008) +[2026-06-07 03:03:47,012][495927] Updated weights for policy 0, policy_version 1162 (0.0009) +[2026-06-07 03:03:47,142][495927] Updated weights for policy 0, policy_version 1172 (0.0008) +[2026-06-07 03:03:47,301][495927] Updated weights for policy 0, policy_version 1183 (0.0008) +[2026-06-07 03:03:47,447][495927] Updated weights for policy 0, policy_version 1193 (0.0008) +[2026-06-07 03:03:47,604][495927] Updated weights for policy 0, policy_version 1204 (0.0008) +[2026-06-07 03:03:47,757][495927] Updated weights for policy 0, policy_version 1215 (0.0009) +[2026-06-07 03:03:48,554][495927] Updated weights for policy 0, policy_version 1225 (0.0008) +[2026-06-07 03:03:48,686][495927] Updated weights for policy 0, policy_version 1235 (0.0008) +[2026-06-07 03:03:48,841][495927] Updated weights for policy 0, policy_version 1246 (0.0008) +[2026-06-07 03:03:48,910][492660] Fps is (10 sec: 22937.3, 60 sec: 20752.9, 300 sec: 20752.9). Total num frames: 622592. Throughput: 0: 20300.6. Samples: 609024. Policy #0 lag: (min: 63.0, avg: 76.8, max: 127.0) +[2026-06-07 03:03:48,912][492660] Avg episode reward: [(0, '4.133')] +[2026-06-07 03:03:49,055][495927] Updated weights for policy 0, policy_version 1261 (0.0009) +[2026-06-07 03:03:49,219][495927] Updated weights for policy 0, policy_version 1273 (0.0008) +[2026-06-07 03:03:49,319][495570] Saving new best policy, reward=4.133! +[2026-06-07 03:03:50,039][495927] Updated weights for policy 0, policy_version 1284 (0.0009) +[2026-06-07 03:03:50,190][495927] Updated weights for policy 0, policy_version 1295 (0.0008) +[2026-06-07 03:03:50,331][495927] Updated weights for policy 0, policy_version 1305 (0.0008) +[2026-06-07 03:03:50,482][495927] Updated weights for policy 0, policy_version 1316 (0.0008) +[2026-06-07 03:03:50,627][495927] Updated weights for policy 0, policy_version 1326 (0.0009) +[2026-06-07 03:03:50,787][495927] Updated weights for policy 0, policy_version 1337 (0.0008) +[2026-06-07 03:03:51,584][495927] Updated weights for policy 0, policy_version 1347 (0.0008) +[2026-06-07 03:03:51,719][495927] Updated weights for policy 0, policy_version 1357 (0.0008) +[2026-06-07 03:03:51,858][495927] Updated weights for policy 0, policy_version 1367 (0.0009) +[2026-06-07 03:03:52,013][495927] Updated weights for policy 0, policy_version 1378 (0.0008) +[2026-06-07 03:03:52,184][495927] Updated weights for policy 0, policy_version 1390 (0.0009) +[2026-06-07 03:03:52,344][495927] Updated weights for policy 0, policy_version 1401 (0.0009) +[2026-06-07 03:03:53,107][495927] Updated weights for policy 0, policy_version 1412 (0.0008) +[2026-06-07 03:03:53,249][495927] Updated weights for policy 0, policy_version 1423 (0.0008) +[2026-06-07 03:03:53,389][495927] Updated weights for policy 0, policy_version 1433 (0.0007) +[2026-06-07 03:03:53,551][495927] Updated weights for policy 0, policy_version 1444 (0.0009) +[2026-06-07 03:03:53,727][495927] Updated weights for policy 0, policy_version 1456 (0.0009) +[2026-06-07 03:03:53,859][495927] Updated weights for policy 0, policy_version 1466 (0.0008) +[2026-06-07 03:03:53,909][492660] Fps is (10 sec: 19660.5, 60 sec: 20596.9, 300 sec: 20596.9). Total num frames: 720896. Throughput: 0: 21094.3. Samples: 738304. Policy #0 lag: (min: 61.0, avg: 72.4, max: 125.0) +[2026-06-07 03:03:53,911][492660] Avg episode reward: [(0, '4.277')] +[2026-06-07 03:03:53,945][495570] Saving new best policy, reward=4.277! +[2026-06-07 03:03:54,690][495927] Updated weights for policy 0, policy_version 1476 (0.0008) +[2026-06-07 03:03:54,859][495927] Updated weights for policy 0, policy_version 1488 (0.0009) +[2026-06-07 03:03:55,017][495927] Updated weights for policy 0, policy_version 1500 (0.0009) +[2026-06-07 03:03:55,177][495927] Updated weights for policy 0, policy_version 1511 (0.0008) +[2026-06-07 03:03:55,316][495927] Updated weights for policy 0, policy_version 1521 (0.0008) +[2026-06-07 03:03:55,474][495927] Updated weights for policy 0, policy_version 1532 (0.0010) +[2026-06-07 03:03:56,285][495927] Updated weights for policy 0, policy_version 1542 (0.0010) +[2026-06-07 03:03:56,437][495927] Updated weights for policy 0, policy_version 1554 (0.0008) +[2026-06-07 03:03:56,581][495927] Updated weights for policy 0, policy_version 1564 (0.0008) +[2026-06-07 03:03:56,736][495927] Updated weights for policy 0, policy_version 1575 (0.0008) +[2026-06-07 03:03:56,877][495927] Updated weights for policy 0, policy_version 1585 (0.0008) +[2026-06-07 03:03:57,044][495927] Updated weights for policy 0, policy_version 1597 (0.0008) +[2026-06-07 03:03:57,860][495927] Updated weights for policy 0, policy_version 1607 (0.0009) +[2026-06-07 03:03:57,992][495927] Updated weights for policy 0, policy_version 1617 (0.0008) +[2026-06-07 03:03:58,129][495927] Updated weights for policy 0, policy_version 1627 (0.0008) +[2026-06-07 03:03:58,280][495927] Updated weights for policy 0, policy_version 1638 (0.0008) +[2026-06-07 03:03:58,436][495927] Updated weights for policy 0, policy_version 1649 (0.0008) +[2026-06-07 03:03:58,589][495927] Updated weights for policy 0, policy_version 1659 (0.0008) +[2026-06-07 03:03:58,909][492660] Fps is (10 sec: 22938.2, 60 sec: 21299.2, 300 sec: 21299.2). Total num frames: 851968. Throughput: 0: 21657.6. Samples: 866304. Policy #0 lag: (min: 2.0, avg: 44.2, max: 66.0) +[2026-06-07 03:03:58,911][492660] Avg episode reward: [(0, '4.620')] +[2026-06-07 03:03:58,916][495570] Saving new best policy, reward=4.620! +[2026-06-07 03:03:59,367][495927] Updated weights for policy 0, policy_version 1670 (0.0009) +[2026-06-07 03:03:59,507][495927] Updated weights for policy 0, policy_version 1680 (0.0008) +[2026-06-07 03:03:59,657][495927] Updated weights for policy 0, policy_version 1691 (0.0009) +[2026-06-07 03:03:59,809][495927] Updated weights for policy 0, policy_version 1701 (0.0010) +[2026-06-07 03:03:59,947][495927] Updated weights for policy 0, policy_version 1711 (0.0007) +[2026-06-07 03:04:00,087][495927] Updated weights for policy 0, policy_version 1721 (0.0008) +[2026-06-07 03:04:00,847][495927] Updated weights for policy 0, policy_version 1731 (0.0008) +[2026-06-07 03:04:01,029][495927] Updated weights for policy 0, policy_version 1745 (0.0006) +[2026-06-07 03:04:01,165][495927] Updated weights for policy 0, policy_version 1755 (0.0008) +[2026-06-07 03:04:01,306][495927] Updated weights for policy 0, policy_version 1765 (0.0008) +[2026-06-07 03:04:01,457][495927] Updated weights for policy 0, policy_version 1776 (0.0008) +[2026-06-07 03:04:01,604][495927] Updated weights for policy 0, policy_version 1786 (0.0008) +[2026-06-07 03:04:02,440][495927] Updated weights for policy 0, policy_version 1797 (0.0009) +[2026-06-07 03:04:02,582][495927] Updated weights for policy 0, policy_version 1807 (0.0009) +[2026-06-07 03:04:02,724][495927] Updated weights for policy 0, policy_version 1817 (0.0008) +[2026-06-07 03:04:02,859][495927] Updated weights for policy 0, policy_version 1827 (0.0008) +[2026-06-07 03:04:03,009][495927] Updated weights for policy 0, policy_version 1837 (0.0008) +[2026-06-07 03:04:03,154][495927] Updated weights for policy 0, policy_version 1848 (0.0008) +[2026-06-07 03:04:03,909][492660] Fps is (10 sec: 22937.7, 60 sec: 21117.1, 300 sec: 21117.1). Total num frames: 950272. Throughput: 0: 20653.5. Samples: 929408. Policy #0 lag: (min: 42.0, avg: 80.7, max: 106.0) +[2026-06-07 03:04:03,911][492660] Avg episode reward: [(0, '5.361')] +[2026-06-07 03:04:03,967][495927] Updated weights for policy 0, policy_version 1859 (0.0008) +[2026-06-07 03:04:04,091][495927] Updated weights for policy 0, policy_version 1869 (0.0007) +[2026-06-07 03:04:04,285][495927] Updated weights for policy 0, policy_version 1882 (0.0009) +[2026-06-07 03:04:04,439][495927] Updated weights for policy 0, policy_version 1893 (0.0008) +[2026-06-07 03:04:04,594][495927] Updated weights for policy 0, policy_version 1904 (0.0008) +[2026-06-07 03:04:04,730][495927] Updated weights for policy 0, policy_version 1914 (0.0008) +[2026-06-07 03:04:04,810][495570] Saving new best policy, reward=5.361! +[2026-06-07 03:04:05,544][495927] Updated weights for policy 0, policy_version 1927 (0.0006) +[2026-06-07 03:04:05,678][495927] Updated weights for policy 0, policy_version 1937 (0.0007) +[2026-06-07 03:04:05,818][495927] Updated weights for policy 0, policy_version 1947 (0.0004) +[2026-06-07 03:04:05,993][495927] Updated weights for policy 0, policy_version 1960 (0.0004) +[2026-06-07 03:04:06,149][495927] Updated weights for policy 0, policy_version 1971 (0.0004) +[2026-06-07 03:04:06,308][495927] Updated weights for policy 0, policy_version 1982 (0.0004) +[2026-06-07 03:04:07,049][495927] Updated weights for policy 0, policy_version 1992 (0.0007) +[2026-06-07 03:04:07,193][495927] Updated weights for policy 0, policy_version 2002 (0.0008) +[2026-06-07 03:04:07,361][495927] Updated weights for policy 0, policy_version 2014 (0.0009) +[2026-06-07 03:04:07,513][495927] Updated weights for policy 0, policy_version 2024 (0.0009) +[2026-06-07 03:04:07,668][495927] Updated weights for policy 0, policy_version 2035 (0.0007) +[2026-06-07 03:04:07,806][495927] Updated weights for policy 0, policy_version 2045 (0.0006) +[2026-06-07 03:04:08,558][495927] Updated weights for policy 0, policy_version 2055 (0.0005) +[2026-06-07 03:04:08,709][495927] Updated weights for policy 0, policy_version 2065 (0.0008) +[2026-06-07 03:04:08,853][495927] Updated weights for policy 0, policy_version 2076 (0.0008) +[2026-06-07 03:04:08,909][492660] Fps is (10 sec: 19661.0, 60 sec: 20971.6, 300 sec: 20971.6). Total num frames: 1048576. Throughput: 0: 22050.2. Samples: 1059584. Policy #0 lag: (min: 63.0, avg: 75.1, max: 127.0) +[2026-06-07 03:04:08,910][492660] Avg episode reward: [(0, '5.766')] +[2026-06-07 03:04:08,999][495927] Updated weights for policy 0, policy_version 2086 (0.0006) +[2026-06-07 03:04:09,142][495927] Updated weights for policy 0, policy_version 2096 (0.0008) +[2026-06-07 03:04:09,293][495927] Updated weights for policy 0, policy_version 2106 (0.0008) +[2026-06-07 03:04:09,363][495570] Saving new best policy, reward=5.766! +[2026-06-07 03:04:10,131][495927] Updated weights for policy 0, policy_version 2119 (0.0007) +[2026-06-07 03:04:10,269][495927] Updated weights for policy 0, policy_version 2129 (0.0008) +[2026-06-07 03:04:10,416][495927] Updated weights for policy 0, policy_version 2139 (0.0008) +[2026-06-07 03:04:10,566][495927] Updated weights for policy 0, policy_version 2150 (0.0008) +[2026-06-07 03:04:10,713][495927] Updated weights for policy 0, policy_version 2160 (0.0008) +[2026-06-07 03:04:10,865][495927] Updated weights for policy 0, policy_version 2170 (0.0008) +[2026-06-07 03:04:11,661][495927] Updated weights for policy 0, policy_version 2180 (0.0009) +[2026-06-07 03:04:11,800][495927] Updated weights for policy 0, policy_version 2191 (0.0008) +[2026-06-07 03:04:11,941][495927] Updated weights for policy 0, policy_version 2201 (0.0008) +[2026-06-07 03:04:12,097][495927] Updated weights for policy 0, policy_version 2212 (0.0008) +[2026-06-07 03:04:12,244][495927] Updated weights for policy 0, policy_version 2222 (0.0008) +[2026-06-07 03:04:12,412][495927] Updated weights for policy 0, policy_version 2234 (0.0008) +[2026-06-07 03:04:13,262][495927] Updated weights for policy 0, policy_version 2246 (0.0009) +[2026-06-07 03:04:13,426][495927] Updated weights for policy 0, policy_version 2257 (0.0009) +[2026-06-07 03:04:13,624][495927] Updated weights for policy 0, policy_version 2271 (0.0009) +[2026-06-07 03:04:13,761][495927] Updated weights for policy 0, policy_version 2281 (0.0009) +[2026-06-07 03:04:13,910][495927] Updated weights for policy 0, policy_version 2291 (0.0009) +[2026-06-07 03:04:13,909][492660] Fps is (10 sec: 19660.7, 60 sec: 20852.3, 300 sec: 20852.3). Total num frames: 1146880. Throughput: 0: 22371.6. Samples: 1189120. Policy #0 lag: (min: 63.0, avg: 75.4, max: 127.0) +[2026-06-07 03:04:13,912][492660] Avg episode reward: [(0, '6.573')] +[2026-06-07 03:04:14,044][495927] Updated weights for policy 0, policy_version 2301 (0.0008) +[2026-06-07 03:04:14,082][495570] Saving new best policy, reward=6.573! +[2026-06-07 03:04:14,857][495927] Updated weights for policy 0, policy_version 2311 (0.0009) +[2026-06-07 03:04:15,014][495927] Updated weights for policy 0, policy_version 2322 (0.0006) +[2026-06-07 03:04:15,180][495927] Updated weights for policy 0, policy_version 2334 (0.0008) +[2026-06-07 03:04:15,330][495927] Updated weights for policy 0, policy_version 2345 (0.0008) +[2026-06-07 03:04:15,472][495927] Updated weights for policy 0, policy_version 2355 (0.0008) +[2026-06-07 03:04:15,644][495927] Updated weights for policy 0, policy_version 2367 (0.0008) +[2026-06-07 03:04:16,464][495927] Updated weights for policy 0, policy_version 2379 (0.0008) +[2026-06-07 03:04:16,626][495927] Updated weights for policy 0, policy_version 2391 (0.0008) +[2026-06-07 03:04:16,769][495927] Updated weights for policy 0, policy_version 2401 (0.0008) +[2026-06-07 03:04:16,917][495927] Updated weights for policy 0, policy_version 2412 (0.0006) +[2026-06-07 03:04:17,118][495927] Updated weights for policy 0, policy_version 2425 (0.0008) +[2026-06-07 03:04:17,945][495927] Updated weights for policy 0, policy_version 2435 (0.0007) +[2026-06-07 03:04:18,096][495927] Updated weights for policy 0, policy_version 2446 (0.0008) +[2026-06-07 03:04:18,232][495927] Updated weights for policy 0, policy_version 2456 (0.0008) +[2026-06-07 03:04:18,370][495927] Updated weights for policy 0, policy_version 2466 (0.0008) +[2026-06-07 03:04:18,570][495927] Updated weights for policy 0, policy_version 2480 (0.0009) +[2026-06-07 03:04:18,715][495927] Updated weights for policy 0, policy_version 2490 (0.0008) +[2026-06-07 03:04:18,910][492660] Fps is (10 sec: 22935.8, 60 sec: 21299.0, 300 sec: 21299.0). Total num frames: 1277952. Throughput: 0: 22109.5. Samples: 1253504. Policy #0 lag: (min: 4.0, avg: 17.0, max: 68.0) +[2026-06-07 03:04:18,913][492660] Avg episode reward: [(0, '7.482')] +[2026-06-07 03:04:18,922][495570] Saving new best policy, reward=7.482! +[2026-06-07 03:04:19,568][495927] Updated weights for policy 0, policy_version 2501 (0.0008) +[2026-06-07 03:04:19,695][495927] Updated weights for policy 0, policy_version 2511 (0.0008) +[2026-06-07 03:04:19,838][495927] Updated weights for policy 0, policy_version 2521 (0.0008) +[2026-06-07 03:04:19,976][495927] Updated weights for policy 0, policy_version 2531 (0.0009) +[2026-06-07 03:04:20,141][495927] Updated weights for policy 0, policy_version 2542 (0.0009) +[2026-06-07 03:04:20,311][495927] Updated weights for policy 0, policy_version 2554 (0.0010) +[2026-06-07 03:04:21,157][495927] Updated weights for policy 0, policy_version 2565 (0.0009) +[2026-06-07 03:04:21,329][495927] Updated weights for policy 0, policy_version 2577 (0.0008) +[2026-06-07 03:04:21,486][495927] Updated weights for policy 0, policy_version 2588 (0.0008) +[2026-06-07 03:04:21,620][495927] Updated weights for policy 0, policy_version 2598 (0.0005) +[2026-06-07 03:04:21,763][495927] Updated weights for policy 0, policy_version 2608 (0.0004) +[2026-06-07 03:04:21,909][495927] Updated weights for policy 0, policy_version 2618 (0.0004) +[2026-06-07 03:04:22,728][495927] Updated weights for policy 0, policy_version 2632 (0.0007) +[2026-06-07 03:04:22,898][495927] Updated weights for policy 0, policy_version 2644 (0.0008) +[2026-06-07 03:04:23,070][495927] Updated weights for policy 0, policy_version 2657 (0.0008) +[2026-06-07 03:04:23,243][495927] Updated weights for policy 0, policy_version 2669 (0.0008) +[2026-06-07 03:04:23,404][495927] Updated weights for policy 0, policy_version 2680 (0.0009) +[2026-06-07 03:04:23,909][492660] Fps is (10 sec: 22938.1, 60 sec: 22391.5, 300 sec: 21173.2). Total num frames: 1376256. Throughput: 0: 21470.0. Samples: 1376256. Policy #0 lag: (min: 62.0, avg: 72.6, max: 126.0) +[2026-06-07 03:04:23,910][492660] Avg episode reward: [(0, '9.488')] +[2026-06-07 03:04:23,915][495570] Saving new best policy, reward=9.488! +[2026-06-07 03:04:24,213][495927] Updated weights for policy 0, policy_version 2691 (0.0008) +[2026-06-07 03:04:24,381][495927] Updated weights for policy 0, policy_version 2703 (0.0008) +[2026-06-07 03:04:24,515][495927] Updated weights for policy 0, policy_version 2713 (0.0008) +[2026-06-07 03:04:24,649][495927] Updated weights for policy 0, policy_version 2723 (0.0006) +[2026-06-07 03:04:24,804][495927] Updated weights for policy 0, policy_version 2733 (0.0009) +[2026-06-07 03:04:24,957][495927] Updated weights for policy 0, policy_version 2744 (0.0008) +[2026-06-07 03:04:25,741][495927] Updated weights for policy 0, policy_version 2754 (0.0008) +[2026-06-07 03:04:25,884][495927] Updated weights for policy 0, policy_version 2765 (0.0009) +[2026-06-07 03:04:26,043][495927] Updated weights for policy 0, policy_version 2776 (0.0009) +[2026-06-07 03:04:26,202][495927] Updated weights for policy 0, policy_version 2787 (0.0008) +[2026-06-07 03:04:26,369][495927] Updated weights for policy 0, policy_version 2799 (0.0008) +[2026-06-07 03:04:26,525][495927] Updated weights for policy 0, policy_version 2810 (0.0009) +[2026-06-07 03:04:27,324][495927] Updated weights for policy 0, policy_version 2821 (0.0009) +[2026-06-07 03:04:27,481][495927] Updated weights for policy 0, policy_version 2832 (0.0008) +[2026-06-07 03:04:27,647][495927] Updated weights for policy 0, policy_version 2844 (0.0008) +[2026-06-07 03:04:27,790][495927] Updated weights for policy 0, policy_version 2854 (0.0009) +[2026-06-07 03:04:27,960][495927] Updated weights for policy 0, policy_version 2866 (0.0008) +[2026-06-07 03:04:28,139][495927] Updated weights for policy 0, policy_version 2878 (0.0008) +[2026-06-07 03:04:28,909][492660] Fps is (10 sec: 19662.1, 60 sec: 21845.5, 300 sec: 21065.1). Total num frames: 1474560. Throughput: 0: 21245.2. Samples: 1500416. Policy #0 lag: (min: 45.0, avg: 95.9, max: 109.0) +[2026-06-07 03:04:28,911][492660] Avg episode reward: [(0, '11.258')] +[2026-06-07 03:04:28,930][495927] Updated weights for policy 0, policy_version 2889 (0.0009) +[2026-06-07 03:04:29,084][495927] Updated weights for policy 0, policy_version 2900 (0.0008) +[2026-06-07 03:04:29,226][495927] Updated weights for policy 0, policy_version 2910 (0.0008) +[2026-06-07 03:04:29,420][495927] Updated weights for policy 0, policy_version 2924 (0.0008) +[2026-06-07 03:04:29,568][495927] Updated weights for policy 0, policy_version 2934 (0.0007) +[2026-06-07 03:04:29,711][495570] Saving new best policy, reward=11.258! +[2026-06-07 03:04:30,359][495927] Updated weights for policy 0, policy_version 2946 (0.0009) +[2026-06-07 03:04:30,495][495927] Updated weights for policy 0, policy_version 2956 (0.0008) +[2026-06-07 03:04:30,679][495927] Updated weights for policy 0, policy_version 2969 (0.0008) +[2026-06-07 03:04:30,841][495927] Updated weights for policy 0, policy_version 2980 (0.0007) +[2026-06-07 03:04:30,990][495927] Updated weights for policy 0, policy_version 2991 (0.0009) +[2026-06-07 03:04:31,175][495927] Updated weights for policy 0, policy_version 3003 (0.0009) +[2026-06-07 03:04:31,962][495927] Updated weights for policy 0, policy_version 3013 (0.0009) +[2026-06-07 03:04:32,107][495927] Updated weights for policy 0, policy_version 3023 (0.0009) +[2026-06-07 03:04:32,257][495927] Updated weights for policy 0, policy_version 3034 (0.0008) +[2026-06-07 03:04:32,417][495927] Updated weights for policy 0, policy_version 3045 (0.0008) +[2026-06-07 03:04:32,574][495927] Updated weights for policy 0, policy_version 3056 (0.0009) +[2026-06-07 03:04:32,732][495927] Updated weights for policy 0, policy_version 3067 (0.0007) +[2026-06-07 03:04:33,477][495927] Updated weights for policy 0, policy_version 3078 (0.0008) +[2026-06-07 03:04:33,622][495927] Updated weights for policy 0, policy_version 3089 (0.0008) +[2026-06-07 03:04:33,805][495927] Updated weights for policy 0, policy_version 3102 (0.0008) +[2026-06-07 03:04:33,909][492660] Fps is (10 sec: 19660.8, 60 sec: 21845.4, 300 sec: 20971.5). Total num frames: 1572864. Throughput: 0: 21202.6. Samples: 1563136. Policy #0 lag: (min: 63.0, avg: 75.0, max: 127.0) +[2026-06-07 03:04:33,910][492660] Avg episode reward: [(0, '13.746')] +[2026-06-07 03:04:33,941][495927] Updated weights for policy 0, policy_version 3112 (0.0008) +[2026-06-07 03:04:34,119][495927] Updated weights for policy 0, policy_version 3124 (0.0007) +[2026-06-07 03:04:34,269][495927] Updated weights for policy 0, policy_version 3135 (0.0008) +[2026-06-07 03:04:34,277][495570] Saving new best policy, reward=13.746! +[2026-06-07 03:04:35,058][495927] Updated weights for policy 0, policy_version 3147 (0.0009) +[2026-06-07 03:04:35,218][495927] Updated weights for policy 0, policy_version 3158 (0.0008) +[2026-06-07 03:04:35,367][495927] Updated weights for policy 0, policy_version 3169 (0.0008) +[2026-06-07 03:04:35,528][495927] Updated weights for policy 0, policy_version 3180 (0.0008) +[2026-06-07 03:04:35,677][495927] Updated weights for policy 0, policy_version 3190 (0.0006) +[2026-06-07 03:04:36,466][495927] Updated weights for policy 0, policy_version 3202 (0.0010) +[2026-06-07 03:04:36,616][495927] Updated weights for policy 0, policy_version 3213 (0.0005) +[2026-06-07 03:04:36,791][495927] Updated weights for policy 0, policy_version 3225 (0.0006) +[2026-06-07 03:04:36,955][495927] Updated weights for policy 0, policy_version 3237 (0.0007) +[2026-06-07 03:04:37,150][495927] Updated weights for policy 0, policy_version 3250 (0.0005) +[2026-06-07 03:04:37,287][495927] Updated weights for policy 0, policy_version 3260 (0.0008) +[2026-06-07 03:04:38,089][495927] Updated weights for policy 0, policy_version 3274 (0.0010) +[2026-06-07 03:04:38,247][495927] Updated weights for policy 0, policy_version 3285 (0.0009) +[2026-06-07 03:04:38,391][495927] Updated weights for policy 0, policy_version 3296 (0.0007) +[2026-06-07 03:04:38,564][495927] Updated weights for policy 0, policy_version 3308 (0.0007) +[2026-06-07 03:04:38,711][495927] Updated weights for policy 0, policy_version 3318 (0.0009) +[2026-06-07 03:04:38,909][492660] Fps is (10 sec: 22937.3, 60 sec: 21845.3, 300 sec: 21299.2). Total num frames: 1703936. Throughput: 0: 21216.7. Samples: 1693056. Policy #0 lag: (min: 71.0, avg: 118.2, max: 142.0) +[2026-06-07 03:04:38,911][492660] Avg episode reward: [(0, '15.145')] +[2026-06-07 03:04:38,917][495570] Saving new best policy, reward=15.145! +[2026-06-07 03:04:39,479][495927] Updated weights for policy 0, policy_version 3329 (0.0009) +[2026-06-07 03:04:39,633][495927] Updated weights for policy 0, policy_version 3340 (0.0008) +[2026-06-07 03:04:39,802][495927] Updated weights for policy 0, policy_version 3352 (0.0008) +[2026-06-07 03:04:39,948][495927] Updated weights for policy 0, policy_version 3362 (0.0008) +[2026-06-07 03:04:40,087][495927] Updated weights for policy 0, policy_version 3372 (0.0008) +[2026-06-07 03:04:40,293][495927] Updated weights for policy 0, policy_version 3386 (0.0009) +[2026-06-07 03:04:41,068][495927] Updated weights for policy 0, policy_version 3396 (0.0007) +[2026-06-07 03:04:41,216][495927] Updated weights for policy 0, policy_version 3406 (0.0008) +[2026-06-07 03:04:41,356][495927] Updated weights for policy 0, policy_version 3416 (0.0010) +[2026-06-07 03:04:41,493][495927] Updated weights for policy 0, policy_version 3426 (0.0009) +[2026-06-07 03:04:41,650][495927] Updated weights for policy 0, policy_version 3437 (0.0005) +[2026-06-07 03:04:41,817][495927] Updated weights for policy 0, policy_version 3449 (0.0009) +[2026-06-07 03:04:42,581][495927] Updated weights for policy 0, policy_version 3461 (0.0009) +[2026-06-07 03:04:42,721][495927] Updated weights for policy 0, policy_version 3471 (0.0009) +[2026-06-07 03:04:42,867][495927] Updated weights for policy 0, policy_version 3482 (0.0006) +[2026-06-07 03:04:43,036][495927] Updated weights for policy 0, policy_version 3494 (0.0008) +[2026-06-07 03:04:43,178][495927] Updated weights for policy 0, policy_version 3504 (0.0008) +[2026-06-07 03:04:43,353][495927] Updated weights for policy 0, policy_version 3517 (0.0005) +[2026-06-07 03:04:43,909][492660] Fps is (10 sec: 22937.7, 60 sec: 21299.2, 300 sec: 21202.8). Total num frames: 1802240. Throughput: 0: 21296.4. Samples: 1824640. Policy #0 lag: (min: 5.0, avg: 16.3, max: 69.0) +[2026-06-07 03:04:43,910][492660] Avg episode reward: [(0, '17.843')] +[2026-06-07 03:04:44,115][495927] Updated weights for policy 0, policy_version 3528 (0.0005) +[2026-06-07 03:04:44,249][495927] Updated weights for policy 0, policy_version 3538 (0.0004) +[2026-06-07 03:04:44,404][495927] Updated weights for policy 0, policy_version 3549 (0.0004) +[2026-06-07 03:04:44,552][495927] Updated weights for policy 0, policy_version 3560 (0.0004) +[2026-06-07 03:04:44,734][495927] Updated weights for policy 0, policy_version 3572 (0.0007) +[2026-06-07 03:04:44,879][495927] Updated weights for policy 0, policy_version 3582 (0.0008) +[2026-06-07 03:04:44,897][495570] Saving new best policy, reward=17.843! +[2026-06-07 03:04:45,710][495927] Updated weights for policy 0, policy_version 3596 (0.0008) +[2026-06-07 03:04:45,868][495927] Updated weights for policy 0, policy_version 3607 (0.0009) +[2026-06-07 03:04:46,046][495927] Updated weights for policy 0, policy_version 3619 (0.0008) +[2026-06-07 03:04:46,205][495927] Updated weights for policy 0, policy_version 3630 (0.0008) +[2026-06-07 03:04:46,376][495927] Updated weights for policy 0, policy_version 3642 (0.0009) +[2026-06-07 03:04:47,117][495927] Updated weights for policy 0, policy_version 3653 (0.0009) +[2026-06-07 03:04:47,260][495927] Updated weights for policy 0, policy_version 3663 (0.0008) +[2026-06-07 03:04:47,442][495927] Updated weights for policy 0, policy_version 3676 (0.0009) +[2026-06-07 03:04:47,601][495927] Updated weights for policy 0, policy_version 3687 (0.0009) +[2026-06-07 03:04:47,774][495927] Updated weights for policy 0, policy_version 3699 (0.0008) +[2026-06-07 03:04:47,929][495927] Updated weights for policy 0, policy_version 3710 (0.0009) +[2026-06-07 03:04:48,662][495927] Updated weights for policy 0, policy_version 3720 (0.0009) +[2026-06-07 03:04:48,825][495927] Updated weights for policy 0, policy_version 3732 (0.0008) +[2026-06-07 03:04:48,909][492660] Fps is (10 sec: 19661.0, 60 sec: 21299.3, 300 sec: 21117.1). Total num frames: 1900544. Throughput: 0: 21378.9. Samples: 1891456. Policy #0 lag: (min: 63.0, avg: 74.4, max: 127.0) +[2026-06-07 03:04:48,911][492660] Avg episode reward: [(0, '21.896')] +[2026-06-07 03:04:48,966][495927] Updated weights for policy 0, policy_version 3742 (0.0008) +[2026-06-07 03:04:49,109][495927] Updated weights for policy 0, policy_version 3752 (0.0009) +[2026-06-07 03:04:49,273][495927] Updated weights for policy 0, policy_version 3763 (0.0009) +[2026-06-07 03:04:49,441][495927] Updated weights for policy 0, policy_version 3775 (0.0008) +[2026-06-07 03:04:49,450][495570] Saving new best policy, reward=21.896! +[2026-06-07 03:04:50,210][495927] Updated weights for policy 0, policy_version 3786 (0.0010) +[2026-06-07 03:04:50,414][495927] Updated weights for policy 0, policy_version 3801 (0.0008) +[2026-06-07 03:04:50,589][495927] Updated weights for policy 0, policy_version 3813 (0.0009) +[2026-06-07 03:04:50,735][495927] Updated weights for policy 0, policy_version 3823 (0.0008) +[2026-06-07 03:04:50,874][495927] Updated weights for policy 0, policy_version 3833 (0.0009) +[2026-06-07 03:04:51,570][495927] Updated weights for policy 0, policy_version 3843 (0.0009) +[2026-06-07 03:04:51,740][495927] Updated weights for policy 0, policy_version 3855 (0.0009) +[2026-06-07 03:04:51,901][495927] Updated weights for policy 0, policy_version 3866 (0.0009) +[2026-06-07 03:04:52,054][495927] Updated weights for policy 0, policy_version 3877 (0.0008) +[2026-06-07 03:04:52,220][495927] Updated weights for policy 0, policy_version 3888 (0.0008) +[2026-06-07 03:04:52,404][495927] Updated weights for policy 0, policy_version 3901 (0.0008) +[2026-06-07 03:04:53,164][495927] Updated weights for policy 0, policy_version 3912 (0.0009) +[2026-06-07 03:04:53,303][495927] Updated weights for policy 0, policy_version 3922 (0.0008) +[2026-06-07 03:04:53,460][495927] Updated weights for policy 0, policy_version 3933 (0.0008) +[2026-06-07 03:04:53,657][495927] Updated weights for policy 0, policy_version 3947 (0.0008) +[2026-06-07 03:04:53,811][495927] Updated weights for policy 0, policy_version 3957 (0.0008) +[2026-06-07 03:04:53,909][492660] Fps is (10 sec: 19660.7, 60 sec: 21299.3, 300 sec: 21040.5). Total num frames: 1998848. Throughput: 0: 21387.3. Samples: 2022016. Policy #0 lag: (min: 63.0, avg: 74.7, max: 127.0) +[2026-06-07 03:04:53,910][492660] Avg episode reward: [(0, '22.571')] +[2026-06-07 03:04:53,968][495570] Saving new best policy, reward=22.571! +[2026-06-07 03:04:53,971][495927] Updated weights for policy 0, policy_version 3968 (0.0008) +[2026-06-07 03:04:54,703][495927] Updated weights for policy 0, policy_version 3980 (0.0008) +[2026-06-07 03:04:54,863][495927] Updated weights for policy 0, policy_version 3991 (0.0008) +[2026-06-07 03:04:55,018][495927] Updated weights for policy 0, policy_version 4002 (0.0008) +[2026-06-07 03:04:55,173][495927] Updated weights for policy 0, policy_version 4013 (0.0008) +[2026-06-07 03:04:55,341][495927] Updated weights for policy 0, policy_version 4024 (0.0009) +[2026-06-07 03:04:56,063][495927] Updated weights for policy 0, policy_version 4034 (0.0008) +[2026-06-07 03:04:56,212][495927] Updated weights for policy 0, policy_version 4045 (0.0008) +[2026-06-07 03:04:56,400][495927] Updated weights for policy 0, policy_version 4059 (0.0008) +[2026-06-07 03:04:56,555][495927] Updated weights for policy 0, policy_version 4069 (0.0008) +[2026-06-07 03:04:56,728][495927] Updated weights for policy 0, policy_version 4081 (0.0008) +[2026-06-07 03:04:56,872][495927] Updated weights for policy 0, policy_version 4091 (0.0008) +[2026-06-07 03:04:57,602][495927] Updated weights for policy 0, policy_version 4101 (0.0008) +[2026-06-07 03:04:57,778][495927] Updated weights for policy 0, policy_version 4114 (0.0008) +[2026-06-07 03:04:57,972][495927] Updated weights for policy 0, policy_version 4128 (0.0008) +[2026-06-07 03:04:58,127][495927] Updated weights for policy 0, policy_version 4139 (0.0008) +[2026-06-07 03:04:58,278][495927] Updated weights for policy 0, policy_version 4149 (0.0009) +[2026-06-07 03:04:58,434][495927] Updated weights for policy 0, policy_version 4160 (0.0009) +[2026-06-07 03:04:58,909][492660] Fps is (10 sec: 22937.5, 60 sec: 21299.2, 300 sec: 21299.2). Total num frames: 2129920. Throughput: 0: 21450.0. Samples: 2154368. Policy #0 lag: (min: 44.0, avg: 56.2, max: 108.0) +[2026-06-07 03:04:58,911][492660] Avg episode reward: [(0, '29.798')] +[2026-06-07 03:04:58,917][495570] Saving new best policy, reward=29.798! +[2026-06-07 03:04:59,210][495927] Updated weights for policy 0, policy_version 4170 (0.0008) +[2026-06-07 03:04:59,362][495927] Updated weights for policy 0, policy_version 4181 (0.0008) +[2026-06-07 03:04:59,524][495927] Updated weights for policy 0, policy_version 4192 (0.0008) +[2026-06-07 03:04:59,693][495927] Updated weights for policy 0, policy_version 4204 (0.0008) +[2026-06-07 03:04:59,856][495927] Updated weights for policy 0, policy_version 4215 (0.0009) +[2026-06-07 03:05:00,529][495927] Updated weights for policy 0, policy_version 4226 (0.0008) +[2026-06-07 03:05:00,688][495927] Updated weights for policy 0, policy_version 4237 (0.0008) +[2026-06-07 03:05:00,844][495927] Updated weights for policy 0, policy_version 4248 (0.0009) +[2026-06-07 03:05:01,001][495927] Updated weights for policy 0, policy_version 4259 (0.0009) +[2026-06-07 03:05:01,176][495927] Updated weights for policy 0, policy_version 4271 (0.0008) +[2026-06-07 03:05:01,323][495927] Updated weights for policy 0, policy_version 4281 (0.0008) +[2026-06-07 03:05:02,039][495927] Updated weights for policy 0, policy_version 4291 (0.0007) +[2026-06-07 03:05:02,179][495927] Updated weights for policy 0, policy_version 4302 (0.0008) +[2026-06-07 03:05:02,320][495927] Updated weights for policy 0, policy_version 4312 (0.0008) +[2026-06-07 03:05:02,471][495927] Updated weights for policy 0, policy_version 4322 (0.0009) +[2026-06-07 03:05:02,617][495927] Updated weights for policy 0, policy_version 4332 (0.0008) +[2026-06-07 03:05:02,757][495927] Updated weights for policy 0, policy_version 4342 (0.0008) +[2026-06-07 03:05:02,909][495927] Updated weights for policy 0, policy_version 4352 (0.0008) +[2026-06-07 03:05:03,615][495927] Updated weights for policy 0, policy_version 4363 (0.0009) +[2026-06-07 03:05:03,783][495927] Updated weights for policy 0, policy_version 4375 (0.0008) +[2026-06-07 03:05:03,909][492660] Fps is (10 sec: 22937.5, 60 sec: 21299.2, 300 sec: 21221.2). Total num frames: 2228224. Throughput: 0: 21507.1. Samples: 2221312. Policy #0 lag: (min: 44.0, avg: 56.2, max: 108.0) +[2026-06-07 03:05:03,910][492660] Avg episode reward: [(0, '36.099')] +[2026-06-07 03:05:03,942][495927] Updated weights for policy 0, policy_version 4386 (0.0008) +[2026-06-07 03:05:04,102][495927] Updated weights for policy 0, policy_version 4397 (0.0008) +[2026-06-07 03:05:04,250][495927] Updated weights for policy 0, policy_version 4407 (0.0006) +[2026-06-07 03:05:04,390][495570] Saving new best policy, reward=36.099! +[2026-06-07 03:05:04,943][495927] Updated weights for policy 0, policy_version 4417 (0.0005) +[2026-06-07 03:05:05,107][495927] Updated weights for policy 0, policy_version 4429 (0.0008) +[2026-06-07 03:05:05,274][495927] Updated weights for policy 0, policy_version 4441 (0.0009) +[2026-06-07 03:05:05,432][495927] Updated weights for policy 0, policy_version 4452 (0.0009) +[2026-06-07 03:05:05,577][495927] Updated weights for policy 0, policy_version 4462 (0.0009) +[2026-06-07 03:05:05,742][495927] Updated weights for policy 0, policy_version 4473 (0.0008) +[2026-06-07 03:05:06,449][495927] Updated weights for policy 0, policy_version 4485 (0.0009) +[2026-06-07 03:05:06,621][495927] Updated weights for policy 0, policy_version 4498 (0.0008) +[2026-06-07 03:05:06,787][495927] Updated weights for policy 0, policy_version 4509 (0.0009) +[2026-06-07 03:05:06,958][495927] Updated weights for policy 0, policy_version 4521 (0.0008) +[2026-06-07 03:05:07,102][495927] Updated weights for policy 0, policy_version 4531 (0.0008) +[2026-06-07 03:05:07,282][495927] Updated weights for policy 0, policy_version 4543 (0.0009) +[2026-06-07 03:05:08,001][495927] Updated weights for policy 0, policy_version 4554 (0.0009) +[2026-06-07 03:05:08,156][495927] Updated weights for policy 0, policy_version 4565 (0.0009) +[2026-06-07 03:05:08,324][495927] Updated weights for policy 0, policy_version 4576 (0.0009) +[2026-06-07 03:05:08,499][495927] Updated weights for policy 0, policy_version 4588 (0.0008) +[2026-06-07 03:05:08,648][495927] Updated weights for policy 0, policy_version 4598 (0.0008) +[2026-06-07 03:05:08,794][495927] Updated weights for policy 0, policy_version 4608 (0.0008) +[2026-06-07 03:05:08,909][492660] Fps is (10 sec: 22937.9, 60 sec: 21845.3, 300 sec: 21448.2). Total num frames: 2359296. Throughput: 0: 21723.0. Samples: 2353792. Policy #0 lag: (min: 127.0, avg: 140.0, max: 191.0) +[2026-06-07 03:05:08,910][492660] Avg episode reward: [(0, '45.868')] +[2026-06-07 03:05:08,916][495570] Saving new best policy, reward=45.868! +[2026-06-07 03:05:09,453][495927] Updated weights for policy 0, policy_version 4618 (0.0009) +[2026-06-07 03:05:09,594][495927] Updated weights for policy 0, policy_version 4628 (0.0008) +[2026-06-07 03:05:09,757][495927] Updated weights for policy 0, policy_version 4639 (0.0008) +[2026-06-07 03:05:09,920][495927] Updated weights for policy 0, policy_version 4650 (0.0008) +[2026-06-07 03:05:10,107][495927] Updated weights for policy 0, policy_version 4663 (0.0008) +[2026-06-07 03:05:10,781][495927] Updated weights for policy 0, policy_version 4674 (0.0009) +[2026-06-07 03:05:10,926][495927] Updated weights for policy 0, policy_version 4685 (0.0008) +[2026-06-07 03:05:11,081][495927] Updated weights for policy 0, policy_version 4696 (0.0008) +[2026-06-07 03:05:11,248][495927] Updated weights for policy 0, policy_version 4707 (0.0008) +[2026-06-07 03:05:11,428][495927] Updated weights for policy 0, policy_version 4719 (0.0008) +[2026-06-07 03:05:11,578][495927] Updated weights for policy 0, policy_version 4729 (0.0008) +[2026-06-07 03:05:12,236][495927] Updated weights for policy 0, policy_version 4739 (0.0008) +[2026-06-07 03:05:12,372][495927] Updated weights for policy 0, policy_version 4749 (0.0008) +[2026-06-07 03:05:12,526][495927] Updated weights for policy 0, policy_version 4760 (0.0008) +[2026-06-07 03:05:12,675][495927] Updated weights for policy 0, policy_version 4770 (0.0008) +[2026-06-07 03:05:12,819][495927] Updated weights for policy 0, policy_version 4780 (0.0008) +[2026-06-07 03:05:12,971][495927] Updated weights for policy 0, policy_version 4790 (0.0008) +[2026-06-07 03:05:13,115][495927] Updated weights for policy 0, policy_version 4800 (0.0008) +[2026-06-07 03:05:13,779][495927] Updated weights for policy 0, policy_version 4810 (0.0008) +[2026-06-07 03:05:13,909][492660] Fps is (10 sec: 22937.7, 60 sec: 21845.4, 300 sec: 21370.4). Total num frames: 2457600. Throughput: 0: 22013.2. Samples: 2491008. Policy #0 lag: (min: 106.0, avg: 117.9, max: 170.0) +[2026-06-07 03:05:13,910][492660] Avg episode reward: [(0, '53.180')] +[2026-06-07 03:05:13,928][495927] Updated weights for policy 0, policy_version 4821 (0.0008) +[2026-06-07 03:05:14,078][495927] Updated weights for policy 0, policy_version 4831 (0.0009) +[2026-06-07 03:05:14,228][495927] Updated weights for policy 0, policy_version 4841 (0.0008) +[2026-06-07 03:05:14,372][495927] Updated weights for policy 0, policy_version 4851 (0.0008) +[2026-06-07 03:05:14,523][495927] Updated weights for policy 0, policy_version 4861 (0.0009) +[2026-06-07 03:05:14,559][495570] Saving new best policy, reward=53.180! +[2026-06-07 03:05:15,206][495927] Updated weights for policy 0, policy_version 4871 (0.0009) +[2026-06-07 03:05:15,373][495927] Updated weights for policy 0, policy_version 4882 (0.0008) +[2026-06-07 03:05:15,523][495927] Updated weights for policy 0, policy_version 4893 (0.0008) +[2026-06-07 03:05:15,681][495927] Updated weights for policy 0, policy_version 4904 (0.0010) +[2026-06-07 03:05:15,837][495927] Updated weights for policy 0, policy_version 4914 (0.0008) +[2026-06-07 03:05:16,003][495927] Updated weights for policy 0, policy_version 4925 (0.0008) +[2026-06-07 03:05:16,631][495927] Updated weights for policy 0, policy_version 4935 (0.0008) +[2026-06-07 03:05:16,774][495927] Updated weights for policy 0, policy_version 4945 (0.0008) +[2026-06-07 03:05:16,937][495927] Updated weights for policy 0, policy_version 4956 (0.0008) +[2026-06-07 03:05:17,108][495927] Updated weights for policy 0, policy_version 4968 (0.0008) +[2026-06-07 03:05:17,258][495927] Updated weights for policy 0, policy_version 4978 (0.0008) +[2026-06-07 03:05:17,414][495927] Updated weights for policy 0, policy_version 4989 (0.0008) +[2026-06-07 03:05:18,089][495927] Updated weights for policy 0, policy_version 5000 (0.0009) +[2026-06-07 03:05:18,258][495927] Updated weights for policy 0, policy_version 5012 (0.0008) +[2026-06-07 03:05:18,426][495927] Updated weights for policy 0, policy_version 5024 (0.0009) +[2026-06-07 03:05:18,575][495927] Updated weights for policy 0, policy_version 5034 (0.0008) +[2026-06-07 03:05:18,725][495927] Updated weights for policy 0, policy_version 5044 (0.0008) +[2026-06-07 03:05:18,900][495927] Updated weights for policy 0, policy_version 5056 (0.0008) +[2026-06-07 03:05:18,909][492660] Fps is (10 sec: 22937.6, 60 sec: 21845.6, 300 sec: 21572.3). Total num frames: 2588672. Throughput: 0: 22169.6. Samples: 2560768. Policy #0 lag: (min: 63.0, avg: 75.6, max: 127.0) +[2026-06-07 03:05:18,910][492660] Avg episode reward: [(0, '62.087')] +[2026-06-07 03:05:18,915][495570] Saving new best policy, reward=62.087! +[2026-06-07 03:05:19,578][495927] Updated weights for policy 0, policy_version 5066 (0.0008) +[2026-06-07 03:05:19,714][495927] Updated weights for policy 0, policy_version 5076 (0.0008) +[2026-06-07 03:05:19,880][495927] Updated weights for policy 0, policy_version 5087 (0.0009) +[2026-06-07 03:05:20,040][495927] Updated weights for policy 0, policy_version 5098 (0.0009) +[2026-06-07 03:05:20,207][495927] Updated weights for policy 0, policy_version 5109 (0.0008) +[2026-06-07 03:05:20,356][495927] Updated weights for policy 0, policy_version 5119 (0.0008) +[2026-06-07 03:05:21,033][495927] Updated weights for policy 0, policy_version 5131 (0.0009) +[2026-06-07 03:05:21,171][495927] Updated weights for policy 0, policy_version 5141 (0.0008) +[2026-06-07 03:05:21,317][495927] Updated weights for policy 0, policy_version 5151 (0.0008) +[2026-06-07 03:05:21,465][495927] Updated weights for policy 0, policy_version 5161 (0.0008) +[2026-06-07 03:05:21,612][495927] Updated weights for policy 0, policy_version 5171 (0.0008) +[2026-06-07 03:05:21,759][495927] Updated weights for policy 0, policy_version 5181 (0.0008) +[2026-06-07 03:05:22,415][495927] Updated weights for policy 0, policy_version 5191 (0.0010) +[2026-06-07 03:05:22,578][495927] Updated weights for policy 0, policy_version 5202 (0.0008) +[2026-06-07 03:05:22,735][495927] Updated weights for policy 0, policy_version 5213 (0.0009) +[2026-06-07 03:05:22,877][495927] Updated weights for policy 0, policy_version 5223 (0.0008) +[2026-06-07 03:05:23,035][495927] Updated weights for policy 0, policy_version 5233 (0.0008) +[2026-06-07 03:05:23,182][495927] Updated weights for policy 0, policy_version 5243 (0.0008) +[2026-06-07 03:05:23,841][495927] Updated weights for policy 0, policy_version 5254 (0.0008) +[2026-06-07 03:05:23,909][492660] Fps is (10 sec: 22937.7, 60 sec: 21845.3, 300 sec: 21495.8). Total num frames: 2686976. Throughput: 0: 22451.3. Samples: 2703360. Policy #0 lag: (min: 63.0, avg: 76.2, max: 127.0) +[2026-06-07 03:05:23,910][492660] Avg episode reward: [(0, '73.444')] +[2026-06-07 03:05:23,991][495927] Updated weights for policy 0, policy_version 5265 (0.0008) +[2026-06-07 03:05:24,145][495927] Updated weights for policy 0, policy_version 5275 (0.0009) +[2026-06-07 03:05:24,302][495927] Updated weights for policy 0, policy_version 5286 (0.0010) +[2026-06-07 03:05:24,467][495927] Updated weights for policy 0, policy_version 5297 (0.0008) +[2026-06-07 03:05:24,611][495927] Updated weights for policy 0, policy_version 5307 (0.0008) +[2026-06-07 03:05:24,678][495570] Saving new best policy, reward=73.444! +[2026-06-07 03:05:25,264][495927] Updated weights for policy 0, policy_version 5318 (0.0008) +[2026-06-07 03:05:25,404][495927] Updated weights for policy 0, policy_version 5328 (0.0008) +[2026-06-07 03:05:25,563][495927] Updated weights for policy 0, policy_version 5339 (0.0008) +[2026-06-07 03:05:25,726][495927] Updated weights for policy 0, policy_version 5350 (0.0008) +[2026-06-07 03:05:25,903][495927] Updated weights for policy 0, policy_version 5362 (0.0009) +[2026-06-07 03:05:26,054][495927] Updated weights for policy 0, policy_version 5372 (0.0008) +[2026-06-07 03:05:26,715][495927] Updated weights for policy 0, policy_version 5382 (0.0008) +[2026-06-07 03:05:26,858][495927] Updated weights for policy 0, policy_version 5392 (0.0008) +[2026-06-07 03:05:27,003][495927] Updated weights for policy 0, policy_version 5402 (0.0008) +[2026-06-07 03:05:27,145][495927] Updated weights for policy 0, policy_version 5412 (0.0009) +[2026-06-07 03:05:27,289][495927] Updated weights for policy 0, policy_version 5422 (0.0008) +[2026-06-07 03:05:27,447][495927] Updated weights for policy 0, policy_version 5432 (0.0008) +[2026-06-07 03:05:28,096][495927] Updated weights for policy 0, policy_version 5442 (0.0008) +[2026-06-07 03:05:28,254][495927] Updated weights for policy 0, policy_version 5453 (0.0008) +[2026-06-07 03:05:28,392][495927] Updated weights for policy 0, policy_version 5463 (0.0008) +[2026-06-07 03:05:28,537][495927] Updated weights for policy 0, policy_version 5473 (0.0009) +[2026-06-07 03:05:28,688][495927] Updated weights for policy 0, policy_version 5483 (0.0009) +[2026-06-07 03:05:28,857][495927] Updated weights for policy 0, policy_version 5494 (0.0008) +[2026-06-07 03:05:28,909][492660] Fps is (10 sec: 19660.7, 60 sec: 21845.3, 300 sec: 21425.2). Total num frames: 2785280. Throughput: 0: 22493.8. Samples: 2836864. Policy #0 lag: (min: 63.0, avg: 76.2, max: 127.0) +[2026-06-07 03:05:28,910][492660] Avg episode reward: [(0, '85.249')] +[2026-06-07 03:05:28,993][495570] Saving new best policy, reward=85.249! +[2026-06-07 03:05:28,995][495927] Updated weights for policy 0, policy_version 5504 (0.0008) +[2026-06-07 03:05:29,633][495927] Updated weights for policy 0, policy_version 5514 (0.0008) +[2026-06-07 03:05:29,783][495927] Updated weights for policy 0, policy_version 5524 (0.0008) +[2026-06-07 03:05:29,932][495927] Updated weights for policy 0, policy_version 5534 (0.0009) +[2026-06-07 03:05:30,094][495927] Updated weights for policy 0, policy_version 5545 (0.0008) +[2026-06-07 03:05:30,243][495927] Updated weights for policy 0, policy_version 5555 (0.0008) +[2026-06-07 03:05:30,396][495927] Updated weights for policy 0, policy_version 5565 (0.0008) +[2026-06-07 03:05:31,046][495927] Updated weights for policy 0, policy_version 5576 (0.0008) +[2026-06-07 03:05:31,211][495927] Updated weights for policy 0, policy_version 5587 (0.0008) +[2026-06-07 03:05:31,366][495927] Updated weights for policy 0, policy_version 5598 (0.0009) +[2026-06-07 03:05:31,535][495927] Updated weights for policy 0, policy_version 5609 (0.0008) +[2026-06-07 03:05:31,709][495927] Updated weights for policy 0, policy_version 5621 (0.0008) +[2026-06-07 03:05:31,857][495927] Updated weights for policy 0, policy_version 5631 (0.0010) +[2026-06-07 03:05:32,485][495927] Updated weights for policy 0, policy_version 5641 (0.0008) +[2026-06-07 03:05:32,629][495927] Updated weights for policy 0, policy_version 5651 (0.0008) +[2026-06-07 03:05:32,773][495927] Updated weights for policy 0, policy_version 5661 (0.0008) +[2026-06-07 03:05:32,938][495927] Updated weights for policy 0, policy_version 5672 (0.0008) +[2026-06-07 03:05:33,087][495927] Updated weights for policy 0, policy_version 5682 (0.0008) +[2026-06-07 03:05:33,251][495927] Updated weights for policy 0, policy_version 5693 (0.0009) +[2026-06-07 03:05:33,894][495927] Updated weights for policy 0, policy_version 5704 (0.0008) +[2026-06-07 03:05:33,909][492660] Fps is (10 sec: 22937.5, 60 sec: 22391.5, 300 sec: 21602.6). Total num frames: 2916352. Throughput: 0: 22525.2. Samples: 2905088. Policy #0 lag: (min: 70.0, avg: 86.4, max: 138.0) +[2026-06-07 03:05:33,910][492660] Avg episode reward: [(0, '110.824')] +[2026-06-07 03:05:34,055][495927] Updated weights for policy 0, policy_version 5715 (0.0009) +[2026-06-07 03:05:34,199][495927] Updated weights for policy 0, policy_version 5725 (0.0009) +[2026-06-07 03:05:34,349][495927] Updated weights for policy 0, policy_version 5735 (0.0008) +[2026-06-07 03:05:34,504][495927] Updated weights for policy 0, policy_version 5745 (0.0009) +[2026-06-07 03:05:34,653][495927] Updated weights for policy 0, policy_version 5755 (0.0008) +[2026-06-07 03:05:34,717][495570] Saving new best policy, reward=110.824! +[2026-06-07 03:05:35,269][495927] Updated weights for policy 0, policy_version 5765 (0.0008) +[2026-06-07 03:05:35,413][495927] Updated weights for policy 0, policy_version 5775 (0.0008) +[2026-06-07 03:05:35,552][495927] Updated weights for policy 0, policy_version 5785 (0.0008) +[2026-06-07 03:05:35,723][495927] Updated weights for policy 0, policy_version 5796 (0.0008) +[2026-06-07 03:05:35,866][495927] Updated weights for policy 0, policy_version 5806 (0.0008) +[2026-06-07 03:05:36,015][495927] Updated weights for policy 0, policy_version 5816 (0.0008) +[2026-06-07 03:05:36,655][495927] Updated weights for policy 0, policy_version 5826 (0.0009) +[2026-06-07 03:05:36,793][495927] Updated weights for policy 0, policy_version 5836 (0.0008) +[2026-06-07 03:05:36,986][495927] Updated weights for policy 0, policy_version 5849 (0.0009) +[2026-06-07 03:05:37,133][495927] Updated weights for policy 0, policy_version 5859 (0.0008) +[2026-06-07 03:05:37,279][495927] Updated weights for policy 0, policy_version 5869 (0.0008) +[2026-06-07 03:05:37,424][495927] Updated weights for policy 0, policy_version 5879 (0.0009) +[2026-06-07 03:05:38,072][495927] Updated weights for policy 0, policy_version 5889 (0.0009) +[2026-06-07 03:05:38,211][495927] Updated weights for policy 0, policy_version 5899 (0.0008) +[2026-06-07 03:05:38,369][495927] Updated weights for policy 0, policy_version 5910 (0.0008) +[2026-06-07 03:05:38,516][495927] Updated weights for policy 0, policy_version 5920 (0.0008) +[2026-06-07 03:05:38,678][495927] Updated weights for policy 0, policy_version 5931 (0.0009) +[2026-06-07 03:05:38,838][495927] Updated weights for policy 0, policy_version 5942 (0.0009) +[2026-06-07 03:05:38,909][492660] Fps is (10 sec: 22937.5, 60 sec: 21845.4, 300 sec: 21533.3). Total num frames: 3014656. Throughput: 0: 22579.2. Samples: 3038080. Policy #0 lag: (min: 63.0, avg: 76.8, max: 127.0) +[2026-06-07 03:05:38,910][492660] Avg episode reward: [(0, '112.650')] +[2026-06-07 03:05:38,987][495570] Saving new best policy, reward=112.650! +[2026-06-07 03:05:39,517][495927] Updated weights for policy 0, policy_version 5953 (0.0009) +[2026-06-07 03:05:39,664][495927] Updated weights for policy 0, policy_version 5963 (0.0009) +[2026-06-07 03:05:39,805][495927] Updated weights for policy 0, policy_version 5973 (0.0009) +[2026-06-07 03:05:39,947][495927] Updated weights for policy 0, policy_version 5983 (0.0008) +[2026-06-07 03:05:40,095][495927] Updated weights for policy 0, policy_version 5993 (0.0009) +[2026-06-07 03:05:40,241][495927] Updated weights for policy 0, policy_version 6003 (0.0009) +[2026-06-07 03:05:40,410][495927] Updated weights for policy 0, policy_version 6014 (0.0009) +[2026-06-07 03:05:41,045][495927] Updated weights for policy 0, policy_version 6024 (0.0009) +[2026-06-07 03:05:41,201][495927] Updated weights for policy 0, policy_version 6035 (0.0009) +[2026-06-07 03:05:41,347][495927] Updated weights for policy 0, policy_version 6045 (0.0008) +[2026-06-07 03:05:41,501][495927] Updated weights for policy 0, policy_version 6055 (0.0007) +[2026-06-07 03:05:41,649][495927] Updated weights for policy 0, policy_version 6065 (0.0009) +[2026-06-07 03:05:41,793][495927] Updated weights for policy 0, policy_version 6075 (0.0007) +[2026-06-07 03:05:42,414][495927] Updated weights for policy 0, policy_version 6085 (0.0008) +[2026-06-07 03:05:42,556][495927] Updated weights for policy 0, policy_version 6095 (0.0011) +[2026-06-07 03:05:42,698][495927] Updated weights for policy 0, policy_version 6105 (0.0010) +[2026-06-07 03:05:42,849][495927] Updated weights for policy 0, policy_version 6115 (0.0009) +[2026-06-07 03:05:42,995][495927] Updated weights for policy 0, policy_version 6125 (0.0010) +[2026-06-07 03:05:43,156][495927] Updated weights for policy 0, policy_version 6136 (0.0008) +[2026-06-07 03:05:43,798][495927] Updated weights for policy 0, policy_version 6146 (0.0006) +[2026-06-07 03:05:43,909][492660] Fps is (10 sec: 22937.6, 60 sec: 22391.4, 300 sec: 21694.7). Total num frames: 3145728. Throughput: 0: 22633.3. Samples: 3172864. Policy #0 lag: (min: 62.0, avg: 79.6, max: 123.0) +[2026-06-07 03:05:43,910][492660] Avg episode reward: [(0, '123.574')] +[2026-06-07 03:05:43,942][495927] Updated weights for policy 0, policy_version 6156 (0.0008) +[2026-06-07 03:05:44,084][495927] Updated weights for policy 0, policy_version 6166 (0.0008) +[2026-06-07 03:05:44,249][495927] Updated weights for policy 0, policy_version 6177 (0.0008) +[2026-06-07 03:05:44,399][495927] Updated weights for policy 0, policy_version 6187 (0.0008) +[2026-06-07 03:05:44,547][495927] Updated weights for policy 0, policy_version 6197 (0.0008) +[2026-06-07 03:05:44,705][495570] Saving new best policy, reward=123.574! +[2026-06-07 03:05:44,706][495927] Updated weights for policy 0, policy_version 6208 (0.0008) +[2026-06-07 03:05:45,347][495927] Updated weights for policy 0, policy_version 6218 (0.0009) +[2026-06-07 03:05:45,509][495927] Updated weights for policy 0, policy_version 6229 (0.0008) +[2026-06-07 03:05:45,653][495927] Updated weights for policy 0, policy_version 6239 (0.0008) +[2026-06-07 03:05:45,809][495927] Updated weights for policy 0, policy_version 6249 (0.0008) +[2026-06-07 03:05:45,986][495927] Updated weights for policy 0, policy_version 6261 (0.0009) +[2026-06-07 03:05:46,131][495927] Updated weights for policy 0, policy_version 6271 (0.0009) +[2026-06-07 03:05:46,772][495927] Updated weights for policy 0, policy_version 6282 (0.0009) +[2026-06-07 03:05:46,930][495927] Updated weights for policy 0, policy_version 6293 (0.0009) +[2026-06-07 03:05:47,094][495927] Updated weights for policy 0, policy_version 6304 (0.0008) +[2026-06-07 03:05:47,272][495927] Updated weights for policy 0, policy_version 6316 (0.0008) +[2026-06-07 03:05:47,432][495927] Updated weights for policy 0, policy_version 6327 (0.0008) +[2026-06-07 03:05:48,112][495927] Updated weights for policy 0, policy_version 6338 (0.0009) +[2026-06-07 03:05:48,253][495927] Updated weights for policy 0, policy_version 6348 (0.0008) +[2026-06-07 03:05:48,406][495927] Updated weights for policy 0, policy_version 6359 (0.0008) +[2026-06-07 03:05:48,557][495927] Updated weights for policy 0, policy_version 6369 (0.0009) +[2026-06-07 03:05:48,724][495927] Updated weights for policy 0, policy_version 6380 (0.0008) +[2026-06-07 03:05:48,875][495927] Updated weights for policy 0, policy_version 6390 (0.0008) +[2026-06-07 03:05:48,909][492660] Fps is (10 sec: 22937.6, 60 sec: 22391.5, 300 sec: 21626.9). Total num frames: 3244032. Throughput: 0: 22741.3. Samples: 3244672. Policy #0 lag: (min: 62.0, avg: 79.6, max: 123.0) +[2026-06-07 03:05:48,910][492660] Avg episode reward: [(0, '156.207')] +[2026-06-07 03:05:49,015][495570] Saving new best policy, reward=156.207! +[2026-06-07 03:05:49,018][495927] Updated weights for policy 0, policy_version 6400 (0.0007) +[2026-06-07 03:05:49,627][495927] Updated weights for policy 0, policy_version 6410 (0.0008) +[2026-06-07 03:05:49,777][495927] Updated weights for policy 0, policy_version 6420 (0.0008) +[2026-06-07 03:05:49,924][495927] Updated weights for policy 0, policy_version 6430 (0.0008) +[2026-06-07 03:05:50,084][495927] Updated weights for policy 0, policy_version 6441 (0.0008) +[2026-06-07 03:05:50,259][495927] Updated weights for policy 0, policy_version 6452 (0.0009) +[2026-06-07 03:05:50,405][495927] Updated weights for policy 0, policy_version 6462 (0.0008) +[2026-06-07 03:05:50,993][495927] Updated weights for policy 0, policy_version 6472 (0.0009) +[2026-06-07 03:05:51,139][495927] Updated weights for policy 0, policy_version 6482 (0.0008) +[2026-06-07 03:05:51,279][495927] Updated weights for policy 0, policy_version 6492 (0.0009) +[2026-06-07 03:05:51,428][495927] Updated weights for policy 0, policy_version 6502 (0.0008) +[2026-06-07 03:05:51,588][495927] Updated weights for policy 0, policy_version 6512 (0.0008) +[2026-06-07 03:05:51,755][495927] Updated weights for policy 0, policy_version 6523 (0.0008) +[2026-06-07 03:05:52,388][495927] Updated weights for policy 0, policy_version 6533 (0.0009) +[2026-06-07 03:05:52,567][495927] Updated weights for policy 0, policy_version 6546 (0.0008) +[2026-06-07 03:05:52,721][495927] Updated weights for policy 0, policy_version 6556 (0.0008) +[2026-06-07 03:05:52,896][495927] Updated weights for policy 0, policy_version 6568 (0.0008) +[2026-06-07 03:05:53,042][495927] Updated weights for policy 0, policy_version 6578 (0.0008) +[2026-06-07 03:05:53,209][495927] Updated weights for policy 0, policy_version 6589 (0.0008) +[2026-06-07 03:05:53,854][495927] Updated weights for policy 0, policy_version 6599 (0.0007) +[2026-06-07 03:05:53,909][492660] Fps is (10 sec: 22937.7, 60 sec: 22937.6, 300 sec: 21774.9). Total num frames: 3375104. Throughput: 0: 22980.2. Samples: 3387904. Policy #0 lag: (min: 101.0, avg: 121.8, max: 163.0) +[2026-06-07 03:05:53,910][492660] Avg episode reward: [(0, '163.945')] +[2026-06-07 03:05:54,018][495927] Updated weights for policy 0, policy_version 6611 (0.0005) +[2026-06-07 03:05:54,189][495927] Updated weights for policy 0, policy_version 6622 (0.0005) +[2026-06-07 03:05:54,343][495927] Updated weights for policy 0, policy_version 6633 (0.0004) +[2026-06-07 03:05:54,528][495927] Updated weights for policy 0, policy_version 6645 (0.0005) +[2026-06-07 03:05:54,663][495927] Updated weights for policy 0, policy_version 6655 (0.0005) +[2026-06-07 03:05:54,677][495570] Saving new best policy, reward=163.945! +[2026-06-07 03:05:55,315][495927] Updated weights for policy 0, policy_version 6666 (0.0005) +[2026-06-07 03:05:55,472][495927] Updated weights for policy 0, policy_version 6677 (0.0005) +[2026-06-07 03:05:55,624][495927] Updated weights for policy 0, policy_version 6688 (0.0004) +[2026-06-07 03:05:55,797][495927] Updated weights for policy 0, policy_version 6699 (0.0004) +[2026-06-07 03:05:55,947][495927] Updated weights for policy 0, policy_version 6709 (0.0005) +[2026-06-07 03:05:56,094][495927] Updated weights for policy 0, policy_version 6719 (0.0005) +[2026-06-07 03:05:56,729][495927] Updated weights for policy 0, policy_version 6729 (0.0008) +[2026-06-07 03:05:56,895][495927] Updated weights for policy 0, policy_version 6741 (0.0008) +[2026-06-07 03:05:57,053][495927] Updated weights for policy 0, policy_version 6751 (0.0008) +[2026-06-07 03:05:57,256][495927] Updated weights for policy 0, policy_version 6765 (0.0008) +[2026-06-07 03:05:57,428][495927] Updated weights for policy 0, policy_version 6776 (0.0008) +[2026-06-07 03:05:58,080][495927] Updated weights for policy 0, policy_version 6786 (0.0008) +[2026-06-07 03:05:58,240][495927] Updated weights for policy 0, policy_version 6797 (0.0008) +[2026-06-07 03:05:58,406][495927] Updated weights for policy 0, policy_version 6808 (0.0009) +[2026-06-07 03:05:58,574][495927] Updated weights for policy 0, policy_version 6819 (0.0009) +[2026-06-07 03:05:58,729][495927] Updated weights for policy 0, policy_version 6829 (0.0008) +[2026-06-07 03:05:58,899][495927] Updated weights for policy 0, policy_version 6840 (0.0008) +[2026-06-07 03:05:58,909][492660] Fps is (10 sec: 22937.7, 60 sec: 22391.5, 300 sec: 21708.8). Total num frames: 3473408. Throughput: 0: 22920.5. Samples: 3522432. Policy #0 lag: (min: 63.0, avg: 76.1, max: 127.0) +[2026-06-07 03:05:58,910][492660] Avg episode reward: [(0, '175.776')] +[2026-06-07 03:05:59,009][495570] Saving new best policy, reward=175.776! +[2026-06-07 03:05:59,518][495927] Updated weights for policy 0, policy_version 6851 (0.0008) +[2026-06-07 03:05:59,652][495927] Updated weights for policy 0, policy_version 6861 (0.0008) +[2026-06-07 03:05:59,827][495927] Updated weights for policy 0, policy_version 6872 (0.0009) +[2026-06-07 03:05:59,969][495927] Updated weights for policy 0, policy_version 6882 (0.0008) +[2026-06-07 03:06:00,120][495927] Updated weights for policy 0, policy_version 6892 (0.0009) +[2026-06-07 03:06:00,300][495927] Updated weights for policy 0, policy_version 6904 (0.0008) +[2026-06-07 03:06:00,956][495927] Updated weights for policy 0, policy_version 6916 (0.0008) +[2026-06-07 03:06:01,116][495927] Updated weights for policy 0, policy_version 6927 (0.0009) +[2026-06-07 03:06:01,271][495927] Updated weights for policy 0, policy_version 6938 (0.0008) +[2026-06-07 03:06:01,440][495927] Updated weights for policy 0, policy_version 6949 (0.0009) +[2026-06-07 03:06:01,606][495927] Updated weights for policy 0, policy_version 6960 (0.0008) +[2026-06-07 03:06:01,755][495927] Updated weights for policy 0, policy_version 6970 (0.0008) +[2026-06-07 03:06:02,370][495927] Updated weights for policy 0, policy_version 6980 (0.0009) +[2026-06-07 03:06:02,511][495927] Updated weights for policy 0, policy_version 6990 (0.0009) +[2026-06-07 03:06:02,655][495927] Updated weights for policy 0, policy_version 7000 (0.0009) +[2026-06-07 03:06:02,799][495927] Updated weights for policy 0, policy_version 7010 (0.0009) +[2026-06-07 03:06:02,949][495927] Updated weights for policy 0, policy_version 7020 (0.0009) +[2026-06-07 03:06:03,117][495927] Updated weights for policy 0, policy_version 7031 (0.0009) +[2026-06-07 03:06:03,783][495927] Updated weights for policy 0, policy_version 7041 (0.0009) +[2026-06-07 03:06:03,909][492660] Fps is (10 sec: 22937.7, 60 sec: 22937.6, 300 sec: 21845.3). Total num frames: 3604480. Throughput: 0: 22849.4. Samples: 3588992. Policy #0 lag: (min: 63.0, avg: 76.1, max: 127.0) +[2026-06-07 03:06:03,910][492660] Avg episode reward: [(0, '187.585')] +[2026-06-07 03:06:03,939][495927] Updated weights for policy 0, policy_version 7052 (0.0008) +[2026-06-07 03:06:04,083][495927] Updated weights for policy 0, policy_version 7062 (0.0009) +[2026-06-07 03:06:04,242][495927] Updated weights for policy 0, policy_version 7073 (0.0009) +[2026-06-07 03:06:04,392][495927] Updated weights for policy 0, policy_version 7083 (0.0010) +[2026-06-07 03:06:04,537][495927] Updated weights for policy 0, policy_version 7093 (0.0008) +[2026-06-07 03:06:04,689][495927] Updated weights for policy 0, policy_version 7103 (0.0008) +[2026-06-07 03:06:04,706][495570] Saving new best policy, reward=187.585! +[2026-06-07 03:06:05,317][495927] Updated weights for policy 0, policy_version 7113 (0.0009) +[2026-06-07 03:06:05,467][495927] Updated weights for policy 0, policy_version 7123 (0.0008) +[2026-06-07 03:06:05,604][495927] Updated weights for policy 0, policy_version 7133 (0.0009) +[2026-06-07 03:06:05,767][495927] Updated weights for policy 0, policy_version 7144 (0.0009) +[2026-06-07 03:06:05,918][495927] Updated weights for policy 0, policy_version 7154 (0.0008) +[2026-06-07 03:06:06,067][495927] Updated weights for policy 0, policy_version 7164 (0.0009) +[2026-06-07 03:06:06,712][495927] Updated weights for policy 0, policy_version 7175 (0.0009) +[2026-06-07 03:06:06,889][495927] Updated weights for policy 0, policy_version 7187 (0.0009) +[2026-06-07 03:06:07,051][495927] Updated weights for policy 0, policy_version 7198 (0.0009) +[2026-06-07 03:06:07,212][495927] Updated weights for policy 0, policy_version 7209 (0.0009) +[2026-06-07 03:06:07,370][495927] Updated weights for policy 0, policy_version 7219 (0.0008) +[2026-06-07 03:06:07,517][495927] Updated weights for policy 0, policy_version 7229 (0.0008) +[2026-06-07 03:06:08,146][495927] Updated weights for policy 0, policy_version 7239 (0.0009) +[2026-06-07 03:06:08,301][495927] Updated weights for policy 0, policy_version 7249 (0.0009) +[2026-06-07 03:06:08,451][495927] Updated weights for policy 0, policy_version 7260 (0.0009) +[2026-06-07 03:06:08,620][495927] Updated weights for policy 0, policy_version 7271 (0.0008) +[2026-06-07 03:06:08,784][495927] Updated weights for policy 0, policy_version 7282 (0.0009) +[2026-06-07 03:06:08,909][492660] Fps is (10 sec: 22937.5, 60 sec: 22391.4, 300 sec: 21781.1). Total num frames: 3702784. Throughput: 0: 22687.2. Samples: 3724288. Policy #0 lag: (min: 63.0, avg: 76.5, max: 127.0) +[2026-06-07 03:06:08,910][492660] Avg episode reward: [(0, '192.603')] +[2026-06-07 03:06:08,940][495927] Updated weights for policy 0, policy_version 7292 (0.0009) +[2026-06-07 03:06:08,993][495570] Saving new best policy, reward=192.603! +[2026-06-07 03:06:09,551][495927] Updated weights for policy 0, policy_version 7303 (0.0009) +[2026-06-07 03:06:09,695][495927] Updated weights for policy 0, policy_version 7313 (0.0008) +[2026-06-07 03:06:09,840][495927] Updated weights for policy 0, policy_version 7323 (0.0008) +[2026-06-07 03:06:09,988][495927] Updated weights for policy 0, policy_version 7333 (0.0009) +[2026-06-07 03:06:10,139][495927] Updated weights for policy 0, policy_version 7343 (0.0008) +[2026-06-07 03:06:10,289][495927] Updated weights for policy 0, policy_version 7353 (0.0008) +[2026-06-07 03:06:10,913][495927] Updated weights for policy 0, policy_version 7363 (0.0009) +[2026-06-07 03:06:11,052][495927] Updated weights for policy 0, policy_version 7373 (0.0008) +[2026-06-07 03:06:11,208][495927] Updated weights for policy 0, policy_version 7384 (0.0008) +[2026-06-07 03:06:11,357][495927] Updated weights for policy 0, policy_version 7394 (0.0009) +[2026-06-07 03:06:11,521][495927] Updated weights for policy 0, policy_version 7405 (0.0009) +[2026-06-07 03:06:11,669][495927] Updated weights for policy 0, policy_version 7415 (0.0008) +[2026-06-07 03:06:12,293][495927] Updated weights for policy 0, policy_version 7425 (0.0009) +[2026-06-07 03:06:12,442][495927] Updated weights for policy 0, policy_version 7435 (0.0009) +[2026-06-07 03:06:12,596][495927] Updated weights for policy 0, policy_version 7446 (0.0009) +[2026-06-07 03:06:12,754][495927] Updated weights for policy 0, policy_version 7456 (0.0009) +[2026-06-07 03:06:12,895][495927] Updated weights for policy 0, policy_version 7466 (0.0008) +[2026-06-07 03:06:13,045][495927] Updated weights for policy 0, policy_version 7476 (0.0009) +[2026-06-07 03:06:13,198][495927] Updated weights for policy 0, policy_version 7486 (0.0010) +[2026-06-07 03:06:13,823][495927] Updated weights for policy 0, policy_version 7496 (0.0009) +[2026-06-07 03:06:13,909][492660] Fps is (10 sec: 22937.5, 60 sec: 22937.6, 300 sec: 21907.7). Total num frames: 3833856. Throughput: 0: 22704.3. Samples: 3858560. Policy #0 lag: (min: 63.0, avg: 76.8, max: 127.0) +[2026-06-07 03:06:13,910][492660] Avg episode reward: [(0, '208.623')] +[2026-06-07 03:06:13,982][495927] Updated weights for policy 0, policy_version 7507 (0.0008) +[2026-06-07 03:06:14,129][495927] Updated weights for policy 0, policy_version 7517 (0.0009) +[2026-06-07 03:06:14,275][495927] Updated weights for policy 0, policy_version 7527 (0.0009) +[2026-06-07 03:06:14,429][495927] Updated weights for policy 0, policy_version 7537 (0.0009) +[2026-06-07 03:06:14,581][495927] Updated weights for policy 0, policy_version 7547 (0.0008) +[2026-06-07 03:06:14,648][495570] Saving new best policy, reward=208.623! +[2026-06-07 03:06:15,233][495927] Updated weights for policy 0, policy_version 7558 (0.0009) +[2026-06-07 03:06:15,391][495927] Updated weights for policy 0, policy_version 7569 (0.0009) +[2026-06-07 03:06:15,537][495927] Updated weights for policy 0, policy_version 7579 (0.0008) +[2026-06-07 03:06:15,689][495927] Updated weights for policy 0, policy_version 7589 (0.0008) +[2026-06-07 03:06:15,843][495927] Updated weights for policy 0, policy_version 7599 (0.0009) +[2026-06-07 03:06:15,993][495927] Updated weights for policy 0, policy_version 7609 (0.0008) +[2026-06-07 03:06:16,616][495927] Updated weights for policy 0, policy_version 7620 (0.0009) +[2026-06-07 03:06:16,759][495927] Updated weights for policy 0, policy_version 7630 (0.0008) +[2026-06-07 03:06:16,924][495927] Updated weights for policy 0, policy_version 7641 (0.0008) +[2026-06-07 03:06:17,069][495927] Updated weights for policy 0, policy_version 7651 (0.0008) +[2026-06-07 03:06:17,220][495927] Updated weights for policy 0, policy_version 7661 (0.0008) +[2026-06-07 03:06:17,367][495927] Updated weights for policy 0, policy_version 7671 (0.0008) +[2026-06-07 03:06:17,983][495927] Updated weights for policy 0, policy_version 7681 (0.0008) +[2026-06-07 03:06:18,131][495927] Updated weights for policy 0, policy_version 7691 (0.0009) +[2026-06-07 03:06:18,297][495927] Updated weights for policy 0, policy_version 7703 (0.0008) +[2026-06-07 03:06:18,461][495927] Updated weights for policy 0, policy_version 7714 (0.0008) +[2026-06-07 03:06:18,651][495927] Updated weights for policy 0, policy_version 7727 (0.0008) +[2026-06-07 03:06:18,804][495927] Updated weights for policy 0, policy_version 7737 (0.0010) +[2026-06-07 03:06:18,909][492660] Fps is (10 sec: 26214.6, 60 sec: 22937.6, 300 sec: 22027.4). Total num frames: 3964928. Throughput: 0: 22809.6. Samples: 3931520. Policy #0 lag: (min: 63.0, avg: 76.8, max: 127.0) +[2026-06-07 03:06:18,910][492660] Avg episode reward: [(0, '225.258')] +[2026-06-07 03:06:18,916][495570] Saving new best policy, reward=225.258! +[2026-06-07 03:06:19,431][495927] Updated weights for policy 0, policy_version 7747 (0.0009) +[2026-06-07 03:06:19,587][495927] Updated weights for policy 0, policy_version 7758 (0.0008) +[2026-06-07 03:06:19,726][495927] Updated weights for policy 0, policy_version 7768 (0.0009) +[2026-06-07 03:06:19,883][495927] Updated weights for policy 0, policy_version 7778 (0.0009) +[2026-06-07 03:06:20,051][495927] Updated weights for policy 0, policy_version 7789 (0.0009) +[2026-06-07 03:06:20,202][495927] Updated weights for policy 0, policy_version 7799 (0.0009) +[2026-06-07 03:06:20,829][495927] Updated weights for policy 0, policy_version 7809 (0.0009) +[2026-06-07 03:06:20,968][495927] Updated weights for policy 0, policy_version 7819 (0.0008) +[2026-06-07 03:06:21,125][495927] Updated weights for policy 0, policy_version 7830 (0.0009) +[2026-06-07 03:06:21,298][495927] Updated weights for policy 0, policy_version 7841 (0.0008) +[2026-06-07 03:06:21,443][495927] Updated weights for policy 0, policy_version 7851 (0.0010) +[2026-06-07 03:06:21,596][495927] Updated weights for policy 0, policy_version 7861 (0.0010) +[2026-06-07 03:06:21,759][495927] Updated weights for policy 0, policy_version 7871 (0.0009) +[2026-06-07 03:06:22,362][495927] Updated weights for policy 0, policy_version 7881 (0.0008) +[2026-06-07 03:06:22,529][495927] Updated weights for policy 0, policy_version 7892 (0.0009) +[2026-06-07 03:06:22,671][495927] Updated weights for policy 0, policy_version 7902 (0.0008) +[2026-06-07 03:06:22,840][495927] Updated weights for policy 0, policy_version 7912 (0.0009) +[2026-06-07 03:06:22,987][495927] Updated weights for policy 0, policy_version 7922 (0.0008) +[2026-06-07 03:06:23,135][495927] Updated weights for policy 0, policy_version 7932 (0.0008) +[2026-06-07 03:06:23,754][495927] Updated weights for policy 0, policy_version 7942 (0.0009) +[2026-06-07 03:06:23,909][492660] Fps is (10 sec: 22937.5, 60 sec: 22937.6, 300 sec: 21963.4). Total num frames: 4063232. Throughput: 0: 23003.0. Samples: 4073216. Policy #0 lag: (min: 55.0, avg: 69.1, max: 119.0) +[2026-06-07 03:06:23,910][492660] Avg episode reward: [(0, '257.112')] +[2026-06-07 03:06:23,913][495927] Updated weights for policy 0, policy_version 7953 (0.0008) +[2026-06-07 03:06:24,060][495927] Updated weights for policy 0, policy_version 7963 (0.0008) +[2026-06-07 03:06:24,219][495927] Updated weights for policy 0, policy_version 7974 (0.0009) +[2026-06-07 03:06:24,371][495927] Updated weights for policy 0, policy_version 7984 (0.0009) +[2026-06-07 03:06:24,520][495927] Updated weights for policy 0, policy_version 7994 (0.0009) +[2026-06-07 03:06:24,605][495570] Saving new best policy, reward=257.112! +[2026-06-07 03:06:25,166][495927] Updated weights for policy 0, policy_version 8004 (0.0009) +[2026-06-07 03:06:25,307][495927] Updated weights for policy 0, policy_version 8014 (0.0008) +[2026-06-07 03:06:25,463][495927] Updated weights for policy 0, policy_version 8025 (0.0008) +[2026-06-07 03:06:25,610][495927] Updated weights for policy 0, policy_version 8035 (0.0009) +[2026-06-07 03:06:25,765][495927] Updated weights for policy 0, policy_version 8045 (0.0008) +[2026-06-07 03:06:25,914][495927] Updated weights for policy 0, policy_version 8055 (0.0008) +[2026-06-07 03:06:26,555][495927] Updated weights for policy 0, policy_version 8066 (0.0008) +[2026-06-07 03:06:26,701][495927] Updated weights for policy 0, policy_version 8077 (0.0008) +[2026-06-07 03:06:26,850][495927] Updated weights for policy 0, policy_version 8087 (0.0007) +[2026-06-07 03:06:27,002][495927] Updated weights for policy 0, policy_version 8097 (0.0008) +[2026-06-07 03:06:27,146][495927] Updated weights for policy 0, policy_version 8107 (0.0008) +[2026-06-07 03:06:27,300][495927] Updated weights for policy 0, policy_version 8117 (0.0008) +[2026-06-07 03:06:27,443][495927] Updated weights for policy 0, policy_version 8127 (0.0008) +[2026-06-07 03:06:28,101][495927] Updated weights for policy 0, policy_version 8138 (0.0009) +[2026-06-07 03:06:28,249][495927] Updated weights for policy 0, policy_version 8149 (0.0008) +[2026-06-07 03:06:28,394][495927] Updated weights for policy 0, policy_version 8159 (0.0008) +[2026-06-07 03:06:28,544][495927] Updated weights for policy 0, policy_version 8169 (0.0008) +[2026-06-07 03:06:28,684][495927] Updated weights for policy 0, policy_version 8179 (0.0009) +[2026-06-07 03:06:28,859][495927] Updated weights for policy 0, policy_version 8190 (0.0009) +[2026-06-07 03:06:28,909][492660] Fps is (10 sec: 22937.5, 60 sec: 23483.7, 300 sec: 22075.3). Total num frames: 4194304. Throughput: 0: 23017.3. Samples: 4208640. Policy #0 lag: (min: 55.0, avg: 69.1, max: 119.0) +[2026-06-07 03:06:28,910][492660] Avg episode reward: [(0, '273.986')] +[2026-06-07 03:06:28,915][495570] Saving new best policy, reward=273.986! +[2026-06-07 03:06:29,501][495927] Updated weights for policy 0, policy_version 8201 (0.0008) +[2026-06-07 03:06:29,649][495927] Updated weights for policy 0, policy_version 8211 (0.0008) +[2026-06-07 03:06:29,829][495927] Updated weights for policy 0, policy_version 8223 (0.0008) +[2026-06-07 03:06:29,984][495927] Updated weights for policy 0, policy_version 8233 (0.0008) +[2026-06-07 03:06:30,145][495927] Updated weights for policy 0, policy_version 8244 (0.0008) +[2026-06-07 03:06:30,303][495927] Updated weights for policy 0, policy_version 8255 (0.0008) +[2026-06-07 03:06:30,970][495927] Updated weights for policy 0, policy_version 8267 (0.0008) +[2026-06-07 03:06:31,118][495927] Updated weights for policy 0, policy_version 8277 (0.0008) +[2026-06-07 03:06:31,275][495927] Updated weights for policy 0, policy_version 8288 (0.0008) +[2026-06-07 03:06:31,445][495927] Updated weights for policy 0, policy_version 8299 (0.0008) +[2026-06-07 03:06:31,604][495927] Updated weights for policy 0, policy_version 8310 (0.0008) +[2026-06-07 03:06:31,753][495927] Updated weights for policy 0, policy_version 8320 (0.0004) +[2026-06-07 03:06:32,398][495927] Updated weights for policy 0, policy_version 8331 (0.0008) +[2026-06-07 03:06:32,556][495927] Updated weights for policy 0, policy_version 8342 (0.0007) +[2026-06-07 03:06:32,709][495927] Updated weights for policy 0, policy_version 8352 (0.0004) +[2026-06-07 03:06:32,873][495927] Updated weights for policy 0, policy_version 8363 (0.0004) +[2026-06-07 03:06:33,024][495927] Updated weights for policy 0, policy_version 8373 (0.0006) +[2026-06-07 03:06:33,185][495927] Updated weights for policy 0, policy_version 8384 (0.0008) +[2026-06-07 03:06:33,803][495927] Updated weights for policy 0, policy_version 8394 (0.0007) +[2026-06-07 03:06:33,909][492660] Fps is (10 sec: 22937.7, 60 sec: 22937.6, 300 sec: 22013.4). Total num frames: 4292608. Throughput: 0: 22929.1. Samples: 4276480. Policy #0 lag: (min: 63.0, avg: 76.3, max: 127.0) +[2026-06-07 03:06:33,910][492660] Avg episode reward: [(0, '286.419')] +[2026-06-07 03:06:33,952][495927] Updated weights for policy 0, policy_version 8404 (0.0006) +[2026-06-07 03:06:34,092][495927] Updated weights for policy 0, policy_version 8414 (0.0006) +[2026-06-07 03:06:34,248][495927] Updated weights for policy 0, policy_version 8424 (0.0008) +[2026-06-07 03:06:34,398][495927] Updated weights for policy 0, policy_version 8434 (0.0008) +[2026-06-07 03:06:34,578][495927] Updated weights for policy 0, policy_version 8446 (0.0008) +[2026-06-07 03:06:34,599][495570] Saving new best policy, reward=286.419! +[2026-06-07 03:06:35,215][495927] Updated weights for policy 0, policy_version 8456 (0.0008) +[2026-06-07 03:06:35,369][495927] Updated weights for policy 0, policy_version 8467 (0.0008) +[2026-06-07 03:06:35,528][495927] Updated weights for policy 0, policy_version 8478 (0.0008) +[2026-06-07 03:06:35,684][495927] Updated weights for policy 0, policy_version 8488 (0.0008) +[2026-06-07 03:06:35,852][495927] Updated weights for policy 0, policy_version 8499 (0.0008) +[2026-06-07 03:06:36,018][495927] Updated weights for policy 0, policy_version 8510 (0.0008) +[2026-06-07 03:06:36,667][495927] Updated weights for policy 0, policy_version 8522 (0.0008) +[2026-06-07 03:06:36,807][495927] Updated weights for policy 0, policy_version 8532 (0.0008) +[2026-06-07 03:06:36,959][495927] Updated weights for policy 0, policy_version 8542 (0.0008) +[2026-06-07 03:06:37,105][495927] Updated weights for policy 0, policy_version 8552 (0.0008) +[2026-06-07 03:06:37,257][495927] Updated weights for policy 0, policy_version 8562 (0.0008) +[2026-06-07 03:06:37,415][495927] Updated weights for policy 0, policy_version 8572 (0.0008) +[2026-06-07 03:06:38,070][495927] Updated weights for policy 0, policy_version 8584 (0.0008) +[2026-06-07 03:06:38,215][495927] Updated weights for policy 0, policy_version 8594 (0.0008) +[2026-06-07 03:06:38,363][495927] Updated weights for policy 0, policy_version 8604 (0.0008) +[2026-06-07 03:06:38,520][495927] Updated weights for policy 0, policy_version 8615 (0.0008) +[2026-06-07 03:06:38,680][495927] Updated weights for policy 0, policy_version 8625 (0.0008) +[2026-06-07 03:06:38,834][495927] Updated weights for policy 0, policy_version 8635 (0.0008) +[2026-06-07 03:06:38,911][492660] Fps is (10 sec: 22933.8, 60 sec: 23483.1, 300 sec: 22118.2). Total num frames: 4423680. Throughput: 0: 22751.9. Samples: 4411776. Policy #0 lag: (min: 63.0, avg: 76.2, max: 127.0) +[2026-06-07 03:06:38,914][492660] Avg episode reward: [(0, '286.408')] +[2026-06-07 03:06:39,468][495927] Updated weights for policy 0, policy_version 8646 (0.0008) +[2026-06-07 03:06:39,624][495927] Updated weights for policy 0, policy_version 8657 (0.0008) +[2026-06-07 03:06:39,772][495927] Updated weights for policy 0, policy_version 8667 (0.0008) +[2026-06-07 03:06:39,942][495927] Updated weights for policy 0, policy_version 8678 (0.0008) +[2026-06-07 03:06:40,097][495927] Updated weights for policy 0, policy_version 8688 (0.0009) +[2026-06-07 03:06:40,259][495927] Updated weights for policy 0, policy_version 8699 (0.0008) +[2026-06-07 03:06:40,873][495927] Updated weights for policy 0, policy_version 8709 (0.0008) +[2026-06-07 03:06:41,016][495927] Updated weights for policy 0, policy_version 8719 (0.0008) +[2026-06-07 03:06:41,164][495927] Updated weights for policy 0, policy_version 8729 (0.0008) +[2026-06-07 03:06:41,312][495927] Updated weights for policy 0, policy_version 8739 (0.0008) +[2026-06-07 03:06:41,454][495927] Updated weights for policy 0, policy_version 8749 (0.0009) +[2026-06-07 03:06:41,625][495927] Updated weights for policy 0, policy_version 8760 (0.0008) +[2026-06-07 03:06:42,245][495927] Updated weights for policy 0, policy_version 8770 (0.0008) +[2026-06-07 03:06:42,392][495927] Updated weights for policy 0, policy_version 8780 (0.0008) +[2026-06-07 03:06:42,536][495927] Updated weights for policy 0, policy_version 8790 (0.0008) +[2026-06-07 03:06:42,681][495927] Updated weights for policy 0, policy_version 8800 (0.0008) +[2026-06-07 03:06:42,843][495927] Updated weights for policy 0, policy_version 8811 (0.0008) +[2026-06-07 03:06:43,014][495927] Updated weights for policy 0, policy_version 8822 (0.0008) +[2026-06-07 03:06:43,161][495927] Updated weights for policy 0, policy_version 8832 (0.0008) +[2026-06-07 03:06:43,802][495927] Updated weights for policy 0, policy_version 8843 (0.0009) +[2026-06-07 03:06:43,909][492660] Fps is (10 sec: 22937.5, 60 sec: 22937.6, 300 sec: 22058.5). Total num frames: 4521984. Throughput: 0: 22809.6. Samples: 4548864. Policy #0 lag: (min: 63.0, avg: 76.2, max: 127.0) +[2026-06-07 03:06:43,910][492660] Avg episode reward: [(0, '299.018')] +[2026-06-07 03:06:43,947][495927] Updated weights for policy 0, policy_version 8853 (0.0008) +[2026-06-07 03:06:44,119][495927] Updated weights for policy 0, policy_version 8865 (0.0008) +[2026-06-07 03:06:44,267][495927] Updated weights for policy 0, policy_version 8875 (0.0009) +[2026-06-07 03:06:44,428][495927] Updated weights for policy 0, policy_version 8886 (0.0009) +[2026-06-07 03:06:44,572][495570] Saving new best policy, reward=299.018! +[2026-06-07 03:06:45,119][495927] Updated weights for policy 0, policy_version 8898 (0.0010) +[2026-06-07 03:06:45,256][495927] Updated weights for policy 0, policy_version 8908 (0.0008) +[2026-06-07 03:06:45,418][495927] Updated weights for policy 0, policy_version 8919 (0.0009) +[2026-06-07 03:06:45,568][495927] Updated weights for policy 0, policy_version 8929 (0.0009) +[2026-06-07 03:06:45,741][495927] Updated weights for policy 0, policy_version 8941 (0.0008) +[2026-06-07 03:06:45,897][495927] Updated weights for policy 0, policy_version 8951 (0.0005) +[2026-06-07 03:06:46,559][495927] Updated weights for policy 0, policy_version 8963 (0.0010) +[2026-06-07 03:06:46,696][495927] Updated weights for policy 0, policy_version 8973 (0.0008) +[2026-06-07 03:06:46,841][495927] Updated weights for policy 0, policy_version 8983 (0.0009) +[2026-06-07 03:06:47,005][495927] Updated weights for policy 0, policy_version 8994 (0.0009) +[2026-06-07 03:06:47,166][495927] Updated weights for policy 0, policy_version 9005 (0.0009) +[2026-06-07 03:06:47,321][495927] Updated weights for policy 0, policy_version 9015 (0.0009) +[2026-06-07 03:06:48,009][495927] Updated weights for policy 0, policy_version 9026 (0.0009) +[2026-06-07 03:06:48,147][495927] Updated weights for policy 0, policy_version 9036 (0.0008) +[2026-06-07 03:06:48,302][495927] Updated weights for policy 0, policy_version 9047 (0.0005) +[2026-06-07 03:06:48,451][495927] Updated weights for policy 0, policy_version 9057 (0.0005) +[2026-06-07 03:06:48,619][495927] Updated weights for policy 0, policy_version 9068 (0.0011) +[2026-06-07 03:06:48,760][495927] Updated weights for policy 0, policy_version 9078 (0.0012) +[2026-06-07 03:06:48,909][492660] Fps is (10 sec: 19663.9, 60 sec: 22937.6, 300 sec: 22001.4). Total num frames: 4620288. Throughput: 0: 22940.4. Samples: 4621312. Policy #0 lag: (min: 63.0, avg: 77.3, max: 127.0) +[2026-06-07 03:06:48,910][492660] Avg episode reward: [(0, '304.070')] +[2026-06-07 03:06:48,911][495927] Updated weights for policy 0, policy_version 9088 (0.0010) +[2026-06-07 03:06:48,916][495570] Saving new best policy, reward=304.070! +[2026-06-07 03:06:49,553][495927] Updated weights for policy 0, policy_version 9099 (0.0009) +[2026-06-07 03:06:49,709][495927] Updated weights for policy 0, policy_version 9110 (0.0009) +[2026-06-07 03:06:49,874][495927] Updated weights for policy 0, policy_version 9121 (0.0009) +[2026-06-07 03:06:50,025][495927] Updated weights for policy 0, policy_version 9131 (0.0006) +[2026-06-07 03:06:50,171][495927] Updated weights for policy 0, policy_version 9141 (0.0007) +[2026-06-07 03:06:50,322][495927] Updated weights for policy 0, policy_version 9151 (0.0008) +[2026-06-07 03:06:50,976][495927] Updated weights for policy 0, policy_version 9162 (0.0008) +[2026-06-07 03:06:51,125][495927] Updated weights for policy 0, policy_version 9172 (0.0008) +[2026-06-07 03:06:51,268][495927] Updated weights for policy 0, policy_version 9182 (0.0008) +[2026-06-07 03:06:51,443][495927] Updated weights for policy 0, policy_version 9194 (0.0008) +[2026-06-07 03:06:51,593][495927] Updated weights for policy 0, policy_version 9204 (0.0009) +[2026-06-07 03:06:51,749][495927] Updated weights for policy 0, policy_version 9215 (0.0008) +[2026-06-07 03:06:52,414][495927] Updated weights for policy 0, policy_version 9226 (0.0008) +[2026-06-07 03:06:52,561][495927] Updated weights for policy 0, policy_version 9236 (0.0008) +[2026-06-07 03:06:52,701][495927] Updated weights for policy 0, policy_version 9246 (0.0008) +[2026-06-07 03:06:52,848][495927] Updated weights for policy 0, policy_version 9256 (0.0008) +[2026-06-07 03:06:53,003][495927] Updated weights for policy 0, policy_version 9266 (0.0008) +[2026-06-07 03:06:53,161][495927] Updated weights for policy 0, policy_version 9277 (0.0008) +[2026-06-07 03:06:53,800][495927] Updated weights for policy 0, policy_version 9287 (0.0009) +[2026-06-07 03:06:53,909][492660] Fps is (10 sec: 22937.6, 60 sec: 22937.6, 300 sec: 22099.3). Total num frames: 4751360. Throughput: 0: 23062.7. Samples: 4762112. Policy #0 lag: (min: 63.0, avg: 77.3, max: 127.0) +[2026-06-07 03:06:53,910][492660] Avg episode reward: [(0, '197.562')] +[2026-06-07 03:06:53,959][495927] Updated weights for policy 0, policy_version 9298 (0.0008) +[2026-06-07 03:06:54,111][495927] Updated weights for policy 0, policy_version 9308 (0.0008) +[2026-06-07 03:06:54,282][495927] Updated weights for policy 0, policy_version 9320 (0.0008) +[2026-06-07 03:06:54,439][495927] Updated weights for policy 0, policy_version 9330 (0.0008) +[2026-06-07 03:06:54,588][495927] Updated weights for policy 0, policy_version 9340 (0.0009) +[2026-06-07 03:06:55,227][495927] Updated weights for policy 0, policy_version 9350 (0.0008) +[2026-06-07 03:06:55,357][495927] Updated weights for policy 0, policy_version 9360 (0.0008) +[2026-06-07 03:06:55,511][495927] Updated weights for policy 0, policy_version 9370 (0.0008) +[2026-06-07 03:06:55,661][495927] Updated weights for policy 0, policy_version 9380 (0.0008) +[2026-06-07 03:06:55,809][495927] Updated weights for policy 0, policy_version 9390 (0.0008) +[2026-06-07 03:06:55,990][495927] Updated weights for policy 0, policy_version 9402 (0.0008) +[2026-06-07 03:06:56,627][495927] Updated weights for policy 0, policy_version 9412 (0.0009) +[2026-06-07 03:06:56,772][495927] Updated weights for policy 0, policy_version 9422 (0.0009) +[2026-06-07 03:06:56,917][495927] Updated weights for policy 0, policy_version 9432 (0.0008) +[2026-06-07 03:06:57,081][495927] Updated weights for policy 0, policy_version 9443 (0.0008) +[2026-06-07 03:06:57,233][495927] Updated weights for policy 0, policy_version 9453 (0.0008) +[2026-06-07 03:06:57,378][495927] Updated weights for policy 0, policy_version 9463 (0.0008) +[2026-06-07 03:06:58,019][495927] Updated weights for policy 0, policy_version 9474 (0.0008) +[2026-06-07 03:06:58,161][495927] Updated weights for policy 0, policy_version 9484 (0.0008) +[2026-06-07 03:06:58,322][495927] Updated weights for policy 0, policy_version 9495 (0.0008) +[2026-06-07 03:06:58,484][495927] Updated weights for policy 0, policy_version 9506 (0.0008) +[2026-06-07 03:06:58,640][495927] Updated weights for policy 0, policy_version 9517 (0.0008) +[2026-06-07 03:06:58,795][495927] Updated weights for policy 0, policy_version 9527 (0.0008) +[2026-06-07 03:06:58,909][492660] Fps is (10 sec: 22937.6, 60 sec: 22937.6, 300 sec: 22043.9). Total num frames: 4849664. Throughput: 0: 23102.6. Samples: 4898176. Policy #0 lag: (min: 63.0, avg: 76.9, max: 127.0) +[2026-06-07 03:06:58,910][492660] Avg episode reward: [(0, '214.631')] +[2026-06-07 03:06:59,455][495927] Updated weights for policy 0, policy_version 9538 (0.0009) +[2026-06-07 03:06:59,594][495927] Updated weights for policy 0, policy_version 9548 (0.0008) +[2026-06-07 03:06:59,730][495927] Updated weights for policy 0, policy_version 9558 (0.0008) +[2026-06-07 03:06:59,881][495927] Updated weights for policy 0, policy_version 9568 (0.0007) +[2026-06-07 03:07:00,035][495927] Updated weights for policy 0, policy_version 9578 (0.0008) +[2026-06-07 03:07:00,185][495927] Updated weights for policy 0, policy_version 9588 (0.0008) +[2026-06-07 03:07:00,331][495927] Updated weights for policy 0, policy_version 9598 (0.0008) +[2026-06-07 03:07:00,969][495927] Updated weights for policy 0, policy_version 9609 (0.0008) +[2026-06-07 03:07:01,131][495927] Updated weights for policy 0, policy_version 9620 (0.0008) +[2026-06-07 03:07:01,304][495927] Updated weights for policy 0, policy_version 9632 (0.0009) +[2026-06-07 03:07:01,469][495927] Updated weights for policy 0, policy_version 9643 (0.0008) +[2026-06-07 03:07:01,624][495927] Updated weights for policy 0, policy_version 9653 (0.0008) +[2026-06-07 03:07:01,787][495927] Updated weights for policy 0, policy_version 9664 (0.0008) +[2026-06-07 03:07:02,462][495927] Updated weights for policy 0, policy_version 9675 (0.0008) +[2026-06-07 03:07:02,610][495927] Updated weights for policy 0, policy_version 9685 (0.0008) +[2026-06-07 03:07:02,764][495927] Updated weights for policy 0, policy_version 9696 (0.0008) +[2026-06-07 03:07:02,917][495927] Updated weights for policy 0, policy_version 9706 (0.0008) +[2026-06-07 03:07:03,073][495927] Updated weights for policy 0, policy_version 9716 (0.0008) +[2026-06-07 03:07:03,214][495927] Updated weights for policy 0, policy_version 9726 (0.0008) +[2026-06-07 03:07:03,840][495927] Updated weights for policy 0, policy_version 9737 (0.0009) +[2026-06-07 03:07:03,909][492660] Fps is (10 sec: 22937.6, 60 sec: 22937.6, 300 sec: 22136.6). Total num frames: 4980736. Throughput: 0: 22983.1. Samples: 4965760. Policy #0 lag: (min: 63.0, avg: 76.9, max: 127.0) +[2026-06-07 03:07:03,910][492660] Avg episode reward: [(0, '205.855')] +[2026-06-07 03:07:03,984][495927] Updated weights for policy 0, policy_version 9747 (0.0008) +[2026-06-07 03:07:04,142][495927] Updated weights for policy 0, policy_version 9758 (0.0008) +[2026-06-07 03:07:04,291][495927] Updated weights for policy 0, policy_version 9768 (0.0009) +[2026-06-07 03:07:04,462][495927] Updated weights for policy 0, policy_version 9780 (0.0009) +[2026-06-07 03:07:04,607][495927] Updated weights for policy 0, policy_version 9790 (0.0008) +[2026-06-07 03:07:05,297][495927] Updated weights for policy 0, policy_version 9803 (0.0009) +[2026-06-07 03:07:05,442][495927] Updated weights for policy 0, policy_version 9814 (0.0009) +[2026-06-07 03:07:05,614][495927] Updated weights for policy 0, policy_version 9826 (0.0008) +[2026-06-07 03:07:05,760][495927] Updated weights for policy 0, policy_version 9836 (0.0008) +[2026-06-07 03:07:05,938][495927] Updated weights for policy 0, policy_version 9848 (0.0008) +[2026-06-07 03:07:06,629][495927] Updated weights for policy 0, policy_version 9860 (0.0009) +[2026-06-07 03:07:06,775][495927] Updated weights for policy 0, policy_version 9871 (0.0009) +[2026-06-07 03:07:06,919][495927] Updated weights for policy 0, policy_version 9881 (0.0008) +[2026-06-07 03:07:07,119][495927] Updated weights for policy 0, policy_version 9895 (0.0008) +[2026-06-07 03:07:07,266][495927] Updated weights for policy 0, policy_version 9905 (0.0008) +[2026-06-07 03:07:07,461][495927] Updated weights for policy 0, policy_version 9918 (0.0008) +[2026-06-07 03:07:08,143][495927] Updated weights for policy 0, policy_version 9931 (0.0009) +[2026-06-07 03:07:08,328][495927] Updated weights for policy 0, policy_version 9944 (0.0008) +[2026-06-07 03:07:08,477][495927] Updated weights for policy 0, policy_version 9954 (0.0008) +[2026-06-07 03:07:08,667][495927] Updated weights for policy 0, policy_version 9967 (0.0008) +[2026-06-07 03:07:08,813][495927] Updated weights for policy 0, policy_version 9977 (0.0008) +[2026-06-07 03:07:08,909][492660] Fps is (10 sec: 22937.7, 60 sec: 22937.6, 300 sec: 22082.8). Total num frames: 5079040. Throughput: 0: 22835.2. Samples: 5100800. Policy #0 lag: (min: 39.0, avg: 52.5, max: 103.0) +[2026-06-07 03:07:08,910][492660] Avg episode reward: [(0, '251.470')] +[2026-06-07 03:07:09,473][495927] Updated weights for policy 0, policy_version 9987 (0.0009) +[2026-06-07 03:07:09,612][495927] Updated weights for policy 0, policy_version 9997 (0.0008) +[2026-06-07 03:07:09,758][495927] Updated weights for policy 0, policy_version 10007 (0.0008) +[2026-06-07 03:07:09,907][495927] Updated weights for policy 0, policy_version 10017 (0.0008) +[2026-06-07 03:07:10,058][495927] Updated weights for policy 0, policy_version 10027 (0.0008) +[2026-06-07 03:07:10,205][495927] Updated weights for policy 0, policy_version 10037 (0.0008) +[2026-06-07 03:07:10,363][495927] Updated weights for policy 0, policy_version 10047 (0.0008) +[2026-06-07 03:07:10,970][495927] Updated weights for policy 0, policy_version 10057 (0.0008) +[2026-06-07 03:07:11,153][495927] Updated weights for policy 0, policy_version 10069 (0.0008) +[2026-06-07 03:07:11,318][495927] Updated weights for policy 0, policy_version 10080 (0.0009) +[2026-06-07 03:07:11,467][495927] Updated weights for policy 0, policy_version 10090 (0.0009) +[2026-06-07 03:07:11,615][495927] Updated weights for policy 0, policy_version 10100 (0.0008) +[2026-06-07 03:07:11,771][495927] Updated weights for policy 0, policy_version 10110 (0.0008) +[2026-06-07 03:07:12,429][495927] Updated weights for policy 0, policy_version 10120 (0.0008) +[2026-06-07 03:07:12,571][495927] Updated weights for policy 0, policy_version 10130 (0.0008) +[2026-06-07 03:07:12,714][495927] Updated weights for policy 0, policy_version 10140 (0.0008) +[2026-06-07 03:07:12,864][495927] Updated weights for policy 0, policy_version 10150 (0.0009) +[2026-06-07 03:07:13,031][495927] Updated weights for policy 0, policy_version 10161 (0.0009) +[2026-06-07 03:07:13,128][495570] Early stopping after 7 epochs (56 sgd steps), loss delta 0.0000006 +[2026-06-07 03:07:13,680][495927] Updated weights for policy 0, policy_version 10171 (0.0008) +[2026-06-07 03:07:13,814][495927] Updated weights for policy 0, policy_version 10181 (0.0008) +[2026-06-07 03:07:13,909][492660] Fps is (10 sec: 22937.7, 60 sec: 22937.6, 300 sec: 22170.7). Total num frames: 5210112. Throughput: 0: 22968.9. Samples: 5242240. Policy #0 lag: (min: 5.0, avg: 18.7, max: 69.0) +[2026-06-07 03:07:13,910][492660] Avg episode reward: [(0, '265.360')] +[2026-06-07 03:07:13,959][495927] Updated weights for policy 0, policy_version 10191 (0.0008) +[2026-06-07 03:07:14,113][495927] Updated weights for policy 0, policy_version 10201 (0.0009) +[2026-06-07 03:07:14,261][495927] Updated weights for policy 0, policy_version 10211 (0.0008) +[2026-06-07 03:07:14,410][495927] Updated weights for policy 0, policy_version 10221 (0.0008) +[2026-06-07 03:07:14,568][495927] Updated weights for policy 0, policy_version 10231 (0.0008) +[2026-06-07 03:07:15,205][495927] Updated weights for policy 0, policy_version 10241 (0.0008) +[2026-06-07 03:07:15,348][495927] Updated weights for policy 0, policy_version 10251 (0.0008) +[2026-06-07 03:07:15,499][495927] Updated weights for policy 0, policy_version 10261 (0.0009) +[2026-06-07 03:07:15,649][495927] Updated weights for policy 0, policy_version 10271 (0.0008) +[2026-06-07 03:07:15,818][495927] Updated weights for policy 0, policy_version 10282 (0.0010) +[2026-06-07 03:07:15,978][495927] Updated weights for policy 0, policy_version 10293 (0.0008) +[2026-06-07 03:07:16,595][495927] Updated weights for policy 0, policy_version 10303 (0.0008) +[2026-06-07 03:07:16,741][495927] Updated weights for policy 0, policy_version 10313 (0.0008) +[2026-06-07 03:07:16,904][495927] Updated weights for policy 0, policy_version 10324 (0.0008) +[2026-06-07 03:07:17,063][495927] Updated weights for policy 0, policy_version 10335 (0.0008) +[2026-06-07 03:07:17,215][495927] Updated weights for policy 0, policy_version 10345 (0.0008) +[2026-06-07 03:07:17,363][495927] Updated weights for policy 0, policy_version 10355 (0.0008) +[2026-06-07 03:07:18,002][495927] Updated weights for policy 0, policy_version 10365 (0.0009) +[2026-06-07 03:07:18,143][495927] Updated weights for policy 0, policy_version 10375 (0.0008) +[2026-06-07 03:07:18,291][495927] Updated weights for policy 0, policy_version 10385 (0.0008) +[2026-06-07 03:07:18,441][495927] Updated weights for policy 0, policy_version 10395 (0.0008) +[2026-06-07 03:07:18,597][495927] Updated weights for policy 0, policy_version 10405 (0.0009) +[2026-06-07 03:07:18,745][495927] Updated weights for policy 0, policy_version 10415 (0.0008) +[2026-06-07 03:07:18,909][492660] Fps is (10 sec: 26214.4, 60 sec: 22937.6, 300 sec: 22254.9). Total num frames: 5341184. Throughput: 0: 23091.2. Samples: 5315584. Policy #0 lag: (min: 5.0, avg: 18.7, max: 69.0) +[2026-06-07 03:07:18,910][492660] Avg episode reward: [(0, '266.364')] +[2026-06-07 03:07:19,354][495927] Updated weights for policy 0, policy_version 10425 (0.0008) +[2026-06-07 03:07:19,540][495927] Updated weights for policy 0, policy_version 10438 (0.0009) +[2026-06-07 03:07:19,688][495927] Updated weights for policy 0, policy_version 10448 (0.0006) +[2026-06-07 03:07:19,837][495927] Updated weights for policy 0, policy_version 10458 (0.0008) +[2026-06-07 03:07:19,988][495927] Updated weights for policy 0, policy_version 10468 (0.0008) +[2026-06-07 03:07:20,157][495927] Updated weights for policy 0, policy_version 10479 (0.0008) +[2026-06-07 03:07:20,786][495927] Updated weights for policy 0, policy_version 10489 (0.0008) +[2026-06-07 03:07:20,928][495927] Updated weights for policy 0, policy_version 10499 (0.0008) +[2026-06-07 03:07:21,086][495927] Updated weights for policy 0, policy_version 10510 (0.0008) +[2026-06-07 03:07:21,234][495927] Updated weights for policy 0, policy_version 10520 (0.0007) +[2026-06-07 03:07:21,397][495927] Updated weights for policy 0, policy_version 10531 (0.0005) +[2026-06-07 03:07:21,544][495927] Updated weights for policy 0, policy_version 10541 (0.0011) +[2026-06-07 03:07:21,692][495927] Updated weights for policy 0, policy_version 10551 (0.0010) +[2026-06-07 03:07:22,314][495927] Updated weights for policy 0, policy_version 10561 (0.0009) +[2026-06-07 03:07:22,475][495927] Updated weights for policy 0, policy_version 10573 (0.0009) +[2026-06-07 03:07:22,627][495927] Updated weights for policy 0, policy_version 10583 (0.0008) +[2026-06-07 03:07:22,775][495927] Updated weights for policy 0, policy_version 10593 (0.0008) +[2026-06-07 03:07:22,927][495927] Updated weights for policy 0, policy_version 10603 (0.0008) +[2026-06-07 03:07:23,098][495927] Updated weights for policy 0, policy_version 10615 (0.0008) +[2026-06-07 03:07:23,755][495927] Updated weights for policy 0, policy_version 10626 (0.0009) +[2026-06-07 03:07:23,893][495927] Updated weights for policy 0, policy_version 10636 (0.0008) +[2026-06-07 03:07:23,909][492660] Fps is (10 sec: 22937.6, 60 sec: 22937.6, 300 sec: 22202.0). Total num frames: 5439488. Throughput: 0: 23220.0. Samples: 5456640. Policy #0 lag: (min: 63.0, avg: 76.7, max: 127.0) +[2026-06-07 03:07:23,910][492660] Avg episode reward: [(0, '294.339')] +[2026-06-07 03:07:24,086][495927] Updated weights for policy 0, policy_version 10649 (0.0008) +[2026-06-07 03:07:24,249][495927] Updated weights for policy 0, policy_version 10660 (0.0008) +[2026-06-07 03:07:24,391][495927] Updated weights for policy 0, policy_version 10670 (0.0008) +[2026-06-07 03:07:25,038][495927] Updated weights for policy 0, policy_version 10681 (0.0009) +[2026-06-07 03:07:25,190][495927] Updated weights for policy 0, policy_version 10692 (0.0009) +[2026-06-07 03:07:25,346][495927] Updated weights for policy 0, policy_version 10703 (0.0008) +[2026-06-07 03:07:25,487][495927] Updated weights for policy 0, policy_version 10713 (0.0008) +[2026-06-07 03:07:25,652][495927] Updated weights for policy 0, policy_version 10724 (0.0008) +[2026-06-07 03:07:25,803][495927] Updated weights for policy 0, policy_version 10734 (0.0008) +[2026-06-07 03:07:25,945][495927] Updated weights for policy 0, policy_version 10744 (0.0008) +[2026-06-07 03:07:26,591][495927] Updated weights for policy 0, policy_version 10754 (0.0009) +[2026-06-07 03:07:26,758][495927] Updated weights for policy 0, policy_version 10766 (0.0008) +[2026-06-07 03:07:26,916][495927] Updated weights for policy 0, policy_version 10777 (0.0008) +[2026-06-07 03:07:27,094][495927] Updated weights for policy 0, policy_version 10789 (0.0008) +[2026-06-07 03:07:27,259][495927] Updated weights for policy 0, policy_version 10800 (0.0008) +[2026-06-07 03:07:27,939][495927] Updated weights for policy 0, policy_version 10811 (0.0009) +[2026-06-07 03:07:28,071][495927] Updated weights for policy 0, policy_version 10821 (0.0008) +[2026-06-07 03:07:28,224][495927] Updated weights for policy 0, policy_version 10832 (0.0009) +[2026-06-07 03:07:28,389][495927] Updated weights for policy 0, policy_version 10843 (0.0009) +[2026-06-07 03:07:28,531][495927] Updated weights for policy 0, policy_version 10853 (0.0008) +[2026-06-07 03:07:28,685][495927] Updated weights for policy 0, policy_version 10863 (0.0008) +[2026-06-07 03:07:28,909][492660] Fps is (10 sec: 22937.5, 60 sec: 22937.6, 300 sec: 22282.2). Total num frames: 5570560. Throughput: 0: 23173.7. Samples: 5591680. Policy #0 lag: (min: 63.0, avg: 76.7, max: 127.0) +[2026-06-07 03:07:28,910][492660] Avg episode reward: [(0, '317.522')] +[2026-06-07 03:07:28,915][495570] Saving new best policy, reward=317.522! +[2026-06-07 03:07:29,332][495927] Updated weights for policy 0, policy_version 10873 (0.0009) +[2026-06-07 03:07:29,493][495927] Updated weights for policy 0, policy_version 10884 (0.0009) +[2026-06-07 03:07:29,649][495927] Updated weights for policy 0, policy_version 10895 (0.0009) +[2026-06-07 03:07:29,800][495927] Updated weights for policy 0, policy_version 10905 (0.0008) +[2026-06-07 03:07:29,962][495927] Updated weights for policy 0, policy_version 10916 (0.0009) +[2026-06-07 03:07:30,122][495927] Updated weights for policy 0, policy_version 10927 (0.0009) +[2026-06-07 03:07:30,752][495927] Updated weights for policy 0, policy_version 10937 (0.0010) +[2026-06-07 03:07:30,895][495927] Updated weights for policy 0, policy_version 10947 (0.0008) +[2026-06-07 03:07:31,053][495927] Updated weights for policy 0, policy_version 10958 (0.0009) +[2026-06-07 03:07:31,202][495927] Updated weights for policy 0, policy_version 10968 (0.0008) +[2026-06-07 03:07:31,364][495927] Updated weights for policy 0, policy_version 10979 (0.0009) +[2026-06-07 03:07:31,512][495927] Updated weights for policy 0, policy_version 10989 (0.0008) +[2026-06-07 03:07:31,675][495927] Updated weights for policy 0, policy_version 11000 (0.0008) +[2026-06-07 03:07:32,341][495927] Updated weights for policy 0, policy_version 11012 (0.0009) +[2026-06-07 03:07:32,486][495927] Updated weights for policy 0, policy_version 11022 (0.0008) +[2026-06-07 03:07:32,645][495927] Updated weights for policy 0, policy_version 11033 (0.0009) +[2026-06-07 03:07:32,800][495927] Updated weights for policy 0, policy_version 11043 (0.0009) +[2026-06-07 03:07:32,951][495927] Updated weights for policy 0, policy_version 11053 (0.0009) +[2026-06-07 03:07:33,109][495927] Updated weights for policy 0, policy_version 11064 (0.0008) +[2026-06-07 03:07:33,733][495927] Updated weights for policy 0, policy_version 11074 (0.0009) +[2026-06-07 03:07:33,877][495927] Updated weights for policy 0, policy_version 11084 (0.0008) +[2026-06-07 03:07:33,909][492660] Fps is (10 sec: 22937.6, 60 sec: 22937.6, 300 sec: 22230.8). Total num frames: 5668864. Throughput: 0: 23077.0. Samples: 5659776. Policy #0 lag: (min: 121.0, avg: 131.8, max: 184.0) +[2026-06-07 03:07:33,910][492660] Avg episode reward: [(0, '306.569')] +[2026-06-07 03:07:34,031][495927] Updated weights for policy 0, policy_version 11094 (0.0008) +[2026-06-07 03:07:34,179][495927] Updated weights for policy 0, policy_version 11104 (0.0009) +[2026-06-07 03:07:34,334][495927] Updated weights for policy 0, policy_version 11114 (0.0008) +[2026-06-07 03:07:34,518][495927] Updated weights for policy 0, policy_version 11126 (0.0008) +[2026-06-07 03:07:35,155][495927] Updated weights for policy 0, policy_version 11138 (0.0008) +[2026-06-07 03:07:35,332][495927] Updated weights for policy 0, policy_version 11150 (0.0008) +[2026-06-07 03:07:35,476][495927] Updated weights for policy 0, policy_version 11160 (0.0009) +[2026-06-07 03:07:35,630][495927] Updated weights for policy 0, policy_version 11170 (0.0008) +[2026-06-07 03:07:35,792][495927] Updated weights for policy 0, policy_version 11181 (0.0008) +[2026-06-07 03:07:35,944][495927] Updated weights for policy 0, policy_version 11191 (0.0009) +[2026-06-07 03:07:36,574][495927] Updated weights for policy 0, policy_version 11201 (0.0008) +[2026-06-07 03:07:36,716][495927] Updated weights for policy 0, policy_version 11211 (0.0008) +[2026-06-07 03:07:36,865][495927] Updated weights for policy 0, policy_version 11221 (0.0008) +[2026-06-07 03:07:37,021][495927] Updated weights for policy 0, policy_version 11231 (0.0009) +[2026-06-07 03:07:37,202][495927] Updated weights for policy 0, policy_version 11243 (0.0008) +[2026-06-07 03:07:37,365][495927] Updated weights for policy 0, policy_version 11254 (0.0008) +[2026-06-07 03:07:37,974][495927] Updated weights for policy 0, policy_version 11264 (0.0009) +[2026-06-07 03:07:38,124][495927] Updated weights for policy 0, policy_version 11274 (0.0008) +[2026-06-07 03:07:38,271][495927] Updated weights for policy 0, policy_version 11284 (0.0008) +[2026-06-07 03:07:38,415][495927] Updated weights for policy 0, policy_version 11294 (0.0008) +[2026-06-07 03:07:38,579][495927] Updated weights for policy 0, policy_version 11305 (0.0008) +[2026-06-07 03:07:38,751][495927] Updated weights for policy 0, policy_version 11316 (0.0008) +[2026-06-07 03:07:38,910][492660] Fps is (10 sec: 22936.7, 60 sec: 22938.1, 300 sec: 22307.4). Total num frames: 5799936. Throughput: 0: 22963.0. Samples: 5795456. Policy #0 lag: (min: 121.0, avg: 131.8, max: 184.0) +[2026-06-07 03:07:38,911][492660] Avg episode reward: [(0, '333.440')] +[2026-06-07 03:07:38,917][495570] Saving new best policy, reward=333.440! +[2026-06-07 03:07:39,386][495927] Updated weights for policy 0, policy_version 11327 (0.0008) +[2026-06-07 03:07:39,536][495927] Updated weights for policy 0, policy_version 11338 (0.0008) +[2026-06-07 03:07:39,688][495927] Updated weights for policy 0, policy_version 11348 (0.0009) +[2026-06-07 03:07:39,845][495927] Updated weights for policy 0, policy_version 11358 (0.0008) +[2026-06-07 03:07:39,997][495927] Updated weights for policy 0, policy_version 11368 (0.0009) +[2026-06-07 03:07:40,159][495927] Updated weights for policy 0, policy_version 11379 (0.0009) +[2026-06-07 03:07:40,787][495927] Updated weights for policy 0, policy_version 11389 (0.0009) +[2026-06-07 03:07:40,932][495927] Updated weights for policy 0, policy_version 11399 (0.0008) +[2026-06-07 03:07:41,107][495927] Updated weights for policy 0, policy_version 11411 (0.0009) +[2026-06-07 03:07:41,257][495927] Updated weights for policy 0, policy_version 11421 (0.0009) +[2026-06-07 03:07:41,439][495927] Updated weights for policy 0, policy_version 11433 (0.0008) +[2026-06-07 03:07:41,586][495927] Updated weights for policy 0, policy_version 11443 (0.0008) +[2026-06-07 03:07:42,232][495927] Updated weights for policy 0, policy_version 11454 (0.0009) +[2026-06-07 03:07:42,390][495927] Updated weights for policy 0, policy_version 11465 (0.0009) +[2026-06-07 03:07:42,536][495927] Updated weights for policy 0, policy_version 11475 (0.0008) +[2026-06-07 03:07:42,685][495927] Updated weights for policy 0, policy_version 11485 (0.0009) +[2026-06-07 03:07:42,842][495927] Updated weights for policy 0, policy_version 11496 (0.0008) +[2026-06-07 03:07:43,000][495927] Updated weights for policy 0, policy_version 11506 (0.0008) +[2026-06-07 03:07:43,632][495927] Updated weights for policy 0, policy_version 11517 (0.0009) +[2026-06-07 03:07:43,801][495927] Updated weights for policy 0, policy_version 11529 (0.0008) +[2026-06-07 03:07:43,909][492660] Fps is (10 sec: 22937.6, 60 sec: 22937.6, 300 sec: 22257.5). Total num frames: 5898240. Throughput: 0: 23048.6. Samples: 5935360. Policy #0 lag: (min: 63.0, avg: 77.0, max: 127.0) +[2026-06-07 03:07:43,910][492660] Avg episode reward: [(0, '363.929')] +[2026-06-07 03:07:43,951][495927] Updated weights for policy 0, policy_version 11539 (0.0008) +[2026-06-07 03:07:44,111][495927] Updated weights for policy 0, policy_version 11550 (0.0009) +[2026-06-07 03:07:44,267][495927] Updated weights for policy 0, policy_version 11560 (0.0008) +[2026-06-07 03:07:44,435][495927] Updated weights for policy 0, policy_version 11571 (0.0008) +[2026-06-07 03:07:44,501][495570] Saving new best policy, reward=363.929! +[2026-06-07 03:07:45,075][495927] Updated weights for policy 0, policy_version 11581 (0.0009) +[2026-06-07 03:07:45,238][495927] Updated weights for policy 0, policy_version 11593 (0.0008) +[2026-06-07 03:07:45,404][495927] Updated weights for policy 0, policy_version 11604 (0.0009) +[2026-06-07 03:07:45,561][495927] Updated weights for policy 0, policy_version 11614 (0.0009) +[2026-06-07 03:07:45,704][495927] Updated weights for policy 0, policy_version 11624 (0.0009) +[2026-06-07 03:07:45,855][495927] Updated weights for policy 0, policy_version 11634 (0.0008) +[2026-06-07 03:07:46,468][495927] Updated weights for policy 0, policy_version 11644 (0.0009) +[2026-06-07 03:07:46,622][495927] Updated weights for policy 0, policy_version 11655 (0.0008) +[2026-06-07 03:07:46,762][495927] Updated weights for policy 0, policy_version 11665 (0.0008) +[2026-06-07 03:07:46,916][495927] Updated weights for policy 0, policy_version 11675 (0.0009) +[2026-06-07 03:07:47,065][495927] Updated weights for policy 0, policy_version 11685 (0.0009) +[2026-06-07 03:07:47,217][495927] Updated weights for policy 0, policy_version 11695 (0.0009) +[2026-06-07 03:07:47,853][495927] Updated weights for policy 0, policy_version 11706 (0.0009) +[2026-06-07 03:07:47,996][495927] Updated weights for policy 0, policy_version 11716 (0.0008) +[2026-06-07 03:07:48,140][495927] Updated weights for policy 0, policy_version 11726 (0.0008) +[2026-06-07 03:07:48,293][495927] Updated weights for policy 0, policy_version 11736 (0.0008) +[2026-06-07 03:07:48,444][495927] Updated weights for policy 0, policy_version 11746 (0.0008) +[2026-06-07 03:07:48,584][495927] Updated weights for policy 0, policy_version 11756 (0.0008) +[2026-06-07 03:07:48,762][495927] Updated weights for policy 0, policy_version 11767 (0.0008) +[2026-06-07 03:07:48,909][492660] Fps is (10 sec: 22938.6, 60 sec: 23483.8, 300 sec: 22330.8). Total num frames: 6029312. Throughput: 0: 23173.7. Samples: 6008576. Policy #0 lag: (min: 63.0, avg: 77.0, max: 127.0) +[2026-06-07 03:07:48,910][492660] Avg episode reward: [(0, '376.182')] +[2026-06-07 03:07:48,914][495570] Saving new best policy, reward=376.182! +[2026-06-07 03:07:49,381][495927] Updated weights for policy 0, policy_version 11777 (0.0008) +[2026-06-07 03:07:49,529][495927] Updated weights for policy 0, policy_version 11787 (0.0008) +[2026-06-07 03:07:49,704][495927] Updated weights for policy 0, policy_version 11799 (0.0008) +[2026-06-07 03:07:49,869][495927] Updated weights for policy 0, policy_version 11810 (0.0008) +[2026-06-07 03:07:50,023][495927] Updated weights for policy 0, policy_version 11820 (0.0009) +[2026-06-07 03:07:50,176][495927] Updated weights for policy 0, policy_version 11830 (0.0008) +[2026-06-07 03:07:50,858][495927] Updated weights for policy 0, policy_version 11844 (0.0010) +[2026-06-07 03:07:51,009][495927] Updated weights for policy 0, policy_version 11854 (0.0008) +[2026-06-07 03:07:51,156][495927] Updated weights for policy 0, policy_version 11864 (0.0008) +[2026-06-07 03:07:51,307][495927] Updated weights for policy 0, policy_version 11874 (0.0009) +[2026-06-07 03:07:51,474][495927] Updated weights for policy 0, policy_version 11885 (0.0009) +[2026-06-07 03:07:51,637][495927] Updated weights for policy 0, policy_version 11896 (0.0008) +[2026-06-07 03:07:52,270][495927] Updated weights for policy 0, policy_version 11907 (0.0009) +[2026-06-07 03:07:52,417][495927] Updated weights for policy 0, policy_version 11917 (0.0008) +[2026-06-07 03:07:52,575][495927] Updated weights for policy 0, policy_version 11927 (0.0008) +[2026-06-07 03:07:52,713][495927] Updated weights for policy 0, policy_version 11937 (0.0008) +[2026-06-07 03:07:52,867][495927] Updated weights for policy 0, policy_version 11947 (0.0008) +[2026-06-07 03:07:53,019][495927] Updated weights for policy 0, policy_version 11957 (0.0008) +[2026-06-07 03:07:53,629][495927] Updated weights for policy 0, policy_version 11967 (0.0008) +[2026-06-07 03:07:53,798][495927] Updated weights for policy 0, policy_version 11978 (0.0008) +[2026-06-07 03:07:53,909][492660] Fps is (10 sec: 22937.6, 60 sec: 22937.6, 300 sec: 22282.2). Total num frames: 6127616. Throughput: 0: 23239.1. Samples: 6146560. Policy #0 lag: (min: 63.0, avg: 74.3, max: 122.0) +[2026-06-07 03:07:53,910][492660] Avg episode reward: [(0, '408.130')] +[2026-06-07 03:07:53,952][495927] Updated weights for policy 0, policy_version 11989 (0.0008) +[2026-06-07 03:07:54,109][495927] Updated weights for policy 0, policy_version 11999 (0.0007) +[2026-06-07 03:07:54,284][495927] Updated weights for policy 0, policy_version 12011 (0.0008) +[2026-06-07 03:07:54,440][495927] Updated weights for policy 0, policy_version 12021 (0.0008) +[2026-06-07 03:07:54,474][495570] Saving new best policy, reward=408.130! +[2026-06-07 03:07:55,060][495927] Updated weights for policy 0, policy_version 12031 (0.0008) +[2026-06-07 03:07:55,240][495927] Updated weights for policy 0, policy_version 12043 (0.0009) +[2026-06-07 03:07:55,387][495927] Updated weights for policy 0, policy_version 12053 (0.0008) +[2026-06-07 03:07:55,531][495927] Updated weights for policy 0, policy_version 12063 (0.0008) +[2026-06-07 03:07:55,682][495927] Updated weights for policy 0, policy_version 12073 (0.0008) +[2026-06-07 03:07:55,849][495927] Updated weights for policy 0, policy_version 12084 (0.0008) +[2026-06-07 03:07:56,477][495927] Updated weights for policy 0, policy_version 12094 (0.0009) +[2026-06-07 03:07:56,620][495927] Updated weights for policy 0, policy_version 12104 (0.0008) +[2026-06-07 03:07:56,783][495927] Updated weights for policy 0, policy_version 12115 (0.0008) +[2026-06-07 03:07:56,925][495927] Updated weights for policy 0, policy_version 12125 (0.0008) +[2026-06-07 03:07:57,074][495927] Updated weights for policy 0, policy_version 12135 (0.0008) +[2026-06-07 03:07:57,230][495927] Updated weights for policy 0, policy_version 12145 (0.0008) +[2026-06-07 03:07:57,859][495927] Updated weights for policy 0, policy_version 12155 (0.0008) +[2026-06-07 03:07:58,013][495927] Updated weights for policy 0, policy_version 12166 (0.0009) +[2026-06-07 03:07:58,157][495927] Updated weights for policy 0, policy_version 12176 (0.0008) +[2026-06-07 03:07:58,305][495927] Updated weights for policy 0, policy_version 12186 (0.0008) +[2026-06-07 03:07:58,486][495927] Updated weights for policy 0, policy_version 12198 (0.0008) +[2026-06-07 03:07:58,635][495927] Updated weights for policy 0, policy_version 12208 (0.0009) +[2026-06-07 03:07:58,909][492660] Fps is (10 sec: 22937.6, 60 sec: 23483.8, 300 sec: 22352.5). Total num frames: 6258688. Throughput: 0: 23091.2. Samples: 6281344. Policy #0 lag: (min: 63.0, avg: 74.3, max: 122.0) +[2026-06-07 03:07:58,910][492660] Avg episode reward: [(0, '456.014')] +[2026-06-07 03:07:58,915][495570] Saving new best policy, reward=456.014! +[2026-06-07 03:07:59,256][495927] Updated weights for policy 0, policy_version 12218 (0.0008) +[2026-06-07 03:07:59,416][495927] Updated weights for policy 0, policy_version 12228 (0.0008) +[2026-06-07 03:07:59,552][495927] Updated weights for policy 0, policy_version 12238 (0.0008) +[2026-06-07 03:07:59,711][495927] Updated weights for policy 0, policy_version 12249 (0.0009) +[2026-06-07 03:07:59,867][495927] Updated weights for policy 0, policy_version 12259 (0.0009) +[2026-06-07 03:08:00,022][495927] Updated weights for policy 0, policy_version 12269 (0.0009) +[2026-06-07 03:08:00,176][495927] Updated weights for policy 0, policy_version 12279 (0.0009) +[2026-06-07 03:08:00,802][495927] Updated weights for policy 0, policy_version 12291 (0.0009) +[2026-06-07 03:08:00,967][495927] Updated weights for policy 0, policy_version 12302 (0.0008) +[2026-06-07 03:08:01,110][495927] Updated weights for policy 0, policy_version 12312 (0.0008) +[2026-06-07 03:08:01,258][495927] Updated weights for policy 0, policy_version 12322 (0.0008) +[2026-06-07 03:08:01,435][495927] Updated weights for policy 0, policy_version 12333 (0.0008) +[2026-06-07 03:08:01,591][495927] Updated weights for policy 0, policy_version 12343 (0.0008) +[2026-06-07 03:08:02,207][495927] Updated weights for policy 0, policy_version 12354 (0.0008) +[2026-06-07 03:08:02,377][495927] Updated weights for policy 0, policy_version 12365 (0.0008) +[2026-06-07 03:08:02,542][495927] Updated weights for policy 0, policy_version 12376 (0.0008) +[2026-06-07 03:08:02,695][495927] Updated weights for policy 0, policy_version 12386 (0.0008) +[2026-06-07 03:08:02,852][495927] Updated weights for policy 0, policy_version 12396 (0.0008) +[2026-06-07 03:08:03,011][495927] Updated weights for policy 0, policy_version 12406 (0.0008) +[2026-06-07 03:08:03,606][495927] Updated weights for policy 0, policy_version 12416 (0.0008) +[2026-06-07 03:08:03,752][495927] Updated weights for policy 0, policy_version 12426 (0.0008) +[2026-06-07 03:08:03,900][495927] Updated weights for policy 0, policy_version 12436 (0.0008) +[2026-06-07 03:08:03,909][492660] Fps is (10 sec: 22937.6, 60 sec: 22937.6, 300 sec: 22305.2). Total num frames: 6356992. Throughput: 0: 22949.0. Samples: 6348288. Policy #0 lag: (min: 63.0, avg: 77.3, max: 127.0) +[2026-06-07 03:08:03,910][492660] Avg episode reward: [(0, '454.082')] +[2026-06-07 03:08:04,059][495927] Updated weights for policy 0, policy_version 12446 (0.0008) +[2026-06-07 03:08:04,210][495927] Updated weights for policy 0, policy_version 12456 (0.0008) +[2026-06-07 03:08:04,368][495927] Updated weights for policy 0, policy_version 12466 (0.0008) +[2026-06-07 03:08:04,988][495927] Updated weights for policy 0, policy_version 12476 (0.0009) +[2026-06-07 03:08:05,119][495927] Updated weights for policy 0, policy_version 12486 (0.0008) +[2026-06-07 03:08:05,284][495927] Updated weights for policy 0, policy_version 12497 (0.0008) +[2026-06-07 03:08:05,458][495927] Updated weights for policy 0, policy_version 12509 (0.0008) +[2026-06-07 03:08:05,621][495927] Updated weights for policy 0, policy_version 12520 (0.0008) +[2026-06-07 03:08:05,803][495927] Updated weights for policy 0, policy_version 12532 (0.0008) +[2026-06-07 03:08:06,439][495927] Updated weights for policy 0, policy_version 12542 (0.0008) +[2026-06-07 03:08:06,586][495927] Updated weights for policy 0, policy_version 12552 (0.0009) +[2026-06-07 03:08:06,733][495927] Updated weights for policy 0, policy_version 12562 (0.0008) +[2026-06-07 03:08:06,878][495927] Updated weights for policy 0, policy_version 12572 (0.0008) +[2026-06-07 03:08:07,039][495927] Updated weights for policy 0, policy_version 12583 (0.0009) +[2026-06-07 03:08:07,204][495927] Updated weights for policy 0, policy_version 12594 (0.0008) +[2026-06-07 03:08:07,875][495927] Updated weights for policy 0, policy_version 12605 (0.0009) +[2026-06-07 03:08:08,024][495927] Updated weights for policy 0, policy_version 12615 (0.0009) +[2026-06-07 03:08:08,192][495927] Updated weights for policy 0, policy_version 12627 (0.0008) +[2026-06-07 03:08:08,343][495927] Updated weights for policy 0, policy_version 12637 (0.0008) +[2026-06-07 03:08:08,489][495927] Updated weights for policy 0, policy_version 12647 (0.0008) +[2026-06-07 03:08:08,655][495927] Updated weights for policy 0, policy_version 12658 (0.0009) +[2026-06-07 03:08:08,909][492660] Fps is (10 sec: 22937.5, 60 sec: 23483.7, 300 sec: 22372.6). Total num frames: 6488064. Throughput: 0: 22809.6. Samples: 6483072. Policy #0 lag: (min: 63.0, avg: 77.3, max: 127.0) +[2026-06-07 03:08:08,910][492660] Avg episode reward: [(0, '469.142')] +[2026-06-07 03:08:08,915][495570] Saving new best policy, reward=469.142! +[2026-06-07 03:08:09,305][495927] Updated weights for policy 0, policy_version 12669 (0.0008) +[2026-06-07 03:08:09,446][495927] Updated weights for policy 0, policy_version 12679 (0.0008) +[2026-06-07 03:08:09,630][495927] Updated weights for policy 0, policy_version 12692 (0.0009) +[2026-06-07 03:08:09,784][495927] Updated weights for policy 0, policy_version 12703 (0.0008) +[2026-06-07 03:08:09,931][495927] Updated weights for policy 0, policy_version 12713 (0.0008) +[2026-06-07 03:08:10,087][495927] Updated weights for policy 0, policy_version 12723 (0.0008) +[2026-06-07 03:08:10,717][495927] Updated weights for policy 0, policy_version 12733 (0.0008) +[2026-06-07 03:08:10,879][495927] Updated weights for policy 0, policy_version 12745 (0.0008) +[2026-06-07 03:08:11,044][495927] Updated weights for policy 0, policy_version 12756 (0.0008) +[2026-06-07 03:08:11,208][495927] Updated weights for policy 0, policy_version 12768 (0.0008) +[2026-06-07 03:08:11,369][495927] Updated weights for policy 0, policy_version 12778 (0.0009) +[2026-06-07 03:08:11,528][495927] Updated weights for policy 0, policy_version 12789 (0.0008) +[2026-06-07 03:08:12,186][495927] Updated weights for policy 0, policy_version 12800 (0.0009) +[2026-06-07 03:08:12,346][495927] Updated weights for policy 0, policy_version 12811 (0.0009) +[2026-06-07 03:08:12,490][495927] Updated weights for policy 0, policy_version 12821 (0.0008) +[2026-06-07 03:08:12,659][495927] Updated weights for policy 0, policy_version 12832 (0.0009) +[2026-06-07 03:08:12,823][495927] Updated weights for policy 0, policy_version 12843 (0.0009) +[2026-06-07 03:08:13,007][495927] Updated weights for policy 0, policy_version 12855 (0.0009) +[2026-06-07 03:08:13,635][495927] Updated weights for policy 0, policy_version 12865 (0.0009) +[2026-06-07 03:08:13,773][495927] Updated weights for policy 0, policy_version 12875 (0.0008) +[2026-06-07 03:08:13,909][492660] Fps is (10 sec: 22937.6, 60 sec: 22937.6, 300 sec: 22326.7). Total num frames: 6586368. Throughput: 0: 22986.0. Samples: 6626048. Policy #0 lag: (min: 63.0, avg: 77.3, max: 127.0) +[2026-06-07 03:08:13,910][492660] Avg episode reward: [(0, '462.934')] +[2026-06-07 03:08:13,930][495927] Updated weights for policy 0, policy_version 12885 (0.0009) +[2026-06-07 03:08:14,100][495927] Updated weights for policy 0, policy_version 12896 (0.0009) +[2026-06-07 03:08:14,265][495927] Updated weights for policy 0, policy_version 12907 (0.0008) +[2026-06-07 03:08:14,420][495927] Updated weights for policy 0, policy_version 12917 (0.0008) +[2026-06-07 03:08:15,049][495927] Updated weights for policy 0, policy_version 12928 (0.0005) +[2026-06-07 03:08:15,192][495927] Updated weights for policy 0, policy_version 12938 (0.0004) +[2026-06-07 03:08:15,340][495927] Updated weights for policy 0, policy_version 12948 (0.0004) +[2026-06-07 03:08:15,495][495927] Updated weights for policy 0, policy_version 12958 (0.0004) +[2026-06-07 03:08:15,646][495927] Updated weights for policy 0, policy_version 12968 (0.0004) +[2026-06-07 03:08:15,798][495927] Updated weights for policy 0, policy_version 12978 (0.0004) +[2026-06-07 03:08:16,381][495927] Updated weights for policy 0, policy_version 12988 (0.0004) +[2026-06-07 03:08:16,526][495927] Updated weights for policy 0, policy_version 12998 (0.0004) +[2026-06-07 03:08:16,675][495927] Updated weights for policy 0, policy_version 13008 (0.0004) +[2026-06-07 03:08:16,825][495927] Updated weights for policy 0, policy_version 13018 (0.0005) +[2026-06-07 03:08:16,977][495927] Updated weights for policy 0, policy_version 13028 (0.0007) +[2026-06-07 03:08:17,123][495927] Updated weights for policy 0, policy_version 13038 (0.0005) +[2026-06-07 03:08:17,274][495927] Updated weights for policy 0, policy_version 13048 (0.0005) +[2026-06-07 03:08:17,887][495927] Updated weights for policy 0, policy_version 13058 (0.0010) +[2026-06-07 03:08:18,049][495927] Updated weights for policy 0, policy_version 13069 (0.0009) +[2026-06-07 03:08:18,211][495927] Updated weights for policy 0, policy_version 13080 (0.0005) +[2026-06-07 03:08:18,363][495927] Updated weights for policy 0, policy_version 13090 (0.0006) +[2026-06-07 03:08:18,529][495927] Updated weights for policy 0, policy_version 13101 (0.0012) +[2026-06-07 03:08:18,679][495927] Updated weights for policy 0, policy_version 13111 (0.0010) +[2026-06-07 03:08:18,909][492660] Fps is (10 sec: 22937.6, 60 sec: 22937.6, 300 sec: 22659.9). Total num frames: 6717440. Throughput: 0: 23108.3. Samples: 6699648. Policy #0 lag: (min: 22.0, avg: 35.9, max: 86.0) +[2026-06-07 03:08:18,910][492660] Avg episode reward: [(0, '481.594')] +[2026-06-07 03:08:18,915][495570] Saving new best policy, reward=481.594! +[2026-06-07 03:08:19,314][495927] Updated weights for policy 0, policy_version 13122 (0.0009) +[2026-06-07 03:08:19,469][495927] Updated weights for policy 0, policy_version 13133 (0.0009) +[2026-06-07 03:08:19,617][495927] Updated weights for policy 0, policy_version 13143 (0.0008) +[2026-06-07 03:08:19,768][495927] Updated weights for policy 0, policy_version 13153 (0.0008) +[2026-06-07 03:08:19,920][495927] Updated weights for policy 0, policy_version 13163 (0.0008) +[2026-06-07 03:08:20,086][495927] Updated weights for policy 0, policy_version 13174 (0.0008) +[2026-06-07 03:08:20,710][495927] Updated weights for policy 0, policy_version 13184 (0.0009) +[2026-06-07 03:08:20,858][495927] Updated weights for policy 0, policy_version 13194 (0.0009) +[2026-06-07 03:08:20,999][495927] Updated weights for policy 0, policy_version 13204 (0.0009) +[2026-06-07 03:08:21,165][495927] Updated weights for policy 0, policy_version 13215 (0.0009) +[2026-06-07 03:08:21,318][495927] Updated weights for policy 0, policy_version 13225 (0.0008) +[2026-06-07 03:08:21,462][495927] Updated weights for policy 0, policy_version 13235 (0.0008) +[2026-06-07 03:08:22,097][495927] Updated weights for policy 0, policy_version 13245 (0.0008) +[2026-06-07 03:08:22,241][495927] Updated weights for policy 0, policy_version 13255 (0.0008) +[2026-06-07 03:08:22,408][495927] Updated weights for policy 0, policy_version 13267 (0.0008) +[2026-06-07 03:08:22,602][495927] Updated weights for policy 0, policy_version 13279 (0.0009) +[2026-06-07 03:08:22,747][495927] Updated weights for policy 0, policy_version 13289 (0.0008) +[2026-06-07 03:08:22,902][495927] Updated weights for policy 0, policy_version 13299 (0.0008) +[2026-06-07 03:08:23,535][495927] Updated weights for policy 0, policy_version 13309 (0.0009) +[2026-06-07 03:08:23,679][495927] Updated weights for policy 0, policy_version 13319 (0.0008) +[2026-06-07 03:08:23,828][495927] Updated weights for policy 0, policy_version 13329 (0.0008) +[2026-06-07 03:08:23,909][492660] Fps is (10 sec: 22937.7, 60 sec: 22937.6, 300 sec: 22548.9). Total num frames: 6815744. Throughput: 0: 23091.4. Samples: 6834560. Policy #0 lag: (min: 22.0, avg: 35.9, max: 86.0) +[2026-06-07 03:08:23,910][492660] Avg episode reward: [(0, '489.779')] +[2026-06-07 03:08:23,987][495927] Updated weights for policy 0, policy_version 13340 (0.0008) +[2026-06-07 03:08:24,140][495927] Updated weights for policy 0, policy_version 13350 (0.0008) +[2026-06-07 03:08:24,308][495927] Updated weights for policy 0, policy_version 13361 (0.0009) +[2026-06-07 03:08:24,410][495570] Saving new best policy, reward=489.779! +[2026-06-07 03:08:24,925][495927] Updated weights for policy 0, policy_version 13371 (0.0008) +[2026-06-07 03:08:25,065][495927] Updated weights for policy 0, policy_version 13381 (0.0008) +[2026-06-07 03:08:25,203][495927] Updated weights for policy 0, policy_version 13391 (0.0010) +[2026-06-07 03:08:25,381][495927] Updated weights for policy 0, policy_version 13402 (0.0008) +[2026-06-07 03:08:25,534][495927] Updated weights for policy 0, policy_version 13412 (0.0008) +[2026-06-07 03:08:25,684][495927] Updated weights for policy 0, policy_version 13422 (0.0008) +[2026-06-07 03:08:25,830][495927] Updated weights for policy 0, policy_version 13432 (0.0008) +[2026-06-07 03:08:26,455][495927] Updated weights for policy 0, policy_version 13442 (0.0008) +[2026-06-07 03:08:26,602][495927] Updated weights for policy 0, policy_version 13452 (0.0008) +[2026-06-07 03:08:26,753][495927] Updated weights for policy 0, policy_version 13462 (0.0008) +[2026-06-07 03:08:26,921][495927] Updated weights for policy 0, policy_version 13473 (0.0009) +[2026-06-07 03:08:27,061][495927] Updated weights for policy 0, policy_version 13483 (0.0008) +[2026-06-07 03:08:27,230][495927] Updated weights for policy 0, policy_version 13494 (0.0008) +[2026-06-07 03:08:27,866][495927] Updated weights for policy 0, policy_version 13504 (0.0009) +[2026-06-07 03:08:28,004][495927] Updated weights for policy 0, policy_version 13514 (0.0008) +[2026-06-07 03:08:28,170][495927] Updated weights for policy 0, policy_version 13525 (0.0009) +[2026-06-07 03:08:28,335][495927] Updated weights for policy 0, policy_version 13536 (0.0009) +[2026-06-07 03:08:28,509][495927] Updated weights for policy 0, policy_version 13547 (0.0008) +[2026-06-07 03:08:28,579][495570] Early stopping after 7 epochs (56 sgd steps), loss delta 0.0000009 +[2026-06-07 03:08:28,909][492660] Fps is (10 sec: 22937.7, 60 sec: 22937.6, 300 sec: 22659.9). Total num frames: 6946816. Throughput: 0: 22980.3. Samples: 6969472. Policy #0 lag: (min: 80.0, avg: 98.1, max: 142.0) +[2026-06-07 03:08:28,910][492660] Avg episode reward: [(0, '476.040')] +[2026-06-07 03:08:29,166][495927] Updated weights for policy 0, policy_version 13557 (0.0008) +[2026-06-07 03:08:29,304][495927] Updated weights for policy 0, policy_version 13567 (0.0008) +[2026-06-07 03:08:29,451][495927] Updated weights for policy 0, policy_version 13577 (0.0008) +[2026-06-07 03:08:29,619][495927] Updated weights for policy 0, policy_version 13588 (0.0009) +[2026-06-07 03:08:29,775][495927] Updated weights for policy 0, policy_version 13599 (0.0008) +[2026-06-07 03:08:29,930][495927] Updated weights for policy 0, policy_version 13609 (0.0009) +[2026-06-07 03:08:30,569][495927] Updated weights for policy 0, policy_version 13620 (0.0009) +[2026-06-07 03:08:30,705][495927] Updated weights for policy 0, policy_version 13630 (0.0008) +[2026-06-07 03:08:30,861][495927] Updated weights for policy 0, policy_version 13640 (0.0008) +[2026-06-07 03:08:31,006][495927] Updated weights for policy 0, policy_version 13650 (0.0009) +[2026-06-07 03:08:31,163][495927] Updated weights for policy 0, policy_version 13660 (0.0008) +[2026-06-07 03:08:31,314][495927] Updated weights for policy 0, policy_version 13670 (0.0009) +[2026-06-07 03:08:31,462][495927] Updated weights for policy 0, policy_version 13680 (0.0008) +[2026-06-07 03:08:32,085][495927] Updated weights for policy 0, policy_version 13690 (0.0009) +[2026-06-07 03:08:32,239][495927] Updated weights for policy 0, policy_version 13701 (0.0008) +[2026-06-07 03:08:32,392][495927] Updated weights for policy 0, policy_version 13711 (0.0008) +[2026-06-07 03:08:32,576][495927] Updated weights for policy 0, policy_version 13723 (0.0008) +[2026-06-07 03:08:32,725][495927] Updated weights for policy 0, policy_version 13733 (0.0009) +[2026-06-07 03:08:32,876][495927] Updated weights for policy 0, policy_version 13743 (0.0008) +[2026-06-07 03:08:33,492][495927] Updated weights for policy 0, policy_version 13753 (0.0008) +[2026-06-07 03:08:33,652][495927] Updated weights for policy 0, policy_version 13764 (0.0008) +[2026-06-07 03:08:33,809][495927] Updated weights for policy 0, policy_version 13775 (0.0008) +[2026-06-07 03:08:33,909][492660] Fps is (10 sec: 22937.6, 60 sec: 22937.6, 300 sec: 22548.8). Total num frames: 7045120. Throughput: 0: 22886.4. Samples: 7038464. Policy #0 lag: (min: 80.0, avg: 98.1, max: 142.0) +[2026-06-07 03:08:33,910][492660] Avg episode reward: [(0, '489.409')] +[2026-06-07 03:08:33,960][495927] Updated weights for policy 0, policy_version 13785 (0.0009) +[2026-06-07 03:08:34,118][495927] Updated weights for policy 0, policy_version 13795 (0.0008) +[2026-06-07 03:08:34,268][495927] Updated weights for policy 0, policy_version 13805 (0.0009) +[2026-06-07 03:08:34,887][495927] Updated weights for policy 0, policy_version 13815 (0.0008) +[2026-06-07 03:08:35,045][495927] Updated weights for policy 0, policy_version 13826 (0.0008) +[2026-06-07 03:08:35,212][495927] Updated weights for policy 0, policy_version 13837 (0.0008) +[2026-06-07 03:08:35,354][495927] Updated weights for policy 0, policy_version 13847 (0.0008) +[2026-06-07 03:08:35,508][495927] Updated weights for policy 0, policy_version 13857 (0.0008) +[2026-06-07 03:08:35,659][495927] Updated weights for policy 0, policy_version 13867 (0.0008) +[2026-06-07 03:08:36,296][495927] Updated weights for policy 0, policy_version 13878 (0.0009) +[2026-06-07 03:08:36,443][495927] Updated weights for policy 0, policy_version 13888 (0.0008) +[2026-06-07 03:08:36,608][495927] Updated weights for policy 0, policy_version 13900 (0.0008) +[2026-06-07 03:08:36,762][495927] Updated weights for policy 0, policy_version 13910 (0.0010) +[2026-06-07 03:08:36,925][495927] Updated weights for policy 0, policy_version 13921 (0.0008) +[2026-06-07 03:08:37,095][495927] Updated weights for policy 0, policy_version 13932 (0.0008) +[2026-06-07 03:08:37,726][495927] Updated weights for policy 0, policy_version 13942 (0.0008) +[2026-06-07 03:08:37,868][495927] Updated weights for policy 0, policy_version 13952 (0.0008) +[2026-06-07 03:08:38,015][495927] Updated weights for policy 0, policy_version 13962 (0.0009) +[2026-06-07 03:08:38,166][495927] Updated weights for policy 0, policy_version 13972 (0.0009) +[2026-06-07 03:08:38,312][495927] Updated weights for policy 0, policy_version 13982 (0.0008) +[2026-06-07 03:08:38,468][495927] Updated weights for policy 0, policy_version 13992 (0.0008) +[2026-06-07 03:08:38,909][492660] Fps is (10 sec: 22937.6, 60 sec: 22937.8, 300 sec: 22548.8). Total num frames: 7176192. Throughput: 0: 22877.9. Samples: 7176064. Policy #0 lag: (min: 63.0, avg: 77.1, max: 127.0) +[2026-06-07 03:08:38,910][492660] Avg episode reward: [(0, '498.792')] +[2026-06-07 03:08:38,915][495570] Saving new best policy, reward=498.792! +[2026-06-07 03:08:39,132][495927] Updated weights for policy 0, policy_version 14003 (0.0009) +[2026-06-07 03:08:39,287][495927] Updated weights for policy 0, policy_version 14014 (0.0009) +[2026-06-07 03:08:39,432][495927] Updated weights for policy 0, policy_version 14024 (0.0008) +[2026-06-07 03:08:39,582][495927] Updated weights for policy 0, policy_version 14034 (0.0009) +[2026-06-07 03:08:39,727][495927] Updated weights for policy 0, policy_version 14044 (0.0008) +[2026-06-07 03:08:39,886][495927] Updated weights for policy 0, policy_version 14054 (0.0009) +[2026-06-07 03:08:40,026][495927] Updated weights for policy 0, policy_version 14064 (0.0009) +[2026-06-07 03:08:40,668][495927] Updated weights for policy 0, policy_version 14074 (0.0008) +[2026-06-07 03:08:40,818][495927] Updated weights for policy 0, policy_version 14085 (0.0008) +[2026-06-07 03:08:40,983][495927] Updated weights for policy 0, policy_version 14096 (0.0008) +[2026-06-07 03:08:41,136][495927] Updated weights for policy 0, policy_version 14106 (0.0008) +[2026-06-07 03:08:41,295][495927] Updated weights for policy 0, policy_version 14117 (0.0008) +[2026-06-07 03:08:41,454][495927] Updated weights for policy 0, policy_version 14127 (0.0009) +[2026-06-07 03:08:42,079][495927] Updated weights for policy 0, policy_version 14137 (0.0009) +[2026-06-07 03:08:42,226][495927] Updated weights for policy 0, policy_version 14147 (0.0008) +[2026-06-07 03:08:42,381][495927] Updated weights for policy 0, policy_version 14158 (0.0008) +[2026-06-07 03:08:42,539][495927] Updated weights for policy 0, policy_version 14168 (0.0008) +[2026-06-07 03:08:42,680][495927] Updated weights for policy 0, policy_version 14178 (0.0009) +[2026-06-07 03:08:42,851][495927] Updated weights for policy 0, policy_version 14189 (0.0008) +[2026-06-07 03:08:43,486][495927] Updated weights for policy 0, policy_version 14200 (0.0008) +[2026-06-07 03:08:43,633][495927] Updated weights for policy 0, policy_version 14210 (0.0009) +[2026-06-07 03:08:43,793][495927] Updated weights for policy 0, policy_version 14221 (0.0009) +[2026-06-07 03:08:43,909][492660] Fps is (10 sec: 22937.6, 60 sec: 22937.6, 300 sec: 22548.8). Total num frames: 7274496. Throughput: 0: 23116.8. Samples: 7321600. Policy #0 lag: (min: 63.0, avg: 77.1, max: 127.0) +[2026-06-07 03:08:43,910][492660] Avg episode reward: [(0, '515.117')] +[2026-06-07 03:08:43,948][495927] Updated weights for policy 0, policy_version 14231 (0.0008) +[2026-06-07 03:08:44,124][495927] Updated weights for policy 0, policy_version 14243 (0.0008) +[2026-06-07 03:08:44,274][495927] Updated weights for policy 0, policy_version 14253 (0.0009) +[2026-06-07 03:08:44,312][495570] Saving new best policy, reward=515.117! +[2026-06-07 03:08:44,922][495927] Updated weights for policy 0, policy_version 14263 (0.0008) +[2026-06-07 03:08:45,064][495927] Updated weights for policy 0, policy_version 14273 (0.0008) +[2026-06-07 03:08:45,206][495927] Updated weights for policy 0, policy_version 14283 (0.0009) +[2026-06-07 03:08:45,362][495927] Updated weights for policy 0, policy_version 14293 (0.0008) +[2026-06-07 03:08:45,513][495927] Updated weights for policy 0, policy_version 14303 (0.0008) +[2026-06-07 03:08:45,670][495927] Updated weights for policy 0, policy_version 14314 (0.0008) +[2026-06-07 03:08:46,298][495927] Updated weights for policy 0, policy_version 14325 (0.0009) +[2026-06-07 03:08:46,445][495927] Updated weights for policy 0, policy_version 14335 (0.0008) +[2026-06-07 03:08:46,608][495927] Updated weights for policy 0, policy_version 14346 (0.0008) +[2026-06-07 03:08:46,773][495927] Updated weights for policy 0, policy_version 14357 (0.0008) +[2026-06-07 03:08:46,940][495927] Updated weights for policy 0, policy_version 14368 (0.0008) +[2026-06-07 03:08:47,091][495927] Updated weights for policy 0, policy_version 14378 (0.0008) +[2026-06-07 03:08:47,703][495927] Updated weights for policy 0, policy_version 14388 (0.0008) +[2026-06-07 03:08:47,847][495927] Updated weights for policy 0, policy_version 14398 (0.0008) +[2026-06-07 03:08:48,002][495927] Updated weights for policy 0, policy_version 14409 (0.0009) +[2026-06-07 03:08:48,169][495927] Updated weights for policy 0, policy_version 14420 (0.0009) +[2026-06-07 03:08:48,326][495927] Updated weights for policy 0, policy_version 14430 (0.0008) +[2026-06-07 03:08:48,485][495927] Updated weights for policy 0, policy_version 14440 (0.0008) +[2026-06-07 03:08:48,909][492660] Fps is (10 sec: 22937.5, 60 sec: 22937.6, 300 sec: 22659.9). Total num frames: 7405568. Throughput: 0: 23148.1. Samples: 7389952. Policy #0 lag: (min: 127.0, avg: 142.0, max: 191.0) +[2026-06-07 03:08:48,910][492660] Avg episode reward: [(0, '531.587')] +[2026-06-07 03:08:48,915][495570] Saving new best policy, reward=531.587! +[2026-06-07 03:08:49,088][495927] Updated weights for policy 0, policy_version 14450 (0.0009) +[2026-06-07 03:08:49,231][495927] Updated weights for policy 0, policy_version 14460 (0.0008) +[2026-06-07 03:08:49,377][495927] Updated weights for policy 0, policy_version 14470 (0.0008) +[2026-06-07 03:08:49,528][495927] Updated weights for policy 0, policy_version 14480 (0.0009) +[2026-06-07 03:08:49,682][495927] Updated weights for policy 0, policy_version 14490 (0.0008) +[2026-06-07 03:08:49,832][495927] Updated weights for policy 0, policy_version 14500 (0.0009) +[2026-06-07 03:08:50,002][495927] Updated weights for policy 0, policy_version 14511 (0.0008) +[2026-06-07 03:08:50,624][495927] Updated weights for policy 0, policy_version 14521 (0.0009) +[2026-06-07 03:08:50,767][495927] Updated weights for policy 0, policy_version 14531 (0.0008) +[2026-06-07 03:08:50,918][495927] Updated weights for policy 0, policy_version 14541 (0.0008) +[2026-06-07 03:08:51,060][495927] Updated weights for policy 0, policy_version 14551 (0.0008) +[2026-06-07 03:08:51,220][495927] Updated weights for policy 0, policy_version 14561 (0.0008) +[2026-06-07 03:08:51,374][495927] Updated weights for policy 0, policy_version 14571 (0.0008) +[2026-06-07 03:08:51,993][495927] Updated weights for policy 0, policy_version 14581 (0.0008) +[2026-06-07 03:08:52,152][495927] Updated weights for policy 0, policy_version 14592 (0.0008) +[2026-06-07 03:08:52,299][495927] Updated weights for policy 0, policy_version 14602 (0.0008) +[2026-06-07 03:08:52,469][495927] Updated weights for policy 0, policy_version 14613 (0.0008) +[2026-06-07 03:08:52,612][495927] Updated weights for policy 0, policy_version 14623 (0.0008) +[2026-06-07 03:08:52,776][495927] Updated weights for policy 0, policy_version 14634 (0.0008) +[2026-06-07 03:08:53,434][495927] Updated weights for policy 0, policy_version 14645 (0.0008) +[2026-06-07 03:08:53,572][495927] Updated weights for policy 0, policy_version 14655 (0.0009) +[2026-06-07 03:08:53,749][495927] Updated weights for policy 0, policy_version 14666 (0.0008) +[2026-06-07 03:08:53,895][495927] Updated weights for policy 0, policy_version 14676 (0.0009) +[2026-06-07 03:08:53,909][492660] Fps is (10 sec: 22937.5, 60 sec: 22937.6, 300 sec: 22548.8). Total num frames: 7503872. Throughput: 0: 23136.7. Samples: 7524224. Policy #0 lag: (min: 127.0, avg: 142.0, max: 191.0) +[2026-06-07 03:08:53,910][492660] Avg episode reward: [(0, '538.094')] +[2026-06-07 03:08:54,051][495927] Updated weights for policy 0, policy_version 14686 (0.0008) +[2026-06-07 03:08:54,202][495927] Updated weights for policy 0, policy_version 14696 (0.0008) +[2026-06-07 03:08:54,314][495570] Saving new best policy, reward=538.094! +[2026-06-07 03:08:54,854][495927] Updated weights for policy 0, policy_version 14707 (0.0008) +[2026-06-07 03:08:54,995][495927] Updated weights for policy 0, policy_version 14717 (0.0008) +[2026-06-07 03:08:55,143][495927] Updated weights for policy 0, policy_version 14727 (0.0010) +[2026-06-07 03:08:55,294][495927] Updated weights for policy 0, policy_version 14737 (0.0012) +[2026-06-07 03:08:55,466][495927] Updated weights for policy 0, policy_version 14748 (0.0013) +[2026-06-07 03:08:55,619][495927] Updated weights for policy 0, policy_version 14758 (0.0012) +[2026-06-07 03:08:56,258][495927] Updated weights for policy 0, policy_version 14769 (0.0010) +[2026-06-07 03:08:56,398][495927] Updated weights for policy 0, policy_version 14779 (0.0013) +[2026-06-07 03:08:56,546][495927] Updated weights for policy 0, policy_version 14789 (0.0012) +[2026-06-07 03:08:56,696][495927] Updated weights for policy 0, policy_version 14799 (0.0013) +[2026-06-07 03:08:56,843][495927] Updated weights for policy 0, policy_version 14809 (0.0010) +[2026-06-07 03:08:57,009][495927] Updated weights for policy 0, policy_version 14819 (0.0011) +[2026-06-07 03:08:57,178][495927] Updated weights for policy 0, policy_version 14830 (0.0013) +[2026-06-07 03:08:57,794][495927] Updated weights for policy 0, policy_version 14840 (0.0009) +[2026-06-07 03:08:57,940][495927] Updated weights for policy 0, policy_version 14850 (0.0009) +[2026-06-07 03:08:58,090][495927] Updated weights for policy 0, policy_version 14860 (0.0008) +[2026-06-07 03:08:58,241][495927] Updated weights for policy 0, policy_version 14870 (0.0008) +[2026-06-07 03:08:58,398][495927] Updated weights for policy 0, policy_version 14881 (0.0008) +[2026-06-07 03:08:58,548][495927] Updated weights for policy 0, policy_version 14891 (0.0008) +[2026-06-07 03:08:58,909][492660] Fps is (10 sec: 22937.7, 60 sec: 22937.6, 300 sec: 22659.9). Total num frames: 7634944. Throughput: 0: 22966.1. Samples: 7659520. Policy #0 lag: (min: 127.0, avg: 142.0, max: 191.0) +[2026-06-07 03:08:58,910][492660] Avg episode reward: [(0, '571.975')] +[2026-06-07 03:08:58,913][495570] Saving new best policy, reward=571.975! +[2026-06-07 03:08:59,185][495927] Updated weights for policy 0, policy_version 14901 (0.0009) +[2026-06-07 03:08:59,329][495927] Updated weights for policy 0, policy_version 14911 (0.0008) +[2026-06-07 03:08:59,496][495927] Updated weights for policy 0, policy_version 14922 (0.0009) +[2026-06-07 03:08:59,665][495927] Updated weights for policy 0, policy_version 14933 (0.0008) +[2026-06-07 03:08:59,805][495927] Updated weights for policy 0, policy_version 14943 (0.0008) +[2026-06-07 03:08:59,969][495927] Updated weights for policy 0, policy_version 14953 (0.0010) +[2026-06-07 03:09:00,589][495927] Updated weights for policy 0, policy_version 14964 (0.0009) +[2026-06-07 03:09:00,733][495927] Updated weights for policy 0, policy_version 14974 (0.0009) +[2026-06-07 03:09:00,899][495927] Updated weights for policy 0, policy_version 14985 (0.0008) +[2026-06-07 03:09:01,059][495927] Updated weights for policy 0, policy_version 14996 (0.0009) +[2026-06-07 03:09:01,223][495927] Updated weights for policy 0, policy_version 15007 (0.0009) +[2026-06-07 03:09:01,370][495927] Updated weights for policy 0, policy_version 15017 (0.0009) +[2026-06-07 03:09:02,013][495927] Updated weights for policy 0, policy_version 15029 (0.0007) +[2026-06-07 03:09:02,157][495927] Updated weights for policy 0, policy_version 15039 (0.0008) +[2026-06-07 03:09:02,304][495927] Updated weights for policy 0, policy_version 15049 (0.0008) +[2026-06-07 03:09:02,494][495927] Updated weights for policy 0, policy_version 15062 (0.0009) +[2026-06-07 03:09:02,664][495927] Updated weights for policy 0, policy_version 15073 (0.0009) +[2026-06-07 03:09:02,811][495927] Updated weights for policy 0, policy_version 15083 (0.0008) +[2026-06-07 03:09:03,458][495927] Updated weights for policy 0, policy_version 15094 (0.0009) +[2026-06-07 03:09:03,631][495927] Updated weights for policy 0, policy_version 15106 (0.0008) +[2026-06-07 03:09:03,793][495927] Updated weights for policy 0, policy_version 15117 (0.0009) +[2026-06-07 03:09:03,909][492660] Fps is (10 sec: 22937.6, 60 sec: 22937.6, 300 sec: 22659.9). Total num frames: 7733248. Throughput: 0: 22832.3. Samples: 7727104. Policy #0 lag: (min: 22.0, avg: 36.6, max: 86.0) +[2026-06-07 03:09:03,910][492660] Avg episode reward: [(0, '546.548')] +[2026-06-07 03:09:03,946][495927] Updated weights for policy 0, policy_version 15127 (0.0008) +[2026-06-07 03:09:04,091][495927] Updated weights for policy 0, policy_version 15137 (0.0008) +[2026-06-07 03:09:04,247][495927] Updated weights for policy 0, policy_version 15147 (0.0009) +[2026-06-07 03:09:04,871][495927] Updated weights for policy 0, policy_version 15158 (0.0009) +[2026-06-07 03:09:05,009][495927] Updated weights for policy 0, policy_version 15168 (0.0008) +[2026-06-07 03:09:05,169][495927] Updated weights for policy 0, policy_version 15179 (0.0008) +[2026-06-07 03:09:05,323][495927] Updated weights for policy 0, policy_version 15189 (0.0008) +[2026-06-07 03:09:05,472][495927] Updated weights for policy 0, policy_version 15199 (0.0009) +[2026-06-07 03:09:05,656][495927] Updated weights for policy 0, policy_version 15211 (0.0008) +[2026-06-07 03:09:06,291][495927] Updated weights for policy 0, policy_version 15221 (0.0009) +[2026-06-07 03:09:06,436][495927] Updated weights for policy 0, policy_version 15231 (0.0008) +[2026-06-07 03:09:06,585][495927] Updated weights for policy 0, policy_version 15241 (0.0008) +[2026-06-07 03:09:06,729][495927] Updated weights for policy 0, policy_version 15251 (0.0008) +[2026-06-07 03:09:06,903][495927] Updated weights for policy 0, policy_version 15263 (0.0009) +[2026-06-07 03:09:07,051][495927] Updated weights for policy 0, policy_version 15273 (0.0008) +[2026-06-07 03:09:07,699][495927] Updated weights for policy 0, policy_version 15284 (0.0008) +[2026-06-07 03:09:07,854][495927] Updated weights for policy 0, policy_version 15295 (0.0009) +[2026-06-07 03:09:08,017][495927] Updated weights for policy 0, policy_version 15306 (0.0008) +[2026-06-07 03:09:08,161][495927] Updated weights for policy 0, policy_version 15316 (0.0008) +[2026-06-07 03:09:08,305][495927] Updated weights for policy 0, policy_version 15326 (0.0009) +[2026-06-07 03:09:08,459][495927] Updated weights for policy 0, policy_version 15336 (0.0008) +[2026-06-07 03:09:08,909][492660] Fps is (10 sec: 22937.4, 60 sec: 22937.6, 300 sec: 22771.0). Total num frames: 7864320. Throughput: 0: 22852.2. Samples: 7862912. Policy #0 lag: (min: 22.0, avg: 36.6, max: 86.0) +[2026-06-07 03:09:08,910][492660] Avg episode reward: [(0, '507.067')] +[2026-06-07 03:09:09,092][495927] Updated weights for policy 0, policy_version 15346 (0.0008) +[2026-06-07 03:09:09,235][495927] Updated weights for policy 0, policy_version 15356 (0.0008) +[2026-06-07 03:09:09,380][495927] Updated weights for policy 0, policy_version 15366 (0.0008) +[2026-06-07 03:09:09,534][495927] Updated weights for policy 0, policy_version 15377 (0.0008) +[2026-06-07 03:09:09,698][495927] Updated weights for policy 0, policy_version 15388 (0.0008) +[2026-06-07 03:09:09,881][495927] Updated weights for policy 0, policy_version 15401 (0.0006) +[2026-06-07 03:09:10,529][495927] Updated weights for policy 0, policy_version 15412 (0.0007) +[2026-06-07 03:09:10,667][495927] Updated weights for policy 0, policy_version 15422 (0.0008) +[2026-06-07 03:09:10,814][495927] Updated weights for policy 0, policy_version 15432 (0.0008) +[2026-06-07 03:09:10,971][495927] Updated weights for policy 0, policy_version 15443 (0.0008) +[2026-06-07 03:09:11,129][495927] Updated weights for policy 0, policy_version 15454 (0.0008) +[2026-06-07 03:09:11,279][495927] Updated weights for policy 0, policy_version 15464 (0.0008) +[2026-06-07 03:09:11,946][495927] Updated weights for policy 0, policy_version 15476 (0.0009) +[2026-06-07 03:09:12,095][495927] Updated weights for policy 0, policy_version 15487 (0.0008) +[2026-06-07 03:09:12,235][495927] Updated weights for policy 0, policy_version 15497 (0.0009) +[2026-06-07 03:09:12,389][495927] Updated weights for policy 0, policy_version 15507 (0.0009) +[2026-06-07 03:09:12,538][495927] Updated weights for policy 0, policy_version 15517 (0.0008) +[2026-06-07 03:09:12,687][495927] Updated weights for policy 0, policy_version 15527 (0.0008) +[2026-06-07 03:09:13,334][495927] Updated weights for policy 0, policy_version 15537 (0.0009) +[2026-06-07 03:09:13,489][495927] Updated weights for policy 0, policy_version 15548 (0.0010) +[2026-06-07 03:09:13,654][495927] Updated weights for policy 0, policy_version 15559 (0.0008) +[2026-06-07 03:09:13,802][495927] Updated weights for policy 0, policy_version 15569 (0.0009) +[2026-06-07 03:09:13,909][492660] Fps is (10 sec: 22937.2, 60 sec: 22937.5, 300 sec: 22659.9). Total num frames: 7962624. Throughput: 0: 23122.4. Samples: 8009984. Policy #0 lag: (min: 63.0, avg: 76.5, max: 127.0) +[2026-06-07 03:09:13,910][492660] Avg episode reward: [(0, '494.591')] +[2026-06-07 03:09:13,951][495927] Updated weights for policy 0, policy_version 15579 (0.0009) +[2026-06-07 03:09:14,097][495927] Updated weights for policy 0, policy_version 15589 (0.0008) +[2026-06-07 03:09:14,251][495927] Updated weights for policy 0, policy_version 15599 (0.0010) +[2026-06-07 03:09:14,880][495927] Updated weights for policy 0, policy_version 15609 (0.0009) +[2026-06-07 03:09:15,040][495927] Updated weights for policy 0, policy_version 15620 (0.0008) +[2026-06-07 03:09:15,216][495927] Updated weights for policy 0, policy_version 15632 (0.0009) +[2026-06-07 03:09:15,363][495927] Updated weights for policy 0, policy_version 15642 (0.0008) +[2026-06-07 03:09:15,525][495927] Updated weights for policy 0, policy_version 15653 (0.0009) +[2026-06-07 03:09:15,676][495927] Updated weights for policy 0, policy_version 15663 (0.0009) +[2026-06-07 03:09:16,340][495927] Updated weights for policy 0, policy_version 15675 (0.0009) +[2026-06-07 03:09:16,490][495927] Updated weights for policy 0, policy_version 15685 (0.0008) +[2026-06-07 03:09:16,639][495927] Updated weights for policy 0, policy_version 15695 (0.0008) +[2026-06-07 03:09:16,790][495927] Updated weights for policy 0, policy_version 15705 (0.0008) +[2026-06-07 03:09:16,941][495927] Updated weights for policy 0, policy_version 15715 (0.0008) +[2026-06-07 03:09:17,094][495927] Updated weights for policy 0, policy_version 15725 (0.0008) +[2026-06-07 03:09:17,714][495927] Updated weights for policy 0, policy_version 15736 (0.0008) +[2026-06-07 03:09:17,858][495927] Updated weights for policy 0, policy_version 15746 (0.0008) +[2026-06-07 03:09:18,003][495927] Updated weights for policy 0, policy_version 15756 (0.0008) +[2026-06-07 03:09:18,160][495927] Updated weights for policy 0, policy_version 15766 (0.0008) +[2026-06-07 03:09:18,327][495927] Updated weights for policy 0, policy_version 15777 (0.0008) +[2026-06-07 03:09:18,477][495927] Updated weights for policy 0, policy_version 15787 (0.0008) +[2026-06-07 03:09:18,909][492660] Fps is (10 sec: 22937.7, 60 sec: 22937.6, 300 sec: 22771.0). Total num frames: 8093696. Throughput: 0: 23071.3. Samples: 8076672. Policy #0 lag: (min: 63.0, avg: 76.5, max: 127.0) +[2026-06-07 03:09:18,910][492660] Avg episode reward: [(0, '507.699')] +[2026-06-07 03:09:19,106][495927] Updated weights for policy 0, policy_version 15798 (0.0010) +[2026-06-07 03:09:19,268][495927] Updated weights for policy 0, policy_version 15810 (0.0009) +[2026-06-07 03:09:19,425][495927] Updated weights for policy 0, policy_version 15820 (0.0008) +[2026-06-07 03:09:19,592][495927] Updated weights for policy 0, policy_version 15831 (0.0008) +[2026-06-07 03:09:19,754][495927] Updated weights for policy 0, policy_version 15842 (0.0009) +[2026-06-07 03:09:19,920][495927] Updated weights for policy 0, policy_version 15853 (0.0008) +[2026-06-07 03:09:20,546][495927] Updated weights for policy 0, policy_version 15863 (0.0008) +[2026-06-07 03:09:20,697][495927] Updated weights for policy 0, policy_version 15874 (0.0007) +[2026-06-07 03:09:20,866][495927] Updated weights for policy 0, policy_version 15885 (0.0008) +[2026-06-07 03:09:21,046][495927] Updated weights for policy 0, policy_version 15897 (0.0009) +[2026-06-07 03:09:21,204][495927] Updated weights for policy 0, policy_version 15908 (0.0008) +[2026-06-07 03:09:21,359][495927] Updated weights for policy 0, policy_version 15918 (0.0009) +[2026-06-07 03:09:21,963][495927] Updated weights for policy 0, policy_version 15928 (0.0009) +[2026-06-07 03:09:22,117][495927] Updated weights for policy 0, policy_version 15938 (0.0009) +[2026-06-07 03:09:22,268][495927] Updated weights for policy 0, policy_version 15949 (0.0009) +[2026-06-07 03:09:22,437][495927] Updated weights for policy 0, policy_version 15960 (0.0008) +[2026-06-07 03:09:22,589][495927] Updated weights for policy 0, policy_version 15970 (0.0008) +[2026-06-07 03:09:22,752][495927] Updated weights for policy 0, policy_version 15981 (0.0008) +[2026-06-07 03:09:23,381][495927] Updated weights for policy 0, policy_version 15992 (0.0009) +[2026-06-07 03:09:23,555][495927] Updated weights for policy 0, policy_version 16004 (0.0009) +[2026-06-07 03:09:23,706][495927] Updated weights for policy 0, policy_version 16014 (0.0008) +[2026-06-07 03:09:23,852][495927] Updated weights for policy 0, policy_version 16024 (0.0008) +[2026-06-07 03:09:23,909][492660] Fps is (10 sec: 22938.1, 60 sec: 22937.6, 300 sec: 22771.0). Total num frames: 8192000. Throughput: 0: 23020.1. Samples: 8211968. Policy #0 lag: (min: 63.0, avg: 76.5, max: 127.0) +[2026-06-07 03:09:23,910][492660] Avg episode reward: [(0, '508.749')] +[2026-06-07 03:09:24,006][495927] Updated weights for policy 0, policy_version 16034 (0.0008) +[2026-06-07 03:09:24,158][495927] Updated weights for policy 0, policy_version 16044 (0.0008) +[2026-06-07 03:09:24,795][495927] Updated weights for policy 0, policy_version 16056 (0.0009) +[2026-06-07 03:09:24,942][495927] Updated weights for policy 0, policy_version 16066 (0.0008) +[2026-06-07 03:09:25,090][495927] Updated weights for policy 0, policy_version 16076 (0.0008) +[2026-06-07 03:09:25,266][495927] Updated weights for policy 0, policy_version 16088 (0.0008) +[2026-06-07 03:09:25,421][495927] Updated weights for policy 0, policy_version 16098 (0.0008) +[2026-06-07 03:09:25,588][495927] Updated weights for policy 0, policy_version 16109 (0.0008) +[2026-06-07 03:09:26,187][495927] Updated weights for policy 0, policy_version 16119 (0.0009) +[2026-06-07 03:09:26,350][495927] Updated weights for policy 0, policy_version 16130 (0.0008) +[2026-06-07 03:09:26,510][495927] Updated weights for policy 0, policy_version 16141 (0.0009) +[2026-06-07 03:09:26,682][495927] Updated weights for policy 0, policy_version 16153 (0.0008) +[2026-06-07 03:09:26,835][495927] Updated weights for policy 0, policy_version 16163 (0.0008) +[2026-06-07 03:09:26,997][495927] Updated weights for policy 0, policy_version 16174 (0.0008) +[2026-06-07 03:09:27,650][495927] Updated weights for policy 0, policy_version 16185 (0.0008) +[2026-06-07 03:09:27,800][495927] Updated weights for policy 0, policy_version 16196 (0.0008) +[2026-06-07 03:09:27,969][495927] Updated weights for policy 0, policy_version 16207 (0.0008) +[2026-06-07 03:09:28,134][495927] Updated weights for policy 0, policy_version 16218 (0.0008) +[2026-06-07 03:09:28,318][495927] Updated weights for policy 0, policy_version 16230 (0.0008) +[2026-06-07 03:09:28,460][495927] Updated weights for policy 0, policy_version 16240 (0.0009) +[2026-06-07 03:09:28,909][492660] Fps is (10 sec: 22937.5, 60 sec: 22937.6, 300 sec: 22882.1). Total num frames: 8323072. Throughput: 0: 22769.8. Samples: 8346240. Policy #0 lag: (min: 46.0, avg: 60.2, max: 110.0) +[2026-06-07 03:09:28,911][492660] Avg episode reward: [(0, '508.820')] +[2026-06-07 03:09:29,063][495927] Updated weights for policy 0, policy_version 16250 (0.0009) +[2026-06-07 03:09:29,211][495927] Updated weights for policy 0, policy_version 16260 (0.0008) +[2026-06-07 03:09:29,354][495927] Updated weights for policy 0, policy_version 16270 (0.0008) +[2026-06-07 03:09:29,504][495927] Updated weights for policy 0, policy_version 16280 (0.0008) +[2026-06-07 03:09:29,657][495927] Updated weights for policy 0, policy_version 16290 (0.0008) +[2026-06-07 03:09:29,825][495927] Updated weights for policy 0, policy_version 16301 (0.0008) +[2026-06-07 03:09:30,441][495927] Updated weights for policy 0, policy_version 16311 (0.0009) +[2026-06-07 03:09:30,589][495927] Updated weights for policy 0, policy_version 16321 (0.0008) +[2026-06-07 03:09:30,740][495927] Updated weights for policy 0, policy_version 16331 (0.0008) +[2026-06-07 03:09:30,880][495927] Updated weights for policy 0, policy_version 16341 (0.0008) +[2026-06-07 03:09:31,051][495927] Updated weights for policy 0, policy_version 16352 (0.0008) +[2026-06-07 03:09:31,218][495927] Updated weights for policy 0, policy_version 16363 (0.0008) +[2026-06-07 03:09:31,857][495927] Updated weights for policy 0, policy_version 16373 (0.0008) +[2026-06-07 03:09:32,008][495927] Updated weights for policy 0, policy_version 16384 (0.0008) +[2026-06-07 03:09:32,157][495927] Updated weights for policy 0, policy_version 16394 (0.0008) +[2026-06-07 03:09:32,320][495927] Updated weights for policy 0, policy_version 16405 (0.0008) +[2026-06-07 03:09:32,476][495927] Updated weights for policy 0, policy_version 16415 (0.0009) +[2026-06-07 03:09:32,628][495927] Updated weights for policy 0, policy_version 16425 (0.0008) +[2026-06-07 03:09:33,239][495927] Updated weights for policy 0, policy_version 16435 (0.0009) +[2026-06-07 03:09:33,384][495927] Updated weights for policy 0, policy_version 16445 (0.0009) +[2026-06-07 03:09:33,527][495927] Updated weights for policy 0, policy_version 16455 (0.0008) +[2026-06-07 03:09:33,668][495927] Updated weights for policy 0, policy_version 16465 (0.0008) +[2026-06-07 03:09:33,840][495927] Updated weights for policy 0, policy_version 16476 (0.0009) +[2026-06-07 03:09:33,909][492660] Fps is (10 sec: 22937.6, 60 sec: 22937.6, 300 sec: 22771.0). Total num frames: 8421376. Throughput: 0: 22735.7. Samples: 8413056. Policy #0 lag: (min: 46.0, avg: 60.2, max: 110.0) +[2026-06-07 03:09:33,910][492660] Avg episode reward: [(0, '534.282')] +[2026-06-07 03:09:33,994][495927] Updated weights for policy 0, policy_version 16486 (0.0008) +[2026-06-07 03:09:34,630][495927] Updated weights for policy 0, policy_version 16497 (0.0008) +[2026-06-07 03:09:34,786][495927] Updated weights for policy 0, policy_version 16508 (0.0009) +[2026-06-07 03:09:34,943][495927] Updated weights for policy 0, policy_version 16519 (0.0009) +[2026-06-07 03:09:35,123][495927] Updated weights for policy 0, policy_version 16532 (0.0008) +[2026-06-07 03:09:35,302][495927] Updated weights for policy 0, policy_version 16544 (0.0008) +[2026-06-07 03:09:35,467][495927] Updated weights for policy 0, policy_version 16555 (0.0009) +[2026-06-07 03:09:36,121][495927] Updated weights for policy 0, policy_version 16566 (0.0009) +[2026-06-07 03:09:36,275][495927] Updated weights for policy 0, policy_version 16577 (0.0008) +[2026-06-07 03:09:36,434][495927] Updated weights for policy 0, policy_version 16588 (0.0008) +[2026-06-07 03:09:36,588][495927] Updated weights for policy 0, policy_version 16598 (0.0009) +[2026-06-07 03:09:36,727][495927] Updated weights for policy 0, policy_version 16608 (0.0009) +[2026-06-07 03:09:36,881][495927] Updated weights for policy 0, policy_version 16618 (0.0009) +[2026-06-07 03:09:37,520][495927] Updated weights for policy 0, policy_version 16628 (0.0009) +[2026-06-07 03:09:37,657][495927] Updated weights for policy 0, policy_version 16638 (0.0008) +[2026-06-07 03:09:37,803][495927] Updated weights for policy 0, policy_version 16648 (0.0008) +[2026-06-07 03:09:37,957][495927] Updated weights for policy 0, policy_version 16658 (0.0009) +[2026-06-07 03:09:38,107][495927] Updated weights for policy 0, policy_version 16668 (0.0009) +[2026-06-07 03:09:38,253][495927] Updated weights for policy 0, policy_version 16678 (0.0009) +[2026-06-07 03:09:38,909][492660] Fps is (10 sec: 22937.6, 60 sec: 22937.6, 300 sec: 22882.1). Total num frames: 8552448. Throughput: 0: 22957.5. Samples: 8557312. Policy #0 lag: (min: 14.0, avg: 26.9, max: 78.0) +[2026-06-07 03:09:38,910][492660] Avg episode reward: [(0, '544.328')] +[2026-06-07 03:09:38,911][495927] Updated weights for policy 0, policy_version 16689 (0.0009) +[2026-06-07 03:09:39,043][495927] Updated weights for policy 0, policy_version 16699 (0.0008) +[2026-06-07 03:09:39,192][495927] Updated weights for policy 0, policy_version 16709 (0.0008) +[2026-06-07 03:09:39,338][495927] Updated weights for policy 0, policy_version 16719 (0.0008) +[2026-06-07 03:09:39,507][495927] Updated weights for policy 0, policy_version 16730 (0.0008) +[2026-06-07 03:09:39,665][495927] Updated weights for policy 0, policy_version 16740 (0.0008) +[2026-06-07 03:09:39,817][495927] Updated weights for policy 0, policy_version 16750 (0.0008) +[2026-06-07 03:09:40,432][495927] Updated weights for policy 0, policy_version 16761 (0.0009) +[2026-06-07 03:09:40,572][495927] Updated weights for policy 0, policy_version 16771 (0.0008) +[2026-06-07 03:09:40,737][495927] Updated weights for policy 0, policy_version 16782 (0.0009) +[2026-06-07 03:09:40,893][495927] Updated weights for policy 0, policy_version 16792 (0.0008) +[2026-06-07 03:09:41,049][495927] Updated weights for policy 0, policy_version 16802 (0.0008) +[2026-06-07 03:09:41,206][495927] Updated weights for policy 0, policy_version 16812 (0.0008) +[2026-06-07 03:09:41,813][495927] Updated weights for policy 0, policy_version 16822 (0.0008) +[2026-06-07 03:09:41,969][495927] Updated weights for policy 0, policy_version 16832 (0.0008) +[2026-06-07 03:09:42,122][495927] Updated weights for policy 0, policy_version 16843 (0.0008) +[2026-06-07 03:09:42,293][495927] Updated weights for policy 0, policy_version 16854 (0.0009) +[2026-06-07 03:09:42,440][495927] Updated weights for policy 0, policy_version 16865 (0.0008) +[2026-06-07 03:09:42,590][495927] Updated weights for policy 0, policy_version 16875 (0.0008) +[2026-06-07 03:09:43,236][495927] Updated weights for policy 0, policy_version 16885 (0.0009) +[2026-06-07 03:09:43,396][495927] Updated weights for policy 0, policy_version 16896 (0.0008) +[2026-06-07 03:09:43,546][495927] Updated weights for policy 0, policy_version 16907 (0.0008) +[2026-06-07 03:09:43,717][495927] Updated weights for policy 0, policy_version 16918 (0.0009) +[2026-06-07 03:09:43,873][495927] Updated weights for policy 0, policy_version 16928 (0.0008) +[2026-06-07 03:09:43,909][492660] Fps is (10 sec: 22937.1, 60 sec: 22937.5, 300 sec: 22882.0). Total num frames: 8650752. Throughput: 0: 23048.4. Samples: 8696704. Policy #0 lag: (min: 14.0, avg: 26.9, max: 78.0) +[2026-06-07 03:09:43,910][492660] Avg episode reward: [(0, '608.659')] +[2026-06-07 03:09:44,025][495927] Updated weights for policy 0, policy_version 16938 (0.0008) +[2026-06-07 03:09:44,112][495570] Saving new best policy, reward=608.659! +[2026-06-07 03:09:44,644][495927] Updated weights for policy 0, policy_version 16948 (0.0008) +[2026-06-07 03:09:44,795][495927] Updated weights for policy 0, policy_version 16959 (0.0008) +[2026-06-07 03:09:44,944][495927] Updated weights for policy 0, policy_version 16969 (0.0009) +[2026-06-07 03:09:45,096][495927] Updated weights for policy 0, policy_version 16979 (0.0009) +[2026-06-07 03:09:45,259][495927] Updated weights for policy 0, policy_version 16990 (0.0008) +[2026-06-07 03:09:45,413][495927] Updated weights for policy 0, policy_version 17000 (0.0005) +[2026-06-07 03:09:46,056][495927] Updated weights for policy 0, policy_version 17011 (0.0005) +[2026-06-07 03:09:46,225][495927] Updated weights for policy 0, policy_version 17022 (0.0004) +[2026-06-07 03:09:46,367][495927] Updated weights for policy 0, policy_version 17032 (0.0004) +[2026-06-07 03:09:46,541][495927] Updated weights for policy 0, policy_version 17044 (0.0004) +[2026-06-07 03:09:46,699][495927] Updated weights for policy 0, policy_version 17054 (0.0004) +[2026-06-07 03:09:46,846][495927] Updated weights for policy 0, policy_version 17064 (0.0004) +[2026-06-07 03:09:47,438][495927] Updated weights for policy 0, policy_version 17074 (0.0004) +[2026-06-07 03:09:47,609][495927] Updated weights for policy 0, policy_version 17086 (0.0004) +[2026-06-07 03:09:47,755][495927] Updated weights for policy 0, policy_version 17096 (0.0004) +[2026-06-07 03:09:47,908][495927] Updated weights for policy 0, policy_version 17106 (0.0004) +[2026-06-07 03:09:48,058][495927] Updated weights for policy 0, policy_version 17116 (0.0006) +[2026-06-07 03:09:48,212][495927] Updated weights for policy 0, policy_version 17126 (0.0008) +[2026-06-07 03:09:48,359][495927] Updated weights for policy 0, policy_version 17136 (0.0008) +[2026-06-07 03:09:48,909][492660] Fps is (10 sec: 22937.5, 60 sec: 22937.6, 300 sec: 22993.1). Total num frames: 8781824. Throughput: 0: 23065.6. Samples: 8765056. Policy #0 lag: (min: 14.0, avg: 26.9, max: 78.0) +[2026-06-07 03:09:48,910][492660] Avg episode reward: [(0, '660.443')] +[2026-06-07 03:09:48,983][495927] Updated weights for policy 0, policy_version 17147 (0.0009) +[2026-06-07 03:09:49,130][495927] Updated weights for policy 0, policy_version 17157 (0.0008) +[2026-06-07 03:09:49,274][495927] Updated weights for policy 0, policy_version 17167 (0.0009) +[2026-06-07 03:09:49,423][495927] Updated weights for policy 0, policy_version 17177 (0.0008) +[2026-06-07 03:09:49,598][495927] Updated weights for policy 0, policy_version 17188 (0.0009) +[2026-06-07 03:09:49,744][495927] Updated weights for policy 0, policy_version 17198 (0.0008) +[2026-06-07 03:09:49,770][495570] Saving new best policy, reward=660.443! +[2026-06-07 03:09:50,389][495927] Updated weights for policy 0, policy_version 17209 (0.0009) +[2026-06-07 03:09:50,545][495927] Updated weights for policy 0, policy_version 17220 (0.0008) +[2026-06-07 03:09:50,697][495927] Updated weights for policy 0, policy_version 17230 (0.0008) +[2026-06-07 03:09:50,857][495927] Updated weights for policy 0, policy_version 17241 (0.0009) +[2026-06-07 03:09:51,016][495927] Updated weights for policy 0, policy_version 17251 (0.0008) +[2026-06-07 03:09:51,178][495927] Updated weights for policy 0, policy_version 17262 (0.0008) +[2026-06-07 03:09:51,799][495927] Updated weights for policy 0, policy_version 17272 (0.0008) +[2026-06-07 03:09:51,974][495927] Updated weights for policy 0, policy_version 17284 (0.0009) +[2026-06-07 03:09:52,120][495927] Updated weights for policy 0, policy_version 17294 (0.0008) +[2026-06-07 03:09:52,262][495927] Updated weights for policy 0, policy_version 17304 (0.0008) +[2026-06-07 03:09:52,418][495927] Updated weights for policy 0, policy_version 17314 (0.0008) +[2026-06-07 03:09:52,598][495927] Updated weights for policy 0, policy_version 17326 (0.0008) +[2026-06-07 03:09:53,245][495927] Updated weights for policy 0, policy_version 17336 (0.0009) +[2026-06-07 03:09:53,390][495927] Updated weights for policy 0, policy_version 17346 (0.0008) +[2026-06-07 03:09:53,536][495927] Updated weights for policy 0, policy_version 17356 (0.0008) +[2026-06-07 03:09:53,685][495927] Updated weights for policy 0, policy_version 17366 (0.0008) +[2026-06-07 03:09:53,833][495927] Updated weights for policy 0, policy_version 17376 (0.0008) +[2026-06-07 03:09:53,909][492660] Fps is (10 sec: 22938.1, 60 sec: 22937.6, 300 sec: 22882.1). Total num frames: 8880128. Throughput: 0: 23023.0. Samples: 8898944. Policy #0 lag: (min: 53.0, avg: 96.8, max: 118.0) +[2026-06-07 03:09:53,910][492660] Avg episode reward: [(0, '661.088')] +[2026-06-07 03:09:53,997][495927] Updated weights for policy 0, policy_version 17387 (0.0008) +[2026-06-07 03:09:54,069][495570] Saving new best policy, reward=661.088! +[2026-06-07 03:09:54,655][495927] Updated weights for policy 0, policy_version 17399 (0.0009) +[2026-06-07 03:09:54,812][495927] Updated weights for policy 0, policy_version 17410 (0.0008) +[2026-06-07 03:09:54,971][495927] Updated weights for policy 0, policy_version 17421 (0.0008) +[2026-06-07 03:09:55,124][495927] Updated weights for policy 0, policy_version 17431 (0.0008) +[2026-06-07 03:09:55,308][495927] Updated weights for policy 0, policy_version 17443 (0.0008) +[2026-06-07 03:09:55,458][495927] Updated weights for policy 0, policy_version 17453 (0.0008) +[2026-06-07 03:09:56,077][495927] Updated weights for policy 0, policy_version 17463 (0.0008) +[2026-06-07 03:09:56,220][495927] Updated weights for policy 0, policy_version 17473 (0.0008) +[2026-06-07 03:09:56,386][495927] Updated weights for policy 0, policy_version 17484 (0.0008) +[2026-06-07 03:09:56,536][495927] Updated weights for policy 0, policy_version 17494 (0.0008) +[2026-06-07 03:09:56,691][495927] Updated weights for policy 0, policy_version 17504 (0.0008) +[2026-06-07 03:09:56,834][495927] Updated weights for policy 0, policy_version 17514 (0.0008) +[2026-06-07 03:09:57,475][495927] Updated weights for policy 0, policy_version 17524 (0.0008) +[2026-06-07 03:09:57,629][495927] Updated weights for policy 0, policy_version 17535 (0.0008) +[2026-06-07 03:09:57,783][495927] Updated weights for policy 0, policy_version 17545 (0.0008) +[2026-06-07 03:09:57,925][495927] Updated weights for policy 0, policy_version 17555 (0.0009) +[2026-06-07 03:09:58,091][495927] Updated weights for policy 0, policy_version 17566 (0.0010) +[2026-06-07 03:09:58,261][495927] Updated weights for policy 0, policy_version 17577 (0.0009) +[2026-06-07 03:09:58,886][495927] Updated weights for policy 0, policy_version 17587 (0.0008) +[2026-06-07 03:09:58,909][492660] Fps is (10 sec: 22937.8, 60 sec: 22937.6, 300 sec: 22993.1). Total num frames: 9011200. Throughput: 0: 22778.4. Samples: 9035008. Policy #0 lag: (min: 53.0, avg: 96.8, max: 118.0) +[2026-06-07 03:09:58,910][492660] Avg episode reward: [(0, '685.552')] +[2026-06-07 03:09:59,051][495927] Updated weights for policy 0, policy_version 17599 (0.0009) +[2026-06-07 03:09:59,197][495927] Updated weights for policy 0, policy_version 17609 (0.0008) +[2026-06-07 03:09:59,348][495927] Updated weights for policy 0, policy_version 17619 (0.0009) +[2026-06-07 03:09:59,497][495927] Updated weights for policy 0, policy_version 17629 (0.0008) +[2026-06-07 03:09:59,655][495927] Updated weights for policy 0, policy_version 17639 (0.0008) +[2026-06-07 03:09:59,784][495570] Saving new best policy, reward=685.552! +[2026-06-07 03:10:00,284][495927] Updated weights for policy 0, policy_version 17649 (0.0009) +[2026-06-07 03:10:00,427][495927] Updated weights for policy 0, policy_version 17659 (0.0009) +[2026-06-07 03:10:00,580][495927] Updated weights for policy 0, policy_version 17669 (0.0009) +[2026-06-07 03:10:00,730][495927] Updated weights for policy 0, policy_version 17679 (0.0009) +[2026-06-07 03:10:00,883][495927] Updated weights for policy 0, policy_version 17689 (0.0008) +[2026-06-07 03:10:01,048][495927] Updated weights for policy 0, policy_version 17700 (0.0008) +[2026-06-07 03:10:01,223][495927] Updated weights for policy 0, policy_version 17712 (0.0008) +[2026-06-07 03:10:01,852][495927] Updated weights for policy 0, policy_version 17723 (0.0009) +[2026-06-07 03:10:01,995][495927] Updated weights for policy 0, policy_version 17733 (0.0008) +[2026-06-07 03:10:02,145][495927] Updated weights for policy 0, policy_version 17743 (0.0009) +[2026-06-07 03:10:02,298][495927] Updated weights for policy 0, policy_version 17753 (0.0008) +[2026-06-07 03:10:02,447][495927] Updated weights for policy 0, policy_version 17763 (0.0008) +[2026-06-07 03:10:02,632][495927] Updated weights for policy 0, policy_version 17775 (0.0008) +[2026-06-07 03:10:03,233][495927] Updated weights for policy 0, policy_version 17785 (0.0008) +[2026-06-07 03:10:03,381][495927] Updated weights for policy 0, policy_version 17795 (0.0008) +[2026-06-07 03:10:03,535][495927] Updated weights for policy 0, policy_version 17806 (0.0008) +[2026-06-07 03:10:03,723][495927] Updated weights for policy 0, policy_version 17818 (0.0008) +[2026-06-07 03:10:03,904][495927] Updated weights for policy 0, policy_version 17830 (0.0008) +[2026-06-07 03:10:03,909][492660] Fps is (10 sec: 22937.5, 60 sec: 22937.6, 300 sec: 22882.1). Total num frames: 9109504. Throughput: 0: 22823.8. Samples: 9103744. Policy #0 lag: (min: 53.0, avg: 96.8, max: 118.0) +[2026-06-07 03:10:03,910][492660] Avg episode reward: [(0, '661.898')] +[2026-06-07 03:10:04,565][495927] Updated weights for policy 0, policy_version 17841 (0.0006) +[2026-06-07 03:10:04,713][495927] Updated weights for policy 0, policy_version 17851 (0.0008) +[2026-06-07 03:10:04,862][495927] Updated weights for policy 0, policy_version 17861 (0.0008) +[2026-06-07 03:10:05,011][495927] Updated weights for policy 0, policy_version 17871 (0.0009) +[2026-06-07 03:10:05,173][495927] Updated weights for policy 0, policy_version 17882 (0.0008) +[2026-06-07 03:10:05,331][495927] Updated weights for policy 0, policy_version 17893 (0.0008) +[2026-06-07 03:10:05,482][495927] Updated weights for policy 0, policy_version 17903 (0.0008) +[2026-06-07 03:10:06,109][495927] Updated weights for policy 0, policy_version 17914 (0.0008) +[2026-06-07 03:10:06,256][495927] Updated weights for policy 0, policy_version 17924 (0.0008) +[2026-06-07 03:10:06,404][495927] Updated weights for policy 0, policy_version 17934 (0.0008) +[2026-06-07 03:10:06,583][495927] Updated weights for policy 0, policy_version 17946 (0.0008) +[2026-06-07 03:10:06,754][495927] Updated weights for policy 0, policy_version 17957 (0.0009) +[2026-06-07 03:10:07,406][495927] Updated weights for policy 0, policy_version 17969 (0.0009) +[2026-06-07 03:10:07,540][495927] Updated weights for policy 0, policy_version 17979 (0.0008) +[2026-06-07 03:10:07,701][495927] Updated weights for policy 0, policy_version 17990 (0.0008) +[2026-06-07 03:10:07,869][495927] Updated weights for policy 0, policy_version 18001 (0.0009) +[2026-06-07 03:10:08,025][495927] Updated weights for policy 0, policy_version 18011 (0.0009) +[2026-06-07 03:10:08,201][495927] Updated weights for policy 0, policy_version 18023 (0.0009) +[2026-06-07 03:10:08,843][495927] Updated weights for policy 0, policy_version 18033 (0.0009) +[2026-06-07 03:10:08,909][492660] Fps is (10 sec: 22937.6, 60 sec: 22937.6, 300 sec: 22993.1). Total num frames: 9240576. Throughput: 0: 23045.7. Samples: 9249024. Policy #0 lag: (min: 28.0, avg: 42.0, max: 92.0) +[2026-06-07 03:10:08,910][492660] Avg episode reward: [(0, '663.869')] +[2026-06-07 03:10:08,986][495927] Updated weights for policy 0, policy_version 18043 (0.0009) +[2026-06-07 03:10:09,148][495927] Updated weights for policy 0, policy_version 18054 (0.0009) +[2026-06-07 03:10:09,305][495927] Updated weights for policy 0, policy_version 18065 (0.0009) +[2026-06-07 03:10:09,456][495927] Updated weights for policy 0, policy_version 18075 (0.0008) +[2026-06-07 03:10:09,614][495927] Updated weights for policy 0, policy_version 18085 (0.0009) +[2026-06-07 03:10:09,780][495927] Updated weights for policy 0, policy_version 18096 (0.0008) +[2026-06-07 03:10:10,413][495927] Updated weights for policy 0, policy_version 18108 (0.0009) +[2026-06-07 03:10:10,559][495927] Updated weights for policy 0, policy_version 18118 (0.0008) +[2026-06-07 03:10:10,718][495927] Updated weights for policy 0, policy_version 18128 (0.0008) +[2026-06-07 03:10:10,888][495927] Updated weights for policy 0, policy_version 18139 (0.0009) +[2026-06-07 03:10:11,035][495927] Updated weights for policy 0, policy_version 18149 (0.0008) +[2026-06-07 03:10:11,196][495927] Updated weights for policy 0, policy_version 18160 (0.0008) +[2026-06-07 03:10:11,817][495927] Updated weights for policy 0, policy_version 18170 (0.0009) +[2026-06-07 03:10:11,956][495927] Updated weights for policy 0, policy_version 18180 (0.0009) +[2026-06-07 03:10:12,110][495927] Updated weights for policy 0, policy_version 18190 (0.0008) +[2026-06-07 03:10:12,256][495927] Updated weights for policy 0, policy_version 18200 (0.0008) +[2026-06-07 03:10:12,407][495927] Updated weights for policy 0, policy_version 18210 (0.0008) +[2026-06-07 03:10:12,564][495927] Updated weights for policy 0, policy_version 18220 (0.0008) +[2026-06-07 03:10:13,211][495927] Updated weights for policy 0, policy_version 18232 (0.0009) +[2026-06-07 03:10:13,399][495927] Updated weights for policy 0, policy_version 18244 (0.0009) +[2026-06-07 03:10:13,557][495927] Updated weights for policy 0, policy_version 18255 (0.0008) +[2026-06-07 03:10:13,724][495927] Updated weights for policy 0, policy_version 18266 (0.0008) +[2026-06-07 03:10:13,874][495927] Updated weights for policy 0, policy_version 18276 (0.0009) +[2026-06-07 03:10:13,909][492660] Fps is (10 sec: 22937.6, 60 sec: 22937.7, 300 sec: 22882.1). Total num frames: 9338880. Throughput: 0: 23108.3. Samples: 9386112. Policy #0 lag: (min: 28.0, avg: 42.0, max: 92.0) +[2026-06-07 03:10:13,910][492660] Avg episode reward: [(0, '650.529')] +[2026-06-07 03:10:14,030][495927] Updated weights for policy 0, policy_version 18286 (0.0008) +[2026-06-07 03:10:14,630][495927] Updated weights for policy 0, policy_version 18296 (0.0009) +[2026-06-07 03:10:14,778][495927] Updated weights for policy 0, policy_version 18306 (0.0008) +[2026-06-07 03:10:14,931][495927] Updated weights for policy 0, policy_version 18316 (0.0008) +[2026-06-07 03:10:15,079][495927] Updated weights for policy 0, policy_version 18326 (0.0008) +[2026-06-07 03:10:15,258][495927] Updated weights for policy 0, policy_version 18338 (0.0009) +[2026-06-07 03:10:15,423][495927] Updated weights for policy 0, policy_version 18349 (0.0008) +[2026-06-07 03:10:16,048][495927] Updated weights for policy 0, policy_version 18359 (0.0008) +[2026-06-07 03:10:16,190][495927] Updated weights for policy 0, policy_version 18369 (0.0008) +[2026-06-07 03:10:16,340][495927] Updated weights for policy 0, policy_version 18379 (0.0008) +[2026-06-07 03:10:16,503][495927] Updated weights for policy 0, policy_version 18390 (0.0008) +[2026-06-07 03:10:16,668][495927] Updated weights for policy 0, policy_version 18401 (0.0008) +[2026-06-07 03:10:16,818][495927] Updated weights for policy 0, policy_version 18411 (0.0008) +[2026-06-07 03:10:17,457][495927] Updated weights for policy 0, policy_version 18422 (0.0008) +[2026-06-07 03:10:17,632][495927] Updated weights for policy 0, policy_version 18434 (0.0009) +[2026-06-07 03:10:17,808][495927] Updated weights for policy 0, policy_version 18446 (0.0008) +[2026-06-07 03:10:17,958][495927] Updated weights for policy 0, policy_version 18456 (0.0008) +[2026-06-07 03:10:18,123][495927] Updated weights for policy 0, policy_version 18467 (0.0008) +[2026-06-07 03:10:18,271][495927] Updated weights for policy 0, policy_version 18477 (0.0008) +[2026-06-07 03:10:18,899][495927] Updated weights for policy 0, policy_version 18487 (0.0008) +[2026-06-07 03:10:18,909][492660] Fps is (10 sec: 22937.6, 60 sec: 22937.6, 300 sec: 22993.1). Total num frames: 9469952. Throughput: 0: 23116.8. Samples: 9453312. Policy #0 lag: (min: 28.0, avg: 42.0, max: 92.0) +[2026-06-07 03:10:18,910][492660] Avg episode reward: [(0, '645.926')] +[2026-06-07 03:10:19,049][495927] Updated weights for policy 0, policy_version 18497 (0.0008) +[2026-06-07 03:10:19,210][495927] Updated weights for policy 0, policy_version 18508 (0.0008) +[2026-06-07 03:10:19,360][495927] Updated weights for policy 0, policy_version 18518 (0.0009) +[2026-06-07 03:10:19,528][495927] Updated weights for policy 0, policy_version 18529 (0.0008) +[2026-06-07 03:10:19,688][495927] Updated weights for policy 0, policy_version 18539 (0.0008) +[2026-06-07 03:10:20,291][495927] Updated weights for policy 0, policy_version 18549 (0.0009) +[2026-06-07 03:10:20,452][495927] Updated weights for policy 0, policy_version 18560 (0.0009) +[2026-06-07 03:10:20,605][495927] Updated weights for policy 0, policy_version 18571 (0.0010) +[2026-06-07 03:10:20,762][495927] Updated weights for policy 0, policy_version 18581 (0.0010) +[2026-06-07 03:10:20,924][495927] Updated weights for policy 0, policy_version 18592 (0.0010) +[2026-06-07 03:10:21,095][495927] Updated weights for policy 0, policy_version 18603 (0.0009) +[2026-06-07 03:10:21,753][495927] Updated weights for policy 0, policy_version 18615 (0.0009) +[2026-06-07 03:10:21,913][495927] Updated weights for policy 0, policy_version 18626 (0.0010) +[2026-06-07 03:10:22,069][495927] Updated weights for policy 0, policy_version 18636 (0.0009) +[2026-06-07 03:10:22,211][495927] Updated weights for policy 0, policy_version 18646 (0.0008) +[2026-06-07 03:10:22,366][495927] Updated weights for policy 0, policy_version 18656 (0.0009) +[2026-06-07 03:10:22,516][495927] Updated weights for policy 0, policy_version 18666 (0.0008) +[2026-06-07 03:10:23,137][495927] Updated weights for policy 0, policy_version 18676 (0.0008) +[2026-06-07 03:10:23,279][495927] Updated weights for policy 0, policy_version 18686 (0.0008) +[2026-06-07 03:10:23,454][495927] Updated weights for policy 0, policy_version 18698 (0.0009) +[2026-06-07 03:10:23,603][495927] Updated weights for policy 0, policy_version 18708 (0.0010) +[2026-06-07 03:10:23,760][495927] Updated weights for policy 0, policy_version 18718 (0.0008) +[2026-06-07 03:10:23,909][492660] Fps is (10 sec: 22937.4, 60 sec: 22937.6, 300 sec: 22993.1). Total num frames: 9568256. Throughput: 0: 22912.0. Samples: 9588352. Policy #0 lag: (min: 63.0, avg: 76.8, max: 127.0) +[2026-06-07 03:10:23,910][492660] Avg episode reward: [(0, '636.276')] +[2026-06-07 03:10:23,925][495927] Updated weights for policy 0, policy_version 18729 (0.0011) +[2026-06-07 03:10:24,537][495927] Updated weights for policy 0, policy_version 18739 (0.0009) +[2026-06-07 03:10:24,677][495927] Updated weights for policy 0, policy_version 18749 (0.0008) +[2026-06-07 03:10:24,816][495927] Updated weights for policy 0, policy_version 18759 (0.0008) +[2026-06-07 03:10:24,964][495927] Updated weights for policy 0, policy_version 18769 (0.0010) +[2026-06-07 03:10:25,129][495927] Updated weights for policy 0, policy_version 18780 (0.0009) +[2026-06-07 03:10:25,292][495927] Updated weights for policy 0, policy_version 18791 (0.0008) +[2026-06-07 03:10:25,947][495927] Updated weights for policy 0, policy_version 18801 (0.0010) +[2026-06-07 03:10:26,085][495927] Updated weights for policy 0, policy_version 18811 (0.0008) +[2026-06-07 03:10:26,224][495927] Updated weights for policy 0, policy_version 18821 (0.0008) +[2026-06-07 03:10:26,404][495927] Updated weights for policy 0, policy_version 18833 (0.0008) +[2026-06-07 03:10:26,556][495927] Updated weights for policy 0, policy_version 18843 (0.0008) +[2026-06-07 03:10:26,709][495927] Updated weights for policy 0, policy_version 18853 (0.0008) +[2026-06-07 03:10:26,859][495927] Updated weights for policy 0, policy_version 18863 (0.0009) +[2026-06-07 03:10:27,482][495927] Updated weights for policy 0, policy_version 18873 (0.0009) +[2026-06-07 03:10:27,648][495927] Updated weights for policy 0, policy_version 18884 (0.0008) +[2026-06-07 03:10:27,790][495927] Updated weights for policy 0, policy_version 18894 (0.0008) +[2026-06-07 03:10:27,936][495927] Updated weights for policy 0, policy_version 18904 (0.0008) +[2026-06-07 03:10:28,107][495927] Updated weights for policy 0, policy_version 18915 (0.0009) +[2026-06-07 03:10:28,262][495927] Updated weights for policy 0, policy_version 18925 (0.0009) +[2026-06-07 03:10:28,885][495927] Updated weights for policy 0, policy_version 18935 (0.0008) +[2026-06-07 03:10:28,909][492660] Fps is (10 sec: 22937.5, 60 sec: 22937.6, 300 sec: 22993.1). Total num frames: 9699328. Throughput: 0: 22812.5. Samples: 9723264. Policy #0 lag: (min: 63.0, avg: 76.8, max: 127.0) +[2026-06-07 03:10:28,910][492660] Avg episode reward: [(0, '646.357')] +[2026-06-07 03:10:29,024][495927] Updated weights for policy 0, policy_version 18945 (0.0008) +[2026-06-07 03:10:29,172][495927] Updated weights for policy 0, policy_version 18955 (0.0009) +[2026-06-07 03:10:29,327][495927] Updated weights for policy 0, policy_version 18965 (0.0008) +[2026-06-07 03:10:29,472][495927] Updated weights for policy 0, policy_version 18975 (0.0008) +[2026-06-07 03:10:29,632][495927] Updated weights for policy 0, policy_version 18985 (0.0009) +[2026-06-07 03:10:30,246][495927] Updated weights for policy 0, policy_version 18995 (0.0005) +[2026-06-07 03:10:30,381][495927] Updated weights for policy 0, policy_version 19005 (0.0005) +[2026-06-07 03:10:30,526][495927] Updated weights for policy 0, policy_version 19015 (0.0006) +[2026-06-07 03:10:30,680][495927] Updated weights for policy 0, policy_version 19025 (0.0008) +[2026-06-07 03:10:30,834][495927] Updated weights for policy 0, policy_version 19035 (0.0008) +[2026-06-07 03:10:30,990][495927] Updated weights for policy 0, policy_version 19045 (0.0008) +[2026-06-07 03:10:31,147][495927] Updated weights for policy 0, policy_version 19056 (0.0008) +[2026-06-07 03:10:31,783][495927] Updated weights for policy 0, policy_version 19066 (0.0009) +[2026-06-07 03:10:31,940][495927] Updated weights for policy 0, policy_version 19077 (0.0009) +[2026-06-07 03:10:32,085][495927] Updated weights for policy 0, policy_version 19087 (0.0008) +[2026-06-07 03:10:32,270][495927] Updated weights for policy 0, policy_version 19099 (0.0009) +[2026-06-07 03:10:32,425][495927] Updated weights for policy 0, policy_version 19109 (0.0008) +[2026-06-07 03:10:32,582][495927] Updated weights for policy 0, policy_version 19120 (0.0009) +[2026-06-07 03:10:33,197][495927] Updated weights for policy 0, policy_version 19130 (0.0009) +[2026-06-07 03:10:33,342][495927] Updated weights for policy 0, policy_version 19140 (0.0008) +[2026-06-07 03:10:33,494][495927] Updated weights for policy 0, policy_version 19150 (0.0009) +[2026-06-07 03:10:33,650][495927] Updated weights for policy 0, policy_version 19160 (0.0008) +[2026-06-07 03:10:33,795][495927] Updated weights for policy 0, policy_version 19170 (0.0008) +[2026-06-07 03:10:33,909][492660] Fps is (10 sec: 22937.9, 60 sec: 22937.6, 300 sec: 22993.1). Total num frames: 9797632. Throughput: 0: 22863.7. Samples: 9793920. Policy #0 lag: (min: 63.0, avg: 76.8, max: 127.0) +[2026-06-07 03:10:33,910][492660] Avg episode reward: [(0, '655.359')] +[2026-06-07 03:10:33,952][495927] Updated weights for policy 0, policy_version 19180 (0.0009) +[2026-06-07 03:10:34,581][495927] Updated weights for policy 0, policy_version 19191 (0.0009) +[2026-06-07 03:10:34,727][495927] Updated weights for policy 0, policy_version 19201 (0.0008) +[2026-06-07 03:10:34,879][495927] Updated weights for policy 0, policy_version 19211 (0.0008) +[2026-06-07 03:10:35,027][495927] Updated weights for policy 0, policy_version 19221 (0.0009) +[2026-06-07 03:10:35,178][495927] Updated weights for policy 0, policy_version 19231 (0.0008) +[2026-06-07 03:10:35,346][495927] Updated weights for policy 0, policy_version 19242 (0.0008) +[2026-06-07 03:10:35,964][495927] Updated weights for policy 0, policy_version 19253 (0.0008) +[2026-06-07 03:10:36,116][495927] Updated weights for policy 0, policy_version 19264 (0.0008) +[2026-06-07 03:10:36,266][495927] Updated weights for policy 0, policy_version 19274 (0.0008) +[2026-06-07 03:10:36,424][495927] Updated weights for policy 0, policy_version 19284 (0.0008) +[2026-06-07 03:10:36,576][495927] Updated weights for policy 0, policy_version 19294 (0.0008) +[2026-06-07 03:10:36,742][495927] Updated weights for policy 0, policy_version 19305 (0.0009) +[2026-06-07 03:10:37,365][495927] Updated weights for policy 0, policy_version 19315 (0.0008) +[2026-06-07 03:10:37,511][495927] Updated weights for policy 0, policy_version 19325 (0.0008) +[2026-06-07 03:10:37,671][495927] Updated weights for policy 0, policy_version 19336 (0.0010) +[2026-06-07 03:10:37,820][495927] Updated weights for policy 0, policy_version 19346 (0.0008) +[2026-06-07 03:10:37,972][495927] Updated weights for policy 0, policy_version 19356 (0.0008) +[2026-06-07 03:10:38,120][495927] Updated weights for policy 0, policy_version 19366 (0.0008) +[2026-06-07 03:10:38,277][495927] Updated weights for policy 0, policy_version 19376 (0.0008) +[2026-06-07 03:10:38,892][495927] Updated weights for policy 0, policy_version 19386 (0.0008) +[2026-06-07 03:10:38,909][492660] Fps is (10 sec: 22937.7, 60 sec: 22937.6, 300 sec: 22993.1). Total num frames: 9928704. Throughput: 0: 23116.8. Samples: 9939200. Policy #0 lag: (min: 54.0, avg: 68.9, max: 118.0) +[2026-06-07 03:10:38,910][492660] Avg episode reward: [(0, '659.462')] +[2026-06-07 03:10:39,027][495927] Updated weights for policy 0, policy_version 19396 (0.0008) +[2026-06-07 03:10:39,173][495927] Updated weights for policy 0, policy_version 19406 (0.0008) +[2026-06-07 03:10:39,326][495927] Updated weights for policy 0, policy_version 19416 (0.0008) +[2026-06-07 03:10:39,484][495927] Updated weights for policy 0, policy_version 19426 (0.0008) +[2026-06-07 03:10:39,635][495927] Updated weights for policy 0, policy_version 19436 (0.0008) +[2026-06-07 03:10:40,230][495927] Updated weights for policy 0, policy_version 19446 (0.0009) +[2026-06-07 03:10:40,376][495927] Updated weights for policy 0, policy_version 19456 (0.0008) +[2026-06-07 03:10:40,527][495927] Updated weights for policy 0, policy_version 19466 (0.0009) +[2026-06-07 03:10:40,690][495927] Updated weights for policy 0, policy_version 19477 (0.0008) +[2026-06-07 03:10:40,839][495927] Updated weights for policy 0, policy_version 19487 (0.0007) +[2026-06-07 03:10:41,006][495927] Updated weights for policy 0, policy_version 19498 (0.0008) +[2026-06-07 03:10:41,634][495927] Updated weights for policy 0, policy_version 19508 (0.0007) +[2026-06-07 03:10:41,778][495927] Updated weights for policy 0, policy_version 19518 (0.0004) +[2026-06-07 03:10:41,920][495927] Updated weights for policy 0, policy_version 19528 (0.0004) +[2026-06-07 03:10:42,092][495927] Updated weights for policy 0, policy_version 19539 (0.0004) +[2026-06-07 03:10:42,273][495927] Updated weights for policy 0, policy_version 19551 (0.0007) +[2026-06-07 03:10:42,429][495927] Updated weights for policy 0, policy_version 19561 (0.0008) +[2026-06-07 03:10:43,032][495927] Updated weights for policy 0, policy_version 19571 (0.0009) +[2026-06-07 03:10:43,190][495927] Updated weights for policy 0, policy_version 19582 (0.0008) +[2026-06-07 03:10:43,337][495927] Updated weights for policy 0, policy_version 19592 (0.0008) +[2026-06-07 03:10:43,479][495927] Updated weights for policy 0, policy_version 19602 (0.0008) +[2026-06-07 03:10:43,633][495927] Updated weights for policy 0, policy_version 19612 (0.0009) +[2026-06-07 03:10:43,795][495927] Updated weights for policy 0, policy_version 19622 (0.0008) +[2026-06-07 03:10:43,909][492660] Fps is (10 sec: 22937.6, 60 sec: 22937.7, 300 sec: 22993.1). Total num frames: 10027008. Throughput: 0: 23071.3. Samples: 10073216. Policy #0 lag: (min: 54.0, avg: 68.9, max: 118.0) +[2026-06-07 03:10:43,910][492660] Avg episode reward: [(0, '648.880')] +[2026-06-07 03:10:43,942][495927] Updated weights for policy 0, policy_version 19632 (0.0009) +[2026-06-07 03:10:44,573][495927] Updated weights for policy 0, policy_version 19643 (0.0008) +[2026-06-07 03:10:44,738][495927] Updated weights for policy 0, policy_version 19654 (0.0008) +[2026-06-07 03:10:44,891][495927] Updated weights for policy 0, policy_version 19664 (0.0008) +[2026-06-07 03:10:45,040][495927] Updated weights for policy 0, policy_version 19674 (0.0008) +[2026-06-07 03:10:45,191][495927] Updated weights for policy 0, policy_version 19684 (0.0008) +[2026-06-07 03:10:45,334][495927] Updated weights for policy 0, policy_version 19694 (0.0009) +[2026-06-07 03:10:45,956][495927] Updated weights for policy 0, policy_version 19704 (0.0008) +[2026-06-07 03:10:46,096][495927] Updated weights for policy 0, policy_version 19714 (0.0008) +[2026-06-07 03:10:46,252][495927] Updated weights for policy 0, policy_version 19724 (0.0009) +[2026-06-07 03:10:46,407][495927] Updated weights for policy 0, policy_version 19734 (0.0008) +[2026-06-07 03:10:46,556][495927] Updated weights for policy 0, policy_version 19744 (0.0008) +[2026-06-07 03:10:46,707][495927] Updated weights for policy 0, policy_version 19754 (0.0008) +[2026-06-07 03:10:47,317][495927] Updated weights for policy 0, policy_version 19764 (0.0008) +[2026-06-07 03:10:47,459][495927] Updated weights for policy 0, policy_version 19774 (0.0009) +[2026-06-07 03:10:47,609][495927] Updated weights for policy 0, policy_version 19784 (0.0008) +[2026-06-07 03:10:47,761][495927] Updated weights for policy 0, policy_version 19794 (0.0008) +[2026-06-07 03:10:47,914][495927] Updated weights for policy 0, policy_version 19804 (0.0005) +[2026-06-07 03:10:48,069][495927] Updated weights for policy 0, policy_version 19814 (0.0005) +[2026-06-07 03:10:48,219][495927] Updated weights for policy 0, policy_version 19824 (0.0005) +[2026-06-07 03:10:48,825][495927] Updated weights for policy 0, policy_version 19835 (0.0005) +[2026-06-07 03:10:48,909][492660] Fps is (10 sec: 22937.4, 60 sec: 22937.6, 300 sec: 22993.1). Total num frames: 10158080. Throughput: 0: 23048.5. Samples: 10140928. Policy #0 lag: (min: 54.0, avg: 68.9, max: 118.0) +[2026-06-07 03:10:48,910][492660] Avg episode reward: [(0, '655.105')] +[2026-06-07 03:10:48,971][495927] Updated weights for policy 0, policy_version 19845 (0.0006) +[2026-06-07 03:10:49,130][495927] Updated weights for policy 0, policy_version 19856 (0.0008) +[2026-06-07 03:10:49,278][495927] Updated weights for policy 0, policy_version 19866 (0.0008) +[2026-06-07 03:10:49,433][495927] Updated weights for policy 0, policy_version 19876 (0.0008) +[2026-06-07 03:10:49,588][495927] Updated weights for policy 0, policy_version 19886 (0.0009) +[2026-06-07 03:10:50,207][495927] Updated weights for policy 0, policy_version 19896 (0.0008) +[2026-06-07 03:10:50,359][495927] Updated weights for policy 0, policy_version 19906 (0.0008) +[2026-06-07 03:10:50,523][495927] Updated weights for policy 0, policy_version 19917 (0.0008) +[2026-06-07 03:10:50,685][495927] Updated weights for policy 0, policy_version 19928 (0.0009) +[2026-06-07 03:10:50,840][495927] Updated weights for policy 0, policy_version 19938 (0.0008) +[2026-06-07 03:10:51,026][495927] Updated weights for policy 0, policy_version 19950 (0.0009) +[2026-06-07 03:10:51,636][495927] Updated weights for policy 0, policy_version 19960 (0.0009) +[2026-06-07 03:10:51,795][495927] Updated weights for policy 0, policy_version 19971 (0.0009) +[2026-06-07 03:10:51,942][495927] Updated weights for policy 0, policy_version 19981 (0.0008) +[2026-06-07 03:10:52,092][495927] Updated weights for policy 0, policy_version 19991 (0.0008) +[2026-06-07 03:10:52,262][495927] Updated weights for policy 0, policy_version 20002 (0.0008) +[2026-06-07 03:10:52,411][495927] Updated weights for policy 0, policy_version 20012 (0.0008) +[2026-06-07 03:10:53,055][495927] Updated weights for policy 0, policy_version 20023 (0.0008) +[2026-06-07 03:10:53,198][495927] Updated weights for policy 0, policy_version 20033 (0.0008) +[2026-06-07 03:10:53,343][495927] Updated weights for policy 0, policy_version 20043 (0.0008) +[2026-06-07 03:10:53,499][495927] Updated weights for policy 0, policy_version 20053 (0.0008) +[2026-06-07 03:10:53,666][495927] Updated weights for policy 0, policy_version 20064 (0.0008) +[2026-06-07 03:10:53,821][495927] Updated weights for policy 0, policy_version 20074 (0.0008) +[2026-06-07 03:10:53,909][492660] Fps is (10 sec: 26214.4, 60 sec: 23483.7, 300 sec: 23104.2). Total num frames: 10289152. Throughput: 0: 22806.8. Samples: 10275328. Policy #0 lag: (min: 54.0, avg: 68.8, max: 118.0) +[2026-06-07 03:10:53,910][492660] Avg episode reward: [(0, '680.384')] +[2026-06-07 03:10:54,453][495927] Updated weights for policy 0, policy_version 20085 (0.0008) +[2026-06-07 03:10:54,605][495927] Updated weights for policy 0, policy_version 20096 (0.0009) +[2026-06-07 03:10:54,758][495927] Updated weights for policy 0, policy_version 20106 (0.0009) +[2026-06-07 03:10:54,911][495927] Updated weights for policy 0, policy_version 20116 (0.0008) +[2026-06-07 03:10:55,057][495927] Updated weights for policy 0, policy_version 20126 (0.0008) +[2026-06-07 03:10:55,223][495927] Updated weights for policy 0, policy_version 20137 (0.0008) +[2026-06-07 03:10:55,880][495927] Updated weights for policy 0, policy_version 20148 (0.0008) +[2026-06-07 03:10:56,016][495927] Updated weights for policy 0, policy_version 20158 (0.0008) +[2026-06-07 03:10:56,187][495927] Updated weights for policy 0, policy_version 20169 (0.0008) +[2026-06-07 03:10:56,342][495927] Updated weights for policy 0, policy_version 20179 (0.0009) +[2026-06-07 03:10:56,507][495927] Updated weights for policy 0, policy_version 20190 (0.0008) +[2026-06-07 03:10:56,659][495927] Updated weights for policy 0, policy_version 20200 (0.0008) +[2026-06-07 03:10:57,295][495927] Updated weights for policy 0, policy_version 20211 (0.0008) +[2026-06-07 03:10:57,450][495927] Updated weights for policy 0, policy_version 20221 (0.0008) +[2026-06-07 03:10:57,593][495927] Updated weights for policy 0, policy_version 20231 (0.0009) +[2026-06-07 03:10:57,739][495927] Updated weights for policy 0, policy_version 20241 (0.0008) +[2026-06-07 03:10:57,891][495927] Updated weights for policy 0, policy_version 20251 (0.0008) +[2026-06-07 03:10:58,060][495927] Updated weights for policy 0, policy_version 20262 (0.0008) +[2026-06-07 03:10:58,209][495927] Updated weights for policy 0, policy_version 20272 (0.0008) +[2026-06-07 03:10:58,829][495927] Updated weights for policy 0, policy_version 20283 (0.0008) +[2026-06-07 03:10:58,909][492660] Fps is (10 sec: 22937.8, 60 sec: 22937.6, 300 sec: 22993.1). Total num frames: 10387456. Throughput: 0: 22801.1. Samples: 10412160. Policy #0 lag: (min: 54.0, avg: 68.8, max: 118.0) +[2026-06-07 03:10:58,910][492660] Avg episode reward: [(0, '708.505')] +[2026-06-07 03:10:58,981][495927] Updated weights for policy 0, policy_version 20293 (0.0008) +[2026-06-07 03:10:59,129][495927] Updated weights for policy 0, policy_version 20303 (0.0008) +[2026-06-07 03:10:59,277][495927] Updated weights for policy 0, policy_version 20313 (0.0008) +[2026-06-07 03:10:59,453][495927] Updated weights for policy 0, policy_version 20324 (0.0008) +[2026-06-07 03:10:59,608][495927] Updated weights for policy 0, policy_version 20334 (0.0008) +[2026-06-07 03:10:59,632][495570] Saving new best policy, reward=708.505! +[2026-06-07 03:11:00,239][495927] Updated weights for policy 0, policy_version 20346 (0.0008) +[2026-06-07 03:11:00,385][495927] Updated weights for policy 0, policy_version 20356 (0.0005) +[2026-06-07 03:11:00,539][495927] Updated weights for policy 0, policy_version 20366 (0.0005) +[2026-06-07 03:11:00,709][495927] Updated weights for policy 0, policy_version 20377 (0.0005) +[2026-06-07 03:11:00,858][495927] Updated weights for policy 0, policy_version 20387 (0.0005) +[2026-06-07 03:11:01,038][495927] Updated weights for policy 0, policy_version 20398 (0.0004) +[2026-06-07 03:11:01,620][495927] Updated weights for policy 0, policy_version 20408 (0.0004) +[2026-06-07 03:11:01,782][495927] Updated weights for policy 0, policy_version 20419 (0.0004) +[2026-06-07 03:11:01,930][495927] Updated weights for policy 0, policy_version 20429 (0.0004) +[2026-06-07 03:11:02,086][495927] Updated weights for policy 0, policy_version 20439 (0.0006) +[2026-06-07 03:11:02,238][495927] Updated weights for policy 0, policy_version 20449 (0.0008) +[2026-06-07 03:11:02,392][495927] Updated weights for policy 0, policy_version 20459 (0.0008) +[2026-06-07 03:11:03,003][495927] Updated weights for policy 0, policy_version 20469 (0.0009) +[2026-06-07 03:11:03,144][495927] Updated weights for policy 0, policy_version 20479 (0.0007) +[2026-06-07 03:11:03,294][495927] Updated weights for policy 0, policy_version 20489 (0.0005) +[2026-06-07 03:11:03,445][495927] Updated weights for policy 0, policy_version 20499 (0.0005) +[2026-06-07 03:11:03,591][495927] Updated weights for policy 0, policy_version 20509 (0.0005) +[2026-06-07 03:11:03,763][495927] Updated weights for policy 0, policy_version 20520 (0.0005) +[2026-06-07 03:11:03,909][492660] Fps is (10 sec: 22937.6, 60 sec: 23483.7, 300 sec: 23104.2). Total num frames: 10518528. Throughput: 0: 22957.5. Samples: 10486400. Policy #0 lag: (min: 54.0, avg: 68.8, max: 118.0) +[2026-06-07 03:11:03,910][492660] Avg episode reward: [(0, '726.192')] +[2026-06-07 03:11:03,915][495570] Saving new best policy, reward=726.192! +[2026-06-07 03:11:04,375][495927] Updated weights for policy 0, policy_version 20530 (0.0006) +[2026-06-07 03:11:04,522][495927] Updated weights for policy 0, policy_version 20540 (0.0008) +[2026-06-07 03:11:04,670][495927] Updated weights for policy 0, policy_version 20550 (0.0008) +[2026-06-07 03:11:04,821][495927] Updated weights for policy 0, policy_version 20560 (0.0008) +[2026-06-07 03:11:04,970][495927] Updated weights for policy 0, policy_version 20570 (0.0010) +[2026-06-07 03:11:05,132][495927] Updated weights for policy 0, policy_version 20581 (0.0009) +[2026-06-07 03:11:05,290][495927] Updated weights for policy 0, policy_version 20591 (0.0008) +[2026-06-07 03:11:05,916][495927] Updated weights for policy 0, policy_version 20601 (0.0009) +[2026-06-07 03:11:06,060][495927] Updated weights for policy 0, policy_version 20611 (0.0008) +[2026-06-07 03:11:06,202][495927] Updated weights for policy 0, policy_version 20621 (0.0008) +[2026-06-07 03:11:06,353][495927] Updated weights for policy 0, policy_version 20631 (0.0008) +[2026-06-07 03:11:06,524][495927] Updated weights for policy 0, policy_version 20642 (0.0009) +[2026-06-07 03:11:06,675][495927] Updated weights for policy 0, policy_version 20652 (0.0009) +[2026-06-07 03:11:07,298][495927] Updated weights for policy 0, policy_version 20662 (0.0009) +[2026-06-07 03:11:07,434][495927] Updated weights for policy 0, policy_version 20672 (0.0008) +[2026-06-07 03:11:07,603][495927] Updated weights for policy 0, policy_version 20683 (0.0009) +[2026-06-07 03:11:07,753][495927] Updated weights for policy 0, policy_version 20693 (0.0009) +[2026-06-07 03:11:07,903][495927] Updated weights for policy 0, policy_version 20703 (0.0008) +[2026-06-07 03:11:08,052][495927] Updated weights for policy 0, policy_version 20713 (0.0009) +[2026-06-07 03:11:08,702][495927] Updated weights for policy 0, policy_version 20724 (0.0009) +[2026-06-07 03:11:08,869][495927] Updated weights for policy 0, policy_version 20736 (0.0010) +[2026-06-07 03:11:08,909][492660] Fps is (10 sec: 22937.6, 60 sec: 22937.6, 300 sec: 22993.1). Total num frames: 10616832. Throughput: 0: 23108.3. Samples: 10628224. Policy #0 lag: (min: 14.0, avg: 28.8, max: 78.0) +[2026-06-07 03:11:08,910][492660] Avg episode reward: [(0, '719.975')] +[2026-06-07 03:11:09,013][495927] Updated weights for policy 0, policy_version 20746 (0.0008) +[2026-06-07 03:11:09,166][495927] Updated weights for policy 0, policy_version 20756 (0.0008) +[2026-06-07 03:11:09,318][495927] Updated weights for policy 0, policy_version 20766 (0.0008) +[2026-06-07 03:11:09,488][495927] Updated weights for policy 0, policy_version 20777 (0.0009) +[2026-06-07 03:11:10,135][495927] Updated weights for policy 0, policy_version 20788 (0.0008) +[2026-06-07 03:11:10,286][495927] Updated weights for policy 0, policy_version 20799 (0.0009) +[2026-06-07 03:11:10,431][495927] Updated weights for policy 0, policy_version 20809 (0.0008) +[2026-06-07 03:11:10,574][495927] Updated weights for policy 0, policy_version 20819 (0.0008) +[2026-06-07 03:11:10,750][495927] Updated weights for policy 0, policy_version 20830 (0.0009) +[2026-06-07 03:11:10,895][495927] Updated weights for policy 0, policy_version 20840 (0.0008) +[2026-06-07 03:11:11,547][495927] Updated weights for policy 0, policy_version 20851 (0.0009) +[2026-06-07 03:11:11,701][495927] Updated weights for policy 0, policy_version 20862 (0.0008) +[2026-06-07 03:11:11,862][495927] Updated weights for policy 0, policy_version 20873 (0.0008) +[2026-06-07 03:11:12,021][495927] Updated weights for policy 0, policy_version 20883 (0.0009) +[2026-06-07 03:11:12,166][495927] Updated weights for policy 0, policy_version 20893 (0.0008) +[2026-06-07 03:11:12,327][495927] Updated weights for policy 0, policy_version 20903 (0.0008) +[2026-06-07 03:11:12,957][495927] Updated weights for policy 0, policy_version 20913 (0.0008) +[2026-06-07 03:11:13,115][495927] Updated weights for policy 0, policy_version 20924 (0.0009) +[2026-06-07 03:11:13,263][495927] Updated weights for policy 0, policy_version 20934 (0.0008) +[2026-06-07 03:11:13,407][495927] Updated weights for policy 0, policy_version 20944 (0.0008) +[2026-06-07 03:11:13,573][495927] Updated weights for policy 0, policy_version 20955 (0.0008) +[2026-06-07 03:11:13,716][495927] Updated weights for policy 0, policy_version 20965 (0.0008) +[2026-06-07 03:11:13,873][495927] Updated weights for policy 0, policy_version 20975 (0.0008) +[2026-06-07 03:11:13,909][492660] Fps is (10 sec: 22937.7, 60 sec: 23483.8, 300 sec: 22993.1). Total num frames: 10747904. Throughput: 0: 23119.7. Samples: 10763648. Policy #0 lag: (min: 14.0, avg: 28.8, max: 78.0) +[2026-06-07 03:11:13,910][492660] Avg episode reward: [(0, '690.120')] +[2026-06-07 03:11:14,524][495927] Updated weights for policy 0, policy_version 20987 (0.0009) +[2026-06-07 03:11:14,671][495927] Updated weights for policy 0, policy_version 20997 (0.0008) +[2026-06-07 03:11:14,819][495927] Updated weights for policy 0, policy_version 21007 (0.0010) +[2026-06-07 03:11:14,974][495927] Updated weights for policy 0, policy_version 21017 (0.0008) +[2026-06-07 03:11:15,138][495927] Updated weights for policy 0, policy_version 21028 (0.0008) +[2026-06-07 03:11:15,314][495927] Updated weights for policy 0, policy_version 21039 (0.0008) +[2026-06-07 03:11:15,944][495927] Updated weights for policy 0, policy_version 21049 (0.0009) +[2026-06-07 03:11:16,077][495927] Updated weights for policy 0, policy_version 21059 (0.0008) +[2026-06-07 03:11:16,261][495927] Updated weights for policy 0, policy_version 21071 (0.0008) +[2026-06-07 03:11:16,424][495927] Updated weights for policy 0, policy_version 21082 (0.0008) +[2026-06-07 03:11:16,570][495927] Updated weights for policy 0, policy_version 21092 (0.0008) +[2026-06-07 03:11:16,729][495927] Updated weights for policy 0, policy_version 21102 (0.0008) +[2026-06-07 03:11:17,352][495927] Updated weights for policy 0, policy_version 21113 (0.0009) +[2026-06-07 03:11:17,499][495927] Updated weights for policy 0, policy_version 21123 (0.0008) +[2026-06-07 03:11:17,651][495927] Updated weights for policy 0, policy_version 21133 (0.0009) +[2026-06-07 03:11:17,803][495927] Updated weights for policy 0, policy_version 21143 (0.0008) +[2026-06-07 03:11:17,952][495927] Updated weights for policy 0, policy_version 21153 (0.0008) +[2026-06-07 03:11:18,099][495927] Updated weights for policy 0, policy_version 21163 (0.0008) +[2026-06-07 03:11:18,724][495927] Updated weights for policy 0, policy_version 21173 (0.0009) +[2026-06-07 03:11:18,869][495927] Updated weights for policy 0, policy_version 21183 (0.0008) +[2026-06-07 03:11:18,909][492660] Fps is (10 sec: 22937.6, 60 sec: 22937.6, 300 sec: 22993.1). Total num frames: 10846208. Throughput: 0: 23034.3. Samples: 10830464. Policy #0 lag: (min: 14.0, avg: 28.8, max: 78.0) +[2026-06-07 03:11:18,910][492660] Avg episode reward: [(0, '748.354')] +[2026-06-07 03:11:19,030][495927] Updated weights for policy 0, policy_version 21194 (0.0009) +[2026-06-07 03:11:19,214][495927] Updated weights for policy 0, policy_version 21206 (0.0008) +[2026-06-07 03:11:19,360][495927] Updated weights for policy 0, policy_version 21216 (0.0008) +[2026-06-07 03:11:19,511][495927] Updated weights for policy 0, policy_version 21226 (0.0008) +[2026-06-07 03:11:19,598][495570] Saving new best policy, reward=748.354! +[2026-06-07 03:11:20,134][495927] Updated weights for policy 0, policy_version 21236 (0.0008) +[2026-06-07 03:11:20,290][495927] Updated weights for policy 0, policy_version 21247 (0.0008) +[2026-06-07 03:11:20,434][495927] Updated weights for policy 0, policy_version 21257 (0.0008) +[2026-06-07 03:11:20,582][495927] Updated weights for policy 0, policy_version 21267 (0.0008) +[2026-06-07 03:11:20,736][495927] Updated weights for policy 0, policy_version 21277 (0.0009) +[2026-06-07 03:11:20,892][495927] Updated weights for policy 0, policy_version 21287 (0.0008) +[2026-06-07 03:11:21,507][495927] Updated weights for policy 0, policy_version 21297 (0.0008) +[2026-06-07 03:11:21,648][495927] Updated weights for policy 0, policy_version 21307 (0.0008) +[2026-06-07 03:11:21,789][495927] Updated weights for policy 0, policy_version 21317 (0.0008) +[2026-06-07 03:11:21,959][495927] Updated weights for policy 0, policy_version 21328 (0.0008) +[2026-06-07 03:11:22,112][495927] Updated weights for policy 0, policy_version 21338 (0.0008) +[2026-06-07 03:11:22,260][495927] Updated weights for policy 0, policy_version 21348 (0.0008) +[2026-06-07 03:11:22,414][495927] Updated weights for policy 0, policy_version 21358 (0.0009) +[2026-06-07 03:11:23,036][495927] Updated weights for policy 0, policy_version 21368 (0.0006) +[2026-06-07 03:11:23,191][495927] Updated weights for policy 0, policy_version 21378 (0.0005) +[2026-06-07 03:11:23,359][495927] Updated weights for policy 0, policy_version 21390 (0.0009) +[2026-06-07 03:11:23,517][495927] Updated weights for policy 0, policy_version 21400 (0.0011) +[2026-06-07 03:11:23,669][495927] Updated weights for policy 0, policy_version 21410 (0.0011) +[2026-06-07 03:11:23,819][495927] Updated weights for policy 0, policy_version 21420 (0.0010) +[2026-06-07 03:11:23,909][492660] Fps is (10 sec: 22937.5, 60 sec: 23483.8, 300 sec: 22993.1). Total num frames: 10977280. Throughput: 0: 22778.3. Samples: 10964224. Policy #0 lag: (min: 47.0, avg: 61.2, max: 111.0) +[2026-06-07 03:11:23,910][492660] Avg episode reward: [(0, '781.766')] +[2026-06-07 03:11:23,915][495570] Saving new best policy, reward=781.766! +[2026-06-07 03:11:24,427][495927] Updated weights for policy 0, policy_version 21430 (0.0008) +[2026-06-07 03:11:24,573][495927] Updated weights for policy 0, policy_version 21440 (0.0008) +[2026-06-07 03:11:24,717][495927] Updated weights for policy 0, policy_version 21450 (0.0008) +[2026-06-07 03:11:24,866][495927] Updated weights for policy 0, policy_version 21460 (0.0008) +[2026-06-07 03:11:25,025][495927] Updated weights for policy 0, policy_version 21470 (0.0008) +[2026-06-07 03:11:25,189][495927] Updated weights for policy 0, policy_version 21481 (0.0008) +[2026-06-07 03:11:25,811][495927] Updated weights for policy 0, policy_version 21491 (0.0008) +[2026-06-07 03:11:25,956][495927] Updated weights for policy 0, policy_version 21501 (0.0008) +[2026-06-07 03:11:26,128][495927] Updated weights for policy 0, policy_version 21513 (0.0008) +[2026-06-07 03:11:26,288][495927] Updated weights for policy 0, policy_version 21524 (0.0008) +[2026-06-07 03:11:26,441][495927] Updated weights for policy 0, policy_version 21534 (0.0008) +[2026-06-07 03:11:26,606][495927] Updated weights for policy 0, policy_version 21545 (0.0009) +[2026-06-07 03:11:27,233][495927] Updated weights for policy 0, policy_version 21555 (0.0009) +[2026-06-07 03:11:27,370][495927] Updated weights for policy 0, policy_version 21565 (0.0009) +[2026-06-07 03:11:27,514][495927] Updated weights for policy 0, policy_version 21575 (0.0008) +[2026-06-07 03:11:27,673][495927] Updated weights for policy 0, policy_version 21585 (0.0008) +[2026-06-07 03:11:27,837][495927] Updated weights for policy 0, policy_version 21596 (0.0008) +[2026-06-07 03:11:27,987][495927] Updated weights for policy 0, policy_version 21606 (0.0008) +[2026-06-07 03:11:28,634][495927] Updated weights for policy 0, policy_version 21617 (0.0009) +[2026-06-07 03:11:28,773][495927] Updated weights for policy 0, policy_version 21627 (0.0008) +[2026-06-07 03:11:28,909][492660] Fps is (10 sec: 22937.5, 60 sec: 22937.6, 300 sec: 22993.1). Total num frames: 11075584. Throughput: 0: 22880.7. Samples: 11102848. Policy #0 lag: (min: 47.0, avg: 61.2, max: 111.0) +[2026-06-07 03:11:28,911][492660] Avg episode reward: [(0, '754.346')] +[2026-06-07 03:11:28,913][495927] Updated weights for policy 0, policy_version 21637 (0.0008) +[2026-06-07 03:11:29,060][495927] Updated weights for policy 0, policy_version 21647 (0.0010) +[2026-06-07 03:11:29,216][495927] Updated weights for policy 0, policy_version 21657 (0.0008) +[2026-06-07 03:11:29,380][495927] Updated weights for policy 0, policy_version 21668 (0.0009) +[2026-06-07 03:11:29,547][495927] Updated weights for policy 0, policy_version 21679 (0.0009) +[2026-06-07 03:11:30,196][495927] Updated weights for policy 0, policy_version 21690 (0.0008) +[2026-06-07 03:11:30,343][495927] Updated weights for policy 0, policy_version 21700 (0.0008) +[2026-06-07 03:11:30,511][495927] Updated weights for policy 0, policy_version 21711 (0.0008) +[2026-06-07 03:11:30,657][495927] Updated weights for policy 0, policy_version 21721 (0.0010) +[2026-06-07 03:11:30,812][495927] Updated weights for policy 0, policy_version 21731 (0.0008) +[2026-06-07 03:11:30,969][495927] Updated weights for policy 0, policy_version 21741 (0.0008) +[2026-06-07 03:11:31,586][495927] Updated weights for policy 0, policy_version 21752 (0.0011) +[2026-06-07 03:11:31,736][495927] Updated weights for policy 0, policy_version 21762 (0.0008) +[2026-06-07 03:11:31,901][495927] Updated weights for policy 0, policy_version 21773 (0.0008) +[2026-06-07 03:11:32,064][495927] Updated weights for policy 0, policy_version 21784 (0.0008) +[2026-06-07 03:11:32,216][495927] Updated weights for policy 0, policy_version 21794 (0.0009) +[2026-06-07 03:11:32,371][495927] Updated weights for policy 0, policy_version 21804 (0.0008) +[2026-06-07 03:11:32,992][495927] Updated weights for policy 0, policy_version 21814 (0.0009) +[2026-06-07 03:11:33,127][495927] Updated weights for policy 0, policy_version 21824 (0.0008) +[2026-06-07 03:11:33,297][495927] Updated weights for policy 0, policy_version 21835 (0.0009) +[2026-06-07 03:11:33,445][495927] Updated weights for policy 0, policy_version 21845 (0.0009) +[2026-06-07 03:11:33,597][495927] Updated weights for policy 0, policy_version 21855 (0.0009) +[2026-06-07 03:11:33,744][495927] Updated weights for policy 0, policy_version 21865 (0.0008) +[2026-06-07 03:11:33,909][492660] Fps is (10 sec: 22937.5, 60 sec: 23483.7, 300 sec: 22993.3). Total num frames: 11206656. Throughput: 0: 23000.2. Samples: 11175936. Policy #0 lag: (min: 47.0, avg: 61.2, max: 111.0) +[2026-06-07 03:11:33,910][492660] Avg episode reward: [(0, '691.490')] +[2026-06-07 03:11:34,375][495927] Updated weights for policy 0, policy_version 21875 (0.0009) +[2026-06-07 03:11:34,529][495927] Updated weights for policy 0, policy_version 21886 (0.0008) +[2026-06-07 03:11:34,673][495927] Updated weights for policy 0, policy_version 21896 (0.0008) +[2026-06-07 03:11:34,822][495927] Updated weights for policy 0, policy_version 21906 (0.0008) +[2026-06-07 03:11:34,992][495927] Updated weights for policy 0, policy_version 21917 (0.0008) +[2026-06-07 03:11:35,142][495927] Updated weights for policy 0, policy_version 21927 (0.0008) +[2026-06-07 03:11:35,813][495927] Updated weights for policy 0, policy_version 21939 (0.0009) +[2026-06-07 03:11:35,952][495927] Updated weights for policy 0, policy_version 21949 (0.0008) +[2026-06-07 03:11:36,110][495927] Updated weights for policy 0, policy_version 21960 (0.0009) +[2026-06-07 03:11:36,253][495927] Updated weights for policy 0, policy_version 21970 (0.0009) +[2026-06-07 03:11:36,399][495927] Updated weights for policy 0, policy_version 21980 (0.0008) +[2026-06-07 03:11:36,557][495927] Updated weights for policy 0, policy_version 21991 (0.0008) +[2026-06-07 03:11:37,263][495927] Updated weights for policy 0, policy_version 22004 (0.0009) +[2026-06-07 03:11:37,398][495927] Updated weights for policy 0, policy_version 22014 (0.0008) +[2026-06-07 03:11:37,562][495927] Updated weights for policy 0, policy_version 22025 (0.0009) +[2026-06-07 03:11:37,727][495927] Updated weights for policy 0, policy_version 22036 (0.0008) +[2026-06-07 03:11:37,879][495927] Updated weights for policy 0, policy_version 22046 (0.0008) +[2026-06-07 03:11:38,043][495927] Updated weights for policy 0, policy_version 22057 (0.0009) +[2026-06-07 03:11:38,663][495927] Updated weights for policy 0, policy_version 22067 (0.0008) +[2026-06-07 03:11:38,804][495927] Updated weights for policy 0, policy_version 22077 (0.0008) +[2026-06-07 03:11:38,909][492660] Fps is (10 sec: 22937.8, 60 sec: 22937.6, 300 sec: 22993.1). Total num frames: 11304960. Throughput: 0: 23125.3. Samples: 11315968. Policy #0 lag: (min: 63.0, avg: 77.4, max: 127.0) +[2026-06-07 03:11:38,910][492660] Avg episode reward: [(0, '743.867')] +[2026-06-07 03:11:38,956][495927] Updated weights for policy 0, policy_version 22087 (0.0009) +[2026-06-07 03:11:39,100][495927] Updated weights for policy 0, policy_version 22097 (0.0008) +[2026-06-07 03:11:39,242][495927] Updated weights for policy 0, policy_version 22107 (0.0008) +[2026-06-07 03:11:39,398][495927] Updated weights for policy 0, policy_version 22117 (0.0008) +[2026-06-07 03:11:40,084][495927] Updated weights for policy 0, policy_version 22129 (0.0009) +[2026-06-07 03:11:40,256][495927] Updated weights for policy 0, policy_version 22141 (0.0009) +[2026-06-07 03:11:40,414][495927] Updated weights for policy 0, policy_version 22152 (0.0008) +[2026-06-07 03:11:40,564][495927] Updated weights for policy 0, policy_version 22162 (0.0009) +[2026-06-07 03:11:40,745][495927] Updated weights for policy 0, policy_version 22174 (0.0008) +[2026-06-07 03:11:40,899][495927] Updated weights for policy 0, policy_version 22184 (0.0008) +[2026-06-07 03:11:41,528][495927] Updated weights for policy 0, policy_version 22194 (0.0008) +[2026-06-07 03:11:41,665][495927] Updated weights for policy 0, policy_version 22204 (0.0008) +[2026-06-07 03:11:41,822][495927] Updated weights for policy 0, policy_version 22215 (0.0009) +[2026-06-07 03:11:41,975][495927] Updated weights for policy 0, policy_version 22225 (0.0008) +[2026-06-07 03:11:42,145][495927] Updated weights for policy 0, policy_version 22236 (0.0008) +[2026-06-07 03:11:42,314][495927] Updated weights for policy 0, policy_version 22247 (0.0008) +[2026-06-07 03:11:42,932][495927] Updated weights for policy 0, policy_version 22257 (0.0009) +[2026-06-07 03:11:43,085][495927] Updated weights for policy 0, policy_version 22268 (0.0009) +[2026-06-07 03:11:43,233][495927] Updated weights for policy 0, policy_version 22278 (0.0010) +[2026-06-07 03:11:43,380][495927] Updated weights for policy 0, policy_version 22288 (0.0008) +[2026-06-07 03:11:43,538][495927] Updated weights for policy 0, policy_version 22299 (0.0008) +[2026-06-07 03:11:43,701][495927] Updated weights for policy 0, policy_version 22310 (0.0008) +[2026-06-07 03:11:43,853][495927] Updated weights for policy 0, policy_version 22320 (0.0008) +[2026-06-07 03:11:43,909][492660] Fps is (10 sec: 22937.7, 60 sec: 23483.7, 300 sec: 23104.2). Total num frames: 11436032. Throughput: 0: 23082.7. Samples: 11450880. Policy #0 lag: (min: 63.0, avg: 77.4, max: 127.0) +[2026-06-07 03:11:43,910][492660] Avg episode reward: [(0, '745.133')] +[2026-06-07 03:11:44,499][495927] Updated weights for policy 0, policy_version 22330 (0.0009) +[2026-06-07 03:11:44,639][495927] Updated weights for policy 0, policy_version 22340 (0.0008) +[2026-06-07 03:11:44,805][495927] Updated weights for policy 0, policy_version 22351 (0.0009) +[2026-06-07 03:11:44,955][495927] Updated weights for policy 0, policy_version 22361 (0.0009) +[2026-06-07 03:11:45,107][495927] Updated weights for policy 0, policy_version 22371 (0.0008) +[2026-06-07 03:11:45,252][495927] Updated weights for policy 0, policy_version 22381 (0.0008) +[2026-06-07 03:11:45,875][495927] Updated weights for policy 0, policy_version 22391 (0.0009) +[2026-06-07 03:11:46,036][495927] Updated weights for policy 0, policy_version 22402 (0.0008) +[2026-06-07 03:11:46,189][495927] Updated weights for policy 0, policy_version 22412 (0.0008) +[2026-06-07 03:11:46,373][495927] Updated weights for policy 0, policy_version 22425 (0.0008) +[2026-06-07 03:11:46,527][495927] Updated weights for policy 0, policy_version 22435 (0.0009) +[2026-06-07 03:11:46,698][495927] Updated weights for policy 0, policy_version 22446 (0.0008) +[2026-06-07 03:11:47,322][495927] Updated weights for policy 0, policy_version 22457 (0.0008) +[2026-06-07 03:11:47,485][495927] Updated weights for policy 0, policy_version 22468 (0.0008) +[2026-06-07 03:11:47,632][495927] Updated weights for policy 0, policy_version 22478 (0.0008) +[2026-06-07 03:11:47,796][495927] Updated weights for policy 0, policy_version 22489 (0.0009) +[2026-06-07 03:11:47,958][495927] Updated weights for policy 0, policy_version 22500 (0.0008) +[2026-06-07 03:11:48,131][495927] Updated weights for policy 0, policy_version 22511 (0.0009) +[2026-06-07 03:11:48,752][495927] Updated weights for policy 0, policy_version 22521 (0.0008) +[2026-06-07 03:11:48,901][495927] Updated weights for policy 0, policy_version 22531 (0.0009) +[2026-06-07 03:11:48,909][492660] Fps is (10 sec: 22937.2, 60 sec: 22937.6, 300 sec: 22993.1). Total num frames: 11534336. Throughput: 0: 22943.2. Samples: 11518848. Policy #0 lag: (min: 63.0, avg: 77.4, max: 127.0) +[2026-06-07 03:11:48,910][492660] Avg episode reward: [(0, '724.808')] +[2026-06-07 03:11:49,060][495927] Updated weights for policy 0, policy_version 22542 (0.0008) +[2026-06-07 03:11:49,211][495927] Updated weights for policy 0, policy_version 22552 (0.0009) +[2026-06-07 03:11:49,361][495927] Updated weights for policy 0, policy_version 22562 (0.0008) +[2026-06-07 03:11:49,510][495927] Updated weights for policy 0, policy_version 22572 (0.0008) +[2026-06-07 03:11:50,132][495927] Updated weights for policy 0, policy_version 22582 (0.0008) +[2026-06-07 03:11:50,278][495927] Updated weights for policy 0, policy_version 22592 (0.0009) +[2026-06-07 03:11:50,423][495927] Updated weights for policy 0, policy_version 22602 (0.0009) +[2026-06-07 03:11:50,573][495927] Updated weights for policy 0, policy_version 22612 (0.0009) +[2026-06-07 03:11:50,760][495927] Updated weights for policy 0, policy_version 22625 (0.0008) +[2026-06-07 03:11:50,938][495927] Updated weights for policy 0, policy_version 22637 (0.0008) +[2026-06-07 03:11:51,585][495927] Updated weights for policy 0, policy_version 22649 (0.0009) +[2026-06-07 03:11:51,729][495927] Updated weights for policy 0, policy_version 22659 (0.0008) +[2026-06-07 03:11:51,885][495927] Updated weights for policy 0, policy_version 22670 (0.0008) +[2026-06-07 03:11:52,065][495927] Updated weights for policy 0, policy_version 22682 (0.0008) +[2026-06-07 03:11:52,207][495927] Updated weights for policy 0, policy_version 22692 (0.0008) +[2026-06-07 03:11:52,912][495927] Updated weights for policy 0, policy_version 22705 (0.0009) +[2026-06-07 03:11:53,069][495927] Updated weights for policy 0, policy_version 22716 (0.0008) +[2026-06-07 03:11:53,219][495927] Updated weights for policy 0, policy_version 22726 (0.0008) +[2026-06-07 03:11:53,381][495927] Updated weights for policy 0, policy_version 22737 (0.0008) +[2026-06-07 03:11:53,528][495927] Updated weights for policy 0, policy_version 22747 (0.0008) +[2026-06-07 03:11:53,681][495927] Updated weights for policy 0, policy_version 22757 (0.0009) +[2026-06-07 03:11:53,833][495927] Updated weights for policy 0, policy_version 22767 (0.0009) +[2026-06-07 03:11:53,909][492660] Fps is (10 sec: 22937.5, 60 sec: 22937.6, 300 sec: 23104.2). Total num frames: 11665408. Throughput: 0: 22767.0. Samples: 11652736. Policy #0 lag: (min: 63.0, avg: 75.0, max: 127.0) +[2026-06-07 03:11:53,910][492660] Avg episode reward: [(0, '707.357')] +[2026-06-07 03:11:54,458][495927] Updated weights for policy 0, policy_version 22777 (0.0009) +[2026-06-07 03:11:54,603][495927] Updated weights for policy 0, policy_version 22787 (0.0008) +[2026-06-07 03:11:54,750][495927] Updated weights for policy 0, policy_version 22797 (0.0008) +[2026-06-07 03:11:54,902][495927] Updated weights for policy 0, policy_version 22807 (0.0008) +[2026-06-07 03:11:55,054][495927] Updated weights for policy 0, policy_version 22817 (0.0009) +[2026-06-07 03:11:55,201][495927] Updated weights for policy 0, policy_version 22827 (0.0008) +[2026-06-07 03:11:55,842][495927] Updated weights for policy 0, policy_version 22838 (0.0008) +[2026-06-07 03:11:55,985][495927] Updated weights for policy 0, policy_version 22848 (0.0008) +[2026-06-07 03:11:56,150][495927] Updated weights for policy 0, policy_version 22859 (0.0007) +[2026-06-07 03:11:56,315][495927] Updated weights for policy 0, policy_version 22870 (0.0005) +[2026-06-07 03:11:56,492][495927] Updated weights for policy 0, policy_version 22882 (0.0005) +[2026-06-07 03:11:56,648][495927] Updated weights for policy 0, policy_version 22892 (0.0007) +[2026-06-07 03:11:57,254][495927] Updated weights for policy 0, policy_version 22902 (0.0008) +[2026-06-07 03:11:57,403][495927] Updated weights for policy 0, policy_version 22912 (0.0008) +[2026-06-07 03:11:57,560][495927] Updated weights for policy 0, policy_version 22923 (0.0008) +[2026-06-07 03:11:57,710][495927] Updated weights for policy 0, policy_version 22933 (0.0009) +[2026-06-07 03:11:57,868][495927] Updated weights for policy 0, policy_version 22943 (0.0008) +[2026-06-07 03:11:58,046][495927] Updated weights for policy 0, policy_version 22955 (0.0008) +[2026-06-07 03:11:58,671][495927] Updated weights for policy 0, policy_version 22965 (0.0008) +[2026-06-07 03:11:58,808][495927] Updated weights for policy 0, policy_version 22975 (0.0008) +[2026-06-07 03:11:58,909][492660] Fps is (10 sec: 22937.9, 60 sec: 22937.6, 300 sec: 22993.1). Total num frames: 11763712. Throughput: 0: 22875.0. Samples: 11793024. Policy #0 lag: (min: 63.0, avg: 75.0, max: 127.0) +[2026-06-07 03:11:58,910][492660] Avg episode reward: [(0, '733.618')] +[2026-06-07 03:11:58,952][495927] Updated weights for policy 0, policy_version 22985 (0.0008) +[2026-06-07 03:11:59,110][495927] Updated weights for policy 0, policy_version 22995 (0.0008) +[2026-06-07 03:11:59,261][495927] Updated weights for policy 0, policy_version 23005 (0.0008) +[2026-06-07 03:11:59,411][495927] Updated weights for policy 0, policy_version 23015 (0.0008) +[2026-06-07 03:12:00,058][495927] Updated weights for policy 0, policy_version 23026 (0.0008) +[2026-06-07 03:12:00,212][495927] Updated weights for policy 0, policy_version 23037 (0.0008) +[2026-06-07 03:12:00,362][495927] Updated weights for policy 0, policy_version 23047 (0.0009) +[2026-06-07 03:12:00,510][495927] Updated weights for policy 0, policy_version 23057 (0.0008) +[2026-06-07 03:12:00,675][495927] Updated weights for policy 0, policy_version 23068 (0.0008) +[2026-06-07 03:12:00,818][495927] Updated weights for policy 0, policy_version 23078 (0.0008) +[2026-06-07 03:12:00,972][495927] Updated weights for policy 0, policy_version 23088 (0.0008) +[2026-06-07 03:12:01,595][495927] Updated weights for policy 0, policy_version 23098 (0.0009) +[2026-06-07 03:12:01,753][495927] Updated weights for policy 0, policy_version 23109 (0.0008) +[2026-06-07 03:12:01,902][495927] Updated weights for policy 0, policy_version 23119 (0.0008) +[2026-06-07 03:12:02,046][495927] Updated weights for policy 0, policy_version 23129 (0.0008) +[2026-06-07 03:12:02,201][495927] Updated weights for policy 0, policy_version 23139 (0.0008) +[2026-06-07 03:12:02,388][495927] Updated weights for policy 0, policy_version 23151 (0.0008) +[2026-06-07 03:12:03,001][495927] Updated weights for policy 0, policy_version 23161 (0.0009) +[2026-06-07 03:12:03,150][495927] Updated weights for policy 0, policy_version 23172 (0.0008) +[2026-06-07 03:12:03,299][495927] Updated weights for policy 0, policy_version 23182 (0.0008) +[2026-06-07 03:12:03,456][495927] Updated weights for policy 0, policy_version 23192 (0.0008) +[2026-06-07 03:12:03,622][495927] Updated weights for policy 0, policy_version 23203 (0.0008) +[2026-06-07 03:12:03,766][495927] Updated weights for policy 0, policy_version 23213 (0.0008) +[2026-06-07 03:12:03,909][492660] Fps is (10 sec: 22937.6, 60 sec: 22937.6, 300 sec: 23104.2). Total num frames: 11894784. Throughput: 0: 23023.0. Samples: 11866496. Policy #0 lag: (min: 63.0, avg: 75.0, max: 127.0) +[2026-06-07 03:12:03,910][492660] Avg episode reward: [(0, '755.218')] +[2026-06-07 03:12:04,399][495927] Updated weights for policy 0, policy_version 23224 (0.0008) +[2026-06-07 03:12:04,546][495927] Updated weights for policy 0, policy_version 23234 (0.0008) +[2026-06-07 03:12:04,692][495927] Updated weights for policy 0, policy_version 23244 (0.0008) +[2026-06-07 03:12:04,847][495927] Updated weights for policy 0, policy_version 23254 (0.0008) +[2026-06-07 03:12:04,993][495927] Updated weights for policy 0, policy_version 23264 (0.0008) +[2026-06-07 03:12:05,139][495927] Updated weights for policy 0, policy_version 23274 (0.0008) +[2026-06-07 03:12:05,770][495927] Updated weights for policy 0, policy_version 23284 (0.0008) +[2026-06-07 03:12:05,909][495927] Updated weights for policy 0, policy_version 23294 (0.0008) +[2026-06-07 03:12:06,064][495927] Updated weights for policy 0, policy_version 23304 (0.0007) +[2026-06-07 03:12:06,242][495927] Updated weights for policy 0, policy_version 23316 (0.0005) +[2026-06-07 03:12:06,392][495927] Updated weights for policy 0, policy_version 23326 (0.0005) +[2026-06-07 03:12:06,556][495927] Updated weights for policy 0, policy_version 23337 (0.0005) +[2026-06-07 03:12:07,184][495927] Updated weights for policy 0, policy_version 23348 (0.0006) +[2026-06-07 03:12:07,325][495927] Updated weights for policy 0, policy_version 23358 (0.0005) +[2026-06-07 03:12:07,472][495927] Updated weights for policy 0, policy_version 23368 (0.0004) +[2026-06-07 03:12:07,628][495927] Updated weights for policy 0, policy_version 23378 (0.0004) +[2026-06-07 03:12:07,780][495927] Updated weights for policy 0, policy_version 23388 (0.0005) +[2026-06-07 03:12:07,928][495927] Updated weights for policy 0, policy_version 23398 (0.0008) +[2026-06-07 03:12:08,076][495927] Updated weights for policy 0, policy_version 23408 (0.0009) +[2026-06-07 03:12:08,705][495927] Updated weights for policy 0, policy_version 23418 (0.0008) +[2026-06-07 03:12:08,891][495927] Updated weights for policy 0, policy_version 23431 (0.0008) +[2026-06-07 03:12:08,909][492660] Fps is (10 sec: 22937.6, 60 sec: 22937.6, 300 sec: 22993.1). Total num frames: 11993088. Throughput: 0: 23133.8. Samples: 12005248. Policy #0 lag: (min: 63.0, avg: 75.0, max: 127.0) +[2026-06-07 03:12:08,910][492660] Avg episode reward: [(0, '748.238')] +[2026-06-07 03:12:09,062][495927] Updated weights for policy 0, policy_version 23442 (0.0009) +[2026-06-07 03:12:09,207][495927] Updated weights for policy 0, policy_version 23452 (0.0008) +[2026-06-07 03:12:09,362][495927] Updated weights for policy 0, policy_version 23462 (0.0008) +[2026-06-07 03:12:09,510][495927] Updated weights for policy 0, policy_version 23472 (0.0008) +[2026-06-07 03:12:10,138][495927] Updated weights for policy 0, policy_version 23483 (0.0009) +[2026-06-07 03:12:10,313][495927] Updated weights for policy 0, policy_version 23495 (0.0008) +[2026-06-07 03:12:10,464][495927] Updated weights for policy 0, policy_version 23505 (0.0008) +[2026-06-07 03:12:10,622][495927] Updated weights for policy 0, policy_version 23515 (0.0008) +[2026-06-07 03:12:10,779][495927] Updated weights for policy 0, policy_version 23525 (0.0008) +[2026-06-07 03:12:10,922][495927] Updated weights for policy 0, policy_version 23535 (0.0008) +[2026-06-07 03:12:11,544][495927] Updated weights for policy 0, policy_version 23546 (0.0009) +[2026-06-07 03:12:11,690][495927] Updated weights for policy 0, policy_version 23556 (0.0008) +[2026-06-07 03:12:11,833][495927] Updated weights for policy 0, policy_version 23566 (0.0008) +[2026-06-07 03:12:11,987][495927] Updated weights for policy 0, policy_version 23576 (0.0008) +[2026-06-07 03:12:12,141][495927] Updated weights for policy 0, policy_version 23586 (0.0008) +[2026-06-07 03:12:12,290][495927] Updated weights for policy 0, policy_version 23596 (0.0008) +[2026-06-07 03:12:12,915][495927] Updated weights for policy 0, policy_version 23606 (0.0008) +[2026-06-07 03:12:13,066][495927] Updated weights for policy 0, policy_version 23616 (0.0008) +[2026-06-07 03:12:13,227][495927] Updated weights for policy 0, policy_version 23627 (0.0008) +[2026-06-07 03:12:13,367][495927] Updated weights for policy 0, policy_version 23637 (0.0008) +[2026-06-07 03:12:13,527][495927] Updated weights for policy 0, policy_version 23647 (0.0008) +[2026-06-07 03:12:13,687][495927] Updated weights for policy 0, policy_version 23657 (0.0008) +[2026-06-07 03:12:13,909][492660] Fps is (10 sec: 22937.6, 60 sec: 22937.6, 300 sec: 22993.1). Total num frames: 12124160. Throughput: 0: 23060.0. Samples: 12140544. Policy #0 lag: (min: 42.0, avg: 89.1, max: 102.0) +[2026-06-07 03:12:13,910][492660] Avg episode reward: [(0, '745.796')] +[2026-06-07 03:12:14,300][495927] Updated weights for policy 0, policy_version 23667 (0.0008) +[2026-06-07 03:12:14,443][495927] Updated weights for policy 0, policy_version 23678 (0.0008) +[2026-06-07 03:12:14,604][495927] Updated weights for policy 0, policy_version 23688 (0.0008) +[2026-06-07 03:12:14,757][495927] Updated weights for policy 0, policy_version 23698 (0.0008) +[2026-06-07 03:12:14,910][495927] Updated weights for policy 0, policy_version 23708 (0.0008) +[2026-06-07 03:12:15,063][495927] Updated weights for policy 0, policy_version 23718 (0.0008) +[2026-06-07 03:12:15,724][495927] Updated weights for policy 0, policy_version 23730 (0.0009) +[2026-06-07 03:12:15,870][495927] Updated weights for policy 0, policy_version 23740 (0.0010) +[2026-06-07 03:12:16,026][495927] Updated weights for policy 0, policy_version 23751 (0.0010) +[2026-06-07 03:12:16,199][495927] Updated weights for policy 0, policy_version 23762 (0.0008) +[2026-06-07 03:12:16,346][495927] Updated weights for policy 0, policy_version 23772 (0.0008) +[2026-06-07 03:12:16,496][495927] Updated weights for policy 0, policy_version 23782 (0.0008) +[2026-06-07 03:12:16,646][495927] Updated weights for policy 0, policy_version 23792 (0.0009) +[2026-06-07 03:12:17,259][495927] Updated weights for policy 0, policy_version 23802 (0.0008) +[2026-06-07 03:12:17,431][495927] Updated weights for policy 0, policy_version 23814 (0.0008) +[2026-06-07 03:12:17,580][495927] Updated weights for policy 0, policy_version 23824 (0.0009) +[2026-06-07 03:12:17,735][495927] Updated weights for policy 0, policy_version 23834 (0.0008) +[2026-06-07 03:12:17,878][495927] Updated weights for policy 0, policy_version 23844 (0.0008) +[2026-06-07 03:12:18,035][495927] Updated weights for policy 0, policy_version 23854 (0.0008) +[2026-06-07 03:12:18,686][495927] Updated weights for policy 0, policy_version 23865 (0.0008) +[2026-06-07 03:12:18,832][495927] Updated weights for policy 0, policy_version 23875 (0.0009) +[2026-06-07 03:12:18,909][492660] Fps is (10 sec: 22937.5, 60 sec: 22937.6, 300 sec: 22993.1). Total num frames: 12222464. Throughput: 0: 22954.6. Samples: 12208896. Policy #0 lag: (min: 42.0, avg: 89.1, max: 102.0) +[2026-06-07 03:12:18,910][492660] Avg episode reward: [(0, '724.678')] +[2026-06-07 03:12:18,982][495927] Updated weights for policy 0, policy_version 23885 (0.0008) +[2026-06-07 03:12:19,126][495927] Updated weights for policy 0, policy_version 23895 (0.0008) +[2026-06-07 03:12:19,299][495927] Updated weights for policy 0, policy_version 23906 (0.0008) +[2026-06-07 03:12:19,451][495927] Updated weights for policy 0, policy_version 23916 (0.0008) +[2026-06-07 03:12:20,064][495927] Updated weights for policy 0, policy_version 23926 (0.0008) +[2026-06-07 03:12:20,204][495927] Updated weights for policy 0, policy_version 23936 (0.0008) +[2026-06-07 03:12:20,353][495927] Updated weights for policy 0, policy_version 23946 (0.0009) +[2026-06-07 03:12:20,497][495927] Updated weights for policy 0, policy_version 23956 (0.0008) +[2026-06-07 03:12:20,658][495927] Updated weights for policy 0, policy_version 23966 (0.0008) +[2026-06-07 03:12:20,819][495927] Updated weights for policy 0, policy_version 23977 (0.0008) +[2026-06-07 03:12:21,435][495927] Updated weights for policy 0, policy_version 23987 (0.0008) +[2026-06-07 03:12:21,599][495927] Updated weights for policy 0, policy_version 23998 (0.0008) +[2026-06-07 03:12:21,747][495927] Updated weights for policy 0, policy_version 24008 (0.0004) +[2026-06-07 03:12:21,900][495927] Updated weights for policy 0, policy_version 24018 (0.0004) +[2026-06-07 03:12:22,048][495927] Updated weights for policy 0, policy_version 24028 (0.0004) +[2026-06-07 03:12:22,197][495927] Updated weights for policy 0, policy_version 24038 (0.0005) +[2026-06-07 03:12:22,345][495927] Updated weights for policy 0, policy_version 24048 (0.0008) +[2026-06-07 03:12:22,973][495927] Updated weights for policy 0, policy_version 24058 (0.0009) +[2026-06-07 03:12:23,110][495927] Updated weights for policy 0, policy_version 24068 (0.0008) +[2026-06-07 03:12:23,261][495927] Updated weights for policy 0, policy_version 24078 (0.0008) +[2026-06-07 03:12:23,413][495927] Updated weights for policy 0, policy_version 24088 (0.0009) +[2026-06-07 03:12:23,565][495927] Updated weights for policy 0, policy_version 24098 (0.0008) +[2026-06-07 03:12:23,713][495927] Updated weights for policy 0, policy_version 24108 (0.0008) +[2026-06-07 03:12:23,909][492660] Fps is (10 sec: 22937.5, 60 sec: 22937.6, 300 sec: 22993.1). Total num frames: 12353536. Throughput: 0: 22849.4. Samples: 12344192. Policy #0 lag: (min: 42.0, avg: 89.1, max: 102.0) +[2026-06-07 03:12:23,910][492660] Avg episode reward: [(0, '728.190')] +[2026-06-07 03:12:24,320][495927] Updated weights for policy 0, policy_version 24118 (0.0009) +[2026-06-07 03:12:24,461][495927] Updated weights for policy 0, policy_version 24128 (0.0008) +[2026-06-07 03:12:24,612][495927] Updated weights for policy 0, policy_version 24138 (0.0009) +[2026-06-07 03:12:24,763][495927] Updated weights for policy 0, policy_version 24148 (0.0008) +[2026-06-07 03:12:24,912][495927] Updated weights for policy 0, policy_version 24158 (0.0008) +[2026-06-07 03:12:25,075][495927] Updated weights for policy 0, policy_version 24169 (0.0008) +[2026-06-07 03:12:25,713][495927] Updated weights for policy 0, policy_version 24179 (0.0009) +[2026-06-07 03:12:25,852][495927] Updated weights for policy 0, policy_version 24189 (0.0008) +[2026-06-07 03:12:26,007][495927] Updated weights for policy 0, policy_version 24199 (0.0008) +[2026-06-07 03:12:26,150][495927] Updated weights for policy 0, policy_version 24209 (0.0008) +[2026-06-07 03:12:26,324][495927] Updated weights for policy 0, policy_version 24220 (0.0009) +[2026-06-07 03:12:26,485][495927] Updated weights for policy 0, policy_version 24231 (0.0008) +[2026-06-07 03:12:27,099][495927] Updated weights for policy 0, policy_version 24241 (0.0009) +[2026-06-07 03:12:27,248][495927] Updated weights for policy 0, policy_version 24251 (0.0008) +[2026-06-07 03:12:27,389][495927] Updated weights for policy 0, policy_version 24261 (0.0008) +[2026-06-07 03:12:27,535][495927] Updated weights for policy 0, policy_version 24271 (0.0008) +[2026-06-07 03:12:27,687][495927] Updated weights for policy 0, policy_version 24281 (0.0008) +[2026-06-07 03:12:27,836][495927] Updated weights for policy 0, policy_version 24291 (0.0008) +[2026-06-07 03:12:27,992][495927] Updated weights for policy 0, policy_version 24301 (0.0009) +[2026-06-07 03:12:28,632][495927] Updated weights for policy 0, policy_version 24311 (0.0009) +[2026-06-07 03:12:28,789][495927] Updated weights for policy 0, policy_version 24322 (0.0008) +[2026-06-07 03:12:28,909][492660] Fps is (10 sec: 22937.6, 60 sec: 22937.6, 300 sec: 22993.1). Total num frames: 12451840. Throughput: 0: 23011.5. Samples: 12486400. Policy #0 lag: (min: 63.0, avg: 78.0, max: 127.0) +[2026-06-07 03:12:28,910][492660] Avg episode reward: [(0, '828.839')] +[2026-06-07 03:12:28,938][495927] Updated weights for policy 0, policy_version 24332 (0.0009) +[2026-06-07 03:12:29,114][495927] Updated weights for policy 0, policy_version 24344 (0.0008) +[2026-06-07 03:12:29,270][495927] Updated weights for policy 0, policy_version 24354 (0.0008) +[2026-06-07 03:12:29,416][495927] Updated weights for policy 0, policy_version 24364 (0.0008) +[2026-06-07 03:12:29,479][495570] Saving new best policy, reward=828.839! +[2026-06-07 03:12:30,047][495927] Updated weights for policy 0, policy_version 24374 (0.0008) +[2026-06-07 03:12:30,243][495927] Updated weights for policy 0, policy_version 24388 (0.0008) +[2026-06-07 03:12:30,386][495927] Updated weights for policy 0, policy_version 24398 (0.0008) +[2026-06-07 03:12:30,545][495927] Updated weights for policy 0, policy_version 24408 (0.0008) +[2026-06-07 03:12:30,703][495927] Updated weights for policy 0, policy_version 24418 (0.0008) +[2026-06-07 03:12:30,856][495927] Updated weights for policy 0, policy_version 24428 (0.0008) +[2026-06-07 03:12:31,463][495927] Updated weights for policy 0, policy_version 24438 (0.0009) +[2026-06-07 03:12:31,617][495927] Updated weights for policy 0, policy_version 24449 (0.0009) +[2026-06-07 03:12:31,787][495927] Updated weights for policy 0, policy_version 24461 (0.0008) +[2026-06-07 03:12:31,941][495927] Updated weights for policy 0, policy_version 24471 (0.0008) +[2026-06-07 03:12:32,098][495927] Updated weights for policy 0, policy_version 24481 (0.0008) +[2026-06-07 03:12:32,249][495927] Updated weights for policy 0, policy_version 24491 (0.0009) +[2026-06-07 03:12:32,883][495927] Updated weights for policy 0, policy_version 24501 (0.0009) +[2026-06-07 03:12:33,026][495927] Updated weights for policy 0, policy_version 24511 (0.0008) +[2026-06-07 03:12:33,171][495927] Updated weights for policy 0, policy_version 24521 (0.0009) +[2026-06-07 03:12:33,340][495927] Updated weights for policy 0, policy_version 24532 (0.0008) +[2026-06-07 03:12:33,496][495927] Updated weights for policy 0, policy_version 24542 (0.0008) +[2026-06-07 03:12:33,681][495927] Updated weights for policy 0, policy_version 24554 (0.0008) +[2026-06-07 03:12:33,909][492660] Fps is (10 sec: 22937.7, 60 sec: 22937.6, 300 sec: 22993.2). Total num frames: 12582912. Throughput: 0: 23119.8. Samples: 12559232. Policy #0 lag: (min: 63.0, avg: 78.0, max: 127.0) +[2026-06-07 03:12:33,910][492660] Avg episode reward: [(0, '837.306')] +[2026-06-07 03:12:33,916][495570] Saving new best policy, reward=837.306! +[2026-06-07 03:12:34,318][495927] Updated weights for policy 0, policy_version 24566 (0.0008) +[2026-06-07 03:12:34,469][495927] Updated weights for policy 0, policy_version 24576 (0.0010) +[2026-06-07 03:12:34,618][495927] Updated weights for policy 0, policy_version 24586 (0.0008) +[2026-06-07 03:12:34,798][495927] Updated weights for policy 0, policy_version 24598 (0.0009) +[2026-06-07 03:12:34,955][495927] Updated weights for policy 0, policy_version 24608 (0.0008) +[2026-06-07 03:12:35,117][495927] Updated weights for policy 0, policy_version 24619 (0.0008) +[2026-06-07 03:12:35,727][495927] Updated weights for policy 0, policy_version 24629 (0.0009) +[2026-06-07 03:12:35,875][495927] Updated weights for policy 0, policy_version 24639 (0.0008) +[2026-06-07 03:12:36,020][495927] Updated weights for policy 0, policy_version 24649 (0.0008) +[2026-06-07 03:12:36,175][495927] Updated weights for policy 0, policy_version 24659 (0.0009) +[2026-06-07 03:12:36,335][495927] Updated weights for policy 0, policy_version 24669 (0.0008) +[2026-06-07 03:12:36,488][495927] Updated weights for policy 0, policy_version 24679 (0.0008) +[2026-06-07 03:12:37,099][495927] Updated weights for policy 0, policy_version 24689 (0.0008) +[2026-06-07 03:12:37,255][495927] Updated weights for policy 0, policy_version 24700 (0.0008) +[2026-06-07 03:12:37,431][495927] Updated weights for policy 0, policy_version 24712 (0.0008) +[2026-06-07 03:12:37,583][495927] Updated weights for policy 0, policy_version 24722 (0.0008) +[2026-06-07 03:12:37,733][495927] Updated weights for policy 0, policy_version 24732 (0.0008) +[2026-06-07 03:12:37,881][495927] Updated weights for policy 0, policy_version 24742 (0.0009) +[2026-06-07 03:12:38,032][495927] Updated weights for policy 0, policy_version 24752 (0.0009) +[2026-06-07 03:12:38,653][495927] Updated weights for policy 0, policy_version 24762 (0.0008) +[2026-06-07 03:12:38,790][495927] Updated weights for policy 0, policy_version 24772 (0.0008) +[2026-06-07 03:12:38,909][492660] Fps is (10 sec: 22937.8, 60 sec: 22937.6, 300 sec: 22993.1). Total num frames: 12681216. Throughput: 0: 23168.0. Samples: 12695296. Policy #0 lag: (min: 63.0, avg: 78.0, max: 127.0) +[2026-06-07 03:12:38,910][492660] Avg episode reward: [(0, '833.494')] +[2026-06-07 03:12:38,941][495927] Updated weights for policy 0, policy_version 24782 (0.0009) +[2026-06-07 03:12:39,099][495927] Updated weights for policy 0, policy_version 24792 (0.0008) +[2026-06-07 03:12:39,247][495927] Updated weights for policy 0, policy_version 24802 (0.0008) +[2026-06-07 03:12:39,396][495927] Updated weights for policy 0, policy_version 24812 (0.0008) +[2026-06-07 03:12:40,011][495927] Updated weights for policy 0, policy_version 24822 (0.0008) +[2026-06-07 03:12:40,165][495927] Updated weights for policy 0, policy_version 24833 (0.0008) +[2026-06-07 03:12:40,361][495927] Updated weights for policy 0, policy_version 24846 (0.0008) +[2026-06-07 03:12:40,511][495927] Updated weights for policy 0, policy_version 24856 (0.0006) +[2026-06-07 03:12:40,658][495927] Updated weights for policy 0, policy_version 24866 (0.0008) +[2026-06-07 03:12:40,817][495927] Updated weights for policy 0, policy_version 24876 (0.0008) +[2026-06-07 03:12:41,430][495927] Updated weights for policy 0, policy_version 24886 (0.0009) +[2026-06-07 03:12:41,569][495927] Updated weights for policy 0, policy_version 24896 (0.0008) +[2026-06-07 03:12:41,764][495927] Updated weights for policy 0, policy_version 24909 (0.0008) +[2026-06-07 03:12:41,936][495927] Updated weights for policy 0, policy_version 24920 (0.0008) +[2026-06-07 03:12:42,080][495927] Updated weights for policy 0, policy_version 24930 (0.0008) +[2026-06-07 03:12:42,232][495927] Updated weights for policy 0, policy_version 24940 (0.0008) +[2026-06-07 03:12:42,886][495927] Updated weights for policy 0, policy_version 24952 (0.0009) +[2026-06-07 03:12:43,057][495927] Updated weights for policy 0, policy_version 24964 (0.0008) +[2026-06-07 03:12:43,199][495927] Updated weights for policy 0, policy_version 24974 (0.0008) +[2026-06-07 03:12:43,356][495927] Updated weights for policy 0, policy_version 24984 (0.0008) +[2026-06-07 03:12:43,522][495927] Updated weights for policy 0, policy_version 24995 (0.0008) +[2026-06-07 03:12:43,672][495927] Updated weights for policy 0, policy_version 25005 (0.0009) +[2026-06-07 03:12:43,909][492660] Fps is (10 sec: 22937.2, 60 sec: 22937.5, 300 sec: 22993.1). Total num frames: 12812288. Throughput: 0: 23031.4. Samples: 12829440. Policy #0 lag: (min: 63.0, avg: 78.0, max: 127.0) +[2026-06-07 03:12:43,910][492660] Avg episode reward: [(0, '861.464')] +[2026-06-07 03:12:43,920][495570] Saving new best policy, reward=861.464! +[2026-06-07 03:12:44,310][495927] Updated weights for policy 0, policy_version 25015 (0.0008) +[2026-06-07 03:12:44,461][495927] Updated weights for policy 0, policy_version 25026 (0.0008) +[2026-06-07 03:12:44,615][495927] Updated weights for policy 0, policy_version 25036 (0.0009) +[2026-06-07 03:12:44,774][495927] Updated weights for policy 0, policy_version 25047 (0.0008) +[2026-06-07 03:12:44,930][495927] Updated weights for policy 0, policy_version 25057 (0.0008) +[2026-06-07 03:12:45,097][495927] Updated weights for policy 0, policy_version 25068 (0.0008) +[2026-06-07 03:12:45,756][495927] Updated weights for policy 0, policy_version 25080 (0.0008) +[2026-06-07 03:12:45,900][495927] Updated weights for policy 0, policy_version 25090 (0.0008) +[2026-06-07 03:12:46,063][495927] Updated weights for policy 0, policy_version 25101 (0.0008) +[2026-06-07 03:12:46,233][495927] Updated weights for policy 0, policy_version 25112 (0.0008) +[2026-06-07 03:12:46,391][495927] Updated weights for policy 0, policy_version 25122 (0.0008) +[2026-06-07 03:12:46,551][495927] Updated weights for policy 0, policy_version 25133 (0.0009) +[2026-06-07 03:12:47,169][495927] Updated weights for policy 0, policy_version 25144 (0.0009) +[2026-06-07 03:12:47,312][495927] Updated weights for policy 0, policy_version 25154 (0.0009) +[2026-06-07 03:12:47,461][495927] Updated weights for policy 0, policy_version 25164 (0.0010) +[2026-06-07 03:12:47,611][495927] Updated weights for policy 0, policy_version 25174 (0.0009) +[2026-06-07 03:12:47,774][495927] Updated weights for policy 0, policy_version 25185 (0.0008) +[2026-06-07 03:12:47,936][495927] Updated weights for policy 0, policy_version 25196 (0.0008) +[2026-06-07 03:12:48,563][495927] Updated weights for policy 0, policy_version 25206 (0.0008) +[2026-06-07 03:12:48,717][495927] Updated weights for policy 0, policy_version 25216 (0.0008) +[2026-06-07 03:12:48,874][495927] Updated weights for policy 0, policy_version 25227 (0.0008) +[2026-06-07 03:12:48,909][492660] Fps is (10 sec: 22937.6, 60 sec: 22937.7, 300 sec: 22993.1). Total num frames: 12910592. Throughput: 0: 22912.0. Samples: 12897536. Policy #0 lag: (min: 49.0, avg: 95.4, max: 110.0) +[2026-06-07 03:12:48,910][492660] Avg episode reward: [(0, '858.377')] +[2026-06-07 03:12:49,025][495927] Updated weights for policy 0, policy_version 25237 (0.0009) +[2026-06-07 03:12:49,171][495927] Updated weights for policy 0, policy_version 25247 (0.0008) +[2026-06-07 03:12:49,327][495927] Updated weights for policy 0, policy_version 25257 (0.0008) +[2026-06-07 03:12:49,938][495927] Updated weights for policy 0, policy_version 25267 (0.0009) +[2026-06-07 03:12:50,073][495927] Updated weights for policy 0, policy_version 25277 (0.0008) +[2026-06-07 03:12:50,232][495927] Updated weights for policy 0, policy_version 25288 (0.0008) +[2026-06-07 03:12:50,382][495927] Updated weights for policy 0, policy_version 25298 (0.0008) +[2026-06-07 03:12:50,570][495927] Updated weights for policy 0, policy_version 25310 (0.0009) +[2026-06-07 03:12:50,729][495927] Updated weights for policy 0, policy_version 25321 (0.0009) +[2026-06-07 03:12:51,367][495927] Updated weights for policy 0, policy_version 25332 (0.0008) +[2026-06-07 03:12:51,520][495927] Updated weights for policy 0, policy_version 25343 (0.0008) +[2026-06-07 03:12:51,698][495927] Updated weights for policy 0, policy_version 25355 (0.0008) +[2026-06-07 03:12:51,867][495927] Updated weights for policy 0, policy_version 25366 (0.0008) +[2026-06-07 03:12:52,026][495927] Updated weights for policy 0, policy_version 25377 (0.0008) +[2026-06-07 03:12:52,176][495927] Updated weights for policy 0, policy_version 25387 (0.0008) +[2026-06-07 03:12:52,794][495927] Updated weights for policy 0, policy_version 25397 (0.0009) +[2026-06-07 03:12:52,938][495927] Updated weights for policy 0, policy_version 25407 (0.0009) +[2026-06-07 03:12:53,078][495927] Updated weights for policy 0, policy_version 25417 (0.0008) +[2026-06-07 03:12:53,247][495927] Updated weights for policy 0, policy_version 25428 (0.0008) +[2026-06-07 03:12:53,394][495927] Updated weights for policy 0, policy_version 25438 (0.0008) +[2026-06-07 03:12:53,563][495927] Updated weights for policy 0, policy_version 25449 (0.0008) +[2026-06-07 03:12:53,909][492660] Fps is (10 sec: 22938.1, 60 sec: 22937.6, 300 sec: 22993.1). Total num frames: 13041664. Throughput: 0: 22809.7. Samples: 13031680. Policy #0 lag: (min: 49.0, avg: 95.4, max: 110.0) +[2026-06-07 03:12:53,910][492660] Avg episode reward: [(0, '852.217')] +[2026-06-07 03:12:54,186][495927] Updated weights for policy 0, policy_version 25459 (0.0009) +[2026-06-07 03:12:54,336][495927] Updated weights for policy 0, policy_version 25470 (0.0008) +[2026-06-07 03:12:54,511][495927] Updated weights for policy 0, policy_version 25482 (0.0008) +[2026-06-07 03:12:54,672][495927] Updated weights for policy 0, policy_version 25492 (0.0008) +[2026-06-07 03:12:54,814][495927] Updated weights for policy 0, policy_version 25502 (0.0008) +[2026-06-07 03:12:54,971][495927] Updated weights for policy 0, policy_version 25512 (0.0008) +[2026-06-07 03:12:55,589][495927] Updated weights for policy 0, policy_version 25522 (0.0008) +[2026-06-07 03:12:55,720][495927] Updated weights for policy 0, policy_version 25532 (0.0008) +[2026-06-07 03:12:55,889][495927] Updated weights for policy 0, policy_version 25543 (0.0009) +[2026-06-07 03:12:56,050][495927] Updated weights for policy 0, policy_version 25554 (0.0008) +[2026-06-07 03:12:56,204][495927] Updated weights for policy 0, policy_version 25564 (0.0008) +[2026-06-07 03:12:56,351][495927] Updated weights for policy 0, policy_version 25574 (0.0008) +[2026-06-07 03:12:56,987][495927] Updated weights for policy 0, policy_version 25585 (0.0008) +[2026-06-07 03:12:57,127][495927] Updated weights for policy 0, policy_version 25595 (0.0008) +[2026-06-07 03:12:57,270][495927] Updated weights for policy 0, policy_version 25605 (0.0008) +[2026-06-07 03:12:57,421][495927] Updated weights for policy 0, policy_version 25615 (0.0008) +[2026-06-07 03:12:57,562][495927] Updated weights for policy 0, policy_version 25625 (0.0008) +[2026-06-07 03:12:57,723][495927] Updated weights for policy 0, policy_version 25635 (0.0008) +[2026-06-07 03:12:57,863][495927] Updated weights for policy 0, policy_version 25645 (0.0008) +[2026-06-07 03:12:58,499][495927] Updated weights for policy 0, policy_version 25655 (0.0008) +[2026-06-07 03:12:58,649][495927] Updated weights for policy 0, policy_version 25666 (0.0008) +[2026-06-07 03:12:58,803][495927] Updated weights for policy 0, policy_version 25676 (0.0008) +[2026-06-07 03:12:58,909][492660] Fps is (10 sec: 22937.5, 60 sec: 22937.6, 300 sec: 22993.1). Total num frames: 13139968. Throughput: 0: 23045.6. Samples: 13177600. Policy #0 lag: (min: 49.0, avg: 95.4, max: 110.0) +[2026-06-07 03:12:58,910][492660] Avg episode reward: [(0, '870.051')] +[2026-06-07 03:12:58,976][495927] Updated weights for policy 0, policy_version 25688 (0.0009) +[2026-06-07 03:12:59,140][495927] Updated weights for policy 0, policy_version 25699 (0.0009) +[2026-06-07 03:12:59,293][495927] Updated weights for policy 0, policy_version 25709 (0.0008) +[2026-06-07 03:12:59,337][495570] Saving new best policy, reward=870.051! +[2026-06-07 03:12:59,915][495927] Updated weights for policy 0, policy_version 25719 (0.0009) +[2026-06-07 03:13:00,072][495927] Updated weights for policy 0, policy_version 25730 (0.0008) +[2026-06-07 03:13:00,232][495927] Updated weights for policy 0, policy_version 25741 (0.0008) +[2026-06-07 03:13:00,388][495927] Updated weights for policy 0, policy_version 25751 (0.0008) +[2026-06-07 03:13:00,555][495927] Updated weights for policy 0, policy_version 25762 (0.0008) +[2026-06-07 03:13:00,718][495927] Updated weights for policy 0, policy_version 25773 (0.0008) +[2026-06-07 03:13:01,329][495927] Updated weights for policy 0, policy_version 25783 (0.0008) +[2026-06-07 03:13:01,473][495927] Updated weights for policy 0, policy_version 25793 (0.0008) +[2026-06-07 03:13:01,640][495927] Updated weights for policy 0, policy_version 25804 (0.0008) +[2026-06-07 03:13:01,792][495927] Updated weights for policy 0, policy_version 25814 (0.0008) +[2026-06-07 03:13:01,954][495927] Updated weights for policy 0, policy_version 25825 (0.0008) +[2026-06-07 03:13:02,099][495927] Updated weights for policy 0, policy_version 25835 (0.0008) +[2026-06-07 03:13:02,746][495927] Updated weights for policy 0, policy_version 25846 (0.0008) +[2026-06-07 03:13:02,890][495927] Updated weights for policy 0, policy_version 25856 (0.0008) +[2026-06-07 03:13:03,053][495927] Updated weights for policy 0, policy_version 25867 (0.0008) +[2026-06-07 03:13:03,217][495927] Updated weights for policy 0, policy_version 25878 (0.0008) +[2026-06-07 03:13:03,372][495927] Updated weights for policy 0, policy_version 25888 (0.0008) +[2026-06-07 03:13:03,527][495927] Updated weights for policy 0, policy_version 25898 (0.0008) +[2026-06-07 03:13:03,909][492660] Fps is (10 sec: 22937.4, 60 sec: 22937.6, 300 sec: 22993.1). Total num frames: 13271040. Throughput: 0: 23054.3. Samples: 13246336. Policy #0 lag: (min: 49.0, avg: 95.4, max: 110.0) +[2026-06-07 03:13:03,910][492660] Avg episode reward: [(0, '928.884')] +[2026-06-07 03:13:03,915][495570] Saving new best policy, reward=928.884! +[2026-06-07 03:13:04,132][495927] Updated weights for policy 0, policy_version 25908 (0.0008) +[2026-06-07 03:13:04,276][495927] Updated weights for policy 0, policy_version 25918 (0.0008) +[2026-06-07 03:13:04,430][495927] Updated weights for policy 0, policy_version 25929 (0.0008) +[2026-06-07 03:13:04,584][495927] Updated weights for policy 0, policy_version 25939 (0.0008) +[2026-06-07 03:13:04,733][495927] Updated weights for policy 0, policy_version 25949 (0.0008) +[2026-06-07 03:13:04,889][495927] Updated weights for policy 0, policy_version 25959 (0.0008) +[2026-06-07 03:13:05,518][495927] Updated weights for policy 0, policy_version 25969 (0.0008) +[2026-06-07 03:13:05,667][495927] Updated weights for policy 0, policy_version 25979 (0.0008) +[2026-06-07 03:13:05,810][495927] Updated weights for policy 0, policy_version 25989 (0.0008) +[2026-06-07 03:13:05,975][495927] Updated weights for policy 0, policy_version 26000 (0.0008) +[2026-06-07 03:13:06,122][495927] Updated weights for policy 0, policy_version 26010 (0.0008) +[2026-06-07 03:13:06,270][495927] Updated weights for policy 0, policy_version 26020 (0.0008) +[2026-06-07 03:13:06,423][495927] Updated weights for policy 0, policy_version 26030 (0.0008) +[2026-06-07 03:13:07,055][495927] Updated weights for policy 0, policy_version 26040 (0.0008) +[2026-06-07 03:13:07,190][495927] Updated weights for policy 0, policy_version 26050 (0.0008) +[2026-06-07 03:13:07,341][495927] Updated weights for policy 0, policy_version 26060 (0.0008) +[2026-06-07 03:13:07,496][495927] Updated weights for policy 0, policy_version 26070 (0.0008) +[2026-06-07 03:13:07,647][495927] Updated weights for policy 0, policy_version 26080 (0.0008) +[2026-06-07 03:13:07,795][495927] Updated weights for policy 0, policy_version 26090 (0.0008) +[2026-06-07 03:13:08,432][495927] Updated weights for policy 0, policy_version 26100 (0.0008) +[2026-06-07 03:13:08,603][495927] Updated weights for policy 0, policy_version 26112 (0.0008) +[2026-06-07 03:13:08,749][495927] Updated weights for policy 0, policy_version 26122 (0.0008) +[2026-06-07 03:13:08,897][495927] Updated weights for policy 0, policy_version 26132 (0.0009) +[2026-06-07 03:13:08,909][492660] Fps is (10 sec: 22937.9, 60 sec: 22937.6, 300 sec: 22993.1). Total num frames: 13369344. Throughput: 0: 23045.7. Samples: 13381248. Policy #0 lag: (min: 63.0, avg: 77.5, max: 127.0) +[2026-06-07 03:13:08,910][492660] Avg episode reward: [(0, '942.619')] +[2026-06-07 03:13:09,050][495927] Updated weights for policy 0, policy_version 26142 (0.0008) +[2026-06-07 03:13:09,198][495927] Updated weights for policy 0, policy_version 26152 (0.0008) +[2026-06-07 03:13:09,311][495570] Saving results/checkpoints_factor_sweeps/flappy/context_window/flappy_frame_stack_fixed_l2_fs3_seed11/checkpoint_p0/checkpoint_000026160_13402112.pth... +[2026-06-07 03:13:09,330][495570] Saving new best policy, reward=942.619! +[2026-06-07 03:13:09,842][495927] Updated weights for policy 0, policy_version 26162 (0.0009) +[2026-06-07 03:13:09,982][495927] Updated weights for policy 0, policy_version 26172 (0.0008) +[2026-06-07 03:13:10,127][495927] Updated weights for policy 0, policy_version 26182 (0.0008) +[2026-06-07 03:13:10,277][495927] Updated weights for policy 0, policy_version 26192 (0.0008) +[2026-06-07 03:13:10,427][495927] Updated weights for policy 0, policy_version 26202 (0.0008) +[2026-06-07 03:13:10,583][495927] Updated weights for policy 0, policy_version 26212 (0.0008) +[2026-06-07 03:13:10,738][495927] Updated weights for policy 0, policy_version 26222 (0.0008) +[2026-06-07 03:13:11,373][495927] Updated weights for policy 0, policy_version 26233 (0.0008) +[2026-06-07 03:13:11,521][495927] Updated weights for policy 0, policy_version 26243 (0.0008) +[2026-06-07 03:13:11,671][495927] Updated weights for policy 0, policy_version 26253 (0.0008) +[2026-06-07 03:13:11,819][495927] Updated weights for policy 0, policy_version 26263 (0.0008) +[2026-06-07 03:13:11,967][495927] Updated weights for policy 0, policy_version 26273 (0.0008) +[2026-06-07 03:13:12,117][495927] Updated weights for policy 0, policy_version 26283 (0.0008) +[2026-06-07 03:13:12,741][495927] Updated weights for policy 0, policy_version 26293 (0.0009) +[2026-06-07 03:13:12,879][495927] Updated weights for policy 0, policy_version 26303 (0.0008) +[2026-06-07 03:13:13,046][495927] Updated weights for policy 0, policy_version 26314 (0.0008) +[2026-06-07 03:13:13,210][495927] Updated weights for policy 0, policy_version 26325 (0.0008) +[2026-06-07 03:13:13,367][495927] Updated weights for policy 0, policy_version 26335 (0.0008) +[2026-06-07 03:13:13,513][495927] Updated weights for policy 0, policy_version 26345 (0.0008) +[2026-06-07 03:13:13,909][492660] Fps is (10 sec: 22937.7, 60 sec: 22937.6, 300 sec: 22993.1). Total num frames: 13500416. Throughput: 0: 22880.8. Samples: 13516032. Policy #0 lag: (min: 63.0, avg: 77.5, max: 127.0) +[2026-06-07 03:13:13,910][492660] Avg episode reward: [(0, '967.112')] +[2026-06-07 03:13:13,914][495570] Saving new best policy, reward=967.112! +[2026-06-07 03:13:14,142][495927] Updated weights for policy 0, policy_version 26355 (0.0008) +[2026-06-07 03:13:14,284][495927] Updated weights for policy 0, policy_version 26365 (0.0008) +[2026-06-07 03:13:14,461][495927] Updated weights for policy 0, policy_version 26377 (0.0009) +[2026-06-07 03:13:14,606][495927] Updated weights for policy 0, policy_version 26387 (0.0008) +[2026-06-07 03:13:14,768][495927] Updated weights for policy 0, policy_version 26398 (0.0008) +[2026-06-07 03:13:14,926][495927] Updated weights for policy 0, policy_version 26408 (0.0008) +[2026-06-07 03:13:15,570][495927] Updated weights for policy 0, policy_version 26418 (0.0009) +[2026-06-07 03:13:15,708][495927] Updated weights for policy 0, policy_version 26428 (0.0008) +[2026-06-07 03:13:15,864][495927] Updated weights for policy 0, policy_version 26439 (0.0008) +[2026-06-07 03:13:16,029][495927] Updated weights for policy 0, policy_version 26450 (0.0008) +[2026-06-07 03:13:16,185][495927] Updated weights for policy 0, policy_version 26460 (0.0008) +[2026-06-07 03:13:16,353][495927] Updated weights for policy 0, policy_version 26471 (0.0008) +[2026-06-07 03:13:16,981][495927] Updated weights for policy 0, policy_version 26481 (0.0009) +[2026-06-07 03:13:17,137][495927] Updated weights for policy 0, policy_version 26493 (0.0008) +[2026-06-07 03:13:17,302][495927] Updated weights for policy 0, policy_version 26504 (0.0009) +[2026-06-07 03:13:17,442][495927] Updated weights for policy 0, policy_version 26514 (0.0009) +[2026-06-07 03:13:17,600][495927] Updated weights for policy 0, policy_version 26524 (0.0008) +[2026-06-07 03:13:17,752][495927] Updated weights for policy 0, policy_version 26534 (0.0009) +[2026-06-07 03:13:17,900][495927] Updated weights for policy 0, policy_version 26544 (0.0009) +[2026-06-07 03:13:18,555][495927] Updated weights for policy 0, policy_version 26556 (0.0008) +[2026-06-07 03:13:18,737][495927] Updated weights for policy 0, policy_version 26568 (0.0008) +[2026-06-07 03:13:18,905][495927] Updated weights for policy 0, policy_version 26580 (0.0008) +[2026-06-07 03:13:18,909][492660] Fps is (10 sec: 22937.3, 60 sec: 22937.6, 300 sec: 22993.1). Total num frames: 13598720. Throughput: 0: 22769.7. Samples: 13583872. Policy #0 lag: (min: 63.0, avg: 77.5, max: 127.0) +[2026-06-07 03:13:18,910][492660] Avg episode reward: [(0, '992.896')] +[2026-06-07 03:13:19,057][495927] Updated weights for policy 0, policy_version 26590 (0.0006) +[2026-06-07 03:13:19,229][495927] Updated weights for policy 0, policy_version 26602 (0.0008) +[2026-06-07 03:13:19,314][495570] Saving new best policy, reward=992.896! +[2026-06-07 03:13:19,852][495927] Updated weights for policy 0, policy_version 26612 (0.0009) +[2026-06-07 03:13:20,008][495927] Updated weights for policy 0, policy_version 26623 (0.0008) +[2026-06-07 03:13:20,174][495927] Updated weights for policy 0, policy_version 26634 (0.0009) +[2026-06-07 03:13:20,337][495927] Updated weights for policy 0, policy_version 26645 (0.0008) +[2026-06-07 03:13:20,496][495927] Updated weights for policy 0, policy_version 26656 (0.0008) +[2026-06-07 03:13:20,658][495927] Updated weights for policy 0, policy_version 26667 (0.0011) +[2026-06-07 03:13:21,269][495927] Updated weights for policy 0, policy_version 26677 (0.0010) +[2026-06-07 03:13:21,428][495927] Updated weights for policy 0, policy_version 26688 (0.0009) +[2026-06-07 03:13:21,598][495927] Updated weights for policy 0, policy_version 26700 (0.0008) +[2026-06-07 03:13:21,752][495927] Updated weights for policy 0, policy_version 26710 (0.0009) +[2026-06-07 03:13:21,894][495927] Updated weights for policy 0, policy_version 26720 (0.0009) +[2026-06-07 03:13:22,061][495927] Updated weights for policy 0, policy_version 26731 (0.0009) +[2026-06-07 03:13:22,692][495927] Updated weights for policy 0, policy_version 26742 (0.0009) +[2026-06-07 03:13:22,838][495927] Updated weights for policy 0, policy_version 26752 (0.0008) +[2026-06-07 03:13:22,989][495927] Updated weights for policy 0, policy_version 26762 (0.0008) +[2026-06-07 03:13:23,158][495927] Updated weights for policy 0, policy_version 26773 (0.0009) +[2026-06-07 03:13:23,319][495927] Updated weights for policy 0, policy_version 26784 (0.0008) +[2026-06-07 03:13:23,482][495927] Updated weights for policy 0, policy_version 26794 (0.0008) +[2026-06-07 03:13:23,909][492660] Fps is (10 sec: 22937.5, 60 sec: 22937.6, 300 sec: 22993.1). Total num frames: 13729792. Throughput: 0: 22795.4. Samples: 13721088. Policy #0 lag: (min: 63.0, avg: 76.5, max: 127.0) +[2026-06-07 03:13:23,910][492660] Avg episode reward: [(0, '1008.956')] +[2026-06-07 03:13:23,915][495570] Saving new best policy, reward=1008.956! +[2026-06-07 03:13:24,115][495927] Updated weights for policy 0, policy_version 26804 (0.0009) +[2026-06-07 03:13:24,267][495927] Updated weights for policy 0, policy_version 26815 (0.0009) +[2026-06-07 03:13:24,434][495927] Updated weights for policy 0, policy_version 26826 (0.0008) +[2026-06-07 03:13:24,585][495927] Updated weights for policy 0, policy_version 26836 (0.0008) +[2026-06-07 03:13:24,732][495927] Updated weights for policy 0, policy_version 26846 (0.0008) +[2026-06-07 03:13:24,904][495927] Updated weights for policy 0, policy_version 26857 (0.0008) +[2026-06-07 03:13:25,505][495927] Updated weights for policy 0, policy_version 26867 (0.0008) +[2026-06-07 03:13:25,668][495927] Updated weights for policy 0, policy_version 26878 (0.0009) +[2026-06-07 03:13:25,812][495927] Updated weights for policy 0, policy_version 26888 (0.0008) +[2026-06-07 03:13:25,966][495927] Updated weights for policy 0, policy_version 26898 (0.0008) +[2026-06-07 03:13:26,113][495927] Updated weights for policy 0, policy_version 26908 (0.0008) +[2026-06-07 03:13:26,268][495927] Updated weights for policy 0, policy_version 26918 (0.0008) +[2026-06-07 03:13:26,413][495927] Updated weights for policy 0, policy_version 26928 (0.0008) +[2026-06-07 03:13:27,024][495927] Updated weights for policy 0, policy_version 26938 (0.0008) +[2026-06-07 03:13:27,160][495927] Updated weights for policy 0, policy_version 26948 (0.0008) +[2026-06-07 03:13:27,328][495927] Updated weights for policy 0, policy_version 26959 (0.0008) +[2026-06-07 03:13:27,501][495927] Updated weights for policy 0, policy_version 26970 (0.0008) +[2026-06-07 03:13:27,668][495927] Updated weights for policy 0, policy_version 26981 (0.0008) +[2026-06-07 03:13:27,827][495927] Updated weights for policy 0, policy_version 26992 (0.0008) +[2026-06-07 03:13:28,443][495927] Updated weights for policy 0, policy_version 27002 (0.0008) +[2026-06-07 03:13:28,584][495927] Updated weights for policy 0, policy_version 27012 (0.0008) +[2026-06-07 03:13:28,738][495927] Updated weights for policy 0, policy_version 27022 (0.0009) +[2026-06-07 03:13:28,909][492660] Fps is (10 sec: 22937.6, 60 sec: 22937.6, 300 sec: 22993.1). Total num frames: 13828096. Throughput: 0: 23028.7. Samples: 13865728. Policy #0 lag: (min: 63.0, avg: 76.5, max: 127.0) +[2026-06-07 03:13:28,910][492660] Avg episode reward: [(0, '1025.250')] +[2026-06-07 03:13:28,914][495927] Updated weights for policy 0, policy_version 27034 (0.0008) +[2026-06-07 03:13:29,085][495927] Updated weights for policy 0, policy_version 27045 (0.0008) +[2026-06-07 03:13:29,237][495927] Updated weights for policy 0, policy_version 27055 (0.0009) +[2026-06-07 03:13:29,245][495570] Saving new best policy, reward=1025.250! +[2026-06-07 03:13:29,844][495927] Updated weights for policy 0, policy_version 27065 (0.0008) +[2026-06-07 03:13:30,011][495927] Updated weights for policy 0, policy_version 27076 (0.0008) +[2026-06-07 03:13:30,172][495927] Updated weights for policy 0, policy_version 27087 (0.0008) +[2026-06-07 03:13:30,343][495927] Updated weights for policy 0, policy_version 27098 (0.0008) +[2026-06-07 03:13:30,501][495927] Updated weights for policy 0, policy_version 27109 (0.0009) +[2026-06-07 03:13:30,659][495927] Updated weights for policy 0, policy_version 27119 (0.0008) +[2026-06-07 03:13:31,276][495927] Updated weights for policy 0, policy_version 27129 (0.0008) +[2026-06-07 03:13:31,418][495927] Updated weights for policy 0, policy_version 27139 (0.0008) +[2026-06-07 03:13:31,573][495927] Updated weights for policy 0, policy_version 27149 (0.0008) +[2026-06-07 03:13:31,751][495927] Updated weights for policy 0, policy_version 27161 (0.0008) +[2026-06-07 03:13:31,901][495927] Updated weights for policy 0, policy_version 27171 (0.0008) +[2026-06-07 03:13:32,056][495927] Updated weights for policy 0, policy_version 27181 (0.0008) +[2026-06-07 03:13:32,686][495927] Updated weights for policy 0, policy_version 27192 (0.0009) +[2026-06-07 03:13:32,867][495927] Updated weights for policy 0, policy_version 27204 (0.0008) +[2026-06-07 03:13:33,012][495927] Updated weights for policy 0, policy_version 27214 (0.0008) +[2026-06-07 03:13:33,179][495927] Updated weights for policy 0, policy_version 27225 (0.0008) +[2026-06-07 03:13:33,337][495927] Updated weights for policy 0, policy_version 27235 (0.0008) +[2026-06-07 03:13:33,487][495927] Updated weights for policy 0, policy_version 27245 (0.0008) +[2026-06-07 03:13:33,909][492660] Fps is (10 sec: 22937.7, 60 sec: 22937.6, 300 sec: 22993.1). Total num frames: 13959168. Throughput: 0: 23000.2. Samples: 13932544. Policy #0 lag: (min: 63.0, avg: 76.5, max: 127.0) +[2026-06-07 03:13:33,910][492660] Avg episode reward: [(0, '1022.222')] +[2026-06-07 03:13:34,109][495927] Updated weights for policy 0, policy_version 27256 (0.0009) +[2026-06-07 03:13:34,267][495927] Updated weights for policy 0, policy_version 27267 (0.0009) +[2026-06-07 03:13:34,421][495927] Updated weights for policy 0, policy_version 27277 (0.0009) +[2026-06-07 03:13:34,570][495927] Updated weights for policy 0, policy_version 27287 (0.0011) +[2026-06-07 03:13:34,754][495927] Updated weights for policy 0, policy_version 27299 (0.0007) +[2026-06-07 03:13:34,904][495927] Updated weights for policy 0, policy_version 27309 (0.0007) +[2026-06-07 03:13:35,523][495927] Updated weights for policy 0, policy_version 27320 (0.0009) +[2026-06-07 03:13:35,665][495927] Updated weights for policy 0, policy_version 27330 (0.0008) +[2026-06-07 03:13:35,834][495927] Updated weights for policy 0, policy_version 27341 (0.0008) +[2026-06-07 03:13:35,991][495927] Updated weights for policy 0, policy_version 27351 (0.0008) +[2026-06-07 03:13:36,150][495927] Updated weights for policy 0, policy_version 27362 (0.0008) +[2026-06-07 03:13:36,306][495927] Updated weights for policy 0, policy_version 27372 (0.0008) +[2026-06-07 03:13:36,929][495927] Updated weights for policy 0, policy_version 27382 (0.0009) +[2026-06-07 03:13:37,074][495927] Updated weights for policy 0, policy_version 27392 (0.0009) +[2026-06-07 03:13:37,221][495927] Updated weights for policy 0, policy_version 27402 (0.0008) +[2026-06-07 03:13:37,372][495927] Updated weights for policy 0, policy_version 27412 (0.0008) +[2026-06-07 03:13:37,522][495927] Updated weights for policy 0, policy_version 27422 (0.0008) +[2026-06-07 03:13:37,673][495927] Updated weights for policy 0, policy_version 27432 (0.0008) +[2026-06-07 03:13:38,317][495927] Updated weights for policy 0, policy_version 27443 (0.0009) +[2026-06-07 03:13:38,457][495927] Updated weights for policy 0, policy_version 27453 (0.0008) +[2026-06-07 03:13:38,605][495927] Updated weights for policy 0, policy_version 27463 (0.0008) +[2026-06-07 03:13:38,759][495927] Updated weights for policy 0, policy_version 27473 (0.0009) +[2026-06-07 03:13:38,908][495927] Updated weights for policy 0, policy_version 27483 (0.0008) +[2026-06-07 03:13:38,909][492660] Fps is (10 sec: 22937.8, 60 sec: 22937.6, 300 sec: 22993.1). Total num frames: 14057472. Throughput: 0: 23017.2. Samples: 14067456. Policy #0 lag: (min: 63.0, avg: 76.5, max: 127.0) +[2026-06-07 03:13:38,910][492660] Avg episode reward: [(0, '1045.676')] +[2026-06-07 03:13:39,063][495927] Updated weights for policy 0, policy_version 27493 (0.0008) +[2026-06-07 03:13:39,221][495927] Updated weights for policy 0, policy_version 27503 (0.0008) +[2026-06-07 03:13:39,229][495570] Saving new best policy, reward=1045.676! +[2026-06-07 03:13:39,814][495927] Updated weights for policy 0, policy_version 27513 (0.0008) +[2026-06-07 03:13:39,961][495927] Updated weights for policy 0, policy_version 27523 (0.0009) +[2026-06-07 03:13:40,125][495927] Updated weights for policy 0, policy_version 27534 (0.0009) +[2026-06-07 03:13:40,274][495927] Updated weights for policy 0, policy_version 27544 (0.0008) +[2026-06-07 03:13:40,423][495927] Updated weights for policy 0, policy_version 27554 (0.0008) +[2026-06-07 03:13:40,582][495927] Updated weights for policy 0, policy_version 27564 (0.0008) +[2026-06-07 03:13:41,206][495927] Updated weights for policy 0, policy_version 27574 (0.0009) +[2026-06-07 03:13:41,363][495927] Updated weights for policy 0, policy_version 27585 (0.0008) +[2026-06-07 03:13:41,511][495927] Updated weights for policy 0, policy_version 27595 (0.0008) +[2026-06-07 03:13:41,673][495927] Updated weights for policy 0, policy_version 27605 (0.0009) +[2026-06-07 03:13:41,820][495927] Updated weights for policy 0, policy_version 27615 (0.0009) +[2026-06-07 03:13:41,984][495927] Updated weights for policy 0, policy_version 27626 (0.0008) +[2026-06-07 03:13:42,614][495927] Updated weights for policy 0, policy_version 27637 (0.0008) +[2026-06-07 03:13:42,760][495927] Updated weights for policy 0, policy_version 27647 (0.0008) +[2026-06-07 03:13:42,907][495927] Updated weights for policy 0, policy_version 27657 (0.0008) +[2026-06-07 03:13:43,054][495927] Updated weights for policy 0, policy_version 27667 (0.0008) +[2026-06-07 03:13:43,205][495927] Updated weights for policy 0, policy_version 27677 (0.0008) +[2026-06-07 03:13:43,349][495927] Updated weights for policy 0, policy_version 27687 (0.0008) +[2026-06-07 03:13:43,909][492660] Fps is (10 sec: 22937.6, 60 sec: 22937.7, 300 sec: 22993.1). Total num frames: 14188544. Throughput: 0: 22761.3. Samples: 14201856. Policy #0 lag: (min: 63.0, avg: 77.3, max: 127.0) +[2026-06-07 03:13:43,910][492660] Avg episode reward: [(0, '1143.317')] +[2026-06-07 03:13:43,975][495927] Updated weights for policy 0, policy_version 27697 (0.0009) +[2026-06-07 03:13:44,133][495927] Updated weights for policy 0, policy_version 27708 (0.0009) +[2026-06-07 03:13:44,276][495927] Updated weights for policy 0, policy_version 27718 (0.0008) +[2026-06-07 03:13:44,437][495927] Updated weights for policy 0, policy_version 27728 (0.0008) +[2026-06-07 03:13:44,585][495927] Updated weights for policy 0, policy_version 27738 (0.0009) +[2026-06-07 03:13:44,725][495927] Updated weights for policy 0, policy_version 27748 (0.0008) +[2026-06-07 03:13:44,900][495927] Updated weights for policy 0, policy_version 27759 (0.0008) +[2026-06-07 03:13:44,912][495570] Saving new best policy, reward=1143.317! +[2026-06-07 03:13:45,540][495927] Updated weights for policy 0, policy_version 27770 (0.0008) +[2026-06-07 03:13:45,688][495927] Updated weights for policy 0, policy_version 27780 (0.0008) +[2026-06-07 03:13:45,838][495927] Updated weights for policy 0, policy_version 27790 (0.0008) +[2026-06-07 03:13:45,990][495927] Updated weights for policy 0, policy_version 27800 (0.0008) +[2026-06-07 03:13:46,145][495927] Updated weights for policy 0, policy_version 27810 (0.0008) +[2026-06-07 03:13:46,316][495927] Updated weights for policy 0, policy_version 27821 (0.0008) +[2026-06-07 03:13:46,915][495927] Updated weights for policy 0, policy_version 27831 (0.0008) +[2026-06-07 03:13:47,071][495927] Updated weights for policy 0, policy_version 27842 (0.0008) +[2026-06-07 03:13:47,221][495927] Updated weights for policy 0, policy_version 27852 (0.0008) +[2026-06-07 03:13:47,378][495927] Updated weights for policy 0, policy_version 27862 (0.0009) +[2026-06-07 03:13:47,522][495927] Updated weights for policy 0, policy_version 27872 (0.0008) +[2026-06-07 03:13:47,673][495927] Updated weights for policy 0, policy_version 27882 (0.0008) +[2026-06-07 03:13:48,285][495927] Updated weights for policy 0, policy_version 27892 (0.0008) +[2026-06-07 03:13:48,428][495927] Updated weights for policy 0, policy_version 27902 (0.0008) +[2026-06-07 03:13:48,578][495927] Updated weights for policy 0, policy_version 27912 (0.0008) +[2026-06-07 03:13:48,734][495927] Updated weights for policy 0, policy_version 27922 (0.0008) +[2026-06-07 03:13:48,882][495927] Updated weights for policy 0, policy_version 27932 (0.0008) +[2026-06-07 03:13:48,909][492660] Fps is (10 sec: 22937.2, 60 sec: 22937.5, 300 sec: 22993.1). Total num frames: 14286848. Throughput: 0: 22741.2. Samples: 14269696. Policy #0 lag: (min: 63.0, avg: 77.3, max: 127.0) +[2026-06-07 03:13:48,910][492660] Avg episode reward: [(0, '1163.396')] +[2026-06-07 03:13:49,045][495927] Updated weights for policy 0, policy_version 27943 (0.0009) +[2026-06-07 03:13:49,178][495570] Saving new best policy, reward=1163.396! +[2026-06-07 03:13:49,671][495927] Updated weights for policy 0, policy_version 27953 (0.0009) +[2026-06-07 03:13:49,813][495927] Updated weights for policy 0, policy_version 27963 (0.0008) +[2026-06-07 03:13:49,958][495927] Updated weights for policy 0, policy_version 27973 (0.0009) +[2026-06-07 03:13:50,108][495927] Updated weights for policy 0, policy_version 27983 (0.0009) +[2026-06-07 03:13:50,264][495927] Updated weights for policy 0, policy_version 27993 (0.0008) +[2026-06-07 03:13:50,412][495927] Updated weights for policy 0, policy_version 28003 (0.0008) +[2026-06-07 03:13:50,582][495927] Updated weights for policy 0, policy_version 28014 (0.0009) +[2026-06-07 03:13:51,200][495927] Updated weights for policy 0, policy_version 28025 (0.0009) +[2026-06-07 03:13:51,341][495927] Updated weights for policy 0, policy_version 28035 (0.0008) +[2026-06-07 03:13:51,496][495927] Updated weights for policy 0, policy_version 28045 (0.0009) +[2026-06-07 03:13:51,658][495927] Updated weights for policy 0, policy_version 28056 (0.0008) +[2026-06-07 03:13:51,827][495927] Updated weights for policy 0, policy_version 28067 (0.0008) +[2026-06-07 03:13:51,976][495927] Updated weights for policy 0, policy_version 28077 (0.0008) +[2026-06-07 03:13:52,601][495927] Updated weights for policy 0, policy_version 28087 (0.0008) +[2026-06-07 03:13:52,744][495927] Updated weights for policy 0, policy_version 28097 (0.0008) +[2026-06-07 03:13:52,903][495927] Updated weights for policy 0, policy_version 28107 (0.0008) +[2026-06-07 03:13:53,052][495927] Updated weights for policy 0, policy_version 28117 (0.0008) +[2026-06-07 03:13:53,224][495927] Updated weights for policy 0, policy_version 28129 (0.0008) +[2026-06-07 03:13:53,377][495927] Updated weights for policy 0, policy_version 28139 (0.0008) +[2026-06-07 03:13:53,909][492660] Fps is (10 sec: 22937.6, 60 sec: 22937.6, 300 sec: 22993.1). Total num frames: 14417920. Throughput: 0: 22892.1. Samples: 14411392. Policy #0 lag: (min: 63.0, avg: 77.3, max: 127.0) +[2026-06-07 03:13:53,910][492660] Avg episode reward: [(0, '1181.337')] +[2026-06-07 03:13:54,008][495927] Updated weights for policy 0, policy_version 28149 (0.0008) +[2026-06-07 03:13:54,151][495927] Updated weights for policy 0, policy_version 28159 (0.0009) +[2026-06-07 03:13:54,330][495927] Updated weights for policy 0, policy_version 28172 (0.0008) +[2026-06-07 03:13:54,494][495927] Updated weights for policy 0, policy_version 28183 (0.0008) +[2026-06-07 03:13:54,661][495927] Updated weights for policy 0, policy_version 28194 (0.0008) +[2026-06-07 03:13:54,818][495927] Updated weights for policy 0, policy_version 28205 (0.0008) +[2026-06-07 03:13:54,856][495570] Saving new best policy, reward=1181.337! +[2026-06-07 03:13:55,459][495927] Updated weights for policy 0, policy_version 28216 (0.0009) +[2026-06-07 03:13:55,622][495927] Updated weights for policy 0, policy_version 28228 (0.0008) +[2026-06-07 03:13:55,791][495927] Updated weights for policy 0, policy_version 28239 (0.0008) +[2026-06-07 03:13:55,941][495927] Updated weights for policy 0, policy_version 28249 (0.0008) +[2026-06-07 03:13:56,106][495927] Updated weights for policy 0, policy_version 28260 (0.0008) +[2026-06-07 03:13:56,262][495927] Updated weights for policy 0, policy_version 28270 (0.0009) +[2026-06-07 03:13:56,875][495927] Updated weights for policy 0, policy_version 28280 (0.0008) +[2026-06-07 03:13:57,024][495927] Updated weights for policy 0, policy_version 28290 (0.0008) +[2026-06-07 03:13:57,193][495927] Updated weights for policy 0, policy_version 28301 (0.0008) +[2026-06-07 03:13:57,338][495927] Updated weights for policy 0, policy_version 28311 (0.0009) +[2026-06-07 03:13:57,490][495927] Updated weights for policy 0, policy_version 28321 (0.0009) +[2026-06-07 03:13:57,656][495927] Updated weights for policy 0, policy_version 28332 (0.0008) +[2026-06-07 03:13:58,275][495927] Updated weights for policy 0, policy_version 28342 (0.0008) +[2026-06-07 03:13:58,424][495927] Updated weights for policy 0, policy_version 28352 (0.0008) +[2026-06-07 03:13:58,587][495927] Updated weights for policy 0, policy_version 28363 (0.0008) +[2026-06-07 03:13:58,735][495927] Updated weights for policy 0, policy_version 28373 (0.0008) +[2026-06-07 03:13:58,901][495927] Updated weights for policy 0, policy_version 28384 (0.0008) +[2026-06-07 03:13:58,909][492660] Fps is (10 sec: 22937.7, 60 sec: 22937.6, 300 sec: 22993.1). Total num frames: 14516224. Throughput: 0: 23025.7. Samples: 14552192. Policy #0 lag: (min: 63.0, avg: 77.3, max: 127.0) +[2026-06-07 03:13:58,910][492660] Avg episode reward: [(0, '1184.212')] +[2026-06-07 03:13:59,053][495927] Updated weights for policy 0, policy_version 28394 (0.0008) +[2026-06-07 03:13:59,135][495570] Saving new best policy, reward=1184.212! +[2026-06-07 03:13:59,685][495927] Updated weights for policy 0, policy_version 28404 (0.0009) +[2026-06-07 03:13:59,851][495927] Updated weights for policy 0, policy_version 28416 (0.0008) +[2026-06-07 03:14:00,009][495927] Updated weights for policy 0, policy_version 28427 (0.0008) +[2026-06-07 03:14:00,163][495927] Updated weights for policy 0, policy_version 28437 (0.0008) +[2026-06-07 03:14:00,328][495927] Updated weights for policy 0, policy_version 28448 (0.0008) +[2026-06-07 03:14:00,475][495927] Updated weights for policy 0, policy_version 28458 (0.0008) +[2026-06-07 03:14:01,115][495927] Updated weights for policy 0, policy_version 28469 (0.0009) +[2026-06-07 03:14:01,250][495927] Updated weights for policy 0, policy_version 28479 (0.0008) +[2026-06-07 03:14:01,415][495927] Updated weights for policy 0, policy_version 28490 (0.0008) +[2026-06-07 03:14:01,565][495927] Updated weights for policy 0, policy_version 28500 (0.0008) +[2026-06-07 03:14:01,737][495927] Updated weights for policy 0, policy_version 28511 (0.0008) +[2026-06-07 03:14:01,881][495927] Updated weights for policy 0, policy_version 28521 (0.0008) +[2026-06-07 03:14:02,511][495927] Updated weights for policy 0, policy_version 28531 (0.0009) +[2026-06-07 03:14:02,661][495927] Updated weights for policy 0, policy_version 28542 (0.0008) +[2026-06-07 03:14:02,822][495927] Updated weights for policy 0, policy_version 28552 (0.0008) +[2026-06-07 03:14:02,984][495927] Updated weights for policy 0, policy_version 28563 (0.0008) +[2026-06-07 03:14:03,157][495927] Updated weights for policy 0, policy_version 28574 (0.0008) +[2026-06-07 03:14:03,302][495927] Updated weights for policy 0, policy_version 28584 (0.0008) +[2026-06-07 03:14:03,909][492660] Fps is (10 sec: 22937.8, 60 sec: 22937.6, 300 sec: 22993.2). Total num frames: 14647296. Throughput: 0: 23017.3. Samples: 14619648. Policy #0 lag: (min: 23.0, avg: 37.7, max: 87.0) +[2026-06-07 03:14:03,910][492660] Avg episode reward: [(0, '1207.541')] +[2026-06-07 03:14:03,926][495927] Updated weights for policy 0, policy_version 28594 (0.0008) +[2026-06-07 03:14:04,063][495927] Updated weights for policy 0, policy_version 28604 (0.0008) +[2026-06-07 03:14:04,217][495927] Updated weights for policy 0, policy_version 28614 (0.0009) +[2026-06-07 03:14:04,380][495927] Updated weights for policy 0, policy_version 28625 (0.0009) +[2026-06-07 03:14:04,547][495927] Updated weights for policy 0, policy_version 28636 (0.0008) +[2026-06-07 03:14:04,702][495927] Updated weights for policy 0, policy_version 28646 (0.0008) +[2026-06-07 03:14:04,845][495570] Saving new best policy, reward=1207.541! +[2026-06-07 03:14:04,846][495927] Updated weights for policy 0, policy_version 28656 (0.0008) +[2026-06-07 03:14:05,449][495927] Updated weights for policy 0, policy_version 28666 (0.0009) +[2026-06-07 03:14:05,592][495927] Updated weights for policy 0, policy_version 28676 (0.0008) +[2026-06-07 03:14:05,768][495927] Updated weights for policy 0, policy_version 28688 (0.0008) +[2026-06-07 03:14:05,912][495927] Updated weights for policy 0, policy_version 28698 (0.0008) +[2026-06-07 03:14:06,065][495927] Updated weights for policy 0, policy_version 28708 (0.0008) +[2026-06-07 03:14:06,214][495927] Updated weights for policy 0, policy_version 28718 (0.0008) +[2026-06-07 03:14:06,858][495927] Updated weights for policy 0, policy_version 28729 (0.0008) +[2026-06-07 03:14:07,015][495927] Updated weights for policy 0, policy_version 28740 (0.0008) +[2026-06-07 03:14:07,159][495927] Updated weights for policy 0, policy_version 28750 (0.0008) +[2026-06-07 03:14:07,314][495927] Updated weights for policy 0, policy_version 28760 (0.0008) +[2026-06-07 03:14:07,473][495927] Updated weights for policy 0, policy_version 28771 (0.0008) +[2026-06-07 03:14:07,649][495927] Updated weights for policy 0, policy_version 28783 (0.0008) +[2026-06-07 03:14:08,274][495927] Updated weights for policy 0, policy_version 28793 (0.0008) +[2026-06-07 03:14:08,454][495927] Updated weights for policy 0, policy_version 28806 (0.0009) +[2026-06-07 03:14:08,639][495927] Updated weights for policy 0, policy_version 28818 (0.0008) +[2026-06-07 03:14:08,803][495927] Updated weights for policy 0, policy_version 28829 (0.0008) +[2026-06-07 03:14:08,909][492660] Fps is (10 sec: 22937.5, 60 sec: 22937.5, 300 sec: 22993.1). Total num frames: 14745600. Throughput: 0: 22963.1. Samples: 14754432. Policy #0 lag: (min: 23.0, avg: 37.7, max: 87.0) +[2026-06-07 03:14:08,910][492660] Avg episode reward: [(0, '1172.083')] +[2026-06-07 03:14:08,947][495927] Updated weights for policy 0, policy_version 28839 (0.0009) +[2026-06-07 03:14:09,603][495927] Updated weights for policy 0, policy_version 28851 (0.0009) +[2026-06-07 03:14:09,739][495927] Updated weights for policy 0, policy_version 28861 (0.0009) +[2026-06-07 03:14:09,900][495927] Updated weights for policy 0, policy_version 28872 (0.0008) +[2026-06-07 03:14:10,061][495927] Updated weights for policy 0, policy_version 28883 (0.0008) +[2026-06-07 03:14:10,229][495927] Updated weights for policy 0, policy_version 28894 (0.0008) +[2026-06-07 03:14:10,372][495927] Updated weights for policy 0, policy_version 28904 (0.0009) +[2026-06-07 03:14:11,020][495927] Updated weights for policy 0, policy_version 28914 (0.0009) +[2026-06-07 03:14:11,155][495927] Updated weights for policy 0, policy_version 28924 (0.0008) +[2026-06-07 03:14:11,314][495927] Updated weights for policy 0, policy_version 28935 (0.0008) +[2026-06-07 03:14:11,494][495927] Updated weights for policy 0, policy_version 28947 (0.0005) +[2026-06-07 03:14:11,668][495927] Updated weights for policy 0, policy_version 28959 (0.0004) +[2026-06-07 03:14:11,825][495927] Updated weights for policy 0, policy_version 28969 (0.0004) +[2026-06-07 03:14:12,446][495927] Updated weights for policy 0, policy_version 28979 (0.0005) +[2026-06-07 03:14:12,595][495927] Updated weights for policy 0, policy_version 28990 (0.0008) +[2026-06-07 03:14:12,740][495927] Updated weights for policy 0, policy_version 29000 (0.0008) +[2026-06-07 03:14:12,886][495927] Updated weights for policy 0, policy_version 29010 (0.0008) +[2026-06-07 03:14:13,041][495927] Updated weights for policy 0, policy_version 29020 (0.0008) +[2026-06-07 03:14:13,192][495927] Updated weights for policy 0, policy_version 29030 (0.0008) +[2026-06-07 03:14:13,335][495927] Updated weights for policy 0, policy_version 29040 (0.0008) +[2026-06-07 03:14:13,909][492660] Fps is (10 sec: 22937.4, 60 sec: 22937.6, 300 sec: 22993.1). Total num frames: 14876672. Throughput: 0: 22755.6. Samples: 14889728. Policy #0 lag: (min: 23.0, avg: 37.7, max: 87.0) +[2026-06-07 03:14:13,910][492660] Avg episode reward: [(0, '1192.092')] +[2026-06-07 03:14:13,960][495927] Updated weights for policy 0, policy_version 29050 (0.0009) +[2026-06-07 03:14:14,113][495927] Updated weights for policy 0, policy_version 29061 (0.0008) +[2026-06-07 03:14:14,277][495927] Updated weights for policy 0, policy_version 29072 (0.0009) +[2026-06-07 03:14:14,444][495927] Updated weights for policy 0, policy_version 29083 (0.0009) +[2026-06-07 03:14:14,594][495927] Updated weights for policy 0, policy_version 29093 (0.0008) +[2026-06-07 03:14:14,742][495927] Updated weights for policy 0, policy_version 29103 (0.0008) +[2026-06-07 03:14:15,393][495927] Updated weights for policy 0, policy_version 29113 (0.0008) +[2026-06-07 03:14:15,540][495927] Updated weights for policy 0, policy_version 29123 (0.0008) +[2026-06-07 03:14:15,701][495927] Updated weights for policy 0, policy_version 29134 (0.0009) +[2026-06-07 03:14:15,846][495927] Updated weights for policy 0, policy_version 29144 (0.0008) +[2026-06-07 03:14:16,002][495927] Updated weights for policy 0, policy_version 29154 (0.0008) +[2026-06-07 03:14:16,153][495927] Updated weights for policy 0, policy_version 29164 (0.0008) +[2026-06-07 03:14:16,759][495927] Updated weights for policy 0, policy_version 29174 (0.0009) +[2026-06-07 03:14:16,914][495927] Updated weights for policy 0, policy_version 29184 (0.0008) +[2026-06-07 03:14:17,062][495927] Updated weights for policy 0, policy_version 29195 (0.0008) +[2026-06-07 03:14:17,219][495927] Updated weights for policy 0, policy_version 29205 (0.0008) +[2026-06-07 03:14:17,384][495927] Updated weights for policy 0, policy_version 29216 (0.0008) +[2026-06-07 03:14:17,538][495927] Updated weights for policy 0, policy_version 29226 (0.0008) +[2026-06-07 03:14:18,163][495927] Updated weights for policy 0, policy_version 29236 (0.0008) +[2026-06-07 03:14:18,313][495927] Updated weights for policy 0, policy_version 29246 (0.0008) +[2026-06-07 03:14:18,450][495927] Updated weights for policy 0, policy_version 29256 (0.0008) +[2026-06-07 03:14:18,608][495927] Updated weights for policy 0, policy_version 29266 (0.0008) +[2026-06-07 03:14:18,758][495927] Updated weights for policy 0, policy_version 29276 (0.0008) +[2026-06-07 03:14:18,909][492660] Fps is (10 sec: 22937.6, 60 sec: 22937.5, 300 sec: 22993.1). Total num frames: 14974976. Throughput: 0: 22800.9. Samples: 14958592. Policy #0 lag: (min: 23.0, avg: 37.7, max: 87.0) +[2026-06-07 03:14:18,910][492660] Avg episode reward: [(0, '1197.506')] +[2026-06-07 03:14:18,911][495927] Updated weights for policy 0, policy_version 29286 (0.0009) +[2026-06-07 03:14:19,546][495927] Updated weights for policy 0, policy_version 29297 (0.0009) +[2026-06-07 03:14:19,680][495927] Updated weights for policy 0, policy_version 29307 (0.0008) +[2026-06-07 03:14:19,840][495927] Updated weights for policy 0, policy_version 29317 (0.0008) +[2026-06-07 03:14:19,993][495927] Updated weights for policy 0, policy_version 29327 (0.0008) +[2026-06-07 03:14:20,153][495927] Updated weights for policy 0, policy_version 29338 (0.0008) +[2026-06-07 03:14:20,318][495927] Updated weights for policy 0, policy_version 29349 (0.0008) +[2026-06-07 03:14:20,474][495927] Updated weights for policy 0, policy_version 29359 (0.0009) +[2026-06-07 03:14:21,108][495927] Updated weights for policy 0, policy_version 29371 (0.0009) +[2026-06-07 03:14:21,269][495927] Updated weights for policy 0, policy_version 29382 (0.0008) +[2026-06-07 03:14:21,435][495927] Updated weights for policy 0, policy_version 29393 (0.0008) +[2026-06-07 03:14:21,588][495927] Updated weights for policy 0, policy_version 29403 (0.0008) +[2026-06-07 03:14:21,739][495927] Updated weights for policy 0, policy_version 29413 (0.0008) +[2026-06-07 03:14:21,890][495927] Updated weights for policy 0, policy_version 29423 (0.0008) +[2026-06-07 03:14:22,518][495927] Updated weights for policy 0, policy_version 29433 (0.0010) +[2026-06-07 03:14:22,672][495927] Updated weights for policy 0, policy_version 29443 (0.0010) +[2026-06-07 03:14:22,837][495927] Updated weights for policy 0, policy_version 29454 (0.0008) +[2026-06-07 03:14:22,982][495927] Updated weights for policy 0, policy_version 29464 (0.0008) +[2026-06-07 03:14:23,154][495927] Updated weights for policy 0, policy_version 29475 (0.0008) +[2026-06-07 03:14:23,297][495927] Updated weights for policy 0, policy_version 29485 (0.0008) +[2026-06-07 03:14:23,909][492660] Fps is (10 sec: 22937.5, 60 sec: 22937.6, 300 sec: 22993.1). Total num frames: 15106048. Throughput: 0: 23051.4. Samples: 15104768. Policy #0 lag: (min: 19.0, avg: 35.9, max: 83.0) +[2026-06-07 03:14:23,910][492660] Avg episode reward: [(0, '1188.835')] +[2026-06-07 03:14:23,933][495927] Updated weights for policy 0, policy_version 29496 (0.0009) +[2026-06-07 03:14:24,090][495927] Updated weights for policy 0, policy_version 29507 (0.0008) +[2026-06-07 03:14:24,240][495927] Updated weights for policy 0, policy_version 29517 (0.0008) +[2026-06-07 03:14:24,388][495927] Updated weights for policy 0, policy_version 29527 (0.0008) +[2026-06-07 03:14:24,545][495927] Updated weights for policy 0, policy_version 29537 (0.0009) +[2026-06-07 03:14:24,694][495927] Updated weights for policy 0, policy_version 29547 (0.0008) +[2026-06-07 03:14:25,326][495927] Updated weights for policy 0, policy_version 29557 (0.0008) +[2026-06-07 03:14:25,468][495927] Updated weights for policy 0, policy_version 29567 (0.0008) +[2026-06-07 03:14:25,634][495927] Updated weights for policy 0, policy_version 29578 (0.0009) +[2026-06-07 03:14:25,784][495927] Updated weights for policy 0, policy_version 29588 (0.0008) +[2026-06-07 03:14:25,930][495927] Updated weights for policy 0, policy_version 29598 (0.0008) +[2026-06-07 03:14:26,077][495927] Updated weights for policy 0, policy_version 29608 (0.0008) +[2026-06-07 03:14:26,719][495927] Updated weights for policy 0, policy_version 29619 (0.0009) +[2026-06-07 03:14:26,864][495927] Updated weights for policy 0, policy_version 29629 (0.0008) +[2026-06-07 03:14:27,038][495927] Updated weights for policy 0, policy_version 29641 (0.0008) +[2026-06-07 03:14:27,186][495927] Updated weights for policy 0, policy_version 29651 (0.0009) +[2026-06-07 03:14:27,338][495927] Updated weights for policy 0, policy_version 29661 (0.0008) +[2026-06-07 03:14:27,486][495927] Updated weights for policy 0, policy_version 29671 (0.0008) +[2026-06-07 03:14:28,130][495927] Updated weights for policy 0, policy_version 29682 (0.0008) +[2026-06-07 03:14:28,273][495927] Updated weights for policy 0, policy_version 29692 (0.0008) +[2026-06-07 03:14:28,420][495927] Updated weights for policy 0, policy_version 29702 (0.0008) +[2026-06-07 03:14:28,568][495927] Updated weights for policy 0, policy_version 29712 (0.0008) +[2026-06-07 03:14:28,742][495927] Updated weights for policy 0, policy_version 29723 (0.0008) +[2026-06-07 03:14:28,890][495927] Updated weights for policy 0, policy_version 29733 (0.0008) +[2026-06-07 03:14:28,909][492660] Fps is (10 sec: 22937.9, 60 sec: 22937.6, 300 sec: 22993.1). Total num frames: 15204352. Throughput: 0: 23111.1. Samples: 15241856. Policy #0 lag: (min: 19.0, avg: 35.9, max: 83.0) +[2026-06-07 03:14:28,910][492660] Avg episode reward: [(0, '1188.551')] +[2026-06-07 03:14:29,058][495927] Updated weights for policy 0, policy_version 29744 (0.0008) +[2026-06-07 03:14:29,674][495927] Updated weights for policy 0, policy_version 29754 (0.0008) +[2026-06-07 03:14:29,816][495927] Updated weights for policy 0, policy_version 29764 (0.0008) +[2026-06-07 03:14:29,997][495927] Updated weights for policy 0, policy_version 29776 (0.0008) +[2026-06-07 03:14:30,148][495927] Updated weights for policy 0, policy_version 29786 (0.0008) +[2026-06-07 03:14:30,301][495927] Updated weights for policy 0, policy_version 29796 (0.0008) +[2026-06-07 03:14:30,470][495927] Updated weights for policy 0, policy_version 29807 (0.0008) +[2026-06-07 03:14:31,077][495927] Updated weights for policy 0, policy_version 29817 (0.0009) +[2026-06-07 03:14:31,224][495927] Updated weights for policy 0, policy_version 29827 (0.0009) +[2026-06-07 03:14:31,381][495927] Updated weights for policy 0, policy_version 29838 (0.0009) +[2026-06-07 03:14:31,541][495927] Updated weights for policy 0, policy_version 29849 (0.0008) +[2026-06-07 03:14:31,696][495927] Updated weights for policy 0, policy_version 29859 (0.0009) +[2026-06-07 03:14:31,864][495927] Updated weights for policy 0, policy_version 29870 (0.0009) +[2026-06-07 03:14:32,508][495927] Updated weights for policy 0, policy_version 29881 (0.0009) +[2026-06-07 03:14:32,680][495927] Updated weights for policy 0, policy_version 29893 (0.0009) +[2026-06-07 03:14:32,834][495927] Updated weights for policy 0, policy_version 29903 (0.0009) +[2026-06-07 03:14:32,984][495927] Updated weights for policy 0, policy_version 29913 (0.0008) +[2026-06-07 03:14:33,133][495927] Updated weights for policy 0, policy_version 29923 (0.0008) +[2026-06-07 03:14:33,293][495927] Updated weights for policy 0, policy_version 29934 (0.0008) +[2026-06-07 03:14:33,909][492660] Fps is (10 sec: 22937.5, 60 sec: 22937.6, 300 sec: 22993.1). Total num frames: 15335424. Throughput: 0: 23091.3. Samples: 15308800. Policy #0 lag: (min: 19.0, avg: 35.9, max: 83.0) +[2026-06-07 03:14:33,910][492660] Avg episode reward: [(0, '1237.433')] +[2026-06-07 03:14:33,925][495927] Updated weights for policy 0, policy_version 29944 (0.0008) +[2026-06-07 03:14:34,070][495927] Updated weights for policy 0, policy_version 29954 (0.0008) +[2026-06-07 03:14:34,225][495927] Updated weights for policy 0, policy_version 29965 (0.0008) +[2026-06-07 03:14:34,391][495927] Updated weights for policy 0, policy_version 29976 (0.0008) +[2026-06-07 03:14:34,547][495927] Updated weights for policy 0, policy_version 29986 (0.0009) +[2026-06-07 03:14:34,704][495927] Updated weights for policy 0, policy_version 29996 (0.0009) +[2026-06-07 03:14:34,755][495570] Saving new best policy, reward=1237.433! +[2026-06-07 03:14:35,318][495927] Updated weights for policy 0, policy_version 30006 (0.0008) +[2026-06-07 03:14:35,464][495927] Updated weights for policy 0, policy_version 30016 (0.0008) +[2026-06-07 03:14:35,605][495927] Updated weights for policy 0, policy_version 30026 (0.0008) +[2026-06-07 03:14:35,768][495927] Updated weights for policy 0, policy_version 30036 (0.0008) +[2026-06-07 03:14:35,919][495927] Updated weights for policy 0, policy_version 30046 (0.0008) +[2026-06-07 03:14:36,075][495927] Updated weights for policy 0, policy_version 30056 (0.0008) +[2026-06-07 03:14:36,735][495927] Updated weights for policy 0, policy_version 30068 (0.0008) +[2026-06-07 03:14:36,884][495927] Updated weights for policy 0, policy_version 30078 (0.0009) +[2026-06-07 03:14:37,030][495927] Updated weights for policy 0, policy_version 30088 (0.0008) +[2026-06-07 03:14:37,178][495927] Updated weights for policy 0, policy_version 30098 (0.0009) +[2026-06-07 03:14:37,337][495927] Updated weights for policy 0, policy_version 30109 (0.0008) +[2026-06-07 03:14:37,490][495927] Updated weights for policy 0, policy_version 30119 (0.0008) +[2026-06-07 03:14:38,131][495927] Updated weights for policy 0, policy_version 30129 (0.0009) +[2026-06-07 03:14:38,283][495927] Updated weights for policy 0, policy_version 30140 (0.0008) +[2026-06-07 03:14:38,430][495927] Updated weights for policy 0, policy_version 30150 (0.0008) +[2026-06-07 03:14:38,578][495927] Updated weights for policy 0, policy_version 30160 (0.0008) +[2026-06-07 03:14:38,730][495927] Updated weights for policy 0, policy_version 30170 (0.0008) +[2026-06-07 03:14:38,896][495927] Updated weights for policy 0, policy_version 30181 (0.0008) +[2026-06-07 03:14:38,909][492660] Fps is (10 sec: 22937.7, 60 sec: 22937.6, 300 sec: 22993.2). Total num frames: 15433728. Throughput: 0: 22957.5. Samples: 15444480. Policy #0 lag: (min: 19.0, avg: 35.9, max: 83.0) +[2026-06-07 03:14:38,910][492660] Avg episode reward: [(0, '1236.770')] +[2026-06-07 03:14:39,050][495927] Updated weights for policy 0, policy_version 30191 (0.0009) +[2026-06-07 03:14:39,688][495927] Updated weights for policy 0, policy_version 30202 (0.0009) +[2026-06-07 03:14:39,839][495927] Updated weights for policy 0, policy_version 30212 (0.0008) +[2026-06-07 03:14:39,986][495927] Updated weights for policy 0, policy_version 30222 (0.0008) +[2026-06-07 03:14:40,138][495927] Updated weights for policy 0, policy_version 30232 (0.0008) +[2026-06-07 03:14:40,290][495927] Updated weights for policy 0, policy_version 30242 (0.0008) +[2026-06-07 03:14:40,452][495927] Updated weights for policy 0, policy_version 30253 (0.0009) +[2026-06-07 03:14:41,086][495927] Updated weights for policy 0, policy_version 30263 (0.0009) +[2026-06-07 03:14:41,226][495927] Updated weights for policy 0, policy_version 30273 (0.0008) +[2026-06-07 03:14:41,405][495927] Updated weights for policy 0, policy_version 30285 (0.0008) +[2026-06-07 03:14:41,552][495927] Updated weights for policy 0, policy_version 30295 (0.0008) +[2026-06-07 03:14:41,704][495927] Updated weights for policy 0, policy_version 30305 (0.0008) +[2026-06-07 03:14:41,853][495927] Updated weights for policy 0, policy_version 30315 (0.0008) +[2026-06-07 03:14:42,493][495927] Updated weights for policy 0, policy_version 30326 (0.0008) +[2026-06-07 03:14:42,669][495927] Updated weights for policy 0, policy_version 30339 (0.0008) +[2026-06-07 03:14:42,840][495927] Updated weights for policy 0, policy_version 30350 (0.0010) +[2026-06-07 03:14:42,988][495927] Updated weights for policy 0, policy_version 30360 (0.0008) +[2026-06-07 03:14:43,140][495927] Updated weights for policy 0, policy_version 30370 (0.0008) +[2026-06-07 03:14:43,303][495927] Updated weights for policy 0, policy_version 30381 (0.0008) +[2026-06-07 03:14:43,909][492660] Fps is (10 sec: 22937.7, 60 sec: 22937.6, 300 sec: 22993.1). Total num frames: 15564800. Throughput: 0: 22838.1. Samples: 15579904. Policy #0 lag: (min: 63.0, avg: 77.4, max: 127.0) +[2026-06-07 03:14:43,910][492660] Avg episode reward: [(0, '1273.400')] +[2026-06-07 03:14:43,963][495927] Updated weights for policy 0, policy_version 30391 (0.0009) +[2026-06-07 03:14:44,132][495927] Updated weights for policy 0, policy_version 30403 (0.0009) +[2026-06-07 03:14:44,284][495927] Updated weights for policy 0, policy_version 30413 (0.0008) +[2026-06-07 03:14:44,440][495927] Updated weights for policy 0, policy_version 30423 (0.0008) +[2026-06-07 03:14:44,588][495927] Updated weights for policy 0, policy_version 30433 (0.0009) +[2026-06-07 03:14:44,742][495927] Updated weights for policy 0, policy_version 30443 (0.0008) +[2026-06-07 03:14:44,813][495570] Saving new best policy, reward=1273.400! +[2026-06-07 03:14:45,366][495927] Updated weights for policy 0, policy_version 30453 (0.0008) +[2026-06-07 03:14:45,501][495927] Updated weights for policy 0, policy_version 30463 (0.0009) +[2026-06-07 03:14:45,656][495927] Updated weights for policy 0, policy_version 30473 (0.0009) +[2026-06-07 03:14:45,804][495927] Updated weights for policy 0, policy_version 30483 (0.0008) +[2026-06-07 03:14:45,958][495927] Updated weights for policy 0, policy_version 30493 (0.0009) +[2026-06-07 03:14:46,107][495927] Updated weights for policy 0, policy_version 30503 (0.0009) +[2026-06-07 03:14:46,741][495927] Updated weights for policy 0, policy_version 30513 (0.0009) +[2026-06-07 03:14:46,894][495927] Updated weights for policy 0, policy_version 30524 (0.0009) +[2026-06-07 03:14:47,055][495927] Updated weights for policy 0, policy_version 30535 (0.0009) +[2026-06-07 03:14:47,208][495927] Updated weights for policy 0, policy_version 30545 (0.0009) +[2026-06-07 03:14:47,370][495927] Updated weights for policy 0, policy_version 30556 (0.0009) +[2026-06-07 03:14:47,532][495927] Updated weights for policy 0, policy_version 30567 (0.0008) +[2026-06-07 03:14:48,175][495927] Updated weights for policy 0, policy_version 30577 (0.0009) +[2026-06-07 03:14:48,329][495927] Updated weights for policy 0, policy_version 30588 (0.0008) +[2026-06-07 03:14:48,480][495927] Updated weights for policy 0, policy_version 30598 (0.0009) +[2026-06-07 03:14:48,634][495927] Updated weights for policy 0, policy_version 30609 (0.0008) +[2026-06-07 03:14:48,791][495927] Updated weights for policy 0, policy_version 30619 (0.0008) +[2026-06-07 03:14:48,909][492660] Fps is (10 sec: 22937.4, 60 sec: 22937.6, 300 sec: 22993.1). Total num frames: 15663104. Throughput: 0: 22869.2. Samples: 15648768. Policy #0 lag: (min: 63.0, avg: 77.4, max: 127.0) +[2026-06-07 03:14:48,910][492660] Avg episode reward: [(0, '1254.446')] +[2026-06-07 03:14:48,937][495927] Updated weights for policy 0, policy_version 30629 (0.0008) +[2026-06-07 03:14:49,085][495927] Updated weights for policy 0, policy_version 30639 (0.0008) +[2026-06-07 03:14:49,696][495927] Updated weights for policy 0, policy_version 30649 (0.0004) +[2026-06-07 03:14:49,852][495927] Updated weights for policy 0, policy_version 30660 (0.0004) +[2026-06-07 03:14:50,007][495927] Updated weights for policy 0, policy_version 30670 (0.0006) +[2026-06-07 03:14:50,174][495927] Updated weights for policy 0, policy_version 30681 (0.0008) +[2026-06-07 03:14:50,336][495927] Updated weights for policy 0, policy_version 30692 (0.0008) +[2026-06-07 03:14:50,488][495927] Updated weights for policy 0, policy_version 30702 (0.0008) +[2026-06-07 03:14:51,113][495927] Updated weights for policy 0, policy_version 30712 (0.0008) +[2026-06-07 03:14:51,250][495927] Updated weights for policy 0, policy_version 30722 (0.0008) +[2026-06-07 03:14:51,401][495927] Updated weights for policy 0, policy_version 30732 (0.0008) +[2026-06-07 03:14:51,552][495927] Updated weights for policy 0, policy_version 30742 (0.0009) +[2026-06-07 03:14:51,716][495927] Updated weights for policy 0, policy_version 30753 (0.0008) +[2026-06-07 03:14:51,863][495927] Updated weights for policy 0, policy_version 30763 (0.0010) +[2026-06-07 03:14:52,512][495927] Updated weights for policy 0, policy_version 30775 (0.0009) +[2026-06-07 03:14:52,656][495927] Updated weights for policy 0, policy_version 30785 (0.0008) +[2026-06-07 03:14:52,821][495927] Updated weights for policy 0, policy_version 30796 (0.0009) +[2026-06-07 03:14:52,966][495927] Updated weights for policy 0, policy_version 30806 (0.0007) +[2026-06-07 03:14:53,119][495927] Updated weights for policy 0, policy_version 30816 (0.0004) +[2026-06-07 03:14:53,276][495927] Updated weights for policy 0, policy_version 30827 (0.0004) +[2026-06-07 03:14:53,909][492660] Fps is (10 sec: 22937.5, 60 sec: 22937.6, 300 sec: 22993.1). Total num frames: 15794176. Throughput: 0: 23114.0. Samples: 15794560. Policy #0 lag: (min: 63.0, avg: 77.4, max: 127.0) +[2026-06-07 03:14:53,910][492660] Avg episode reward: [(0, '1222.983')] +[2026-06-07 03:14:53,927][495927] Updated weights for policy 0, policy_version 30840 (0.0004) +[2026-06-07 03:14:54,098][495927] Updated weights for policy 0, policy_version 30852 (0.0004) +[2026-06-07 03:14:54,245][495927] Updated weights for policy 0, policy_version 30862 (0.0007) +[2026-06-07 03:14:54,406][495927] Updated weights for policy 0, policy_version 30873 (0.0008) +[2026-06-07 03:14:54,560][495927] Updated weights for policy 0, policy_version 30884 (0.0008) +[2026-06-07 03:14:54,725][495927] Updated weights for policy 0, policy_version 30895 (0.0008) +[2026-06-07 03:14:55,393][495927] Updated weights for policy 0, policy_version 30907 (0.0009) +[2026-06-07 03:14:55,555][495927] Updated weights for policy 0, policy_version 30919 (0.0009) +[2026-06-07 03:14:55,712][495927] Updated weights for policy 0, policy_version 30929 (0.0008) +[2026-06-07 03:14:55,854][495927] Updated weights for policy 0, policy_version 30939 (0.0008) +[2026-06-07 03:14:56,005][495927] Updated weights for policy 0, policy_version 30949 (0.0009) +[2026-06-07 03:14:56,155][495927] Updated weights for policy 0, policy_version 30959 (0.0008) +[2026-06-07 03:14:56,770][495927] Updated weights for policy 0, policy_version 30969 (0.0009) +[2026-06-07 03:14:56,917][495927] Updated weights for policy 0, policy_version 30979 (0.0009) +[2026-06-07 03:14:57,061][495927] Updated weights for policy 0, policy_version 30989 (0.0008) +[2026-06-07 03:14:57,249][495927] Updated weights for policy 0, policy_version 31002 (0.0009) +[2026-06-07 03:14:57,409][495927] Updated weights for policy 0, policy_version 31013 (0.0008) +[2026-06-07 03:14:57,557][495927] Updated weights for policy 0, policy_version 31023 (0.0008) +[2026-06-07 03:14:58,200][495927] Updated weights for policy 0, policy_version 31034 (0.0008) +[2026-06-07 03:14:58,343][495927] Updated weights for policy 0, policy_version 31044 (0.0008) +[2026-06-07 03:14:58,522][495927] Updated weights for policy 0, policy_version 31056 (0.0008) +[2026-06-07 03:14:58,683][495927] Updated weights for policy 0, policy_version 31067 (0.0008) +[2026-06-07 03:14:58,822][495927] Updated weights for policy 0, policy_version 31077 (0.0008) +[2026-06-07 03:14:58,909][492660] Fps is (10 sec: 22937.7, 60 sec: 22937.6, 300 sec: 22993.1). Total num frames: 15892480. Throughput: 0: 23173.6. Samples: 15932544. Policy #0 lag: (min: 63.0, avg: 77.4, max: 127.0) +[2026-06-07 03:14:58,910][492660] Avg episode reward: [(0, '1249.662')] +[2026-06-07 03:14:58,991][495927] Updated weights for policy 0, policy_version 31088 (0.0008) +[2026-06-07 03:14:59,649][495927] Updated weights for policy 0, policy_version 31100 (0.0009) +[2026-06-07 03:14:59,808][495927] Updated weights for policy 0, policy_version 31111 (0.0009) +[2026-06-07 03:14:59,971][495927] Updated weights for policy 0, policy_version 31122 (0.0008) +[2026-06-07 03:15:00,126][495927] Updated weights for policy 0, policy_version 31133 (0.0008) +[2026-06-07 03:15:00,280][495927] Updated weights for policy 0, policy_version 31143 (0.0008) +[2026-06-07 03:15:00,960][495927] Updated weights for policy 0, policy_version 31154 (0.0009) +[2026-06-07 03:15:01,129][495927] Updated weights for policy 0, policy_version 31167 (0.0009) +[2026-06-07 03:15:01,286][495927] Updated weights for policy 0, policy_version 31178 (0.0008) +[2026-06-07 03:15:01,434][495927] Updated weights for policy 0, policy_version 31188 (0.0009) +[2026-06-07 03:15:01,589][495927] Updated weights for policy 0, policy_version 31198 (0.0009) +[2026-06-07 03:15:01,738][495927] Updated weights for policy 0, policy_version 31208 (0.0008) +[2026-06-07 03:15:02,398][495927] Updated weights for policy 0, policy_version 31218 (0.0009) +[2026-06-07 03:15:02,530][495927] Updated weights for policy 0, policy_version 31228 (0.0009) +[2026-06-07 03:15:02,691][495927] Updated weights for policy 0, policy_version 31239 (0.0008) +[2026-06-07 03:15:02,852][495927] Updated weights for policy 0, policy_version 31250 (0.0008) +[2026-06-07 03:15:03,011][495927] Updated weights for policy 0, policy_version 31260 (0.0008) +[2026-06-07 03:15:03,149][495927] Updated weights for policy 0, policy_version 31270 (0.0008) +[2026-06-07 03:15:03,803][495927] Updated weights for policy 0, policy_version 31281 (0.0009) +[2026-06-07 03:15:03,909][492660] Fps is (10 sec: 22937.7, 60 sec: 22937.6, 300 sec: 22993.1). Total num frames: 16023552. Throughput: 0: 23142.5. Samples: 16000000. Policy #0 lag: (min: 63.0, avg: 77.4, max: 127.0) +[2026-06-07 03:15:03,910][492660] Avg episode reward: [(0, '1291.699')] +[2026-06-07 03:15:03,950][495927] Updated weights for policy 0, policy_version 31292 (0.0008) +[2026-06-07 03:15:04,132][495927] Updated weights for policy 0, policy_version 31304 (0.0008) +[2026-06-07 03:15:04,278][495927] Updated weights for policy 0, policy_version 31314 (0.0008) +[2026-06-07 03:15:04,432][495927] Updated weights for policy 0, policy_version 31324 (0.0009) +[2026-06-07 03:15:04,584][495927] Updated weights for policy 0, policy_version 31334 (0.0009) +[2026-06-07 03:15:04,737][495570] Saving new best policy, reward=1291.699! +[2026-06-07 03:15:04,740][495927] Updated weights for policy 0, policy_version 31344 (0.0009) +[2026-06-07 03:15:05,363][495927] Updated weights for policy 0, policy_version 31354 (0.0009) +[2026-06-07 03:15:05,545][495927] Updated weights for policy 0, policy_version 31366 (0.0009) +[2026-06-07 03:15:05,692][495927] Updated weights for policy 0, policy_version 31376 (0.0008) +[2026-06-07 03:15:05,842][495927] Updated weights for policy 0, policy_version 31386 (0.0008) +[2026-06-07 03:15:05,991][495927] Updated weights for policy 0, policy_version 31396 (0.0008) +[2026-06-07 03:15:06,153][495927] Updated weights for policy 0, policy_version 31407 (0.0008) +[2026-06-07 03:15:06,792][495927] Updated weights for policy 0, policy_version 31419 (0.0009) +[2026-06-07 03:15:06,947][495927] Updated weights for policy 0, policy_version 31429 (0.0008) +[2026-06-07 03:15:07,095][495927] Updated weights for policy 0, policy_version 31439 (0.0008) +[2026-06-07 03:15:07,245][495927] Updated weights for policy 0, policy_version 31449 (0.0008) +[2026-06-07 03:15:07,395][495927] Updated weights for policy 0, policy_version 31459 (0.0008) +[2026-06-07 03:15:07,544][495927] Updated weights for policy 0, policy_version 31469 (0.0009) +[2026-06-07 03:15:08,168][495927] Updated weights for policy 0, policy_version 31479 (0.0008) +[2026-06-07 03:15:08,307][495927] Updated weights for policy 0, policy_version 31489 (0.0008) +[2026-06-07 03:15:08,459][495927] Updated weights for policy 0, policy_version 31499 (0.0008) +[2026-06-07 03:15:08,636][495927] Updated weights for policy 0, policy_version 31511 (0.0010) +[2026-06-07 03:15:08,786][495927] Updated weights for policy 0, policy_version 31521 (0.0008) +[2026-06-07 03:15:08,909][492660] Fps is (10 sec: 22937.7, 60 sec: 22937.7, 300 sec: 22993.1). Total num frames: 16121856. Throughput: 0: 22914.8. Samples: 16135936. Policy #0 lag: (min: 63.0, avg: 77.1, max: 127.0) +[2026-06-07 03:15:08,910][492660] Avg episode reward: [(0, '1280.540')] +[2026-06-07 03:15:08,948][495927] Updated weights for policy 0, policy_version 31532 (0.0008) +[2026-06-07 03:15:09,604][495927] Updated weights for policy 0, policy_version 31544 (0.0008) +[2026-06-07 03:15:09,739][495927] Updated weights for policy 0, policy_version 31554 (0.0008) +[2026-06-07 03:15:09,893][495927] Updated weights for policy 0, policy_version 31564 (0.0008) +[2026-06-07 03:15:10,040][495927] Updated weights for policy 0, policy_version 31574 (0.0008) +[2026-06-07 03:15:10,188][495927] Updated weights for policy 0, policy_version 31584 (0.0009) +[2026-06-07 03:15:10,372][495927] Updated weights for policy 0, policy_version 31596 (0.0008) +[2026-06-07 03:15:10,998][495927] Updated weights for policy 0, policy_version 31607 (0.0005) +[2026-06-07 03:15:11,142][495927] Updated weights for policy 0, policy_version 31617 (0.0004) +[2026-06-07 03:15:11,307][495927] Updated weights for policy 0, policy_version 31628 (0.0004) +[2026-06-07 03:15:11,457][495927] Updated weights for policy 0, policy_version 31638 (0.0004) +[2026-06-07 03:15:11,624][495927] Updated weights for policy 0, policy_version 31649 (0.0004) +[2026-06-07 03:15:11,776][495927] Updated weights for policy 0, policy_version 31659 (0.0004) +[2026-06-07 03:15:12,386][495927] Updated weights for policy 0, policy_version 31669 (0.0006) +[2026-06-07 03:15:12,537][495927] Updated weights for policy 0, policy_version 31679 (0.0008) +[2026-06-07 03:15:12,704][495927] Updated weights for policy 0, policy_version 31691 (0.0008) +[2026-06-07 03:15:12,851][495927] Updated weights for policy 0, policy_version 31701 (0.0009) +[2026-06-07 03:15:13,008][495927] Updated weights for policy 0, policy_version 31712 (0.0009) +[2026-06-07 03:15:13,166][495927] Updated weights for policy 0, policy_version 31722 (0.0008) +[2026-06-07 03:15:13,805][495927] Updated weights for policy 0, policy_version 31733 (0.0009) +[2026-06-07 03:15:13,909][492660] Fps is (10 sec: 22937.6, 60 sec: 22937.6, 300 sec: 22993.1). Total num frames: 16252928. Throughput: 0: 22866.5. Samples: 16270848. Policy #0 lag: (min: 63.0, avg: 77.1, max: 127.0) +[2026-06-07 03:15:13,910][492660] Avg episode reward: [(0, '1288.112')] +[2026-06-07 03:15:13,943][495927] Updated weights for policy 0, policy_version 31743 (0.0008) +[2026-06-07 03:15:14,104][495927] Updated weights for policy 0, policy_version 31754 (0.0008) +[2026-06-07 03:15:14,249][495927] Updated weights for policy 0, policy_version 31764 (0.0009) +[2026-06-07 03:15:14,411][495927] Updated weights for policy 0, policy_version 31775 (0.0008) +[2026-06-07 03:15:14,581][495927] Updated weights for policy 0, policy_version 31787 (0.0008) +[2026-06-07 03:15:15,280][495927] Updated weights for policy 0, policy_version 31799 (0.0009) +[2026-06-07 03:15:15,441][495927] Updated weights for policy 0, policy_version 31810 (0.0009) +[2026-06-07 03:15:15,588][495927] Updated weights for policy 0, policy_version 31820 (0.0008) +[2026-06-07 03:15:15,735][495927] Updated weights for policy 0, policy_version 31830 (0.0008) +[2026-06-07 03:15:15,884][495927] Updated weights for policy 0, policy_version 31840 (0.0008) +[2026-06-07 03:15:16,049][495927] Updated weights for policy 0, policy_version 31851 (0.0008) +[2026-06-07 03:15:16,666][495927] Updated weights for policy 0, policy_version 31861 (0.0008) +[2026-06-07 03:15:16,826][495927] Updated weights for policy 0, policy_version 31872 (0.0009) +[2026-06-07 03:15:16,974][495927] Updated weights for policy 0, policy_version 31882 (0.0009) +[2026-06-07 03:15:17,127][495927] Updated weights for policy 0, policy_version 31892 (0.0004) +[2026-06-07 03:15:17,293][495927] Updated weights for policy 0, policy_version 31903 (0.0005) +[2026-06-07 03:15:17,469][495927] Updated weights for policy 0, policy_version 31914 (0.0009) +[2026-06-07 03:15:18,101][495927] Updated weights for policy 0, policy_version 31925 (0.0010) +[2026-06-07 03:15:18,246][495927] Updated weights for policy 0, policy_version 31935 (0.0009) +[2026-06-07 03:15:18,403][495927] Updated weights for policy 0, policy_version 31945 (0.0009) +[2026-06-07 03:15:18,541][495927] Updated weights for policy 0, policy_version 31955 (0.0009) +[2026-06-07 03:15:18,694][495927] Updated weights for policy 0, policy_version 31965 (0.0009) +[2026-06-07 03:15:18,851][495927] Updated weights for policy 0, policy_version 31975 (0.0008) +[2026-06-07 03:15:18,909][492660] Fps is (10 sec: 22937.8, 60 sec: 22937.7, 300 sec: 22993.1). Total num frames: 16351232. Throughput: 0: 22980.3. Samples: 16342912. Policy #0 lag: (min: 63.0, avg: 77.1, max: 127.0) +[2026-06-07 03:15:18,910][492660] Avg episode reward: [(0, '1314.478')] +[2026-06-07 03:15:18,981][495570] Saving new best policy, reward=1314.478! +[2026-06-07 03:15:19,474][495927] Updated weights for policy 0, policy_version 31985 (0.0008) +[2026-06-07 03:15:19,620][495927] Updated weights for policy 0, policy_version 31995 (0.0009) +[2026-06-07 03:15:19,767][495927] Updated weights for policy 0, policy_version 32005 (0.0009) +[2026-06-07 03:15:19,936][495927] Updated weights for policy 0, policy_version 32016 (0.0009) +[2026-06-07 03:15:20,113][495927] Updated weights for policy 0, policy_version 32028 (0.0008) +[2026-06-07 03:15:20,284][495927] Updated weights for policy 0, policy_version 32039 (0.0008) +[2026-06-07 03:15:20,914][495927] Updated weights for policy 0, policy_version 32050 (0.0008) +[2026-06-07 03:15:21,073][495927] Updated weights for policy 0, policy_version 32061 (0.0009) +[2026-06-07 03:15:21,217][495927] Updated weights for policy 0, policy_version 32071 (0.0008) +[2026-06-07 03:15:21,374][495927] Updated weights for policy 0, policy_version 32081 (0.0009) +[2026-06-07 03:15:21,527][495927] Updated weights for policy 0, policy_version 32091 (0.0008) +[2026-06-07 03:15:21,690][495927] Updated weights for policy 0, policy_version 32102 (0.0008) +[2026-06-07 03:15:22,327][495927] Updated weights for policy 0, policy_version 32113 (0.0009) +[2026-06-07 03:15:22,485][495927] Updated weights for policy 0, policy_version 32124 (0.0009) +[2026-06-07 03:15:22,637][495927] Updated weights for policy 0, policy_version 32134 (0.0008) +[2026-06-07 03:15:22,786][495927] Updated weights for policy 0, policy_version 32144 (0.0008) +[2026-06-07 03:15:22,935][495927] Updated weights for policy 0, policy_version 32154 (0.0008) +[2026-06-07 03:15:23,093][495927] Updated weights for policy 0, policy_version 32164 (0.0008) +[2026-06-07 03:15:23,242][495927] Updated weights for policy 0, policy_version 32174 (0.0009) +[2026-06-07 03:15:23,862][495927] Updated weights for policy 0, policy_version 32184 (0.0008) +[2026-06-07 03:15:23,909][492660] Fps is (10 sec: 22937.6, 60 sec: 22937.6, 300 sec: 22993.1). Total num frames: 16482304. Throughput: 0: 23168.0. Samples: 16487040. Policy #0 lag: (min: 63.0, avg: 77.1, max: 127.0) +[2026-06-07 03:15:23,910][492660] Avg episode reward: [(0, '1378.324')] +[2026-06-07 03:15:24,015][495927] Updated weights for policy 0, policy_version 32194 (0.0005) +[2026-06-07 03:15:24,195][495927] Updated weights for policy 0, policy_version 32206 (0.0005) +[2026-06-07 03:15:24,347][495927] Updated weights for policy 0, policy_version 32216 (0.0005) +[2026-06-07 03:15:24,503][495927] Updated weights for policy 0, policy_version 32226 (0.0007) +[2026-06-07 03:15:24,668][495927] Updated weights for policy 0, policy_version 32237 (0.0008) +[2026-06-07 03:15:24,706][495570] Saving new best policy, reward=1378.324! +[2026-06-07 03:15:25,268][495927] Updated weights for policy 0, policy_version 32247 (0.0011) +[2026-06-07 03:15:25,415][495927] Updated weights for policy 0, policy_version 32257 (0.0010) +[2026-06-07 03:15:25,564][495927] Updated weights for policy 0, policy_version 32267 (0.0011) +[2026-06-07 03:15:25,717][495927] Updated weights for policy 0, policy_version 32277 (0.0009) +[2026-06-07 03:15:25,865][495927] Updated weights for policy 0, policy_version 32287 (0.0009) +[2026-06-07 03:15:26,017][495927] Updated weights for policy 0, policy_version 32297 (0.0008) +[2026-06-07 03:15:26,637][495927] Updated weights for policy 0, policy_version 32307 (0.0009) +[2026-06-07 03:15:26,783][495927] Updated weights for policy 0, policy_version 32317 (0.0010) +[2026-06-07 03:15:26,938][495927] Updated weights for policy 0, policy_version 32327 (0.0008) +[2026-06-07 03:15:27,081][495927] Updated weights for policy 0, policy_version 32337 (0.0008) +[2026-06-07 03:15:27,262][495927] Updated weights for policy 0, policy_version 32349 (0.0008) +[2026-06-07 03:15:27,411][495927] Updated weights for policy 0, policy_version 32359 (0.0010) +[2026-06-07 03:15:28,033][495927] Updated weights for policy 0, policy_version 32369 (0.0010) +[2026-06-07 03:15:28,179][495927] Updated weights for policy 0, policy_version 32379 (0.0008) +[2026-06-07 03:15:28,330][495927] Updated weights for policy 0, policy_version 32390 (0.0008) +[2026-06-07 03:15:28,478][495927] Updated weights for policy 0, policy_version 32400 (0.0008) +[2026-06-07 03:15:28,640][495927] Updated weights for policy 0, policy_version 32410 (0.0009) +[2026-06-07 03:15:28,792][495927] Updated weights for policy 0, policy_version 32420 (0.0011) +[2026-06-07 03:15:28,909][492660] Fps is (10 sec: 22937.6, 60 sec: 22937.6, 300 sec: 22993.1). Total num frames: 16580608. Throughput: 0: 23133.9. Samples: 16620928. Policy #0 lag: (min: 14.0, avg: 28.2, max: 78.0) +[2026-06-07 03:15:28,910][492660] Avg episode reward: [(0, '1373.055')] +[2026-06-07 03:15:28,971][495927] Updated weights for policy 0, policy_version 32432 (0.0010) +[2026-06-07 03:15:29,583][495927] Updated weights for policy 0, policy_version 32442 (0.0008) +[2026-06-07 03:15:29,728][495927] Updated weights for policy 0, policy_version 32452 (0.0009) +[2026-06-07 03:15:29,881][495927] Updated weights for policy 0, policy_version 32462 (0.0011) +[2026-06-07 03:15:30,031][495927] Updated weights for policy 0, policy_version 32472 (0.0010) +[2026-06-07 03:15:30,182][495927] Updated weights for policy 0, policy_version 32482 (0.0010) +[2026-06-07 03:15:30,333][495927] Updated weights for policy 0, policy_version 32492 (0.0009) +[2026-06-07 03:15:30,992][495927] Updated weights for policy 0, policy_version 32503 (0.0009) +[2026-06-07 03:15:31,135][495927] Updated weights for policy 0, policy_version 32513 (0.0008) +[2026-06-07 03:15:31,295][495927] Updated weights for policy 0, policy_version 32524 (0.0008) +[2026-06-07 03:15:31,443][495927] Updated weights for policy 0, policy_version 32534 (0.0008) +[2026-06-07 03:15:31,604][495927] Updated weights for policy 0, policy_version 32544 (0.0009) +[2026-06-07 03:15:31,756][495927] Updated weights for policy 0, policy_version 32554 (0.0009) +[2026-06-07 03:15:32,385][495927] Updated weights for policy 0, policy_version 32565 (0.0008) +[2026-06-07 03:15:32,527][495927] Updated weights for policy 0, policy_version 32575 (0.0008) +[2026-06-07 03:15:32,678][495927] Updated weights for policy 0, policy_version 32585 (0.0009) +[2026-06-07 03:15:32,829][495927] Updated weights for policy 0, policy_version 32595 (0.0008) +[2026-06-07 03:15:32,994][495927] Updated weights for policy 0, policy_version 32606 (0.0008) +[2026-06-07 03:15:33,155][495927] Updated weights for policy 0, policy_version 32616 (0.0008) +[2026-06-07 03:15:33,797][495927] Updated weights for policy 0, policy_version 32627 (0.0009) +[2026-06-07 03:15:33,909][492660] Fps is (10 sec: 22937.6, 60 sec: 22937.6, 300 sec: 22993.1). Total num frames: 16711680. Throughput: 0: 23105.5. Samples: 16688512. Policy #0 lag: (min: 14.0, avg: 28.2, max: 78.0) +[2026-06-07 03:15:33,910][492660] Avg episode reward: [(0, '1364.426')] +[2026-06-07 03:15:33,938][495927] Updated weights for policy 0, policy_version 32637 (0.0008) +[2026-06-07 03:15:34,083][495927] Updated weights for policy 0, policy_version 32647 (0.0008) +[2026-06-07 03:15:34,236][495927] Updated weights for policy 0, policy_version 32657 (0.0008) +[2026-06-07 03:15:34,379][495927] Updated weights for policy 0, policy_version 32667 (0.0008) +[2026-06-07 03:15:34,547][495927] Updated weights for policy 0, policy_version 32678 (0.0008) +[2026-06-07 03:15:34,696][495927] Updated weights for policy 0, policy_version 32688 (0.0009) +[2026-06-07 03:15:35,356][495927] Updated weights for policy 0, policy_version 32700 (0.0009) +[2026-06-07 03:15:35,500][495927] Updated weights for policy 0, policy_version 32710 (0.0008) +[2026-06-07 03:15:35,669][495927] Updated weights for policy 0, policy_version 32721 (0.0007) +[2026-06-07 03:15:35,823][495927] Updated weights for policy 0, policy_version 32731 (0.0008) +[2026-06-07 03:15:35,977][495927] Updated weights for policy 0, policy_version 32741 (0.0008) +[2026-06-07 03:15:36,129][495927] Updated weights for policy 0, policy_version 32751 (0.0008) +[2026-06-07 03:15:36,745][495927] Updated weights for policy 0, policy_version 32761 (0.0009) +[2026-06-07 03:15:36,886][495927] Updated weights for policy 0, policy_version 32771 (0.0008) +[2026-06-07 03:15:37,035][495927] Updated weights for policy 0, policy_version 32781 (0.0008) +[2026-06-07 03:15:37,206][495927] Updated weights for policy 0, policy_version 32792 (0.0009) +[2026-06-07 03:15:37,354][495927] Updated weights for policy 0, policy_version 32802 (0.0009) +[2026-06-07 03:15:37,509][495927] Updated weights for policy 0, policy_version 32812 (0.0009) +[2026-06-07 03:15:38,138][495927] Updated weights for policy 0, policy_version 32822 (0.0009) +[2026-06-07 03:15:38,288][495927] Updated weights for policy 0, policy_version 32832 (0.0008) +[2026-06-07 03:15:38,432][495927] Updated weights for policy 0, policy_version 32842 (0.0008) +[2026-06-07 03:15:38,584][495927] Updated weights for policy 0, policy_version 32852 (0.0008) +[2026-06-07 03:15:38,738][495927] Updated weights for policy 0, policy_version 32862 (0.0009) +[2026-06-07 03:15:38,889][495927] Updated weights for policy 0, policy_version 32872 (0.0009) +[2026-06-07 03:15:38,909][492660] Fps is (10 sec: 22937.5, 60 sec: 22937.6, 300 sec: 22993.1). Total num frames: 16809984. Throughput: 0: 22855.1. Samples: 16823040. Policy #0 lag: (min: 14.0, avg: 28.2, max: 78.0) +[2026-06-07 03:15:38,910][492660] Avg episode reward: [(0, '1394.388')] +[2026-06-07 03:15:39,000][495570] Saving new best policy, reward=1394.388! +[2026-06-07 03:15:39,512][495927] Updated weights for policy 0, policy_version 32882 (0.0009) +[2026-06-07 03:15:39,670][495927] Updated weights for policy 0, policy_version 32893 (0.0008) +[2026-06-07 03:15:39,809][495927] Updated weights for policy 0, policy_version 32903 (0.0008) +[2026-06-07 03:15:39,961][495927] Updated weights for policy 0, policy_version 32913 (0.0008) +[2026-06-07 03:15:40,111][495927] Updated weights for policy 0, policy_version 32923 (0.0008) +[2026-06-07 03:15:40,270][495927] Updated weights for policy 0, policy_version 32933 (0.0008) +[2026-06-07 03:15:40,420][495927] Updated weights for policy 0, policy_version 32943 (0.0008) +[2026-06-07 03:15:41,036][495927] Updated weights for policy 0, policy_version 32953 (0.0008) +[2026-06-07 03:15:41,182][495927] Updated weights for policy 0, policy_version 32963 (0.0008) +[2026-06-07 03:15:41,331][495927] Updated weights for policy 0, policy_version 32973 (0.0009) +[2026-06-07 03:15:41,520][495927] Updated weights for policy 0, policy_version 32985 (0.0008) +[2026-06-07 03:15:41,665][495927] Updated weights for policy 0, policy_version 32995 (0.0008) +[2026-06-07 03:15:41,811][495927] Updated weights for policy 0, policy_version 33005 (0.0008) +[2026-06-07 03:15:42,454][495927] Updated weights for policy 0, policy_version 33016 (0.0009) +[2026-06-07 03:15:42,611][495927] Updated weights for policy 0, policy_version 33027 (0.0008) +[2026-06-07 03:15:42,776][495927] Updated weights for policy 0, policy_version 33038 (0.0008) +[2026-06-07 03:15:42,928][495927] Updated weights for policy 0, policy_version 33048 (0.0009) +[2026-06-07 03:15:43,098][495927] Updated weights for policy 0, policy_version 33059 (0.0008) +[2026-06-07 03:15:43,254][495927] Updated weights for policy 0, policy_version 33069 (0.0008) +[2026-06-07 03:15:43,863][495927] Updated weights for policy 0, policy_version 33079 (0.0008) +[2026-06-07 03:15:43,909][492660] Fps is (10 sec: 22937.4, 60 sec: 22937.6, 300 sec: 22993.1). Total num frames: 16941056. Throughput: 0: 22775.5. Samples: 16957440. Policy #0 lag: (min: 14.0, avg: 28.2, max: 78.0) +[2026-06-07 03:15:43,910][492660] Avg episode reward: [(0, '1363.093')] +[2026-06-07 03:15:44,001][495927] Updated weights for policy 0, policy_version 33089 (0.0008) +[2026-06-07 03:15:44,155][495927] Updated weights for policy 0, policy_version 33099 (0.0009) +[2026-06-07 03:15:44,322][495927] Updated weights for policy 0, policy_version 33110 (0.0008) +[2026-06-07 03:15:44,464][495927] Updated weights for policy 0, policy_version 33120 (0.0008) +[2026-06-07 03:15:44,617][495927] Updated weights for policy 0, policy_version 33130 (0.0008) +[2026-06-07 03:15:45,263][495927] Updated weights for policy 0, policy_version 33140 (0.0008) +[2026-06-07 03:15:45,407][495927] Updated weights for policy 0, policy_version 33150 (0.0008) +[2026-06-07 03:15:45,553][495927] Updated weights for policy 0, policy_version 33160 (0.0008) +[2026-06-07 03:15:45,728][495927] Updated weights for policy 0, policy_version 33172 (0.0009) +[2026-06-07 03:15:45,885][495927] Updated weights for policy 0, policy_version 33182 (0.0009) +[2026-06-07 03:15:46,053][495927] Updated weights for policy 0, policy_version 33193 (0.0009) +[2026-06-07 03:15:46,660][495927] Updated weights for policy 0, policy_version 33203 (0.0008) +[2026-06-07 03:15:46,821][495927] Updated weights for policy 0, policy_version 33214 (0.0008) +[2026-06-07 03:15:46,995][495927] Updated weights for policy 0, policy_version 33226 (0.0008) +[2026-06-07 03:15:47,145][495927] Updated weights for policy 0, policy_version 33236 (0.0009) +[2026-06-07 03:15:47,313][495927] Updated weights for policy 0, policy_version 33247 (0.0008) +[2026-06-07 03:15:47,462][495927] Updated weights for policy 0, policy_version 33257 (0.0009) +[2026-06-07 03:15:48,102][495927] Updated weights for policy 0, policy_version 33267 (0.0008) +[2026-06-07 03:15:48,245][495927] Updated weights for policy 0, policy_version 33277 (0.0008) +[2026-06-07 03:15:48,394][495927] Updated weights for policy 0, policy_version 33287 (0.0008) +[2026-06-07 03:15:48,565][495927] Updated weights for policy 0, policy_version 33298 (0.0008) +[2026-06-07 03:15:48,727][495927] Updated weights for policy 0, policy_version 33309 (0.0008) +[2026-06-07 03:15:48,896][495927] Updated weights for policy 0, policy_version 33320 (0.0008) +[2026-06-07 03:15:48,909][492660] Fps is (10 sec: 22937.5, 60 sec: 22937.6, 300 sec: 22882.1). Total num frames: 17039360. Throughput: 0: 22829.5. Samples: 17027328. Policy #0 lag: (min: 14.0, avg: 28.2, max: 78.0) +[2026-06-07 03:15:48,910][492660] Avg episode reward: [(0, '1331.164')] +[2026-06-07 03:15:49,506][495927] Updated weights for policy 0, policy_version 33330 (0.0008) +[2026-06-07 03:15:49,663][495927] Updated weights for policy 0, policy_version 33341 (0.0008) +[2026-06-07 03:15:49,806][495927] Updated weights for policy 0, policy_version 33351 (0.0008) +[2026-06-07 03:15:49,972][495927] Updated weights for policy 0, policy_version 33362 (0.0008) +[2026-06-07 03:15:50,124][495927] Updated weights for policy 0, policy_version 33372 (0.0008) +[2026-06-07 03:15:50,274][495927] Updated weights for policy 0, policy_version 33382 (0.0008) +[2026-06-07 03:15:50,427][495927] Updated weights for policy 0, policy_version 33392 (0.0008) +[2026-06-07 03:15:51,064][495927] Updated weights for policy 0, policy_version 33403 (0.0009) +[2026-06-07 03:15:51,215][495927] Updated weights for policy 0, policy_version 33413 (0.0008) +[2026-06-07 03:15:51,368][495927] Updated weights for policy 0, policy_version 33423 (0.0008) +[2026-06-07 03:15:51,515][495927] Updated weights for policy 0, policy_version 33433 (0.0009) +[2026-06-07 03:15:51,686][495927] Updated weights for policy 0, policy_version 33444 (0.0008) +[2026-06-07 03:15:51,857][495927] Updated weights for policy 0, policy_version 33456 (0.0008) +[2026-06-07 03:15:52,484][495927] Updated weights for policy 0, policy_version 33466 (0.0008) +[2026-06-07 03:15:52,631][495927] Updated weights for policy 0, policy_version 33476 (0.0008) +[2026-06-07 03:15:52,782][495927] Updated weights for policy 0, policy_version 33486 (0.0008) +[2026-06-07 03:15:52,948][495927] Updated weights for policy 0, policy_version 33497 (0.0009) +[2026-06-07 03:15:53,101][495927] Updated weights for policy 0, policy_version 33507 (0.0008) +[2026-06-07 03:15:53,253][495927] Updated weights for policy 0, policy_version 33517 (0.0008) +[2026-06-07 03:15:53,885][495927] Updated weights for policy 0, policy_version 33527 (0.0009) +[2026-06-07 03:15:53,909][492660] Fps is (10 sec: 22937.7, 60 sec: 22937.6, 300 sec: 22993.1). Total num frames: 17170432. Throughput: 0: 23037.2. Samples: 17172608. Policy #0 lag: (min: 63.0, avg: 78.3, max: 127.0) +[2026-06-07 03:15:53,910][492660] Avg episode reward: [(0, '1353.054')] +[2026-06-07 03:15:54,061][495927] Updated weights for policy 0, policy_version 33539 (0.0008) +[2026-06-07 03:15:54,201][495927] Updated weights for policy 0, policy_version 33549 (0.0008) +[2026-06-07 03:15:54,355][495927] Updated weights for policy 0, policy_version 33559 (0.0008) +[2026-06-07 03:15:54,533][495927] Updated weights for policy 0, policy_version 33571 (0.0008) +[2026-06-07 03:15:54,719][495927] Updated weights for policy 0, policy_version 33583 (0.0006) +[2026-06-07 03:15:55,330][495927] Updated weights for policy 0, policy_version 33593 (0.0008) +[2026-06-07 03:15:55,483][495927] Updated weights for policy 0, policy_version 33603 (0.0009) +[2026-06-07 03:15:55,650][495927] Updated weights for policy 0, policy_version 33614 (0.0008) +[2026-06-07 03:15:55,829][495927] Updated weights for policy 0, policy_version 33626 (0.0009) +[2026-06-07 03:15:55,994][495927] Updated weights for policy 0, policy_version 33637 (0.0010) +[2026-06-07 03:15:56,148][495927] Updated weights for policy 0, policy_version 33647 (0.0008) +[2026-06-07 03:15:56,781][495927] Updated weights for policy 0, policy_version 33659 (0.0008) +[2026-06-07 03:15:56,927][495927] Updated weights for policy 0, policy_version 33669 (0.0008) +[2026-06-07 03:15:57,085][495927] Updated weights for policy 0, policy_version 33679 (0.0008) +[2026-06-07 03:15:57,230][495927] Updated weights for policy 0, policy_version 33689 (0.0008) +[2026-06-07 03:15:57,386][495927] Updated weights for policy 0, policy_version 33699 (0.0010) +[2026-06-07 03:15:57,539][495927] Updated weights for policy 0, policy_version 33709 (0.0008) +[2026-06-07 03:15:58,161][495927] Updated weights for policy 0, policy_version 33719 (0.0008) +[2026-06-07 03:15:58,319][495927] Updated weights for policy 0, policy_version 33730 (0.0008) +[2026-06-07 03:15:58,468][495927] Updated weights for policy 0, policy_version 33740 (0.0009) +[2026-06-07 03:15:58,619][495927] Updated weights for policy 0, policy_version 33750 (0.0009) +[2026-06-07 03:15:58,794][495927] Updated weights for policy 0, policy_version 33762 (0.0009) +[2026-06-07 03:15:58,909][492660] Fps is (10 sec: 22937.8, 60 sec: 22937.6, 300 sec: 22882.1). Total num frames: 17268736. Throughput: 0: 23057.1. Samples: 17308416. Policy #0 lag: (min: 63.0, avg: 78.3, max: 127.0) +[2026-06-07 03:15:58,910][492660] Avg episode reward: [(0, '1364.710')] +[2026-06-07 03:15:58,951][495927] Updated weights for policy 0, policy_version 33772 (0.0008) +[2026-06-07 03:15:59,574][495927] Updated weights for policy 0, policy_version 33782 (0.0009) +[2026-06-07 03:15:59,719][495927] Updated weights for policy 0, policy_version 33792 (0.0008) +[2026-06-07 03:15:59,883][495927] Updated weights for policy 0, policy_version 33802 (0.0008) +[2026-06-07 03:16:00,035][495927] Updated weights for policy 0, policy_version 33812 (0.0008) +[2026-06-07 03:16:00,188][495927] Updated weights for policy 0, policy_version 33822 (0.0008) +[2026-06-07 03:16:00,357][495927] Updated weights for policy 0, policy_version 33833 (0.0007) +[2026-06-07 03:16:00,968][495927] Updated weights for policy 0, policy_version 33844 (0.0007) +[2026-06-07 03:16:01,133][495927] Updated weights for policy 0, policy_version 33856 (0.0009) +[2026-06-07 03:16:01,280][495927] Updated weights for policy 0, policy_version 33866 (0.0008) +[2026-06-07 03:16:01,427][495927] Updated weights for policy 0, policy_version 33876 (0.0008) +[2026-06-07 03:16:01,590][495927] Updated weights for policy 0, policy_version 33887 (0.0009) +[2026-06-07 03:16:01,741][495927] Updated weights for policy 0, policy_version 33897 (0.0008) +[2026-06-07 03:16:02,391][495927] Updated weights for policy 0, policy_version 33908 (0.0008) +[2026-06-07 03:16:02,530][495927] Updated weights for policy 0, policy_version 33918 (0.0008) +[2026-06-07 03:16:02,684][495927] Updated weights for policy 0, policy_version 33929 (0.0008) +[2026-06-07 03:16:02,855][495927] Updated weights for policy 0, policy_version 33940 (0.0010) +[2026-06-07 03:16:03,021][495927] Updated weights for policy 0, policy_version 33952 (0.0008) +[2026-06-07 03:16:03,185][495927] Updated weights for policy 0, policy_version 33963 (0.0008) +[2026-06-07 03:16:03,837][495927] Updated weights for policy 0, policy_version 33974 (0.0009) +[2026-06-07 03:16:03,909][492660] Fps is (10 sec: 22937.7, 60 sec: 22937.6, 300 sec: 22993.1). Total num frames: 17399808. Throughput: 0: 22940.5. Samples: 17375232. Policy #0 lag: (min: 63.0, avg: 78.3, max: 127.0) +[2026-06-07 03:16:03,910][492660] Avg episode reward: [(0, '1388.168')] +[2026-06-07 03:16:03,977][495927] Updated weights for policy 0, policy_version 33984 (0.0008) +[2026-06-07 03:16:04,138][495927] Updated weights for policy 0, policy_version 33995 (0.0008) +[2026-06-07 03:16:04,288][495927] Updated weights for policy 0, policy_version 34005 (0.0008) +[2026-06-07 03:16:04,440][495927] Updated weights for policy 0, policy_version 34015 (0.0008) +[2026-06-07 03:16:04,593][495927] Updated weights for policy 0, policy_version 34025 (0.0008) +[2026-06-07 03:16:05,224][495927] Updated weights for policy 0, policy_version 34035 (0.0008) +[2026-06-07 03:16:05,372][495927] Updated weights for policy 0, policy_version 34046 (0.0008) +[2026-06-07 03:16:05,526][495927] Updated weights for policy 0, policy_version 34056 (0.0009) +[2026-06-07 03:16:05,691][495927] Updated weights for policy 0, policy_version 34067 (0.0008) +[2026-06-07 03:16:05,844][495927] Updated weights for policy 0, policy_version 34077 (0.0008) +[2026-06-07 03:16:06,011][495927] Updated weights for policy 0, policy_version 34088 (0.0008) +[2026-06-07 03:16:06,640][495927] Updated weights for policy 0, policy_version 34098 (0.0008) +[2026-06-07 03:16:06,782][495927] Updated weights for policy 0, policy_version 34108 (0.0009) +[2026-06-07 03:16:06,923][495927] Updated weights for policy 0, policy_version 34118 (0.0008) +[2026-06-07 03:16:07,066][495927] Updated weights for policy 0, policy_version 34128 (0.0009) +[2026-06-07 03:16:07,238][495927] Updated weights for policy 0, policy_version 34140 (0.0008) +[2026-06-07 03:16:07,385][495927] Updated weights for policy 0, policy_version 34150 (0.0008) +[2026-06-07 03:16:08,038][495927] Updated weights for policy 0, policy_version 34161 (0.0009) +[2026-06-07 03:16:08,177][495927] Updated weights for policy 0, policy_version 34171 (0.0008) +[2026-06-07 03:16:08,324][495927] Updated weights for policy 0, policy_version 34181 (0.0009) +[2026-06-07 03:16:08,474][495927] Updated weights for policy 0, policy_version 34191 (0.0008) +[2026-06-07 03:16:08,629][495927] Updated weights for policy 0, policy_version 34201 (0.0009) +[2026-06-07 03:16:08,780][495927] Updated weights for policy 0, policy_version 34211 (0.0008) +[2026-06-07 03:16:08,909][492660] Fps is (10 sec: 22937.5, 60 sec: 22937.6, 300 sec: 22882.1). Total num frames: 17498112. Throughput: 0: 22724.2. Samples: 17509632. Policy #0 lag: (min: 63.0, avg: 78.3, max: 127.0) +[2026-06-07 03:16:08,910][492660] Avg episode reward: [(0, '1362.027')] +[2026-06-07 03:16:08,930][495927] Updated weights for policy 0, policy_version 34221 (0.0009) +[2026-06-07 03:16:09,578][495927] Updated weights for policy 0, policy_version 34233 (0.0009) +[2026-06-07 03:16:09,734][495927] Updated weights for policy 0, policy_version 34243 (0.0007) +[2026-06-07 03:16:09,899][495927] Updated weights for policy 0, policy_version 34255 (0.0009) +[2026-06-07 03:16:10,046][495927] Updated weights for policy 0, policy_version 34265 (0.0008) +[2026-06-07 03:16:10,226][495927] Updated weights for policy 0, policy_version 34277 (0.0008) +[2026-06-07 03:16:10,374][495927] Updated weights for policy 0, policy_version 34287 (0.0009) +[2026-06-07 03:16:11,021][495927] Updated weights for policy 0, policy_version 34298 (0.0008) +[2026-06-07 03:16:11,174][495927] Updated weights for policy 0, policy_version 34308 (0.0009) +[2026-06-07 03:16:11,321][495927] Updated weights for policy 0, policy_version 34318 (0.0009) +[2026-06-07 03:16:11,467][495927] Updated weights for policy 0, policy_version 34328 (0.0009) +[2026-06-07 03:16:11,644][495927] Updated weights for policy 0, policy_version 34339 (0.0008) +[2026-06-07 03:16:11,784][495927] Updated weights for policy 0, policy_version 34349 (0.0006) +[2026-06-07 03:16:12,407][495927] Updated weights for policy 0, policy_version 34360 (0.0007) +[2026-06-07 03:16:12,554][495927] Updated weights for policy 0, policy_version 34370 (0.0009) +[2026-06-07 03:16:12,705][495927] Updated weights for policy 0, policy_version 34380 (0.0008) +[2026-06-07 03:16:12,853][495927] Updated weights for policy 0, policy_version 34390 (0.0009) +[2026-06-07 03:16:13,006][495927] Updated weights for policy 0, policy_version 34400 (0.0009) +[2026-06-07 03:16:13,162][495927] Updated weights for policy 0, policy_version 34411 (0.0009) +[2026-06-07 03:16:13,816][495927] Updated weights for policy 0, policy_version 34422 (0.0009) +[2026-06-07 03:16:13,909][492660] Fps is (10 sec: 22937.5, 60 sec: 22937.6, 300 sec: 22993.1). Total num frames: 17629184. Throughput: 0: 22749.8. Samples: 17644672. Policy #0 lag: (min: 63.0, avg: 78.3, max: 127.0) +[2026-06-07 03:16:13,910][492660] Avg episode reward: [(0, '1376.315')] +[2026-06-07 03:16:13,978][495927] Updated weights for policy 0, policy_version 34433 (0.0008) +[2026-06-07 03:16:14,126][495927] Updated weights for policy 0, policy_version 34443 (0.0008) +[2026-06-07 03:16:14,285][495927] Updated weights for policy 0, policy_version 34454 (0.0009) +[2026-06-07 03:16:14,446][495927] Updated weights for policy 0, policy_version 34464 (0.0009) +[2026-06-07 03:16:14,599][495927] Updated weights for policy 0, policy_version 34474 (0.0008) +[2026-06-07 03:16:15,234][495927] Updated weights for policy 0, policy_version 34485 (0.0009) +[2026-06-07 03:16:15,389][495927] Updated weights for policy 0, policy_version 34496 (0.0009) +[2026-06-07 03:16:15,531][495927] Updated weights for policy 0, policy_version 34506 (0.0008) +[2026-06-07 03:16:15,716][495927] Updated weights for policy 0, policy_version 34518 (0.0009) +[2026-06-07 03:16:15,866][495927] Updated weights for policy 0, policy_version 34528 (0.0008) +[2026-06-07 03:16:16,014][495927] Updated weights for policy 0, policy_version 34538 (0.0008) +[2026-06-07 03:16:16,638][495927] Updated weights for policy 0, policy_version 34548 (0.0008) +[2026-06-07 03:16:16,804][495927] Updated weights for policy 0, policy_version 34559 (0.0008) +[2026-06-07 03:16:16,945][495927] Updated weights for policy 0, policy_version 34569 (0.0008) +[2026-06-07 03:16:17,093][495927] Updated weights for policy 0, policy_version 34579 (0.0009) +[2026-06-07 03:16:17,253][495927] Updated weights for policy 0, policy_version 34590 (0.0009) +[2026-06-07 03:16:17,423][495927] Updated weights for policy 0, policy_version 34601 (0.0008) +[2026-06-07 03:16:18,030][495927] Updated weights for policy 0, policy_version 34611 (0.0009) +[2026-06-07 03:16:18,180][495927] Updated weights for policy 0, policy_version 34621 (0.0008) +[2026-06-07 03:16:18,328][495927] Updated weights for policy 0, policy_version 34631 (0.0008) +[2026-06-07 03:16:18,496][495927] Updated weights for policy 0, policy_version 34642 (0.0009) +[2026-06-07 03:16:18,639][495927] Updated weights for policy 0, policy_version 34652 (0.0008) +[2026-06-07 03:16:18,811][495927] Updated weights for policy 0, policy_version 34663 (0.0008) +[2026-06-07 03:16:18,909][492660] Fps is (10 sec: 22937.6, 60 sec: 22937.6, 300 sec: 22882.1). Total num frames: 17727488. Throughput: 0: 22846.6. Samples: 17716608. Policy #0 lag: (min: 63.0, avg: 76.6, max: 127.0) +[2026-06-07 03:16:18,910][492660] Avg episode reward: [(0, '1419.797')] +[2026-06-07 03:16:18,941][495570] Saving new best policy, reward=1419.797! +[2026-06-07 03:16:19,437][495927] Updated weights for policy 0, policy_version 34673 (0.0009) +[2026-06-07 03:16:19,576][495927] Updated weights for policy 0, policy_version 34683 (0.0008) +[2026-06-07 03:16:19,738][495927] Updated weights for policy 0, policy_version 34694 (0.0008) +[2026-06-07 03:16:19,906][495927] Updated weights for policy 0, policy_version 34705 (0.0008) +[2026-06-07 03:16:20,056][495927] Updated weights for policy 0, policy_version 34715 (0.0008) +[2026-06-07 03:16:20,201][495927] Updated weights for policy 0, policy_version 34725 (0.0008) +[2026-06-07 03:16:20,366][495927] Updated weights for policy 0, policy_version 34735 (0.0008) +[2026-06-07 03:16:20,996][495927] Updated weights for policy 0, policy_version 34746 (0.0008) +[2026-06-07 03:16:21,135][495927] Updated weights for policy 0, policy_version 34756 (0.0008) +[2026-06-07 03:16:21,285][495927] Updated weights for policy 0, policy_version 34766 (0.0008) +[2026-06-07 03:16:21,439][495927] Updated weights for policy 0, policy_version 34776 (0.0008) +[2026-06-07 03:16:21,587][495927] Updated weights for policy 0, policy_version 34786 (0.0008) +[2026-06-07 03:16:21,741][495927] Updated weights for policy 0, policy_version 34796 (0.0009) +[2026-06-07 03:16:22,368][495927] Updated weights for policy 0, policy_version 34806 (0.0008) +[2026-06-07 03:16:22,525][495927] Updated weights for policy 0, policy_version 34817 (0.0008) +[2026-06-07 03:16:22,672][495927] Updated weights for policy 0, policy_version 34827 (0.0008) +[2026-06-07 03:16:22,828][495927] Updated weights for policy 0, policy_version 34837 (0.0008) +[2026-06-07 03:16:22,976][495927] Updated weights for policy 0, policy_version 34847 (0.0008) +[2026-06-07 03:16:23,128][495927] Updated weights for policy 0, policy_version 34857 (0.0008) +[2026-06-07 03:16:23,765][495927] Updated weights for policy 0, policy_version 34867 (0.0008) +[2026-06-07 03:16:23,908][495927] Updated weights for policy 0, policy_version 34877 (0.0009) +[2026-06-07 03:16:23,909][492660] Fps is (10 sec: 22937.6, 60 sec: 22937.6, 300 sec: 22993.1). Total num frames: 17858560. Throughput: 0: 23040.0. Samples: 17859840. Policy #0 lag: (min: 63.0, avg: 76.6, max: 127.0) +[2026-06-07 03:16:23,910][492660] Avg episode reward: [(0, '1382.623')] +[2026-06-07 03:16:24,056][495927] Updated weights for policy 0, policy_version 34887 (0.0008) +[2026-06-07 03:16:24,221][495927] Updated weights for policy 0, policy_version 34898 (0.0008) +[2026-06-07 03:16:24,371][495927] Updated weights for policy 0, policy_version 34908 (0.0008) +[2026-06-07 03:16:24,520][495927] Updated weights for policy 0, policy_version 34918 (0.0008) +[2026-06-07 03:16:24,665][495927] Updated weights for policy 0, policy_version 34928 (0.0008) +[2026-06-07 03:16:25,298][495927] Updated weights for policy 0, policy_version 34939 (0.0008) +[2026-06-07 03:16:25,459][495927] Updated weights for policy 0, policy_version 34950 (0.0008) +[2026-06-07 03:16:25,611][495927] Updated weights for policy 0, policy_version 34960 (0.0008) +[2026-06-07 03:16:25,781][495927] Updated weights for policy 0, policy_version 34971 (0.0009) +[2026-06-07 03:16:25,934][495927] Updated weights for policy 0, policy_version 34981 (0.0008) +[2026-06-07 03:16:26,084][495927] Updated weights for policy 0, policy_version 34991 (0.0008) +[2026-06-07 03:16:26,703][495927] Updated weights for policy 0, policy_version 35001 (0.0008) +[2026-06-07 03:16:26,849][495927] Updated weights for policy 0, policy_version 35011 (0.0008) +[2026-06-07 03:16:26,997][495927] Updated weights for policy 0, policy_version 35021 (0.0008) +[2026-06-07 03:16:27,145][495927] Updated weights for policy 0, policy_version 35031 (0.0008) +[2026-06-07 03:16:27,297][495927] Updated weights for policy 0, policy_version 35041 (0.0008) +[2026-06-07 03:16:27,446][495927] Updated weights for policy 0, policy_version 35051 (0.0008) +[2026-06-07 03:16:28,101][495927] Updated weights for policy 0, policy_version 35062 (0.0009) +[2026-06-07 03:16:28,244][495927] Updated weights for policy 0, policy_version 35072 (0.0008) +[2026-06-07 03:16:28,409][495927] Updated weights for policy 0, policy_version 35083 (0.0009) +[2026-06-07 03:16:28,574][495927] Updated weights for policy 0, policy_version 35094 (0.0008) +[2026-06-07 03:16:28,726][495927] Updated weights for policy 0, policy_version 35104 (0.0008) +[2026-06-07 03:16:28,874][495927] Updated weights for policy 0, policy_version 35114 (0.0009) +[2026-06-07 03:16:28,909][492660] Fps is (10 sec: 22937.7, 60 sec: 22937.6, 300 sec: 22882.1). Total num frames: 17956864. Throughput: 0: 23060.0. Samples: 17995136. Policy #0 lag: (min: 63.0, avg: 76.6, max: 127.0) +[2026-06-07 03:16:28,910][492660] Avg episode reward: [(0, '1369.323')] +[2026-06-07 03:16:29,502][495927] Updated weights for policy 0, policy_version 35124 (0.0008) +[2026-06-07 03:16:29,661][495927] Updated weights for policy 0, policy_version 35135 (0.0008) +[2026-06-07 03:16:29,820][495927] Updated weights for policy 0, policy_version 35146 (0.0009) +[2026-06-07 03:16:29,973][495927] Updated weights for policy 0, policy_version 35156 (0.0008) +[2026-06-07 03:16:30,128][495927] Updated weights for policy 0, policy_version 35166 (0.0008) +[2026-06-07 03:16:30,292][495927] Updated weights for policy 0, policy_version 35177 (0.0008) +[2026-06-07 03:16:30,923][495927] Updated weights for policy 0, policy_version 35187 (0.0008) +[2026-06-07 03:16:31,079][495927] Updated weights for policy 0, policy_version 35198 (0.0008) +[2026-06-07 03:16:31,223][495927] Updated weights for policy 0, policy_version 35208 (0.0008) +[2026-06-07 03:16:31,369][495927] Updated weights for policy 0, policy_version 35218 (0.0008) +[2026-06-07 03:16:31,523][495927] Updated weights for policy 0, policy_version 35228 (0.0009) +[2026-06-07 03:16:31,679][495927] Updated weights for policy 0, policy_version 35238 (0.0008) +[2026-06-07 03:16:31,826][495927] Updated weights for policy 0, policy_version 35248 (0.0008) +[2026-06-07 03:16:32,457][495927] Updated weights for policy 0, policy_version 35259 (0.0009) +[2026-06-07 03:16:32,606][495927] Updated weights for policy 0, policy_version 35269 (0.0009) +[2026-06-07 03:16:32,755][495927] Updated weights for policy 0, policy_version 35279 (0.0009) +[2026-06-07 03:16:32,910][495927] Updated weights for policy 0, policy_version 35289 (0.0008) +[2026-06-07 03:16:33,077][495927] Updated weights for policy 0, policy_version 35300 (0.0008) +[2026-06-07 03:16:33,241][495927] Updated weights for policy 0, policy_version 35311 (0.0008) +[2026-06-07 03:16:33,853][495927] Updated weights for policy 0, policy_version 35321 (0.0008) +[2026-06-07 03:16:33,909][492660] Fps is (10 sec: 22937.7, 60 sec: 22937.6, 300 sec: 22993.1). Total num frames: 18087936. Throughput: 0: 23005.9. Samples: 18062592. Policy #0 lag: (min: 63.0, avg: 76.6, max: 127.0) +[2026-06-07 03:16:33,910][492660] Avg episode reward: [(0, '1376.502')] +[2026-06-07 03:16:33,992][495927] Updated weights for policy 0, policy_version 35331 (0.0009) +[2026-06-07 03:16:34,147][495927] Updated weights for policy 0, policy_version 35341 (0.0009) +[2026-06-07 03:16:34,301][495927] Updated weights for policy 0, policy_version 35351 (0.0008) +[2026-06-07 03:16:34,452][495927] Updated weights for policy 0, policy_version 35361 (0.0008) +[2026-06-07 03:16:34,605][495927] Updated weights for policy 0, policy_version 35371 (0.0009) +[2026-06-07 03:16:35,234][495927] Updated weights for policy 0, policy_version 35381 (0.0008) +[2026-06-07 03:16:35,386][495927] Updated weights for policy 0, policy_version 35391 (0.0008) +[2026-06-07 03:16:35,531][495927] Updated weights for policy 0, policy_version 35401 (0.0008) +[2026-06-07 03:16:35,679][495927] Updated weights for policy 0, policy_version 35411 (0.0008) +[2026-06-07 03:16:35,858][495927] Updated weights for policy 0, policy_version 35423 (0.0008) +[2026-06-07 03:16:36,017][495927] Updated weights for policy 0, policy_version 35433 (0.0008) +[2026-06-07 03:16:36,649][495927] Updated weights for policy 0, policy_version 35443 (0.0008) +[2026-06-07 03:16:36,811][495927] Updated weights for policy 0, policy_version 35454 (0.0008) +[2026-06-07 03:16:36,957][495927] Updated weights for policy 0, policy_version 35464 (0.0008) +[2026-06-07 03:16:37,114][495927] Updated weights for policy 0, policy_version 35474 (0.0008) +[2026-06-07 03:16:37,261][495927] Updated weights for policy 0, policy_version 35484 (0.0008) +[2026-06-07 03:16:37,408][495927] Updated weights for policy 0, policy_version 35494 (0.0008) +[2026-06-07 03:16:37,557][495927] Updated weights for policy 0, policy_version 35504 (0.0008) +[2026-06-07 03:16:38,182][495927] Updated weights for policy 0, policy_version 35514 (0.0009) +[2026-06-07 03:16:38,343][495927] Updated weights for policy 0, policy_version 35525 (0.0008) +[2026-06-07 03:16:38,506][495927] Updated weights for policy 0, policy_version 35536 (0.0008) +[2026-06-07 03:16:38,659][495927] Updated weights for policy 0, policy_version 35546 (0.0008) +[2026-06-07 03:16:38,811][495927] Updated weights for policy 0, policy_version 35556 (0.0008) +[2026-06-07 03:16:38,909][492660] Fps is (10 sec: 22937.5, 60 sec: 22937.6, 300 sec: 22882.1). Total num frames: 18186240. Throughput: 0: 22789.7. Samples: 18198144. Policy #0 lag: (min: 63.0, avg: 76.6, max: 127.0) +[2026-06-07 03:16:38,910][492660] Avg episode reward: [(0, '1385.116')] +[2026-06-07 03:16:38,976][495927] Updated weights for policy 0, policy_version 35567 (0.0008) +[2026-06-07 03:16:39,582][495927] Updated weights for policy 0, policy_version 35577 (0.0009) +[2026-06-07 03:16:39,728][495927] Updated weights for policy 0, policy_version 35587 (0.0008) +[2026-06-07 03:16:39,880][495927] Updated weights for policy 0, policy_version 35597 (0.0009) +[2026-06-07 03:16:40,038][495927] Updated weights for policy 0, policy_version 35607 (0.0008) +[2026-06-07 03:16:40,200][495927] Updated weights for policy 0, policy_version 35618 (0.0008) +[2026-06-07 03:16:40,358][495927] Updated weights for policy 0, policy_version 35628 (0.0009) +[2026-06-07 03:16:40,966][495927] Updated weights for policy 0, policy_version 35638 (0.0008) +[2026-06-07 03:16:41,109][495927] Updated weights for policy 0, policy_version 35648 (0.0009) +[2026-06-07 03:16:41,261][495927] Updated weights for policy 0, policy_version 35658 (0.0009) +[2026-06-07 03:16:41,410][495927] Updated weights for policy 0, policy_version 35668 (0.0008) +[2026-06-07 03:16:41,563][495927] Updated weights for policy 0, policy_version 35678 (0.0009) +[2026-06-07 03:16:41,728][495927] Updated weights for policy 0, policy_version 35689 (0.0008) +[2026-06-07 03:16:42,340][495927] Updated weights for policy 0, policy_version 35699 (0.0009) +[2026-06-07 03:16:42,486][495927] Updated weights for policy 0, policy_version 35709 (0.0009) +[2026-06-07 03:16:42,628][495927] Updated weights for policy 0, policy_version 35719 (0.0008) +[2026-06-07 03:16:42,783][495927] Updated weights for policy 0, policy_version 35729 (0.0008) +[2026-06-07 03:16:42,933][495927] Updated weights for policy 0, policy_version 35739 (0.0008) +[2026-06-07 03:16:43,104][495927] Updated weights for policy 0, policy_version 35750 (0.0009) +[2026-06-07 03:16:43,744][495927] Updated weights for policy 0, policy_version 35761 (0.0008) +[2026-06-07 03:16:43,884][495927] Updated weights for policy 0, policy_version 35771 (0.0008) +[2026-06-07 03:16:43,909][492660] Fps is (10 sec: 22937.6, 60 sec: 22937.6, 300 sec: 22993.2). Total num frames: 18317312. Throughput: 0: 22758.4. Samples: 18332544. Policy #0 lag: (min: 63.0, avg: 77.4, max: 127.0) +[2026-06-07 03:16:43,910][492660] Avg episode reward: [(0, '1374.978')] +[2026-06-07 03:16:44,034][495927] Updated weights for policy 0, policy_version 35781 (0.0009) +[2026-06-07 03:16:44,178][495927] Updated weights for policy 0, policy_version 35791 (0.0008) +[2026-06-07 03:16:44,331][495927] Updated weights for policy 0, policy_version 35801 (0.0008) +[2026-06-07 03:16:44,497][495927] Updated weights for policy 0, policy_version 35812 (0.0009) +[2026-06-07 03:16:44,643][495927] Updated weights for policy 0, policy_version 35822 (0.0008) +[2026-06-07 03:16:45,286][495927] Updated weights for policy 0, policy_version 35832 (0.0009) +[2026-06-07 03:16:45,438][495927] Updated weights for policy 0, policy_version 35842 (0.0008) +[2026-06-07 03:16:45,578][495927] Updated weights for policy 0, policy_version 35852 (0.0009) +[2026-06-07 03:16:45,733][495927] Updated weights for policy 0, policy_version 35862 (0.0009) +[2026-06-07 03:16:45,884][495927] Updated weights for policy 0, policy_version 35872 (0.0008) +[2026-06-07 03:16:46,029][495927] Updated weights for policy 0, policy_version 35882 (0.0008) +[2026-06-07 03:16:46,652][495927] Updated weights for policy 0, policy_version 35892 (0.0009) +[2026-06-07 03:16:46,789][495927] Updated weights for policy 0, policy_version 35902 (0.0009) +[2026-06-07 03:16:46,940][495927] Updated weights for policy 0, policy_version 35912 (0.0009) +[2026-06-07 03:16:47,123][495927] Updated weights for policy 0, policy_version 35924 (0.0008) +[2026-06-07 03:16:47,274][495927] Updated weights for policy 0, policy_version 35934 (0.0008) +[2026-06-07 03:16:47,425][495927] Updated weights for policy 0, policy_version 35944 (0.0009) +[2026-06-07 03:16:48,052][495927] Updated weights for policy 0, policy_version 35954 (0.0009) +[2026-06-07 03:16:48,202][495927] Updated weights for policy 0, policy_version 35965 (0.0008) +[2026-06-07 03:16:48,352][495927] Updated weights for policy 0, policy_version 35975 (0.0008) +[2026-06-07 03:16:48,501][495927] Updated weights for policy 0, policy_version 35985 (0.0008) +[2026-06-07 03:16:48,651][495927] Updated weights for policy 0, policy_version 35995 (0.0008) +[2026-06-07 03:16:48,809][495927] Updated weights for policy 0, policy_version 36005 (0.0008) +[2026-06-07 03:16:48,909][492660] Fps is (10 sec: 22937.6, 60 sec: 22937.6, 300 sec: 22882.1). Total num frames: 18415616. Throughput: 0: 22860.8. Samples: 18403968. Policy #0 lag: (min: 63.0, avg: 77.4, max: 127.0) +[2026-06-07 03:16:48,910][492660] Avg episode reward: [(0, '1347.326')] +[2026-06-07 03:16:48,962][495927] Updated weights for policy 0, policy_version 36015 (0.0008) +[2026-06-07 03:16:49,581][495927] Updated weights for policy 0, policy_version 36025 (0.0008) +[2026-06-07 03:16:49,755][495927] Updated weights for policy 0, policy_version 36037 (0.0008) +[2026-06-07 03:16:49,905][495927] Updated weights for policy 0, policy_version 36047 (0.0008) +[2026-06-07 03:16:50,052][495927] Updated weights for policy 0, policy_version 36057 (0.0008) +[2026-06-07 03:16:50,229][495927] Updated weights for policy 0, policy_version 36068 (0.0008) +[2026-06-07 03:16:50,373][495927] Updated weights for policy 0, policy_version 36078 (0.0008) +[2026-06-07 03:16:50,990][495927] Updated weights for policy 0, policy_version 36088 (0.0008) +[2026-06-07 03:16:51,138][495927] Updated weights for policy 0, policy_version 36098 (0.0009) +[2026-06-07 03:16:51,304][495927] Updated weights for policy 0, policy_version 36109 (0.0008) +[2026-06-07 03:16:51,459][495927] Updated weights for policy 0, policy_version 36119 (0.0008) +[2026-06-07 03:16:51,613][495927] Updated weights for policy 0, policy_version 36129 (0.0008) +[2026-06-07 03:16:51,763][495927] Updated weights for policy 0, policy_version 36139 (0.0008) +[2026-06-07 03:16:52,384][495927] Updated weights for policy 0, policy_version 36149 (0.0009) +[2026-06-07 03:16:52,539][495927] Updated weights for policy 0, policy_version 36160 (0.0008) +[2026-06-07 03:16:52,686][495927] Updated weights for policy 0, policy_version 36170 (0.0008) +[2026-06-07 03:16:52,836][495927] Updated weights for policy 0, policy_version 36180 (0.0008) +[2026-06-07 03:16:53,008][495927] Updated weights for policy 0, policy_version 36191 (0.0008) +[2026-06-07 03:16:53,157][495927] Updated weights for policy 0, policy_version 36201 (0.0007) +[2026-06-07 03:16:53,774][495927] Updated weights for policy 0, policy_version 36211 (0.0006) +[2026-06-07 03:16:53,909][492660] Fps is (10 sec: 22937.5, 60 sec: 22937.6, 300 sec: 22993.1). Total num frames: 18546688. Throughput: 0: 23088.4. Samples: 18548608. Policy #0 lag: (min: 63.0, avg: 77.4, max: 127.0) +[2026-06-07 03:16:53,910][492660] Avg episode reward: [(0, '1320.160')] +[2026-06-07 03:16:53,937][495927] Updated weights for policy 0, policy_version 36223 (0.0009) +[2026-06-07 03:16:54,087][495927] Updated weights for policy 0, policy_version 36233 (0.0008) +[2026-06-07 03:16:54,234][495927] Updated weights for policy 0, policy_version 36243 (0.0008) +[2026-06-07 03:16:54,383][495927] Updated weights for policy 0, policy_version 36253 (0.0009) +[2026-06-07 03:16:54,535][495927] Updated weights for policy 0, policy_version 36263 (0.0009) +[2026-06-07 03:16:55,187][495927] Updated weights for policy 0, policy_version 36273 (0.0009) +[2026-06-07 03:16:55,327][495927] Updated weights for policy 0, policy_version 36284 (0.0011) +[2026-06-07 03:16:55,485][495927] Updated weights for policy 0, policy_version 36295 (0.0009) +[2026-06-07 03:16:55,652][495927] Updated weights for policy 0, policy_version 36306 (0.0009) +[2026-06-07 03:16:55,816][495927] Updated weights for policy 0, policy_version 36317 (0.0008) +[2026-06-07 03:16:55,978][495927] Updated weights for policy 0, policy_version 36328 (0.0008) +[2026-06-07 03:16:56,623][495927] Updated weights for policy 0, policy_version 36338 (0.0008) +[2026-06-07 03:16:56,797][495927] Updated weights for policy 0, policy_version 36350 (0.0009) +[2026-06-07 03:16:56,977][495927] Updated weights for policy 0, policy_version 36363 (0.0007) +[2026-06-07 03:16:57,147][495927] Updated weights for policy 0, policy_version 36374 (0.0005) +[2026-06-07 03:16:57,342][495927] Updated weights for policy 0, policy_version 36387 (0.0005) +[2026-06-07 03:16:57,498][495927] Updated weights for policy 0, policy_version 36397 (0.0005) +[2026-06-07 03:16:58,118][495927] Updated weights for policy 0, policy_version 36408 (0.0007) +[2026-06-07 03:16:58,264][495927] Updated weights for policy 0, policy_version 36418 (0.0009) +[2026-06-07 03:16:58,413][495927] Updated weights for policy 0, policy_version 36428 (0.0008) +[2026-06-07 03:16:58,565][495927] Updated weights for policy 0, policy_version 36438 (0.0008) +[2026-06-07 03:16:58,730][495927] Updated weights for policy 0, policy_version 36449 (0.0008) +[2026-06-07 03:16:58,881][495927] Updated weights for policy 0, policy_version 36459 (0.0008) +[2026-06-07 03:16:58,909][492660] Fps is (10 sec: 22937.6, 60 sec: 22937.6, 300 sec: 22882.1). Total num frames: 18644992. Throughput: 0: 23082.7. Samples: 18683392. Policy #0 lag: (min: 63.0, avg: 77.4, max: 127.0) +[2026-06-07 03:16:58,910][492660] Avg episode reward: [(0, '1378.156')] +[2026-06-07 03:16:59,518][495927] Updated weights for policy 0, policy_version 36470 (0.0009) +[2026-06-07 03:16:59,658][495927] Updated weights for policy 0, policy_version 36480 (0.0009) +[2026-06-07 03:16:59,808][495927] Updated weights for policy 0, policy_version 36490 (0.0009) +[2026-06-07 03:16:59,970][495927] Updated weights for policy 0, policy_version 36501 (0.0008) +[2026-06-07 03:17:00,125][495927] Updated weights for policy 0, policy_version 36511 (0.0009) +[2026-06-07 03:17:00,293][495927] Updated weights for policy 0, policy_version 36522 (0.0008) +[2026-06-07 03:17:00,906][495927] Updated weights for policy 0, policy_version 36532 (0.0009) +[2026-06-07 03:17:01,043][495927] Updated weights for policy 0, policy_version 36542 (0.0008) +[2026-06-07 03:17:01,196][495927] Updated weights for policy 0, policy_version 36552 (0.0008) +[2026-06-07 03:17:01,342][495927] Updated weights for policy 0, policy_version 36562 (0.0008) +[2026-06-07 03:17:01,499][495927] Updated weights for policy 0, policy_version 36572 (0.0008) +[2026-06-07 03:17:01,655][495927] Updated weights for policy 0, policy_version 36582 (0.0007) +[2026-06-07 03:17:01,802][495927] Updated weights for policy 0, policy_version 36592 (0.0005) +[2026-06-07 03:17:02,416][495927] Updated weights for policy 0, policy_version 36602 (0.0008) +[2026-06-07 03:17:02,572][495927] Updated weights for policy 0, policy_version 36613 (0.0008) +[2026-06-07 03:17:02,728][495927] Updated weights for policy 0, policy_version 36623 (0.0008) +[2026-06-07 03:17:02,873][495927] Updated weights for policy 0, policy_version 36633 (0.0008) +[2026-06-07 03:17:03,046][495927] Updated weights for policy 0, policy_version 36643 (0.0008) +[2026-06-07 03:17:03,196][495927] Updated weights for policy 0, policy_version 36653 (0.0008) +[2026-06-07 03:17:03,822][495927] Updated weights for policy 0, policy_version 36663 (0.0009) +[2026-06-07 03:17:03,909][492660] Fps is (10 sec: 22937.7, 60 sec: 22937.6, 300 sec: 22993.1). Total num frames: 18776064. Throughput: 0: 22966.1. Samples: 18750080. Policy #0 lag: (min: 43.0, avg: 59.2, max: 107.0) +[2026-06-07 03:17:03,910][492660] Avg episode reward: [(0, '1444.397')] +[2026-06-07 03:17:03,969][495927] Updated weights for policy 0, policy_version 36673 (0.0008) +[2026-06-07 03:17:04,112][495927] Updated weights for policy 0, policy_version 36683 (0.0009) +[2026-06-07 03:17:04,263][495927] Updated weights for policy 0, policy_version 36693 (0.0008) +[2026-06-07 03:17:04,413][495927] Updated weights for policy 0, policy_version 36703 (0.0009) +[2026-06-07 03:17:04,566][495927] Updated weights for policy 0, policy_version 36713 (0.0008) +[2026-06-07 03:17:04,660][495570] Saving new best policy, reward=1444.397! +[2026-06-07 03:17:05,188][495927] Updated weights for policy 0, policy_version 36723 (0.0008) +[2026-06-07 03:17:05,324][495927] Updated weights for policy 0, policy_version 36733 (0.0008) +[2026-06-07 03:17:05,490][495927] Updated weights for policy 0, policy_version 36744 (0.0008) +[2026-06-07 03:17:05,658][495927] Updated weights for policy 0, policy_version 36755 (0.0008) +[2026-06-07 03:17:05,806][495927] Updated weights for policy 0, policy_version 36765 (0.0008) +[2026-06-07 03:17:05,973][495927] Updated weights for policy 0, policy_version 36776 (0.0008) +[2026-06-07 03:17:06,624][495927] Updated weights for policy 0, policy_version 36786 (0.0008) +[2026-06-07 03:17:06,762][495927] Updated weights for policy 0, policy_version 36796 (0.0008) +[2026-06-07 03:17:06,903][495927] Updated weights for policy 0, policy_version 36806 (0.0008) +[2026-06-07 03:17:07,052][495927] Updated weights for policy 0, policy_version 36816 (0.0008) +[2026-06-07 03:17:07,209][495927] Updated weights for policy 0, policy_version 36826 (0.0008) +[2026-06-07 03:17:07,373][495927] Updated weights for policy 0, policy_version 36837 (0.0008) +[2026-06-07 03:17:07,523][495927] Updated weights for policy 0, policy_version 36847 (0.0008) +[2026-06-07 03:17:08,133][495927] Updated weights for policy 0, policy_version 36857 (0.0008) +[2026-06-07 03:17:08,293][495927] Updated weights for policy 0, policy_version 36868 (0.0009) +[2026-06-07 03:17:08,439][495927] Updated weights for policy 0, policy_version 36878 (0.0008) +[2026-06-07 03:17:08,595][495927] Updated weights for policy 0, policy_version 36888 (0.0008) +[2026-06-07 03:17:08,747][495927] Updated weights for policy 0, policy_version 36898 (0.0008) +[2026-06-07 03:17:08,901][495927] Updated weights for policy 0, policy_version 36908 (0.0009) +[2026-06-07 03:17:08,909][492660] Fps is (10 sec: 22937.4, 60 sec: 22937.6, 300 sec: 22882.1). Total num frames: 18874368. Throughput: 0: 22789.7. Samples: 18885376. Policy #0 lag: (min: 43.0, avg: 59.2, max: 107.0) +[2026-06-07 03:17:08,910][492660] Avg episode reward: [(0, '1454.607')] +[2026-06-07 03:17:08,954][495570] Saving new best policy, reward=1454.607! +[2026-06-07 03:17:09,540][495927] Updated weights for policy 0, policy_version 36919 (0.0008) +[2026-06-07 03:17:09,678][495927] Updated weights for policy 0, policy_version 36929 (0.0008) +[2026-06-07 03:17:09,829][495927] Updated weights for policy 0, policy_version 36939 (0.0009) +[2026-06-07 03:17:09,978][495927] Updated weights for policy 0, policy_version 36949 (0.0008) +[2026-06-07 03:17:10,131][495927] Updated weights for policy 0, policy_version 36959 (0.0008) +[2026-06-07 03:17:10,280][495927] Updated weights for policy 0, policy_version 36969 (0.0008) +[2026-06-07 03:17:10,905][495927] Updated weights for policy 0, policy_version 36979 (0.0008) +[2026-06-07 03:17:11,043][495927] Updated weights for policy 0, policy_version 36989 (0.0008) +[2026-06-07 03:17:11,208][495927] Updated weights for policy 0, policy_version 37000 (0.0008) +[2026-06-07 03:17:11,361][495927] Updated weights for policy 0, policy_version 37010 (0.0008) +[2026-06-07 03:17:11,512][495927] Updated weights for policy 0, policy_version 37020 (0.0008) +[2026-06-07 03:17:11,652][495927] Updated weights for policy 0, policy_version 37030 (0.0008) +[2026-06-07 03:17:11,800][495927] Updated weights for policy 0, policy_version 37040 (0.0009) +[2026-06-07 03:17:12,489][495927] Updated weights for policy 0, policy_version 37053 (0.0009) +[2026-06-07 03:17:12,626][495927] Updated weights for policy 0, policy_version 37063 (0.0008) +[2026-06-07 03:17:12,788][495927] Updated weights for policy 0, policy_version 37074 (0.0008) +[2026-06-07 03:17:12,949][495927] Updated weights for policy 0, policy_version 37085 (0.0008) +[2026-06-07 03:17:13,100][495927] Updated weights for policy 0, policy_version 37095 (0.0009) +[2026-06-07 03:17:13,769][495927] Updated weights for policy 0, policy_version 37107 (0.0009) +[2026-06-07 03:17:13,896][495927] Updated weights for policy 0, policy_version 37117 (0.0008) +[2026-06-07 03:17:13,909][492660] Fps is (10 sec: 22937.6, 60 sec: 22937.6, 300 sec: 22993.1). Total num frames: 19005440. Throughput: 0: 22769.8. Samples: 19019776. Policy #0 lag: (min: 43.0, avg: 59.2, max: 107.0) +[2026-06-07 03:17:13,910][492660] Avg episode reward: [(0, '1521.392')] +[2026-06-07 03:17:14,047][495927] Updated weights for policy 0, policy_version 37127 (0.0009) +[2026-06-07 03:17:14,221][495927] Updated weights for policy 0, policy_version 37139 (0.0011) +[2026-06-07 03:17:14,383][495927] Updated weights for policy 0, policy_version 37150 (0.0009) +[2026-06-07 03:17:14,566][495927] Updated weights for policy 0, policy_version 37162 (0.0009) +[2026-06-07 03:17:14,651][495570] Saving new best policy, reward=1521.392! +[2026-06-07 03:17:15,226][495927] Updated weights for policy 0, policy_version 37172 (0.0009) +[2026-06-07 03:17:15,382][495927] Updated weights for policy 0, policy_version 37183 (0.0008) +[2026-06-07 03:17:15,575][495927] Updated weights for policy 0, policy_version 37196 (0.0009) +[2026-06-07 03:17:15,723][495927] Updated weights for policy 0, policy_version 37206 (0.0009) +[2026-06-07 03:17:15,879][495927] Updated weights for policy 0, policy_version 37216 (0.0008) +[2026-06-07 03:17:16,046][495927] Updated weights for policy 0, policy_version 37227 (0.0008) +[2026-06-07 03:17:16,668][495927] Updated weights for policy 0, policy_version 37237 (0.0008) +[2026-06-07 03:17:16,822][495927] Updated weights for policy 0, policy_version 37248 (0.0008) +[2026-06-07 03:17:16,971][495927] Updated weights for policy 0, policy_version 37258 (0.0008) +[2026-06-07 03:17:17,126][495927] Updated weights for policy 0, policy_version 37268 (0.0008) +[2026-06-07 03:17:17,275][495927] Updated weights for policy 0, policy_version 37278 (0.0011) +[2026-06-07 03:17:17,426][495927] Updated weights for policy 0, policy_version 37288 (0.0011) +[2026-06-07 03:17:18,066][495927] Updated weights for policy 0, policy_version 37299 (0.0010) +[2026-06-07 03:17:18,228][495927] Updated weights for policy 0, policy_version 37310 (0.0004) +[2026-06-07 03:17:18,384][495927] Updated weights for policy 0, policy_version 37321 (0.0009) +[2026-06-07 03:17:18,544][495927] Updated weights for policy 0, policy_version 37332 (0.0012) +[2026-06-07 03:17:18,698][495927] Updated weights for policy 0, policy_version 37342 (0.0009) +[2026-06-07 03:17:18,853][495927] Updated weights for policy 0, policy_version 37352 (0.0008) +[2026-06-07 03:17:18,909][492660] Fps is (10 sec: 22937.8, 60 sec: 22937.6, 300 sec: 22882.1). Total num frames: 19103744. Throughput: 0: 22877.9. Samples: 19092096. Policy #0 lag: (min: 43.0, avg: 59.2, max: 107.0) +[2026-06-07 03:17:18,910][492660] Avg episode reward: [(0, '1511.745')] +[2026-06-07 03:17:19,491][495927] Updated weights for policy 0, policy_version 37362 (0.0008) +[2026-06-07 03:17:19,631][495927] Updated weights for policy 0, policy_version 37372 (0.0008) +[2026-06-07 03:17:19,784][495927] Updated weights for policy 0, policy_version 37383 (0.0008) +[2026-06-07 03:17:19,934][495927] Updated weights for policy 0, policy_version 37393 (0.0008) +[2026-06-07 03:17:20,099][495927] Updated weights for policy 0, policy_version 37404 (0.0010) +[2026-06-07 03:17:20,266][495927] Updated weights for policy 0, policy_version 37415 (0.0008) +[2026-06-07 03:17:20,905][495927] Updated weights for policy 0, policy_version 37425 (0.0008) +[2026-06-07 03:17:21,047][495927] Updated weights for policy 0, policy_version 37435 (0.0008) +[2026-06-07 03:17:21,195][495927] Updated weights for policy 0, policy_version 37445 (0.0008) +[2026-06-07 03:17:21,342][495927] Updated weights for policy 0, policy_version 37455 (0.0008) +[2026-06-07 03:17:21,495][495927] Updated weights for policy 0, policy_version 37465 (0.0008) +[2026-06-07 03:17:21,674][495927] Updated weights for policy 0, policy_version 37477 (0.0009) +[2026-06-07 03:17:21,828][495927] Updated weights for policy 0, policy_version 37487 (0.0008) +[2026-06-07 03:17:22,435][495927] Updated weights for policy 0, policy_version 37497 (0.0008) +[2026-06-07 03:17:22,597][495927] Updated weights for policy 0, policy_version 37508 (0.0008) +[2026-06-07 03:17:22,746][495927] Updated weights for policy 0, policy_version 37518 (0.0008) +[2026-06-07 03:17:22,915][495927] Updated weights for policy 0, policy_version 37529 (0.0008) +[2026-06-07 03:17:23,081][495927] Updated weights for policy 0, policy_version 37540 (0.0007) +[2026-06-07 03:17:23,247][495927] Updated weights for policy 0, policy_version 37551 (0.0007) +[2026-06-07 03:17:23,876][495927] Updated weights for policy 0, policy_version 37563 (0.0008) +[2026-06-07 03:17:23,909][492660] Fps is (10 sec: 22937.6, 60 sec: 22937.6, 300 sec: 22993.1). Total num frames: 19234816. Throughput: 0: 23079.8. Samples: 19236736. Policy #0 lag: (min: 43.0, avg: 59.2, max: 107.0) +[2026-06-07 03:17:23,910][492660] Avg episode reward: [(0, '1516.958')] +[2026-06-07 03:17:24,040][495927] Updated weights for policy 0, policy_version 37574 (0.0008) +[2026-06-07 03:17:24,190][495927] Updated weights for policy 0, policy_version 37584 (0.0008) +[2026-06-07 03:17:24,345][495927] Updated weights for policy 0, policy_version 37594 (0.0009) +[2026-06-07 03:17:24,506][495927] Updated weights for policy 0, policy_version 37605 (0.0010) +[2026-06-07 03:17:24,660][495927] Updated weights for policy 0, policy_version 37615 (0.0008) +[2026-06-07 03:17:25,270][495927] Updated weights for policy 0, policy_version 37625 (0.0008) +[2026-06-07 03:17:25,417][495927] Updated weights for policy 0, policy_version 37635 (0.0008) +[2026-06-07 03:17:25,590][495927] Updated weights for policy 0, policy_version 37647 (0.0008) +[2026-06-07 03:17:25,750][495927] Updated weights for policy 0, policy_version 37657 (0.0008) +[2026-06-07 03:17:25,901][495927] Updated weights for policy 0, policy_version 37667 (0.0008) +[2026-06-07 03:17:26,047][495927] Updated weights for policy 0, policy_version 37677 (0.0008) +[2026-06-07 03:17:26,666][495927] Updated weights for policy 0, policy_version 37687 (0.0008) +[2026-06-07 03:17:26,812][495927] Updated weights for policy 0, policy_version 37697 (0.0008) +[2026-06-07 03:17:26,966][495927] Updated weights for policy 0, policy_version 37707 (0.0008) +[2026-06-07 03:17:27,114][495927] Updated weights for policy 0, policy_version 37717 (0.0008) +[2026-06-07 03:17:27,268][495927] Updated weights for policy 0, policy_version 37727 (0.0008) +[2026-06-07 03:17:27,412][495927] Updated weights for policy 0, policy_version 37737 (0.0008) +[2026-06-07 03:17:28,057][495927] Updated weights for policy 0, policy_version 37748 (0.0008) +[2026-06-07 03:17:28,220][495927] Updated weights for policy 0, policy_version 37759 (0.0009) +[2026-06-07 03:17:28,374][495927] Updated weights for policy 0, policy_version 37770 (0.0008) +[2026-06-07 03:17:28,520][495927] Updated weights for policy 0, policy_version 37780 (0.0008) +[2026-06-07 03:17:28,661][495927] Updated weights for policy 0, policy_version 37790 (0.0008) +[2026-06-07 03:17:28,819][495927] Updated weights for policy 0, policy_version 37800 (0.0008) +[2026-06-07 03:17:28,909][492660] Fps is (10 sec: 22937.6, 60 sec: 22937.6, 300 sec: 22882.1). Total num frames: 19333120. Throughput: 0: 23068.4. Samples: 19370624. Policy #0 lag: (min: 43.0, avg: 59.2, max: 107.0) +[2026-06-07 03:17:28,910][492660] Avg episode reward: [(0, '1546.269')] +[2026-06-07 03:17:28,933][495570] Saving new best policy, reward=1546.269! +[2026-06-07 03:17:29,441][495927] Updated weights for policy 0, policy_version 37810 (0.0008) +[2026-06-07 03:17:29,624][495927] Updated weights for policy 0, policy_version 37823 (0.0008) +[2026-06-07 03:17:29,766][495927] Updated weights for policy 0, policy_version 37833 (0.0008) +[2026-06-07 03:17:29,919][495927] Updated weights for policy 0, policy_version 37843 (0.0008) +[2026-06-07 03:17:30,069][495927] Updated weights for policy 0, policy_version 37853 (0.0008) +[2026-06-07 03:17:30,255][495927] Updated weights for policy 0, policy_version 37866 (0.0008) +[2026-06-07 03:17:30,923][495927] Updated weights for policy 0, policy_version 37876 (0.0008) +[2026-06-07 03:17:31,057][495927] Updated weights for policy 0, policy_version 37886 (0.0008) +[2026-06-07 03:17:31,208][495927] Updated weights for policy 0, policy_version 37896 (0.0008) +[2026-06-07 03:17:31,361][495927] Updated weights for policy 0, policy_version 37906 (0.0008) +[2026-06-07 03:17:31,512][495927] Updated weights for policy 0, policy_version 37916 (0.0009) +[2026-06-07 03:17:31,661][495927] Updated weights for policy 0, policy_version 37926 (0.0008) +[2026-06-07 03:17:31,809][495927] Updated weights for policy 0, policy_version 37936 (0.0008) +[2026-06-07 03:17:32,452][495927] Updated weights for policy 0, policy_version 37948 (0.0008) +[2026-06-07 03:17:32,602][495927] Updated weights for policy 0, policy_version 37958 (0.0008) +[2026-06-07 03:17:32,748][495927] Updated weights for policy 0, policy_version 37968 (0.0008) +[2026-06-07 03:17:32,904][495927] Updated weights for policy 0, policy_version 37978 (0.0008) +[2026-06-07 03:17:33,086][495927] Updated weights for policy 0, policy_version 37990 (0.0009) +[2026-06-07 03:17:33,732][495927] Updated weights for policy 0, policy_version 38001 (0.0008) +[2026-06-07 03:17:33,871][495927] Updated weights for policy 0, policy_version 38011 (0.0008) +[2026-06-07 03:17:33,909][492660] Fps is (10 sec: 22937.6, 60 sec: 22937.6, 300 sec: 22993.1). Total num frames: 19464192. Throughput: 0: 23000.2. Samples: 19438976. Policy #0 lag: (min: 46.0, avg: 61.2, max: 110.0) +[2026-06-07 03:17:33,910][492660] Avg episode reward: [(0, '1517.406')] +[2026-06-07 03:17:34,020][495927] Updated weights for policy 0, policy_version 38021 (0.0009) +[2026-06-07 03:17:34,170][495927] Updated weights for policy 0, policy_version 38031 (0.0008) +[2026-06-07 03:17:34,318][495927] Updated weights for policy 0, policy_version 38041 (0.0008) +[2026-06-07 03:17:34,469][495927] Updated weights for policy 0, policy_version 38051 (0.0010) +[2026-06-07 03:17:34,618][495927] Updated weights for policy 0, policy_version 38061 (0.0009) +[2026-06-07 03:17:35,249][495927] Updated weights for policy 0, policy_version 38071 (0.0008) +[2026-06-07 03:17:35,391][495927] Updated weights for policy 0, policy_version 38081 (0.0008) +[2026-06-07 03:17:35,553][495927] Updated weights for policy 0, policy_version 38092 (0.0008) +[2026-06-07 03:17:35,701][495927] Updated weights for policy 0, policy_version 38102 (0.0008) +[2026-06-07 03:17:35,858][495927] Updated weights for policy 0, policy_version 38112 (0.0009) +[2026-06-07 03:17:36,011][495927] Updated weights for policy 0, policy_version 38122 (0.0008) +[2026-06-07 03:17:36,636][495927] Updated weights for policy 0, policy_version 38132 (0.0009) +[2026-06-07 03:17:36,781][495927] Updated weights for policy 0, policy_version 38142 (0.0008) +[2026-06-07 03:17:36,943][495927] Updated weights for policy 0, policy_version 38153 (0.0008) +[2026-06-07 03:17:37,102][495927] Updated weights for policy 0, policy_version 38163 (0.0008) +[2026-06-07 03:17:37,267][495927] Updated weights for policy 0, policy_version 38174 (0.0008) +[2026-06-07 03:17:37,414][495927] Updated weights for policy 0, policy_version 38184 (0.0008) +[2026-06-07 03:17:38,042][495927] Updated weights for policy 0, policy_version 38194 (0.0008) +[2026-06-07 03:17:38,186][495927] Updated weights for policy 0, policy_version 38204 (0.0008) +[2026-06-07 03:17:38,344][495927] Updated weights for policy 0, policy_version 38215 (0.0008) +[2026-06-07 03:17:38,497][495927] Updated weights for policy 0, policy_version 38225 (0.0009) +[2026-06-07 03:17:38,647][495927] Updated weights for policy 0, policy_version 38235 (0.0008) +[2026-06-07 03:17:38,811][495927] Updated weights for policy 0, policy_version 38246 (0.0008) +[2026-06-07 03:17:38,909][492660] Fps is (10 sec: 22937.6, 60 sec: 22937.6, 300 sec: 22882.1). Total num frames: 19562496. Throughput: 0: 22781.2. Samples: 19573760. Policy #0 lag: (min: 46.0, avg: 61.2, max: 110.0) +[2026-06-07 03:17:38,910][492660] Avg episode reward: [(0, '1580.342')] +[2026-06-07 03:17:38,960][495570] Saving new best policy, reward=1580.342! +[2026-06-07 03:17:38,964][495927] Updated weights for policy 0, policy_version 38256 (0.0008) +[2026-06-07 03:17:39,586][495927] Updated weights for policy 0, policy_version 38266 (0.0009) +[2026-06-07 03:17:39,751][495927] Updated weights for policy 0, policy_version 38277 (0.0008) +[2026-06-07 03:17:39,895][495927] Updated weights for policy 0, policy_version 38287 (0.0008) +[2026-06-07 03:17:40,044][495927] Updated weights for policy 0, policy_version 38297 (0.0008) +[2026-06-07 03:17:40,197][495927] Updated weights for policy 0, policy_version 38307 (0.0008) +[2026-06-07 03:17:40,343][495927] Updated weights for policy 0, policy_version 38317 (0.0008) +[2026-06-07 03:17:40,997][495927] Updated weights for policy 0, policy_version 38327 (0.0009) +[2026-06-07 03:17:41,154][495927] Updated weights for policy 0, policy_version 38338 (0.0009) +[2026-06-07 03:17:41,317][495927] Updated weights for policy 0, policy_version 38349 (0.0008) +[2026-06-07 03:17:41,488][495927] Updated weights for policy 0, policy_version 38360 (0.0008) +[2026-06-07 03:17:41,642][495927] Updated weights for policy 0, policy_version 38370 (0.0008) +[2026-06-07 03:17:41,793][495927] Updated weights for policy 0, policy_version 38380 (0.0008) +[2026-06-07 03:17:42,405][495927] Updated weights for policy 0, policy_version 38390 (0.0008) +[2026-06-07 03:17:42,548][495927] Updated weights for policy 0, policy_version 38400 (0.0008) +[2026-06-07 03:17:42,696][495927] Updated weights for policy 0, policy_version 38410 (0.0008) +[2026-06-07 03:17:42,865][495927] Updated weights for policy 0, policy_version 38421 (0.0008) +[2026-06-07 03:17:43,020][495927] Updated weights for policy 0, policy_version 38431 (0.0009) +[2026-06-07 03:17:43,181][495927] Updated weights for policy 0, policy_version 38442 (0.0008) +[2026-06-07 03:17:43,814][495927] Updated weights for policy 0, policy_version 38453 (0.0008) +[2026-06-07 03:17:43,909][492660] Fps is (10 sec: 22937.6, 60 sec: 22937.6, 300 sec: 22993.1). Total num frames: 19693568. Throughput: 0: 22795.4. Samples: 19709184. Policy #0 lag: (min: 46.0, avg: 61.2, max: 110.0) +[2026-06-07 03:17:43,910][492660] Avg episode reward: [(0, '1592.349')] +[2026-06-07 03:17:43,987][495927] Updated weights for policy 0, policy_version 38465 (0.0008) +[2026-06-07 03:17:44,136][495927] Updated weights for policy 0, policy_version 38475 (0.0008) +[2026-06-07 03:17:44,289][495927] Updated weights for policy 0, policy_version 38485 (0.0009) +[2026-06-07 03:17:44,436][495927] Updated weights for policy 0, policy_version 38495 (0.0008) +[2026-06-07 03:17:44,606][495927] Updated weights for policy 0, policy_version 38506 (0.0008) +[2026-06-07 03:17:44,688][495570] Saving new best policy, reward=1592.349! +[2026-06-07 03:17:45,258][495927] Updated weights for policy 0, policy_version 38517 (0.0008) +[2026-06-07 03:17:45,406][495927] Updated weights for policy 0, policy_version 38527 (0.0008) +[2026-06-07 03:17:45,551][495927] Updated weights for policy 0, policy_version 38537 (0.0009) +[2026-06-07 03:17:45,708][495927] Updated weights for policy 0, policy_version 38547 (0.0008) +[2026-06-07 03:17:45,869][495927] Updated weights for policy 0, policy_version 38558 (0.0008) +[2026-06-07 03:17:46,037][495927] Updated weights for policy 0, policy_version 38569 (0.0008) +[2026-06-07 03:17:46,670][495927] Updated weights for policy 0, policy_version 38580 (0.0009) +[2026-06-07 03:17:46,812][495927] Updated weights for policy 0, policy_version 38590 (0.0008) +[2026-06-07 03:17:46,959][495927] Updated weights for policy 0, policy_version 38600 (0.0006) +[2026-06-07 03:17:47,116][495927] Updated weights for policy 0, policy_version 38610 (0.0005) +[2026-06-07 03:17:47,266][495927] Updated weights for policy 0, policy_version 38620 (0.0004) +[2026-06-07 03:17:47,421][495927] Updated weights for policy 0, policy_version 38630 (0.0004) +[2026-06-07 03:17:47,562][495927] Updated weights for policy 0, policy_version 38640 (0.0005) +[2026-06-07 03:17:48,184][495927] Updated weights for policy 0, policy_version 38651 (0.0004) +[2026-06-07 03:17:48,337][495927] Updated weights for policy 0, policy_version 38661 (0.0004) +[2026-06-07 03:17:48,485][495927] Updated weights for policy 0, policy_version 38671 (0.0004) +[2026-06-07 03:17:48,649][495927] Updated weights for policy 0, policy_version 38682 (0.0005) +[2026-06-07 03:17:48,810][495927] Updated weights for policy 0, policy_version 38692 (0.0005) +[2026-06-07 03:17:48,909][492660] Fps is (10 sec: 22937.7, 60 sec: 22937.6, 300 sec: 22882.1). Total num frames: 19791872. Throughput: 0: 22872.2. Samples: 19779328. Policy #0 lag: (min: 46.0, avg: 61.2, max: 110.0) +[2026-06-07 03:17:48,910][492660] Avg episode reward: [(0, '1554.868')] +[2026-06-07 03:17:48,956][495927] Updated weights for policy 0, policy_version 38702 (0.0005) +[2026-06-07 03:17:49,567][495927] Updated weights for policy 0, policy_version 38712 (0.0005) +[2026-06-07 03:17:49,725][495927] Updated weights for policy 0, policy_version 38723 (0.0004) +[2026-06-07 03:17:49,876][495927] Updated weights for policy 0, policy_version 38733 (0.0004) +[2026-06-07 03:17:50,024][495927] Updated weights for policy 0, policy_version 38743 (0.0004) +[2026-06-07 03:17:50,178][495927] Updated weights for policy 0, policy_version 38753 (0.0004) +[2026-06-07 03:17:50,332][495927] Updated weights for policy 0, policy_version 38763 (0.0004) +[2026-06-07 03:17:50,944][495927] Updated weights for policy 0, policy_version 38774 (0.0004) +[2026-06-07 03:17:51,089][495927] Updated weights for policy 0, policy_version 38784 (0.0004) +[2026-06-07 03:17:51,235][495927] Updated weights for policy 0, policy_version 38794 (0.0004) +[2026-06-07 03:17:51,408][495927] Updated weights for policy 0, policy_version 38805 (0.0008) +[2026-06-07 03:17:51,566][495927] Updated weights for policy 0, policy_version 38816 (0.0008) +[2026-06-07 03:17:51,756][495927] Updated weights for policy 0, policy_version 38828 (0.0009) +[2026-06-07 03:17:52,385][495927] Updated weights for policy 0, policy_version 38838 (0.0008) +[2026-06-07 03:17:52,547][495927] Updated weights for policy 0, policy_version 38849 (0.0008) +[2026-06-07 03:17:52,703][495927] Updated weights for policy 0, policy_version 38860 (0.0008) +[2026-06-07 03:17:52,886][495927] Updated weights for policy 0, policy_version 38872 (0.0008) +[2026-06-07 03:17:53,036][495927] Updated weights for policy 0, policy_version 38882 (0.0008) +[2026-06-07 03:17:53,187][495927] Updated weights for policy 0, policy_version 38892 (0.0008) +[2026-06-07 03:17:53,808][495927] Updated weights for policy 0, policy_version 38902 (0.0008) +[2026-06-07 03:17:53,909][492660] Fps is (10 sec: 22937.5, 60 sec: 22937.6, 300 sec: 22993.1). Total num frames: 19922944. Throughput: 0: 23116.8. Samples: 19925632. Policy #0 lag: (min: 46.0, avg: 61.2, max: 110.0) +[2026-06-07 03:17:53,910][492660] Avg episode reward: [(0, '1617.588')] +[2026-06-07 03:17:53,957][495927] Updated weights for policy 0, policy_version 38912 (0.0009) +[2026-06-07 03:17:54,101][495927] Updated weights for policy 0, policy_version 38922 (0.0008) +[2026-06-07 03:17:54,255][495927] Updated weights for policy 0, policy_version 38932 (0.0008) +[2026-06-07 03:17:54,427][495927] Updated weights for policy 0, policy_version 38943 (0.0008) +[2026-06-07 03:17:54,576][495927] Updated weights for policy 0, policy_version 38953 (0.0008) +[2026-06-07 03:17:54,677][495570] Saving new best policy, reward=1617.588! +[2026-06-07 03:17:55,195][495927] Updated weights for policy 0, policy_version 38963 (0.0008) +[2026-06-07 03:17:55,337][495927] Updated weights for policy 0, policy_version 38973 (0.0008) +[2026-06-07 03:17:55,483][495927] Updated weights for policy 0, policy_version 38983 (0.0008) +[2026-06-07 03:17:55,670][495927] Updated weights for policy 0, policy_version 38995 (0.0008) +[2026-06-07 03:17:55,819][495927] Updated weights for policy 0, policy_version 39005 (0.0008) +[2026-06-07 03:17:55,974][495927] Updated weights for policy 0, policy_version 39015 (0.0008) +[2026-06-07 03:17:56,601][495927] Updated weights for policy 0, policy_version 39025 (0.0008) +[2026-06-07 03:17:56,747][495927] Updated weights for policy 0, policy_version 39036 (0.0008) +[2026-06-07 03:17:56,893][495927] Updated weights for policy 0, policy_version 39046 (0.0008) +[2026-06-07 03:17:57,049][495927] Updated weights for policy 0, policy_version 39056 (0.0008) +[2026-06-07 03:17:57,200][495927] Updated weights for policy 0, policy_version 39066 (0.0008) +[2026-06-07 03:17:57,356][495927] Updated weights for policy 0, policy_version 39076 (0.0008) +[2026-06-07 03:17:57,517][495927] Updated weights for policy 0, policy_version 39087 (0.0008) +[2026-06-07 03:17:58,145][495927] Updated weights for policy 0, policy_version 39098 (0.0009) +[2026-06-07 03:17:58,289][495927] Updated weights for policy 0, policy_version 39108 (0.0008) +[2026-06-07 03:17:58,443][495927] Updated weights for policy 0, policy_version 39118 (0.0008) +[2026-06-07 03:17:58,598][495927] Updated weights for policy 0, policy_version 39128 (0.0008) +[2026-06-07 03:17:58,755][495927] Updated weights for policy 0, policy_version 39139 (0.0008) +[2026-06-07 03:17:58,909][492660] Fps is (10 sec: 22937.6, 60 sec: 22937.6, 300 sec: 22882.1). Total num frames: 20021248. Throughput: 0: 23096.9. Samples: 20059136. Policy #0 lag: (min: 3.0, avg: 25.7, max: 67.0) +[2026-06-07 03:17:58,910][492660] Avg episode reward: [(0, '1692.107')] +[2026-06-07 03:17:58,941][495927] Updated weights for policy 0, policy_version 39151 (0.0008) +[2026-06-07 03:17:58,957][495570] Saving new best policy, reward=1692.107! +[2026-06-07 03:17:59,565][495927] Updated weights for policy 0, policy_version 39161 (0.0009) +[2026-06-07 03:17:59,722][495927] Updated weights for policy 0, policy_version 39171 (0.0008) +[2026-06-07 03:17:59,867][495927] Updated weights for policy 0, policy_version 39181 (0.0008) +[2026-06-07 03:18:00,023][495927] Updated weights for policy 0, policy_version 39191 (0.0008) +[2026-06-07 03:18:00,169][495927] Updated weights for policy 0, policy_version 39201 (0.0008) +[2026-06-07 03:18:00,326][495927] Updated weights for policy 0, policy_version 39211 (0.0008) +[2026-06-07 03:18:00,947][495927] Updated weights for policy 0, policy_version 39221 (0.0008) +[2026-06-07 03:18:01,100][495927] Updated weights for policy 0, policy_version 39232 (0.0008) +[2026-06-07 03:18:01,250][495927] Updated weights for policy 0, policy_version 39242 (0.0008) +[2026-06-07 03:18:01,402][495927] Updated weights for policy 0, policy_version 39252 (0.0008) +[2026-06-07 03:18:01,555][495927] Updated weights for policy 0, policy_version 39262 (0.0008) +[2026-06-07 03:18:01,707][495927] Updated weights for policy 0, policy_version 39272 (0.0008) +[2026-06-07 03:18:02,330][495927] Updated weights for policy 0, policy_version 39282 (0.0008) +[2026-06-07 03:18:02,462][495927] Updated weights for policy 0, policy_version 39292 (0.0008) +[2026-06-07 03:18:02,613][495927] Updated weights for policy 0, policy_version 39302 (0.0008) +[2026-06-07 03:18:02,770][495927] Updated weights for policy 0, policy_version 39312 (0.0008) +[2026-06-07 03:18:02,914][495927] Updated weights for policy 0, policy_version 39322 (0.0008) +[2026-06-07 03:18:03,090][495927] Updated weights for policy 0, policy_version 39333 (0.0009) +[2026-06-07 03:18:03,236][495927] Updated weights for policy 0, policy_version 39343 (0.0008) +[2026-06-07 03:18:03,868][495927] Updated weights for policy 0, policy_version 39353 (0.0008) +[2026-06-07 03:18:03,909][492660] Fps is (10 sec: 22937.5, 60 sec: 22937.6, 300 sec: 22993.1). Total num frames: 20152320. Throughput: 0: 22980.2. Samples: 20126208. Policy #0 lag: (min: 3.0, avg: 25.7, max: 67.0) +[2026-06-07 03:18:03,910][492660] Avg episode reward: [(0, '1721.222')] +[2026-06-07 03:18:04,032][495927] Updated weights for policy 0, policy_version 39364 (0.0008) +[2026-06-07 03:18:04,195][495927] Updated weights for policy 0, policy_version 39375 (0.0009) +[2026-06-07 03:18:04,346][495927] Updated weights for policy 0, policy_version 39385 (0.0008) +[2026-06-07 03:18:04,491][495927] Updated weights for policy 0, policy_version 39395 (0.0008) +[2026-06-07 03:18:04,652][495927] Updated weights for policy 0, policy_version 39405 (0.0009) +[2026-06-07 03:18:04,689][495570] Saving new best policy, reward=1721.222! +[2026-06-07 03:18:05,257][495927] Updated weights for policy 0, policy_version 39415 (0.0009) +[2026-06-07 03:18:05,413][495927] Updated weights for policy 0, policy_version 39426 (0.0008) +[2026-06-07 03:18:05,570][495927] Updated weights for policy 0, policy_version 39436 (0.0005) +[2026-06-07 03:18:05,720][495927] Updated weights for policy 0, policy_version 39446 (0.0005) +[2026-06-07 03:18:05,871][495927] Updated weights for policy 0, policy_version 39456 (0.0005) +[2026-06-07 03:18:06,021][495927] Updated weights for policy 0, policy_version 39466 (0.0005) +[2026-06-07 03:18:06,650][495927] Updated weights for policy 0, policy_version 39476 (0.0005) +[2026-06-07 03:18:06,798][495927] Updated weights for policy 0, policy_version 39486 (0.0005) +[2026-06-07 03:18:06,967][495927] Updated weights for policy 0, policy_version 39497 (0.0005) +[2026-06-07 03:18:07,138][495927] Updated weights for policy 0, policy_version 39508 (0.0007) +[2026-06-07 03:18:07,285][495927] Updated weights for policy 0, policy_version 39518 (0.0009) +[2026-06-07 03:18:07,440][495927] Updated weights for policy 0, policy_version 39528 (0.0009) +[2026-06-07 03:18:08,101][495927] Updated weights for policy 0, policy_version 39540 (0.0008) +[2026-06-07 03:18:08,288][495927] Updated weights for policy 0, policy_version 39553 (0.0009) +[2026-06-07 03:18:08,441][495927] Updated weights for policy 0, policy_version 39563 (0.0008) +[2026-06-07 03:18:08,590][495927] Updated weights for policy 0, policy_version 39573 (0.0008) +[2026-06-07 03:18:08,743][495927] Updated weights for policy 0, policy_version 39583 (0.0008) +[2026-06-07 03:18:08,892][495927] Updated weights for policy 0, policy_version 39593 (0.0009) +[2026-06-07 03:18:08,909][492660] Fps is (10 sec: 22937.6, 60 sec: 22937.6, 300 sec: 22882.1). Total num frames: 20250624. Throughput: 0: 22772.6. Samples: 20261504. Policy #0 lag: (min: 3.0, avg: 25.7, max: 67.0) +[2026-06-07 03:18:08,910][492660] Avg episode reward: [(0, '1754.693')] +[2026-06-07 03:18:08,990][495570] Saving new best policy, reward=1754.693! +[2026-06-07 03:18:09,531][495927] Updated weights for policy 0, policy_version 39603 (0.0009) +[2026-06-07 03:18:09,680][495927] Updated weights for policy 0, policy_version 39613 (0.0008) +[2026-06-07 03:18:09,816][495927] Updated weights for policy 0, policy_version 39623 (0.0009) +[2026-06-07 03:18:09,986][495927] Updated weights for policy 0, policy_version 39634 (0.0008) +[2026-06-07 03:18:10,134][495927] Updated weights for policy 0, policy_version 39644 (0.0008) +[2026-06-07 03:18:10,282][495927] Updated weights for policy 0, policy_version 39654 (0.0009) +[2026-06-07 03:18:10,436][495927] Updated weights for policy 0, policy_version 39664 (0.0008) +[2026-06-07 03:18:11,084][495927] Updated weights for policy 0, policy_version 39676 (0.0009) +[2026-06-07 03:18:11,266][495927] Updated weights for policy 0, policy_version 39689 (0.0008) +[2026-06-07 03:18:11,421][495927] Updated weights for policy 0, policy_version 39699 (0.0008) +[2026-06-07 03:18:11,588][495927] Updated weights for policy 0, policy_version 39710 (0.0008) +[2026-06-07 03:18:11,742][495927] Updated weights for policy 0, policy_version 39720 (0.0008) +[2026-06-07 03:18:12,384][495927] Updated weights for policy 0, policy_version 39730 (0.0009) +[2026-06-07 03:18:12,518][495927] Updated weights for policy 0, policy_version 39740 (0.0008) +[2026-06-07 03:18:12,671][495927] Updated weights for policy 0, policy_version 39750 (0.0008) +[2026-06-07 03:18:12,816][495927] Updated weights for policy 0, policy_version 39760 (0.0008) +[2026-06-07 03:18:12,991][495927] Updated weights for policy 0, policy_version 39771 (0.0008) +[2026-06-07 03:18:13,135][495927] Updated weights for policy 0, policy_version 39781 (0.0009) +[2026-06-07 03:18:13,283][495927] Updated weights for policy 0, policy_version 39791 (0.0008) +[2026-06-07 03:18:13,909][492660] Fps is (10 sec: 22937.7, 60 sec: 22937.6, 300 sec: 22993.1). Total num frames: 20381696. Throughput: 0: 22789.7. Samples: 20396160. Policy #0 lag: (min: 3.0, avg: 25.7, max: 67.0) +[2026-06-07 03:18:13,910][492660] Avg episode reward: [(0, '1774.207')] +[2026-06-07 03:18:13,919][495927] Updated weights for policy 0, policy_version 39801 (0.0008) +[2026-06-07 03:18:14,082][495927] Updated weights for policy 0, policy_version 39812 (0.0009) +[2026-06-07 03:18:14,230][495927] Updated weights for policy 0, policy_version 39822 (0.0008) +[2026-06-07 03:18:14,388][495927] Updated weights for policy 0, policy_version 39832 (0.0008) +[2026-06-07 03:18:14,541][495927] Updated weights for policy 0, policy_version 39842 (0.0008) +[2026-06-07 03:18:14,691][495927] Updated weights for policy 0, policy_version 39852 (0.0008) +[2026-06-07 03:18:14,744][495570] Saving new best policy, reward=1774.207! +[2026-06-07 03:18:15,314][495927] Updated weights for policy 0, policy_version 39862 (0.0008) +[2026-06-07 03:18:15,469][495927] Updated weights for policy 0, policy_version 39873 (0.0008) +[2026-06-07 03:18:15,616][495927] Updated weights for policy 0, policy_version 39883 (0.0008) +[2026-06-07 03:18:15,770][495927] Updated weights for policy 0, policy_version 39893 (0.0008) +[2026-06-07 03:18:15,934][495927] Updated weights for policy 0, policy_version 39904 (0.0008) +[2026-06-07 03:18:16,096][495927] Updated weights for policy 0, policy_version 39915 (0.0009) +[2026-06-07 03:18:16,731][495927] Updated weights for policy 0, policy_version 39926 (0.0009) +[2026-06-07 03:18:16,885][495927] Updated weights for policy 0, policy_version 39936 (0.0008) +[2026-06-07 03:18:17,028][495927] Updated weights for policy 0, policy_version 39946 (0.0008) +[2026-06-07 03:18:17,175][495927] Updated weights for policy 0, policy_version 39956 (0.0008) +[2026-06-07 03:18:17,348][495927] Updated weights for policy 0, policy_version 39967 (0.0008) +[2026-06-07 03:18:17,517][495927] Updated weights for policy 0, policy_version 39978 (0.0008) +[2026-06-07 03:18:18,141][495927] Updated weights for policy 0, policy_version 39988 (0.0008) +[2026-06-07 03:18:18,296][495927] Updated weights for policy 0, policy_version 39999 (0.0008) +[2026-06-07 03:18:18,439][495927] Updated weights for policy 0, policy_version 40009 (0.0005) +[2026-06-07 03:18:18,596][495927] Updated weights for policy 0, policy_version 40019 (0.0005) +[2026-06-07 03:18:18,750][495927] Updated weights for policy 0, policy_version 40029 (0.0005) +[2026-06-07 03:18:18,909][492660] Fps is (10 sec: 22937.6, 60 sec: 22937.6, 300 sec: 22882.1). Total num frames: 20480000. Throughput: 0: 22840.9. Samples: 20466816. Policy #0 lag: (min: 3.0, avg: 25.7, max: 67.0) +[2026-06-07 03:18:18,910][492660] Avg episode reward: [(0, '1718.993')] +[2026-06-07 03:18:18,922][495927] Updated weights for policy 0, policy_version 40040 (0.0005) +[2026-06-07 03:18:19,544][495927] Updated weights for policy 0, policy_version 40050 (0.0004) +[2026-06-07 03:18:19,686][495927] Updated weights for policy 0, policy_version 40060 (0.0004) +[2026-06-07 03:18:19,836][495927] Updated weights for policy 0, policy_version 40070 (0.0006) +[2026-06-07 03:18:20,003][495927] Updated weights for policy 0, policy_version 40081 (0.0008) +[2026-06-07 03:18:20,180][495927] Updated weights for policy 0, policy_version 40093 (0.0008) +[2026-06-07 03:18:20,355][495927] Updated weights for policy 0, policy_version 40104 (0.0008) +[2026-06-07 03:18:20,988][495927] Updated weights for policy 0, policy_version 40115 (0.0008) +[2026-06-07 03:18:21,163][495927] Updated weights for policy 0, policy_version 40127 (0.0008) +[2026-06-07 03:18:21,311][495927] Updated weights for policy 0, policy_version 40137 (0.0008) +[2026-06-07 03:18:21,479][495927] Updated weights for policy 0, policy_version 40148 (0.0008) +[2026-06-07 03:18:21,632][495927] Updated weights for policy 0, policy_version 40158 (0.0008) +[2026-06-07 03:18:21,785][495927] Updated weights for policy 0, policy_version 40168 (0.0008) +[2026-06-07 03:18:22,437][495927] Updated weights for policy 0, policy_version 40179 (0.0008) +[2026-06-07 03:18:22,576][495927] Updated weights for policy 0, policy_version 40189 (0.0008) +[2026-06-07 03:18:22,731][495927] Updated weights for policy 0, policy_version 40199 (0.0008) +[2026-06-07 03:18:22,898][495927] Updated weights for policy 0, policy_version 40210 (0.0008) +[2026-06-07 03:18:23,048][495927] Updated weights for policy 0, policy_version 40220 (0.0008) +[2026-06-07 03:18:23,198][495927] Updated weights for policy 0, policy_version 40230 (0.0008) +[2026-06-07 03:18:23,851][495927] Updated weights for policy 0, policy_version 40241 (0.0008) +[2026-06-07 03:18:23,909][492660] Fps is (10 sec: 22937.4, 60 sec: 22937.6, 300 sec: 22993.1). Total num frames: 20611072. Throughput: 0: 23065.6. Samples: 20611712. Policy #0 lag: (min: 63.0, avg: 77.9, max: 127.0) +[2026-06-07 03:18:23,910][492660] Avg episode reward: [(0, '1650.902')] +[2026-06-07 03:18:23,986][495927] Updated weights for policy 0, policy_version 40251 (0.0008) +[2026-06-07 03:18:24,144][495927] Updated weights for policy 0, policy_version 40262 (0.0009) +[2026-06-07 03:18:24,295][495927] Updated weights for policy 0, policy_version 40272 (0.0008) +[2026-06-07 03:18:24,468][495927] Updated weights for policy 0, policy_version 40283 (0.0008) +[2026-06-07 03:18:24,635][495927] Updated weights for policy 0, policy_version 40294 (0.0009) +[2026-06-07 03:18:25,280][495927] Updated weights for policy 0, policy_version 40305 (0.0009) +[2026-06-07 03:18:25,419][495927] Updated weights for policy 0, policy_version 40315 (0.0008) +[2026-06-07 03:18:25,583][495927] Updated weights for policy 0, policy_version 40326 (0.0008) +[2026-06-07 03:18:25,731][495927] Updated weights for policy 0, policy_version 40336 (0.0008) +[2026-06-07 03:18:25,895][495927] Updated weights for policy 0, policy_version 40347 (0.0008) +[2026-06-07 03:18:26,046][495927] Updated weights for policy 0, policy_version 40357 (0.0008) +[2026-06-07 03:18:26,207][495927] Updated weights for policy 0, policy_version 40368 (0.0008) +[2026-06-07 03:18:26,850][495927] Updated weights for policy 0, policy_version 40379 (0.0009) +[2026-06-07 03:18:27,009][495927] Updated weights for policy 0, policy_version 40390 (0.0008) +[2026-06-07 03:18:27,171][495927] Updated weights for policy 0, policy_version 40401 (0.0008) +[2026-06-07 03:18:27,350][495927] Updated weights for policy 0, policy_version 40412 (0.0009) +[2026-06-07 03:18:27,499][495927] Updated weights for policy 0, policy_version 40422 (0.0008) +[2026-06-07 03:18:27,644][495927] Updated weights for policy 0, policy_version 40432 (0.0008) +[2026-06-07 03:18:28,328][495927] Updated weights for policy 0, policy_version 40446 (0.0009) +[2026-06-07 03:18:28,501][495927] Updated weights for policy 0, policy_version 40458 (0.0009) +[2026-06-07 03:18:28,647][495927] Updated weights for policy 0, policy_version 40468 (0.0008) +[2026-06-07 03:18:28,809][495927] Updated weights for policy 0, policy_version 40479 (0.0008) +[2026-06-07 03:18:28,909][492660] Fps is (10 sec: 22937.7, 60 sec: 22937.6, 300 sec: 22882.1). Total num frames: 20709376. Throughput: 0: 23091.2. Samples: 20748288. Policy #0 lag: (min: 63.0, avg: 77.9, max: 127.0) +[2026-06-07 03:18:28,910][492660] Avg episode reward: [(0, '1616.611')] +[2026-06-07 03:18:28,990][495927] Updated weights for policy 0, policy_version 40491 (0.0008) +[2026-06-07 03:18:29,642][495927] Updated weights for policy 0, policy_version 40501 (0.0008) +[2026-06-07 03:18:29,793][495927] Updated weights for policy 0, policy_version 40512 (0.0009) +[2026-06-07 03:18:29,939][495927] Updated weights for policy 0, policy_version 40522 (0.0008) +[2026-06-07 03:18:30,104][495927] Updated weights for policy 0, policy_version 40533 (0.0008) +[2026-06-07 03:18:30,273][495927] Updated weights for policy 0, policy_version 40545 (0.0008) +[2026-06-07 03:18:30,422][495927] Updated weights for policy 0, policy_version 40555 (0.0008) +[2026-06-07 03:18:31,075][495927] Updated weights for policy 0, policy_version 40565 (0.0008) +[2026-06-07 03:18:31,228][495927] Updated weights for policy 0, policy_version 40576 (0.0008) +[2026-06-07 03:18:31,386][495927] Updated weights for policy 0, policy_version 40587 (0.0008) +[2026-06-07 03:18:31,535][495927] Updated weights for policy 0, policy_version 40597 (0.0008) +[2026-06-07 03:18:31,684][495927] Updated weights for policy 0, policy_version 40607 (0.0008) +[2026-06-07 03:18:31,826][495927] Updated weights for policy 0, policy_version 40617 (0.0008) +[2026-06-07 03:18:32,519][495927] Updated weights for policy 0, policy_version 40631 (0.0008) +[2026-06-07 03:18:32,669][495927] Updated weights for policy 0, policy_version 40642 (0.0008) +[2026-06-07 03:18:32,834][495927] Updated weights for policy 0, policy_version 40653 (0.0009) +[2026-06-07 03:18:33,010][495927] Updated weights for policy 0, policy_version 40665 (0.0008) +[2026-06-07 03:18:33,166][495927] Updated weights for policy 0, policy_version 40676 (0.0008) +[2026-06-07 03:18:33,323][495927] Updated weights for policy 0, policy_version 40686 (0.0008) +[2026-06-07 03:18:33,909][492660] Fps is (10 sec: 22937.8, 60 sec: 22937.6, 300 sec: 22993.1). Total num frames: 20840448. Throughput: 0: 23037.2. Samples: 20816000. Policy #0 lag: (min: 63.0, avg: 77.9, max: 127.0) +[2026-06-07 03:18:33,910][492660] Avg episode reward: [(0, '1630.887')] +[2026-06-07 03:18:33,995][495927] Updated weights for policy 0, policy_version 40698 (0.0008) +[2026-06-07 03:18:34,127][495927] Updated weights for policy 0, policy_version 40708 (0.0008) +[2026-06-07 03:18:34,291][495927] Updated weights for policy 0, policy_version 40719 (0.0009) +[2026-06-07 03:18:34,455][495927] Updated weights for policy 0, policy_version 40730 (0.0008) +[2026-06-07 03:18:34,614][495927] Updated weights for policy 0, policy_version 40741 (0.0008) +[2026-06-07 03:18:34,768][495927] Updated weights for policy 0, policy_version 40751 (0.0008) +[2026-06-07 03:18:35,427][495927] Updated weights for policy 0, policy_version 40762 (0.0008) +[2026-06-07 03:18:35,579][495927] Updated weights for policy 0, policy_version 40772 (0.0008) +[2026-06-07 03:18:35,746][495927] Updated weights for policy 0, policy_version 40783 (0.0008) +[2026-06-07 03:18:35,894][495927] Updated weights for policy 0, policy_version 40793 (0.0008) +[2026-06-07 03:18:36,076][495927] Updated weights for policy 0, policy_version 40805 (0.0008) +[2026-06-07 03:18:36,226][495927] Updated weights for policy 0, policy_version 40815 (0.0008) +[2026-06-07 03:18:36,844][495927] Updated weights for policy 0, policy_version 40826 (0.0009) +[2026-06-07 03:18:37,001][495927] Updated weights for policy 0, policy_version 40837 (0.0008) +[2026-06-07 03:18:37,155][495927] Updated weights for policy 0, policy_version 40847 (0.0008) +[2026-06-07 03:18:37,300][495927] Updated weights for policy 0, policy_version 40857 (0.0008) +[2026-06-07 03:18:37,469][495927] Updated weights for policy 0, policy_version 40868 (0.0009) +[2026-06-07 03:18:37,627][495927] Updated weights for policy 0, policy_version 40878 (0.0008) +[2026-06-07 03:18:38,277][495927] Updated weights for policy 0, policy_version 40889 (0.0008) +[2026-06-07 03:18:38,443][495927] Updated weights for policy 0, policy_version 40901 (0.0008) +[2026-06-07 03:18:38,594][495927] Updated weights for policy 0, policy_version 40911 (0.0008) +[2026-06-07 03:18:38,788][495927] Updated weights for policy 0, policy_version 40923 (0.0008) +[2026-06-07 03:18:38,909][492660] Fps is (10 sec: 22937.5, 60 sec: 22937.6, 300 sec: 22882.1). Total num frames: 20938752. Throughput: 0: 22789.7. Samples: 20951168. Policy #0 lag: (min: 63.0, avg: 77.9, max: 127.0) +[2026-06-07 03:18:38,910][492660] Avg episode reward: [(0, '1564.211')] +[2026-06-07 03:18:38,936][495927] Updated weights for policy 0, policy_version 40933 (0.0008) +[2026-06-07 03:18:39,086][495927] Updated weights for policy 0, policy_version 40943 (0.0008) +[2026-06-07 03:18:39,708][495927] Updated weights for policy 0, policy_version 40953 (0.0008) +[2026-06-07 03:18:39,854][495927] Updated weights for policy 0, policy_version 40963 (0.0008) +[2026-06-07 03:18:40,001][495927] Updated weights for policy 0, policy_version 40973 (0.0008) +[2026-06-07 03:18:40,164][495927] Updated weights for policy 0, policy_version 40984 (0.0008) +[2026-06-07 03:18:40,316][495927] Updated weights for policy 0, policy_version 40994 (0.0008) +[2026-06-07 03:18:40,468][495927] Updated weights for policy 0, policy_version 41004 (0.0008) +[2026-06-07 03:18:41,090][495927] Updated weights for policy 0, policy_version 41014 (0.0008) +[2026-06-07 03:18:41,241][495927] Updated weights for policy 0, policy_version 41024 (0.0008) +[2026-06-07 03:18:41,390][495927] Updated weights for policy 0, policy_version 41034 (0.0008) +[2026-06-07 03:18:41,554][495927] Updated weights for policy 0, policy_version 41045 (0.0008) +[2026-06-07 03:18:41,705][495927] Updated weights for policy 0, policy_version 41055 (0.0008) +[2026-06-07 03:18:41,865][495927] Updated weights for policy 0, policy_version 41065 (0.0008) +[2026-06-07 03:18:42,482][495927] Updated weights for policy 0, policy_version 41075 (0.0008) +[2026-06-07 03:18:42,644][495927] Updated weights for policy 0, policy_version 41086 (0.0008) +[2026-06-07 03:18:42,785][495927] Updated weights for policy 0, policy_version 41096 (0.0008) +[2026-06-07 03:18:42,947][495927] Updated weights for policy 0, policy_version 41106 (0.0008) +[2026-06-07 03:18:43,096][495927] Updated weights for policy 0, policy_version 41116 (0.0008) +[2026-06-07 03:18:43,263][495927] Updated weights for policy 0, policy_version 41127 (0.0008) +[2026-06-07 03:18:43,871][495927] Updated weights for policy 0, policy_version 41137 (0.0005) +[2026-06-07 03:18:43,909][492660] Fps is (10 sec: 22937.6, 60 sec: 22937.6, 300 sec: 22993.2). Total num frames: 21069824. Throughput: 0: 22821.0. Samples: 21086080. Policy #0 lag: (min: 63.0, avg: 77.9, max: 127.0) +[2026-06-07 03:18:43,910][492660] Avg episode reward: [(0, '1565.830')] +[2026-06-07 03:18:44,016][495927] Updated weights for policy 0, policy_version 41147 (0.0008) +[2026-06-07 03:18:44,157][495927] Updated weights for policy 0, policy_version 41157 (0.0008) +[2026-06-07 03:18:44,324][495927] Updated weights for policy 0, policy_version 41168 (0.0009) +[2026-06-07 03:18:44,472][495927] Updated weights for policy 0, policy_version 41178 (0.0008) +[2026-06-07 03:18:44,631][495927] Updated weights for policy 0, policy_version 41188 (0.0008) +[2026-06-07 03:18:44,775][495927] Updated weights for policy 0, policy_version 41198 (0.0008) +[2026-06-07 03:18:45,433][495927] Updated weights for policy 0, policy_version 41209 (0.0009) +[2026-06-07 03:18:45,593][495927] Updated weights for policy 0, policy_version 41220 (0.0009) +[2026-06-07 03:18:45,759][495927] Updated weights for policy 0, policy_version 41231 (0.0008) +[2026-06-07 03:18:45,910][495927] Updated weights for policy 0, policy_version 41241 (0.0008) +[2026-06-07 03:18:46,063][495927] Updated weights for policy 0, policy_version 41251 (0.0008) +[2026-06-07 03:18:46,218][495927] Updated weights for policy 0, policy_version 41261 (0.0008) +[2026-06-07 03:18:46,809][495927] Updated weights for policy 0, policy_version 41271 (0.0008) +[2026-06-07 03:18:46,991][495927] Updated weights for policy 0, policy_version 41283 (0.0008) +[2026-06-07 03:18:47,141][495927] Updated weights for policy 0, policy_version 41293 (0.0008) +[2026-06-07 03:18:47,304][495927] Updated weights for policy 0, policy_version 41303 (0.0008) +[2026-06-07 03:18:47,456][495927] Updated weights for policy 0, policy_version 41313 (0.0009) +[2026-06-07 03:18:47,604][495927] Updated weights for policy 0, policy_version 41323 (0.0008) +[2026-06-07 03:18:48,196][495927] Updated weights for policy 0, policy_version 41333 (0.0008) +[2026-06-07 03:18:48,352][495927] Updated weights for policy 0, policy_version 41344 (0.0008) +[2026-06-07 03:18:48,504][495927] Updated weights for policy 0, policy_version 41354 (0.0008) +[2026-06-07 03:18:48,675][495927] Updated weights for policy 0, policy_version 41365 (0.0008) +[2026-06-07 03:18:48,823][495927] Updated weights for policy 0, policy_version 41375 (0.0008) +[2026-06-07 03:18:48,909][492660] Fps is (10 sec: 22937.5, 60 sec: 22937.6, 300 sec: 22882.1). Total num frames: 21168128. Throughput: 0: 22829.5. Samples: 21153536. Policy #0 lag: (min: 38.0, avg: 54.0, max: 102.0) +[2026-06-07 03:18:48,910][492660] Avg episode reward: [(0, '1599.627')] +[2026-06-07 03:18:48,976][495927] Updated weights for policy 0, policy_version 41385 (0.0008) +[2026-06-07 03:18:49,617][495927] Updated weights for policy 0, policy_version 41395 (0.0008) +[2026-06-07 03:18:49,750][495927] Updated weights for policy 0, policy_version 41405 (0.0008) +[2026-06-07 03:18:49,903][495927] Updated weights for policy 0, policy_version 41415 (0.0008) +[2026-06-07 03:18:50,049][495927] Updated weights for policy 0, policy_version 41425 (0.0008) +[2026-06-07 03:18:50,197][495927] Updated weights for policy 0, policy_version 41435 (0.0008) +[2026-06-07 03:18:50,354][495927] Updated weights for policy 0, policy_version 41445 (0.0008) +[2026-06-07 03:18:50,497][495927] Updated weights for policy 0, policy_version 41455 (0.0008) +[2026-06-07 03:18:51,134][495927] Updated weights for policy 0, policy_version 41466 (0.0008) +[2026-06-07 03:18:51,291][495927] Updated weights for policy 0, policy_version 41477 (0.0008) +[2026-06-07 03:18:51,463][495927] Updated weights for policy 0, policy_version 41488 (0.0008) +[2026-06-07 03:18:51,615][495927] Updated weights for policy 0, policy_version 41498 (0.0008) +[2026-06-07 03:18:51,778][495927] Updated weights for policy 0, policy_version 41509 (0.0008) +[2026-06-07 03:18:51,940][495927] Updated weights for policy 0, policy_version 41520 (0.0008) +[2026-06-07 03:18:52,580][495927] Updated weights for policy 0, policy_version 41531 (0.0009) +[2026-06-07 03:18:52,734][495927] Updated weights for policy 0, policy_version 41541 (0.0008) +[2026-06-07 03:18:52,891][495927] Updated weights for policy 0, policy_version 41552 (0.0008) +[2026-06-07 03:18:53,057][495927] Updated weights for policy 0, policy_version 41563 (0.0008) +[2026-06-07 03:18:53,211][495927] Updated weights for policy 0, policy_version 41573 (0.0008) +[2026-06-07 03:18:53,374][495927] Updated weights for policy 0, policy_version 41584 (0.0008) +[2026-06-07 03:18:53,909][492660] Fps is (10 sec: 22937.5, 60 sec: 22937.6, 300 sec: 22993.2). Total num frames: 21299200. Throughput: 0: 23022.9. Samples: 21297536. Policy #0 lag: (min: 38.0, avg: 54.0, max: 102.0) +[2026-06-07 03:18:53,910][492660] Avg episode reward: [(0, '1642.837')] +[2026-06-07 03:18:54,013][495927] Updated weights for policy 0, policy_version 41595 (0.0008) +[2026-06-07 03:18:54,181][495927] Updated weights for policy 0, policy_version 41607 (0.0008) +[2026-06-07 03:18:54,334][495927] Updated weights for policy 0, policy_version 41617 (0.0008) +[2026-06-07 03:18:54,487][495927] Updated weights for policy 0, policy_version 41627 (0.0008) +[2026-06-07 03:18:54,657][495927] Updated weights for policy 0, policy_version 41638 (0.0008) +[2026-06-07 03:18:55,323][495927] Updated weights for policy 0, policy_version 41651 (0.0009) +[2026-06-07 03:18:55,469][495927] Updated weights for policy 0, policy_version 41661 (0.0008) +[2026-06-07 03:18:55,615][495927] Updated weights for policy 0, policy_version 41671 (0.0008) +[2026-06-07 03:18:55,772][495927] Updated weights for policy 0, policy_version 41681 (0.0008) +[2026-06-07 03:18:55,946][495927] Updated weights for policy 0, policy_version 41693 (0.0009) +[2026-06-07 03:18:56,098][495927] Updated weights for policy 0, policy_version 41703 (0.0008) +[2026-06-07 03:18:56,730][495927] Updated weights for policy 0, policy_version 41713 (0.0008) +[2026-06-07 03:18:56,877][495927] Updated weights for policy 0, policy_version 41723 (0.0008) +[2026-06-07 03:18:57,017][495927] Updated weights for policy 0, policy_version 41733 (0.0008) +[2026-06-07 03:18:57,170][495927] Updated weights for policy 0, policy_version 41743 (0.0008) +[2026-06-07 03:18:57,318][495927] Updated weights for policy 0, policy_version 41753 (0.0008) +[2026-06-07 03:18:57,472][495927] Updated weights for policy 0, policy_version 41763 (0.0008) +[2026-06-07 03:18:57,622][495927] Updated weights for policy 0, policy_version 41773 (0.0008) +[2026-06-07 03:18:58,260][495927] Updated weights for policy 0, policy_version 41784 (0.0008) +[2026-06-07 03:18:58,397][495927] Updated weights for policy 0, policy_version 41794 (0.0008) +[2026-06-07 03:18:58,552][495927] Updated weights for policy 0, policy_version 41804 (0.0008) +[2026-06-07 03:18:58,702][495927] Updated weights for policy 0, policy_version 41814 (0.0008) +[2026-06-07 03:18:58,848][495927] Updated weights for policy 0, policy_version 41824 (0.0008) +[2026-06-07 03:18:58,909][492660] Fps is (10 sec: 22937.7, 60 sec: 22937.6, 300 sec: 22882.1). Total num frames: 21397504. Throughput: 0: 23088.4. Samples: 21435136. Policy #0 lag: (min: 38.0, avg: 54.0, max: 102.0) +[2026-06-07 03:18:58,910][492660] Avg episode reward: [(0, '1641.782')] +[2026-06-07 03:18:59,002][495927] Updated weights for policy 0, policy_version 41834 (0.0008) +[2026-06-07 03:18:59,629][495927] Updated weights for policy 0, policy_version 41844 (0.0008) +[2026-06-07 03:18:59,783][495927] Updated weights for policy 0, policy_version 41855 (0.0008) +[2026-06-07 03:18:59,938][495927] Updated weights for policy 0, policy_version 41865 (0.0008) +[2026-06-07 03:19:00,107][495927] Updated weights for policy 0, policy_version 41876 (0.0008) +[2026-06-07 03:19:00,259][495927] Updated weights for policy 0, policy_version 41886 (0.0008) +[2026-06-07 03:19:00,420][495927] Updated weights for policy 0, policy_version 41897 (0.0008) +[2026-06-07 03:19:01,059][495927] Updated weights for policy 0, policy_version 41907 (0.0009) +[2026-06-07 03:19:01,214][495927] Updated weights for policy 0, policy_version 41918 (0.0008) +[2026-06-07 03:19:01,379][495927] Updated weights for policy 0, policy_version 41929 (0.0008) +[2026-06-07 03:19:01,533][495927] Updated weights for policy 0, policy_version 41939 (0.0007) +[2026-06-07 03:19:01,688][495927] Updated weights for policy 0, policy_version 41949 (0.0008) +[2026-06-07 03:19:01,827][495927] Updated weights for policy 0, policy_version 41959 (0.0008) +[2026-06-07 03:19:02,446][495927] Updated weights for policy 0, policy_version 41969 (0.0010) +[2026-06-07 03:19:02,602][495927] Updated weights for policy 0, policy_version 41980 (0.0008) +[2026-06-07 03:19:02,751][495927] Updated weights for policy 0, policy_version 41990 (0.0009) +[2026-06-07 03:19:02,902][495927] Updated weights for policy 0, policy_version 42000 (0.0008) +[2026-06-07 03:19:03,085][495927] Updated weights for policy 0, policy_version 42012 (0.0008) +[2026-06-07 03:19:03,231][495927] Updated weights for policy 0, policy_version 42022 (0.0008) +[2026-06-07 03:19:03,378][495927] Updated weights for policy 0, policy_version 42032 (0.0008) +[2026-06-07 03:19:03,909][492660] Fps is (10 sec: 22937.4, 60 sec: 22937.6, 300 sec: 22993.1). Total num frames: 21528576. Throughput: 0: 23014.4. Samples: 21502464. Policy #0 lag: (min: 38.0, avg: 54.0, max: 102.0) +[2026-06-07 03:19:03,910][492660] Avg episode reward: [(0, '1661.677')] +[2026-06-07 03:19:03,998][495927] Updated weights for policy 0, policy_version 42042 (0.0009) +[2026-06-07 03:19:04,146][495927] Updated weights for policy 0, policy_version 42052 (0.0009) +[2026-06-07 03:19:04,308][495927] Updated weights for policy 0, policy_version 42063 (0.0009) +[2026-06-07 03:19:04,477][495927] Updated weights for policy 0, policy_version 42075 (0.0008) +[2026-06-07 03:19:04,630][495927] Updated weights for policy 0, policy_version 42085 (0.0010) +[2026-06-07 03:19:04,776][495927] Updated weights for policy 0, policy_version 42095 (0.0009) +[2026-06-07 03:19:05,450][495927] Updated weights for policy 0, policy_version 42107 (0.0010) +[2026-06-07 03:19:05,634][495927] Updated weights for policy 0, policy_version 42120 (0.0008) +[2026-06-07 03:19:05,784][495927] Updated weights for policy 0, policy_version 42130 (0.0008) +[2026-06-07 03:19:05,939][495927] Updated weights for policy 0, policy_version 42140 (0.0009) +[2026-06-07 03:19:06,114][495927] Updated weights for policy 0, policy_version 42152 (0.0009) +[2026-06-07 03:19:06,782][495927] Updated weights for policy 0, policy_version 42164 (0.0009) +[2026-06-07 03:19:06,929][495927] Updated weights for policy 0, policy_version 42174 (0.0008) +[2026-06-07 03:19:07,070][495927] Updated weights for policy 0, policy_version 42184 (0.0008) +[2026-06-07 03:19:07,237][495927] Updated weights for policy 0, policy_version 42195 (0.0008) +[2026-06-07 03:19:07,417][495927] Updated weights for policy 0, policy_version 42207 (0.0009) +[2026-06-07 03:19:07,572][495927] Updated weights for policy 0, policy_version 42217 (0.0009) +[2026-06-07 03:19:08,199][495927] Updated weights for policy 0, policy_version 42227 (0.0009) +[2026-06-07 03:19:08,342][495927] Updated weights for policy 0, policy_version 42237 (0.0009) +[2026-06-07 03:19:08,503][495927] Updated weights for policy 0, policy_version 42248 (0.0006) +[2026-06-07 03:19:08,673][495927] Updated weights for policy 0, policy_version 42260 (0.0008) +[2026-06-07 03:19:08,834][495927] Updated weights for policy 0, policy_version 42271 (0.0008) +[2026-06-07 03:19:08,909][492660] Fps is (10 sec: 22937.6, 60 sec: 22937.6, 300 sec: 22882.1). Total num frames: 21626880. Throughput: 0: 22789.7. Samples: 21637248. Policy #0 lag: (min: 38.0, avg: 54.0, max: 102.0) +[2026-06-07 03:19:08,910][492660] Avg episode reward: [(0, '1627.634')] +[2026-06-07 03:19:08,989][495927] Updated weights for policy 0, policy_version 42281 (0.0008) +[2026-06-07 03:19:09,629][495927] Updated weights for policy 0, policy_version 42291 (0.0008) +[2026-06-07 03:19:09,774][495927] Updated weights for policy 0, policy_version 42301 (0.0007) +[2026-06-07 03:19:09,917][495927] Updated weights for policy 0, policy_version 42311 (0.0008) +[2026-06-07 03:19:10,061][495927] Updated weights for policy 0, policy_version 42321 (0.0008) +[2026-06-07 03:19:10,222][495927] Updated weights for policy 0, policy_version 42331 (0.0007) +[2026-06-07 03:19:10,379][495927] Updated weights for policy 0, policy_version 42342 (0.0007) +[2026-06-07 03:19:11,064][495927] Updated weights for policy 0, policy_version 42355 (0.0009) +[2026-06-07 03:19:11,208][495927] Updated weights for policy 0, policy_version 42365 (0.0009) +[2026-06-07 03:19:11,348][495927] Updated weights for policy 0, policy_version 42375 (0.0009) +[2026-06-07 03:19:11,500][495927] Updated weights for policy 0, policy_version 42385 (0.0007) +[2026-06-07 03:19:11,660][495927] Updated weights for policy 0, policy_version 42396 (0.0004) +[2026-06-07 03:19:11,814][495927] Updated weights for policy 0, policy_version 42406 (0.0008) +[2026-06-07 03:19:12,465][495927] Updated weights for policy 0, policy_version 42418 (0.0008) +[2026-06-07 03:19:12,602][495927] Updated weights for policy 0, policy_version 42428 (0.0008) +[2026-06-07 03:19:12,751][495927] Updated weights for policy 0, policy_version 42438 (0.0010) +[2026-06-07 03:19:12,902][495927] Updated weights for policy 0, policy_version 42448 (0.0008) +[2026-06-07 03:19:13,051][495927] Updated weights for policy 0, policy_version 42458 (0.0008) +[2026-06-07 03:19:13,207][495927] Updated weights for policy 0, policy_version 42468 (0.0008) +[2026-06-07 03:19:13,369][495927] Updated weights for policy 0, policy_version 42479 (0.0008) +[2026-06-07 03:19:13,909][492660] Fps is (10 sec: 22937.8, 60 sec: 22937.6, 300 sec: 22993.2). Total num frames: 21757952. Throughput: 0: 22744.2. Samples: 21771776. Policy #0 lag: (min: 38.0, avg: 54.0, max: 102.0) +[2026-06-07 03:19:13,910][492660] Avg episode reward: [(0, '1596.346')] +[2026-06-07 03:19:13,979][495927] Updated weights for policy 0, policy_version 42489 (0.0008) +[2026-06-07 03:19:14,130][495927] Updated weights for policy 0, policy_version 42499 (0.0008) +[2026-06-07 03:19:14,279][495927] Updated weights for policy 0, policy_version 42509 (0.0008) +[2026-06-07 03:19:14,430][495927] Updated weights for policy 0, policy_version 42519 (0.0008) +[2026-06-07 03:19:14,578][495927] Updated weights for policy 0, policy_version 42529 (0.0006) +[2026-06-07 03:19:14,738][495927] Updated weights for policy 0, policy_version 42539 (0.0004) +[2026-06-07 03:19:15,351][495927] Updated weights for policy 0, policy_version 42549 (0.0004) +[2026-06-07 03:19:15,493][495927] Updated weights for policy 0, policy_version 42559 (0.0004) +[2026-06-07 03:19:15,653][495927] Updated weights for policy 0, policy_version 42570 (0.0004) +[2026-06-07 03:19:15,823][495927] Updated weights for policy 0, policy_version 42581 (0.0004) +[2026-06-07 03:19:15,972][495927] Updated weights for policy 0, policy_version 42591 (0.0004) +[2026-06-07 03:19:16,136][495927] Updated weights for policy 0, policy_version 42602 (0.0004) +[2026-06-07 03:19:16,746][495927] Updated weights for policy 0, policy_version 42613 (0.0004) +[2026-06-07 03:19:16,902][495927] Updated weights for policy 0, policy_version 42625 (0.0004) +[2026-06-07 03:19:17,078][495927] Updated weights for policy 0, policy_version 42637 (0.0004) +[2026-06-07 03:19:17,227][495927] Updated weights for policy 0, policy_version 42647 (0.0004) +[2026-06-07 03:19:17,375][495927] Updated weights for policy 0, policy_version 42657 (0.0004) +[2026-06-07 03:19:17,553][495927] Updated weights for policy 0, policy_version 42669 (0.0004) +[2026-06-07 03:19:18,188][495927] Updated weights for policy 0, policy_version 42680 (0.0004) +[2026-06-07 03:19:18,328][495927] Updated weights for policy 0, policy_version 42690 (0.0004) +[2026-06-07 03:19:18,489][495927] Updated weights for policy 0, policy_version 42701 (0.0004) +[2026-06-07 03:19:18,654][495927] Updated weights for policy 0, policy_version 42712 (0.0004) +[2026-06-07 03:19:18,806][495927] Updated weights for policy 0, policy_version 42723 (0.0004) +[2026-06-07 03:19:18,909][492660] Fps is (10 sec: 22937.5, 60 sec: 22937.6, 300 sec: 22882.1). Total num frames: 21856256. Throughput: 0: 22747.0. Samples: 21839616. Policy #0 lag: (min: 63.0, avg: 77.4, max: 127.0) +[2026-06-07 03:19:18,910][492660] Avg episode reward: [(0, '1623.457')] +[2026-06-07 03:19:18,987][495927] Updated weights for policy 0, policy_version 42735 (0.0006) +[2026-06-07 03:19:19,627][495927] Updated weights for policy 0, policy_version 42745 (0.0008) +[2026-06-07 03:19:19,794][495927] Updated weights for policy 0, policy_version 42756 (0.0008) +[2026-06-07 03:19:19,941][495927] Updated weights for policy 0, policy_version 42766 (0.0008) +[2026-06-07 03:19:20,098][495927] Updated weights for policy 0, policy_version 42777 (0.0008) +[2026-06-07 03:19:20,273][495927] Updated weights for policy 0, policy_version 42788 (0.0008) +[2026-06-07 03:19:20,426][495927] Updated weights for policy 0, policy_version 42798 (0.0008) +[2026-06-07 03:19:21,055][495927] Updated weights for policy 0, policy_version 42808 (0.0009) +[2026-06-07 03:19:21,209][495927] Updated weights for policy 0, policy_version 42819 (0.0008) +[2026-06-07 03:19:21,352][495927] Updated weights for policy 0, policy_version 42829 (0.0008) +[2026-06-07 03:19:21,528][495927] Updated weights for policy 0, policy_version 42840 (0.0008) +[2026-06-07 03:19:21,681][495927] Updated weights for policy 0, policy_version 42850 (0.0008) +[2026-06-07 03:19:21,841][495927] Updated weights for policy 0, policy_version 42861 (0.0008) +[2026-06-07 03:19:22,478][495927] Updated weights for policy 0, policy_version 42871 (0.0009) +[2026-06-07 03:19:22,619][495927] Updated weights for policy 0, policy_version 42881 (0.0008) +[2026-06-07 03:19:22,769][495927] Updated weights for policy 0, policy_version 42891 (0.0008) +[2026-06-07 03:19:22,924][495927] Updated weights for policy 0, policy_version 42901 (0.0008) +[2026-06-07 03:19:23,079][495927] Updated weights for policy 0, policy_version 42911 (0.0009) +[2026-06-07 03:19:23,231][495927] Updated weights for policy 0, policy_version 42921 (0.0008) +[2026-06-07 03:19:23,841][495927] Updated weights for policy 0, policy_version 42931 (0.0009) +[2026-06-07 03:19:23,909][492660] Fps is (10 sec: 22937.6, 60 sec: 22937.6, 300 sec: 22993.1). Total num frames: 21987328. Throughput: 0: 22997.3. Samples: 21986048. Policy #0 lag: (min: 63.0, avg: 77.4, max: 127.0) +[2026-06-07 03:19:23,910][492660] Avg episode reward: [(0, '1616.166')] +[2026-06-07 03:19:23,991][495927] Updated weights for policy 0, policy_version 42941 (0.0008) +[2026-06-07 03:19:24,135][495927] Updated weights for policy 0, policy_version 42951 (0.0008) +[2026-06-07 03:19:24,283][495927] Updated weights for policy 0, policy_version 42961 (0.0008) +[2026-06-07 03:19:24,439][495927] Updated weights for policy 0, policy_version 42971 (0.0008) +[2026-06-07 03:19:24,583][495927] Updated weights for policy 0, policy_version 42981 (0.0008) +[2026-06-07 03:19:24,734][495927] Updated weights for policy 0, policy_version 42991 (0.0010) +[2026-06-07 03:19:25,371][495927] Updated weights for policy 0, policy_version 43001 (0.0008) +[2026-06-07 03:19:25,512][495927] Updated weights for policy 0, policy_version 43011 (0.0008) +[2026-06-07 03:19:25,713][495927] Updated weights for policy 0, policy_version 43024 (0.0008) +[2026-06-07 03:19:25,868][495927] Updated weights for policy 0, policy_version 43034 (0.0010) +[2026-06-07 03:19:26,026][495927] Updated weights for policy 0, policy_version 43044 (0.0008) +[2026-06-07 03:19:26,183][495927] Updated weights for policy 0, policy_version 43054 (0.0008) +[2026-06-07 03:19:26,797][495927] Updated weights for policy 0, policy_version 43064 (0.0008) +[2026-06-07 03:19:26,942][495927] Updated weights for policy 0, policy_version 43074 (0.0008) +[2026-06-07 03:19:27,091][495927] Updated weights for policy 0, policy_version 43084 (0.0006) +[2026-06-07 03:19:27,240][495927] Updated weights for policy 0, policy_version 43094 (0.0005) +[2026-06-07 03:19:27,398][495927] Updated weights for policy 0, policy_version 43104 (0.0004) +[2026-06-07 03:19:27,565][495927] Updated weights for policy 0, policy_version 43115 (0.0004) +[2026-06-07 03:19:28,207][495927] Updated weights for policy 0, policy_version 43126 (0.0006) +[2026-06-07 03:19:28,354][495927] Updated weights for policy 0, policy_version 43136 (0.0005) +[2026-06-07 03:19:28,505][495927] Updated weights for policy 0, policy_version 43146 (0.0005) +[2026-06-07 03:19:28,658][495927] Updated weights for policy 0, policy_version 43156 (0.0006) +[2026-06-07 03:19:28,806][495927] Updated weights for policy 0, policy_version 43166 (0.0005) +[2026-06-07 03:19:28,909][492660] Fps is (10 sec: 22937.6, 60 sec: 22937.6, 300 sec: 22882.1). Total num frames: 22085632. Throughput: 0: 23031.5. Samples: 22122496. Policy #0 lag: (min: 63.0, avg: 77.4, max: 127.0) +[2026-06-07 03:19:28,910][492660] Avg episode reward: [(0, '1600.418')] +[2026-06-07 03:19:28,960][495927] Updated weights for policy 0, policy_version 43176 (0.0005) +[2026-06-07 03:19:29,568][495927] Updated weights for policy 0, policy_version 43186 (0.0005) +[2026-06-07 03:19:29,704][495927] Updated weights for policy 0, policy_version 43196 (0.0005) +[2026-06-07 03:19:29,871][495927] Updated weights for policy 0, policy_version 43207 (0.0006) +[2026-06-07 03:19:30,036][495927] Updated weights for policy 0, policy_version 43218 (0.0005) +[2026-06-07 03:19:30,186][495927] Updated weights for policy 0, policy_version 43228 (0.0005) +[2026-06-07 03:19:30,339][495927] Updated weights for policy 0, policy_version 43238 (0.0004) +[2026-06-07 03:19:30,983][495927] Updated weights for policy 0, policy_version 43249 (0.0008) +[2026-06-07 03:19:31,147][495927] Updated weights for policy 0, policy_version 43261 (0.0008) +[2026-06-07 03:19:31,306][495927] Updated weights for policy 0, policy_version 43272 (0.0008) +[2026-06-07 03:19:31,477][495927] Updated weights for policy 0, policy_version 43283 (0.0008) +[2026-06-07 03:19:31,627][495927] Updated weights for policy 0, policy_version 43293 (0.0008) +[2026-06-07 03:19:31,775][495927] Updated weights for policy 0, policy_version 43303 (0.0010) +[2026-06-07 03:19:32,407][495927] Updated weights for policy 0, policy_version 43313 (0.0008) +[2026-06-07 03:19:32,544][495927] Updated weights for policy 0, policy_version 43323 (0.0007) +[2026-06-07 03:19:32,692][495927] Updated weights for policy 0, policy_version 43333 (0.0009) +[2026-06-07 03:19:32,848][495927] Updated weights for policy 0, policy_version 43343 (0.0008) +[2026-06-07 03:19:32,992][495927] Updated weights for policy 0, policy_version 43353 (0.0008) +[2026-06-07 03:19:33,149][495927] Updated weights for policy 0, policy_version 43363 (0.0008) +[2026-06-07 03:19:33,298][495927] Updated weights for policy 0, policy_version 43373 (0.0009) +[2026-06-07 03:19:33,909][492660] Fps is (10 sec: 22937.6, 60 sec: 22937.6, 300 sec: 22993.1). Total num frames: 22216704. Throughput: 0: 23028.6. Samples: 22189824. Policy #0 lag: (min: 63.0, avg: 77.4, max: 127.0) +[2026-06-07 03:19:33,910][492660] Avg episode reward: [(0, '1623.768')] +[2026-06-07 03:19:33,923][495927] Updated weights for policy 0, policy_version 43383 (0.0009) +[2026-06-07 03:19:34,076][495927] Updated weights for policy 0, policy_version 43394 (0.0008) +[2026-06-07 03:19:34,234][495927] Updated weights for policy 0, policy_version 43404 (0.0009) +[2026-06-07 03:19:34,385][495927] Updated weights for policy 0, policy_version 43414 (0.0009) +[2026-06-07 03:19:34,537][495927] Updated weights for policy 0, policy_version 43424 (0.0008) +[2026-06-07 03:19:34,687][495927] Updated weights for policy 0, policy_version 43434 (0.0008) +[2026-06-07 03:19:35,309][495927] Updated weights for policy 0, policy_version 43444 (0.0008) +[2026-06-07 03:19:35,465][495927] Updated weights for policy 0, policy_version 43455 (0.0008) +[2026-06-07 03:19:35,615][495927] Updated weights for policy 0, policy_version 43465 (0.0008) +[2026-06-07 03:19:35,767][495927] Updated weights for policy 0, policy_version 43475 (0.0008) +[2026-06-07 03:19:35,923][495927] Updated weights for policy 0, policy_version 43485 (0.0007) +[2026-06-07 03:19:36,070][495927] Updated weights for policy 0, policy_version 43495 (0.0008) +[2026-06-07 03:19:36,704][495927] Updated weights for policy 0, policy_version 43505 (0.0008) +[2026-06-07 03:19:36,858][495927] Updated weights for policy 0, policy_version 43516 (0.0008) +[2026-06-07 03:19:37,005][495927] Updated weights for policy 0, policy_version 43526 (0.0008) +[2026-06-07 03:19:37,173][495927] Updated weights for policy 0, policy_version 43537 (0.0007) +[2026-06-07 03:19:37,327][495927] Updated weights for policy 0, policy_version 43547 (0.0006) +[2026-06-07 03:19:37,493][495927] Updated weights for policy 0, policy_version 43558 (0.0008) +[2026-06-07 03:19:37,644][495927] Updated weights for policy 0, policy_version 43568 (0.0008) +[2026-06-07 03:19:38,294][495927] Updated weights for policy 0, policy_version 43580 (0.0009) +[2026-06-07 03:19:38,442][495927] Updated weights for policy 0, policy_version 43590 (0.0009) +[2026-06-07 03:19:38,594][495927] Updated weights for policy 0, policy_version 43600 (0.0009) +[2026-06-07 03:19:38,752][495927] Updated weights for policy 0, policy_version 43610 (0.0009) +[2026-06-07 03:19:38,892][495927] Updated weights for policy 0, policy_version 43620 (0.0008) +[2026-06-07 03:19:38,909][492660] Fps is (10 sec: 22937.3, 60 sec: 22937.6, 300 sec: 22882.1). Total num frames: 22315008. Throughput: 0: 22843.7. Samples: 22325504. Policy #0 lag: (min: 63.0, avg: 77.4, max: 127.0) +[2026-06-07 03:19:38,910][492660] Avg episode reward: [(0, '1613.120')] +[2026-06-07 03:19:39,048][495927] Updated weights for policy 0, policy_version 43630 (0.0008) +[2026-06-07 03:19:39,694][495927] Updated weights for policy 0, policy_version 43641 (0.0009) +[2026-06-07 03:19:39,858][495927] Updated weights for policy 0, policy_version 43652 (0.0008) +[2026-06-07 03:19:40,015][495927] Updated weights for policy 0, policy_version 43662 (0.0008) +[2026-06-07 03:19:40,155][495927] Updated weights for policy 0, policy_version 43672 (0.0010) +[2026-06-07 03:19:40,308][495927] Updated weights for policy 0, policy_version 43682 (0.0008) +[2026-06-07 03:19:40,463][495927] Updated weights for policy 0, policy_version 43692 (0.0006) +[2026-06-07 03:19:41,056][495927] Updated weights for policy 0, policy_version 43702 (0.0009) +[2026-06-07 03:19:41,230][495927] Updated weights for policy 0, policy_version 43715 (0.0009) +[2026-06-07 03:19:41,418][495927] Updated weights for policy 0, policy_version 43728 (0.0008) +[2026-06-07 03:19:41,586][495927] Updated weights for policy 0, policy_version 43739 (0.0009) +[2026-06-07 03:19:41,754][495927] Updated weights for policy 0, policy_version 43750 (0.0008) +[2026-06-07 03:19:42,408][495927] Updated weights for policy 0, policy_version 43761 (0.0006) +[2026-06-07 03:19:42,546][495927] Updated weights for policy 0, policy_version 43771 (0.0008) +[2026-06-07 03:19:42,710][495927] Updated weights for policy 0, policy_version 43782 (0.0008) +[2026-06-07 03:19:42,859][495927] Updated weights for policy 0, policy_version 43792 (0.0008) +[2026-06-07 03:19:43,002][495927] Updated weights for policy 0, policy_version 43802 (0.0008) +[2026-06-07 03:19:43,174][495927] Updated weights for policy 0, policy_version 43813 (0.0005) +[2026-06-07 03:19:43,321][495927] Updated weights for policy 0, policy_version 43823 (0.0008) +[2026-06-07 03:19:43,909][492660] Fps is (10 sec: 22937.5, 60 sec: 22937.6, 300 sec: 22993.1). Total num frames: 22446080. Throughput: 0: 22792.5. Samples: 22460800. Policy #0 lag: (min: 63.0, avg: 77.4, max: 127.0) +[2026-06-07 03:19:43,910][492660] Avg episode reward: [(0, '1645.033')] +[2026-06-07 03:19:43,947][495927] Updated weights for policy 0, policy_version 43833 (0.0009) +[2026-06-07 03:19:44,086][495927] Updated weights for policy 0, policy_version 43843 (0.0008) +[2026-06-07 03:19:44,228][495927] Updated weights for policy 0, policy_version 43853 (0.0008) +[2026-06-07 03:19:44,386][495927] Updated weights for policy 0, policy_version 43863 (0.0008) +[2026-06-07 03:19:44,530][495927] Updated weights for policy 0, policy_version 43873 (0.0005) +[2026-06-07 03:19:44,711][495927] Updated weights for policy 0, policy_version 43885 (0.0004) +[2026-06-07 03:19:45,353][495927] Updated weights for policy 0, policy_version 43896 (0.0004) +[2026-06-07 03:19:45,522][495927] Updated weights for policy 0, policy_version 43908 (0.0004) +[2026-06-07 03:19:45,673][495927] Updated weights for policy 0, policy_version 43918 (0.0004) +[2026-06-07 03:19:45,818][495927] Updated weights for policy 0, policy_version 43928 (0.0004) +[2026-06-07 03:19:45,998][495927] Updated weights for policy 0, policy_version 43940 (0.0008) +[2026-06-07 03:19:46,170][495927] Updated weights for policy 0, policy_version 43951 (0.0008) +[2026-06-07 03:19:46,831][495927] Updated weights for policy 0, policy_version 43963 (0.0008) +[2026-06-07 03:19:46,979][495927] Updated weights for policy 0, policy_version 43973 (0.0009) +[2026-06-07 03:19:47,134][495927] Updated weights for policy 0, policy_version 43984 (0.0009) +[2026-06-07 03:19:47,305][495927] Updated weights for policy 0, policy_version 43995 (0.0008) +[2026-06-07 03:19:47,451][495927] Updated weights for policy 0, policy_version 44005 (0.0009) +[2026-06-07 03:19:47,613][495927] Updated weights for policy 0, policy_version 44015 (0.0009) +[2026-06-07 03:19:48,229][495927] Updated weights for policy 0, policy_version 44025 (0.0008) +[2026-06-07 03:19:48,371][495927] Updated weights for policy 0, policy_version 44035 (0.0008) +[2026-06-07 03:19:48,524][495927] Updated weights for policy 0, policy_version 44045 (0.0008) +[2026-06-07 03:19:48,674][495927] Updated weights for policy 0, policy_version 44055 (0.0008) +[2026-06-07 03:19:48,831][495927] Updated weights for policy 0, policy_version 44065 (0.0008) +[2026-06-07 03:19:48,909][492660] Fps is (10 sec: 22937.7, 60 sec: 22937.6, 300 sec: 22882.1). Total num frames: 22544384. Throughput: 0: 22832.4. Samples: 22529920. Policy #0 lag: (min: 63.0, avg: 77.1, max: 127.0) +[2026-06-07 03:19:48,910][492660] Avg episode reward: [(0, '1706.234')] +[2026-06-07 03:19:48,992][495927] Updated weights for policy 0, policy_version 44076 (0.0008) +[2026-06-07 03:19:49,611][495927] Updated weights for policy 0, policy_version 44086 (0.0008) +[2026-06-07 03:19:49,788][495927] Updated weights for policy 0, policy_version 44098 (0.0008) +[2026-06-07 03:19:49,930][495927] Updated weights for policy 0, policy_version 44108 (0.0008) +[2026-06-07 03:19:50,090][495927] Updated weights for policy 0, policy_version 44119 (0.0007) +[2026-06-07 03:19:50,280][495927] Updated weights for policy 0, policy_version 44132 (0.0007) +[2026-06-07 03:19:50,440][495927] Updated weights for policy 0, policy_version 44143 (0.0008) +[2026-06-07 03:19:51,068][495927] Updated weights for policy 0, policy_version 44153 (0.0008) +[2026-06-07 03:19:51,285][495927] Updated weights for policy 0, policy_version 44168 (0.0008) +[2026-06-07 03:19:51,463][495927] Updated weights for policy 0, policy_version 44180 (0.0008) +[2026-06-07 03:19:51,608][495927] Updated weights for policy 0, policy_version 44190 (0.0009) +[2026-06-07 03:19:51,760][495927] Updated weights for policy 0, policy_version 44200 (0.0010) +[2026-06-07 03:19:52,397][495927] Updated weights for policy 0, policy_version 44210 (0.0009) +[2026-06-07 03:19:52,553][495927] Updated weights for policy 0, policy_version 44221 (0.0010) +[2026-06-07 03:19:52,704][495927] Updated weights for policy 0, policy_version 44231 (0.0008) +[2026-06-07 03:19:52,872][495927] Updated weights for policy 0, policy_version 44242 (0.0008) +[2026-06-07 03:19:53,041][495927] Updated weights for policy 0, policy_version 44253 (0.0010) +[2026-06-07 03:19:53,207][495927] Updated weights for policy 0, policy_version 44264 (0.0012) +[2026-06-07 03:19:53,829][495927] Updated weights for policy 0, policy_version 44274 (0.0010) +[2026-06-07 03:19:53,909][492660] Fps is (10 sec: 22937.5, 60 sec: 22937.6, 300 sec: 22993.1). Total num frames: 22675456. Throughput: 0: 23065.5. Samples: 22675200. Policy #0 lag: (min: 63.0, avg: 77.1, max: 127.0) +[2026-06-07 03:19:53,910][492660] Avg episode reward: [(0, '1686.338')] +[2026-06-07 03:19:53,966][495927] Updated weights for policy 0, policy_version 44284 (0.0010) +[2026-06-07 03:19:54,121][495927] Updated weights for policy 0, policy_version 44295 (0.0010) +[2026-06-07 03:19:54,287][495927] Updated weights for policy 0, policy_version 44306 (0.0010) +[2026-06-07 03:19:54,439][495927] Updated weights for policy 0, policy_version 44316 (0.0010) +[2026-06-07 03:19:54,594][495927] Updated weights for policy 0, policy_version 44326 (0.0011) +[2026-06-07 03:19:54,738][495927] Updated weights for policy 0, policy_version 44336 (0.0008) +[2026-06-07 03:19:55,386][495927] Updated weights for policy 0, policy_version 44347 (0.0010) +[2026-06-07 03:19:55,531][495927] Updated weights for policy 0, policy_version 44357 (0.0009) +[2026-06-07 03:19:55,682][495927] Updated weights for policy 0, policy_version 44367 (0.0009) +[2026-06-07 03:19:55,834][495927] Updated weights for policy 0, policy_version 44377 (0.0008) +[2026-06-07 03:19:55,999][495927] Updated weights for policy 0, policy_version 44388 (0.0008) +[2026-06-07 03:19:56,176][495927] Updated weights for policy 0, policy_version 44399 (0.0010) +[2026-06-07 03:19:56,795][495927] Updated weights for policy 0, policy_version 44409 (0.0009) +[2026-06-07 03:19:56,961][495927] Updated weights for policy 0, policy_version 44421 (0.0008) +[2026-06-07 03:19:57,120][495927] Updated weights for policy 0, policy_version 44431 (0.0008) +[2026-06-07 03:19:57,286][495927] Updated weights for policy 0, policy_version 44442 (0.0009) +[2026-06-07 03:19:57,428][495927] Updated weights for policy 0, policy_version 44452 (0.0008) +[2026-06-07 03:19:57,592][495927] Updated weights for policy 0, policy_version 44462 (0.0008) +[2026-06-07 03:19:58,209][495927] Updated weights for policy 0, policy_version 44472 (0.0009) +[2026-06-07 03:19:58,340][495927] Updated weights for policy 0, policy_version 44482 (0.0009) +[2026-06-07 03:19:58,501][495927] Updated weights for policy 0, policy_version 44493 (0.0008) +[2026-06-07 03:19:58,653][495927] Updated weights for policy 0, policy_version 44503 (0.0009) +[2026-06-07 03:19:58,803][495927] Updated weights for policy 0, policy_version 44513 (0.0008) +[2026-06-07 03:19:58,909][492660] Fps is (10 sec: 22937.8, 60 sec: 22937.6, 300 sec: 22882.1). Total num frames: 22773760. Throughput: 0: 23085.5. Samples: 22810624. Policy #0 lag: (min: 63.0, avg: 77.1, max: 127.0) +[2026-06-07 03:19:58,910][492660] Avg episode reward: [(0, '1670.431')] +[2026-06-07 03:19:58,966][495927] Updated weights for policy 0, policy_version 44523 (0.0008) +[2026-06-07 03:19:59,609][495927] Updated weights for policy 0, policy_version 44534 (0.0008) +[2026-06-07 03:19:59,768][495927] Updated weights for policy 0, policy_version 44545 (0.0008) +[2026-06-07 03:19:59,911][495927] Updated weights for policy 0, policy_version 44555 (0.0009) +[2026-06-07 03:20:00,072][495927] Updated weights for policy 0, policy_version 44565 (0.0008) +[2026-06-07 03:20:00,241][495927] Updated weights for policy 0, policy_version 44576 (0.0008) +[2026-06-07 03:20:00,416][495927] Updated weights for policy 0, policy_version 44588 (0.0008) +[2026-06-07 03:20:01,043][495927] Updated weights for policy 0, policy_version 44598 (0.0009) +[2026-06-07 03:20:01,188][495927] Updated weights for policy 0, policy_version 44608 (0.0008) +[2026-06-07 03:20:01,338][495927] Updated weights for policy 0, policy_version 44618 (0.0008) +[2026-06-07 03:20:01,517][495927] Updated weights for policy 0, policy_version 44630 (0.0008) +[2026-06-07 03:20:01,682][495927] Updated weights for policy 0, policy_version 44641 (0.0008) +[2026-06-07 03:20:01,832][495927] Updated weights for policy 0, policy_version 44651 (0.0008) +[2026-06-07 03:20:02,478][495927] Updated weights for policy 0, policy_version 44662 (0.0009) +[2026-06-07 03:20:02,620][495927] Updated weights for policy 0, policy_version 44672 (0.0008) +[2026-06-07 03:20:02,766][495927] Updated weights for policy 0, policy_version 44682 (0.0009) +[2026-06-07 03:20:02,912][495927] Updated weights for policy 0, policy_version 44692 (0.0008) +[2026-06-07 03:20:03,084][495927] Updated weights for policy 0, policy_version 44703 (0.0009) +[2026-06-07 03:20:03,257][495927] Updated weights for policy 0, policy_version 44714 (0.0008) +[2026-06-07 03:20:03,903][495927] Updated weights for policy 0, policy_version 44726 (0.0008) +[2026-06-07 03:20:03,909][492660] Fps is (10 sec: 22937.8, 60 sec: 22937.6, 300 sec: 22993.1). Total num frames: 22904832. Throughput: 0: 23088.3. Samples: 22878592. Policy #0 lag: (min: 63.0, avg: 77.1, max: 127.0) +[2026-06-07 03:20:03,910][492660] Avg episode reward: [(0, '1693.739')] +[2026-06-07 03:20:04,047][495927] Updated weights for policy 0, policy_version 44736 (0.0007) +[2026-06-07 03:20:04,214][495927] Updated weights for policy 0, policy_version 44747 (0.0004) +[2026-06-07 03:20:04,366][495927] Updated weights for policy 0, policy_version 44757 (0.0004) +[2026-06-07 03:20:04,549][495927] Updated weights for policy 0, policy_version 44769 (0.0004) +[2026-06-07 03:20:04,701][495927] Updated weights for policy 0, policy_version 44779 (0.0007) +[2026-06-07 03:20:05,342][495927] Updated weights for policy 0, policy_version 44791 (0.0009) +[2026-06-07 03:20:05,512][495927] Updated weights for policy 0, policy_version 44802 (0.0008) +[2026-06-07 03:20:05,687][495927] Updated weights for policy 0, policy_version 44814 (0.0008) +[2026-06-07 03:20:05,840][495927] Updated weights for policy 0, policy_version 44824 (0.0008) +[2026-06-07 03:20:05,987][495927] Updated weights for policy 0, policy_version 44834 (0.0008) +[2026-06-07 03:20:06,149][495927] Updated weights for policy 0, policy_version 44844 (0.0008) +[2026-06-07 03:20:06,780][495927] Updated weights for policy 0, policy_version 44855 (0.0008) +[2026-06-07 03:20:06,918][495927] Updated weights for policy 0, policy_version 44865 (0.0009) +[2026-06-07 03:20:07,071][495927] Updated weights for policy 0, policy_version 44875 (0.0008) +[2026-06-07 03:20:07,221][495927] Updated weights for policy 0, policy_version 44885 (0.0008) +[2026-06-07 03:20:07,405][495927] Updated weights for policy 0, policy_version 44897 (0.0009) +[2026-06-07 03:20:07,595][495927] Updated weights for policy 0, policy_version 44910 (0.0009) +[2026-06-07 03:20:08,237][495927] Updated weights for policy 0, policy_version 44920 (0.0009) +[2026-06-07 03:20:08,383][495927] Updated weights for policy 0, policy_version 44930 (0.0008) +[2026-06-07 03:20:08,560][495927] Updated weights for policy 0, policy_version 44942 (0.0008) +[2026-06-07 03:20:08,722][495927] Updated weights for policy 0, policy_version 44953 (0.0008) +[2026-06-07 03:20:08,875][495927] Updated weights for policy 0, policy_version 44963 (0.0008) +[2026-06-07 03:20:08,909][492660] Fps is (10 sec: 22937.2, 60 sec: 22937.5, 300 sec: 22882.0). Total num frames: 23003136. Throughput: 0: 22838.0. Samples: 23013760. Policy #0 lag: (min: 63.0, avg: 77.1, max: 127.0) +[2026-06-07 03:20:08,910][492660] Avg episode reward: [(0, '1751.661')] +[2026-06-07 03:20:09,027][495927] Updated weights for policy 0, policy_version 44973 (0.0008) +[2026-06-07 03:20:09,683][495927] Updated weights for policy 0, policy_version 44985 (0.0008) +[2026-06-07 03:20:09,863][495927] Updated weights for policy 0, policy_version 44997 (0.0008) +[2026-06-07 03:20:10,005][495927] Updated weights for policy 0, policy_version 45007 (0.0008) +[2026-06-07 03:20:10,174][495927] Updated weights for policy 0, policy_version 45018 (0.0008) +[2026-06-07 03:20:10,345][495927] Updated weights for policy 0, policy_version 45029 (0.0009) +[2026-06-07 03:20:10,510][495927] Updated weights for policy 0, policy_version 45039 (0.0008) +[2026-06-07 03:20:11,109][495927] Updated weights for policy 0, policy_version 45049 (0.0004) +[2026-06-07 03:20:11,266][495927] Updated weights for policy 0, policy_version 45060 (0.0006) +[2026-06-07 03:20:11,419][495927] Updated weights for policy 0, policy_version 45070 (0.0009) +[2026-06-07 03:20:11,566][495927] Updated weights for policy 0, policy_version 45080 (0.0008) +[2026-06-07 03:20:11,727][495927] Updated weights for policy 0, policy_version 45091 (0.0008) +[2026-06-07 03:20:11,887][495927] Updated weights for policy 0, policy_version 45102 (0.0008) +[2026-06-07 03:20:12,524][495927] Updated weights for policy 0, policy_version 45112 (0.0009) +[2026-06-07 03:20:12,672][495927] Updated weights for policy 0, policy_version 45122 (0.0008) +[2026-06-07 03:20:12,818][495927] Updated weights for policy 0, policy_version 45132 (0.0008) +[2026-06-07 03:20:12,975][495927] Updated weights for policy 0, policy_version 45143 (0.0008) +[2026-06-07 03:20:13,136][495927] Updated weights for policy 0, policy_version 45153 (0.0008) +[2026-06-07 03:20:13,281][495927] Updated weights for policy 0, policy_version 45163 (0.0008) +[2026-06-07 03:20:13,909][492660] Fps is (10 sec: 22937.7, 60 sec: 22937.6, 300 sec: 22993.1). Total num frames: 23134208. Throughput: 0: 22809.6. Samples: 23148928. Policy #0 lag: (min: 63.0, avg: 77.1, max: 127.0) +[2026-06-07 03:20:13,910][492660] Avg episode reward: [(0, '1820.969')] +[2026-06-07 03:20:13,911][495927] Updated weights for policy 0, policy_version 45174 (0.0008) +[2026-06-07 03:20:14,101][495927] Updated weights for policy 0, policy_version 45187 (0.0008) +[2026-06-07 03:20:14,249][495927] Updated weights for policy 0, policy_version 45197 (0.0008) +[2026-06-07 03:20:14,417][495927] Updated weights for policy 0, policy_version 45208 (0.0008) +[2026-06-07 03:20:14,583][495927] Updated weights for policy 0, policy_version 45219 (0.0008) +[2026-06-07 03:20:14,749][495927] Updated weights for policy 0, policy_version 45230 (0.0008) +[2026-06-07 03:20:14,782][495570] Saving new best policy, reward=1820.969! +[2026-06-07 03:20:15,378][495927] Updated weights for policy 0, policy_version 45240 (0.0009) +[2026-06-07 03:20:15,534][495927] Updated weights for policy 0, policy_version 45251 (0.0008) +[2026-06-07 03:20:15,700][495927] Updated weights for policy 0, policy_version 45262 (0.0008) +[2026-06-07 03:20:15,859][495927] Updated weights for policy 0, policy_version 45272 (0.0008) +[2026-06-07 03:20:16,023][495927] Updated weights for policy 0, policy_version 45283 (0.0008) +[2026-06-07 03:20:16,194][495927] Updated weights for policy 0, policy_version 45294 (0.0008) +[2026-06-07 03:20:16,800][495927] Updated weights for policy 0, policy_version 45304 (0.0009) +[2026-06-07 03:20:16,946][495927] Updated weights for policy 0, policy_version 45314 (0.0008) +[2026-06-07 03:20:17,100][495927] Updated weights for policy 0, policy_version 45324 (0.0008) +[2026-06-07 03:20:17,269][495927] Updated weights for policy 0, policy_version 45335 (0.0006) +[2026-06-07 03:20:17,417][495927] Updated weights for policy 0, policy_version 45345 (0.0005) +[2026-06-07 03:20:17,588][495927] Updated weights for policy 0, policy_version 45356 (0.0005) +[2026-06-07 03:20:18,185][495927] Updated weights for policy 0, policy_version 45366 (0.0005) +[2026-06-07 03:20:18,337][495927] Updated weights for policy 0, policy_version 45376 (0.0004) +[2026-06-07 03:20:18,501][495927] Updated weights for policy 0, policy_version 45387 (0.0004) +[2026-06-07 03:20:18,653][495927] Updated weights for policy 0, policy_version 45397 (0.0004) +[2026-06-07 03:20:18,800][495927] Updated weights for policy 0, policy_version 45407 (0.0004) +[2026-06-07 03:20:18,909][492660] Fps is (10 sec: 22938.0, 60 sec: 22937.6, 300 sec: 22882.1). Total num frames: 23232512. Throughput: 0: 22826.7. Samples: 23217024. Policy #0 lag: (min: 63.0, avg: 76.3, max: 127.0) +[2026-06-07 03:20:18,910][492660] Avg episode reward: [(0, '1852.832')] +[2026-06-07 03:20:18,954][495927] Updated weights for policy 0, policy_version 45417 (0.0004) +[2026-06-07 03:20:19,065][495570] Saving new best policy, reward=1852.832! +[2026-06-07 03:20:19,565][495927] Updated weights for policy 0, policy_version 45427 (0.0005) +[2026-06-07 03:20:19,703][495927] Updated weights for policy 0, policy_version 45437 (0.0006) +[2026-06-07 03:20:19,855][495927] Updated weights for policy 0, policy_version 45447 (0.0007) +[2026-06-07 03:20:20,002][495927] Updated weights for policy 0, policy_version 45457 (0.0008) +[2026-06-07 03:20:20,154][495927] Updated weights for policy 0, policy_version 45467 (0.0008) +[2026-06-07 03:20:20,324][495927] Updated weights for policy 0, policy_version 45478 (0.0008) +[2026-06-07 03:20:20,473][495927] Updated weights for policy 0, policy_version 45488 (0.0008) +[2026-06-07 03:20:21,079][495927] Updated weights for policy 0, policy_version 45498 (0.0008) +[2026-06-07 03:20:21,224][495927] Updated weights for policy 0, policy_version 45508 (0.0009) +[2026-06-07 03:20:21,391][495927] Updated weights for policy 0, policy_version 45519 (0.0008) +[2026-06-07 03:20:21,549][495927] Updated weights for policy 0, policy_version 45530 (0.0008) +[2026-06-07 03:20:21,716][495927] Updated weights for policy 0, policy_version 45541 (0.0008) +[2026-06-07 03:20:22,402][495927] Updated weights for policy 0, policy_version 45554 (0.0008) +[2026-06-07 03:20:22,545][495927] Updated weights for policy 0, policy_version 45564 (0.0008) +[2026-06-07 03:20:22,699][495927] Updated weights for policy 0, policy_version 45574 (0.0008) +[2026-06-07 03:20:22,867][495927] Updated weights for policy 0, policy_version 45585 (0.0008) +[2026-06-07 03:20:23,016][495927] Updated weights for policy 0, policy_version 45595 (0.0008) +[2026-06-07 03:20:23,193][495927] Updated weights for policy 0, policy_version 45607 (0.0008) +[2026-06-07 03:20:23,814][495927] Updated weights for policy 0, policy_version 45617 (0.0009) +[2026-06-07 03:20:23,909][492660] Fps is (10 sec: 22937.3, 60 sec: 22937.6, 300 sec: 22993.1). Total num frames: 23363584. Throughput: 0: 23051.4. Samples: 23362816. Policy #0 lag: (min: 63.0, avg: 76.3, max: 127.0) +[2026-06-07 03:20:23,911][492660] Avg episode reward: [(0, '1939.105')] +[2026-06-07 03:20:23,972][495927] Updated weights for policy 0, policy_version 45628 (0.0008) +[2026-06-07 03:20:24,122][495927] Updated weights for policy 0, policy_version 45638 (0.0008) +[2026-06-07 03:20:24,279][495927] Updated weights for policy 0, policy_version 45649 (0.0008) +[2026-06-07 03:20:24,428][495927] Updated weights for policy 0, policy_version 45659 (0.0008) +[2026-06-07 03:20:24,581][495927] Updated weights for policy 0, policy_version 45669 (0.0008) +[2026-06-07 03:20:24,717][495927] Updated weights for policy 0, policy_version 45679 (0.0009) +[2026-06-07 03:20:24,727][495570] Saving new best policy, reward=1939.105! +[2026-06-07 03:20:25,353][495927] Updated weights for policy 0, policy_version 45689 (0.0009) +[2026-06-07 03:20:25,531][495927] Updated weights for policy 0, policy_version 45702 (0.0009) +[2026-06-07 03:20:25,688][495927] Updated weights for policy 0, policy_version 45712 (0.0009) +[2026-06-07 03:20:25,841][495927] Updated weights for policy 0, policy_version 45722 (0.0010) +[2026-06-07 03:20:25,986][495927] Updated weights for policy 0, policy_version 45732 (0.0008) +[2026-06-07 03:20:26,143][495927] Updated weights for policy 0, policy_version 45742 (0.0009) +[2026-06-07 03:20:26,762][495927] Updated weights for policy 0, policy_version 45753 (0.0009) +[2026-06-07 03:20:26,928][495927] Updated weights for policy 0, policy_version 45764 (0.0009) +[2026-06-07 03:20:27,087][495927] Updated weights for policy 0, policy_version 45775 (0.0009) +[2026-06-07 03:20:27,258][495927] Updated weights for policy 0, policy_version 45786 (0.0010) +[2026-06-07 03:20:27,403][495927] Updated weights for policy 0, policy_version 45796 (0.0009) +[2026-06-07 03:20:27,550][495927] Updated weights for policy 0, policy_version 45806 (0.0008) +[2026-06-07 03:20:28,213][495927] Updated weights for policy 0, policy_version 45818 (0.0008) +[2026-06-07 03:20:28,356][495927] Updated weights for policy 0, policy_version 45828 (0.0008) +[2026-06-07 03:20:28,536][495927] Updated weights for policy 0, policy_version 45840 (0.0008) +[2026-06-07 03:20:28,686][495927] Updated weights for policy 0, policy_version 45850 (0.0009) +[2026-06-07 03:20:28,830][495927] Updated weights for policy 0, policy_version 45860 (0.0007) +[2026-06-07 03:20:28,909][492660] Fps is (10 sec: 22937.5, 60 sec: 22937.6, 300 sec: 22882.1). Total num frames: 23461888. Throughput: 0: 23059.9. Samples: 23498496. Policy #0 lag: (min: 63.0, avg: 76.3, max: 127.0) +[2026-06-07 03:20:28,910][492660] Avg episode reward: [(0, '1946.156')] +[2026-06-07 03:20:28,999][495927] Updated weights for policy 0, policy_version 45871 (0.0005) +[2026-06-07 03:20:29,010][495570] Saving new best policy, reward=1946.156! +[2026-06-07 03:20:29,621][495927] Updated weights for policy 0, policy_version 45882 (0.0005) +[2026-06-07 03:20:29,764][495927] Updated weights for policy 0, policy_version 45892 (0.0009) +[2026-06-07 03:20:29,912][495927] Updated weights for policy 0, policy_version 45902 (0.0008) +[2026-06-07 03:20:30,072][495927] Updated weights for policy 0, policy_version 45912 (0.0008) +[2026-06-07 03:20:30,222][495927] Updated weights for policy 0, policy_version 45922 (0.0009) +[2026-06-07 03:20:30,372][495927] Updated weights for policy 0, policy_version 45932 (0.0008) +[2026-06-07 03:20:31,015][495927] Updated weights for policy 0, policy_version 45944 (0.0009) +[2026-06-07 03:20:31,163][495927] Updated weights for policy 0, policy_version 45954 (0.0008) +[2026-06-07 03:20:31,315][495927] Updated weights for policy 0, policy_version 45964 (0.0008) +[2026-06-07 03:20:31,463][495927] Updated weights for policy 0, policy_version 45974 (0.0008) +[2026-06-07 03:20:31,623][495927] Updated weights for policy 0, policy_version 45984 (0.0009) +[2026-06-07 03:20:31,771][495927] Updated weights for policy 0, policy_version 45994 (0.0009) +[2026-06-07 03:20:32,407][495927] Updated weights for policy 0, policy_version 46005 (0.0008) +[2026-06-07 03:20:32,568][495927] Updated weights for policy 0, policy_version 46016 (0.0009) +[2026-06-07 03:20:32,707][495927] Updated weights for policy 0, policy_version 46026 (0.0008) +[2026-06-07 03:20:32,855][495927] Updated weights for policy 0, policy_version 46036 (0.0009) +[2026-06-07 03:20:33,022][495927] Updated weights for policy 0, policy_version 46047 (0.0009) +[2026-06-07 03:20:33,176][495927] Updated weights for policy 0, policy_version 46057 (0.0009) +[2026-06-07 03:20:33,813][495927] Updated weights for policy 0, policy_version 46067 (0.0009) +[2026-06-07 03:20:33,909][492660] Fps is (10 sec: 22937.5, 60 sec: 22937.5, 300 sec: 22993.1). Total num frames: 23592960. Throughput: 0: 23020.1. Samples: 23565824. Policy #0 lag: (min: 63.0, avg: 76.3, max: 127.0) +[2026-06-07 03:20:33,910][492660] Avg episode reward: [(0, '1965.073')] +[2026-06-07 03:20:33,947][495927] Updated weights for policy 0, policy_version 46077 (0.0008) +[2026-06-07 03:20:34,099][495927] Updated weights for policy 0, policy_version 46087 (0.0008) +[2026-06-07 03:20:34,243][495927] Updated weights for policy 0, policy_version 46097 (0.0008) +[2026-06-07 03:20:34,399][495927] Updated weights for policy 0, policy_version 46107 (0.0009) +[2026-06-07 03:20:34,544][495927] Updated weights for policy 0, policy_version 46117 (0.0009) +[2026-06-07 03:20:34,708][495570] Saving new best policy, reward=1965.073! +[2026-06-07 03:20:34,709][495927] Updated weights for policy 0, policy_version 46128 (0.0008) +[2026-06-07 03:20:35,349][495927] Updated weights for policy 0, policy_version 46139 (0.0009) +[2026-06-07 03:20:35,494][495927] Updated weights for policy 0, policy_version 46149 (0.0008) +[2026-06-07 03:20:35,657][495927] Updated weights for policy 0, policy_version 46160 (0.0009) +[2026-06-07 03:20:35,824][495927] Updated weights for policy 0, policy_version 46171 (0.0008) +[2026-06-07 03:20:35,976][495927] Updated weights for policy 0, policy_version 46181 (0.0008) +[2026-06-07 03:20:36,128][495927] Updated weights for policy 0, policy_version 46191 (0.0008) +[2026-06-07 03:20:36,752][495927] Updated weights for policy 0, policy_version 46201 (0.0009) +[2026-06-07 03:20:36,886][495927] Updated weights for policy 0, policy_version 46211 (0.0009) +[2026-06-07 03:20:37,034][495927] Updated weights for policy 0, policy_version 46221 (0.0009) +[2026-06-07 03:20:37,186][495927] Updated weights for policy 0, policy_version 46231 (0.0009) +[2026-06-07 03:20:37,368][495927] Updated weights for policy 0, policy_version 46243 (0.0009) +[2026-06-07 03:20:37,528][495927] Updated weights for policy 0, policy_version 46254 (0.0009) +[2026-06-07 03:20:38,198][495927] Updated weights for policy 0, policy_version 46266 (0.0009) +[2026-06-07 03:20:38,364][495927] Updated weights for policy 0, policy_version 46278 (0.0009) +[2026-06-07 03:20:38,525][495927] Updated weights for policy 0, policy_version 46289 (0.0009) +[2026-06-07 03:20:38,694][495927] Updated weights for policy 0, policy_version 46300 (0.0009) +[2026-06-07 03:20:38,836][495927] Updated weights for policy 0, policy_version 46310 (0.0009) +[2026-06-07 03:20:38,909][492660] Fps is (10 sec: 22937.5, 60 sec: 22937.6, 300 sec: 22882.1). Total num frames: 23691264. Throughput: 0: 22789.7. Samples: 23700736. Policy #0 lag: (min: 63.0, avg: 76.3, max: 127.0) +[2026-06-07 03:20:38,910][492660] Avg episode reward: [(0, '1993.904')] +[2026-06-07 03:20:38,976][495570] Saving new best policy, reward=1993.904! +[2026-06-07 03:20:39,507][495927] Updated weights for policy 0, policy_version 46321 (0.0009) +[2026-06-07 03:20:39,651][495927] Updated weights for policy 0, policy_version 46332 (0.0009) +[2026-06-07 03:20:39,811][495927] Updated weights for policy 0, policy_version 46343 (0.0010) +[2026-06-07 03:20:39,951][495927] Updated weights for policy 0, policy_version 46353 (0.0009) +[2026-06-07 03:20:40,106][495927] Updated weights for policy 0, policy_version 46363 (0.0009) +[2026-06-07 03:20:40,252][495927] Updated weights for policy 0, policy_version 46373 (0.0009) +[2026-06-07 03:20:40,417][495927] Updated weights for policy 0, policy_version 46384 (0.0009) +[2026-06-07 03:20:41,054][495927] Updated weights for policy 0, policy_version 46394 (0.0009) +[2026-06-07 03:20:41,204][495927] Updated weights for policy 0, policy_version 46404 (0.0008) +[2026-06-07 03:20:41,355][495927] Updated weights for policy 0, policy_version 46414 (0.0008) +[2026-06-07 03:20:41,542][495927] Updated weights for policy 0, policy_version 46426 (0.0008) +[2026-06-07 03:20:41,720][495927] Updated weights for policy 0, policy_version 46438 (0.0009) +[2026-06-07 03:20:41,869][495927] Updated weights for policy 0, policy_version 46448 (0.0008) +[2026-06-07 03:20:42,510][495927] Updated weights for policy 0, policy_version 46458 (0.0008) +[2026-06-07 03:20:42,650][495927] Updated weights for policy 0, policy_version 46468 (0.0008) +[2026-06-07 03:20:42,813][495927] Updated weights for policy 0, policy_version 46479 (0.0008) +[2026-06-07 03:20:42,972][495927] Updated weights for policy 0, policy_version 46489 (0.0008) +[2026-06-07 03:20:43,123][495927] Updated weights for policy 0, policy_version 46499 (0.0008) +[2026-06-07 03:20:43,306][495927] Updated weights for policy 0, policy_version 46511 (0.0008) +[2026-06-07 03:20:43,909][492660] Fps is (10 sec: 22937.7, 60 sec: 22937.6, 300 sec: 22993.1). Total num frames: 23822336. Throughput: 0: 22806.7. Samples: 23836928. Policy #0 lag: (min: 63.0, avg: 76.3, max: 127.0) +[2026-06-07 03:20:43,910][492660] Avg episode reward: [(0, '2012.475')] +[2026-06-07 03:20:43,925][495927] Updated weights for policy 0, policy_version 46521 (0.0008) +[2026-06-07 03:20:44,072][495927] Updated weights for policy 0, policy_version 46531 (0.0008) +[2026-06-07 03:20:44,219][495927] Updated weights for policy 0, policy_version 46541 (0.0008) +[2026-06-07 03:20:44,382][495927] Updated weights for policy 0, policy_version 46552 (0.0008) +[2026-06-07 03:20:44,534][495927] Updated weights for policy 0, policy_version 46562 (0.0008) +[2026-06-07 03:20:44,681][495927] Updated weights for policy 0, policy_version 46572 (0.0008) +[2026-06-07 03:20:44,741][495570] Saving new best policy, reward=2012.475! +[2026-06-07 03:20:45,332][495927] Updated weights for policy 0, policy_version 46582 (0.0008) +[2026-06-07 03:20:45,472][495927] Updated weights for policy 0, policy_version 46592 (0.0008) +[2026-06-07 03:20:45,621][495927] Updated weights for policy 0, policy_version 46602 (0.0008) +[2026-06-07 03:20:45,769][495927] Updated weights for policy 0, policy_version 46612 (0.0008) +[2026-06-07 03:20:45,922][495927] Updated weights for policy 0, policy_version 46622 (0.0009) +[2026-06-07 03:20:46,075][495927] Updated weights for policy 0, policy_version 46632 (0.0004) +[2026-06-07 03:20:46,672][495927] Updated weights for policy 0, policy_version 46642 (0.0004) +[2026-06-07 03:20:46,823][495927] Updated weights for policy 0, policy_version 46653 (0.0005) +[2026-06-07 03:20:47,003][495927] Updated weights for policy 0, policy_version 46665 (0.0006) +[2026-06-07 03:20:47,170][495927] Updated weights for policy 0, policy_version 46676 (0.0005) +[2026-06-07 03:20:47,321][495927] Updated weights for policy 0, policy_version 46686 (0.0005) +[2026-06-07 03:20:47,490][495927] Updated weights for policy 0, policy_version 46697 (0.0005) +[2026-06-07 03:20:48,130][495927] Updated weights for policy 0, policy_version 46710 (0.0006) +[2026-06-07 03:20:48,283][495927] Updated weights for policy 0, policy_version 46721 (0.0008) +[2026-06-07 03:20:48,447][495927] Updated weights for policy 0, policy_version 46732 (0.0005) +[2026-06-07 03:20:48,602][495927] Updated weights for policy 0, policy_version 46742 (0.0006) +[2026-06-07 03:20:48,769][495927] Updated weights for policy 0, policy_version 46753 (0.0009) +[2026-06-07 03:20:48,909][492660] Fps is (10 sec: 22937.4, 60 sec: 22937.6, 300 sec: 22882.1). Total num frames: 23920640. Throughput: 0: 22829.4. Samples: 23905920. Policy #0 lag: (min: 63.0, avg: 77.3, max: 127.0) +[2026-06-07 03:20:48,910][492660] Avg episode reward: [(0, '1971.687')] +[2026-06-07 03:20:48,933][495927] Updated weights for policy 0, policy_version 46764 (0.0008) +[2026-06-07 03:20:49,575][495927] Updated weights for policy 0, policy_version 46775 (0.0008) +[2026-06-07 03:20:49,715][495927] Updated weights for policy 0, policy_version 46785 (0.0008) +[2026-06-07 03:20:49,867][495927] Updated weights for policy 0, policy_version 46795 (0.0008) +[2026-06-07 03:20:50,030][495927] Updated weights for policy 0, policy_version 46806 (0.0009) +[2026-06-07 03:20:50,201][495927] Updated weights for policy 0, policy_version 46817 (0.0008) +[2026-06-07 03:20:50,366][495927] Updated weights for policy 0, policy_version 46828 (0.0008) +[2026-06-07 03:20:50,998][495927] Updated weights for policy 0, policy_version 46838 (0.0008) +[2026-06-07 03:20:51,135][495927] Updated weights for policy 0, policy_version 46848 (0.0008) +[2026-06-07 03:20:51,281][495927] Updated weights for policy 0, policy_version 46858 (0.0008) +[2026-06-07 03:20:51,434][495927] Updated weights for policy 0, policy_version 46868 (0.0008) +[2026-06-07 03:20:51,599][495927] Updated weights for policy 0, policy_version 46879 (0.0008) +[2026-06-07 03:20:51,768][495927] Updated weights for policy 0, policy_version 46890 (0.0009) +[2026-06-07 03:20:52,408][495927] Updated weights for policy 0, policy_version 46900 (0.0009) +[2026-06-07 03:20:52,545][495927] Updated weights for policy 0, policy_version 46910 (0.0008) +[2026-06-07 03:20:52,694][495927] Updated weights for policy 0, policy_version 46920 (0.0008) +[2026-06-07 03:20:52,861][495927] Updated weights for policy 0, policy_version 46931 (0.0008) +[2026-06-07 03:20:53,003][495927] Updated weights for policy 0, policy_version 46941 (0.0008) +[2026-06-07 03:20:53,166][495927] Updated weights for policy 0, policy_version 46951 (0.0008) +[2026-06-07 03:20:53,804][495927] Updated weights for policy 0, policy_version 46961 (0.0008) +[2026-06-07 03:20:53,909][492660] Fps is (10 sec: 22937.7, 60 sec: 22937.6, 300 sec: 22993.1). Total num frames: 24051712. Throughput: 0: 23079.9. Samples: 24052352. Policy #0 lag: (min: 63.0, avg: 77.3, max: 127.0) +[2026-06-07 03:20:53,910][492660] Avg episode reward: [(0, '2056.031')] +[2026-06-07 03:20:53,959][495927] Updated weights for policy 0, policy_version 46972 (0.0009) +[2026-06-07 03:20:54,110][495927] Updated weights for policy 0, policy_version 46983 (0.0008) +[2026-06-07 03:20:54,272][495927] Updated weights for policy 0, policy_version 46993 (0.0009) +[2026-06-07 03:20:54,423][495927] Updated weights for policy 0, policy_version 47003 (0.0008) +[2026-06-07 03:20:54,583][495927] Updated weights for policy 0, policy_version 47014 (0.0008) +[2026-06-07 03:20:54,731][495570] Saving new best policy, reward=2056.031! +[2026-06-07 03:20:55,231][495927] Updated weights for policy 0, policy_version 47025 (0.0008) +[2026-06-07 03:20:55,387][495927] Updated weights for policy 0, policy_version 47036 (0.0009) +[2026-06-07 03:20:55,549][495927] Updated weights for policy 0, policy_version 47047 (0.0008) +[2026-06-07 03:20:55,698][495927] Updated weights for policy 0, policy_version 47057 (0.0008) +[2026-06-07 03:20:55,874][495927] Updated weights for policy 0, policy_version 47069 (0.0009) +[2026-06-07 03:20:56,039][495927] Updated weights for policy 0, policy_version 47080 (0.0008) +[2026-06-07 03:20:56,669][495927] Updated weights for policy 0, policy_version 47090 (0.0009) +[2026-06-07 03:20:56,829][495927] Updated weights for policy 0, policy_version 47102 (0.0008) +[2026-06-07 03:20:57,022][495927] Updated weights for policy 0, policy_version 47116 (0.0008) +[2026-06-07 03:20:57,171][495927] Updated weights for policy 0, policy_version 47126 (0.0010) +[2026-06-07 03:20:57,324][495927] Updated weights for policy 0, policy_version 47136 (0.0009) +[2026-06-07 03:20:57,472][495927] Updated weights for policy 0, policy_version 47146 (0.0008) +[2026-06-07 03:20:58,150][495927] Updated weights for policy 0, policy_version 47158 (0.0008) +[2026-06-07 03:20:58,305][495927] Updated weights for policy 0, policy_version 47169 (0.0008) +[2026-06-07 03:20:58,448][495927] Updated weights for policy 0, policy_version 47179 (0.0010) +[2026-06-07 03:20:58,631][495927] Updated weights for policy 0, policy_version 47192 (0.0007) +[2026-06-07 03:20:58,786][495927] Updated weights for policy 0, policy_version 47202 (0.0007) +[2026-06-07 03:20:58,909][492660] Fps is (10 sec: 22937.8, 60 sec: 22937.6, 300 sec: 22882.1). Total num frames: 24150016. Throughput: 0: 23085.5. Samples: 24187776. Policy #0 lag: (min: 63.0, avg: 77.3, max: 127.0) +[2026-06-07 03:20:58,910][492660] Avg episode reward: [(0, '2092.486')] +[2026-06-07 03:20:58,975][495927] Updated weights for policy 0, policy_version 47215 (0.0008) +[2026-06-07 03:20:58,982][495570] Saving new best policy, reward=2092.486! +[2026-06-07 03:20:59,619][495927] Updated weights for policy 0, policy_version 47226 (0.0009) +[2026-06-07 03:20:59,777][495927] Updated weights for policy 0, policy_version 47237 (0.0008) +[2026-06-07 03:20:59,924][495927] Updated weights for policy 0, policy_version 47247 (0.0008) +[2026-06-07 03:21:00,066][495927] Updated weights for policy 0, policy_version 47257 (0.0008) +[2026-06-07 03:21:00,241][495927] Updated weights for policy 0, policy_version 47269 (0.0009) +[2026-06-07 03:21:00,396][495927] Updated weights for policy 0, policy_version 47279 (0.0009) +[2026-06-07 03:21:01,030][495927] Updated weights for policy 0, policy_version 47289 (0.0008) +[2026-06-07 03:21:01,194][495927] Updated weights for policy 0, policy_version 47301 (0.0009) +[2026-06-07 03:21:01,351][495927] Updated weights for policy 0, policy_version 47312 (0.0008) +[2026-06-07 03:21:01,506][495927] Updated weights for policy 0, policy_version 47322 (0.0008) +[2026-06-07 03:21:01,673][495927] Updated weights for policy 0, policy_version 47334 (0.0008) +[2026-06-07 03:21:02,352][495927] Updated weights for policy 0, policy_version 47345 (0.0009) +[2026-06-07 03:21:02,491][495927] Updated weights for policy 0, policy_version 47355 (0.0008) +[2026-06-07 03:21:02,657][495927] Updated weights for policy 0, policy_version 47367 (0.0008) +[2026-06-07 03:21:02,801][495927] Updated weights for policy 0, policy_version 47377 (0.0008) +[2026-06-07 03:21:02,957][495927] Updated weights for policy 0, policy_version 47387 (0.0008) +[2026-06-07 03:21:03,110][495927] Updated weights for policy 0, policy_version 47397 (0.0009) +[2026-06-07 03:21:03,264][495927] Updated weights for policy 0, policy_version 47407 (0.0008) +[2026-06-07 03:21:03,901][495927] Updated weights for policy 0, policy_version 47418 (0.0009) +[2026-06-07 03:21:03,909][492660] Fps is (10 sec: 22937.7, 60 sec: 22937.6, 300 sec: 22993.1). Total num frames: 24281088. Throughput: 0: 23071.3. Samples: 24255232. Policy #0 lag: (min: 63.0, avg: 77.3, max: 127.0) +[2026-06-07 03:21:03,910][492660] Avg episode reward: [(0, '2081.620')] +[2026-06-07 03:21:04,071][495927] Updated weights for policy 0, policy_version 47430 (0.0009) +[2026-06-07 03:21:04,232][495927] Updated weights for policy 0, policy_version 47440 (0.0008) +[2026-06-07 03:21:04,381][495927] Updated weights for policy 0, policy_version 47450 (0.0008) +[2026-06-07 03:21:04,531][495927] Updated weights for policy 0, policy_version 47460 (0.0008) +[2026-06-07 03:21:04,697][495927] Updated weights for policy 0, policy_version 47471 (0.0009) +[2026-06-07 03:21:05,333][495927] Updated weights for policy 0, policy_version 47482 (0.0009) +[2026-06-07 03:21:05,473][495927] Updated weights for policy 0, policy_version 47492 (0.0008) +[2026-06-07 03:21:05,628][495927] Updated weights for policy 0, policy_version 47502 (0.0008) +[2026-06-07 03:21:05,794][495927] Updated weights for policy 0, policy_version 47513 (0.0008) +[2026-06-07 03:21:05,960][495927] Updated weights for policy 0, policy_version 47524 (0.0008) +[2026-06-07 03:21:06,111][495927] Updated weights for policy 0, policy_version 47534 (0.0008) +[2026-06-07 03:21:06,739][495927] Updated weights for policy 0, policy_version 47545 (0.0009) +[2026-06-07 03:21:06,886][495927] Updated weights for policy 0, policy_version 47555 (0.0008) +[2026-06-07 03:21:07,037][495927] Updated weights for policy 0, policy_version 47565 (0.0008) +[2026-06-07 03:21:07,184][495927] Updated weights for policy 0, policy_version 47575 (0.0008) +[2026-06-07 03:21:07,351][495927] Updated weights for policy 0, policy_version 47586 (0.0009) +[2026-06-07 03:21:07,502][495927] Updated weights for policy 0, policy_version 47596 (0.0008) +[2026-06-07 03:21:08,145][495927] Updated weights for policy 0, policy_version 47608 (0.0008) +[2026-06-07 03:21:08,289][495927] Updated weights for policy 0, policy_version 47618 (0.0008) +[2026-06-07 03:21:08,438][495927] Updated weights for policy 0, policy_version 47628 (0.0009) +[2026-06-07 03:21:08,583][495927] Updated weights for policy 0, policy_version 47638 (0.0008) +[2026-06-07 03:21:08,751][495927] Updated weights for policy 0, policy_version 47649 (0.0008) +[2026-06-07 03:21:08,909][492660] Fps is (10 sec: 22937.3, 60 sec: 22937.6, 300 sec: 22882.1). Total num frames: 24379392. Throughput: 0: 22823.8. Samples: 24389888. Policy #0 lag: (min: 63.0, avg: 77.3, max: 127.0) +[2026-06-07 03:21:08,910][492660] Avg episode reward: [(0, '2063.325')] +[2026-06-07 03:21:08,912][495927] Updated weights for policy 0, policy_version 47660 (0.0008) +[2026-06-07 03:21:09,552][495927] Updated weights for policy 0, policy_version 47672 (0.0008) +[2026-06-07 03:21:09,703][495927] Updated weights for policy 0, policy_version 47682 (0.0008) +[2026-06-07 03:21:09,882][495927] Updated weights for policy 0, policy_version 47694 (0.0008) +[2026-06-07 03:21:10,032][495927] Updated weights for policy 0, policy_version 47704 (0.0009) +[2026-06-07 03:21:10,200][495927] Updated weights for policy 0, policy_version 47715 (0.0008) +[2026-06-07 03:21:10,357][495927] Updated weights for policy 0, policy_version 47725 (0.0008) +[2026-06-07 03:21:10,969][495927] Updated weights for policy 0, policy_version 47735 (0.0008) +[2026-06-07 03:21:11,128][495927] Updated weights for policy 0, policy_version 47746 (0.0008) +[2026-06-07 03:21:11,279][495927] Updated weights for policy 0, policy_version 47756 (0.0008) +[2026-06-07 03:21:11,429][495927] Updated weights for policy 0, policy_version 47766 (0.0008) +[2026-06-07 03:21:11,593][495927] Updated weights for policy 0, policy_version 47777 (0.0008) +[2026-06-07 03:21:11,741][495927] Updated weights for policy 0, policy_version 47787 (0.0008) +[2026-06-07 03:21:12,382][495927] Updated weights for policy 0, policy_version 47797 (0.0008) +[2026-06-07 03:21:12,535][495927] Updated weights for policy 0, policy_version 47808 (0.0009) +[2026-06-07 03:21:12,685][495927] Updated weights for policy 0, policy_version 47818 (0.0006) +[2026-06-07 03:21:12,831][495927] Updated weights for policy 0, policy_version 47828 (0.0005) +[2026-06-07 03:21:12,987][495927] Updated weights for policy 0, policy_version 47838 (0.0005) +[2026-06-07 03:21:13,150][495927] Updated weights for policy 0, policy_version 47849 (0.0005) +[2026-06-07 03:21:13,785][495927] Updated weights for policy 0, policy_version 47860 (0.0007) +[2026-06-07 03:21:13,909][492660] Fps is (10 sec: 22937.6, 60 sec: 22937.6, 300 sec: 22993.1). Total num frames: 24510464. Throughput: 0: 22798.2. Samples: 24524416. Policy #0 lag: (min: 63.0, avg: 77.3, max: 127.0) +[2026-06-07 03:21:13,910][492660] Avg episode reward: [(0, '2063.325')] +[2026-06-07 03:21:13,921][495927] Updated weights for policy 0, policy_version 47870 (0.0007) +[2026-06-07 03:21:14,073][495927] Updated weights for policy 0, policy_version 47880 (0.0008) +[2026-06-07 03:21:14,218][495927] Updated weights for policy 0, policy_version 47890 (0.0008) +[2026-06-07 03:21:14,372][495927] Updated weights for policy 0, policy_version 47900 (0.0008) +[2026-06-07 03:21:14,534][495927] Updated weights for policy 0, policy_version 47911 (0.0008) +[2026-06-07 03:21:15,204][495927] Updated weights for policy 0, policy_version 47921 (0.0008) +[2026-06-07 03:21:15,352][495927] Updated weights for policy 0, policy_version 47931 (0.0009) +[2026-06-07 03:21:15,502][495927] Updated weights for policy 0, policy_version 47942 (0.0008) +[2026-06-07 03:21:15,658][495927] Updated weights for policy 0, policy_version 47952 (0.0008) +[2026-06-07 03:21:15,813][495927] Updated weights for policy 0, policy_version 47962 (0.0008) +[2026-06-07 03:21:15,964][495927] Updated weights for policy 0, policy_version 47972 (0.0008) +[2026-06-07 03:21:16,131][495927] Updated weights for policy 0, policy_version 47983 (0.0008) +[2026-06-07 03:21:16,749][495927] Updated weights for policy 0, policy_version 47994 (0.0008) +[2026-06-07 03:21:16,911][495927] Updated weights for policy 0, policy_version 48005 (0.0009) +[2026-06-07 03:21:17,061][495927] Updated weights for policy 0, policy_version 48015 (0.0008) +[2026-06-07 03:21:17,211][495927] Updated weights for policy 0, policy_version 48025 (0.0008) +[2026-06-07 03:21:17,359][495927] Updated weights for policy 0, policy_version 48035 (0.0008) +[2026-06-07 03:21:17,512][495927] Updated weights for policy 0, policy_version 48045 (0.0008) +[2026-06-07 03:21:18,159][495927] Updated weights for policy 0, policy_version 48057 (0.0008) +[2026-06-07 03:21:18,312][495927] Updated weights for policy 0, policy_version 48068 (0.0008) +[2026-06-07 03:21:18,482][495927] Updated weights for policy 0, policy_version 48079 (0.0009) +[2026-06-07 03:21:18,636][495927] Updated weights for policy 0, policy_version 48089 (0.0008) +[2026-06-07 03:21:18,803][495927] Updated weights for policy 0, policy_version 48100 (0.0008) +[2026-06-07 03:21:18,909][492660] Fps is (10 sec: 22938.0, 60 sec: 22937.6, 300 sec: 22882.1). Total num frames: 24608768. Throughput: 0: 22892.2. Samples: 24595968. Policy #0 lag: (min: 9.0, avg: 24.4, max: 73.0) +[2026-06-07 03:21:18,910][492660] Avg episode reward: [(0, '2038.689')] +[2026-06-07 03:21:18,948][495927] Updated weights for policy 0, policy_version 48110 (0.0008) +[2026-06-07 03:21:19,566][495927] Updated weights for policy 0, policy_version 48120 (0.0008) +[2026-06-07 03:21:19,720][495927] Updated weights for policy 0, policy_version 48131 (0.0008) +[2026-06-07 03:21:19,886][495927] Updated weights for policy 0, policy_version 48142 (0.0007) +[2026-06-07 03:21:20,048][495927] Updated weights for policy 0, policy_version 48153 (0.0008) +[2026-06-07 03:21:20,203][495927] Updated weights for policy 0, policy_version 48163 (0.0010) +[2026-06-07 03:21:20,356][495927] Updated weights for policy 0, policy_version 48173 (0.0010) +[2026-06-07 03:21:20,982][495927] Updated weights for policy 0, policy_version 48183 (0.0010) +[2026-06-07 03:21:21,124][495927] Updated weights for policy 0, policy_version 48193 (0.0008) +[2026-06-07 03:21:21,274][495927] Updated weights for policy 0, policy_version 48203 (0.0008) +[2026-06-07 03:21:21,433][495927] Updated weights for policy 0, policy_version 48213 (0.0008) +[2026-06-07 03:21:21,589][495927] Updated weights for policy 0, policy_version 48224 (0.0008) +[2026-06-07 03:21:21,740][495927] Updated weights for policy 0, policy_version 48234 (0.0008) +[2026-06-07 03:21:22,374][495927] Updated weights for policy 0, policy_version 48244 (0.0008) +[2026-06-07 03:21:22,526][495927] Updated weights for policy 0, policy_version 48254 (0.0008) +[2026-06-07 03:21:22,669][495927] Updated weights for policy 0, policy_version 48264 (0.0008) +[2026-06-07 03:21:22,835][495927] Updated weights for policy 0, policy_version 48275 (0.0008) +[2026-06-07 03:21:22,984][495927] Updated weights for policy 0, policy_version 48285 (0.0008) +[2026-06-07 03:21:23,137][495927] Updated weights for policy 0, policy_version 48295 (0.0008) +[2026-06-07 03:21:23,758][495927] Updated weights for policy 0, policy_version 48305 (0.0009) +[2026-06-07 03:21:23,899][495927] Updated weights for policy 0, policy_version 48315 (0.0008) +[2026-06-07 03:21:23,909][492660] Fps is (10 sec: 22937.1, 60 sec: 22937.6, 300 sec: 22993.1). Total num frames: 24739840. Throughput: 0: 23116.7. Samples: 24740992. Policy #0 lag: (min: 9.0, avg: 24.4, max: 73.0) +[2026-06-07 03:21:23,910][492660] Avg episode reward: [(0, '2029.009')] +[2026-06-07 03:21:24,046][495927] Updated weights for policy 0, policy_version 48325 (0.0008) +[2026-06-07 03:21:24,212][495927] Updated weights for policy 0, policy_version 48336 (0.0008) +[2026-06-07 03:21:24,380][495927] Updated weights for policy 0, policy_version 48347 (0.0008) +[2026-06-07 03:21:24,550][495927] Updated weights for policy 0, policy_version 48358 (0.0008) +[2026-06-07 03:21:24,696][495927] Updated weights for policy 0, policy_version 48368 (0.0008) +[2026-06-07 03:21:25,325][495927] Updated weights for policy 0, policy_version 48379 (0.0008) +[2026-06-07 03:21:25,475][495927] Updated weights for policy 0, policy_version 48389 (0.0008) +[2026-06-07 03:21:25,638][495927] Updated weights for policy 0, policy_version 48400 (0.0008) +[2026-06-07 03:21:25,792][495927] Updated weights for policy 0, policy_version 48410 (0.0008) +[2026-06-07 03:21:25,946][495927] Updated weights for policy 0, policy_version 48420 (0.0008) +[2026-06-07 03:21:26,097][495927] Updated weights for policy 0, policy_version 48430 (0.0008) +[2026-06-07 03:21:26,712][495927] Updated weights for policy 0, policy_version 48440 (0.0008) +[2026-06-07 03:21:26,864][495927] Updated weights for policy 0, policy_version 48450 (0.0008) +[2026-06-07 03:21:27,013][495927] Updated weights for policy 0, policy_version 48460 (0.0008) +[2026-06-07 03:21:27,167][495927] Updated weights for policy 0, policy_version 48470 (0.0009) +[2026-06-07 03:21:27,317][495927] Updated weights for policy 0, policy_version 48480 (0.0008) +[2026-06-07 03:21:27,463][495927] Updated weights for policy 0, policy_version 48490 (0.0008) +[2026-06-07 03:21:28,096][495927] Updated weights for policy 0, policy_version 48500 (0.0008) +[2026-06-07 03:21:28,254][495927] Updated weights for policy 0, policy_version 48511 (0.0008) +[2026-06-07 03:21:28,406][495927] Updated weights for policy 0, policy_version 48521 (0.0008) +[2026-06-07 03:21:28,564][495927] Updated weights for policy 0, policy_version 48532 (0.0008) +[2026-06-07 03:21:28,711][495927] Updated weights for policy 0, policy_version 48542 (0.0008) +[2026-06-07 03:21:28,870][495927] Updated weights for policy 0, policy_version 48552 (0.0008) +[2026-06-07 03:21:28,909][492660] Fps is (10 sec: 22937.6, 60 sec: 22937.6, 300 sec: 22882.1). Total num frames: 24838144. Throughput: 0: 23079.9. Samples: 24875520. Policy #0 lag: (min: 9.0, avg: 24.4, max: 73.0) +[2026-06-07 03:21:28,910][492660] Avg episode reward: [(0, '2087.560')] +[2026-06-07 03:21:29,487][495927] Updated weights for policy 0, policy_version 48562 (0.0009) +[2026-06-07 03:21:29,638][495927] Updated weights for policy 0, policy_version 48573 (0.0008) +[2026-06-07 03:21:29,786][495927] Updated weights for policy 0, policy_version 48583 (0.0008) +[2026-06-07 03:21:29,933][495927] Updated weights for policy 0, policy_version 48593 (0.0010) +[2026-06-07 03:21:30,085][495927] Updated weights for policy 0, policy_version 48603 (0.0010) +[2026-06-07 03:21:30,233][495927] Updated weights for policy 0, policy_version 48613 (0.0008) +[2026-06-07 03:21:30,396][495927] Updated weights for policy 0, policy_version 48624 (0.0008) +[2026-06-07 03:21:31,038][495927] Updated weights for policy 0, policy_version 48634 (0.0008) +[2026-06-07 03:21:31,178][495927] Updated weights for policy 0, policy_version 48644 (0.0011) +[2026-06-07 03:21:31,329][495927] Updated weights for policy 0, policy_version 48654 (0.0009) +[2026-06-07 03:21:31,479][495927] Updated weights for policy 0, policy_version 48664 (0.0008) +[2026-06-07 03:21:31,633][495927] Updated weights for policy 0, policy_version 48674 (0.0008) +[2026-06-07 03:21:31,783][495927] Updated weights for policy 0, policy_version 48684 (0.0008) +[2026-06-07 03:21:32,396][495927] Updated weights for policy 0, policy_version 48694 (0.0009) +[2026-06-07 03:21:32,565][495927] Updated weights for policy 0, policy_version 48706 (0.0008) +[2026-06-07 03:21:32,716][495927] Updated weights for policy 0, policy_version 48716 (0.0008) +[2026-06-07 03:21:32,857][495927] Updated weights for policy 0, policy_version 48726 (0.0008) +[2026-06-07 03:21:33,010][495927] Updated weights for policy 0, policy_version 48736 (0.0008) +[2026-06-07 03:21:33,159][495927] Updated weights for policy 0, policy_version 48746 (0.0009) +[2026-06-07 03:21:33,806][495927] Updated weights for policy 0, policy_version 48756 (0.0009) +[2026-06-07 03:21:33,909][492660] Fps is (10 sec: 22938.0, 60 sec: 22937.6, 300 sec: 22993.1). Total num frames: 24969216. Throughput: 0: 23040.1. Samples: 24942720. Policy #0 lag: (min: 9.0, avg: 24.4, max: 73.0) +[2026-06-07 03:21:33,910][492660] Avg episode reward: [(0, '2113.825')] +[2026-06-07 03:21:33,962][495927] Updated weights for policy 0, policy_version 48767 (0.0008) +[2026-06-07 03:21:34,108][495927] Updated weights for policy 0, policy_version 48777 (0.0008) +[2026-06-07 03:21:34,259][495927] Updated weights for policy 0, policy_version 48787 (0.0008) +[2026-06-07 03:21:34,405][495927] Updated weights for policy 0, policy_version 48797 (0.0009) +[2026-06-07 03:21:34,567][495927] Updated weights for policy 0, policy_version 48807 (0.0008) +[2026-06-07 03:21:34,695][495570] Saving new best policy, reward=2113.825! +[2026-06-07 03:21:35,186][495927] Updated weights for policy 0, policy_version 48817 (0.0009) +[2026-06-07 03:21:35,327][495927] Updated weights for policy 0, policy_version 48827 (0.0010) +[2026-06-07 03:21:35,388][495570] Early stopping after 2 epochs (16 sgd steps), loss delta 0.0000007 +[2026-06-07 03:21:35,389][495570] Saving results/checkpoints_factor_sweeps/flappy/context_window/flappy_frame_stack_fixed_l2_fs3_seed11/checkpoint_p0/checkpoint_000048832_25034752.pth... +[2026-06-07 03:21:35,391][495929] Stopping RolloutWorker_w0... +[2026-06-07 03:21:35,391][495570] Stopping Batcher_0... +[2026-06-07 03:21:35,391][495929] Loop rollout_proc0_evt_loop terminating... +[2026-06-07 03:21:35,391][492660] Component RolloutWorker_w0 stopped! +[2026-06-07 03:21:35,391][495570] Loop batcher_evt_loop terminating... +[2026-06-07 03:21:35,392][492660] Component Batcher_0 stopped! +[2026-06-07 03:21:35,393][492660] Component RolloutWorker_w1 stopped! +[2026-06-07 03:21:35,392][495928] Stopping RolloutWorker_w1... +[2026-06-07 03:21:35,393][495928] Loop rollout_proc1_evt_loop terminating... +[2026-06-07 03:21:35,412][495570] Saving results/checkpoints_factor_sweeps/flappy/context_window/flappy_frame_stack_fixed_l2_fs3_seed11/checkpoint_p0/checkpoint_000048832_25034752.pth... +[2026-06-07 03:21:35,426][495927] Weights refcount: 2 0 +[2026-06-07 03:21:35,427][495927] Stopping InferenceWorker_p0-w0... +[2026-06-07 03:21:35,428][495927] Loop inference_proc0-0_evt_loop terminating... +[2026-06-07 03:21:35,428][492660] Component InferenceWorker_p0-w0 stopped! +[2026-06-07 03:21:35,434][495570] Stopping LearnerWorker_p0... +[2026-06-07 03:21:35,435][495570] Loop learner_proc0_evt_loop terminating... +[2026-06-07 03:21:35,434][492660] Component LearnerWorker_p0 stopped! +[2026-06-07 03:21:35,435][492660] Waiting for process learner_proc0 to stop... +[2026-06-07 03:21:36,311][492660] Waiting for process inference_proc0-0 to join... +[2026-06-07 03:21:36,312][492660] Waiting for process rollout_proc0 to join... +[2026-06-07 03:21:36,313][492660] Waiting for process rollout_proc1 to join... +[2026-06-07 03:21:36,313][492660] Batcher 0 profile tree view: +batching: 0.9232, releasing_batches: 0.0342 +[2026-06-07 03:21:36,314][492660] InferenceWorker_p0-w0 profile tree view: +wait_policy: 0.0000 + wait_policy_total: 658.5018 +update_model: 43.8565 + weight_update: 0.0009 +one_step: 0.0009 + handle_policy_step: 370.1869 + deserialize: 4.7773, stack: 0.3413, obs_to_device_normalize: 52.3054, forward: 135.8429, prepare_outputs: 151.3761, send_messages: 9.8724 +[2026-06-07 03:21:36,314][492660] Learner 0 profile tree view: +misc: 0.0047, prepare_batch: 75.4545 +train: 720.7042 + epoch_init: 0.0578, minibatch_init: 2.4307, losses_postprocess: 232.0770, kl_divergence: 25.7926, after_optimizer: 284.8051 + calculate_losses: 38.9867 + losses_init: 0.0804, forward_head: 12.9007, bptt_initial: 0.3607, bptt: 0.4101, tail: 8.7930, advantages_returns: 2.8390, losses: 10.6639 + update: 133.4116 + clip: 12.8841 +[2026-06-07 03:21:36,314][492660] RolloutWorker_w0 profile tree view: +wait_for_trajectories: 0.0332, enqueue_policy_requests: 126.5313, process_policy_outputs: 7.5034, env_step: 695.0919, finalize_trajectories: 0.1033, complete_rollouts: 0.0784 +post_env_step: 16.1058 + process_env_step: 4.6897 +[2026-06-07 03:21:36,315][492660] RolloutWorker_w1 profile tree view: +wait_for_trajectories: 0.0345, enqueue_policy_requests: 130.5124, process_policy_outputs: 7.5546, env_step: 697.4614, finalize_trajectories: 0.1110, complete_rollouts: 0.0798 +post_env_step: 15.9662 + process_env_step: 4.5868 +[2026-06-07 03:21:36,316][492660] Loop Runner_EvtLoop terminating... +[2026-06-07 03:21:36,317][492660] Runner profile tree view: +main_loop: 1101.7519 +[2026-06-07 03:21:36,317][492660] Collected {0: 25034752}, FPS: 22722.7