diff --git "a/code/RL_model/verl/verl_train/.log" "b/code/RL_model/verl/verl_train/.log" new file mode 100644--- /dev/null +++ "b/code/RL_model/verl/verl_train/.log" @@ -0,0 +1,1123 @@ +/home/mshahidul/miniconda3/envs/verl2/lib/python3.12/site-packages/torch/cuda/__init__.py:63: FutureWarning: The pynvml package is deprecated. Please install nvidia-ml-py instead. If you did not install pynvml directly, please report this to the maintainers of the package that installed pynvml for you. + import pynvml # type: ignore[import] +INFO 02-07 12:56:22 [__init__.py:216] Automatically detected platform cuda. +/home/mshahidul/miniconda3/envs/verl2/lib/python3.12/site-packages/megatron/core/models/backends.py:21: UserWarning: Apex is not installed. Falling back to Torch Norm + warnings.warn("Apex is not installed. Falling back to Torch Norm") +/home/mshahidul/miniconda3/envs/verl2/lib/python3.12/site-packages/megatron/core/optimizer/__init__.py:18: UserWarning: Transformer Engine and Apex are not installed. Falling back to Torch optimizers. + warnings.warn( +/home/mshahidul/miniconda3/envs/verl2/lib/python3.12/site-packages/megatron/core/optimizer/optimizer.py:28: UserWarning: Transformer Engine and Apex are not installed. Falling back to local implementations of multi_tensor_applier and multi_tensor_scale + warnings.warn( +/home/mshahidul/miniconda3/envs/verl2/lib/python3.12/site-packages/megatron/core/optimizer/clip_grads.py:29: UserWarning: Transformer Engine and Apex are not installed. Falling back to local implementations of multi_tensor_applier, multi_tensor_l2norm, and multi_tensor_scale + warnings.warn( +/home/mshahidul/miniconda3/envs/verl2/lib/python3.12/site-packages/megatron/core/models/gpt/gpt_layer_specs.py:67: UserWarning: Apex is not installed. Falling back to Torch Norm + warnings.warn("Apex is not installed. Falling back to Torch Norm") +ray init kwargs: {'num_cpus': None, 'runtime_env': {'env_vars': {'TOKENIZERS_PARALLELISM': 'true', 'NCCL_DEBUG': 'WARN', 'VLLM_LOGGING_LEVEL': 'WARN', 'VLLM_ALLOW_RUNTIME_LORA_UPDATING': 'true', 'CUDA_DEVICE_MAX_CONNECTIONS': '1', 'NCCL_CUMEM_ENABLE': '0', 'VLLM_DISABLE_COMPILE_CACHE': '1', 'HCCL_HOST_SOCKET_PORT_RANGE': 'auto', 'HCCL_NPU_SOCKET_PORT_RANGE': 'auto'}, 'working_dir': None}} +2026-02-07 12:56:39,494 INFO worker.py:1998 -- Started a local Ray instance. View the dashboard at http://127.0.0.1:8301  +/home/mshahidul/miniconda3/envs/verl2/lib/python3.12/site-packages/ray/_private/worker.py:2046: FutureWarning: Tip: In future versions of Ray, Ray will no longer override accelerator visible devices env var if num_gpus=0 or num_gpus=None (default). To enable this behavior and turn off this error message, set RAY_ACCEL_ENV_VAR_OVERRIDE_ON_ZERO=0 + warnings.warn( +(pid=896026) /home/mshahidul/miniconda3/envs/verl2/lib/python3.12/site-packages/torch/cuda/__init__.py:63: FutureWarning: The pynvml package is deprecated. Please install nvidia-ml-py instead. If you did not install pynvml directly, please report this to the maintainers of the package that installed pynvml for you. +(pid=896026) import pynvml # type: ignore[import] +(pid=896026) /home/mshahidul/miniconda3/envs/verl2/lib/python3.12/site-packages/megatron/core/models/backends.py:21: UserWarning: Apex is not installed. Falling back to Torch Norm +(pid=896026) warnings.warn("Apex is not installed. Falling back to Torch Norm") +(pid=896026) /home/mshahidul/miniconda3/envs/verl2/lib/python3.12/site-packages/megatron/core/optimizer/__init__.py:18: UserWarning: Transformer Engine and Apex are not installed. Falling back to Torch optimizers. +(pid=896026) warnings.warn( +(pid=896026) /home/mshahidul/miniconda3/envs/verl2/lib/python3.12/site-packages/megatron/core/optimizer/optimizer.py:28: UserWarning: Transformer Engine and Apex are not installed. Falling back to local implementations of multi_tensor_applier and multi_tensor_scale +(pid=896026) warnings.warn( +(pid=896026) /home/mshahidul/miniconda3/envs/verl2/lib/python3.12/site-packages/megatron/core/optimizer/clip_grads.py:29: UserWarning: Transformer Engine and Apex are not installed. Falling back to local implementations of multi_tensor_applier, multi_tensor_l2norm, and multi_tensor_scale +(pid=896026) warnings.warn( +(pid=896026) /home/mshahidul/miniconda3/envs/verl2/lib/python3.12/site-packages/megatron/core/models/gpt/gpt_layer_specs.py:67: UserWarning: Apex is not installed. Falling back to Torch Norm +(pid=896026) warnings.warn("Apex is not installed. Falling back to Torch Norm") +(TaskRunner pid=896026) TaskRunner hostname: gamma, PID: 896026 +(TaskRunner pid=896026) {'actor_rollout_ref': {'actor': {'_target_': 'verl.workers.config.FSDPActorConfig', +(TaskRunner pid=896026) 'calculate_entropy': False, +(TaskRunner pid=896026) 'calculate_sum_pi_squared': False, +(TaskRunner pid=896026) 'checkpoint': {'_target_': 'verl.trainer.config.CheckpointConfig', +(TaskRunner pid=896026) 'async_save': False, +(TaskRunner pid=896026) 'load_contents': ['model', +(TaskRunner pid=896026) 'optimizer', +(TaskRunner pid=896026) 'extra'], +(TaskRunner pid=896026) 'save_contents': ['model', +(TaskRunner pid=896026) 'optimizer', +(TaskRunner pid=896026) 'extra']}, +(TaskRunner pid=896026) 'clip_ratio': 0.2, +(TaskRunner pid=896026) 'clip_ratio_c': 3.0, +(TaskRunner pid=896026) 'clip_ratio_high': 0.2, +(TaskRunner pid=896026) 'clip_ratio_low': 0.2, +(TaskRunner pid=896026) 'data_loader_seed': 42, +(TaskRunner pid=896026) 'entropy_checkpointing': False, +(TaskRunner pid=896026) 'entropy_coeff': 0, +(TaskRunner pid=896026) 'entropy_from_logits_with_chunking': False, +(TaskRunner pid=896026) 'freeze_vision_tower': False, +(TaskRunner pid=896026) 'fsdp_config': {'_target_': 'verl.workers.config.FSDPEngineConfig', +(TaskRunner pid=896026) 'dtype': 'bfloat16', +(TaskRunner pid=896026) 'entropy_checkpointing': False, +(TaskRunner pid=896026) 'entropy_from_logits_with_chunking': False, +(TaskRunner pid=896026) 'forward_only': False, +(TaskRunner pid=896026) 'forward_prefetch': False, +(TaskRunner pid=896026) 'fsdp_size': -1, +(TaskRunner pid=896026) 'full_determinism': False, +(TaskRunner pid=896026) 'model_dtype': 'fp32', +(TaskRunner pid=896026) 'offload_policy': False, +(TaskRunner pid=896026) 'optimizer_offload': False, +(TaskRunner pid=896026) 'param_offload': False, +(TaskRunner pid=896026) 'reshard_after_forward': True, +(TaskRunner pid=896026) 'seed': 42, +(TaskRunner pid=896026) 'strategy': 'fsdp', +(TaskRunner pid=896026) 'ulysses_sequence_parallel_size': 1, +(TaskRunner pid=896026) 'use_orig_params': False, +(TaskRunner pid=896026) 'use_torch_compile': True, +(TaskRunner pid=896026) 'wrap_policy': {'min_num_params': 0}}, +(TaskRunner pid=896026) 'grad_clip': 1.0, +(TaskRunner pid=896026) 'kl_loss_coef': 0.001, +(TaskRunner pid=896026) 'kl_loss_type': 'low_var_kl', +(TaskRunner pid=896026) 'loss_agg_mode': 'token-mean', +(TaskRunner pid=896026) 'loss_scale_factor': None, +(TaskRunner pid=896026) 'optim': {'_target_': 'verl.workers.config.FSDPOptimizerConfig', +(TaskRunner pid=896026) 'betas': [0.9, 0.999], +(TaskRunner pid=896026) 'clip_grad': 1.0, +(TaskRunner pid=896026) 'lr': 1e-06, +(TaskRunner pid=896026) 'lr_scheduler_type': 'constant', +(TaskRunner pid=896026) 'lr_warmup_steps': -1, +(TaskRunner pid=896026) 'lr_warmup_steps_ratio': 0.0, +(TaskRunner pid=896026) 'min_lr_ratio': 0.0, +(TaskRunner pid=896026) 'num_cycles': 0.5, +(TaskRunner pid=896026) 'optimizer': 'AdamW', +(TaskRunner pid=896026) 'optimizer_impl': 'torch.optim', +(TaskRunner pid=896026) 'override_optimizer_config': None, +(TaskRunner pid=896026) 'total_training_steps': -1, +(TaskRunner pid=896026) 'warmup_style': None, +(TaskRunner pid=896026) 'weight_decay': 0.01}, +(TaskRunner pid=896026) 'policy_loss': {'_target_': 'verl.workers.config.PolicyLossConfig', +(TaskRunner pid=896026) 'clip_cov_lb': 1.0, +(TaskRunner pid=896026) 'clip_cov_ratio': 0.0002, +(TaskRunner pid=896026) 'clip_cov_ub': 5.0, +(TaskRunner pid=896026) 'kl_cov_ratio': 0.0002, +(TaskRunner pid=896026) 'loss_mode': 'vanilla', +(TaskRunner pid=896026) 'ppo_kl_coef': 0.1}, +(TaskRunner pid=896026) 'ppo_epochs': 1, +(TaskRunner pid=896026) 'ppo_max_token_len_per_gpu': 16384, +(TaskRunner pid=896026) 'ppo_micro_batch_size': None, +(TaskRunner pid=896026) 'ppo_micro_batch_size_per_gpu': 2, +(TaskRunner pid=896026) 'ppo_mini_batch_size': 4, +(TaskRunner pid=896026) 'profiler': {'_target_': 'verl.utils.profiler.ProfilerConfig', +(TaskRunner pid=896026) 'all_ranks': False, +(TaskRunner pid=896026) 'enable': False, +(TaskRunner pid=896026) 'ranks': [], +(TaskRunner pid=896026) 'save_path': 'outputs/profile', +(TaskRunner pid=896026) 'tool': None, +(TaskRunner pid=896026) 'tool_config': {'npu': {'_target_': 'verl.utils.profiler.config.NPUToolConfig', +(TaskRunner pid=896026) 'analysis': True, +(TaskRunner pid=896026) 'contents': [], +(TaskRunner pid=896026) 'discrete': False, +(TaskRunner pid=896026) 'level': 'level0'}, +(TaskRunner pid=896026) 'nsys': {'_target_': 'verl.utils.profiler.config.NsightToolConfig', +(TaskRunner pid=896026) 'discrete': False}, +(TaskRunner pid=896026) 'torch': {'_target_': 'verl.utils.profiler.config.TorchProfilerToolConfig', +(TaskRunner pid=896026) 'contents': [], +(TaskRunner pid=896026) 'discrete': False}, +(TaskRunner pid=896026) 'torch_memory': {'_target_': 'verl.utils.profiler.config.TorchMemoryToolConfig', +(TaskRunner pid=896026) 'stack_depth': 32, +(TaskRunner pid=896026) 'trace_alloc_max_entries': 100000}}}, +(TaskRunner pid=896026) 'rollout_n': 3, +(TaskRunner pid=896026) 'router_replay': {'_target_': 'verl.workers.config.RouterReplayConfig', +(TaskRunner pid=896026) 'mode': 'disabled', +(TaskRunner pid=896026) 'record_file': None, +(TaskRunner pid=896026) 'replay_file': None}, +(TaskRunner pid=896026) 'shuffle': False, +(TaskRunner pid=896026) 'strategy': 'fsdp', +(TaskRunner pid=896026) 'sum_pi_squared_checkpointing': False, +(TaskRunner pid=896026) 'tau_neg': 1.05, +(TaskRunner pid=896026) 'tau_pos': 1.0, +(TaskRunner pid=896026) 'ulysses_sequence_parallel_size': 1, +(TaskRunner pid=896026) 'use_dynamic_bsz': False, +(TaskRunner pid=896026) 'use_fused_kernels': False, +(TaskRunner pid=896026) 'use_kl_loss': True, +(TaskRunner pid=896026) 'use_prefix_grouper': False, +(TaskRunner pid=896026) 'use_remove_padding': True, +(TaskRunner pid=896026) 'use_torch_compile': True}, +(TaskRunner pid=896026) 'hybrid_engine': True, +(TaskRunner pid=896026) 'model': {'_target_': 'verl.workers.config.HFModelConfig', +(TaskRunner pid=896026) 'custom_chat_template': None, +(TaskRunner pid=896026) 'enable_activation_offload': False, +(TaskRunner pid=896026) 'enable_gradient_checkpointing': True, +(TaskRunner pid=896026) 'exclude_modules': None, +(TaskRunner pid=896026) 'external_lib': None, +(TaskRunner pid=896026) 'fused_kernel_options': {'impl_backend': 'torch'}, +(TaskRunner pid=896026) 'hf_config_path': None, +(TaskRunner pid=896026) 'lora_adapter_path': None, +(TaskRunner pid=896026) 'lora_alpha': 16, +(TaskRunner pid=896026) 'lora_rank': 0, +(TaskRunner pid=896026) 'mtp': {'_target_': 'verl.workers.config.MtpConfig', +(TaskRunner pid=896026) 'detach_encoder': False, +(TaskRunner pid=896026) 'enable': False, +(TaskRunner pid=896026) 'enable_rollout': False, +(TaskRunner pid=896026) 'enable_train': False, +(TaskRunner pid=896026) 'method': 'mtp', +(TaskRunner pid=896026) 'mtp_loss_scaling_factor': 0.1, +(TaskRunner pid=896026) 'num_speculative_tokens': 1, +(TaskRunner pid=896026) 'speculative_algorithm': 'EAGLE', +(TaskRunner pid=896026) 'speculative_eagle_topk': 1, +(TaskRunner pid=896026) 'speculative_num_draft_tokens': 4, +(TaskRunner pid=896026) 'speculative_num_steps': 3}, +(TaskRunner pid=896026) 'override_config': {}, +(TaskRunner pid=896026) 'path': 'Qwen/Qwen3-4B-Instruct-2507', +(TaskRunner pid=896026) 'target_modules': 'all-linear', +(TaskRunner pid=896026) 'tiled_mlp': {'enabled': False, +(TaskRunner pid=896026) 'num_shards': 4}, +(TaskRunner pid=896026) 'tokenizer_path': None, +(TaskRunner pid=896026) 'trust_remote_code': False, +(TaskRunner pid=896026) 'use_fused_kernels': False, +(TaskRunner pid=896026) 'use_liger': False, +(TaskRunner pid=896026) 'use_remove_padding': True, +(TaskRunner pid=896026) 'use_shm': False}, +(TaskRunner pid=896026) 'nccl_timeout': 600, +(TaskRunner pid=896026) 'ref': {'_target_': 'verl.workers.config.FSDPActorConfig', +(TaskRunner pid=896026) 'entropy_checkpointing': False, +(TaskRunner pid=896026) 'entropy_from_logits_with_chunking': False, +(TaskRunner pid=896026) 'fsdp_config': {'_target_': 'verl.workers.config.FSDPEngineConfig', +(TaskRunner pid=896026) 'dtype': 'bfloat16', +(TaskRunner pid=896026) 'entropy_checkpointing': False, +(TaskRunner pid=896026) 'entropy_from_logits_with_chunking': False, +(TaskRunner pid=896026) 'forward_only': True, +(TaskRunner pid=896026) 'forward_prefetch': False, +(TaskRunner pid=896026) 'fsdp_size': -1, +(TaskRunner pid=896026) 'full_determinism': False, +(TaskRunner pid=896026) 'model_dtype': 'fp32', +(TaskRunner pid=896026) 'offload_policy': False, +(TaskRunner pid=896026) 'optimizer_offload': False, +(TaskRunner pid=896026) 'param_offload': False, +(TaskRunner pid=896026) 'reshard_after_forward': True, +(TaskRunner pid=896026) 'seed': 42, +(TaskRunner pid=896026) 'strategy': 'fsdp', +(TaskRunner pid=896026) 'ulysses_sequence_parallel_size': 1, +(TaskRunner pid=896026) 'use_orig_params': False, +(TaskRunner pid=896026) 'use_torch_compile': True, +(TaskRunner pid=896026) 'wrap_policy': {'min_num_params': 0}}, +(TaskRunner pid=896026) 'log_prob_max_token_len_per_gpu': 16384, +(TaskRunner pid=896026) 'log_prob_micro_batch_size': None, +(TaskRunner pid=896026) 'log_prob_micro_batch_size_per_gpu': 32, +(TaskRunner pid=896026) 'log_prob_use_dynamic_bsz': False, +(TaskRunner pid=896026) 'profiler': {'_target_': 'verl.utils.profiler.ProfilerConfig', +(TaskRunner pid=896026) 'all_ranks': False, +(TaskRunner pid=896026) 'enable': False, +(TaskRunner pid=896026) 'ranks': [], +(TaskRunner pid=896026) 'save_path': 'outputs/profile', +(TaskRunner pid=896026) 'tool': None, +(TaskRunner pid=896026) 'tool_config': {'npu': {'_target_': 'verl.utils.profiler.config.NPUToolConfig', +(TaskRunner pid=896026) 'analysis': True, +(TaskRunner pid=896026) 'contents': [], +(TaskRunner pid=896026) 'discrete': False, +(TaskRunner pid=896026) 'level': 'level0'}, +(TaskRunner pid=896026) 'nsys': {'_target_': 'verl.utils.profiler.config.NsightToolConfig', +(TaskRunner pid=896026) 'discrete': False}, +(TaskRunner pid=896026) 'torch': {'_target_': 'verl.utils.profiler.config.TorchProfilerToolConfig', +(TaskRunner pid=896026) 'contents': [], +(TaskRunner pid=896026) 'discrete': False}, +(TaskRunner pid=896026) 'torch_memory': {'_target_': 'verl.utils.profiler.config.TorchMemoryToolConfig', +(TaskRunner pid=896026) 'stack_depth': 32, +(TaskRunner pid=896026) 'trace_alloc_max_entries': 100000}}}, +(TaskRunner pid=896026) 'rollout_n': 3, +(TaskRunner pid=896026) 'router_replay': {'_target_': 'verl.workers.config.RouterReplayConfig', +(TaskRunner pid=896026) 'mode': 'disabled', +(TaskRunner pid=896026) 'record_file': None, +(TaskRunner pid=896026) 'replay_file': None}, +(TaskRunner pid=896026) 'strategy': 'fsdp', +(TaskRunner pid=896026) 'ulysses_sequence_parallel_size': 1, +(TaskRunner pid=896026) 'use_torch_compile': True}, +(TaskRunner pid=896026) 'rollout': {'_target_': 'verl.workers.config.RolloutConfig', +(TaskRunner pid=896026) 'agent': {'_target_': 'verl.workers.config.AgentLoopConfig', +(TaskRunner pid=896026) 'agent_loop_config_path': None, +(TaskRunner pid=896026) 'custom_async_server': {'_target_': 'verl.workers.config.CustomAsyncServerConfig', +(TaskRunner pid=896026) 'name': None, +(TaskRunner pid=896026) 'path': None}, +(TaskRunner pid=896026) 'default_agent_loop': 'single_turn_agent', +(TaskRunner pid=896026) 'num_workers': 8}, +(TaskRunner pid=896026) 'calculate_log_probs': False, +(TaskRunner pid=896026) 'checkpoint_engine': {'_target_': 'verl.workers.config.CheckpointEngineConfig', +(TaskRunner pid=896026) 'backend': 'naive', +(TaskRunner pid=896026) 'engine_kwargs': {}, +(TaskRunner pid=896026) 'update_weights_bucket_megabytes': 2048}, +(TaskRunner pid=896026) 'cudagraph_capture_sizes': None, +(TaskRunner pid=896026) 'data_parallel_size': 1, +(TaskRunner pid=896026) 'disable_log_stats': True, +(TaskRunner pid=896026) 'do_sample': True, +(TaskRunner pid=896026) 'dtype': 'bfloat16', +(TaskRunner pid=896026) 'enable_chunked_prefill': True, +(TaskRunner pid=896026) 'enable_prefix_caching': True, +(TaskRunner pid=896026) 'enable_rollout_routing_replay': False, +(TaskRunner pid=896026) 'enforce_eager': False, +(TaskRunner pid=896026) 'engine_kwargs': {'sglang': {}, +(TaskRunner pid=896026) 'trtllm': {}, +(TaskRunner pid=896026) 'vllm': {}}, +(TaskRunner pid=896026) 'expert_parallel_size': 1, +(TaskRunner pid=896026) 'free_cache_engine': True, +(TaskRunner pid=896026) 'gpu_memory_utilization': 0.6, +(TaskRunner pid=896026) 'ignore_eos': False, +(TaskRunner pid=896026) 'layered_summon': False, +(TaskRunner pid=896026) 'load_format': 'dummy', +(TaskRunner pid=896026) 'log_prob_max_token_len_per_gpu': 16384, +(TaskRunner pid=896026) 'log_prob_micro_batch_size': None, +(TaskRunner pid=896026) 'log_prob_micro_batch_size_per_gpu': 2, +(TaskRunner pid=896026) 'log_prob_use_dynamic_bsz': False, +(TaskRunner pid=896026) 'logprobs_mode': 'processed_logprobs', +(TaskRunner pid=896026) 'max_model_len': 8192, +(TaskRunner pid=896026) 'max_num_batched_tokens': 8192, +(TaskRunner pid=896026) 'max_num_seqs': 1024, +(TaskRunner pid=896026) 'mode': 'async', +(TaskRunner pid=896026) 'mtp': {'_target_': 'verl.workers.config.MtpConfig', +(TaskRunner pid=896026) 'detach_encoder': False, +(TaskRunner pid=896026) 'enable': False, +(TaskRunner pid=896026) 'enable_rollout': False, +(TaskRunner pid=896026) 'enable_train': False, +(TaskRunner pid=896026) 'method': 'mtp', +(TaskRunner pid=896026) 'mtp_loss_scaling_factor': 0.1, +(TaskRunner pid=896026) 'num_speculative_tokens': 1, +(TaskRunner pid=896026) 'speculative_algorithm': 'EAGLE', +(TaskRunner pid=896026) 'speculative_eagle_topk': 1, +(TaskRunner pid=896026) 'speculative_num_draft_tokens': 4, +(TaskRunner pid=896026) 'speculative_num_steps': 3}, +(TaskRunner pid=896026) 'multi_stage_wake_up': False, +(TaskRunner pid=896026) 'multi_turn': {'_target_': 'verl.workers.config.MultiTurnConfig', +(TaskRunner pid=896026) 'enable': False, +(TaskRunner pid=896026) 'format': 'hermes', +(TaskRunner pid=896026) 'interaction_config_path': None, +(TaskRunner pid=896026) 'max_assistant_turns': None, +(TaskRunner pid=896026) 'max_parallel_calls': 1, +(TaskRunner pid=896026) 'max_tool_response_length': 256, +(TaskRunner pid=896026) 'max_user_turns': None, +(TaskRunner pid=896026) 'num_repeat_rollouts': None, +(TaskRunner pid=896026) 'tokenization_sanity_check_mode': 'strict', +(TaskRunner pid=896026) 'tool_config_path': None, +(TaskRunner pid=896026) 'tool_response_truncate_side': 'middle', +(TaskRunner pid=896026) 'use_inference_chat_template': False}, +(TaskRunner pid=896026) 'n': 3, +(TaskRunner pid=896026) 'name': 'vllm', +(TaskRunner pid=896026) 'over_sample_rate': 0, +(TaskRunner pid=896026) 'pipeline_model_parallel_size': 1, +(TaskRunner pid=896026) 'profiler': {'_target_': 'verl.utils.profiler.ProfilerConfig', +(TaskRunner pid=896026) 'all_ranks': False, +(TaskRunner pid=896026) 'enable': False, +(TaskRunner pid=896026) 'ranks': [], +(TaskRunner pid=896026) 'save_path': 'outputs/profile', +(TaskRunner pid=896026) 'tool': None, +(TaskRunner pid=896026) 'tool_config': {'npu': {'_target_': 'verl.utils.profiler.config.NPUToolConfig', +(TaskRunner pid=896026) 'analysis': True, +(TaskRunner pid=896026) 'contents': [], +(TaskRunner pid=896026) 'discrete': False, +(TaskRunner pid=896026) 'level': 'level0'}, +(TaskRunner pid=896026) 'nsys': {'_target_': 'verl.utils.profiler.config.NsightToolConfig', +(TaskRunner pid=896026) 'discrete': False}, +(TaskRunner pid=896026) 'torch': {'_target_': 'verl.utils.profiler.config.TorchProfilerToolConfig', +(TaskRunner pid=896026) 'contents': [], +(TaskRunner pid=896026) 'discrete': False}, +(TaskRunner pid=896026) 'torch_memory': {'_target_': 'verl.utils.profiler.config.TorchMemoryToolConfig', +(TaskRunner pid=896026) 'stack_depth': 32, +(TaskRunner pid=896026) 'trace_alloc_max_entries': 100000}}}, +(TaskRunner pid=896026) 'prometheus': {'_target_': 'verl.workers.config.PrometheusConfig', +(TaskRunner pid=896026) 'enable': False, +(TaskRunner pid=896026) 'file': '/tmp/ray/session_latest/metrics/prometheus/prometheus.yml', +(TaskRunner pid=896026) 'port': 9090, +(TaskRunner pid=896026) 'served_model_name': 'Qwen/Qwen3-4B-Instruct-2507'}, +(TaskRunner pid=896026) 'prompt_length': 1024, +(TaskRunner pid=896026) 'quantization': None, +(TaskRunner pid=896026) 'quantization_config_file': None, +(TaskRunner pid=896026) 'response_length': 2048, +(TaskRunner pid=896026) 'scheduling_policy': 'fcfs', +(TaskRunner pid=896026) 'skip_dump_dir': '/tmp/rollout_dump', +(TaskRunner pid=896026) 'skip_rollout': False, +(TaskRunner pid=896026) 'skip_tokenizer_init': True, +(TaskRunner pid=896026) 'temperature': 1.0, +(TaskRunner pid=896026) 'tensor_model_parallel_size': 1, +(TaskRunner pid=896026) 'top_k': -1, +(TaskRunner pid=896026) 'top_p': 1, +(TaskRunner pid=896026) 'trace': {'_target_': 'verl.workers.config.TraceConfig', +(TaskRunner pid=896026) 'backend': None, +(TaskRunner pid=896026) 'max_samples_per_step_per_worker': None, +(TaskRunner pid=896026) 'token2text': False}, +(TaskRunner pid=896026) 'val_kwargs': {'_target_': 'verl.workers.config.SamplingConfig', +(TaskRunner pid=896026) 'do_sample': False, +(TaskRunner pid=896026) 'n': 1, +(TaskRunner pid=896026) 'temperature': 0, +(TaskRunner pid=896026) 'top_k': -1, +(TaskRunner pid=896026) 'top_p': 1.0}}}, +(TaskRunner pid=896026) 'algorithm': {'_target_': 'verl.trainer.config.AlgoConfig', +(TaskRunner pid=896026) 'adv_estimator': 'grpo', +(TaskRunner pid=896026) 'gamma': 1.0, +(TaskRunner pid=896026) 'kl_ctrl': {'_target_': 'verl.trainer.config.KLControlConfig', +(TaskRunner pid=896026) 'horizon': 10000, +(TaskRunner pid=896026) 'kl_coef': 0.001, +(TaskRunner pid=896026) 'target_kl': 0.1, +(TaskRunner pid=896026) 'type': 'fixed'}, +(TaskRunner pid=896026) 'kl_penalty': 'kl', +(TaskRunner pid=896026) 'lam': 1.0, +(TaskRunner pid=896026) 'norm_adv_by_std_in_grpo': True, +(TaskRunner pid=896026) 'pf_ppo': {'reweight_method': 'pow', 'weight_pow': 2.0}, +(TaskRunner pid=896026) 'rollout_correction': {'bypass_mode': False, +(TaskRunner pid=896026) 'loss_type': 'ppo_clip', +(TaskRunner pid=896026) 'rollout_is': None, +(TaskRunner pid=896026) 'rollout_is_batch_normalize': False, +(TaskRunner pid=896026) 'rollout_is_threshold': 2.0, +(TaskRunner pid=896026) 'rollout_rs': None, +(TaskRunner pid=896026) 'rollout_rs_threshold': None}, +(TaskRunner pid=896026) 'use_kl_in_reward': False, +(TaskRunner pid=896026) 'use_pf_ppo': False}, +(TaskRunner pid=896026) 'critic': {'_target_': 'verl.workers.config.FSDPCriticConfig', +(TaskRunner pid=896026) 'checkpoint': {'_target_': 'verl.trainer.config.CheckpointConfig', +(TaskRunner pid=896026) 'async_save': False, +(TaskRunner pid=896026) 'load_contents': ['model', 'optimizer', 'extra'], +(TaskRunner pid=896026) 'save_contents': ['model', 'optimizer', 'extra']}, +(TaskRunner pid=896026) 'cliprange_value': 0.5, +(TaskRunner pid=896026) 'data_loader_seed': 42, +(TaskRunner pid=896026) 'enable': None, +(TaskRunner pid=896026) 'forward_max_token_len_per_gpu': 32768, +(TaskRunner pid=896026) 'forward_micro_batch_size': None, +(TaskRunner pid=896026) 'forward_micro_batch_size_per_gpu': None, +(TaskRunner pid=896026) 'grad_clip': 1.0, +(TaskRunner pid=896026) 'loss_agg_mode': 'token-mean', +(TaskRunner pid=896026) 'model': {'_target_': 'verl.workers.config.FSDPCriticModelCfg', +(TaskRunner pid=896026) 'enable_activation_offload': False, +(TaskRunner pid=896026) 'enable_gradient_checkpointing': True, +(TaskRunner pid=896026) 'external_lib': None, +(TaskRunner pid=896026) 'fsdp_config': {'_target_': 'verl.workers.config.FSDPEngineConfig', +(TaskRunner pid=896026) 'dtype': 'bfloat16', +(TaskRunner pid=896026) 'entropy_checkpointing': False, +(TaskRunner pid=896026) 'entropy_from_logits_with_chunking': False, +(TaskRunner pid=896026) 'forward_only': False, +(TaskRunner pid=896026) 'forward_prefetch': False, +(TaskRunner pid=896026) 'fsdp_size': -1, +(TaskRunner pid=896026) 'full_determinism': False, +(TaskRunner pid=896026) 'model_dtype': 'fp32', +(TaskRunner pid=896026) 'offload_policy': False, +(TaskRunner pid=896026) 'optimizer_offload': False, +(TaskRunner pid=896026) 'param_offload': False, +(TaskRunner pid=896026) 'reshard_after_forward': True, +(TaskRunner pid=896026) 'seed': 42, +(TaskRunner pid=896026) 'strategy': 'fsdp', +(TaskRunner pid=896026) 'ulysses_sequence_parallel_size': 1, +(TaskRunner pid=896026) 'use_orig_params': False, +(TaskRunner pid=896026) 'use_torch_compile': True, +(TaskRunner pid=896026) 'wrap_policy': {'min_num_params': 0}}, +(TaskRunner pid=896026) 'lora_alpha': 16, +(TaskRunner pid=896026) 'lora_rank': 0, +(TaskRunner pid=896026) 'override_config': {}, +(TaskRunner pid=896026) 'path': '~/models/deepseek-llm-7b-chat', +(TaskRunner pid=896026) 'target_modules': 'all-linear', +(TaskRunner pid=896026) 'tiled_mlp': {'enabled': False, 'num_shards': 4}, +(TaskRunner pid=896026) 'tokenizer_path': 'Qwen/Qwen3-4B-Instruct-2507', +(TaskRunner pid=896026) 'trust_remote_code': False, +(TaskRunner pid=896026) 'use_remove_padding': False, +(TaskRunner pid=896026) 'use_shm': False}, +(TaskRunner pid=896026) 'optim': {'_target_': 'verl.workers.config.FSDPOptimizerConfig', +(TaskRunner pid=896026) 'betas': [0.9, 0.999], +(TaskRunner pid=896026) 'clip_grad': 1.0, +(TaskRunner pid=896026) 'lr': 1e-05, +(TaskRunner pid=896026) 'lr_scheduler_type': 'constant', +(TaskRunner pid=896026) 'lr_warmup_steps': -1, +(TaskRunner pid=896026) 'lr_warmup_steps_ratio': 0.0, +(TaskRunner pid=896026) 'min_lr_ratio': 0.0, +(TaskRunner pid=896026) 'num_cycles': 0.5, +(TaskRunner pid=896026) 'optimizer': 'AdamW', +(TaskRunner pid=896026) 'optimizer_impl': 'torch.optim', +(TaskRunner pid=896026) 'override_optimizer_config': None, +(TaskRunner pid=896026) 'total_training_steps': -1, +(TaskRunner pid=896026) 'warmup_style': None, +(TaskRunner pid=896026) 'weight_decay': 0.01}, +(TaskRunner pid=896026) 'ppo_epochs': 1, +(TaskRunner pid=896026) 'ppo_max_token_len_per_gpu': 32768, +(TaskRunner pid=896026) 'ppo_micro_batch_size': None, +(TaskRunner pid=896026) 'ppo_micro_batch_size_per_gpu': None, +(TaskRunner pid=896026) 'ppo_mini_batch_size': 4, +(TaskRunner pid=896026) 'profiler': {'_target_': 'verl.utils.profiler.ProfilerConfig', +(TaskRunner pid=896026) 'all_ranks': False, +(TaskRunner pid=896026) 'enable': False, +(TaskRunner pid=896026) 'ranks': [], +(TaskRunner pid=896026) 'save_path': 'outputs/profile', +(TaskRunner pid=896026) 'tool': None, +(TaskRunner pid=896026) 'tool_config': {'npu': {'_target_': 'verl.utils.profiler.config.NPUToolConfig', +(TaskRunner pid=896026) 'analysis': True, +(TaskRunner pid=896026) 'contents': [], +(TaskRunner pid=896026) 'discrete': False, +(TaskRunner pid=896026) 'level': 'level0'}, +(TaskRunner pid=896026) 'nsys': {'_target_': 'verl.utils.profiler.config.NsightToolConfig', +(TaskRunner pid=896026) 'discrete': False}, +(TaskRunner pid=896026) 'torch': {'_target_': 'verl.utils.profiler.config.TorchProfilerToolConfig', +(TaskRunner pid=896026) 'contents': [], +(TaskRunner pid=896026) 'discrete': False}, +(TaskRunner pid=896026) 'torch_memory': {'_target_': 'verl.utils.profiler.config.TorchMemoryToolConfig', +(TaskRunner pid=896026) 'stack_depth': 32, +(TaskRunner pid=896026) 'trace_alloc_max_entries': 100000}}}, +(TaskRunner pid=896026) 'rollout_n': +(TaskRunner pid=896026) 3, +(TaskRunner pid=896026) 'shuffle': +(TaskRunner pid=896026) False, +(TaskRunner pid=896026) 'strategy': +(TaskRunner pid=896026) 'fsdp', +(TaskRunner pid=896026) 'ulysses_sequence_parallel_size': +(TaskRunner pid=896026) 1, +(TaskRunner pid=896026) 'use_dynamic_bsz': False}, +(TaskRunner pid=896026) 'custom_reward_function': +(TaskRunner pid=896026) {'name': 'compute_score', +(TaskRunner pid=896026) 'path': +(TaskRunner pid=896026) '/home/mshahidul/readctrl/code/RL_model/verl/verl_train/reward_func/reward.py' +(TaskRunner pid=896026) }, +(TaskRunner pid=896026) 'data': +(TaskRunner pid=896026) {'apply_chat_template_kwargs': {}, +(TaskRunner pid=896026) 'custom_cls': {'name': None, 'path': None}, +(TaskRunner pid=896026) 'datagen': {'name': None, 'path': None}, +(TaskRunner pid=896026) 'dataloader_num_workers': 8, +(TaskRunner pid=896026) 'filter_overlong_prompts': True, +(TaskRunner pid=896026) 'filter_overlong_prompts_workers': 1, +(TaskRunner pid=896026) 'image_key': 'images', +(TaskRunner pid=896026) 'image_patch_size': 14, +(TaskRunner pid=896026) 'max_prompt_length': 1024, +(TaskRunner pid=896026) 'max_response_length': 2048, +(TaskRunner pid=896026) 'prompt_key': 'prompt', +(TaskRunner pid=896026) 'return_full_prompt': False, +(TaskRunner pid=896026) 'return_multi_modal_inputs': True, +(TaskRunner pid=896026) 'return_raw_chat': True, +(TaskRunner pid=896026) 'return_raw_input_ids': False, +(TaskRunner pid=896026) 'reward_fn_key': 'data_source', +(TaskRunner pid=896026) 'sampler': {'class_name': None, 'class_path': None}, +(TaskRunner pid=896026) 'seed': None, +(TaskRunner pid=896026) 'shuffle': True, +(TaskRunner pid=896026) 'tokenizer': None, +(TaskRunner pid=896026) 'tool_config_path': None, +(TaskRunner pid=896026) 'train_batch_size': 8, +(TaskRunner pid=896026) 'train_files': '/home/mshahidul/readctrl/code/RL_model/verl/verl_train/dataset/train.parquet', +(TaskRunner pid=896026) 'train_max_samples': -1, +(TaskRunner pid=896026) 'truncation': 'error', +(TaskRunner pid=896026) 'trust_remote_code': False, +(TaskRunner pid=896026) 'use_shm': False, +(TaskRunner pid=896026) 'val_batch_size': None, +(TaskRunner pid=896026) 'val_files': '/home/mshahidul/readctrl/code/RL_model/verl/verl_train/dataset/test.parquet', +(TaskRunner pid=896026) 'val_max_samples': -1, +(TaskRunner pid=896026) 'validation_shuffle': False, +(TaskRunner pid=896026) 'video_key': 'videos'}, +(TaskRunner pid=896026) 'global_profiler': {'_target_': 'verl.utils.profiler.ProfilerConfig', +(TaskRunner pid=896026) 'global_tool_config': {'nsys': {'_target_': 'verl.utils.profiler.config.NsightToolConfig', +(TaskRunner pid=896026) 'controller_nsight_options': {'cuda-graph-trace': 'graph', +(TaskRunner pid=896026) 'cuda-memory-usage': 'true', +(TaskRunner pid=896026) 'trace': 'cuda,nvtx,cublas,ucx'}, +(TaskRunner pid=896026) 'discrete': False, +(TaskRunner pid=896026) 'worker_nsight_options': {'capture-range': 'cudaProfilerApi', +(TaskRunner pid=896026) 'capture-range-end': None, +(TaskRunner pid=896026) 'cuda-graph-trace': 'graph', +(TaskRunner pid=896026) 'cuda-memory-usage': 'true', +(TaskRunner pid=896026) 'kill': 'none', +(TaskRunner pid=896026) 'trace': 'cuda,nvtx,cublas,ucx'}}, +(TaskRunner pid=896026) 'torch_memory': {'context': 'all', +(TaskRunner pid=896026) 'kw_args': {}, +(TaskRunner pid=896026) 'stack_depth': 32, +(TaskRunner pid=896026) 'stacks': 'all', +(TaskRunner pid=896026) 'trace_alloc_max_entries': 100000}}, +(TaskRunner pid=896026) 'profile_continuous_steps': False, +(TaskRunner pid=896026) 'save_path': 'outputs/profile', +(TaskRunner pid=896026) 'steps': None, +(TaskRunner pid=896026) 'tool': None}, +(TaskRunner pid=896026) 'ray_kwargs': {'ray_init': {'num_cpus': None}, 'timeline_json_file': None}, +(TaskRunner pid=896026) 'reward_manager': {'_target_': 'verl.trainer.config.config.RewardManagerConfig', +(TaskRunner pid=896026) 'module': {'_target_': 'verl.trainer.config.config.ModuleConfig', +(TaskRunner pid=896026) 'name': 'custom_reward_manager', +(TaskRunner pid=896026) 'path': None}, +(TaskRunner pid=896026) 'name': 'naive', +(TaskRunner pid=896026) 'source': 'register'}, +(TaskRunner pid=896026) 'reward_model': {'enable': False, +(TaskRunner pid=896026) 'enable_resource_pool': False, +(TaskRunner pid=896026) 'forward_max_token_len_per_gpu': 32768, +(TaskRunner pid=896026) 'launch_reward_fn_async': False, +(TaskRunner pid=896026) 'max_length': None, +(TaskRunner pid=896026) 'micro_batch_size': None, +(TaskRunner pid=896026) 'micro_batch_size_per_gpu': None, +(TaskRunner pid=896026) 'model': {'external_lib': None, +(TaskRunner pid=896026) 'fsdp_config': {'_target_': 'verl.workers.config.FSDPEngineConfig', +(TaskRunner pid=896026) 'forward_prefetch': False, +(TaskRunner pid=896026) 'fsdp_size': -1, +(TaskRunner pid=896026) 'param_offload': False, +(TaskRunner pid=896026) 'reshard_after_forward': True, +(TaskRunner pid=896026) 'wrap_policy': {'min_num_params': 0}}, +(TaskRunner pid=896026) 'input_tokenizer': 'Qwen/Qwen3-4B-Instruct-2507', +(TaskRunner pid=896026) 'override_config': {}, +(TaskRunner pid=896026) 'path': '~/models/FsfairX-LLaMA3-RM-v0.1', +(TaskRunner pid=896026) 'trust_remote_code': False, +(TaskRunner pid=896026) 'use_fused_kernels': False, +(TaskRunner pid=896026) 'use_remove_padding': False, +(TaskRunner pid=896026) 'use_shm': False}, +(TaskRunner pid=896026) 'n_gpus_per_node': 8, +(TaskRunner pid=896026) 'nnodes': 0, +(TaskRunner pid=896026) 'num_workers': 1, +(TaskRunner pid=896026) 'profiler': {'_target_': 'verl.utils.profiler.ProfilerConfig', +(TaskRunner pid=896026) 'all_ranks': False, +(TaskRunner pid=896026) 'enable': False, +(TaskRunner pid=896026) 'ranks': [], +(TaskRunner pid=896026) 'save_path': 'outputs/profile', +(TaskRunner pid=896026) 'tool': None, +(TaskRunner pid=896026) 'tool_config': {'npu': {'_target_': 'verl.utils.profiler.config.NPUToolConfig', +(TaskRunner pid=896026) 'analysis': True, +(TaskRunner pid=896026) 'contents': [], +(TaskRunner pid=896026) 'discrete': False, +(TaskRunner pid=896026) 'level': 'level0'}, +(TaskRunner pid=896026) 'nsys': {'_target_': 'verl.utils.profiler.config.NsightToolConfig', +(TaskRunner pid=896026) 'discrete': False}, +(TaskRunner pid=896026) 'torch': {'_target_': 'verl.utils.profiler.config.TorchProfilerToolConfig', +(TaskRunner pid=896026) 'contents': [], +(TaskRunner pid=896026) 'discrete': False}, +(TaskRunner pid=896026) 'torch_memory': {'_target_': 'verl.utils.profiler.config.TorchMemoryToolConfig', +(TaskRunner pid=896026) 'stack_depth': 32, +(TaskRunner pid=896026) 'trace_alloc_max_entries': 100000}}}, +(TaskRunner pid=896026) 'reward_loop_class_name': None, +(TaskRunner pid=896026) 'reward_loop_module_path': None, +(TaskRunner pid=896026) 'reward_loop_source': 'register', +(TaskRunner pid=896026) 'reward_manager': 'naive', +(TaskRunner pid=896026) 'rollout': {'_target_': 'verl.workers.config.RolloutConfig', +(TaskRunner pid=896026) 'cudagraph_capture_sizes': None, +(TaskRunner pid=896026) 'data_parallel_size': 1, +(TaskRunner pid=896026) 'disable_log_stats': True, +(TaskRunner pid=896026) 'dtype': 'bfloat16', +(TaskRunner pid=896026) 'enable_chunked_prefill': True, +(TaskRunner pid=896026) 'enable_prefix_caching': True, +(TaskRunner pid=896026) 'enforce_eager': True, +(TaskRunner pid=896026) 'engine_kwargs': {}, +(TaskRunner pid=896026) 'expert_parallel_size': 1, +(TaskRunner pid=896026) 'free_cache_engine': True, +(TaskRunner pid=896026) 'gpu_memory_utilization': 0.5, +(TaskRunner pid=896026) 'limit_images': None, +(TaskRunner pid=896026) 'load_format': 'auto', +(TaskRunner pid=896026) 'max_model_len': None, +(TaskRunner pid=896026) 'max_num_batched_tokens': 8192, +(TaskRunner pid=896026) 'max_num_seqs': 1024, +(TaskRunner pid=896026) 'name': '???', +(TaskRunner pid=896026) 'prompt_length': 2048, +(TaskRunner pid=896026) 'response_length': 2048, +(TaskRunner pid=896026) 'skip_tokenizer_init': False, +(TaskRunner pid=896026) 'tensor_model_parallel_size': 2}, +(TaskRunner pid=896026) 'sandbox_fusion': {'max_concurrent': 64, +(TaskRunner pid=896026) 'memory_limit_mb': 1024, +(TaskRunner pid=896026) 'url': None}, +(TaskRunner pid=896026) 'strategy': 'fsdp', +(TaskRunner pid=896026) 'ulysses_sequence_parallel_size': 1, +(TaskRunner pid=896026) 'use_dynamic_bsz': False, +(TaskRunner pid=896026) 'use_reward_loop': True}, +(TaskRunner pid=896026) 'trainer': {'balance_batch': True, +(TaskRunner pid=896026) 'critic_warmup': 0, +(TaskRunner pid=896026) 'default_hdfs_dir': None, +(TaskRunner pid=896026) 'default_local_dir': '/home/mshahidul/readctrl/code/RL_model/train_v2', +(TaskRunner pid=896026) 'del_local_ckpt_after_load': False, +(TaskRunner pid=896026) 'device': 'cuda', +(TaskRunner pid=896026) 'esi_redundant_time': 0, +(TaskRunner pid=896026) 'experiment_name': '', +(TaskRunner pid=896026) 'log_val_generations': 0, +(TaskRunner pid=896026) 'logger': ['console', 'wandb'], +(TaskRunner pid=896026) 'max_actor_ckpt_to_keep': 1, +(TaskRunner pid=896026) 'max_critic_ckpt_to_keep': 1, +(TaskRunner pid=896026) 'n_gpus_per_node': 2, +(TaskRunner pid=896026) 'nnodes': 1, +(TaskRunner pid=896026) 'project_name': '', +(TaskRunner pid=896026) 'ray_wait_register_center_timeout': 300, +(TaskRunner pid=896026) 'remove_previous_ckpt_in_save': True, +(TaskRunner pid=896026) 'resume_from_path': None, +(TaskRunner pid=896026) 'resume_mode': 'auto', +(TaskRunner pid=896026) 'rollout_data_dir': None, +(TaskRunner pid=896026) 'save_freq': 100, +(TaskRunner pid=896026) 'test_freq': 1, +(TaskRunner pid=896026) 'total_epochs': 15, +(TaskRunner pid=896026) 'total_training_steps': None, +(TaskRunner pid=896026) 'use_legacy_worker_impl': 'auto', +(TaskRunner pid=896026) 'val_before_train': True, +(TaskRunner pid=896026) 'val_only': False, +(TaskRunner pid=896026) 'validation_data_dir': None}, +(TaskRunner pid=896026) 'transfer_queue': {'enable': False}} +(TaskRunner pid=896026) /data/home_beta/mshahidul/readctrl/code/RL_model/verl/verl_train/verl/trainer/main_ppo.py:300: UserWarning: Disabled critic as algorithm.adv_estimator != gae. If it is not intended, please set critic.enable=True +(TaskRunner pid=896026) use_critic=need_critic(config), +(TaskRunner pid=896026) [validate_config] All configuration checks passed successfully! +(TaskRunner pid=896026) /data/home_beta/mshahidul/readctrl/code/RL_model/verl/verl_train/verl/utils/tokenizer.py:109: UserWarning: Failed to create processor: Unsupported processor type: Qwen2TokenizerFast. This may affect multimodal processing +(TaskRunner pid=896026) warnings.warn(f"Failed to create processor: {e}. This may affect multimodal processing", stacklevel=1) +(TaskRunner pid=896026) Using dataset class: RLHFDataset +(TaskRunner pid=896026) dataset len: 3226 +(TaskRunner pid=896026) Setting TOKENIZERS_PARALLELISM=false for forked processes. +(TaskRunner pid=896026) WARNING:2026-02-07 12:57:16,729:Setting TOKENIZERS_PARALLELISM=false for forked processes. +(TaskRunner pid=896026) Filtering prompts longer than 1024 tokens (num_proc=1): 0%| | 0/3226 [00:00 +(pid=897656) /home/mshahidul/miniconda3/envs/verl2/lib/python3.12/site-packages/torch/cuda/__init__.py:63: FutureWarning: The pynvml package is deprecated. Please install nvidia-ml-py instead. If you did not install pynvml directly, please report this to the maintainers of the package that installed pynvml for you. +(pid=897656) import pynvml # type: ignore[import] +(pid=897656) /home/mshahidul/miniconda3/envs/verl2/lib/python3.12/site-packages/megatron/core/models/backends.py:21: UserWarning: Apex is not installed. Falling back to Torch Norm +(pid=897656) warnings.warn("Apex is not installed. Falling back to Torch Norm") +(pid=897656) /home/mshahidul/miniconda3/envs/verl2/lib/python3.12/site-packages/megatron/core/optimizer/__init__.py:18: UserWarning: Transformer Engine and Apex are not installed. Falling back to Torch optimizers. +(pid=897656) warnings.warn( +(pid=897656) /home/mshahidul/miniconda3/envs/verl2/lib/python3.12/site-packages/megatron/core/optimizer/optimizer.py:28: UserWarning: Transformer Engine and Apex are not installed. Falling back to local implementations of multi_tensor_applier and multi_tensor_scale +(pid=897656) warnings.warn( +(pid=897656) /home/mshahidul/miniconda3/envs/verl2/lib/python3.12/site-packages/megatron/core/optimizer/clip_grads.py:29: UserWarning: Transformer Engine and Apex are not installed. Falling back to local implementations of multi_tensor_applier, multi_tensor_l2norm, and multi_tensor_scale +(pid=897656) warnings.warn( +(pid=897657) /home/mshahidul/miniconda3/envs/verl2/lib/python3.12/site-packages/torch/cuda/__init__.py:63: FutureWarning: The pynvml package is deprecated. Please install nvidia-ml-py instead. If you did not install pynvml directly, please report this to the maintainers of the package that installed pynvml for you. +(pid=897657) import pynvml # type: ignore[import] +(pid=897656) /home/mshahidul/miniconda3/envs/verl2/lib/python3.12/site-packages/megatron/core/models/gpt/gpt_layer_specs.py:67: UserWarning: Apex is not installed. Falling back to Torch Norm +(pid=897656) warnings.warn("Apex is not installed. Falling back to Torch Norm") +(pid=897657) /home/mshahidul/miniconda3/envs/verl2/lib/python3.12/site-packages/megatron/core/models/gpt/gpt_layer_specs.py:67: UserWarning: Apex is not installed. Falling back to Torch Norm [repeated 2x across cluster] (Ray deduplicates logs by default. Set RAY_DEDUP_LOGS=0 to disable log deduplication, or see https://docs.ray.io/en/master/ray-observability/user-guides/configure-logging.html#log-deduplication for more options.) +(pid=897657) warnings.warn("Apex is not installed. Falling back to Torch Norm") [repeated 2x across cluster] +(pid=897657) /home/mshahidul/miniconda3/envs/verl2/lib/python3.12/site-packages/megatron/core/optimizer/__init__.py:18: UserWarning: Transformer Engine and Apex are not installed. Falling back to Torch optimizers. +(pid=897657) warnings.warn( [repeated 3x across cluster] +(pid=897657) /home/mshahidul/miniconda3/envs/verl2/lib/python3.12/site-packages/megatron/core/optimizer/optimizer.py:28: UserWarning: Transformer Engine and Apex are not installed. Falling back to local implementations of multi_tensor_applier and multi_tensor_scale +(pid=897657) /home/mshahidul/miniconda3/envs/verl2/lib/python3.12/site-packages/megatron/core/optimizer/clip_grads.py:29: UserWarning: Transformer Engine and Apex are not installed. Falling back to local implementations of multi_tensor_applier, multi_tensor_l2norm, and multi_tensor_scale +(WorkerDict pid=897656) [Gloo] Rank 0 is connected to 1 peer ranks. Expected number of connected peer ranks is : 1 +(WorkerDict pid=897656) reference model: Qwen/Qwen3-4B-Instruct-2507 +(WorkerDict pid=897657) /data/home_beta/mshahidul/readctrl/code/RL_model/verl/verl_train/verl/utils/tokenizer.py:109: UserWarning: Failed to create processor: Unsupported processor type: Qwen2TokenizerFast. This may affect multimodal processing +(WorkerDict pid=897657) warnings.warn(f"Failed to create processor: {e}. This may affect multimodal processing", stacklevel=1) +(WorkerDict pid=897656) Model config after override: Qwen3Config { +(WorkerDict pid=897656) "architectures": [ +(WorkerDict pid=897656) "Qwen3ForCausalLM" +(WorkerDict pid=897656) ], +(WorkerDict pid=897656) "attention_bias": false, +(WorkerDict pid=897656) "attention_dropout": 0.0, +(WorkerDict pid=897656) "dtype": "bfloat16", +(WorkerDict pid=897656) "eos_token_id": 151645, +(WorkerDict pid=897656) "head_dim": 128, +(WorkerDict pid=897656) "hidden_act": "silu", +(WorkerDict pid=897656) "hidden_size": 2560, +(WorkerDict pid=897656) "initializer_range": 0.02, +(WorkerDict pid=897656) "intermediate_size": 9728, +(WorkerDict pid=897656) "layer_types": [ +(WorkerDict pid=897656) "full_attention", +(WorkerDict pid=897656) "full_attention", +(WorkerDict pid=897656) "full_attention", +(WorkerDict pid=897656) "full_attention", +(WorkerDict pid=897656) "full_attention", +(WorkerDict pid=897656) "full_attention", +(WorkerDict pid=897656) "full_attention", +(WorkerDict pid=897656) "full_attention", +(WorkerDict pid=897656) "full_attention", +(WorkerDict pid=897656) "full_attention", +(WorkerDict pid=897656) "full_attention", +(WorkerDict pid=897656) "full_attention", +(WorkerDict pid=897656) "full_attention", +(WorkerDict pid=897656) "full_attention", +(WorkerDict pid=897656) "full_attention", +(WorkerDict pid=897656) "full_attention", +(WorkerDict pid=897656) "full_attention", +(WorkerDict pid=897656) "full_attention", +(WorkerDict pid=897656) "full_attention", +(WorkerDict pid=897656) "full_attention", +(WorkerDict pid=897656) "full_attention", +(WorkerDict pid=897656) "full_attention", +(WorkerDict pid=897656) "full_attention", +(WorkerDict pid=897656) "full_attention", +(WorkerDict pid=897656) "full_attention", +(WorkerDict pid=897656) "full_attention", +(WorkerDict pid=897656) "full_attention", +(WorkerDict pid=897656) "full_attention", +(WorkerDict pid=897656) "full_attention", +(WorkerDict pid=897656) "full_attention", +(WorkerDict pid=897656) "full_attention", +(WorkerDict pid=897656) "full_attention", +(WorkerDict pid=897656) "full_attention", +(WorkerDict pid=897656) "full_attention", +(WorkerDict pid=897656) "full_attention", +(WorkerDict pid=897656) "full_attention" +(WorkerDict pid=897656) ], +(WorkerDict pid=897656) "max_position_embeddings": 262144, +(WorkerDict pid=897656) "max_window_layers": 36, +(WorkerDict pid=897656) "model_type": "qwen3", +(WorkerDict pid=897656) "num_attention_heads": 32, +(WorkerDict pid=897656) "num_hidden_layers": 36, +(WorkerDict pid=897656) "num_key_value_heads": 8, +(WorkerDict pid=897656) "pad_token_id": 151643, +(WorkerDict pid=897656) "rms_norm_eps": 1e-06, +(WorkerDict pid=897656) "rope_scaling": null, +(WorkerDict pid=897656) "rope_theta": 5000000, +(WorkerDict pid=897656) "sliding_window": null, +(WorkerDict pid=897656) "tie_word_embeddings": true, +(WorkerDict pid=897656) "transformers_version": "4.56.1", +(WorkerDict pid=897656) "use_cache": true, +(WorkerDict pid=897656) "use_sliding_window": false, +(WorkerDict pid=897656) "vocab_size": 151936 +(WorkerDict pid=897656) } +(WorkerDict pid=897656) +(WorkerDict pid=897657) `torch_dtype` is deprecated! Use `dtype` instead! +(WorkerDict pid=897657) Flash Attention 2 only supports torch.float16 and torch.bfloat16 dtypes, but the current dype in Qwen3ForCausalLM is torch.float32. You should run training or inference using Automatic Mixed-Precision via the `with torch.autocast(device_type='torch_device'):` decorator, or load the model with the `dtype` argument. Example: `model = AutoModel.from_pretrained("openai/whisper-tiny", attn_implementation="flash_attention_2", dtype=torch.float16)` +(WorkerDict pid=897657) Flash Attention 2 only supports torch.float16 and torch.bfloat16 dtypes, but the current dype in Qwen3Model is torch.float32. You should run training or inference using Automatic Mixed-Precision via the `with torch.autocast(device_type='torch_device'):` decorator, or load the model with the `dtype` argument. Example: `model = AutoModel.from_pretrained("openai/whisper-tiny", attn_implementation="flash_attention_2", dtype=torch.float16)` +(WorkerDict pid=897657) Loading checkpoint shards: 0%| | 0/3 [00:00, policies=[functools.partial(, transformer_layer_cls={})]) +(WorkerDict pid=897656) NCCL version 2.27.3+cuda12.9 +(WorkerDict pid=897656) +(WorkerDict pid=897656) [2026-02-07 12:58:25] gamma:897656:899170 [0] ras/client_support.cc:160 NCCL WARN Call to bind failed: Address already in use +(WorkerDict pid=897656) Monkey patch _flash_attention_forward in transformers.integrations.flash_attention +(WorkerDict pid=897656) Skipping monkey patch for Qwen3ForCausalLM as use_fused_kernels is False or fused_kernels_backend is torch +(WorkerDict pid=897657) +(WorkerDict pid=897656) Ref use_remove_padding=True +(WorkerDict pid=897656) Ref use_fused_kernels=False +(WorkerDict pid=897656) Ref use_prefix_grouper=False +(WorkerDict pid=897657) [2026-02-07 12:58:25] gamma:897657:899174 [0] ras/client_support.cc:160 NCCL WARN Call to bind failed: Address already in use +(WorkerDict pid=897656) /data/home_beta/mshahidul/readctrl/code/RL_model/verl/verl_train/verl/utils/tokenizer.py:109: UserWarning: Failed to create processor: Unsupported processor type: Qwen2TokenizerFast. This may affect multimodal processing +(WorkerDict pid=897656) warnings.warn(f"Failed to create processor: {e}. This may affect multimodal processing", stacklevel=1) +(WorkerDict pid=897656) Loading checkpoint shards: 67%|██████▋ | 2/3 [00:13<00:06, 6.60s/it] +(WorkerDict pid=897656) Loading checkpoint shards: 100%|██████████| 3/3 [00:13<00:00, 3.64s/it] Loading checkpoint shards: 100%|██████████| 3/3 [00:13<00:00, 4.49s/it] +(WorkerDict pid=897656) Model config after override: Qwen3Config { +(WorkerDict pid=897656) "architectures": [ +(WorkerDict pid=897656) "Qwen3ForCausalLM" +(WorkerDict pid=897656) ], +(WorkerDict pid=897656) "attention_bias": false, +(WorkerDict pid=897656) "attention_dropout": 0.0, +(WorkerDict pid=897656) "dtype": "bfloat16", +(WorkerDict pid=897656) "eos_token_id": 151645, +(WorkerDict pid=897656) "head_dim": 128, +(WorkerDict pid=897656) "hidden_act": "silu", +(WorkerDict pid=897656) "hidden_size": 2560, +(WorkerDict pid=897656) "initializer_range": 0.02, +(WorkerDict pid=897656) "intermediate_size": 9728, +(WorkerDict pid=897656) "layer_types": [ +(WorkerDict pid=897656) "full_attention", +(WorkerDict pid=897656) "full_attention", +(WorkerDict pid=897656) "full_attention", +(WorkerDict pid=897656) "full_attention", +(WorkerDict pid=897656) "full_attention", +(WorkerDict pid=897656) "full_attention", +(WorkerDict pid=897656) "full_attention", +(WorkerDict pid=897656) "full_attention", +(WorkerDict pid=897656) "full_attention", +(WorkerDict pid=897656) "full_attention", +(WorkerDict pid=897656) "full_attention", +(WorkerDict pid=897656) "full_attention", +(WorkerDict pid=897656) "full_attention", +(WorkerDict pid=897656) "full_attention", +(WorkerDict pid=897656) "full_attention", +(WorkerDict pid=897656) "full_attention", +(WorkerDict pid=897656) "full_attention", +(WorkerDict pid=897656) "full_attention", +(WorkerDict pid=897656) "full_attention", +(WorkerDict pid=897656) "full_attention", +(WorkerDict pid=897656) "full_attention", +(WorkerDict pid=897656) "full_attention", +(WorkerDict pid=897656) "full_attention", +(WorkerDict pid=897656) "full_attention", +(WorkerDict pid=897656) "full_attention", +(WorkerDict pid=897656) "full_attention", +(WorkerDict pid=897656) "full_attention", +(WorkerDict pid=897656) "full_attention", +(WorkerDict pid=897656) "full_attention", +(WorkerDict pid=897656) "full_attention", +(WorkerDict pid=897656) "full_attention", +(WorkerDict pid=897656) "full_attention", +(WorkerDict pid=897656) "full_attention", +(WorkerDict pid=897656) "full_attention", +(WorkerDict pid=897656) "full_attention", +(WorkerDict pid=897656) "full_attention" +(WorkerDict pid=897656) ], +(WorkerDict pid=897656) "max_position_embeddings": 262144, +(WorkerDict pid=897656) "max_window_layers": 36, +(WorkerDict pid=897656) "model_type": "qwen3", +(WorkerDict pid=897656) "num_attention_heads": 32, +(WorkerDict pid=897656) "num_hidden_layers": 36, +(WorkerDict pid=897656) "num_key_value_heads": 8, +(WorkerDict pid=897656) "pad_token_id": 151643, +(WorkerDict pid=897656) "rms_norm_eps": 1e-06, +(WorkerDict pid=897656) "rope_scaling": null, +(WorkerDict pid=897656) "rope_theta": 5000000, +(WorkerDict pid=897656) "sliding_window": null, +(WorkerDict pid=897656) "tie_word_embeddings": true, +(WorkerDict pid=897656) "transformers_version": "4.56.1", +(WorkerDict pid=897656) "use_cache": true, +(WorkerDict pid=897656) "use_sliding_window": false, +(WorkerDict pid=897656) "vocab_size": 151936 +(WorkerDict pid=897656) } +(WorkerDict pid=897656) +(WorkerDict pid=897656) Loading checkpoint shards: 0%| | 0/3 [00:00, policies=[functools.partial(, transformer_layer_cls={})]) +(WorkerDict pid=897656) Total steps: 6045, num_warmup_steps: 0 +(WorkerDict pid=897656) Actor use_remove_padding=True +(WorkerDict pid=897656) Actor use_fused_kernels=False +(WorkerDict pid=897656) Actor use_prefix_grouper=False +(WorkerDict pid=897656) Monkey patch _flash_attention_forward in transformers.integrations.flash_attention +(WorkerDict pid=897656) Skipping monkey patch for Qwen3ForCausalLM as use_fused_kernels is False or fused_kernels_backend is torch +(WorkerDict pid=897657) /data/home_beta/mshahidul/readctrl/code/RL_model/verl/verl_train/verl/utils/tokenizer.py:109: UserWarning: Failed to create processor: Unsupported processor type: Qwen2TokenizerFast. This may affect multimodal processing +(WorkerDict pid=897657) warnings.warn(f"Failed to create processor: {e}. This may affect multimodal processing", stacklevel=1) +(WorkerDict pid=897656) Loading checkpoint shards: 67%|██████▋ | 2/3 [00:13<00:06, 6.91s/it] +(WorkerDict pid=897656) Loading checkpoint shards: 100%|██████████| 3/3 [00:13<00:00, 3.81s/it] Loading checkpoint shards: 100%|██████████| 3/3 [00:13<00:00, 4.65s/it] +(WorkerDict pid=897656) [Gloo] Rank 0 is connected to 1 peer ranks. Expected number of connected peer ranks is : 1 +(WorkerDict pid=897656) [Gloo] Rank 0 is connected to 0 peer ranks. Expected number of connected peer ranks is : 0 +(WorkerDict pid=897656) [Gloo] Rank 0 is connected to 0 peer ranks. Expected number of connected peer ranks is : 0 +(WorkerDict pid=897656) /home/mshahidul/miniconda3/envs/verl2/lib/python3.12/site-packages/torch/distributed/fsdp/fully_sharded_data_parallel.py:678: FutureWarning: FSDP.state_dict_type() and FSDP.set_state_dict_type() are being deprecated. Please use APIs, get_state_dict() and set_state_dict(), which can support different parallelisms, FSDP1, FSDP2, DDP. API doc: https://pytorch.org/docs/stable/distributed.checkpoint.html#torch.distributed.checkpoint.state_dict.get_state_dict .Tutorial: https://pytorch.org/tutorials/recipes/distributed_checkpoint_recipe.html . +(WorkerDict pid=897656) warnings.warn( +(WorkerDict pid=897656) /data/home_beta/mshahidul/readctrl/code/RL_model/verl/verl_train/verl/utils/tokenizer.py:109: UserWarning: Failed to create processor: Unsupported processor type: Qwen2TokenizerFast. This may affect multimodal processing +(WorkerDict pid=897656) warnings.warn(f"Failed to create processor: {e}. This may affect multimodal processing", stacklevel=1) +(TaskRunner pid=896026) WARNING 02-07 12:59:32 [api_server.py:1213] LoRA dynamic loading & unloading is enabled in the API server. This should ONLY be used for local development! +(WorkerDict pid=897657) [Gloo] Rank 0 is connected to 0 peer ranks. Expected number of connected peer ranks is : 0 [repeated 3x across cluster] +(pid=901562) /home/mshahidul/miniconda3/envs/verl2/lib/python3.12/site-packages/torch/cuda/__init__.py:63: FutureWarning: The pynvml package is deprecated. Please install nvidia-ml-py instead. If you did not install pynvml directly, please report this to the maintainers of the package that installed pynvml for you. +(pid=901562) import pynvml # type: ignore[import] +(WorkerDict pid=897657) /home/mshahidul/miniconda3/envs/verl2/lib/python3.12/site-packages/torch/distributed/fsdp/fully_sharded_data_parallel.py:678: FutureWarning: FSDP.state_dict_type() and FSDP.set_state_dict_type() are being deprecated. Please use APIs, get_state_dict() and set_state_dict(), which can support different parallelisms, FSDP1, FSDP2, DDP. API doc: https://pytorch.org/docs/stable/distributed.checkpoint.html#torch.distributed.checkpoint.state_dict.get_state_dict .Tutorial: https://pytorch.org/tutorials/recipes/distributed_checkpoint_recipe.html . +(WorkerDict pid=897657) warnings.warn( +(pid=901562) WARNING 02-07 12:59:59 [api_server.py:1213] LoRA dynamic loading & unloading is enabled in the API server. This should ONLY be used for local development! +(pid=901571) WARNING 02-07 12:59:59 [api_server.py:1213] LoRA dynamic loading & unloading is enabled in the API server. This should ONLY be used for local development! +(vLLMHttpServer pid=901562) /home/mshahidul/miniconda3/envs/verl2/lib/python3.12/site-packages/megatron/core/models/backends.py:21: UserWarning: Apex is not installed. Falling back to Torch Norm +(vLLMHttpServer pid=901562) warnings.warn("Apex is not installed. Falling back to Torch Norm") +(pid=901571) /home/mshahidul/miniconda3/envs/verl2/lib/python3.12/site-packages/torch/cuda/__init__.py:63: FutureWarning: The pynvml package is deprecated. Please install nvidia-ml-py instead. If you did not install pynvml directly, please report this to the maintainers of the package that installed pynvml for you. +(pid=901571) import pynvml # type: ignore[import] +(vLLMHttpServer pid=901562) /home/mshahidul/miniconda3/envs/verl2/lib/python3.12/site-packages/megatron/core/optimizer/__init__.py:18: UserWarning: Transformer Engine and Apex are not installed. Falling back to Torch optimizers. +(vLLMHttpServer pid=901562) warnings.warn( +(vLLMHttpServer pid=901562) /home/mshahidul/miniconda3/envs/verl2/lib/python3.12/site-packages/megatron/core/optimizer/optimizer.py:28: UserWarning: Transformer Engine and Apex are not installed. Falling back to local implementations of multi_tensor_applier and multi_tensor_scale +(vLLMHttpServer pid=901562) warnings.warn( +(vLLMHttpServer pid=901562) /home/mshahidul/miniconda3/envs/verl2/lib/python3.12/site-packages/megatron/core/optimizer/clip_grads.py:29: UserWarning: Transformer Engine and Apex are not installed. Falling back to local implementations of multi_tensor_applier, multi_tensor_l2norm, and multi_tensor_scale +(vLLMHttpServer pid=901562) warnings.warn( +(vLLMHttpServer pid=901562) /home/mshahidul/miniconda3/envs/verl2/lib/python3.12/site-packages/megatron/core/models/gpt/gpt_layer_specs.py:67: UserWarning: Apex is not installed. Falling back to Torch Norm +(vLLMHttpServer pid=901562) warnings.warn("Apex is not installed. Falling back to Torch Norm") +(vLLMHttpServer pid=901562) /data/home_beta/mshahidul/readctrl/code/RL_model/verl/verl_train/verl/utils/tokenizer.py:109: UserWarning: Failed to create processor: Unsupported processor type: Qwen2TokenizerFast. This may affect multimodal processing +(vLLMHttpServer pid=901562) warnings.warn(f"Failed to create processor: {e}. This may affect multimodal processing", stacklevel=1) +(vLLMHttpServer pid=901562) WARNING:2026-02-07 13:00:03,461:agent loop only support torch and npu profiler, got None +(vLLMHttpServer pid=901562) INFO:2026-02-07 13:00:03,463:vLLMHttpServer, replica_rank: 1, node_rank: 0, CUDA_VISIBLE_DEVICES: 3, master_address: 172.16.34.29, master_port: 32975, data_parallel_rpc_port: 43977, data_parallel_master_port: 36417 +(vLLMHttpServer pid=901562) INFO:2026-02-07 13:00:03,480:override_generation_config: {'temperature': 1.0, 'top_k': -1, 'top_p': 1, 'repetition_penalty': 1.0, 'max_new_tokens': 2048} +(vLLMHttpServer pid=901562) INFO:2026-02-07 13:00:03,480:enable_sleep_mode: True +(vLLMHttpServer pid=901562) `torch_dtype` is deprecated! Use `dtype` instead! +(vLLMHttpServer pid=901562) WARNING 02-07 13:00:05 [__init__.py:3036] We must use the `spawn` multiprocessing start method. Overriding VLLM_WORKER_MULTIPROC_METHOD to 'spawn'. See https://docs.vllm.ai/en/latest/usage/troubleshooting.html#python-multiprocessing for more information. Reasons: In a Ray actor and can only be spawned; CUDA is initialized +(vLLMHttpServer pid=901562) /home/mshahidul/miniconda3/envs/verl2/lib/python3.12/site-packages/torch/cuda/__init__.py:63: FutureWarning: The pynvml package is deprecated. Please install nvidia-ml-py instead. If you did not install pynvml directly, please report this to the maintainers of the package that installed pynvml for you. +(vLLMHttpServer pid=901562) import pynvml # type: ignore[import] +(vLLMHttpServer pid=901571) /home/mshahidul/miniconda3/envs/verl2/lib/python3.12/site-packages/megatron/core/models/gpt/gpt_layer_specs.py:67: UserWarning: Apex is not installed. Falling back to Torch Norm [repeated 2x across cluster] +(vLLMHttpServer pid=901571) warnings.warn("Apex is not installed. Falling back to Torch Norm") [repeated 2x across cluster] +(vLLMHttpServer pid=901571) /home/mshahidul/miniconda3/envs/verl2/lib/python3.12/site-packages/megatron/core/optimizer/__init__.py:18: UserWarning: Transformer Engine and Apex are not installed. Falling back to Torch optimizers. +(vLLMHttpServer pid=901571) warnings.warn( [repeated 3x across cluster] +(vLLMHttpServer pid=901571) /home/mshahidul/miniconda3/envs/verl2/lib/python3.12/site-packages/megatron/core/optimizer/optimizer.py:28: UserWarning: Transformer Engine and Apex are not installed. Falling back to local implementations of multi_tensor_applier and multi_tensor_scale +(vLLMHttpServer pid=901571) /home/mshahidul/miniconda3/envs/verl2/lib/python3.12/site-packages/megatron/core/optimizer/clip_grads.py:29: UserWarning: Transformer Engine and Apex are not installed. Falling back to local implementations of multi_tensor_applier, multi_tensor_l2norm, and multi_tensor_scale +(vLLMHttpServer pid=901562) /home/mshahidul/miniconda3/envs/verl2/lib/python3.12/site-packages/torch/cuda/__init__.py:63: FutureWarning: The pynvml package is deprecated. Please install nvidia-ml-py instead. If you did not install pynvml directly, please report this to the maintainers of the package that installed pynvml for you. +(vLLMHttpServer pid=901562) import pynvml # type: ignore[import] +(vLLMHttpServer pid=901571) /data/home_beta/mshahidul/readctrl/code/RL_model/verl/verl_train/verl/utils/tokenizer.py:109: UserWarning: Failed to create processor: Unsupported processor type: Qwen2TokenizerFast. This may affect multimodal processing +(vLLMHttpServer pid=901571) warnings.warn(f"Failed to create processor: {e}. This may affect multimodal processing", stacklevel=1) +(vLLMHttpServer pid=901571) WARNING:2026-02-07 13:00:30,122:agent loop only support torch and npu profiler, got None +(vLLMHttpServer pid=901571) INFO:2026-02-07 13:00:30,124:vLLMHttpServer, replica_rank: 0, node_rank: 0, CUDA_VISIBLE_DEVICES: 2, master_address: 172.16.34.29, master_port: 40643, data_parallel_rpc_port: 37497, data_parallel_master_port: 45967 +(vLLMHttpServer pid=901571) INFO:2026-02-07 13:00:30,143:override_generation_config: {'temperature': 1.0, 'top_k': -1, 'top_p': 1, 'repetition_penalty': 1.0, 'max_new_tokens': 2048} +(vLLMHttpServer pid=901571) INFO:2026-02-07 13:00:30,143:enable_sleep_mode: True +(vLLMHttpServer pid=901571) ['serve', +(vLLMHttpServer pid=901571) 'Qwen/Qwen3-4B-Instruct-2507', +(vLLMHttpServer pid=901571) '--dtype', +(vLLMHttpServer pid=901571) 'bfloat16', +(vLLMHttpServer pid=901571) '--load_format', +(vLLMHttpServer pid=901571) 'dummy', +(vLLMHttpServer pid=901571) '--distributed_executor_backend', +(vLLMHttpServer pid=901571) 'mp', +(vLLMHttpServer pid=901571) '--worker_extension_cls', +(vLLMHttpServer pid=901571) 'verl.workers.rollout.vllm_rollout.utils.vLLMColocateWorkerExtension', +(vLLMHttpServer pid=901571) '--max_model_len', +(vLLMHttpServer pid=901571) '8192', +(vLLMHttpServer pid=901571) '--max_num_seqs', +(vLLMHttpServer pid=901571) '1024', +(vLLMHttpServer pid=901571) '--enable_chunked_prefill', +(vLLMHttpServer pid=901571) '--max_num_batched_tokens', +(vLLMHttpServer pid=901571) '8192', +(vLLMHttpServer pid=901571) '--enable_prefix_caching', +(vLLMHttpServer pid=901571) '--enable_sleep_mode', +(vLLMHttpServer pid=901571) '--logprobs_mode', +(vLLMHttpServer pid=901571) 'processed_logprobs', +(vLLMHttpServer pid=901571) '--gpu_memory_utilization', +(vLLMHttpServer pid=901571) '0.6', +(vLLMHttpServer pid=901571) '--disable_log_stats', +(vLLMHttpServer pid=901571) '--tensor_parallel_size', +(vLLMHttpServer pid=901571) '1', +(vLLMHttpServer pid=901571) '--seed', +(vLLMHttpServer pid=901571) '0', +(vLLMHttpServer pid=901571) '--override_generation_config', +(vLLMHttpServer pid=901571) '{"temperature": 1.0, "top_k": -1, "top_p": 1, "repetition_penalty": 1.0, ' +(vLLMHttpServer pid=901571) '"max_new_tokens": 2048}', +(vLLMHttpServer pid=901571) '--hf_overrides', +(vLLMHttpServer pid=901571) '{}', +(vLLMHttpServer pid=901571) '--scheduling_policy', +(vLLMHttpServer pid=901571) 'fcfs', +(vLLMHttpServer pid=901571) '--compilation_config', +(vLLMHttpServer pid=901571) '{"cudagraph_mode": "FULL_AND_PIECEWISE"}'] +(vLLMHttpServer pid=901571) `torch_dtype` is deprecated! Use `dtype` instead! +(vLLMHttpServer pid=901571) WARNING 02-07 13:00:31 [__init__.py:3036] We must use the `spawn` multiprocessing start method. Overriding VLLM_WORKER_MULTIPROC_METHOD to 'spawn'. See https://docs.vllm.ai/en/latest/usage/troubleshooting.html#python-multiprocessing for more information. Reasons: In a Ray actor and can only be spawned; CUDA is initialized +(vLLMHttpServer pid=901571) /home/mshahidul/miniconda3/envs/verl2/lib/python3.12/site-packages/torch/cuda/__init__.py:63: FutureWarning: The pynvml package is deprecated. Please install nvidia-ml-py instead. If you did not install pynvml directly, please report this to the maintainers of the package that installed pynvml for you. +(vLLMHttpServer pid=901571) import pynvml # type: ignore[import] +(vLLMHttpServer pid=901562) W0207 13:00:51.123000 903087 /data/home_beta/mshahidul/miniconda3/envs/verl2/lib/python3.12/site-packages/torch/utils/cpp_extension.py:2425] TORCH_CUDA_ARCH_LIST is not set, all archs for visible cards are included for compilation. +(vLLMHttpServer pid=901562) W0207 13:00:51.123000 903087 /data/home_beta/mshahidul/miniconda3/envs/verl2/lib/python3.12/site-packages/torch/utils/cpp_extension.py:2425] If this is not desired, please set os.environ['TORCH_CUDA_ARCH_LIST'] to specific architectures. +(vLLMHttpServer pid=901571) /home/mshahidul/miniconda3/envs/verl2/lib/python3.12/site-packages/torch/cuda/__init__.py:63: FutureWarning: The pynvml package is deprecated. Please install nvidia-ml-py instead. If you did not install pynvml directly, please report this to the maintainers of the package that installed pynvml for you. +(vLLMHttpServer pid=901571) import pynvml # type: ignore[import] +(vLLMHttpServer pid=901562) [Gloo] Rank 0 is connected to 0 peer ranks. Expected number of connected peer ranks is : 0 +(vLLMHttpServer pid=901562) [Gloo] Rank 0 is connected to 0 peer ranks. Expected number of connected peer ranks is : 0 +(vLLMHttpServer pid=901562) [Gloo] Rank 0 is connected to 0 peer ranks. Expected number of connected peer ranks is : 0 +(vLLMHttpServer pid=901562) [Gloo] Rank 0 is connected to 0 peer ranks. Expected number of connected peer ranks is : 0 +(vLLMHttpServer pid=901562) [Gloo] Rank 0 is connected to 0 peer ranks. Expected number of connected peer ranks is : 0 +(vLLMHttpServer pid=901562) [Gloo] Rank 0 is connected to 0 peer ranks. Expected number of connected peer ranks is : 0 +(vLLMHttpServer pid=901571) W0207 13:01:17.192000 903817 /data/home_beta/mshahidul/miniconda3/envs/verl2/lib/python3.12/site-packages/torch/utils/cpp_extension.py:2425] TORCH_CUDA_ARCH_LIST is not set, all archs for visible cards are included for compilation. +(vLLMHttpServer pid=901571) W0207 13:01:17.192000 903817 /data/home_beta/mshahidul/miniconda3/envs/verl2/lib/python3.12/site-packages/torch/utils/cpp_extension.py:2425] If this is not desired, please set os.environ['TORCH_CUDA_ARCH_LIST'] to specific architectures. +(vLLMHttpServer pid=901571) [Gloo] Rank 0 is connected to 0 peer ranks. Expected number of connected peer ranks is : 0 [repeated 6x across cluster] +(vLLMHttpServer pid=901562) (Worker pid=903087) Capturing CUDA graphs (mixed prefill-decode, PIECEWISE): 0%| | 0/67 [00:00