| | /home/mshahidul/miniconda3/envs/verl/lib/python3.12/site-packages/torch/cuda/__init__.py:63: FutureWarning: The pynvml package is deprecated. Please install nvidia-ml-py instead. If you did not install pynvml directly, please report this to the maintainers of the package that installed pynvml for you. |
| | import pynvml # type: ignore[import] |
| | 2026-02-01 20:43:15,317 INFO worker.py:2014 -- Started a local Ray instance. View the dashboard at [1m[32mhttp: |
| | /home/mshahidul/miniconda3/envs/verl/lib/python3.12/site-packages/ray/_private/worker.py:2062: FutureWarning: Tip: In future versions of Ray, Ray will no longer override accelerator visible devices env var if num_gpus=0 or num_gpus=None (default). To enable this behavior and turn off this error message, set RAY_ACCEL_ENV_VAR_OVERRIDE_ON_ZERO=0 |
| | warnings.warn( |
| | [36m(pid=1646422)[0m /home/mshahidul/miniconda3/envs/verl/lib/python3.12/site-packages/torch/cuda/__init__.py:63: FutureWarning: The pynvml package is deprecated. Please install nvidia-ml-py instead. If you did not install pynvml directly, please report this to the maintainers of the package that installed pynvml for you. |
| | [36m(pid=1646422)[0m import pynvml # type: ignore[import] |
| | [36m(main_task pid=1646422)[0m {'actor_rollout_ref': {'actor': {'clip_ratio': 0.2, |
| | [36m(main_task pid=1646422)[0m 'entropy_coeff': 0.001, |
| | [36m(main_task pid=1646422)[0m 'fsdp_config': {'fsdp_size': -1, |
| | [36m(main_task pid=1646422)[0m 'grad_offload': False, |
| | [36m(main_task pid=1646422)[0m 'optimizer_offload': True, |
| | [36m(main_task pid=1646422)[0m 'param_offload': True, |
| | [36m(main_task pid=1646422)[0m 'wrap_policy': {'min_num_params': 0}}, |
| | [36m(main_task pid=1646422)[0m 'grad_clip': 1.0, |
| | [36m(main_task pid=1646422)[0m 'kl_loss_coef': 0.001, |
| | [36m(main_task pid=1646422)[0m 'kl_loss_type': 'low_var_kl', |
| | [36m(main_task pid=1646422)[0m 'optim': {'lr': 1e-06, |
| | [36m(main_task pid=1646422)[0m 'lr_warmup_steps_ratio': 0.0, |
| | [36m(main_task pid=1646422)[0m 'min_lr_ratio': None, |
| | [36m(main_task pid=1646422)[0m 'total_training_steps': -1, |
| | [36m(main_task pid=1646422)[0m 'warmup_style': 'constant'}, |
| | [36m(main_task pid=1646422)[0m 'ppo_epochs': 1, |
| | [36m(main_task pid=1646422)[0m 'ppo_max_token_len_per_gpu': 16384, |
| | [36m(main_task pid=1646422)[0m 'ppo_micro_batch_size': 64, |
| | [36m(main_task pid=1646422)[0m 'ppo_micro_batch_size_per_gpu': 16, |
| | [36m(main_task pid=1646422)[0m 'ppo_mini_batch_size': 64, |
| | [36m(main_task pid=1646422)[0m 'shuffle': False, |
| | [36m(main_task pid=1646422)[0m 'state_masking': False, |
| | [36m(main_task pid=1646422)[0m 'strategy': 'fsdp', |
| | [36m(main_task pid=1646422)[0m 'ulysses_sequence_parallel_size': 1, |
| | [36m(main_task pid=1646422)[0m 'use_dynamic_bsz': False, |
| | [36m(main_task pid=1646422)[0m 'use_kl_loss': False}, |
| | [36m(main_task pid=1646422)[0m 'hybrid_engine': True, |
| | [36m(main_task pid=1646422)[0m 'model': {'enable_gradient_checkpointing': True, |
| | [36m(main_task pid=1646422)[0m 'external_lib': None, |
| | [36m(main_task pid=1646422)[0m 'override_config': {}, |
| | [36m(main_task pid=1646422)[0m 'path': 'Qwen/Qwen3-4B-Instruct-2507', |
| | [36m(main_task pid=1646422)[0m 'use_remove_padding': False}, |
| | [36m(main_task pid=1646422)[0m 'ref': {'fsdp_config': {'fsdp_size': -1, |
| | [36m(main_task pid=1646422)[0m 'param_offload': True, |
| | [36m(main_task pid=1646422)[0m 'wrap_policy': {'min_num_params': 0}}, |
| | [36m(main_task pid=1646422)[0m 'log_prob_max_token_len_per_gpu': 16384, |
| | [36m(main_task pid=1646422)[0m 'log_prob_micro_batch_size': 64, |
| | [36m(main_task pid=1646422)[0m 'log_prob_use_dynamic_bsz': False, |
| | [36m(main_task pid=1646422)[0m 'ulysses_sequence_parallel_size': 1}, |
| | [36m(main_task pid=1646422)[0m 'rollout': {'do_sample': True, |
| | [36m(main_task pid=1646422)[0m 'dtype': 'bfloat16', |
| | [36m(main_task pid=1646422)[0m 'enforce_eager': True, |
| | [36m(main_task pid=1646422)[0m 'free_cache_engine': True, |
| | [36m(main_task pid=1646422)[0m 'gpu_memory_utilization': 0.4, |
| | [36m(main_task pid=1646422)[0m 'ignore_eos': False, |
| | [36m(main_task pid=1646422)[0m 'load_format': 'dummy_dtensor', |
| | [36m(main_task pid=1646422)[0m 'log_prob_max_token_len_per_gpu': 16384, |
| | [36m(main_task pid=1646422)[0m 'log_prob_micro_batch_size': 64, |
| | [36m(main_task pid=1646422)[0m 'log_prob_use_dynamic_bsz': False, |
| | [36m(main_task pid=1646422)[0m 'max_num_batched_tokens': 8192, |
| | [36m(main_task pid=1646422)[0m 'max_num_seqs': 1024, |
| | [36m(main_task pid=1646422)[0m 'n': 1, |
| | [36m(main_task pid=1646422)[0m 'n_agent': 1, |
| | [36m(main_task pid=1646422)[0m 'name': 'vllm', |
| | [36m(main_task pid=1646422)[0m 'prompt_length': 4096, |
| | [36m(main_task pid=1646422)[0m 'response_length': 1024, |
| | [36m(main_task pid=1646422)[0m 'temperature': 1.0, |
| | [36m(main_task pid=1646422)[0m 'tensor_model_parallel_size': 1, |
| | [36m(main_task pid=1646422)[0m 'top_k': -1, |
| | [36m(main_task pid=1646422)[0m 'top_p': 0.95}}, |
| | [36m(main_task pid=1646422)[0m 'algorithm': {'adv_estimator': 'grpo', |
| | [36m(main_task pid=1646422)[0m 'gamma': 1.0, |
| | [36m(main_task pid=1646422)[0m 'kl_ctrl': {'kl_coef': 0.001, 'type': 'fixed'}, |
| | [36m(main_task pid=1646422)[0m 'kl_penalty': 'kl', |
| | [36m(main_task pid=1646422)[0m 'lam': 1.0, |
| | [36m(main_task pid=1646422)[0m 'no_think_rl': False, |
| | [36m(main_task pid=1646422)[0m 'state_masking': {'end_state_marker': '</information>', |
| | [36m(main_task pid=1646422)[0m 'start_state_marker': '<information>'}}, |
| | [36m(main_task pid=1646422)[0m 'critic': {'cliprange_value': 0.5, |
| | [36m(main_task pid=1646422)[0m 'forward_max_token_len_per_gpu': 32768, |
| | [36m(main_task pid=1646422)[0m 'forward_micro_batch_size': 64, |
| | [36m(main_task pid=1646422)[0m 'grad_clip': 1.0, |
| | [36m(main_task pid=1646422)[0m 'model': {'enable_gradient_checkpointing': False, |
| | [36m(main_task pid=1646422)[0m 'external_lib': None, |
| | [36m(main_task pid=1646422)[0m 'fsdp_config': {'fsdp_size': -1, |
| | [36m(main_task pid=1646422)[0m 'grad_offload': False, |
| | [36m(main_task pid=1646422)[0m 'optimizer_offload': False, |
| | [36m(main_task pid=1646422)[0m 'param_offload': False, |
| | [36m(main_task pid=1646422)[0m 'wrap_policy': {'min_num_params': 0}}, |
| | [36m(main_task pid=1646422)[0m 'override_config': {}, |
| | [36m(main_task pid=1646422)[0m 'path': '~/models/deepseek-llm-7b-chat', |
| | [36m(main_task pid=1646422)[0m 'tokenizer_path': 'Qwen/Qwen3-4B-Instruct-2507', |
| | [36m(main_task pid=1646422)[0m 'use_remove_padding': False}, |
| | [36m(main_task pid=1646422)[0m 'optim': {'lr': 1e-05, |
| | [36m(main_task pid=1646422)[0m 'lr_warmup_steps_ratio': 0.0, |
| | [36m(main_task pid=1646422)[0m 'min_lr_ratio': None, |
| | [36m(main_task pid=1646422)[0m 'total_training_steps': -1, |
| | [36m(main_task pid=1646422)[0m 'warmup_style': 'constant'}, |
| | [36m(main_task pid=1646422)[0m 'ppo_epochs': 1, |
| | [36m(main_task pid=1646422)[0m 'ppo_max_token_len_per_gpu': 32768, |
| | [36m(main_task pid=1646422)[0m 'ppo_micro_batch_size': 64, |
| | [36m(main_task pid=1646422)[0m 'ppo_mini_batch_size': 64, |
| | [36m(main_task pid=1646422)[0m 'shuffle': False, |
| | [36m(main_task pid=1646422)[0m 'strategy': 'fsdp', |
| | [36m(main_task pid=1646422)[0m 'ulysses_sequence_parallel_size': 1, |
| | [36m(main_task pid=1646422)[0m 'use_dynamic_bsz': False}, |
| | [36m(main_task pid=1646422)[0m 'data': {'max_obs_length': 512, |
| | [36m(main_task pid=1646422)[0m 'max_prompt_length': 4096, |
| | [36m(main_task pid=1646422)[0m 'max_response_length': 1024, |
| | [36m(main_task pid=1646422)[0m 'max_start_length': 256, |
| | [36m(main_task pid=1646422)[0m 'prompt_key': 'prompt', |
| | [36m(main_task pid=1646422)[0m 'return_raw_chat': False, |
| | [36m(main_task pid=1646422)[0m 'return_raw_input_ids': False, |
| | [36m(main_task pid=1646422)[0m 'shuffle_train_dataloader': True, |
| | [36m(main_task pid=1646422)[0m 'tokenizer': None, |
| | [36m(main_task pid=1646422)[0m 'train_batch_size': 128, |
| | [36m(main_task pid=1646422)[0m 'train_data_num': None, |
| | [36m(main_task pid=1646422)[0m 'train_files': '/home/mshahidul/readctrl/code/RL_model/verl/Search-R1/dataset/train.parquet', |
| | [36m(main_task pid=1646422)[0m 'val_batch_size': 64, |
| | [36m(main_task pid=1646422)[0m 'val_data_num': None, |
| | [36m(main_task pid=1646422)[0m 'val_files': '/home/mshahidul/readctrl/code/RL_model/verl/Search-R1/dataset/test.parquet'}, |
| | [36m(main_task pid=1646422)[0m 'do_search': False, |
| | [36m(main_task pid=1646422)[0m 'max_turns': 1, |
| | [36m(main_task pid=1646422)[0m 'retriever': {'topk': 3, 'url': 'http://127.0.0.1:8000/retrieve'}, |
| | [36m(main_task pid=1646422)[0m 'reward_model': {'enable': False, |
| | [36m(main_task pid=1646422)[0m 'final_format_score': 0, |
| | [36m(main_task pid=1646422)[0m 'forward_max_token_len_per_gpu': 32768, |
| | [36m(main_task pid=1646422)[0m 'max_length': None, |
| | [36m(main_task pid=1646422)[0m 'micro_batch_size': 64, |
| | [36m(main_task pid=1646422)[0m 'model': {'external_lib': None, |
| | [36m(main_task pid=1646422)[0m 'fsdp_config': {'min_num_params': 0, |
| | [36m(main_task pid=1646422)[0m 'param_offload': False}, |
| | [36m(main_task pid=1646422)[0m 'input_tokenizer': 'Qwen/Qwen3-4B-Instruct-2507', |
| | [36m(main_task pid=1646422)[0m 'path': '~/models/FsfairX-LLaMA3-RM-v0.1', |
| | [36m(main_task pid=1646422)[0m 'use_remove_padding': False}, |
| | [36m(main_task pid=1646422)[0m 'retrieval_score': 0, |
| | [36m(main_task pid=1646422)[0m 'strategy': 'fsdp', |
| | [36m(main_task pid=1646422)[0m 'structure_format_score': 0, |
| | [36m(main_task pid=1646422)[0m 'ulysses_sequence_parallel_size': 1, |
| | [36m(main_task pid=1646422)[0m 'use_dynamic_bsz': False}, |
| | [36m(main_task pid=1646422)[0m 'trainer': {'critic_warmup': 0, |
| | [36m(main_task pid=1646422)[0m 'default_hdfs_dir': '~/experiments/gsm8k/ppo/llm_guard_3B_10k_v2', |
| | [36m(main_task pid=1646422)[0m 'default_local_dir': 'verl_checkpoints/llm_guard_3B_10k_v2', |
| | [36m(main_task pid=1646422)[0m 'experiment_name': 'llm_guard_3B_10k_v2', |
| | [36m(main_task pid=1646422)[0m 'logger': ['wandb'], |
| | [36m(main_task pid=1646422)[0m 'n_gpus_per_node': 2, |
| | [36m(main_task pid=1646422)[0m 'nnodes': 1, |
| | [36m(main_task pid=1646422)[0m 'project_name': '', |
| | [36m(main_task pid=1646422)[0m 'save_freq': 100, |
| | [36m(main_task pid=1646422)[0m 'test_freq': 50, |
| | [36m(main_task pid=1646422)[0m 'total_epochs': 15, |
| | [36m(main_task pid=1646422)[0m 'total_training_steps': 1005}} |
| | [36m(main_task pid=1646422)[0m W0201 20:43:46.380000 1646422 /data/home_beta/mshahidul/miniconda3/envs/verl/lib/python3.12/site-packages/torch/utils/cpp_extension.py:117] No CUDA runtime is found, using CUDA_HOME='/usr/local/cuda' |
| | Error executing job with overrides: ['data.train_files=/home/mshahidul/readctrl/code/RL_model/verl/Search-R1/dataset/train.parquet', 'data.val_files=/home/mshahidul/readctrl/code/RL_model/verl/Search-R1/dataset/test.parquet', 'data.train_batch_size=128', 'data.val_batch_size=64', 'data.max_prompt_length=4096', 'data.max_response_length=1024', 'data.shuffle_train_dataloader=True', 'algorithm.adv_estimator=grpo', 'actor_rollout_ref.model.path=Qwen/Qwen3-4B-Instruct-2507', 'actor_rollout_ref.model.enable_gradient_checkpointing=true', 'actor_rollout_ref.model.use_remove_padding=False', 'actor_rollout_ref.actor.optim.lr=1e-6', 'actor_rollout_ref.actor.ppo_mini_batch_size=64', '+actor_rollout_ref.actor.ppo_micro_batch_size_per_gpu=16', 'actor_rollout_ref.actor.fsdp_config.param_offload=true', 'actor_rollout_ref.actor.fsdp_config.optimizer_offload=true', 'actor_rollout_ref.rollout.log_prob_micro_batch_size=64', 'actor_rollout_ref.rollout.tensor_model_parallel_size=1', 'actor_rollout_ref.rollout.name=vllm', 'actor_rollout_ref.rollout.gpu_memory_utilization=0.4', 'actor_rollout_ref.ref.log_prob_micro_batch_size=64', 'actor_rollout_ref.ref.fsdp_config.param_offload=True', 'actor_rollout_ref.actor.kl_loss_coef=0.001', 'trainer.logger=[wandb]', 'trainer.n_gpus_per_node=2', 'trainer.nnodes=1', 'trainer.save_freq=100', 'trainer.test_freq=50', 'trainer.project_name=', 'trainer.experiment_name=llm_guard_3B_10k_v2', 'trainer.total_epochs=15', 'trainer.total_training_steps=1005', 'trainer.default_local_dir=verl_checkpoints/llm_guard_3B_10k_v2', 'do_search=false', 'max_turns=1'] |
| | Traceback (most recent call last): |
| | File "/data/home_beta/mshahidul/readctrl/code/RL_model/verl/Search-R1/verl/trainer/main_ppo.py", line 110, in main |
| | ray.get(main_task.remote(config)) |
| | File "/home/mshahidul/miniconda3/envs/verl/lib/python3.12/site-packages/ray/_private/auto_init_hook.py", line 22, in auto_init_wrapper |
| | return fn(*args, **kwargs) |
| | ^^^^^^^^^^^^^^^^^^^ |
| | File "/home/mshahidul/miniconda3/envs/verl/lib/python3.12/site-packages/ray/_private/client_mode_hook.py", line 104, in wrapper |
| | return func(*args, **kwargs) |
| | ^^^^^^^^^^^^^^^^^^^^^ |
| | File "/home/mshahidul/miniconda3/envs/verl/lib/python3.12/site-packages/ray/_private/worker.py", line 2972, in get |
| | values, debugger_breakpoint = worker.get_objects( |
| | ^^^^^^^^^^^^^^^^^^^ |
| | File "/home/mshahidul/miniconda3/envs/verl/lib/python3.12/site-packages/ray/_private/worker.py", line 1031, in get_objects |
| | raise value.as_instanceof_cause() |
| | ray.exceptions.RayTaskError(ImportError): [36mray::main_task()[39m (pid=1646422, ip=172.16.34.29) |
| | File "/data/home_beta/mshahidul/readctrl/code/RL_model/verl/Search-R1/verl/trainer/main_ppo.py", line 136, in main_task |
| | from verl.workers.fsdp_workers import ActorRolloutRefWorker, CriticWorker |
| | File "/data/home_beta/mshahidul/readctrl/code/RL_model/verl/Search-R1/verl/workers/fsdp_workers.py", line 39, in <module> |
| | from verl.workers.sharding_manager.fsdp_ulysses import FSDPUlyssesShardingManager |
| | File "/data/home_beta/mshahidul/readctrl/code/RL_model/verl/Search-R1/verl/workers/sharding_manager/__init__.py", line 23, in <module> |
| | from .megatron_vllm import AllGatherPPModel, MegatronVLLMShardingManager |
| | File "/data/home_beta/mshahidul/readctrl/code/RL_model/verl/Search-R1/verl/workers/sharding_manager/megatron_vllm.py", line 230, in <module> |
| | from verl.third_party.vllm import parallel_state as vllm_ps |
| | File "/data/home_beta/mshahidul/readctrl/code/RL_model/verl/Search-R1/verl/third_party/vllm/__init__.py", line 52, in <module> |
| | from vllm import LLM, LLMEngine, parallel_state |
| | ImportError: cannot import name 'parallel_state' from 'vllm' (/home/mshahidul/miniconda3/envs/verl/lib/python3.12/site-packages/vllm/__init__.py) |
| |
|
| | Set the environment variable HYDRA_FULL_ERROR=1 for a complete stack trace. |
| | [W201 20:43:50.186179538 AllocatorConfig.cpp:28] Warning: PYTORCH_CUDA_ALLOC_CONF is deprecated, use PYTORCH_ALLOC_CONF instead (function operator()) |
| |
|