/home/mshahidul/miniconda3/envs/verl/lib/python3.12/site-packages/torch/cuda/__init__.py:63: FutureWarning: The pynvml package is deprecated. Please install nvidia-ml-py instead. If you did not install pynvml directly, please report this to the maintainers of the package that installed pynvml for you. import pynvml # type: ignore[import] 2026-02-01 20:43:15,317 INFO worker.py:2014 -- Started a local Ray instance. View the dashboard at http://127.0.0.1:8301  /home/mshahidul/miniconda3/envs/verl/lib/python3.12/site-packages/ray/_private/worker.py:2062: FutureWarning: Tip: In future versions of Ray, Ray will no longer override accelerator visible devices env var if num_gpus=0 or num_gpus=None (default). To enable this behavior and turn off this error message, set RAY_ACCEL_ENV_VAR_OVERRIDE_ON_ZERO=0 warnings.warn( (pid=1646422) /home/mshahidul/miniconda3/envs/verl/lib/python3.12/site-packages/torch/cuda/__init__.py:63: FutureWarning: The pynvml package is deprecated. Please install nvidia-ml-py instead. If you did not install pynvml directly, please report this to the maintainers of the package that installed pynvml for you. (pid=1646422) import pynvml # type: ignore[import] (main_task pid=1646422) {'actor_rollout_ref': {'actor': {'clip_ratio': 0.2, (main_task pid=1646422) 'entropy_coeff': 0.001, (main_task pid=1646422) 'fsdp_config': {'fsdp_size': -1, (main_task pid=1646422) 'grad_offload': False, (main_task pid=1646422) 'optimizer_offload': True, (main_task pid=1646422) 'param_offload': True, (main_task pid=1646422) 'wrap_policy': {'min_num_params': 0}}, (main_task pid=1646422) 'grad_clip': 1.0, (main_task pid=1646422) 'kl_loss_coef': 0.001, (main_task pid=1646422) 'kl_loss_type': 'low_var_kl', (main_task pid=1646422) 'optim': {'lr': 1e-06, (main_task pid=1646422) 'lr_warmup_steps_ratio': 0.0, (main_task pid=1646422) 'min_lr_ratio': None, (main_task pid=1646422) 'total_training_steps': -1, (main_task pid=1646422) 'warmup_style': 'constant'}, (main_task pid=1646422) 'ppo_epochs': 1, (main_task pid=1646422) 'ppo_max_token_len_per_gpu': 16384, (main_task pid=1646422) 'ppo_micro_batch_size': 64, (main_task pid=1646422) 'ppo_micro_batch_size_per_gpu': 16, (main_task pid=1646422) 'ppo_mini_batch_size': 64, (main_task pid=1646422) 'shuffle': False, (main_task pid=1646422) 'state_masking': False, (main_task pid=1646422) 'strategy': 'fsdp', (main_task pid=1646422) 'ulysses_sequence_parallel_size': 1, (main_task pid=1646422) 'use_dynamic_bsz': False, (main_task pid=1646422) 'use_kl_loss': False}, (main_task pid=1646422) 'hybrid_engine': True, (main_task pid=1646422) 'model': {'enable_gradient_checkpointing': True, (main_task pid=1646422) 'external_lib': None, (main_task pid=1646422) 'override_config': {}, (main_task pid=1646422) 'path': 'Qwen/Qwen3-4B-Instruct-2507', (main_task pid=1646422) 'use_remove_padding': False}, (main_task pid=1646422) 'ref': {'fsdp_config': {'fsdp_size': -1, (main_task pid=1646422) 'param_offload': True, (main_task pid=1646422) 'wrap_policy': {'min_num_params': 0}}, (main_task pid=1646422) 'log_prob_max_token_len_per_gpu': 16384, (main_task pid=1646422) 'log_prob_micro_batch_size': 64, (main_task pid=1646422) 'log_prob_use_dynamic_bsz': False, (main_task pid=1646422) 'ulysses_sequence_parallel_size': 1}, (main_task pid=1646422) 'rollout': {'do_sample': True, (main_task pid=1646422) 'dtype': 'bfloat16', (main_task pid=1646422) 'enforce_eager': True, (main_task pid=1646422) 'free_cache_engine': True, (main_task pid=1646422) 'gpu_memory_utilization': 0.4, (main_task pid=1646422) 'ignore_eos': False, (main_task pid=1646422) 'load_format': 'dummy_dtensor', (main_task pid=1646422) 'log_prob_max_token_len_per_gpu': 16384, (main_task pid=1646422) 'log_prob_micro_batch_size': 64, (main_task pid=1646422) 'log_prob_use_dynamic_bsz': False, (main_task pid=1646422) 'max_num_batched_tokens': 8192, (main_task pid=1646422) 'max_num_seqs': 1024, (main_task pid=1646422) 'n': 1, (main_task pid=1646422) 'n_agent': 1, (main_task pid=1646422) 'name': 'vllm', (main_task pid=1646422) 'prompt_length': 4096, (main_task pid=1646422) 'response_length': 1024, (main_task pid=1646422) 'temperature': 1.0, (main_task pid=1646422) 'tensor_model_parallel_size': 1, (main_task pid=1646422) 'top_k': -1, (main_task pid=1646422) 'top_p': 0.95}}, (main_task pid=1646422) 'algorithm': {'adv_estimator': 'grpo', (main_task pid=1646422) 'gamma': 1.0, (main_task pid=1646422) 'kl_ctrl': {'kl_coef': 0.001, 'type': 'fixed'}, (main_task pid=1646422) 'kl_penalty': 'kl', (main_task pid=1646422) 'lam': 1.0, (main_task pid=1646422) 'no_think_rl': False, (main_task pid=1646422) 'state_masking': {'end_state_marker': '', (main_task pid=1646422) 'start_state_marker': ''}}, (main_task pid=1646422) 'critic': {'cliprange_value': 0.5, (main_task pid=1646422) 'forward_max_token_len_per_gpu': 32768, (main_task pid=1646422) 'forward_micro_batch_size': 64, (main_task pid=1646422) 'grad_clip': 1.0, (main_task pid=1646422) 'model': {'enable_gradient_checkpointing': False, (main_task pid=1646422) 'external_lib': None, (main_task pid=1646422) 'fsdp_config': {'fsdp_size': -1, (main_task pid=1646422) 'grad_offload': False, (main_task pid=1646422) 'optimizer_offload': False, (main_task pid=1646422) 'param_offload': False, (main_task pid=1646422) 'wrap_policy': {'min_num_params': 0}}, (main_task pid=1646422) 'override_config': {}, (main_task pid=1646422) 'path': '~/models/deepseek-llm-7b-chat', (main_task pid=1646422) 'tokenizer_path': 'Qwen/Qwen3-4B-Instruct-2507', (main_task pid=1646422) 'use_remove_padding': False}, (main_task pid=1646422) 'optim': {'lr': 1e-05, (main_task pid=1646422) 'lr_warmup_steps_ratio': 0.0, (main_task pid=1646422) 'min_lr_ratio': None, (main_task pid=1646422) 'total_training_steps': -1, (main_task pid=1646422) 'warmup_style': 'constant'}, (main_task pid=1646422) 'ppo_epochs': 1, (main_task pid=1646422) 'ppo_max_token_len_per_gpu': 32768, (main_task pid=1646422) 'ppo_micro_batch_size': 64, (main_task pid=1646422) 'ppo_mini_batch_size': 64, (main_task pid=1646422) 'shuffle': False, (main_task pid=1646422) 'strategy': 'fsdp', (main_task pid=1646422) 'ulysses_sequence_parallel_size': 1, (main_task pid=1646422) 'use_dynamic_bsz': False}, (main_task pid=1646422) 'data': {'max_obs_length': 512, (main_task pid=1646422) 'max_prompt_length': 4096, (main_task pid=1646422) 'max_response_length': 1024, (main_task pid=1646422) 'max_start_length': 256, (main_task pid=1646422) 'prompt_key': 'prompt', (main_task pid=1646422) 'return_raw_chat': False, (main_task pid=1646422) 'return_raw_input_ids': False, (main_task pid=1646422) 'shuffle_train_dataloader': True, (main_task pid=1646422) 'tokenizer': None, (main_task pid=1646422) 'train_batch_size': 128, (main_task pid=1646422) 'train_data_num': None, (main_task pid=1646422) 'train_files': '/home/mshahidul/readctrl/code/RL_model/verl/Search-R1/dataset/train.parquet', (main_task pid=1646422) 'val_batch_size': 64, (main_task pid=1646422) 'val_data_num': None, (main_task pid=1646422) 'val_files': '/home/mshahidul/readctrl/code/RL_model/verl/Search-R1/dataset/test.parquet'}, (main_task pid=1646422) 'do_search': False, (main_task pid=1646422) 'max_turns': 1, (main_task pid=1646422) 'retriever': {'topk': 3, 'url': 'http://127.0.0.1:8000/retrieve'}, (main_task pid=1646422) 'reward_model': {'enable': False, (main_task pid=1646422) 'final_format_score': 0, (main_task pid=1646422) 'forward_max_token_len_per_gpu': 32768, (main_task pid=1646422) 'max_length': None, (main_task pid=1646422) 'micro_batch_size': 64, (main_task pid=1646422) 'model': {'external_lib': None, (main_task pid=1646422) 'fsdp_config': {'min_num_params': 0, (main_task pid=1646422) 'param_offload': False}, (main_task pid=1646422) 'input_tokenizer': 'Qwen/Qwen3-4B-Instruct-2507', (main_task pid=1646422) 'path': '~/models/FsfairX-LLaMA3-RM-v0.1', (main_task pid=1646422) 'use_remove_padding': False}, (main_task pid=1646422) 'retrieval_score': 0, (main_task pid=1646422) 'strategy': 'fsdp', (main_task pid=1646422) 'structure_format_score': 0, (main_task pid=1646422) 'ulysses_sequence_parallel_size': 1, (main_task pid=1646422) 'use_dynamic_bsz': False}, (main_task pid=1646422) 'trainer': {'critic_warmup': 0, (main_task pid=1646422) 'default_hdfs_dir': '~/experiments/gsm8k/ppo/llm_guard_3B_10k_v2', (main_task pid=1646422) 'default_local_dir': 'verl_checkpoints/llm_guard_3B_10k_v2', (main_task pid=1646422) 'experiment_name': 'llm_guard_3B_10k_v2', (main_task pid=1646422) 'logger': ['wandb'], (main_task pid=1646422) 'n_gpus_per_node': 2, (main_task pid=1646422) 'nnodes': 1, (main_task pid=1646422) 'project_name': '', (main_task pid=1646422) 'save_freq': 100, (main_task pid=1646422) 'test_freq': 50, (main_task pid=1646422) 'total_epochs': 15, (main_task pid=1646422) 'total_training_steps': 1005}} (main_task pid=1646422) W0201 20:43:46.380000 1646422 /data/home_beta/mshahidul/miniconda3/envs/verl/lib/python3.12/site-packages/torch/utils/cpp_extension.py:117] No CUDA runtime is found, using CUDA_HOME='/usr/local/cuda' Error executing job with overrides: ['data.train_files=/home/mshahidul/readctrl/code/RL_model/verl/Search-R1/dataset/train.parquet', 'data.val_files=/home/mshahidul/readctrl/code/RL_model/verl/Search-R1/dataset/test.parquet', 'data.train_batch_size=128', 'data.val_batch_size=64', 'data.max_prompt_length=4096', 'data.max_response_length=1024', 'data.shuffle_train_dataloader=True', 'algorithm.adv_estimator=grpo', 'actor_rollout_ref.model.path=Qwen/Qwen3-4B-Instruct-2507', 'actor_rollout_ref.model.enable_gradient_checkpointing=true', 'actor_rollout_ref.model.use_remove_padding=False', 'actor_rollout_ref.actor.optim.lr=1e-6', 'actor_rollout_ref.actor.ppo_mini_batch_size=64', '+actor_rollout_ref.actor.ppo_micro_batch_size_per_gpu=16', 'actor_rollout_ref.actor.fsdp_config.param_offload=true', 'actor_rollout_ref.actor.fsdp_config.optimizer_offload=true', 'actor_rollout_ref.rollout.log_prob_micro_batch_size=64', 'actor_rollout_ref.rollout.tensor_model_parallel_size=1', 'actor_rollout_ref.rollout.name=vllm', 'actor_rollout_ref.rollout.gpu_memory_utilization=0.4', 'actor_rollout_ref.ref.log_prob_micro_batch_size=64', 'actor_rollout_ref.ref.fsdp_config.param_offload=True', 'actor_rollout_ref.actor.kl_loss_coef=0.001', 'trainer.logger=[wandb]', 'trainer.n_gpus_per_node=2', 'trainer.nnodes=1', 'trainer.save_freq=100', 'trainer.test_freq=50', 'trainer.project_name=', 'trainer.experiment_name=llm_guard_3B_10k_v2', 'trainer.total_epochs=15', 'trainer.total_training_steps=1005', 'trainer.default_local_dir=verl_checkpoints/llm_guard_3B_10k_v2', 'do_search=false', 'max_turns=1'] Traceback (most recent call last): File "/data/home_beta/mshahidul/readctrl/code/RL_model/verl/Search-R1/verl/trainer/main_ppo.py", line 110, in main ray.get(main_task.remote(config)) File "/home/mshahidul/miniconda3/envs/verl/lib/python3.12/site-packages/ray/_private/auto_init_hook.py", line 22, in auto_init_wrapper return fn(*args, **kwargs) ^^^^^^^^^^^^^^^^^^^ File "/home/mshahidul/miniconda3/envs/verl/lib/python3.12/site-packages/ray/_private/client_mode_hook.py", line 104, in wrapper return func(*args, **kwargs) ^^^^^^^^^^^^^^^^^^^^^ File "/home/mshahidul/miniconda3/envs/verl/lib/python3.12/site-packages/ray/_private/worker.py", line 2972, in get values, debugger_breakpoint = worker.get_objects( ^^^^^^^^^^^^^^^^^^^ File "/home/mshahidul/miniconda3/envs/verl/lib/python3.12/site-packages/ray/_private/worker.py", line 1031, in get_objects raise value.as_instanceof_cause() ray.exceptions.RayTaskError(ImportError): ray::main_task() (pid=1646422, ip=172.16.34.29) File "/data/home_beta/mshahidul/readctrl/code/RL_model/verl/Search-R1/verl/trainer/main_ppo.py", line 136, in main_task from verl.workers.fsdp_workers import ActorRolloutRefWorker, CriticWorker File "/data/home_beta/mshahidul/readctrl/code/RL_model/verl/Search-R1/verl/workers/fsdp_workers.py", line 39, in from verl.workers.sharding_manager.fsdp_ulysses import FSDPUlyssesShardingManager File "/data/home_beta/mshahidul/readctrl/code/RL_model/verl/Search-R1/verl/workers/sharding_manager/__init__.py", line 23, in from .megatron_vllm import AllGatherPPModel, MegatronVLLMShardingManager File "/data/home_beta/mshahidul/readctrl/code/RL_model/verl/Search-R1/verl/workers/sharding_manager/megatron_vllm.py", line 230, in from verl.third_party.vllm import parallel_state as vllm_ps File "/data/home_beta/mshahidul/readctrl/code/RL_model/verl/Search-R1/verl/third_party/vllm/__init__.py", line 52, in from vllm import LLM, LLMEngine, parallel_state ImportError: cannot import name 'parallel_state' from 'vllm' (/home/mshahidul/miniconda3/envs/verl/lib/python3.12/site-packages/vllm/__init__.py) Set the environment variable HYDRA_FULL_ERROR=1 for a complete stack trace. [W201 20:43:50.186179538 AllocatorConfig.cpp:28] Warning: PYTORCH_CUDA_ALLOC_CONF is deprecated, use PYTORCH_ALLOC_CONF instead (function operator())