File size: 17,788 Bytes
c7a6fe6 | 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 | /home/mshahidul/miniconda3/envs/verl/lib/python3.12/site-packages/torch/cuda/__init__.py:63: FutureWarning: The pynvml package is deprecated. Please install nvidia-ml-py instead. If you did not install pynvml directly, please report this to the maintainers of the package that installed pynvml for you.
import pynvml # type: ignore[import]
2026-02-01 20:43:15,317 INFO worker.py:2014 -- Started a local Ray instance. View the dashboard at [1m[32mhttp://127.0.0.1:8301 [39m[22m
/home/mshahidul/miniconda3/envs/verl/lib/python3.12/site-packages/ray/_private/worker.py:2062: FutureWarning: Tip: In future versions of Ray, Ray will no longer override accelerator visible devices env var if num_gpus=0 or num_gpus=None (default). To enable this behavior and turn off this error message, set RAY_ACCEL_ENV_VAR_OVERRIDE_ON_ZERO=0
warnings.warn(
[36m(pid=1646422)[0m /home/mshahidul/miniconda3/envs/verl/lib/python3.12/site-packages/torch/cuda/__init__.py:63: FutureWarning: The pynvml package is deprecated. Please install nvidia-ml-py instead. If you did not install pynvml directly, please report this to the maintainers of the package that installed pynvml for you.
[36m(pid=1646422)[0m import pynvml # type: ignore[import]
[36m(main_task pid=1646422)[0m {'actor_rollout_ref': {'actor': {'clip_ratio': 0.2,
[36m(main_task pid=1646422)[0m 'entropy_coeff': 0.001,
[36m(main_task pid=1646422)[0m 'fsdp_config': {'fsdp_size': -1,
[36m(main_task pid=1646422)[0m 'grad_offload': False,
[36m(main_task pid=1646422)[0m 'optimizer_offload': True,
[36m(main_task pid=1646422)[0m 'param_offload': True,
[36m(main_task pid=1646422)[0m 'wrap_policy': {'min_num_params': 0}},
[36m(main_task pid=1646422)[0m 'grad_clip': 1.0,
[36m(main_task pid=1646422)[0m 'kl_loss_coef': 0.001,
[36m(main_task pid=1646422)[0m 'kl_loss_type': 'low_var_kl',
[36m(main_task pid=1646422)[0m 'optim': {'lr': 1e-06,
[36m(main_task pid=1646422)[0m 'lr_warmup_steps_ratio': 0.0,
[36m(main_task pid=1646422)[0m 'min_lr_ratio': None,
[36m(main_task pid=1646422)[0m 'total_training_steps': -1,
[36m(main_task pid=1646422)[0m 'warmup_style': 'constant'},
[36m(main_task pid=1646422)[0m 'ppo_epochs': 1,
[36m(main_task pid=1646422)[0m 'ppo_max_token_len_per_gpu': 16384,
[36m(main_task pid=1646422)[0m 'ppo_micro_batch_size': 64,
[36m(main_task pid=1646422)[0m 'ppo_micro_batch_size_per_gpu': 16,
[36m(main_task pid=1646422)[0m 'ppo_mini_batch_size': 64,
[36m(main_task pid=1646422)[0m 'shuffle': False,
[36m(main_task pid=1646422)[0m 'state_masking': False,
[36m(main_task pid=1646422)[0m 'strategy': 'fsdp',
[36m(main_task pid=1646422)[0m 'ulysses_sequence_parallel_size': 1,
[36m(main_task pid=1646422)[0m 'use_dynamic_bsz': False,
[36m(main_task pid=1646422)[0m 'use_kl_loss': False},
[36m(main_task pid=1646422)[0m 'hybrid_engine': True,
[36m(main_task pid=1646422)[0m 'model': {'enable_gradient_checkpointing': True,
[36m(main_task pid=1646422)[0m 'external_lib': None,
[36m(main_task pid=1646422)[0m 'override_config': {},
[36m(main_task pid=1646422)[0m 'path': 'Qwen/Qwen3-4B-Instruct-2507',
[36m(main_task pid=1646422)[0m 'use_remove_padding': False},
[36m(main_task pid=1646422)[0m 'ref': {'fsdp_config': {'fsdp_size': -1,
[36m(main_task pid=1646422)[0m 'param_offload': True,
[36m(main_task pid=1646422)[0m 'wrap_policy': {'min_num_params': 0}},
[36m(main_task pid=1646422)[0m 'log_prob_max_token_len_per_gpu': 16384,
[36m(main_task pid=1646422)[0m 'log_prob_micro_batch_size': 64,
[36m(main_task pid=1646422)[0m 'log_prob_use_dynamic_bsz': False,
[36m(main_task pid=1646422)[0m 'ulysses_sequence_parallel_size': 1},
[36m(main_task pid=1646422)[0m 'rollout': {'do_sample': True,
[36m(main_task pid=1646422)[0m 'dtype': 'bfloat16',
[36m(main_task pid=1646422)[0m 'enforce_eager': True,
[36m(main_task pid=1646422)[0m 'free_cache_engine': True,
[36m(main_task pid=1646422)[0m 'gpu_memory_utilization': 0.4,
[36m(main_task pid=1646422)[0m 'ignore_eos': False,
[36m(main_task pid=1646422)[0m 'load_format': 'dummy_dtensor',
[36m(main_task pid=1646422)[0m 'log_prob_max_token_len_per_gpu': 16384,
[36m(main_task pid=1646422)[0m 'log_prob_micro_batch_size': 64,
[36m(main_task pid=1646422)[0m 'log_prob_use_dynamic_bsz': False,
[36m(main_task pid=1646422)[0m 'max_num_batched_tokens': 8192,
[36m(main_task pid=1646422)[0m 'max_num_seqs': 1024,
[36m(main_task pid=1646422)[0m 'n': 1,
[36m(main_task pid=1646422)[0m 'n_agent': 1,
[36m(main_task pid=1646422)[0m 'name': 'vllm',
[36m(main_task pid=1646422)[0m 'prompt_length': 4096,
[36m(main_task pid=1646422)[0m 'response_length': 1024,
[36m(main_task pid=1646422)[0m 'temperature': 1.0,
[36m(main_task pid=1646422)[0m 'tensor_model_parallel_size': 1,
[36m(main_task pid=1646422)[0m 'top_k': -1,
[36m(main_task pid=1646422)[0m 'top_p': 0.95}},
[36m(main_task pid=1646422)[0m 'algorithm': {'adv_estimator': 'grpo',
[36m(main_task pid=1646422)[0m 'gamma': 1.0,
[36m(main_task pid=1646422)[0m 'kl_ctrl': {'kl_coef': 0.001, 'type': 'fixed'},
[36m(main_task pid=1646422)[0m 'kl_penalty': 'kl',
[36m(main_task pid=1646422)[0m 'lam': 1.0,
[36m(main_task pid=1646422)[0m 'no_think_rl': False,
[36m(main_task pid=1646422)[0m 'state_masking': {'end_state_marker': '</information>',
[36m(main_task pid=1646422)[0m 'start_state_marker': '<information>'}},
[36m(main_task pid=1646422)[0m 'critic': {'cliprange_value': 0.5,
[36m(main_task pid=1646422)[0m 'forward_max_token_len_per_gpu': 32768,
[36m(main_task pid=1646422)[0m 'forward_micro_batch_size': 64,
[36m(main_task pid=1646422)[0m 'grad_clip': 1.0,
[36m(main_task pid=1646422)[0m 'model': {'enable_gradient_checkpointing': False,
[36m(main_task pid=1646422)[0m 'external_lib': None,
[36m(main_task pid=1646422)[0m 'fsdp_config': {'fsdp_size': -1,
[36m(main_task pid=1646422)[0m 'grad_offload': False,
[36m(main_task pid=1646422)[0m 'optimizer_offload': False,
[36m(main_task pid=1646422)[0m 'param_offload': False,
[36m(main_task pid=1646422)[0m 'wrap_policy': {'min_num_params': 0}},
[36m(main_task pid=1646422)[0m 'override_config': {},
[36m(main_task pid=1646422)[0m 'path': '~/models/deepseek-llm-7b-chat',
[36m(main_task pid=1646422)[0m 'tokenizer_path': 'Qwen/Qwen3-4B-Instruct-2507',
[36m(main_task pid=1646422)[0m 'use_remove_padding': False},
[36m(main_task pid=1646422)[0m 'optim': {'lr': 1e-05,
[36m(main_task pid=1646422)[0m 'lr_warmup_steps_ratio': 0.0,
[36m(main_task pid=1646422)[0m 'min_lr_ratio': None,
[36m(main_task pid=1646422)[0m 'total_training_steps': -1,
[36m(main_task pid=1646422)[0m 'warmup_style': 'constant'},
[36m(main_task pid=1646422)[0m 'ppo_epochs': 1,
[36m(main_task pid=1646422)[0m 'ppo_max_token_len_per_gpu': 32768,
[36m(main_task pid=1646422)[0m 'ppo_micro_batch_size': 64,
[36m(main_task pid=1646422)[0m 'ppo_mini_batch_size': 64,
[36m(main_task pid=1646422)[0m 'shuffle': False,
[36m(main_task pid=1646422)[0m 'strategy': 'fsdp',
[36m(main_task pid=1646422)[0m 'ulysses_sequence_parallel_size': 1,
[36m(main_task pid=1646422)[0m 'use_dynamic_bsz': False},
[36m(main_task pid=1646422)[0m 'data': {'max_obs_length': 512,
[36m(main_task pid=1646422)[0m 'max_prompt_length': 4096,
[36m(main_task pid=1646422)[0m 'max_response_length': 1024,
[36m(main_task pid=1646422)[0m 'max_start_length': 256,
[36m(main_task pid=1646422)[0m 'prompt_key': 'prompt',
[36m(main_task pid=1646422)[0m 'return_raw_chat': False,
[36m(main_task pid=1646422)[0m 'return_raw_input_ids': False,
[36m(main_task pid=1646422)[0m 'shuffle_train_dataloader': True,
[36m(main_task pid=1646422)[0m 'tokenizer': None,
[36m(main_task pid=1646422)[0m 'train_batch_size': 128,
[36m(main_task pid=1646422)[0m 'train_data_num': None,
[36m(main_task pid=1646422)[0m 'train_files': '/home/mshahidul/readctrl/code/RL_model/verl/Search-R1/dataset/train.parquet',
[36m(main_task pid=1646422)[0m 'val_batch_size': 64,
[36m(main_task pid=1646422)[0m 'val_data_num': None,
[36m(main_task pid=1646422)[0m 'val_files': '/home/mshahidul/readctrl/code/RL_model/verl/Search-R1/dataset/test.parquet'},
[36m(main_task pid=1646422)[0m 'do_search': False,
[36m(main_task pid=1646422)[0m 'max_turns': 1,
[36m(main_task pid=1646422)[0m 'retriever': {'topk': 3, 'url': 'http://127.0.0.1:8000/retrieve'},
[36m(main_task pid=1646422)[0m 'reward_model': {'enable': False,
[36m(main_task pid=1646422)[0m 'final_format_score': 0,
[36m(main_task pid=1646422)[0m 'forward_max_token_len_per_gpu': 32768,
[36m(main_task pid=1646422)[0m 'max_length': None,
[36m(main_task pid=1646422)[0m 'micro_batch_size': 64,
[36m(main_task pid=1646422)[0m 'model': {'external_lib': None,
[36m(main_task pid=1646422)[0m 'fsdp_config': {'min_num_params': 0,
[36m(main_task pid=1646422)[0m 'param_offload': False},
[36m(main_task pid=1646422)[0m 'input_tokenizer': 'Qwen/Qwen3-4B-Instruct-2507',
[36m(main_task pid=1646422)[0m 'path': '~/models/FsfairX-LLaMA3-RM-v0.1',
[36m(main_task pid=1646422)[0m 'use_remove_padding': False},
[36m(main_task pid=1646422)[0m 'retrieval_score': 0,
[36m(main_task pid=1646422)[0m 'strategy': 'fsdp',
[36m(main_task pid=1646422)[0m 'structure_format_score': 0,
[36m(main_task pid=1646422)[0m 'ulysses_sequence_parallel_size': 1,
[36m(main_task pid=1646422)[0m 'use_dynamic_bsz': False},
[36m(main_task pid=1646422)[0m 'trainer': {'critic_warmup': 0,
[36m(main_task pid=1646422)[0m 'default_hdfs_dir': '~/experiments/gsm8k/ppo/llm_guard_3B_10k_v2',
[36m(main_task pid=1646422)[0m 'default_local_dir': 'verl_checkpoints/llm_guard_3B_10k_v2',
[36m(main_task pid=1646422)[0m 'experiment_name': 'llm_guard_3B_10k_v2',
[36m(main_task pid=1646422)[0m 'logger': ['wandb'],
[36m(main_task pid=1646422)[0m 'n_gpus_per_node': 2,
[36m(main_task pid=1646422)[0m 'nnodes': 1,
[36m(main_task pid=1646422)[0m 'project_name': '',
[36m(main_task pid=1646422)[0m 'save_freq': 100,
[36m(main_task pid=1646422)[0m 'test_freq': 50,
[36m(main_task pid=1646422)[0m 'total_epochs': 15,
[36m(main_task pid=1646422)[0m 'total_training_steps': 1005}}
[36m(main_task pid=1646422)[0m W0201 20:43:46.380000 1646422 /data/home_beta/mshahidul/miniconda3/envs/verl/lib/python3.12/site-packages/torch/utils/cpp_extension.py:117] No CUDA runtime is found, using CUDA_HOME='/usr/local/cuda'
Error executing job with overrides: ['data.train_files=/home/mshahidul/readctrl/code/RL_model/verl/Search-R1/dataset/train.parquet', 'data.val_files=/home/mshahidul/readctrl/code/RL_model/verl/Search-R1/dataset/test.parquet', 'data.train_batch_size=128', 'data.val_batch_size=64', 'data.max_prompt_length=4096', 'data.max_response_length=1024', 'data.shuffle_train_dataloader=True', 'algorithm.adv_estimator=grpo', 'actor_rollout_ref.model.path=Qwen/Qwen3-4B-Instruct-2507', 'actor_rollout_ref.model.enable_gradient_checkpointing=true', 'actor_rollout_ref.model.use_remove_padding=False', 'actor_rollout_ref.actor.optim.lr=1e-6', 'actor_rollout_ref.actor.ppo_mini_batch_size=64', '+actor_rollout_ref.actor.ppo_micro_batch_size_per_gpu=16', 'actor_rollout_ref.actor.fsdp_config.param_offload=true', 'actor_rollout_ref.actor.fsdp_config.optimizer_offload=true', 'actor_rollout_ref.rollout.log_prob_micro_batch_size=64', 'actor_rollout_ref.rollout.tensor_model_parallel_size=1', 'actor_rollout_ref.rollout.name=vllm', 'actor_rollout_ref.rollout.gpu_memory_utilization=0.4', 'actor_rollout_ref.ref.log_prob_micro_batch_size=64', 'actor_rollout_ref.ref.fsdp_config.param_offload=True', 'actor_rollout_ref.actor.kl_loss_coef=0.001', 'trainer.logger=[wandb]', 'trainer.n_gpus_per_node=2', 'trainer.nnodes=1', 'trainer.save_freq=100', 'trainer.test_freq=50', 'trainer.project_name=', 'trainer.experiment_name=llm_guard_3B_10k_v2', 'trainer.total_epochs=15', 'trainer.total_training_steps=1005', 'trainer.default_local_dir=verl_checkpoints/llm_guard_3B_10k_v2', 'do_search=false', 'max_turns=1']
Traceback (most recent call last):
File "/data/home_beta/mshahidul/readctrl/code/RL_model/verl/Search-R1/verl/trainer/main_ppo.py", line 110, in main
ray.get(main_task.remote(config))
File "/home/mshahidul/miniconda3/envs/verl/lib/python3.12/site-packages/ray/_private/auto_init_hook.py", line 22, in auto_init_wrapper
return fn(*args, **kwargs)
^^^^^^^^^^^^^^^^^^^
File "/home/mshahidul/miniconda3/envs/verl/lib/python3.12/site-packages/ray/_private/client_mode_hook.py", line 104, in wrapper
return func(*args, **kwargs)
^^^^^^^^^^^^^^^^^^^^^
File "/home/mshahidul/miniconda3/envs/verl/lib/python3.12/site-packages/ray/_private/worker.py", line 2972, in get
values, debugger_breakpoint = worker.get_objects(
^^^^^^^^^^^^^^^^^^^
File "/home/mshahidul/miniconda3/envs/verl/lib/python3.12/site-packages/ray/_private/worker.py", line 1031, in get_objects
raise value.as_instanceof_cause()
ray.exceptions.RayTaskError(ImportError): [36mray::main_task()[39m (pid=1646422, ip=172.16.34.29)
File "/data/home_beta/mshahidul/readctrl/code/RL_model/verl/Search-R1/verl/trainer/main_ppo.py", line 136, in main_task
from verl.workers.fsdp_workers import ActorRolloutRefWorker, CriticWorker
File "/data/home_beta/mshahidul/readctrl/code/RL_model/verl/Search-R1/verl/workers/fsdp_workers.py", line 39, in <module>
from verl.workers.sharding_manager.fsdp_ulysses import FSDPUlyssesShardingManager
File "/data/home_beta/mshahidul/readctrl/code/RL_model/verl/Search-R1/verl/workers/sharding_manager/__init__.py", line 23, in <module>
from .megatron_vllm import AllGatherPPModel, MegatronVLLMShardingManager
File "/data/home_beta/mshahidul/readctrl/code/RL_model/verl/Search-R1/verl/workers/sharding_manager/megatron_vllm.py", line 230, in <module>
from verl.third_party.vllm import parallel_state as vllm_ps
File "/data/home_beta/mshahidul/readctrl/code/RL_model/verl/Search-R1/verl/third_party/vllm/__init__.py", line 52, in <module>
from vllm import LLM, LLMEngine, parallel_state
ImportError: cannot import name 'parallel_state' from 'vllm' (/home/mshahidul/miniconda3/envs/verl/lib/python3.12/site-packages/vllm/__init__.py)
Set the environment variable HYDRA_FULL_ERROR=1 for a complete stack trace.
[W201 20:43:50.186179538 AllocatorConfig.cpp:28] Warning: PYTORCH_CUDA_ALLOC_CONF is deprecated, use PYTORCH_ALLOC_CONF instead (function operator())
|