readctrl / code /RL_model /verl /Search-R1 /llm_guard_3B_10k_v2.log
shahidul034's picture
Add files using upload-large-folder tool
c7a6fe6 verified
/home/mshahidul/miniconda3/envs/verl/lib/python3.12/site-packages/torch/cuda/__init__.py:63: FutureWarning: The pynvml package is deprecated. Please install nvidia-ml-py instead. If you did not install pynvml directly, please report this to the maintainers of the package that installed pynvml for you.
import pynvml # type: ignore[import]
2026-02-01 20:43:15,317 INFO worker.py:2014 -- Started a local Ray instance. View the dashboard at http://127.0.0.1:8301 
/home/mshahidul/miniconda3/envs/verl/lib/python3.12/site-packages/ray/_private/worker.py:2062: FutureWarning: Tip: In future versions of Ray, Ray will no longer override accelerator visible devices env var if num_gpus=0 or num_gpus=None (default). To enable this behavior and turn off this error message, set RAY_ACCEL_ENV_VAR_OVERRIDE_ON_ZERO=0
warnings.warn(
(pid=1646422) /home/mshahidul/miniconda3/envs/verl/lib/python3.12/site-packages/torch/cuda/__init__.py:63: FutureWarning: The pynvml package is deprecated. Please install nvidia-ml-py instead. If you did not install pynvml directly, please report this to the maintainers of the package that installed pynvml for you.
(pid=1646422) import pynvml # type: ignore[import]
(main_task pid=1646422) {'actor_rollout_ref': {'actor': {'clip_ratio': 0.2,
(main_task pid=1646422) 'entropy_coeff': 0.001,
(main_task pid=1646422) 'fsdp_config': {'fsdp_size': -1,
(main_task pid=1646422) 'grad_offload': False,
(main_task pid=1646422) 'optimizer_offload': True,
(main_task pid=1646422) 'param_offload': True,
(main_task pid=1646422) 'wrap_policy': {'min_num_params': 0}},
(main_task pid=1646422) 'grad_clip': 1.0,
(main_task pid=1646422) 'kl_loss_coef': 0.001,
(main_task pid=1646422) 'kl_loss_type': 'low_var_kl',
(main_task pid=1646422) 'optim': {'lr': 1e-06,
(main_task pid=1646422) 'lr_warmup_steps_ratio': 0.0,
(main_task pid=1646422) 'min_lr_ratio': None,
(main_task pid=1646422) 'total_training_steps': -1,
(main_task pid=1646422) 'warmup_style': 'constant'},
(main_task pid=1646422) 'ppo_epochs': 1,
(main_task pid=1646422) 'ppo_max_token_len_per_gpu': 16384,
(main_task pid=1646422) 'ppo_micro_batch_size': 64,
(main_task pid=1646422) 'ppo_micro_batch_size_per_gpu': 16,
(main_task pid=1646422) 'ppo_mini_batch_size': 64,
(main_task pid=1646422) 'shuffle': False,
(main_task pid=1646422) 'state_masking': False,
(main_task pid=1646422) 'strategy': 'fsdp',
(main_task pid=1646422) 'ulysses_sequence_parallel_size': 1,
(main_task pid=1646422) 'use_dynamic_bsz': False,
(main_task pid=1646422) 'use_kl_loss': False},
(main_task pid=1646422) 'hybrid_engine': True,
(main_task pid=1646422) 'model': {'enable_gradient_checkpointing': True,
(main_task pid=1646422) 'external_lib': None,
(main_task pid=1646422) 'override_config': {},
(main_task pid=1646422) 'path': 'Qwen/Qwen3-4B-Instruct-2507',
(main_task pid=1646422) 'use_remove_padding': False},
(main_task pid=1646422) 'ref': {'fsdp_config': {'fsdp_size': -1,
(main_task pid=1646422) 'param_offload': True,
(main_task pid=1646422) 'wrap_policy': {'min_num_params': 0}},
(main_task pid=1646422) 'log_prob_max_token_len_per_gpu': 16384,
(main_task pid=1646422) 'log_prob_micro_batch_size': 64,
(main_task pid=1646422) 'log_prob_use_dynamic_bsz': False,
(main_task pid=1646422) 'ulysses_sequence_parallel_size': 1},
(main_task pid=1646422) 'rollout': {'do_sample': True,
(main_task pid=1646422) 'dtype': 'bfloat16',
(main_task pid=1646422) 'enforce_eager': True,
(main_task pid=1646422) 'free_cache_engine': True,
(main_task pid=1646422) 'gpu_memory_utilization': 0.4,
(main_task pid=1646422) 'ignore_eos': False,
(main_task pid=1646422) 'load_format': 'dummy_dtensor',
(main_task pid=1646422) 'log_prob_max_token_len_per_gpu': 16384,
(main_task pid=1646422) 'log_prob_micro_batch_size': 64,
(main_task pid=1646422) 'log_prob_use_dynamic_bsz': False,
(main_task pid=1646422) 'max_num_batched_tokens': 8192,
(main_task pid=1646422) 'max_num_seqs': 1024,
(main_task pid=1646422) 'n': 1,
(main_task pid=1646422) 'n_agent': 1,
(main_task pid=1646422) 'name': 'vllm',
(main_task pid=1646422) 'prompt_length': 4096,
(main_task pid=1646422) 'response_length': 1024,
(main_task pid=1646422) 'temperature': 1.0,
(main_task pid=1646422) 'tensor_model_parallel_size': 1,
(main_task pid=1646422) 'top_k': -1,
(main_task pid=1646422) 'top_p': 0.95}},
(main_task pid=1646422) 'algorithm': {'adv_estimator': 'grpo',
(main_task pid=1646422) 'gamma': 1.0,
(main_task pid=1646422) 'kl_ctrl': {'kl_coef': 0.001, 'type': 'fixed'},
(main_task pid=1646422) 'kl_penalty': 'kl',
(main_task pid=1646422) 'lam': 1.0,
(main_task pid=1646422) 'no_think_rl': False,
(main_task pid=1646422) 'state_masking': {'end_state_marker': '</information>',
(main_task pid=1646422) 'start_state_marker': '<information>'}},
(main_task pid=1646422) 'critic': {'cliprange_value': 0.5,
(main_task pid=1646422) 'forward_max_token_len_per_gpu': 32768,
(main_task pid=1646422) 'forward_micro_batch_size': 64,
(main_task pid=1646422) 'grad_clip': 1.0,
(main_task pid=1646422) 'model': {'enable_gradient_checkpointing': False,
(main_task pid=1646422) 'external_lib': None,
(main_task pid=1646422) 'fsdp_config': {'fsdp_size': -1,
(main_task pid=1646422) 'grad_offload': False,
(main_task pid=1646422) 'optimizer_offload': False,
(main_task pid=1646422) 'param_offload': False,
(main_task pid=1646422) 'wrap_policy': {'min_num_params': 0}},
(main_task pid=1646422) 'override_config': {},
(main_task pid=1646422) 'path': '~/models/deepseek-llm-7b-chat',
(main_task pid=1646422) 'tokenizer_path': 'Qwen/Qwen3-4B-Instruct-2507',
(main_task pid=1646422) 'use_remove_padding': False},
(main_task pid=1646422) 'optim': {'lr': 1e-05,
(main_task pid=1646422) 'lr_warmup_steps_ratio': 0.0,
(main_task pid=1646422) 'min_lr_ratio': None,
(main_task pid=1646422) 'total_training_steps': -1,
(main_task pid=1646422) 'warmup_style': 'constant'},
(main_task pid=1646422) 'ppo_epochs': 1,
(main_task pid=1646422) 'ppo_max_token_len_per_gpu': 32768,
(main_task pid=1646422) 'ppo_micro_batch_size': 64,
(main_task pid=1646422) 'ppo_mini_batch_size': 64,
(main_task pid=1646422) 'shuffle': False,
(main_task pid=1646422) 'strategy': 'fsdp',
(main_task pid=1646422) 'ulysses_sequence_parallel_size': 1,
(main_task pid=1646422) 'use_dynamic_bsz': False},
(main_task pid=1646422) 'data': {'max_obs_length': 512,
(main_task pid=1646422) 'max_prompt_length': 4096,
(main_task pid=1646422) 'max_response_length': 1024,
(main_task pid=1646422) 'max_start_length': 256,
(main_task pid=1646422) 'prompt_key': 'prompt',
(main_task pid=1646422) 'return_raw_chat': False,
(main_task pid=1646422) 'return_raw_input_ids': False,
(main_task pid=1646422) 'shuffle_train_dataloader': True,
(main_task pid=1646422) 'tokenizer': None,
(main_task pid=1646422) 'train_batch_size': 128,
(main_task pid=1646422) 'train_data_num': None,
(main_task pid=1646422) 'train_files': '/home/mshahidul/readctrl/code/RL_model/verl/Search-R1/dataset/train.parquet',
(main_task pid=1646422) 'val_batch_size': 64,
(main_task pid=1646422) 'val_data_num': None,
(main_task pid=1646422) 'val_files': '/home/mshahidul/readctrl/code/RL_model/verl/Search-R1/dataset/test.parquet'},
(main_task pid=1646422) 'do_search': False,
(main_task pid=1646422) 'max_turns': 1,
(main_task pid=1646422) 'retriever': {'topk': 3, 'url': 'http://127.0.0.1:8000/retrieve'},
(main_task pid=1646422) 'reward_model': {'enable': False,
(main_task pid=1646422) 'final_format_score': 0,
(main_task pid=1646422) 'forward_max_token_len_per_gpu': 32768,
(main_task pid=1646422) 'max_length': None,
(main_task pid=1646422) 'micro_batch_size': 64,
(main_task pid=1646422) 'model': {'external_lib': None,
(main_task pid=1646422) 'fsdp_config': {'min_num_params': 0,
(main_task pid=1646422) 'param_offload': False},
(main_task pid=1646422) 'input_tokenizer': 'Qwen/Qwen3-4B-Instruct-2507',
(main_task pid=1646422) 'path': '~/models/FsfairX-LLaMA3-RM-v0.1',
(main_task pid=1646422) 'use_remove_padding': False},
(main_task pid=1646422) 'retrieval_score': 0,
(main_task pid=1646422) 'strategy': 'fsdp',
(main_task pid=1646422) 'structure_format_score': 0,
(main_task pid=1646422) 'ulysses_sequence_parallel_size': 1,
(main_task pid=1646422) 'use_dynamic_bsz': False},
(main_task pid=1646422) 'trainer': {'critic_warmup': 0,
(main_task pid=1646422) 'default_hdfs_dir': '~/experiments/gsm8k/ppo/llm_guard_3B_10k_v2',
(main_task pid=1646422) 'default_local_dir': 'verl_checkpoints/llm_guard_3B_10k_v2',
(main_task pid=1646422) 'experiment_name': 'llm_guard_3B_10k_v2',
(main_task pid=1646422) 'logger': ['wandb'],
(main_task pid=1646422) 'n_gpus_per_node': 2,
(main_task pid=1646422) 'nnodes': 1,
(main_task pid=1646422) 'project_name': '',
(main_task pid=1646422) 'save_freq': 100,
(main_task pid=1646422) 'test_freq': 50,
(main_task pid=1646422) 'total_epochs': 15,
(main_task pid=1646422) 'total_training_steps': 1005}}
(main_task pid=1646422) W0201 20:43:46.380000 1646422 /data/home_beta/mshahidul/miniconda3/envs/verl/lib/python3.12/site-packages/torch/utils/cpp_extension.py:117] No CUDA runtime is found, using CUDA_HOME='/usr/local/cuda'
Error executing job with overrides: ['data.train_files=/home/mshahidul/readctrl/code/RL_model/verl/Search-R1/dataset/train.parquet', 'data.val_files=/home/mshahidul/readctrl/code/RL_model/verl/Search-R1/dataset/test.parquet', 'data.train_batch_size=128', 'data.val_batch_size=64', 'data.max_prompt_length=4096', 'data.max_response_length=1024', 'data.shuffle_train_dataloader=True', 'algorithm.adv_estimator=grpo', 'actor_rollout_ref.model.path=Qwen/Qwen3-4B-Instruct-2507', 'actor_rollout_ref.model.enable_gradient_checkpointing=true', 'actor_rollout_ref.model.use_remove_padding=False', 'actor_rollout_ref.actor.optim.lr=1e-6', 'actor_rollout_ref.actor.ppo_mini_batch_size=64', '+actor_rollout_ref.actor.ppo_micro_batch_size_per_gpu=16', 'actor_rollout_ref.actor.fsdp_config.param_offload=true', 'actor_rollout_ref.actor.fsdp_config.optimizer_offload=true', 'actor_rollout_ref.rollout.log_prob_micro_batch_size=64', 'actor_rollout_ref.rollout.tensor_model_parallel_size=1', 'actor_rollout_ref.rollout.name=vllm', 'actor_rollout_ref.rollout.gpu_memory_utilization=0.4', 'actor_rollout_ref.ref.log_prob_micro_batch_size=64', 'actor_rollout_ref.ref.fsdp_config.param_offload=True', 'actor_rollout_ref.actor.kl_loss_coef=0.001', 'trainer.logger=[wandb]', 'trainer.n_gpus_per_node=2', 'trainer.nnodes=1', 'trainer.save_freq=100', 'trainer.test_freq=50', 'trainer.project_name=', 'trainer.experiment_name=llm_guard_3B_10k_v2', 'trainer.total_epochs=15', 'trainer.total_training_steps=1005', 'trainer.default_local_dir=verl_checkpoints/llm_guard_3B_10k_v2', 'do_search=false', 'max_turns=1']
Traceback (most recent call last):
File "/data/home_beta/mshahidul/readctrl/code/RL_model/verl/Search-R1/verl/trainer/main_ppo.py", line 110, in main
ray.get(main_task.remote(config))
File "/home/mshahidul/miniconda3/envs/verl/lib/python3.12/site-packages/ray/_private/auto_init_hook.py", line 22, in auto_init_wrapper
return fn(*args, **kwargs)
^^^^^^^^^^^^^^^^^^^
File "/home/mshahidul/miniconda3/envs/verl/lib/python3.12/site-packages/ray/_private/client_mode_hook.py", line 104, in wrapper
return func(*args, **kwargs)
^^^^^^^^^^^^^^^^^^^^^
File "/home/mshahidul/miniconda3/envs/verl/lib/python3.12/site-packages/ray/_private/worker.py", line 2972, in get
values, debugger_breakpoint = worker.get_objects(
^^^^^^^^^^^^^^^^^^^
File "/home/mshahidul/miniconda3/envs/verl/lib/python3.12/site-packages/ray/_private/worker.py", line 1031, in get_objects
raise value.as_instanceof_cause()
ray.exceptions.RayTaskError(ImportError): ray::main_task() (pid=1646422, ip=172.16.34.29)
File "/data/home_beta/mshahidul/readctrl/code/RL_model/verl/Search-R1/verl/trainer/main_ppo.py", line 136, in main_task
from verl.workers.fsdp_workers import ActorRolloutRefWorker, CriticWorker
File "/data/home_beta/mshahidul/readctrl/code/RL_model/verl/Search-R1/verl/workers/fsdp_workers.py", line 39, in <module>
from verl.workers.sharding_manager.fsdp_ulysses import FSDPUlyssesShardingManager
File "/data/home_beta/mshahidul/readctrl/code/RL_model/verl/Search-R1/verl/workers/sharding_manager/__init__.py", line 23, in <module>
from .megatron_vllm import AllGatherPPModel, MegatronVLLMShardingManager
File "/data/home_beta/mshahidul/readctrl/code/RL_model/verl/Search-R1/verl/workers/sharding_manager/megatron_vllm.py", line 230, in <module>
from verl.third_party.vllm import parallel_state as vllm_ps
File "/data/home_beta/mshahidul/readctrl/code/RL_model/verl/Search-R1/verl/third_party/vllm/__init__.py", line 52, in <module>
from vllm import LLM, LLMEngine, parallel_state
ImportError: cannot import name 'parallel_state' from 'vllm' (/home/mshahidul/miniconda3/envs/verl/lib/python3.12/site-packages/vllm/__init__.py)
Set the environment variable HYDRA_FULL_ERROR=1 for a complete stack trace.
[W201 20:43:50.186179538 AllocatorConfig.cpp:28] Warning: PYTORCH_CUDA_ALLOC_CONF is deprecated, use PYTORCH_ALLOC_CONF instead (function operator())