| | config: {'actor_rollout_ref': {'actor': {'optim': {'_target_': 'verl.workers.config.FSDPOptimizerConfig', 'optimizer': 'AdamW', 'optimizer_impl': 'torch.optim', 'lr': 1e-06, 'lr_warmup_steps_ratio': 0.0, 'total_training_steps': 45, 'weight_decay': 0.01, 'lr_warmup_steps': -1, 'betas': [0.9, 0.999], 'clip_grad': 1.0, 'min_lr_ratio': 0.0, 'num_cycles': 0.5, 'lr_scheduler_type': 'constant', 'warmup_style': None, 'override_optimizer_config': None}, 'fsdp_config': {'_target_': 'verl.workers.config.FSDPEngineConfig', 'wrap_policy': {'min_num_params': 0}, 'param_offload': True, 'optimizer_offload': True, 'offload_policy': False, 'reshard_after_forward': True, 'fsdp_size': -1, 'forward_prefetch': False, 'model_dtype': 'fp32', 'use_orig_params': False, 'seed': 42, 'full_determinism': False, 'ulysses_sequence_parallel_size': 1, 'entropy_from_logits_with_chunking': False, 'use_torch_compile': True, 'entropy_checkpointing': False, 'forward_only': False, 'strategy': 'fsdp', 'dtype': 'bfloat16'}, '_target_': 'verl.workers.config.FSDPActorConfig', 'rollout_n': 3, 'strategy': 'fsdp', 'ppo_mini_batch_size': 256, 'ppo_micro_batch_size': None, 'ppo_micro_batch_size_per_gpu': 16, 'use_dynamic_bsz': False, 'ppo_max_token_len_per_gpu': 16384, 'clip_ratio': 0.2, 'clip_ratio_low': 0.2, 'clip_ratio_high': 0.2, 'tau_pos': 1.0, 'tau_neg': 1.05, 'freeze_vision_tower': False, 'policy_loss': {'_target_': 'verl.workers.config.PolicyLossConfig', 'loss_mode': 'vanilla', 'clip_cov_ratio': 0.0002, 'clip_cov_lb': 1.0, 'clip_cov_ub': 5.0, 'kl_cov_ratio': 0.0002, 'ppo_kl_coef': 0.1}, 'clip_ratio_c': 3.0, 'loss_agg_mode': 'token-mean', 'loss_scale_factor': None, 'entropy_coeff': 0, 'calculate_entropy': False, 'use_kl_loss': True, 'use_prefix_grouper': False, 'use_torch_compile': True, 'kl_loss_coef': 0.001, 'kl_loss_type': 'low_var_kl', 'ppo_epochs': 1, 'shuffle': False, 'data_loader_seed': 42, 'checkpoint': {'_target_': 'verl.trainer.config.CheckpointConfig', 'save_contents': ['model', 'optimizer', 'extra'], 'load_contents': ['model', 'optimizer', 'extra'], 'async_save': False}, 'use_fused_kernels': False, 'profiler': {'_target_': 'verl.utils.profiler.ProfilerConfig', 'tool': None, 'enable': False, 'all_ranks': False, 'ranks': [], 'save_path': 'outputs/profile', 'tool_config': {'nsys': {'_target_': 'verl.utils.profiler.config.NsightToolConfig', 'discrete': False}, 'npu': {'_target_': 'verl.utils.profiler.config.NPUToolConfig', 'contents': [], 'level': 'level0', 'analysis': True, 'discrete': False}, 'torch': {'_target_': 'verl.utils.profiler.config.TorchProfilerToolConfig', 'contents': [], 'discrete': False}, 'torch_memory': {'_target_': 'verl.utils.profiler.config.TorchMemoryToolConfig', 'trace_alloc_max_entries': 100000, 'stack_depth': 32}}}, 'router_replay': {'_target_': 'verl.workers.config.RouterReplayConfig', 'mode': 'disabled', 'record_file': None, 'replay_file': None}, 'grad_clip': 1.0, 'ulysses_sequence_parallel_size': 1, 'entropy_from_logits_with_chunking': False, 'entropy_checkpointing': False, 'use_remove_padding': True, 'calculate_sum_pi_squared': False, 'sum_pi_squared_checkpointing': False}, 'ref': {'rollout_n': 3, 'strategy': 'fsdp', 'use_torch_compile': True, 'log_prob_micro_batch_size': None, 'log_prob_micro_batch_size_per_gpu': 32, 'log_prob_use_dynamic_bsz': False, 'log_prob_max_token_len_per_gpu': 16384, 'profiler': {'_target_': 'verl.utils.profiler.ProfilerConfig', 'tool': None, 'enable': False, 'all_ranks': False, 'ranks': [], 'save_path': 'outputs/profile', 'tool_config': {'nsys': {'_target_': 'verl.utils.profiler.config.NsightToolConfig', 'discrete': False}, 'npu': {'_target_': 'verl.utils.profiler.config.NPUToolConfig', 'contents': [], 'level': 'level0', 'analysis': True, 'discrete': False}, 'torch': {'_target_': 'verl.utils.profiler.config.TorchProfilerToolConfig', 'contents': [], 'discrete': False}, 'torch_memory': {'_target_': 'verl.utils.profiler.config.TorchMemoryToolConfig', 'trace_alloc_max_entries': 100000, 'stack_depth': 32}}}, 'router_replay': {'_target_': 'verl.workers.config.RouterReplayConfig', 'mode': 'disabled', 'record_file': None, 'replay_file': None}, 'fsdp_config': {'_target_': 'verl.workers.config.FSDPEngineConfig', 'wrap_policy': {'min_num_params': 0}, 'param_offload': True, 'optimizer_offload': False, 'offload_policy': False, 'reshard_after_forward': True, 'fsdp_size': -1, 'forward_prefetch': False, 'model_dtype': 'fp32', 'use_orig_params': False, 'seed': 42, 'full_determinism': False, 'ulysses_sequence_parallel_size': 1, 'entropy_from_logits_with_chunking': False, 'use_torch_compile': True, 'entropy_checkpointing': False, 'forward_only': True, 'strategy': 'fsdp', 'dtype': 'bfloat16'}, '_target_': 'verl.workers.config.FSDPActorConfig', 'ulysses_sequence_parallel_size': 1, 'entropy_from_logits_with_chunking': False, 'entropy_checkpointing': False}, 'rollout': {'_target_': 'verl.workers.config.RolloutConfig', 'name': 'vllm', 'mode': 'async', 'temperature': 1.0, 'top_k': -1, 'top_p': 1, 'prompt_length': 1024, 'response_length': 2048, 'dtype': 'bfloat16', 'gpu_memory_utilization': 0.4, 'ignore_eos': False, 'enforce_eager': True, 'cudagraph_capture_sizes': None, 'free_cache_engine': True, 'tensor_model_parallel_size': 1, 'data_parallel_size': 1, 'expert_parallel_size': 1, 'pipeline_model_parallel_size': 1, 'max_num_batched_tokens': 8192, 'max_model_len': 8192, 'max_num_seqs': 1024, 'enable_chunked_prefill': True, 'enable_prefix_caching': True, 'logprobs_mode': 'processed_logprobs', 'scheduling_policy': 'fcfs', 'load_format': 'dummy', 'log_prob_micro_batch_size': None, 'log_prob_micro_batch_size_per_gpu': 32, 'log_prob_use_dynamic_bsz': False, 'log_prob_max_token_len_per_gpu': 16384, 'disable_log_stats': True, 'do_sample': True, 'n': 3, 'over_sample_rate': 0, 'multi_stage_wake_up': False, 'engine_kwargs': {'vllm': {}, 'sglang': {}, 'trtllm': {}}, 'val_kwargs': {'_target_': 'verl.workers.config.SamplingConfig', 'top_k': -1, 'top_p': 1.0, 'temperature': 0, 'n': 1, 'do_sample': False}, 'multi_turn': {'_target_': 'verl.workers.config.MultiTurnConfig', 'enable': False, 'max_assistant_turns': None, 'tool_config_path': None, 'max_user_turns': None, 'max_parallel_calls': 1, 'max_tool_response_length': 256, 'tool_response_truncate_side': 'middle', 'interaction_config_path': None, 'use_inference_chat_template': False, 'tokenization_sanity_check_mode': 'strict', 'format': 'hermes', 'num_repeat_rollouts': None}, 'calculate_log_probs': False, 'agent': {'_target_': 'verl.workers.config.AgentLoopConfig', 'num_workers': 8, 'default_agent_loop': 'single_turn_agent', 'agent_loop_config_path': None, 'custom_async_server': {'_target_': 'verl.workers.config.CustomAsyncServerConfig', 'path': None, 'name': None}}, 'checkpoint_engine': {'_target_': 'verl.workers.config.CheckpointEngineConfig', 'backend': 'naive', 'update_weights_bucket_megabytes': 2048, 'engine_kwargs': {}}, 'trace': {'_target_': 'verl.workers.config.TraceConfig', 'backend': None, 'token2text': False, 'max_samples_per_step_per_worker': None}, 'skip_rollout': False, 'skip_dump_dir': '/tmp/rollout_dump', 'skip_tokenizer_init': True, 'enable_rollout_routing_replay': False, 'profiler': {'_target_': 'verl.utils.profiler.ProfilerConfig', 'tool': None, 'enable': False, 'all_ranks': False, 'ranks': [], 'save_path': 'outputs/profile', 'tool_config': {'nsys': {'_target_': 'verl.utils.profiler.config.NsightToolConfig', 'discrete': False}, 'npu': {'_target_': 'verl.utils.profiler.config.NPUToolConfig', 'contents': [], 'level': 'level0', 'analysis': True, 'discrete': False}, 'torch': {'_target_': 'verl.utils.profiler.config.TorchProfilerToolConfig', 'contents': [], 'discrete': False}, 'torch_memory': {'_target_': 'verl.utils.profiler.config.TorchMemoryToolConfig', 'trace_alloc_max_entries': 100000, 'stack_depth': 32}}}, 'prometheus': {'_target_': 'verl.workers.config.PrometheusConfig', 'enable': False, 'port': 9090, 'file': '/tmp/ray/session_latest/metrics/prometheus/prometheus.yml', 'served_model_name': 'Qwen/Qwen3-4B-Instruct-2507'}, 'quantization': None, 'quantization_config_file': None, 'mtp': {'_target_': 'verl.workers.config.MtpConfig', 'enable': False, 'enable_train': False, 'enable_rollout': False, 'detach_encoder': False, 'mtp_loss_scaling_factor': 0.1, 'speculative_algorithm': 'EAGLE', 'speculative_num_steps': 3, 'speculative_eagle_topk': 1, 'speculative_num_draft_tokens': 4, 'method': 'mtp', 'num_speculative_tokens': 1}, 'layered_summon': False}, 'model': {'_target_': 'verl.workers.config.HFModelConfig', 'path': 'Qwen/Qwen3-4B-Instruct-2507', 'hf_config_path': None, 'tokenizer_path': None, 'use_shm': False, 'trust_remote_code': False, 'custom_chat_template': None, 'external_lib': None, 'override_config': {}, 'enable_gradient_checkpointing': True, 'enable_activation_offload': False, 'use_remove_padding': True, 'lora_rank': 0, 'lora_alpha': 16, 'target_modules': 'all-linear', 'exclude_modules': None, 'lora_adapter_path': None, 'use_liger': False, 'use_fused_kernels': False, 'fused_kernel_options': {'impl_backend': 'torch'}, 'tiled_mlp': {'enabled': False, 'num_shards': 4}, 'mtp': {'_target_': 'verl.workers.config.MtpConfig', 'enable': False, 'enable_train': False, 'enable_rollout': False, 'detach_encoder': False, 'mtp_loss_scaling_factor': 0.1, 'speculative_algorithm': 'EAGLE', 'speculative_num_steps': 3, 'speculative_eagle_topk': 1, 'speculative_num_draft_tokens': 4, 'method': 'mtp', 'num_speculative_tokens': 1}}, 'hybrid_engine': True, 'nccl_timeout': 600}, 'data': {'tokenizer': None, 'use_shm': False, 'train_files': '/home/mshahidul/readctrl/code/RL_model/verl/verl_train/dataset/train.parquet', 'val_files': '/home/mshahidul/readctrl/code/RL_model/verl/verl_train/dataset/test.parquet', 'train_max_samples': -1, 'val_max_samples': -1, 'prompt_key': 'prompt', 'reward_fn_key': 'data_source', 'max_prompt_length': 1024, 'max_response_length': 2048, 'train_batch_size': 512, 'val_batch_size': None, 'tool_config_path': None, 'return_raw_input_ids': False, 'return_raw_chat': True, 'return_full_prompt': False, 'shuffle': True, 'seed': None, 'dataloader_num_workers': 8, 'image_patch_size': 14, 'validation_shuffle': False, 'filter_overlong_prompts': True, 'filter_overlong_prompts_workers': 1, 'truncation': 'error', 'image_key': 'images', 'video_key': 'videos', 'trust_remote_code': False, 'custom_cls': {'path': None, 'name': None}, 'return_multi_modal_inputs': True, 'sampler': {'class_path': None, 'class_name': None}, 'datagen': {'path': None, 'name': None}, 'apply_chat_template_kwargs': {}}, 'reward_manager': {'_target_': 'verl.trainer.config.config.RewardManagerConfig', 'source': 'register', 'name': 'naive', 'module': {'_target_': 'verl.trainer.config.config.ModuleConfig', 'path': None, 'name': 'custom_reward_manager'}}, 'critic': {'optim': {'_target_': 'verl.workers.config.FSDPOptimizerConfig', 'optimizer': 'AdamW', 'optimizer_impl': 'torch.optim', 'lr': 1e-05, 'lr_warmup_steps_ratio': 0.0, 'total_training_steps': 45, 'weight_decay': 0.01, 'lr_warmup_steps': -1, 'betas': [0.9, 0.999], 'clip_grad': 1.0, 'min_lr_ratio': 0.0, 'num_cycles': 0.5, 'lr_scheduler_type': 'constant', 'warmup_style': None, 'override_optimizer_config': None}, 'model': {'fsdp_config': {'_target_': 'verl.workers.config.FSDPEngineConfig', 'wrap_policy': {'min_num_params': 0}, 'param_offload': False, 'optimizer_offload': False, 'offload_policy': False, 'reshard_after_forward': True, 'fsdp_size': -1, 'forward_prefetch': False, 'model_dtype': 'fp32', 'use_orig_params': False, 'seed': 42, 'full_determinism': False, 'ulysses_sequence_parallel_size': 1, 'entropy_from_logits_with_chunking': False, 'use_torch_compile': True, 'entropy_checkpointing': False, 'forward_only': False, 'strategy': 'fsdp', 'dtype': 'bfloat16'}, 'path': '~/models/deepseek-llm-7b-chat', 'tokenizer_path': 'Qwen/Qwen3-4B-Instruct-2507', 'override_config': {}, 'external_lib': None, 'trust_remote_code': False, '_target_': 'verl.workers.config.FSDPCriticModelCfg', 'use_shm': False, 'enable_gradient_checkpointing': True, 'enable_activation_offload': False, 'use_remove_padding': False, 'lora_rank': 0, 'lora_alpha': 16, 'target_modules': 'all-linear', 'tiled_mlp': {'enabled': False, 'num_shards': 4}}, '_target_': 'verl.workers.config.FSDPCriticConfig', 'rollout_n': 3, 'strategy': 'fsdp', 'enable': None, 'ppo_mini_batch_size': 256, 'ppo_micro_batch_size': None, 'ppo_micro_batch_size_per_gpu': None, 'use_dynamic_bsz': False, 'ppo_max_token_len_per_gpu': 32768, 'forward_max_token_len_per_gpu': 32768, 'ppo_epochs': 1, 'shuffle': False, 'data_loader_seed': 42, 'cliprange_value': 0.5, 'loss_agg_mode': 'token-mean', 'checkpoint': {'_target_': 'verl.trainer.config.CheckpointConfig', 'save_contents': ['model', 'optimizer', 'extra'], 'load_contents': ['model', 'optimizer', 'extra'], 'async_save': False}, 'profiler': {'_target_': 'verl.utils.profiler.ProfilerConfig', 'tool': None, 'enable': False, 'all_ranks': False, 'ranks': [], 'save_path': 'outputs/profile', 'tool_config': {'nsys': {'_target_': 'verl.utils.profiler.config.NsightToolConfig', 'discrete': False}, 'npu': {'_target_': 'verl.utils.profiler.config.NPUToolConfig', 'contents': [], 'level': 'level0', 'analysis': True, 'discrete': False}, 'torch': {'_target_': 'verl.utils.profiler.config.TorchProfilerToolConfig', 'contents': [], 'discrete': False}, 'torch_memory': {'_target_': 'verl.utils.profiler.config.TorchMemoryToolConfig', 'trace_alloc_max_entries': 100000, 'stack_depth': 32}}}, 'forward_micro_batch_size': None, 'forward_micro_batch_size_per_gpu': None, 'ulysses_sequence_parallel_size': 1, 'grad_clip': 1.0}, 'reward_model': {'enable': False, 'enable_resource_pool': False, 'n_gpus_per_node': 8, 'nnodes': 0, 'strategy': 'fsdp', 'model': {'input_tokenizer': 'Qwen/Qwen3-4B-Instruct-2507', 'path': '~/models/FsfairX-LLaMA3-RM-v0.1', 'external_lib': None, 'trust_remote_code': False, 'override_config': {}, 'use_shm': False, 'use_remove_padding': False, 'use_fused_kernels': False, 'fsdp_config': {'_target_': 'verl.workers.config.FSDPEngineConfig', 'wrap_policy': {'min_num_params': 0}, 'param_offload': False, 'reshard_after_forward': True, 'fsdp_size': -1, 'forward_prefetch': False}}, 'micro_batch_size': None, 'micro_batch_size_per_gpu': None, 'max_length': None, 'use_dynamic_bsz': False, 'forward_max_token_len_per_gpu': 32768, 'reward_manager': 'naive', 'reward_loop_source': 'register', 'reward_loop_module_path': None, 'reward_loop_class_name': None, 'launch_reward_fn_async': False, 'sandbox_fusion': {'url': None, 'max_concurrent': 64, 'memory_limit_mb': 1024}, 'profiler': {'_target_': 'verl.utils.profiler.ProfilerConfig', 'tool': None, 'enable': False, 'all_ranks': False, 'ranks': [], 'save_path': 'outputs/profile', 'tool_config': {'nsys': {'_target_': 'verl.utils.profiler.config.NsightToolConfig', 'discrete': False}, 'npu': {'_target_': 'verl.utils.profiler.config.NPUToolConfig', 'contents': [], 'level': 'level0', 'analysis': True, 'discrete': False}, 'torch': {'_target_': 'verl.utils.profiler.config.TorchProfilerToolConfig', 'contents': [], 'discrete': False}, 'torch_memory': {'_target_': 'verl.utils.profiler.config.TorchMemoryToolConfig', 'trace_alloc_max_entries': 100000, 'stack_depth': 32}}}, 'ulysses_sequence_parallel_size': 1, 'use_reward_loop': True, 'num_workers': 1, 'rollout': {'_target_': 'verl.workers.config.RolloutConfig', 'name': '???', 'dtype': 'bfloat16', 'gpu_memory_utilization': 0.5, 'enforce_eager': True, 'cudagraph_capture_sizes': None, 'free_cache_engine': True, 'data_parallel_size': 1, 'expert_parallel_size': 1, 'tensor_model_parallel_size': 2, 'max_num_batched_tokens': 8192, 'max_model_len': None, 'max_num_seqs': 1024, 'load_format': 'auto', 'engine_kwargs': {}, 'limit_images': None, 'enable_chunked_prefill': True, 'enable_prefix_caching': True, 'disable_log_stats': True, 'skip_tokenizer_init': False, 'prompt_length': 2048, 'response_length': 2048}}, 'algorithm': {'rollout_correction': {'rollout_is': None, 'rollout_is_threshold': 2.0, 'rollout_rs': None, 'rollout_rs_threshold': None, 'bypass_mode': False, 'loss_type': 'ppo_clip', 'rollout_is_batch_normalize': False}, '_target_': 'verl.trainer.config.AlgoConfig', 'gamma': 1.0, 'lam': 1.0, 'adv_estimator': 'grpo', 'norm_adv_by_std_in_grpo': True, 'use_kl_in_reward': False, 'kl_penalty': 'kl', 'kl_ctrl': {'_target_': 'verl.trainer.config.KLControlConfig', 'type': 'fixed', 'kl_coef': 0.001, 'horizon': 10000, 'target_kl': 0.1}, 'use_pf_ppo': False, 'pf_ppo': {'reweight_method': 'pow', 'weight_pow': 2.0}}, 'custom_reward_function': {'path': '/home/mshahidul/readctrl/code/RL_model/verl/verl_train/reward_func/reward_func/reward_new_v2.py', 'name': 'compute_score'}, 'trainer': {'balance_batch': True, 'total_epochs': 15, 'total_training_steps': None, 'project_name': 'readctrl-verl', 'experiment_name': 'qwen3-4b-instruct-en', 'logger': ['console', 'wandb'], 'log_val_generations': 0, 'rollout_data_dir': None, 'validation_data_dir': None, 'nnodes': 1, 'n_gpus_per_node': 2, 'save_freq': 5, 'esi_redundant_time': 0, 'resume_mode': 'auto', 'resume_from_path': None, 'val_before_train': True, 'val_only': False, 'test_freq': 10, 'critic_warmup': 0, 'default_hdfs_dir': None, 'del_local_ckpt_after_load': False, 'default_local_dir': '/home/mshahidul/readctrl/code/RL_model/models/RL_model_subclaim_classifier_v2', 'max_actor_ckpt_to_keep': 1, 'max_critic_ckpt_to_keep': 1, 'ray_wait_register_center_timeout': 300, 'device': 'cuda', 'use_legacy_worker_impl': 'auto', 'remove_previous_ckpt_in_save': True}, 'global_profiler': {'_target_': 'verl.utils.profiler.ProfilerConfig', 'tool': None, 'steps': None, 'profile_continuous_steps': False, 'save_path': 'outputs/profile', 'global_tool_config': {'nsys': {'_target_': 'verl.utils.profiler.config.NsightToolConfig', 'discrete': False, 'controller_nsight_options': {'trace': 'cuda,nvtx,cublas,ucx', 'cuda-memory-usage': 'true', 'cuda-graph-trace': 'graph'}, 'worker_nsight_options': {'trace': 'cuda,nvtx,cublas,ucx', 'cuda-memory-usage': 'true', 'cuda-graph-trace': 'graph', 'capture-range': 'cudaProfilerApi', 'capture-range-end': None, 'kill': 'none'}}, 'torch_memory': {'trace_alloc_max_entries': 100000, 'stack_depth': 32, 'context': 'all', 'stacks': 'all', 'kw_args': {}}}}, 'transfer_queue': {'enable': False}, 'ray_kwargs': {'ray_init': {'num_cpus': None}, 'timeline_json_file': None}, '_wandb': {}} |