| Python version: | \n", + "3.9.2 | \n", + "
| Ray version: | \n", + "2.10.0 | \n", + "
Please try again in 30 seconds.\n
\n\n"} +{"time":"2026-02-07T06:36:33.048102186-05:00","level":"INFO","msg":"api: retrying error","error":"Post \"https://api.wandb.ai/files/shahidulshakib034-khulna-university-of-engineering-techn/readctrl-verl/0ndh0r3l/file_stream\": net/http: request canceled (Client.Timeout exceeded while awaiting headers)"} diff --git a/code/RL_model/verl/verl_train/wandb/run-20260206_205901-0ndh0r3l/logs/debug.log b/code/RL_model/verl/verl_train/wandb/run-20260206_205901-0ndh0r3l/logs/debug.log new file mode 100644 index 0000000000000000000000000000000000000000..e2190c20c19927122c1b5bf5f7e7d6837678acdf --- /dev/null +++ b/code/RL_model/verl/verl_train/wandb/run-20260206_205901-0ndh0r3l/logs/debug.log @@ -0,0 +1,19 @@ +2026-02-06 20:59:01,529 INFO MainThread:3004444 [wandb_setup.py:_flush():81] Current SDK version is 0.24.1 +2026-02-06 20:59:01,529 INFO MainThread:3004444 [wandb_setup.py:_flush():81] Configure stats pid to 3004444 +2026-02-06 20:59:01,529 INFO MainThread:3004444 [wandb_setup.py:_flush():81] Loading settings from environment variables +2026-02-06 20:59:01,529 INFO MainThread:3004444 [wandb_init.py:setup_run_log_directory():717] Logging user logs to /data/home_beta/mshahidul/readctrl/code/RL_model/verl/verl_train/wandb/run-20260206_205901-0ndh0r3l/logs/debug.log +2026-02-06 20:59:01,530 INFO MainThread:3004444 [wandb_init.py:setup_run_log_directory():718] Logging internal logs to /data/home_beta/mshahidul/readctrl/code/RL_model/verl/verl_train/wandb/run-20260206_205901-0ndh0r3l/logs/debug-internal.log +2026-02-06 20:59:01,530 INFO MainThread:3004444 [wandb_init.py:init():844] calling init triggers +2026-02-06 20:59:01,531 INFO MainThread:3004444 [wandb_init.py:init():849] wandb.init called with sweep_config: {} +config: {'actor_rollout_ref': {'actor': {'optim': {'_target_': 'verl.workers.config.FSDPOptimizerConfig', 'optimizer': 'AdamW', 'optimizer_impl': 'torch.optim', 'lr': 1e-06, 'lr_warmup_steps_ratio': 0.0, 'total_training_steps': 90, 'weight_decay': 0.01, 'lr_warmup_steps': -1, 'betas': [0.9, 0.999], 'clip_grad': 1.0, 'min_lr_ratio': 0.0, 'num_cycles': 0.5, 'lr_scheduler_type': 'constant', 'warmup_style': None, 'override_optimizer_config': None}, 'fsdp_config': {'_target_': 'verl.workers.config.FSDPEngineConfig', 'wrap_policy': {'min_num_params': 0}, 'param_offload': False, 'optimizer_offload': False, 'offload_policy': False, 'reshard_after_forward': True, 'fsdp_size': -1, 'forward_prefetch': False, 'model_dtype': 'fp32', 'use_orig_params': False, 'seed': 42, 'full_determinism': False, 'ulysses_sequence_parallel_size': 1, 'entropy_from_logits_with_chunking': False, 'use_torch_compile': True, 'entropy_checkpointing': False, 'forward_only': False, 'strategy': 'fsdp', 'dtype': 'bfloat16'}, '_target_': 'verl.workers.config.FSDPActorConfig', 'rollout_n': 3, 'strategy': 'fsdp', 'ppo_mini_batch_size': 256, 'ppo_micro_batch_size': None, 'ppo_micro_batch_size_per_gpu': 32, 'use_dynamic_bsz': False, 'ppo_max_token_len_per_gpu': 16384, 'clip_ratio': 0.2, 'clip_ratio_low': 0.2, 'clip_ratio_high': 0.2, 'tau_pos': 1.0, 'tau_neg': 1.05, 'freeze_vision_tower': False, 'policy_loss': {'_target_': 'verl.workers.config.PolicyLossConfig', 'loss_mode': 'vanilla', 'clip_cov_ratio': 0.0002, 'clip_cov_lb': 1.0, 'clip_cov_ub': 5.0, 'kl_cov_ratio': 0.0002, 'ppo_kl_coef': 0.1}, 'clip_ratio_c': 3.0, 'loss_agg_mode': 'token-mean', 'loss_scale_factor': None, 'entropy_coeff': 0, 'calculate_entropy': False, 'use_kl_loss': True, 'use_prefix_grouper': False, 'use_torch_compile': True, 'kl_loss_coef': 0.001, 'kl_loss_type': 'low_var_kl', 'ppo_epochs': 1, 'shuffle': False, 'data_loader_seed': 42, 'checkpoint': {'_target_': 'verl.trainer.config.CheckpointConfig', 'save_contents': ['model', 'optimizer', 'extra'], 'load_contents': ['model', 'optimizer', 'extra'], 'async_save': False}, 'use_fused_kernels': False, 'profiler': {'_target_': 'verl.utils.profiler.ProfilerConfig', 'tool': None, 'enable': False, 'all_ranks': False, 'ranks': [], 'save_path': 'outputs/profile', 'tool_config': {'nsys': {'_target_': 'verl.utils.profiler.config.NsightToolConfig', 'discrete': False}, 'npu': {'_target_': 'verl.utils.profiler.config.NPUToolConfig', 'contents': [], 'level': 'level0', 'analysis': True, 'discrete': False}, 'torch': {'_target_': 'verl.utils.profiler.config.TorchProfilerToolConfig', 'contents': [], 'discrete': False}, 'torch_memory': {'_target_': 'verl.utils.profiler.config.TorchMemoryToolConfig', 'trace_alloc_max_entries': 100000, 'stack_depth': 32}}}, 'router_replay': {'_target_': 'verl.workers.config.RouterReplayConfig', 'mode': 'disabled', 'record_file': None, 'replay_file': None}, 'grad_clip': 1.0, 'ulysses_sequence_parallel_size': 1, 'entropy_from_logits_with_chunking': False, 'entropy_checkpointing': False, 'use_remove_padding': True, 'calculate_sum_pi_squared': False, 'sum_pi_squared_checkpointing': False}, 'ref': {'rollout_n': 3, 'strategy': 'fsdp', 'use_torch_compile': True, 'log_prob_micro_batch_size': None, 'log_prob_micro_batch_size_per_gpu': 32, 'log_prob_use_dynamic_bsz': False, 'log_prob_max_token_len_per_gpu': 16384, 'profiler': {'_target_': 'verl.utils.profiler.ProfilerConfig', 'tool': None, 'enable': False, 'all_ranks': False, 'ranks': [], 'save_path': 'outputs/profile', 'tool_config': {'nsys': {'_target_': 'verl.utils.profiler.config.NsightToolConfig', 'discrete': False}, 'npu': {'_target_': 'verl.utils.profiler.config.NPUToolConfig', 'contents': [], 'level': 'level0', 'analysis': True, 'discrete': False}, 'torch': {'_target_': 'verl.utils.profiler.config.TorchProfilerToolConfig', 'contents': [], 'discrete': False}, 'torch_memory': {'_target_': 'verl.utils.profiler.config.TorchMemoryToolConfig', 'trace_alloc_max_entries': 100000, 'stack_depth': 32}}}, 'router_replay': {'_target_': 'verl.workers.config.RouterReplayConfig', 'mode': 'disabled', 'record_file': None, 'replay_file': None}, 'fsdp_config': {'_target_': 'verl.workers.config.FSDPEngineConfig', 'wrap_policy': {'min_num_params': 0}, 'param_offload': False, 'optimizer_offload': False, 'offload_policy': False, 'reshard_after_forward': True, 'fsdp_size': -1, 'forward_prefetch': False, 'model_dtype': 'fp32', 'use_orig_params': False, 'seed': 42, 'full_determinism': False, 'ulysses_sequence_parallel_size': 1, 'entropy_from_logits_with_chunking': False, 'use_torch_compile': True, 'entropy_checkpointing': False, 'forward_only': True, 'strategy': 'fsdp', 'dtype': 'bfloat16'}, '_target_': 'verl.workers.config.FSDPActorConfig', 'ulysses_sequence_parallel_size': 1, 'entropy_from_logits_with_chunking': False, 'entropy_checkpointing': False}, 'rollout': {'_target_': 'verl.workers.config.RolloutConfig', 'name': 'vllm', 'mode': 'async', 'temperature': 1.0, 'top_k': -1, 'top_p': 1, 'prompt_length': 1024, 'response_length': 2048, 'dtype': 'bfloat16', 'gpu_memory_utilization': 0.6, 'ignore_eos': False, 'enforce_eager': False, 'cudagraph_capture_sizes': None, 'free_cache_engine': True, 'tensor_model_parallel_size': 1, 'data_parallel_size': 1, 'expert_parallel_size': 1, 'pipeline_model_parallel_size': 1, 'max_num_batched_tokens': 8192, 'max_model_len': 8192, 'max_num_seqs': 1024, 'enable_chunked_prefill': True, 'enable_prefix_caching': True, 'logprobs_mode': 'processed_logprobs', 'scheduling_policy': 'fcfs', 'load_format': 'dummy', 'log_prob_micro_batch_size': None, 'log_prob_micro_batch_size_per_gpu': 32, 'log_prob_use_dynamic_bsz': False, 'log_prob_max_token_len_per_gpu': 16384, 'disable_log_stats': True, 'do_sample': True, 'n': 3, 'over_sample_rate': 0, 'multi_stage_wake_up': False, 'engine_kwargs': {'vllm': {}, 'sglang': {}, 'trtllm': {}}, 'val_kwargs': {'_target_': 'verl.workers.config.SamplingConfig', 'top_k': -1, 'top_p': 1.0, 'temperature': 0, 'n': 1, 'do_sample': False}, 'multi_turn': {'_target_': 'verl.workers.config.MultiTurnConfig', 'enable': False, 'max_assistant_turns': None, 'tool_config_path': None, 'max_user_turns': None, 'max_parallel_calls': 1, 'max_tool_response_length': 256, 'tool_response_truncate_side': 'middle', 'interaction_config_path': None, 'use_inference_chat_template': False, 'tokenization_sanity_check_mode': 'strict', 'format': 'hermes', 'num_repeat_rollouts': None}, 'calculate_log_probs': False, 'agent': {'_target_': 'verl.workers.config.AgentLoopConfig', 'num_workers': 8, 'default_agent_loop': 'single_turn_agent', 'agent_loop_config_path': None, 'custom_async_server': {'_target_': 'verl.workers.config.CustomAsyncServerConfig', 'path': None, 'name': None}}, 'checkpoint_engine': {'_target_': 'verl.workers.config.CheckpointEngineConfig', 'backend': 'naive', 'update_weights_bucket_megabytes': 2048, 'engine_kwargs': {}}, 'trace': {'_target_': 'verl.workers.config.TraceConfig', 'backend': None, 'token2text': False, 'max_samples_per_step_per_worker': None}, 'skip_rollout': False, 'skip_dump_dir': '/tmp/rollout_dump', 'skip_tokenizer_init': True, 'enable_rollout_routing_replay': False, 'profiler': {'_target_': 'verl.utils.profiler.ProfilerConfig', 'tool': None, 'enable': False, 'all_ranks': False, 'ranks': [], 'save_path': 'outputs/profile', 'tool_config': {'nsys': {'_target_': 'verl.utils.profiler.config.NsightToolConfig', 'discrete': False}, 'npu': {'_target_': 'verl.utils.profiler.config.NPUToolConfig', 'contents': [], 'level': 'level0', 'analysis': True, 'discrete': False}, 'torch': {'_target_': 'verl.utils.profiler.config.TorchProfilerToolConfig', 'contents': [], 'discrete': False}, 'torch_memory': {'_target_': 'verl.utils.profiler.config.TorchMemoryToolConfig', 'trace_alloc_max_entries': 100000, 'stack_depth': 32}}}, 'prometheus': {'_target_': 'verl.workers.config.PrometheusConfig', 'enable': False, 'port': 9090, 'file': '/tmp/ray/session_latest/metrics/prometheus/prometheus.yml', 'served_model_name': 'Qwen/Qwen3-4B-Instruct-2507'}, 'quantization': None, 'quantization_config_file': None, 'mtp': {'_target_': 'verl.workers.config.MtpConfig', 'enable': False, 'enable_train': False, 'enable_rollout': False, 'detach_encoder': False, 'mtp_loss_scaling_factor': 0.1, 'speculative_algorithm': 'EAGLE', 'speculative_num_steps': 3, 'speculative_eagle_topk': 1, 'speculative_num_draft_tokens': 4, 'method': 'mtp', 'num_speculative_tokens': 1}, 'layered_summon': False}, 'model': {'_target_': 'verl.workers.config.HFModelConfig', 'path': 'Qwen/Qwen3-4B-Instruct-2507', 'hf_config_path': None, 'tokenizer_path': None, 'use_shm': False, 'trust_remote_code': False, 'custom_chat_template': None, 'external_lib': None, 'override_config': {}, 'enable_gradient_checkpointing': True, 'enable_activation_offload': False, 'use_remove_padding': True, 'lora_rank': 0, 'lora_alpha': 16, 'target_modules': 'all-linear', 'exclude_modules': None, 'lora_adapter_path': None, 'use_liger': False, 'use_fused_kernels': False, 'fused_kernel_options': {'impl_backend': 'torch'}, 'tiled_mlp': {'enabled': False, 'num_shards': 4}, 'mtp': {'_target_': 'verl.workers.config.MtpConfig', 'enable': False, 'enable_train': False, 'enable_rollout': False, 'detach_encoder': False, 'mtp_loss_scaling_factor': 0.1, 'speculative_algorithm': 'EAGLE', 'speculative_num_steps': 3, 'speculative_eagle_topk': 1, 'speculative_num_draft_tokens': 4, 'method': 'mtp', 'num_speculative_tokens': 1}}, 'hybrid_engine': True, 'nccl_timeout': 600}, 'data': {'tokenizer': None, 'use_shm': False, 'train_files': '/home/mshahidul/readctrl/code/RL_model/verl/verl_train/dataset/train.parquet', 'val_files': '/home/mshahidul/readctrl/code/RL_model/verl/verl_train/dataset/test.parquet', 'train_max_samples': -1, 'val_max_samples': -1, 'prompt_key': 'prompt', 'reward_fn_key': 'data_source', 'max_prompt_length': 1024, 'max_response_length': 2048, 'train_batch_size': 512, 'val_batch_size': None, 'tool_config_path': None, 'return_raw_input_ids': False, 'return_raw_chat': True, 'return_full_prompt': False, 'shuffle': True, 'seed': None, 'dataloader_num_workers': 8, 'image_patch_size': 14, 'validation_shuffle': False, 'filter_overlong_prompts': True, 'filter_overlong_prompts_workers': 1, 'truncation': 'error', 'image_key': 'images', 'video_key': 'videos', 'trust_remote_code': False, 'custom_cls': {'path': None, 'name': None}, 'return_multi_modal_inputs': True, 'sampler': {'class_path': None, 'class_name': None}, 'datagen': {'path': None, 'name': None}, 'apply_chat_template_kwargs': {}}, 'reward_manager': {'_target_': 'verl.trainer.config.config.RewardManagerConfig', 'source': 'register', 'name': 'naive', 'module': {'_target_': 'verl.trainer.config.config.ModuleConfig', 'path': None, 'name': 'custom_reward_manager'}}, 'critic': {'optim': {'_target_': 'verl.workers.config.FSDPOptimizerConfig', 'optimizer': 'AdamW', 'optimizer_impl': 'torch.optim', 'lr': 1e-05, 'lr_warmup_steps_ratio': 0.0, 'total_training_steps': 90, 'weight_decay': 0.01, 'lr_warmup_steps': -1, 'betas': [0.9, 0.999], 'clip_grad': 1.0, 'min_lr_ratio': 0.0, 'num_cycles': 0.5, 'lr_scheduler_type': 'constant', 'warmup_style': None, 'override_optimizer_config': None}, 'model': {'fsdp_config': {'_target_': 'verl.workers.config.FSDPEngineConfig', 'wrap_policy': {'min_num_params': 0}, 'param_offload': False, 'optimizer_offload': False, 'offload_policy': False, 'reshard_after_forward': True, 'fsdp_size': -1, 'forward_prefetch': False, 'model_dtype': 'fp32', 'use_orig_params': False, 'seed': 42, 'full_determinism': False, 'ulysses_sequence_parallel_size': 1, 'entropy_from_logits_with_chunking': False, 'use_torch_compile': True, 'entropy_checkpointing': False, 'forward_only': False, 'strategy': 'fsdp', 'dtype': 'bfloat16'}, 'path': '~/models/deepseek-llm-7b-chat', 'tokenizer_path': 'Qwen/Qwen3-4B-Instruct-2507', 'override_config': {}, 'external_lib': None, 'trust_remote_code': False, '_target_': 'verl.workers.config.FSDPCriticModelCfg', 'use_shm': False, 'enable_gradient_checkpointing': True, 'enable_activation_offload': False, 'use_remove_padding': False, 'lora_rank': 0, 'lora_alpha': 16, 'target_modules': 'all-linear', 'tiled_mlp': {'enabled': False, 'num_shards': 4}}, '_target_': 'verl.workers.config.FSDPCriticConfig', 'rollout_n': 3, 'strategy': 'fsdp', 'enable': None, 'ppo_mini_batch_size': 256, 'ppo_micro_batch_size': None, 'ppo_micro_batch_size_per_gpu': None, 'use_dynamic_bsz': False, 'ppo_max_token_len_per_gpu': 32768, 'forward_max_token_len_per_gpu': 32768, 'ppo_epochs': 1, 'shuffle': False, 'data_loader_seed': 42, 'cliprange_value': 0.5, 'loss_agg_mode': 'token-mean', 'checkpoint': {'_target_': 'verl.trainer.config.CheckpointConfig', 'save_contents': ['model', 'optimizer', 'extra'], 'load_contents': ['model', 'optimizer', 'extra'], 'async_save': False}, 'profiler': {'_target_': 'verl.utils.profiler.ProfilerConfig', 'tool': None, 'enable': False, 'all_ranks': False, 'ranks': [], 'save_path': 'outputs/profile', 'tool_config': {'nsys': {'_target_': 'verl.utils.profiler.config.NsightToolConfig', 'discrete': False}, 'npu': {'_target_': 'verl.utils.profiler.config.NPUToolConfig', 'contents': [], 'level': 'level0', 'analysis': True, 'discrete': False}, 'torch': {'_target_': 'verl.utils.profiler.config.TorchProfilerToolConfig', 'contents': [], 'discrete': False}, 'torch_memory': {'_target_': 'verl.utils.profiler.config.TorchMemoryToolConfig', 'trace_alloc_max_entries': 100000, 'stack_depth': 32}}}, 'forward_micro_batch_size': None, 'forward_micro_batch_size_per_gpu': None, 'ulysses_sequence_parallel_size': 1, 'grad_clip': 1.0}, 'reward_model': {'enable': False, 'enable_resource_pool': False, 'n_gpus_per_node': 8, 'nnodes': 0, 'strategy': 'fsdp', 'model': {'input_tokenizer': 'Qwen/Qwen3-4B-Instruct-2507', 'path': '~/models/FsfairX-LLaMA3-RM-v0.1', 'external_lib': None, 'trust_remote_code': False, 'override_config': {}, 'use_shm': False, 'use_remove_padding': False, 'use_fused_kernels': False, 'fsdp_config': {'_target_': 'verl.workers.config.FSDPEngineConfig', 'wrap_policy': {'min_num_params': 0}, 'param_offload': False, 'reshard_after_forward': True, 'fsdp_size': -1, 'forward_prefetch': False}}, 'micro_batch_size': None, 'micro_batch_size_per_gpu': None, 'max_length': None, 'use_dynamic_bsz': False, 'forward_max_token_len_per_gpu': 32768, 'reward_manager': 'naive', 'reward_loop_source': 'register', 'reward_loop_module_path': None, 'reward_loop_class_name': None, 'launch_reward_fn_async': False, 'sandbox_fusion': {'url': None, 'max_concurrent': 64, 'memory_limit_mb': 1024}, 'profiler': {'_target_': 'verl.utils.profiler.ProfilerConfig', 'tool': None, 'enable': False, 'all_ranks': False, 'ranks': [], 'save_path': 'outputs/profile', 'tool_config': {'nsys': {'_target_': 'verl.utils.profiler.config.NsightToolConfig', 'discrete': False}, 'npu': {'_target_': 'verl.utils.profiler.config.NPUToolConfig', 'contents': [], 'level': 'level0', 'analysis': True, 'discrete': False}, 'torch': {'_target_': 'verl.utils.profiler.config.TorchProfilerToolConfig', 'contents': [], 'discrete': False}, 'torch_memory': {'_target_': 'verl.utils.profiler.config.TorchMemoryToolConfig', 'trace_alloc_max_entries': 100000, 'stack_depth': 32}}}, 'ulysses_sequence_parallel_size': 1, 'use_reward_loop': True, 'num_workers': 1, 'rollout': {'_target_': 'verl.workers.config.RolloutConfig', 'name': '???', 'dtype': 'bfloat16', 'gpu_memory_utilization': 0.5, 'enforce_eager': True, 'cudagraph_capture_sizes': None, 'free_cache_engine': True, 'data_parallel_size': 1, 'expert_parallel_size': 1, 'tensor_model_parallel_size': 2, 'max_num_batched_tokens': 8192, 'max_model_len': None, 'max_num_seqs': 1024, 'load_format': 'auto', 'engine_kwargs': {}, 'limit_images': None, 'enable_chunked_prefill': True, 'enable_prefix_caching': True, 'disable_log_stats': True, 'skip_tokenizer_init': False, 'prompt_length': 2048, 'response_length': 2048}}, 'algorithm': {'rollout_correction': {'rollout_is': None, 'rollout_is_threshold': 2.0, 'rollout_rs': None, 'rollout_rs_threshold': None, 'bypass_mode': False, 'loss_type': 'ppo_clip', 'rollout_is_batch_normalize': False}, '_target_': 'verl.trainer.config.AlgoConfig', 'gamma': 1.0, 'lam': 1.0, 'adv_estimator': 'grpo', 'norm_adv_by_std_in_grpo': True, 'use_kl_in_reward': False, 'kl_penalty': 'kl', 'kl_ctrl': {'_target_': 'verl.trainer.config.KLControlConfig', 'type': 'fixed', 'kl_coef': 0.001, 'horizon': 10000, 'target_kl': 0.1}, 'use_pf_ppo': False, 'pf_ppo': {'reweight_method': 'pow', 'weight_pow': 2.0}}, 'custom_reward_function': {'path': '/home/mshahidul/readctrl/code/RL_model/verl/verl_train/reward_func/reward.py', 'name': 'compute_score'}, 'trainer': {'balance_batch': True, 'total_epochs': 15, 'total_training_steps': None, 'project_name': 'readctrl-verl', 'experiment_name': 'qwen3-4b-instruct-optimized-multiclinsum-gs', 'logger': ['console', 'wandb'], 'log_val_generations': 0, 'rollout_data_dir': None, 'validation_data_dir': None, 'nnodes': 1, 'n_gpus_per_node': 2, 'save_freq': 20, 'esi_redundant_time': 0, 'resume_mode': 'auto', 'resume_from_path': None, 'val_before_train': True, 'val_only': False, 'test_freq': 5, 'critic_warmup': 0, 'default_hdfs_dir': None, 'del_local_ckpt_after_load': False, 'default_local_dir': '/home/mshahidul/readctrl/code/RL_model/train_v2', 'max_actor_ckpt_to_keep': 1, 'max_critic_ckpt_to_keep': 1, 'ray_wait_register_center_timeout': 300, 'device': 'cuda', 'use_legacy_worker_impl': 'auto', 'remove_previous_ckpt_in_save': True}, 'global_profiler': {'_target_': 'verl.utils.profiler.ProfilerConfig', 'tool': None, 'steps': None, 'profile_continuous_steps': False, 'save_path': 'outputs/profile', 'global_tool_config': {'nsys': {'_target_': 'verl.utils.profiler.config.NsightToolConfig', 'discrete': False, 'controller_nsight_options': {'trace': 'cuda,nvtx,cublas,ucx', 'cuda-memory-usage': 'true', 'cuda-graph-trace': 'graph'}, 'worker_nsight_options': {'trace': 'cuda,nvtx,cublas,ucx', 'cuda-memory-usage': 'true', 'cuda-graph-trace': 'graph', 'capture-range': 'cudaProfilerApi', 'capture-range-end': None, 'kill': 'none'}}, 'torch_memory': {'trace_alloc_max_entries': 100000, 'stack_depth': 32, 'context': 'all', 'stacks': 'all', 'kw_args': {}}}}, 'transfer_queue': {'enable': False}, 'ray_kwargs': {'ray_init': {'num_cpus': None}, 'timeline_json_file': None}, '_wandb': {}} +2026-02-06 20:59:01,531 INFO MainThread:3004444 [wandb_init.py:init():892] starting backend +2026-02-06 20:59:01,822 INFO MainThread:3004444 [wandb_init.py:init():895] sending inform_init request +2026-02-06 20:59:01,831 INFO MainThread:3004444 [wandb_init.py:init():903] backend started and connected +2026-02-06 20:59:01,844 INFO MainThread:3004444 [wandb_init.py:init():973] updated telemetry +2026-02-06 20:59:01,868 INFO MainThread:3004444 [wandb_init.py:init():997] communicating run to backend with 90.0 second timeout +2026-02-06 20:59:02,668 INFO MainThread:3004444 [wandb_init.py:init():1042] starting run threads in backend +2026-02-06 20:59:03,446 INFO MainThread:3004444 [wandb_run.py:_console_start():2529] atexit reg +2026-02-06 20:59:03,446 INFO MainThread:3004444 [wandb_run.py:_redirect():2377] redirect: wrap_raw +2026-02-06 20:59:03,446 INFO MainThread:3004444 [wandb_run.py:_redirect():2446] Wrapping output streams. +2026-02-06 20:59:03,447 INFO MainThread:3004444 [wandb_run.py:_redirect():2469] Redirects installed. +2026-02-06 20:59:03,459 INFO MainThread:3004444 [wandb_init.py:init():1082] run started, returning control to user process diff --git a/code/RL_model/verl/verl_train/wandb/run-20260207_103450-gjiqvndf/files/output.log b/code/RL_model/verl/verl_train/wandb/run-20260207_103450-gjiqvndf/files/output.log new file mode 100644 index 0000000000000000000000000000000000000000..d78af0c289eb892ae52257239a41a200ae2d6cd8 --- /dev/null +++ b/code/RL_model/verl/verl_train/wandb/run-20260207_103450-gjiqvndf/files/output.log @@ -0,0 +1,6 @@ +wandb: Detected [openai] in use. +wandb: Use W&B Weave for improved LLM call tracing. Install Weave with `pip install weave` then add `import weave` to the top of your script. +wandb: For more information, check out the docs at: https://weave-docs.wandb.ai/ +Checkpoint tracker file does not exist: /home/mshahidul/readctrl/code/RL_model/train_v2/latest_checkpointed_iteration.txt +Training from scratch +test_gen_batch meta info: {'eos_token_id': 151645, 'pad_token_id': 151643, 'recompute_log_prob': False, 'do_sample': False, 'validate': True, 'global_steps': 0} diff --git a/code/RL_model/verl/verl_train/wandb/run-20260207_103450-gjiqvndf/files/requirements.txt b/code/RL_model/verl/verl_train/wandb/run-20260207_103450-gjiqvndf/files/requirements.txt new file mode 100644 index 0000000000000000000000000000000000000000..c168a7097ef00c84a34fe1db69f95d6eaafb03f2 --- /dev/null +++ b/code/RL_model/verl/verl_train/wandb/run-20260207_103450-gjiqvndf/files/requirements.txt @@ -0,0 +1,269 @@ +verl==0.8.0.dev0 +psutil==7.1.3 +colorama==0.4.6 +annotated-doc==0.0.4 +sentry-sdk==2.51.0 +requests==2.32.5 +nvidia-cufile-cu12==1.13.1.3 +ml_dtypes==0.5.4 +xformers==0.0.32.post1 +sglang==0.5.2 +multidict==6.7.1 +typing_extensions==4.15.0 +nvidia-cusparselt-cu12==0.7.1 +openai-harmony==0.0.4 +transformers==4.56.1 +Werkzeug==3.1.5 +identify==2.6.16 +pytest==9.0.2 +nvidia-cuda-runtime-cu12==12.8.90 +GitPython==3.1.46 +cupy-cuda12x==13.6.0 +tokenizers==0.22.2 +pybind11==3.0.1 +google-api-core==2.29.0 +partial-json-parser==0.2.1.1.post7 +aiohttp-cors==0.8.1 +sniffio==1.3.1 +tensordict==0.10.0 +smart_open==7.5.0 +cffi==2.0.0 +wcwidth==0.5.3 +asttokens==3.0.1 +opencensus==0.11.4 +rpds-py==0.30.0 +py-spy==0.4.1 +gguf==0.17.1 +nvidia-nvjitlink-cu12==12.8.93 +httpx==0.28.1 +cuda-python==13.1.1 +annotated-types==0.7.0 +regex==2026.1.15 +vllm==0.11.0 +idna==3.11 +fsspec==2025.10.0 +parso==0.8.5 +pydantic-extra-types==2.11.0 +MarkupSafe==3.0.3 +cryptography==46.0.4 +filelock==3.20.3 +modelscope==1.34.0 +outlines==0.1.11 +dnspython==2.8.0 +scipy==1.17.0 +zipp==3.23.0 +PyYAML==6.0.3 +onnx==1.20.1 +torchdata==0.11.0 +cuda-pathfinder==1.3.3 +verl==0.8.0.dev0 +httptools==0.7.1 +opencv-python-headless==4.13.0.90 +importlib_metadata==8.7.1 +peft==0.18.1 +opentelemetry-sdk==1.39.1 +python-json-logger==4.0.0 +cuda-bindings==13.1.1 +mdurl==0.1.2 +referencing==0.37.0 +xxhash==3.6.0 +interegular==0.3.3 +fastapi-cli==0.0.20 +uv==0.9.28 +tensorboard==2.20.0 +nvidia-cublas-cu12==12.8.4.1 +sentencepiece==0.2.1 +rich-toolkit==0.18.1 +numpy==2.2.0 +yarl==1.22.0 +opencv-fixer==0.2.5 +tqdm==4.67.2 +python-dotenv==1.2.1 +timm==1.0.16 +aiohappyeyeballs==2.6.1 +decord==0.6.0 +jiter==0.12.0 +airportsdata==20250909 +markdown-it-py==4.0.0 +nvidia-cusolver-cu12==11.7.3.90 +pyarrow==23.0.0 +opentelemetry-proto==1.39.1 +anyio==4.12.1 +pycryptodomex==3.23.0 +prometheus_client==0.24.1 +aiohttp==3.13.3 +urllib3==2.6.3 +pexpect==4.9.0 +pydantic-settings==2.12.0 +distro==1.9.0 +av==16.1.0 +cloudpickle==3.1.2 +mpmath==1.3.0 +certifi==2026.1.4 +antlr4-python3-runtime==4.9.3 +torchvision==0.23.0 +accelerate==1.12.0 +watchfiles==1.1.1 +ruff==0.14.14 +wheel==0.46.3 +omegaconf==2.3.0 +nvidia-cufft-cu12==11.3.3.83 +multiprocess==0.70.18 +frozendict==2.4.7 +sympy==1.14.0 +setproctitle==1.3.7 +setuptools==79.0.1 +py-cpuinfo==9.0.0 +ipython_pygments_lexers==1.1.1 +rich==14.3.2 +uvicorn==0.40.0 +openai==1.99.1 +outlines_core==0.2.11 +llvmlite==0.44.0 +nvidia-cuda-cupti-cu12==12.8.90 +attrs==25.4.0 +anthropic==0.77.0 +packaging==25.0 +fastrlock==0.8.3 +astor==0.8.1 +pluggy==1.6.0 +nvidia-cuda-nvrtc-cu12==12.8.93 +psutil==7.2.2 +virtualenv==20.36.1 +cbor2==5.8.0 +compressed-tensors==0.11.0 +nvidia-cusparse-cu12==12.5.8.93 +networkx==3.6.1 +httpcore==1.0.9 +onnxscript==0.3.1 +smmap==5.0.2 +opencv-python==4.13.0.90 +traitlets==5.14.3 +python-multipart==0.0.22 +pyvers==0.1.0 +huggingface-hub==0.36.0 +pillow==12.1.0 +jsonschema==4.26.0 +cfgv==3.5.0 +optree==0.18.0 +email-validator==2.3.0 +tabulate==0.9.0 +pre_commit==4.5.1 +msgpack==1.1.2 +depyf==0.19.0 +numba==0.61.2 +six==1.17.0 +aiosignal==1.4.0 +nvidia-nvtx-cu12==12.8.90 +propcache==0.4.1 +torch_memory_saver==0.0.8 +h11==0.16.0 +frozenlist==1.8.0 +websockets==16.0 +nvidia-cudnn-frontend==1.18.0 +build==1.4.0 +google-auth==2.48.0 +pycountry==24.6.1 +stack-data==0.6.3 +typing-inspection==0.4.2 +googleapis-common-protos==1.72.0 +pandas==3.0.0 +typer==0.21.1 +protobuf==6.33.5 +fastapi==0.128.0 +blake3==1.0.8 +opentelemetry-semantic-conventions==0.60b1 +opentelemetry-exporter-prometheus==0.60b1 +nvidia-cudnn-cu12==9.10.2.21 +Markdown==3.10.1 +liger_kernel==0.6.4 +nodeenv==1.10.0 +prompt_toolkit==3.0.52 +torchaudio==2.8.0 +codetiming==1.4.0 +platformdirs==4.5.1 +jsonschema-specifications==2025.9.1 +hydra-core==1.3.2 +tensorboard-data-server==0.7.2 +lm-format-enforcer==0.11.3 +pyasn1_modules==0.4.2 +tiktoken==0.12.0 +starlette==0.50.0 +pyproject_hooks==1.2.0 +flash_attn==2.8.1 +rsa==4.9.1 +ray==2.53.0 +nest-asyncio==1.6.0 +lark==1.2.2 +fastar==0.8.0 +orjson==3.11.6 +prometheus-fastapi-instrumentator==7.1.0 +opentelemetry-api==1.39.1 +mathruler==0.1.0 +pydantic_core==2.41.5 +fastapi-cloud-cli==0.11.0 +pynvml==13.0.1 +loguru==0.7.3 +torch==2.8.0 +msgspec==0.20.0 +nvidia-curand-cu12==10.3.9.90 +blobfile==3.0.0 +gitdb==4.0.12 +llguidance==0.7.30 +hf_transfer==0.1.9 +nvidia-nccl-cu12==2.27.3 +qwen-vl-utils==0.0.14 +ptyprocess==0.7.0 +ipdb==0.13.13 +opencensus-context==0.1.3 +jedi==0.19.2 +click==8.3.1 +datasets==4.5.0 +soxr==1.0.0 +sgl-kernel==0.3.9.post2 +colorful==0.5.8 +pyasn1==0.6.2 +charset-normalizer==3.4.4 +nvidia-ml-py==13.590.48 +hf-xet==1.2.0 +dill==0.4.0 +absl-py==2.4.0 +pydantic==2.12.5 +wrapt==2.1.0 +flashinfer-python==0.3.1 +python-dateutil==2.9.0.post0 +torchao==0.9.0 +cachetools==7.0.0 +soundfile==0.13.1 +diskcache==5.6.3 +onnx-ir==0.1.15 +docstring_parser==0.17.0 +matplotlib-inline==0.2.1 +Pygments==2.19.2 +wandb==0.24.1 +pure_eval==0.2.3 +ninja==1.13.0 +proto-plus==1.27.0 +pyzmq==27.1.0 +iniconfig==2.3.0 +Jinja2==3.1.6 +megatron-core==0.13.1 +uvloop==0.22.1 +pycparser==3.0 +pylatexenc==2.10 +decorator==5.2.1 +shellingham==1.5.4 +lxml==6.0.2 +safetensors==0.7.0 +xgrammar==0.1.25 +pybase64==1.4.3 +ipython==9.9.0 +mistral_common==1.9.0 +rignore==0.7.6 +einops==0.8.2 +distlib==0.4.0 +triton==3.4.0 +executing==2.2.1 +grpcio==1.76.0 +pip==25.3 +verl==0.8.0.dev0 diff --git a/code/RL_model/verl/verl_train/wandb/run-20260207_103450-gjiqvndf/files/wandb-metadata.json b/code/RL_model/verl/verl_train/wandb/run-20260207_103450-gjiqvndf/files/wandb-metadata.json new file mode 100644 index 0000000000000000000000000000000000000000..ace3e2de1a2b39f86fcd6f09e9dd1b9e3a5c1075 --- /dev/null +++ b/code/RL_model/verl/verl_train/wandb/run-20260207_103450-gjiqvndf/files/wandb-metadata.json @@ -0,0 +1,93 @@ +{ + "os": "Linux-5.15.0-160-generic-x86_64-with-glibc2.35", + "python": "CPython 3.12.12", + "startedAt": "2026-02-07T15:34:50.097536Z", + "args": [ + "--node-ip-address=172.16.34.29", + "--node-manager-port=36923", + "--object-store-name=/tmp/ray/session_2026-02-07_10-27-49_585748_201487/sockets/plasma_store", + "--raylet-name=/tmp/ray/session_2026-02-07_10-27-49_585748_201487/sockets/raylet", + "--redis-address=None", + "--metrics-agent-port=62681", + "--logging-rotate-bytes=536870912", + "--logging-rotate-backup-count=5", + "--runtime-env-agent-port=64918", + "--gcs-address=172.16.34.29:55671", + "--session-name=session_2026-02-07_10-27-49_585748_201487", + "--temp-dir=/tmp/ray", + "--webui=127.0.0.1:8301", + "--cluster-id=841daf3bf517d42599bf5437920e93f71bb9f6bdbebef8d87f3e3fb8", + "--startup-token=128", + "--worker-launch-time-ms=1770478087768", + "--node-id=9443de04e0e7b889515a68e44b6547fea2664df6607ccafe65b49f74", + "--runtime-env-hash=1096984665" + ], + "program": "/home/mshahidul/miniconda3/envs/verl2/lib/python3.12/site-packages/ray/_private/workers/default_worker.py", + "git": { + "remote": "https://github.com/verl-project/verl", + "commit": "d9939add7a2a01923a9088891f913a5d20c4e622" + }, + "email": "shahidulshakib034@gmail.com", + "root": "/data/home_beta/mshahidul/readctrl/code/RL_model/verl/verl_train", + "host": "gamma", + "executable": "/home/mshahidul/miniconda3/envs/verl2/bin/python3", + "cpu_count": 64, + "cpu_count_logical": 128, + "gpu": "NVIDIA A100 80GB PCIe", + "gpu_count": 6, + "disk": { + "/": { + "total": "3766429188096", + "used": "188147589120" + } + }, + "memory": { + "total": "1081814863872" + }, + "gpu_nvidia": [ + { + "name": "NVIDIA A100 80GB PCIe", + "memoryTotal": "85899345920", + "cudaCores": 6912, + "architecture": "Ampere", + "uuid": "GPU-4a3678c7-34a9-356f-f7b7-7f7e2f44b596" + }, + { + "name": "NVIDIA A100 80GB PCIe", + "memoryTotal": "85899345920", + "cudaCores": 6912, + "architecture": "Ampere", + "uuid": "GPU-df506764-0db5-91b4-8ec9-154a3bb8123f" + }, + { + "name": "NVIDIA A100 80GB PCIe", + "memoryTotal": "85899345920", + "cudaCores": 6912, + "architecture": "Ampere", + "uuid": "GPU-2c3dbd62-b384-2996-a0f6-b32dcfcc3538" + }, + { + "name": "NVIDIA A100 80GB PCIe", + "memoryTotal": "85899345920", + "cudaCores": 6912, + "architecture": "Ampere", + "uuid": "GPU-1ff3dabe-4b9a-ea62-5cc3-01f12f32d328" + }, + { + "name": "NVIDIA H100 PCIe", + "memoryTotal": "85520809984", + "cudaCores": 14592, + "architecture": "Hopper", + "uuid": "GPU-eefc4b8c-0e79-c1d6-a9ff-8325040572eb" + }, + { + "name": "NVIDIA H100 PCIe", + "memoryTotal": "85520809984", + "cudaCores": 14592, + "architecture": "Hopper", + "uuid": "GPU-d42b6057-13e8-1e88-6aa1-9307df72dece" + } + ], + "cudaVersion": "13.0", + "writerId": "af0nv9x3dpfekkhrsmbujt15hq59tdje" +} \ No newline at end of file diff --git a/code/RL_model/verl/verl_train/wandb/run-20260207_103450-gjiqvndf/logs/debug-core.log b/code/RL_model/verl/verl_train/wandb/run-20260207_103450-gjiqvndf/logs/debug-core.log new file mode 100644 index 0000000000000000000000000000000000000000..a6d4e545eea1e63e4754f59c0e545922cabf10e2 --- /dev/null +++ b/code/RL_model/verl/verl_train/wandb/run-20260207_103450-gjiqvndf/logs/debug-core.log @@ -0,0 +1,7 @@ +{"time":"2026-02-07T10:34:50.270562852-05:00","level":"INFO","msg":"main: starting server","port-filename":"/tmp/tmpv2ofcc1o/port-234834.txt","pid":234834,"log-level":0,"disable-analytics":false,"shutdown-on-parent-exit":false,"enable-dcgm-profiling":false} +{"time":"2026-02-07T10:34:50.272055203-05:00","level":"INFO","msg":"server: will exit if parent process dies","ppid":234834} +{"time":"2026-02-07T10:34:50.272117988-05:00","level":"INFO","msg":"server: accepting connections","addr":{"Name":"/tmp/wandb-234834-250217-273148444/socket","Net":"unix"}} +{"time":"2026-02-07T10:34:50.41166765-05:00","level":"INFO","msg":"connection: ManageConnectionData: new connection created","id":"1(@)"} +{"time":"2026-02-07T10:34:50.429036847-05:00","level":"INFO","msg":"handleInformInit: received","streamId":"gjiqvndf","id":"1(@)"} +{"time":"2026-02-07T10:34:51.354718574-05:00","level":"INFO","msg":"handleInformInit: stream started","streamId":"gjiqvndf","id":"1(@)"} +{"time":"2026-02-07T10:34:57.371677054-05:00","level":"INFO","msg":"connection: cancelling request","id":"1(@)","requestId":"u3iy5baa6jjx"} diff --git a/code/RL_model/verl/verl_train/wandb/run-20260207_103450-gjiqvndf/logs/debug-internal.log b/code/RL_model/verl/verl_train/wandb/run-20260207_103450-gjiqvndf/logs/debug-internal.log new file mode 100644 index 0000000000000000000000000000000000000000..e1696c748f3714c69ee061d88576e8b1eccc5ab5 --- /dev/null +++ b/code/RL_model/verl/verl_train/wandb/run-20260207_103450-gjiqvndf/logs/debug-internal.log @@ -0,0 +1,6 @@ +{"time":"2026-02-07T10:34:50.430956456-05:00","level":"INFO","msg":"stream: starting","core version":"0.24.1"} +{"time":"2026-02-07T10:34:51.352286383-05:00","level":"INFO","msg":"stream: created new stream","id":"gjiqvndf"} +{"time":"2026-02-07T10:34:51.352434377-05:00","level":"INFO","msg":"handler: started","stream_id":"gjiqvndf"} +{"time":"2026-02-07T10:34:51.354691926-05:00","level":"INFO","msg":"stream: started","id":"gjiqvndf"} +{"time":"2026-02-07T10:34:51.354727628-05:00","level":"INFO","msg":"writer: started","stream_id":"gjiqvndf"} +{"time":"2026-02-07T10:34:51.354740753-05:00","level":"INFO","msg":"sender: started","stream_id":"gjiqvndf"} diff --git a/code/RL_model/verl/verl_train/wandb/run-20260207_103450-gjiqvndf/logs/debug.log b/code/RL_model/verl/verl_train/wandb/run-20260207_103450-gjiqvndf/logs/debug.log new file mode 100644 index 0000000000000000000000000000000000000000..2adb1eeb51ead5d5627bb06ed6559d9acf7a852c --- /dev/null +++ b/code/RL_model/verl/verl_train/wandb/run-20260207_103450-gjiqvndf/logs/debug.log @@ -0,0 +1,19 @@ +2026-02-07 10:34:50,120 INFO MainThread:234834 [wandb_setup.py:_flush():81] Current SDK version is 0.24.1 +2026-02-07 10:34:50,120 INFO MainThread:234834 [wandb_setup.py:_flush():81] Configure stats pid to 234834 +2026-02-07 10:34:50,120 INFO MainThread:234834 [wandb_setup.py:_flush():81] Loading settings from environment variables +2026-02-07 10:34:50,121 INFO MainThread:234834 [wandb_init.py:setup_run_log_directory():717] Logging user logs to /data/home_beta/mshahidul/readctrl/code/RL_model/verl/verl_train/wandb/run-20260207_103450-gjiqvndf/logs/debug.log +2026-02-07 10:34:50,121 INFO MainThread:234834 [wandb_init.py:setup_run_log_directory():718] Logging internal logs to /data/home_beta/mshahidul/readctrl/code/RL_model/verl/verl_train/wandb/run-20260207_103450-gjiqvndf/logs/debug-internal.log +2026-02-07 10:34:50,121 INFO MainThread:234834 [wandb_init.py:init():844] calling init triggers +2026-02-07 10:34:50,123 INFO MainThread:234834 [wandb_init.py:init():849] wandb.init called with sweep_config: {} +config: {'actor_rollout_ref': {'actor': {'optim': {'_target_': 'verl.workers.config.FSDPOptimizerConfig', 'optimizer': 'AdamW', 'optimizer_impl': 'torch.optim', 'lr': 1e-06, 'lr_warmup_steps_ratio': 0.0, 'total_training_steps': 90, 'weight_decay': 0.01, 'lr_warmup_steps': -1, 'betas': [0.9, 0.999], 'clip_grad': 1.0, 'min_lr_ratio': 0.0, 'num_cycles': 0.5, 'lr_scheduler_type': 'constant', 'warmup_style': None, 'override_optimizer_config': None}, 'fsdp_config': {'_target_': 'verl.workers.config.FSDPEngineConfig', 'wrap_policy': {'min_num_params': 0}, 'param_offload': False, 'optimizer_offload': False, 'offload_policy': False, 'reshard_after_forward': True, 'fsdp_size': -1, 'forward_prefetch': False, 'model_dtype': 'fp32', 'use_orig_params': False, 'seed': 42, 'full_determinism': False, 'ulysses_sequence_parallel_size': 1, 'entropy_from_logits_with_chunking': False, 'use_torch_compile': True, 'entropy_checkpointing': False, 'forward_only': False, 'strategy': 'fsdp', 'dtype': 'bfloat16'}, '_target_': 'verl.workers.config.FSDPActorConfig', 'rollout_n': 3, 'strategy': 'fsdp', 'ppo_mini_batch_size': 256, 'ppo_micro_batch_size': None, 'ppo_micro_batch_size_per_gpu': 32, 'use_dynamic_bsz': False, 'ppo_max_token_len_per_gpu': 16384, 'clip_ratio': 0.2, 'clip_ratio_low': 0.2, 'clip_ratio_high': 0.2, 'tau_pos': 1.0, 'tau_neg': 1.05, 'freeze_vision_tower': False, 'policy_loss': {'_target_': 'verl.workers.config.PolicyLossConfig', 'loss_mode': 'vanilla', 'clip_cov_ratio': 0.0002, 'clip_cov_lb': 1.0, 'clip_cov_ub': 5.0, 'kl_cov_ratio': 0.0002, 'ppo_kl_coef': 0.1}, 'clip_ratio_c': 3.0, 'loss_agg_mode': 'token-mean', 'loss_scale_factor': None, 'entropy_coeff': 0, 'calculate_entropy': False, 'use_kl_loss': True, 'use_prefix_grouper': False, 'use_torch_compile': True, 'kl_loss_coef': 0.001, 'kl_loss_type': 'low_var_kl', 'ppo_epochs': 1, 'shuffle': False, 'data_loader_seed': 42, 'checkpoint': {'_target_': 'verl.trainer.config.CheckpointConfig', 'save_contents': ['model', 'optimizer', 'extra'], 'load_contents': ['model', 'optimizer', 'extra'], 'async_save': False}, 'use_fused_kernels': False, 'profiler': {'_target_': 'verl.utils.profiler.ProfilerConfig', 'tool': None, 'enable': False, 'all_ranks': False, 'ranks': [], 'save_path': 'outputs/profile', 'tool_config': {'nsys': {'_target_': 'verl.utils.profiler.config.NsightToolConfig', 'discrete': False}, 'npu': {'_target_': 'verl.utils.profiler.config.NPUToolConfig', 'contents': [], 'level': 'level0', 'analysis': True, 'discrete': False}, 'torch': {'_target_': 'verl.utils.profiler.config.TorchProfilerToolConfig', 'contents': [], 'discrete': False}, 'torch_memory': {'_target_': 'verl.utils.profiler.config.TorchMemoryToolConfig', 'trace_alloc_max_entries': 100000, 'stack_depth': 32}}}, 'router_replay': {'_target_': 'verl.workers.config.RouterReplayConfig', 'mode': 'disabled', 'record_file': None, 'replay_file': None}, 'grad_clip': 1.0, 'ulysses_sequence_parallel_size': 1, 'entropy_from_logits_with_chunking': False, 'entropy_checkpointing': False, 'use_remove_padding': True, 'calculate_sum_pi_squared': False, 'sum_pi_squared_checkpointing': False}, 'ref': {'rollout_n': 3, 'strategy': 'fsdp', 'use_torch_compile': True, 'log_prob_micro_batch_size': None, 'log_prob_micro_batch_size_per_gpu': 32, 'log_prob_use_dynamic_bsz': False, 'log_prob_max_token_len_per_gpu': 16384, 'profiler': {'_target_': 'verl.utils.profiler.ProfilerConfig', 'tool': None, 'enable': False, 'all_ranks': False, 'ranks': [], 'save_path': 'outputs/profile', 'tool_config': {'nsys': {'_target_': 'verl.utils.profiler.config.NsightToolConfig', 'discrete': False}, 'npu': {'_target_': 'verl.utils.profiler.config.NPUToolConfig', 'contents': [], 'level': 'level0', 'analysis': True, 'discrete': False}, 'torch': {'_target_': 'verl.utils.profiler.config.TorchProfilerToolConfig', 'contents': [], 'discrete': False}, 'torch_memory': {'_target_': 'verl.utils.profiler.config.TorchMemoryToolConfig', 'trace_alloc_max_entries': 100000, 'stack_depth': 32}}}, 'router_replay': {'_target_': 'verl.workers.config.RouterReplayConfig', 'mode': 'disabled', 'record_file': None, 'replay_file': None}, 'fsdp_config': {'_target_': 'verl.workers.config.FSDPEngineConfig', 'wrap_policy': {'min_num_params': 0}, 'param_offload': False, 'optimizer_offload': False, 'offload_policy': False, 'reshard_after_forward': True, 'fsdp_size': -1, 'forward_prefetch': False, 'model_dtype': 'fp32', 'use_orig_params': False, 'seed': 42, 'full_determinism': False, 'ulysses_sequence_parallel_size': 1, 'entropy_from_logits_with_chunking': False, 'use_torch_compile': True, 'entropy_checkpointing': False, 'forward_only': True, 'strategy': 'fsdp', 'dtype': 'bfloat16'}, '_target_': 'verl.workers.config.FSDPActorConfig', 'ulysses_sequence_parallel_size': 1, 'entropy_from_logits_with_chunking': False, 'entropy_checkpointing': False}, 'rollout': {'_target_': 'verl.workers.config.RolloutConfig', 'name': 'vllm', 'mode': 'async', 'temperature': 1.0, 'top_k': -1, 'top_p': 1, 'prompt_length': 1024, 'response_length': 2048, 'dtype': 'bfloat16', 'gpu_memory_utilization': 0.6, 'ignore_eos': False, 'enforce_eager': False, 'cudagraph_capture_sizes': None, 'free_cache_engine': True, 'tensor_model_parallel_size': 1, 'data_parallel_size': 1, 'expert_parallel_size': 1, 'pipeline_model_parallel_size': 1, 'max_num_batched_tokens': 8192, 'max_model_len': 8192, 'max_num_seqs': 1024, 'enable_chunked_prefill': True, 'enable_prefix_caching': True, 'logprobs_mode': 'processed_logprobs', 'scheduling_policy': 'fcfs', 'load_format': 'dummy', 'log_prob_micro_batch_size': None, 'log_prob_micro_batch_size_per_gpu': 32, 'log_prob_use_dynamic_bsz': False, 'log_prob_max_token_len_per_gpu': 16384, 'disable_log_stats': True, 'do_sample': True, 'n': 3, 'over_sample_rate': 0, 'multi_stage_wake_up': False, 'engine_kwargs': {'vllm': {}, 'sglang': {}, 'trtllm': {}}, 'val_kwargs': {'_target_': 'verl.workers.config.SamplingConfig', 'top_k': -1, 'top_p': 1.0, 'temperature': 0, 'n': 1, 'do_sample': False}, 'multi_turn': {'_target_': 'verl.workers.config.MultiTurnConfig', 'enable': False, 'max_assistant_turns': None, 'tool_config_path': None, 'max_user_turns': None, 'max_parallel_calls': 1, 'max_tool_response_length': 256, 'tool_response_truncate_side': 'middle', 'interaction_config_path': None, 'use_inference_chat_template': False, 'tokenization_sanity_check_mode': 'strict', 'format': 'hermes', 'num_repeat_rollouts': None}, 'calculate_log_probs': False, 'agent': {'_target_': 'verl.workers.config.AgentLoopConfig', 'num_workers': 8, 'default_agent_loop': 'single_turn_agent', 'agent_loop_config_path': None, 'custom_async_server': {'_target_': 'verl.workers.config.CustomAsyncServerConfig', 'path': None, 'name': None}}, 'checkpoint_engine': {'_target_': 'verl.workers.config.CheckpointEngineConfig', 'backend': 'naive', 'update_weights_bucket_megabytes': 2048, 'engine_kwargs': {}}, 'trace': {'_target_': 'verl.workers.config.TraceConfig', 'backend': None, 'token2text': False, 'max_samples_per_step_per_worker': None}, 'skip_rollout': False, 'skip_dump_dir': '/tmp/rollout_dump', 'skip_tokenizer_init': True, 'enable_rollout_routing_replay': False, 'profiler': {'_target_': 'verl.utils.profiler.ProfilerConfig', 'tool': None, 'enable': False, 'all_ranks': False, 'ranks': [], 'save_path': 'outputs/profile', 'tool_config': {'nsys': {'_target_': 'verl.utils.profiler.config.NsightToolConfig', 'discrete': False}, 'npu': {'_target_': 'verl.utils.profiler.config.NPUToolConfig', 'contents': [], 'level': 'level0', 'analysis': True, 'discrete': False}, 'torch': {'_target_': 'verl.utils.profiler.config.TorchProfilerToolConfig', 'contents': [], 'discrete': False}, 'torch_memory': {'_target_': 'verl.utils.profiler.config.TorchMemoryToolConfig', 'trace_alloc_max_entries': 100000, 'stack_depth': 32}}}, 'prometheus': {'_target_': 'verl.workers.config.PrometheusConfig', 'enable': False, 'port': 9090, 'file': '/tmp/ray/session_latest/metrics/prometheus/prometheus.yml', 'served_model_name': 'Qwen/Qwen3-4B-Instruct-2507'}, 'quantization': None, 'quantization_config_file': None, 'mtp': {'_target_': 'verl.workers.config.MtpConfig', 'enable': False, 'enable_train': False, 'enable_rollout': False, 'detach_encoder': False, 'mtp_loss_scaling_factor': 0.1, 'speculative_algorithm': 'EAGLE', 'speculative_num_steps': 3, 'speculative_eagle_topk': 1, 'speculative_num_draft_tokens': 4, 'method': 'mtp', 'num_speculative_tokens': 1}, 'layered_summon': False}, 'model': {'_target_': 'verl.workers.config.HFModelConfig', 'path': 'Qwen/Qwen3-4B-Instruct-2507', 'hf_config_path': None, 'tokenizer_path': None, 'use_shm': False, 'trust_remote_code': False, 'custom_chat_template': None, 'external_lib': None, 'override_config': {}, 'enable_gradient_checkpointing': True, 'enable_activation_offload': False, 'use_remove_padding': True, 'lora_rank': 0, 'lora_alpha': 16, 'target_modules': 'all-linear', 'exclude_modules': None, 'lora_adapter_path': None, 'use_liger': False, 'use_fused_kernels': False, 'fused_kernel_options': {'impl_backend': 'torch'}, 'tiled_mlp': {'enabled': False, 'num_shards': 4}, 'mtp': {'_target_': 'verl.workers.config.MtpConfig', 'enable': False, 'enable_train': False, 'enable_rollout': False, 'detach_encoder': False, 'mtp_loss_scaling_factor': 0.1, 'speculative_algorithm': 'EAGLE', 'speculative_num_steps': 3, 'speculative_eagle_topk': 1, 'speculative_num_draft_tokens': 4, 'method': 'mtp', 'num_speculative_tokens': 1}}, 'hybrid_engine': True, 'nccl_timeout': 600}, 'data': {'tokenizer': None, 'use_shm': False, 'train_files': '/home/mshahidul/readctrl/code/RL_model/verl/verl_train/dataset/train.parquet', 'val_files': '/home/mshahidul/readctrl/code/RL_model/verl/verl_train/dataset/test.parquet', 'train_max_samples': -1, 'val_max_samples': -1, 'prompt_key': 'prompt', 'reward_fn_key': 'data_source', 'max_prompt_length': 1024, 'max_response_length': 2048, 'train_batch_size': 512, 'val_batch_size': None, 'tool_config_path': None, 'return_raw_input_ids': False, 'return_raw_chat': True, 'return_full_prompt': False, 'shuffle': True, 'seed': None, 'dataloader_num_workers': 8, 'image_patch_size': 14, 'validation_shuffle': False, 'filter_overlong_prompts': True, 'filter_overlong_prompts_workers': 1, 'truncation': 'error', 'image_key': 'images', 'video_key': 'videos', 'trust_remote_code': False, 'custom_cls': {'path': None, 'name': None}, 'return_multi_modal_inputs': True, 'sampler': {'class_path': None, 'class_name': None}, 'datagen': {'path': None, 'name': None}, 'apply_chat_template_kwargs': {}}, 'reward_manager': {'_target_': 'verl.trainer.config.config.RewardManagerConfig', 'source': 'register', 'name': 'naive', 'module': {'_target_': 'verl.trainer.config.config.ModuleConfig', 'path': None, 'name': 'custom_reward_manager'}}, 'critic': {'optim': {'_target_': 'verl.workers.config.FSDPOptimizerConfig', 'optimizer': 'AdamW', 'optimizer_impl': 'torch.optim', 'lr': 1e-05, 'lr_warmup_steps_ratio': 0.0, 'total_training_steps': 90, 'weight_decay': 0.01, 'lr_warmup_steps': -1, 'betas': [0.9, 0.999], 'clip_grad': 1.0, 'min_lr_ratio': 0.0, 'num_cycles': 0.5, 'lr_scheduler_type': 'constant', 'warmup_style': None, 'override_optimizer_config': None}, 'model': {'fsdp_config': {'_target_': 'verl.workers.config.FSDPEngineConfig', 'wrap_policy': {'min_num_params': 0}, 'param_offload': False, 'optimizer_offload': False, 'offload_policy': False, 'reshard_after_forward': True, 'fsdp_size': -1, 'forward_prefetch': False, 'model_dtype': 'fp32', 'use_orig_params': False, 'seed': 42, 'full_determinism': False, 'ulysses_sequence_parallel_size': 1, 'entropy_from_logits_with_chunking': False, 'use_torch_compile': True, 'entropy_checkpointing': False, 'forward_only': False, 'strategy': 'fsdp', 'dtype': 'bfloat16'}, 'path': '~/models/deepseek-llm-7b-chat', 'tokenizer_path': 'Qwen/Qwen3-4B-Instruct-2507', 'override_config': {}, 'external_lib': None, 'trust_remote_code': False, '_target_': 'verl.workers.config.FSDPCriticModelCfg', 'use_shm': False, 'enable_gradient_checkpointing': True, 'enable_activation_offload': False, 'use_remove_padding': False, 'lora_rank': 0, 'lora_alpha': 16, 'target_modules': 'all-linear', 'tiled_mlp': {'enabled': False, 'num_shards': 4}}, '_target_': 'verl.workers.config.FSDPCriticConfig', 'rollout_n': 3, 'strategy': 'fsdp', 'enable': None, 'ppo_mini_batch_size': 256, 'ppo_micro_batch_size': None, 'ppo_micro_batch_size_per_gpu': None, 'use_dynamic_bsz': False, 'ppo_max_token_len_per_gpu': 32768, 'forward_max_token_len_per_gpu': 32768, 'ppo_epochs': 1, 'shuffle': False, 'data_loader_seed': 42, 'cliprange_value': 0.5, 'loss_agg_mode': 'token-mean', 'checkpoint': {'_target_': 'verl.trainer.config.CheckpointConfig', 'save_contents': ['model', 'optimizer', 'extra'], 'load_contents': ['model', 'optimizer', 'extra'], 'async_save': False}, 'profiler': {'_target_': 'verl.utils.profiler.ProfilerConfig', 'tool': None, 'enable': False, 'all_ranks': False, 'ranks': [], 'save_path': 'outputs/profile', 'tool_config': {'nsys': {'_target_': 'verl.utils.profiler.config.NsightToolConfig', 'discrete': False}, 'npu': {'_target_': 'verl.utils.profiler.config.NPUToolConfig', 'contents': [], 'level': 'level0', 'analysis': True, 'discrete': False}, 'torch': {'_target_': 'verl.utils.profiler.config.TorchProfilerToolConfig', 'contents': [], 'discrete': False}, 'torch_memory': {'_target_': 'verl.utils.profiler.config.TorchMemoryToolConfig', 'trace_alloc_max_entries': 100000, 'stack_depth': 32}}}, 'forward_micro_batch_size': None, 'forward_micro_batch_size_per_gpu': None, 'ulysses_sequence_parallel_size': 1, 'grad_clip': 1.0}, 'reward_model': {'enable': False, 'enable_resource_pool': False, 'n_gpus_per_node': 8, 'nnodes': 0, 'strategy': 'fsdp', 'model': {'input_tokenizer': 'Qwen/Qwen3-4B-Instruct-2507', 'path': '~/models/FsfairX-LLaMA3-RM-v0.1', 'external_lib': None, 'trust_remote_code': False, 'override_config': {}, 'use_shm': False, 'use_remove_padding': False, 'use_fused_kernels': False, 'fsdp_config': {'_target_': 'verl.workers.config.FSDPEngineConfig', 'wrap_policy': {'min_num_params': 0}, 'param_offload': False, 'reshard_after_forward': True, 'fsdp_size': -1, 'forward_prefetch': False}}, 'micro_batch_size': None, 'micro_batch_size_per_gpu': None, 'max_length': None, 'use_dynamic_bsz': False, 'forward_max_token_len_per_gpu': 32768, 'reward_manager': 'naive', 'reward_loop_source': 'register', 'reward_loop_module_path': None, 'reward_loop_class_name': None, 'launch_reward_fn_async': False, 'sandbox_fusion': {'url': None, 'max_concurrent': 64, 'memory_limit_mb': 1024}, 'profiler': {'_target_': 'verl.utils.profiler.ProfilerConfig', 'tool': None, 'enable': False, 'all_ranks': False, 'ranks': [], 'save_path': 'outputs/profile', 'tool_config': {'nsys': {'_target_': 'verl.utils.profiler.config.NsightToolConfig', 'discrete': False}, 'npu': {'_target_': 'verl.utils.profiler.config.NPUToolConfig', 'contents': [], 'level': 'level0', 'analysis': True, 'discrete': False}, 'torch': {'_target_': 'verl.utils.profiler.config.TorchProfilerToolConfig', 'contents': [], 'discrete': False}, 'torch_memory': {'_target_': 'verl.utils.profiler.config.TorchMemoryToolConfig', 'trace_alloc_max_entries': 100000, 'stack_depth': 32}}}, 'ulysses_sequence_parallel_size': 1, 'use_reward_loop': True, 'num_workers': 1, 'rollout': {'_target_': 'verl.workers.config.RolloutConfig', 'name': '???', 'dtype': 'bfloat16', 'gpu_memory_utilization': 0.5, 'enforce_eager': True, 'cudagraph_capture_sizes': None, 'free_cache_engine': True, 'data_parallel_size': 1, 'expert_parallel_size': 1, 'tensor_model_parallel_size': 2, 'max_num_batched_tokens': 8192, 'max_model_len': None, 'max_num_seqs': 1024, 'load_format': 'auto', 'engine_kwargs': {}, 'limit_images': None, 'enable_chunked_prefill': True, 'enable_prefix_caching': True, 'disable_log_stats': True, 'skip_tokenizer_init': False, 'prompt_length': 2048, 'response_length': 2048}}, 'algorithm': {'rollout_correction': {'rollout_is': None, 'rollout_is_threshold': 2.0, 'rollout_rs': None, 'rollout_rs_threshold': None, 'bypass_mode': False, 'loss_type': 'ppo_clip', 'rollout_is_batch_normalize': False}, '_target_': 'verl.trainer.config.AlgoConfig', 'gamma': 1.0, 'lam': 1.0, 'adv_estimator': 'grpo', 'norm_adv_by_std_in_grpo': True, 'use_kl_in_reward': False, 'kl_penalty': 'kl', 'kl_ctrl': {'_target_': 'verl.trainer.config.KLControlConfig', 'type': 'fixed', 'kl_coef': 0.001, 'horizon': 10000, 'target_kl': 0.1}, 'use_pf_ppo': False, 'pf_ppo': {'reweight_method': 'pow', 'weight_pow': 2.0}}, 'custom_reward_function': {'path': '/home/mshahidul/readctrl/code/RL_model/verl/verl_train/reward_func/reward.py', 'name': 'compute_score'}, 'trainer': {'balance_batch': True, 'total_epochs': 15, 'total_training_steps': None, 'project_name': 'readctrl-verl', 'experiment_name': 'qwen3-4b-instruct-optimized-multiclinsum-gs', 'logger': ['console', 'wandb'], 'log_val_generations': 0, 'rollout_data_dir': None, 'validation_data_dir': None, 'nnodes': 1, 'n_gpus_per_node': 2, 'save_freq': 20, 'esi_redundant_time': 0, 'resume_mode': 'auto', 'resume_from_path': None, 'val_before_train': True, 'val_only': False, 'test_freq': 5, 'critic_warmup': 0, 'default_hdfs_dir': None, 'del_local_ckpt_after_load': False, 'default_local_dir': '/home/mshahidul/readctrl/code/RL_model/train_v2', 'max_actor_ckpt_to_keep': 1, 'max_critic_ckpt_to_keep': 1, 'ray_wait_register_center_timeout': 300, 'device': 'cuda', 'use_legacy_worker_impl': 'auto', 'remove_previous_ckpt_in_save': True}, 'global_profiler': {'_target_': 'verl.utils.profiler.ProfilerConfig', 'tool': None, 'steps': None, 'profile_continuous_steps': False, 'save_path': 'outputs/profile', 'global_tool_config': {'nsys': {'_target_': 'verl.utils.profiler.config.NsightToolConfig', 'discrete': False, 'controller_nsight_options': {'trace': 'cuda,nvtx,cublas,ucx', 'cuda-memory-usage': 'true', 'cuda-graph-trace': 'graph'}, 'worker_nsight_options': {'trace': 'cuda,nvtx,cublas,ucx', 'cuda-memory-usage': 'true', 'cuda-graph-trace': 'graph', 'capture-range': 'cudaProfilerApi', 'capture-range-end': None, 'kill': 'none'}}, 'torch_memory': {'trace_alloc_max_entries': 100000, 'stack_depth': 32, 'context': 'all', 'stacks': 'all', 'kw_args': {}}}}, 'transfer_queue': {'enable': False}, 'ray_kwargs': {'ray_init': {'num_cpus': None}, 'timeline_json_file': None}, '_wandb': {}} +2026-02-07 10:34:50,123 INFO MainThread:234834 [wandb_init.py:init():892] starting backend +2026-02-07 10:34:50,412 INFO MainThread:234834 [wandb_init.py:init():895] sending inform_init request +2026-02-07 10:34:50,421 INFO MainThread:234834 [wandb_init.py:init():903] backend started and connected +2026-02-07 10:34:50,436 INFO MainThread:234834 [wandb_init.py:init():973] updated telemetry +2026-02-07 10:34:50,460 INFO MainThread:234834 [wandb_init.py:init():997] communicating run to backend with 90.0 second timeout +2026-02-07 10:34:51,583 INFO MainThread:234834 [wandb_init.py:init():1042] starting run threads in backend +2026-02-07 10:34:52,343 INFO MainThread:234834 [wandb_run.py:_console_start():2529] atexit reg +2026-02-07 10:34:52,343 INFO MainThread:234834 [wandb_run.py:_redirect():2377] redirect: wrap_raw +2026-02-07 10:34:52,343 INFO MainThread:234834 [wandb_run.py:_redirect():2446] Wrapping output streams. +2026-02-07 10:34:52,343 INFO MainThread:234834 [wandb_run.py:_redirect():2469] Redirects installed. +2026-02-07 10:34:52,356 INFO MainThread:234834 [wandb_init.py:init():1082] run started, returning control to user process diff --git a/code/RL_model/verl/verl_train/wandb/run-20260207_110043-guu1a2ui/files/output.log b/code/RL_model/verl/verl_train/wandb/run-20260207_110043-guu1a2ui/files/output.log new file mode 100644 index 0000000000000000000000000000000000000000..d78af0c289eb892ae52257239a41a200ae2d6cd8 --- /dev/null +++ b/code/RL_model/verl/verl_train/wandb/run-20260207_110043-guu1a2ui/files/output.log @@ -0,0 +1,6 @@ +wandb: Detected [openai] in use. +wandb: Use W&B Weave for improved LLM call tracing. Install Weave with `pip install weave` then add `import weave` to the top of your script. +wandb: For more information, check out the docs at: https://weave-docs.wandb.ai/ +Checkpoint tracker file does not exist: /home/mshahidul/readctrl/code/RL_model/train_v2/latest_checkpointed_iteration.txt +Training from scratch +test_gen_batch meta info: {'eos_token_id': 151645, 'pad_token_id': 151643, 'recompute_log_prob': False, 'do_sample': False, 'validate': True, 'global_steps': 0} diff --git a/code/RL_model/verl/verl_train/wandb/run-20260207_110043-guu1a2ui/files/requirements.txt b/code/RL_model/verl/verl_train/wandb/run-20260207_110043-guu1a2ui/files/requirements.txt new file mode 100644 index 0000000000000000000000000000000000000000..c168a7097ef00c84a34fe1db69f95d6eaafb03f2 --- /dev/null +++ b/code/RL_model/verl/verl_train/wandb/run-20260207_110043-guu1a2ui/files/requirements.txt @@ -0,0 +1,269 @@ +verl==0.8.0.dev0 +psutil==7.1.3 +colorama==0.4.6 +annotated-doc==0.0.4 +sentry-sdk==2.51.0 +requests==2.32.5 +nvidia-cufile-cu12==1.13.1.3 +ml_dtypes==0.5.4 +xformers==0.0.32.post1 +sglang==0.5.2 +multidict==6.7.1 +typing_extensions==4.15.0 +nvidia-cusparselt-cu12==0.7.1 +openai-harmony==0.0.4 +transformers==4.56.1 +Werkzeug==3.1.5 +identify==2.6.16 +pytest==9.0.2 +nvidia-cuda-runtime-cu12==12.8.90 +GitPython==3.1.46 +cupy-cuda12x==13.6.0 +tokenizers==0.22.2 +pybind11==3.0.1 +google-api-core==2.29.0 +partial-json-parser==0.2.1.1.post7 +aiohttp-cors==0.8.1 +sniffio==1.3.1 +tensordict==0.10.0 +smart_open==7.5.0 +cffi==2.0.0 +wcwidth==0.5.3 +asttokens==3.0.1 +opencensus==0.11.4 +rpds-py==0.30.0 +py-spy==0.4.1 +gguf==0.17.1 +nvidia-nvjitlink-cu12==12.8.93 +httpx==0.28.1 +cuda-python==13.1.1 +annotated-types==0.7.0 +regex==2026.1.15 +vllm==0.11.0 +idna==3.11 +fsspec==2025.10.0 +parso==0.8.5 +pydantic-extra-types==2.11.0 +MarkupSafe==3.0.3 +cryptography==46.0.4 +filelock==3.20.3 +modelscope==1.34.0 +outlines==0.1.11 +dnspython==2.8.0 +scipy==1.17.0 +zipp==3.23.0 +PyYAML==6.0.3 +onnx==1.20.1 +torchdata==0.11.0 +cuda-pathfinder==1.3.3 +verl==0.8.0.dev0 +httptools==0.7.1 +opencv-python-headless==4.13.0.90 +importlib_metadata==8.7.1 +peft==0.18.1 +opentelemetry-sdk==1.39.1 +python-json-logger==4.0.0 +cuda-bindings==13.1.1 +mdurl==0.1.2 +referencing==0.37.0 +xxhash==3.6.0 +interegular==0.3.3 +fastapi-cli==0.0.20 +uv==0.9.28 +tensorboard==2.20.0 +nvidia-cublas-cu12==12.8.4.1 +sentencepiece==0.2.1 +rich-toolkit==0.18.1 +numpy==2.2.0 +yarl==1.22.0 +opencv-fixer==0.2.5 +tqdm==4.67.2 +python-dotenv==1.2.1 +timm==1.0.16 +aiohappyeyeballs==2.6.1 +decord==0.6.0 +jiter==0.12.0 +airportsdata==20250909 +markdown-it-py==4.0.0 +nvidia-cusolver-cu12==11.7.3.90 +pyarrow==23.0.0 +opentelemetry-proto==1.39.1 +anyio==4.12.1 +pycryptodomex==3.23.0 +prometheus_client==0.24.1 +aiohttp==3.13.3 +urllib3==2.6.3 +pexpect==4.9.0 +pydantic-settings==2.12.0 +distro==1.9.0 +av==16.1.0 +cloudpickle==3.1.2 +mpmath==1.3.0 +certifi==2026.1.4 +antlr4-python3-runtime==4.9.3 +torchvision==0.23.0 +accelerate==1.12.0 +watchfiles==1.1.1 +ruff==0.14.14 +wheel==0.46.3 +omegaconf==2.3.0 +nvidia-cufft-cu12==11.3.3.83 +multiprocess==0.70.18 +frozendict==2.4.7 +sympy==1.14.0 +setproctitle==1.3.7 +setuptools==79.0.1 +py-cpuinfo==9.0.0 +ipython_pygments_lexers==1.1.1 +rich==14.3.2 +uvicorn==0.40.0 +openai==1.99.1 +outlines_core==0.2.11 +llvmlite==0.44.0 +nvidia-cuda-cupti-cu12==12.8.90 +attrs==25.4.0 +anthropic==0.77.0 +packaging==25.0 +fastrlock==0.8.3 +astor==0.8.1 +pluggy==1.6.0 +nvidia-cuda-nvrtc-cu12==12.8.93 +psutil==7.2.2 +virtualenv==20.36.1 +cbor2==5.8.0 +compressed-tensors==0.11.0 +nvidia-cusparse-cu12==12.5.8.93 +networkx==3.6.1 +httpcore==1.0.9 +onnxscript==0.3.1 +smmap==5.0.2 +opencv-python==4.13.0.90 +traitlets==5.14.3 +python-multipart==0.0.22 +pyvers==0.1.0 +huggingface-hub==0.36.0 +pillow==12.1.0 +jsonschema==4.26.0 +cfgv==3.5.0 +optree==0.18.0 +email-validator==2.3.0 +tabulate==0.9.0 +pre_commit==4.5.1 +msgpack==1.1.2 +depyf==0.19.0 +numba==0.61.2 +six==1.17.0 +aiosignal==1.4.0 +nvidia-nvtx-cu12==12.8.90 +propcache==0.4.1 +torch_memory_saver==0.0.8 +h11==0.16.0 +frozenlist==1.8.0 +websockets==16.0 +nvidia-cudnn-frontend==1.18.0 +build==1.4.0 +google-auth==2.48.0 +pycountry==24.6.1 +stack-data==0.6.3 +typing-inspection==0.4.2 +googleapis-common-protos==1.72.0 +pandas==3.0.0 +typer==0.21.1 +protobuf==6.33.5 +fastapi==0.128.0 +blake3==1.0.8 +opentelemetry-semantic-conventions==0.60b1 +opentelemetry-exporter-prometheus==0.60b1 +nvidia-cudnn-cu12==9.10.2.21 +Markdown==3.10.1 +liger_kernel==0.6.4 +nodeenv==1.10.0 +prompt_toolkit==3.0.52 +torchaudio==2.8.0 +codetiming==1.4.0 +platformdirs==4.5.1 +jsonschema-specifications==2025.9.1 +hydra-core==1.3.2 +tensorboard-data-server==0.7.2 +lm-format-enforcer==0.11.3 +pyasn1_modules==0.4.2 +tiktoken==0.12.0 +starlette==0.50.0 +pyproject_hooks==1.2.0 +flash_attn==2.8.1 +rsa==4.9.1 +ray==2.53.0 +nest-asyncio==1.6.0 +lark==1.2.2 +fastar==0.8.0 +orjson==3.11.6 +prometheus-fastapi-instrumentator==7.1.0 +opentelemetry-api==1.39.1 +mathruler==0.1.0 +pydantic_core==2.41.5 +fastapi-cloud-cli==0.11.0 +pynvml==13.0.1 +loguru==0.7.3 +torch==2.8.0 +msgspec==0.20.0 +nvidia-curand-cu12==10.3.9.90 +blobfile==3.0.0 +gitdb==4.0.12 +llguidance==0.7.30 +hf_transfer==0.1.9 +nvidia-nccl-cu12==2.27.3 +qwen-vl-utils==0.0.14 +ptyprocess==0.7.0 +ipdb==0.13.13 +opencensus-context==0.1.3 +jedi==0.19.2 +click==8.3.1 +datasets==4.5.0 +soxr==1.0.0 +sgl-kernel==0.3.9.post2 +colorful==0.5.8 +pyasn1==0.6.2 +charset-normalizer==3.4.4 +nvidia-ml-py==13.590.48 +hf-xet==1.2.0 +dill==0.4.0 +absl-py==2.4.0 +pydantic==2.12.5 +wrapt==2.1.0 +flashinfer-python==0.3.1 +python-dateutil==2.9.0.post0 +torchao==0.9.0 +cachetools==7.0.0 +soundfile==0.13.1 +diskcache==5.6.3 +onnx-ir==0.1.15 +docstring_parser==0.17.0 +matplotlib-inline==0.2.1 +Pygments==2.19.2 +wandb==0.24.1 +pure_eval==0.2.3 +ninja==1.13.0 +proto-plus==1.27.0 +pyzmq==27.1.0 +iniconfig==2.3.0 +Jinja2==3.1.6 +megatron-core==0.13.1 +uvloop==0.22.1 +pycparser==3.0 +pylatexenc==2.10 +decorator==5.2.1 +shellingham==1.5.4 +lxml==6.0.2 +safetensors==0.7.0 +xgrammar==0.1.25 +pybase64==1.4.3 +ipython==9.9.0 +mistral_common==1.9.0 +rignore==0.7.6 +einops==0.8.2 +distlib==0.4.0 +triton==3.4.0 +executing==2.2.1 +grpcio==1.76.0 +pip==25.3 +verl==0.8.0.dev0 diff --git a/code/RL_model/verl/verl_train/wandb/run-20260207_110043-guu1a2ui/files/wandb-metadata.json b/code/RL_model/verl/verl_train/wandb/run-20260207_110043-guu1a2ui/files/wandb-metadata.json new file mode 100644 index 0000000000000000000000000000000000000000..4d684cb4e0fd3db8541a1713504acacaaca9bf44 --- /dev/null +++ b/code/RL_model/verl/verl_train/wandb/run-20260207_110043-guu1a2ui/files/wandb-metadata.json @@ -0,0 +1,93 @@ +{ + "os": "Linux-5.15.0-160-generic-x86_64-with-glibc2.35", + "python": "CPython 3.12.12", + "startedAt": "2026-02-07T16:00:43.122673Z", + "args": [ + "--node-ip-address=172.16.34.29", + "--node-manager-port=43297", + "--object-store-name=/tmp/ray/session_2026-02-07_10-53-44_882903_374056/sockets/plasma_store", + "--raylet-name=/tmp/ray/session_2026-02-07_10-53-44_882903_374056/sockets/raylet", + "--redis-address=None", + "--metrics-agent-port=53255", + "--logging-rotate-bytes=536870912", + "--logging-rotate-backup-count=5", + "--runtime-env-agent-port=64878", + "--gcs-address=172.16.34.29:65048", + "--session-name=session_2026-02-07_10-53-44_882903_374056", + "--temp-dir=/tmp/ray", + "--webui=127.0.0.1:8297", + "--cluster-id=45af7cb19e83f65af29f825c76037f19f111e73b255f6821957a9742", + "--startup-token=128", + "--worker-launch-time-ms=1770479641843", + "--node-id=8f30099b7dfadcd6cb48455a01a58a59d61a74566e43759f39d48ec5", + "--runtime-env-hash=1096984665" + ], + "program": "/home/mshahidul/miniconda3/envs/verl2/lib/python3.12/site-packages/ray/_private/workers/default_worker.py", + "git": { + "remote": "https://github.com/verl-project/verl", + "commit": "d9939add7a2a01923a9088891f913a5d20c4e622" + }, + "email": "shahidulshakib034@gmail.com", + "root": "/data/home_beta/mshahidul/readctrl/code/RL_model/verl/verl_train", + "host": "gamma", + "executable": "/home/mshahidul/miniconda3/envs/verl2/bin/python3", + "cpu_count": 64, + "cpu_count_logical": 128, + "gpu": "NVIDIA A100 80GB PCIe", + "gpu_count": 6, + "disk": { + "/": { + "total": "3766429188096", + "used": "188198088704" + } + }, + "memory": { + "total": "1081814863872" + }, + "gpu_nvidia": [ + { + "name": "NVIDIA A100 80GB PCIe", + "memoryTotal": "85899345920", + "cudaCores": 6912, + "architecture": "Ampere", + "uuid": "GPU-4a3678c7-34a9-356f-f7b7-7f7e2f44b596" + }, + { + "name": "NVIDIA A100 80GB PCIe", + "memoryTotal": "85899345920", + "cudaCores": 6912, + "architecture": "Ampere", + "uuid": "GPU-df506764-0db5-91b4-8ec9-154a3bb8123f" + }, + { + "name": "NVIDIA A100 80GB PCIe", + "memoryTotal": "85899345920", + "cudaCores": 6912, + "architecture": "Ampere", + "uuid": "GPU-2c3dbd62-b384-2996-a0f6-b32dcfcc3538" + }, + { + "name": "NVIDIA A100 80GB PCIe", + "memoryTotal": "85899345920", + "cudaCores": 6912, + "architecture": "Ampere", + "uuid": "GPU-1ff3dabe-4b9a-ea62-5cc3-01f12f32d328" + }, + { + "name": "NVIDIA H100 PCIe", + "memoryTotal": "85520809984", + "cudaCores": 14592, + "architecture": "Hopper", + "uuid": "GPU-eefc4b8c-0e79-c1d6-a9ff-8325040572eb" + }, + { + "name": "NVIDIA H100 PCIe", + "memoryTotal": "85520809984", + "cudaCores": 14592, + "architecture": "Hopper", + "uuid": "GPU-d42b6057-13e8-1e88-6aa1-9307df72dece" + } + ], + "cudaVersion": "13.0", + "writerId": "4bco19eqiqqcgqtfc6fjxcmnw9htd5ev" +} \ No newline at end of file diff --git a/code/RL_model/verl/verl_train/wandb/run-20260207_110043-guu1a2ui/files/wandb-summary.json b/code/RL_model/verl/verl_train/wandb/run-20260207_110043-guu1a2ui/files/wandb-summary.json new file mode 100644 index 0000000000000000000000000000000000000000..df418e460a991959868709428088ad4948a7a21d --- /dev/null +++ b/code/RL_model/verl/verl_train/wandb/run-20260207_110043-guu1a2ui/files/wandb-summary.json @@ -0,0 +1 @@ +{"_runtime":258,"_wandb":{"runtime":258}} \ No newline at end of file diff --git a/code/RL_model/verl/verl_train/wandb/run-20260207_110043-guu1a2ui/logs/debug-core.log b/code/RL_model/verl/verl_train/wandb/run-20260207_110043-guu1a2ui/logs/debug-core.log new file mode 100644 index 0000000000000000000000000000000000000000..7c56c995ffe0a38e0c3cc914bd9097e1012fc2d7 --- /dev/null +++ b/code/RL_model/verl/verl_train/wandb/run-20260207_110043-guu1a2ui/logs/debug-core.log @@ -0,0 +1,8 @@ +{"time":"2026-02-07T11:00:43.279763084-05:00","level":"INFO","msg":"main: starting server","port-filename":"/tmp/tmpjnpsrfuy/port-391957.txt","pid":391957,"log-level":0,"disable-analytics":false,"shutdown-on-parent-exit":false,"enable-dcgm-profiling":false} +{"time":"2026-02-07T11:00:43.282615617-05:00","level":"INFO","msg":"server: will exit if parent process dies","ppid":391957} +{"time":"2026-02-07T11:00:43.282602902-05:00","level":"INFO","msg":"server: accepting connections","addr":{"Name":"/tmp/wandb-391957-425242-629764596/socket","Net":"unix"}} +{"time":"2026-02-07T11:00:43.439644719-05:00","level":"INFO","msg":"connection: ManageConnectionData: new connection created","id":"1(@)"} +{"time":"2026-02-07T11:00:43.454683954-05:00","level":"INFO","msg":"handleInformInit: received","streamId":"guu1a2ui","id":"1(@)"} +{"time":"2026-02-07T11:00:45.188214759-05:00","level":"INFO","msg":"handleInformInit: stream started","streamId":"guu1a2ui","id":"1(@)"} +{"time":"2026-02-07T11:00:51.484496051-05:00","level":"INFO","msg":"connection: cancelling request","id":"1(@)","requestId":"awrfwfzlbciu"} +{"time":"2026-02-07T11:05:04.093068084-05:00","level":"INFO","msg":"connection: cancelling request","id":"1(@)","requestId":"awrfwfzlbciu"} diff --git a/code/RL_model/verl/verl_train/wandb/run-20260207_110043-guu1a2ui/logs/debug-internal.log b/code/RL_model/verl/verl_train/wandb/run-20260207_110043-guu1a2ui/logs/debug-internal.log new file mode 100644 index 0000000000000000000000000000000000000000..d73333ee1a016e8fa2ca38b7cefd33eddb68da67 --- /dev/null +++ b/code/RL_model/verl/verl_train/wandb/run-20260207_110043-guu1a2ui/logs/debug-internal.log @@ -0,0 +1,6 @@ +{"time":"2026-02-07T11:00:43.455983119-05:00","level":"INFO","msg":"stream: starting","core version":"0.24.1"} +{"time":"2026-02-07T11:00:45.185665869-05:00","level":"INFO","msg":"stream: created new stream","id":"guu1a2ui"} +{"time":"2026-02-07T11:00:45.185857972-05:00","level":"INFO","msg":"handler: started","stream_id":"guu1a2ui"} +{"time":"2026-02-07T11:00:45.188178423-05:00","level":"INFO","msg":"stream: started","id":"guu1a2ui"} +{"time":"2026-02-07T11:00:45.18824121-05:00","level":"INFO","msg":"sender: started","stream_id":"guu1a2ui"} +{"time":"2026-02-07T11:00:45.188252559-05:00","level":"INFO","msg":"writer: started","stream_id":"guu1a2ui"} diff --git a/code/RL_model/verl/verl_train/wandb/run-20260207_110043-guu1a2ui/logs/debug.log b/code/RL_model/verl/verl_train/wandb/run-20260207_110043-guu1a2ui/logs/debug.log new file mode 100644 index 0000000000000000000000000000000000000000..06d4dd9903f7dd6d2833118cd6da669556f0aea0 --- /dev/null +++ b/code/RL_model/verl/verl_train/wandb/run-20260207_110043-guu1a2ui/logs/debug.log @@ -0,0 +1,23 @@ +2026-02-07 11:00:43,143 INFO MainThread:391957 [wandb_setup.py:_flush():81] Current SDK version is 0.24.1 +2026-02-07 11:00:43,144 INFO MainThread:391957 [wandb_setup.py:_flush():81] Configure stats pid to 391957 +2026-02-07 11:00:43,144 INFO MainThread:391957 [wandb_setup.py:_flush():81] Loading settings from environment variables +2026-02-07 11:00:43,144 INFO MainThread:391957 [wandb_init.py:setup_run_log_directory():717] Logging user logs to /data/home_beta/mshahidul/readctrl/code/RL_model/verl/verl_train/wandb/run-20260207_110043-guu1a2ui/logs/debug.log +2026-02-07 11:00:43,144 INFO MainThread:391957 [wandb_init.py:setup_run_log_directory():718] Logging internal logs to /data/home_beta/mshahidul/readctrl/code/RL_model/verl/verl_train/wandb/run-20260207_110043-guu1a2ui/logs/debug-internal.log +2026-02-07 11:00:43,144 INFO MainThread:391957 [wandb_init.py:init():844] calling init triggers +2026-02-07 11:00:43,146 INFO MainThread:391957 [wandb_init.py:init():849] wandb.init called with sweep_config: {} +config: {'actor_rollout_ref': {'actor': {'optim': {'_target_': 'verl.workers.config.FSDPOptimizerConfig', 'optimizer': 'AdamW', 'optimizer_impl': 'torch.optim', 'lr': 1e-06, 'lr_warmup_steps_ratio': 0.0, 'total_training_steps': 90, 'weight_decay': 0.01, 'lr_warmup_steps': -1, 'betas': [0.9, 0.999], 'clip_grad': 1.0, 'min_lr_ratio': 0.0, 'num_cycles': 0.5, 'lr_scheduler_type': 'constant', 'warmup_style': None, 'override_optimizer_config': None}, 'fsdp_config': {'_target_': 'verl.workers.config.FSDPEngineConfig', 'wrap_policy': {'min_num_params': 0}, 'param_offload': False, 'optimizer_offload': False, 'offload_policy': False, 'reshard_after_forward': True, 'fsdp_size': -1, 'forward_prefetch': False, 'model_dtype': 'fp32', 'use_orig_params': False, 'seed': 42, 'full_determinism': False, 'ulysses_sequence_parallel_size': 1, 'entropy_from_logits_with_chunking': False, 'use_torch_compile': True, 'entropy_checkpointing': False, 'forward_only': False, 'strategy': 'fsdp', 'dtype': 'bfloat16'}, '_target_': 'verl.workers.config.FSDPActorConfig', 'rollout_n': 3, 'strategy': 'fsdp', 'ppo_mini_batch_size': 256, 'ppo_micro_batch_size': None, 'ppo_micro_batch_size_per_gpu': 32, 'use_dynamic_bsz': False, 'ppo_max_token_len_per_gpu': 16384, 'clip_ratio': 0.2, 'clip_ratio_low': 0.2, 'clip_ratio_high': 0.2, 'tau_pos': 1.0, 'tau_neg': 1.05, 'freeze_vision_tower': False, 'policy_loss': {'_target_': 'verl.workers.config.PolicyLossConfig', 'loss_mode': 'vanilla', 'clip_cov_ratio': 0.0002, 'clip_cov_lb': 1.0, 'clip_cov_ub': 5.0, 'kl_cov_ratio': 0.0002, 'ppo_kl_coef': 0.1}, 'clip_ratio_c': 3.0, 'loss_agg_mode': 'token-mean', 'loss_scale_factor': None, 'entropy_coeff': 0, 'calculate_entropy': False, 'use_kl_loss': True, 'use_prefix_grouper': False, 'use_torch_compile': True, 'kl_loss_coef': 0.001, 'kl_loss_type': 'low_var_kl', 'ppo_epochs': 1, 'shuffle': False, 'data_loader_seed': 42, 'checkpoint': {'_target_': 'verl.trainer.config.CheckpointConfig', 'save_contents': ['model', 'optimizer', 'extra'], 'load_contents': ['model', 'optimizer', 'extra'], 'async_save': False}, 'use_fused_kernels': False, 'profiler': {'_target_': 'verl.utils.profiler.ProfilerConfig', 'tool': None, 'enable': False, 'all_ranks': False, 'ranks': [], 'save_path': 'outputs/profile', 'tool_config': {'nsys': {'_target_': 'verl.utils.profiler.config.NsightToolConfig', 'discrete': False}, 'npu': {'_target_': 'verl.utils.profiler.config.NPUToolConfig', 'contents': [], 'level': 'level0', 'analysis': True, 'discrete': False}, 'torch': {'_target_': 'verl.utils.profiler.config.TorchProfilerToolConfig', 'contents': [], 'discrete': False}, 'torch_memory': {'_target_': 'verl.utils.profiler.config.TorchMemoryToolConfig', 'trace_alloc_max_entries': 100000, 'stack_depth': 32}}}, 'router_replay': {'_target_': 'verl.workers.config.RouterReplayConfig', 'mode': 'disabled', 'record_file': None, 'replay_file': None}, 'grad_clip': 1.0, 'ulysses_sequence_parallel_size': 1, 'entropy_from_logits_with_chunking': False, 'entropy_checkpointing': False, 'use_remove_padding': True, 'calculate_sum_pi_squared': False, 'sum_pi_squared_checkpointing': False}, 'ref': {'rollout_n': 3, 'strategy': 'fsdp', 'use_torch_compile': True, 'log_prob_micro_batch_size': None, 'log_prob_micro_batch_size_per_gpu': 32, 'log_prob_use_dynamic_bsz': False, 'log_prob_max_token_len_per_gpu': 16384, 'profiler': {'_target_': 'verl.utils.profiler.ProfilerConfig', 'tool': None, 'enable': False, 'all_ranks': False, 'ranks': [], 'save_path': 'outputs/profile', 'tool_config': {'nsys': {'_target_': 'verl.utils.profiler.config.NsightToolConfig', 'discrete': False}, 'npu': {'_target_': 'verl.utils.profiler.config.NPUToolConfig', 'contents': [], 'level': 'level0', 'analysis': True, 'discrete': False}, 'torch': {'_target_': 'verl.utils.profiler.config.TorchProfilerToolConfig', 'contents': [], 'discrete': False}, 'torch_memory': {'_target_': 'verl.utils.profiler.config.TorchMemoryToolConfig', 'trace_alloc_max_entries': 100000, 'stack_depth': 32}}}, 'router_replay': {'_target_': 'verl.workers.config.RouterReplayConfig', 'mode': 'disabled', 'record_file': None, 'replay_file': None}, 'fsdp_config': {'_target_': 'verl.workers.config.FSDPEngineConfig', 'wrap_policy': {'min_num_params': 0}, 'param_offload': False, 'optimizer_offload': False, 'offload_policy': False, 'reshard_after_forward': True, 'fsdp_size': -1, 'forward_prefetch': False, 'model_dtype': 'fp32', 'use_orig_params': False, 'seed': 42, 'full_determinism': False, 'ulysses_sequence_parallel_size': 1, 'entropy_from_logits_with_chunking': False, 'use_torch_compile': True, 'entropy_checkpointing': False, 'forward_only': True, 'strategy': 'fsdp', 'dtype': 'bfloat16'}, '_target_': 'verl.workers.config.FSDPActorConfig', 'ulysses_sequence_parallel_size': 1, 'entropy_from_logits_with_chunking': False, 'entropy_checkpointing': False}, 'rollout': {'_target_': 'verl.workers.config.RolloutConfig', 'name': 'vllm', 'mode': 'async', 'temperature': 1.0, 'top_k': -1, 'top_p': 1, 'prompt_length': 1024, 'response_length': 2048, 'dtype': 'bfloat16', 'gpu_memory_utilization': 0.6, 'ignore_eos': False, 'enforce_eager': False, 'cudagraph_capture_sizes': None, 'free_cache_engine': True, 'tensor_model_parallel_size': 1, 'data_parallel_size': 1, 'expert_parallel_size': 1, 'pipeline_model_parallel_size': 1, 'max_num_batched_tokens': 8192, 'max_model_len': 8192, 'max_num_seqs': 1024, 'enable_chunked_prefill': True, 'enable_prefix_caching': True, 'logprobs_mode': 'processed_logprobs', 'scheduling_policy': 'fcfs', 'load_format': 'dummy', 'log_prob_micro_batch_size': None, 'log_prob_micro_batch_size_per_gpu': 32, 'log_prob_use_dynamic_bsz': False, 'log_prob_max_token_len_per_gpu': 16384, 'disable_log_stats': True, 'do_sample': True, 'n': 3, 'over_sample_rate': 0, 'multi_stage_wake_up': False, 'engine_kwargs': {'vllm': {}, 'sglang': {}, 'trtllm': {}}, 'val_kwargs': {'_target_': 'verl.workers.config.SamplingConfig', 'top_k': -1, 'top_p': 1.0, 'temperature': 0, 'n': 1, 'do_sample': False}, 'multi_turn': {'_target_': 'verl.workers.config.MultiTurnConfig', 'enable': False, 'max_assistant_turns': None, 'tool_config_path': None, 'max_user_turns': None, 'max_parallel_calls': 1, 'max_tool_response_length': 256, 'tool_response_truncate_side': 'middle', 'interaction_config_path': None, 'use_inference_chat_template': False, 'tokenization_sanity_check_mode': 'strict', 'format': 'hermes', 'num_repeat_rollouts': None}, 'calculate_log_probs': False, 'agent': {'_target_': 'verl.workers.config.AgentLoopConfig', 'num_workers': 8, 'default_agent_loop': 'single_turn_agent', 'agent_loop_config_path': None, 'custom_async_server': {'_target_': 'verl.workers.config.CustomAsyncServerConfig', 'path': None, 'name': None}}, 'checkpoint_engine': {'_target_': 'verl.workers.config.CheckpointEngineConfig', 'backend': 'naive', 'update_weights_bucket_megabytes': 2048, 'engine_kwargs': {}}, 'trace': {'_target_': 'verl.workers.config.TraceConfig', 'backend': None, 'token2text': False, 'max_samples_per_step_per_worker': None}, 'skip_rollout': False, 'skip_dump_dir': '/tmp/rollout_dump', 'skip_tokenizer_init': True, 'enable_rollout_routing_replay': False, 'profiler': {'_target_': 'verl.utils.profiler.ProfilerConfig', 'tool': None, 'enable': False, 'all_ranks': False, 'ranks': [], 'save_path': 'outputs/profile', 'tool_config': {'nsys': {'_target_': 'verl.utils.profiler.config.NsightToolConfig', 'discrete': False}, 'npu': {'_target_': 'verl.utils.profiler.config.NPUToolConfig', 'contents': [], 'level': 'level0', 'analysis': True, 'discrete': False}, 'torch': {'_target_': 'verl.utils.profiler.config.TorchProfilerToolConfig', 'contents': [], 'discrete': False}, 'torch_memory': {'_target_': 'verl.utils.profiler.config.TorchMemoryToolConfig', 'trace_alloc_max_entries': 100000, 'stack_depth': 32}}}, 'prometheus': {'_target_': 'verl.workers.config.PrometheusConfig', 'enable': False, 'port': 9090, 'file': '/tmp/ray/session_latest/metrics/prometheus/prometheus.yml', 'served_model_name': 'Qwen/Qwen3-4B-Instruct-2507'}, 'quantization': None, 'quantization_config_file': None, 'mtp': {'_target_': 'verl.workers.config.MtpConfig', 'enable': False, 'enable_train': False, 'enable_rollout': False, 'detach_encoder': False, 'mtp_loss_scaling_factor': 0.1, 'speculative_algorithm': 'EAGLE', 'speculative_num_steps': 3, 'speculative_eagle_topk': 1, 'speculative_num_draft_tokens': 4, 'method': 'mtp', 'num_speculative_tokens': 1}, 'layered_summon': False}, 'model': {'_target_': 'verl.workers.config.HFModelConfig', 'path': 'Qwen/Qwen3-4B-Instruct-2507', 'hf_config_path': None, 'tokenizer_path': None, 'use_shm': False, 'trust_remote_code': False, 'custom_chat_template': None, 'external_lib': None, 'override_config': {}, 'enable_gradient_checkpointing': True, 'enable_activation_offload': False, 'use_remove_padding': True, 'lora_rank': 0, 'lora_alpha': 16, 'target_modules': 'all-linear', 'exclude_modules': None, 'lora_adapter_path': None, 'use_liger': False, 'use_fused_kernels': False, 'fused_kernel_options': {'impl_backend': 'torch'}, 'tiled_mlp': {'enabled': False, 'num_shards': 4}, 'mtp': {'_target_': 'verl.workers.config.MtpConfig', 'enable': False, 'enable_train': False, 'enable_rollout': False, 'detach_encoder': False, 'mtp_loss_scaling_factor': 0.1, 'speculative_algorithm': 'EAGLE', 'speculative_num_steps': 3, 'speculative_eagle_topk': 1, 'speculative_num_draft_tokens': 4, 'method': 'mtp', 'num_speculative_tokens': 1}}, 'hybrid_engine': True, 'nccl_timeout': 600}, 'data': {'tokenizer': None, 'use_shm': False, 'train_files': '/home/mshahidul/readctrl/code/RL_model/verl/verl_train/dataset/train.parquet', 'val_files': '/home/mshahidul/readctrl/code/RL_model/verl/verl_train/dataset/test.parquet', 'train_max_samples': -1, 'val_max_samples': -1, 'prompt_key': 'prompt', 'reward_fn_key': 'data_source', 'max_prompt_length': 1024, 'max_response_length': 2048, 'train_batch_size': 512, 'val_batch_size': None, 'tool_config_path': None, 'return_raw_input_ids': False, 'return_raw_chat': True, 'return_full_prompt': False, 'shuffle': True, 'seed': None, 'dataloader_num_workers': 8, 'image_patch_size': 14, 'validation_shuffle': False, 'filter_overlong_prompts': True, 'filter_overlong_prompts_workers': 1, 'truncation': 'error', 'image_key': 'images', 'video_key': 'videos', 'trust_remote_code': False, 'custom_cls': {'path': None, 'name': None}, 'return_multi_modal_inputs': True, 'sampler': {'class_path': None, 'class_name': None}, 'datagen': {'path': None, 'name': None}, 'apply_chat_template_kwargs': {}}, 'reward_manager': {'_target_': 'verl.trainer.config.config.RewardManagerConfig', 'source': 'register', 'name': 'naive', 'module': {'_target_': 'verl.trainer.config.config.ModuleConfig', 'path': None, 'name': 'custom_reward_manager'}}, 'critic': {'optim': {'_target_': 'verl.workers.config.FSDPOptimizerConfig', 'optimizer': 'AdamW', 'optimizer_impl': 'torch.optim', 'lr': 1e-05, 'lr_warmup_steps_ratio': 0.0, 'total_training_steps': 90, 'weight_decay': 0.01, 'lr_warmup_steps': -1, 'betas': [0.9, 0.999], 'clip_grad': 1.0, 'min_lr_ratio': 0.0, 'num_cycles': 0.5, 'lr_scheduler_type': 'constant', 'warmup_style': None, 'override_optimizer_config': None}, 'model': {'fsdp_config': {'_target_': 'verl.workers.config.FSDPEngineConfig', 'wrap_policy': {'min_num_params': 0}, 'param_offload': False, 'optimizer_offload': False, 'offload_policy': False, 'reshard_after_forward': True, 'fsdp_size': -1, 'forward_prefetch': False, 'model_dtype': 'fp32', 'use_orig_params': False, 'seed': 42, 'full_determinism': False, 'ulysses_sequence_parallel_size': 1, 'entropy_from_logits_with_chunking': False, 'use_torch_compile': True, 'entropy_checkpointing': False, 'forward_only': False, 'strategy': 'fsdp', 'dtype': 'bfloat16'}, 'path': '~/models/deepseek-llm-7b-chat', 'tokenizer_path': 'Qwen/Qwen3-4B-Instruct-2507', 'override_config': {}, 'external_lib': None, 'trust_remote_code': False, '_target_': 'verl.workers.config.FSDPCriticModelCfg', 'use_shm': False, 'enable_gradient_checkpointing': True, 'enable_activation_offload': False, 'use_remove_padding': False, 'lora_rank': 0, 'lora_alpha': 16, 'target_modules': 'all-linear', 'tiled_mlp': {'enabled': False, 'num_shards': 4}}, '_target_': 'verl.workers.config.FSDPCriticConfig', 'rollout_n': 3, 'strategy': 'fsdp', 'enable': None, 'ppo_mini_batch_size': 256, 'ppo_micro_batch_size': None, 'ppo_micro_batch_size_per_gpu': None, 'use_dynamic_bsz': False, 'ppo_max_token_len_per_gpu': 32768, 'forward_max_token_len_per_gpu': 32768, 'ppo_epochs': 1, 'shuffle': False, 'data_loader_seed': 42, 'cliprange_value': 0.5, 'loss_agg_mode': 'token-mean', 'checkpoint': {'_target_': 'verl.trainer.config.CheckpointConfig', 'save_contents': ['model', 'optimizer', 'extra'], 'load_contents': ['model', 'optimizer', 'extra'], 'async_save': False}, 'profiler': {'_target_': 'verl.utils.profiler.ProfilerConfig', 'tool': None, 'enable': False, 'all_ranks': False, 'ranks': [], 'save_path': 'outputs/profile', 'tool_config': {'nsys': {'_target_': 'verl.utils.profiler.config.NsightToolConfig', 'discrete': False}, 'npu': {'_target_': 'verl.utils.profiler.config.NPUToolConfig', 'contents': [], 'level': 'level0', 'analysis': True, 'discrete': False}, 'torch': {'_target_': 'verl.utils.profiler.config.TorchProfilerToolConfig', 'contents': [], 'discrete': False}, 'torch_memory': {'_target_': 'verl.utils.profiler.config.TorchMemoryToolConfig', 'trace_alloc_max_entries': 100000, 'stack_depth': 32}}}, 'forward_micro_batch_size': None, 'forward_micro_batch_size_per_gpu': None, 'ulysses_sequence_parallel_size': 1, 'grad_clip': 1.0}, 'reward_model': {'enable': False, 'enable_resource_pool': False, 'n_gpus_per_node': 8, 'nnodes': 0, 'strategy': 'fsdp', 'model': {'input_tokenizer': 'Qwen/Qwen3-4B-Instruct-2507', 'path': '~/models/FsfairX-LLaMA3-RM-v0.1', 'external_lib': None, 'trust_remote_code': False, 'override_config': {}, 'use_shm': False, 'use_remove_padding': False, 'use_fused_kernels': False, 'fsdp_config': {'_target_': 'verl.workers.config.FSDPEngineConfig', 'wrap_policy': {'min_num_params': 0}, 'param_offload': False, 'reshard_after_forward': True, 'fsdp_size': -1, 'forward_prefetch': False}}, 'micro_batch_size': None, 'micro_batch_size_per_gpu': None, 'max_length': None, 'use_dynamic_bsz': False, 'forward_max_token_len_per_gpu': 32768, 'reward_manager': 'naive', 'reward_loop_source': 'register', 'reward_loop_module_path': None, 'reward_loop_class_name': None, 'launch_reward_fn_async': False, 'sandbox_fusion': {'url': None, 'max_concurrent': 64, 'memory_limit_mb': 1024}, 'profiler': {'_target_': 'verl.utils.profiler.ProfilerConfig', 'tool': None, 'enable': False, 'all_ranks': False, 'ranks': [], 'save_path': 'outputs/profile', 'tool_config': {'nsys': {'_target_': 'verl.utils.profiler.config.NsightToolConfig', 'discrete': False}, 'npu': {'_target_': 'verl.utils.profiler.config.NPUToolConfig', 'contents': [], 'level': 'level0', 'analysis': True, 'discrete': False}, 'torch': {'_target_': 'verl.utils.profiler.config.TorchProfilerToolConfig', 'contents': [], 'discrete': False}, 'torch_memory': {'_target_': 'verl.utils.profiler.config.TorchMemoryToolConfig', 'trace_alloc_max_entries': 100000, 'stack_depth': 32}}}, 'ulysses_sequence_parallel_size': 1, 'use_reward_loop': True, 'num_workers': 1, 'rollout': {'_target_': 'verl.workers.config.RolloutConfig', 'name': '???', 'dtype': 'bfloat16', 'gpu_memory_utilization': 0.5, 'enforce_eager': True, 'cudagraph_capture_sizes': None, 'free_cache_engine': True, 'data_parallel_size': 1, 'expert_parallel_size': 1, 'tensor_model_parallel_size': 2, 'max_num_batched_tokens': 8192, 'max_model_len': None, 'max_num_seqs': 1024, 'load_format': 'auto', 'engine_kwargs': {}, 'limit_images': None, 'enable_chunked_prefill': True, 'enable_prefix_caching': True, 'disable_log_stats': True, 'skip_tokenizer_init': False, 'prompt_length': 2048, 'response_length': 2048}}, 'algorithm': {'rollout_correction': {'rollout_is': None, 'rollout_is_threshold': 2.0, 'rollout_rs': None, 'rollout_rs_threshold': None, 'bypass_mode': False, 'loss_type': 'ppo_clip', 'rollout_is_batch_normalize': False}, '_target_': 'verl.trainer.config.AlgoConfig', 'gamma': 1.0, 'lam': 1.0, 'adv_estimator': 'grpo', 'norm_adv_by_std_in_grpo': True, 'use_kl_in_reward': False, 'kl_penalty': 'kl', 'kl_ctrl': {'_target_': 'verl.trainer.config.KLControlConfig', 'type': 'fixed', 'kl_coef': 0.001, 'horizon': 10000, 'target_kl': 0.1}, 'use_pf_ppo': False, 'pf_ppo': {'reweight_method': 'pow', 'weight_pow': 2.0}}, 'custom_reward_function': {'path': '/home/mshahidul/readctrl/code/RL_model/verl/verl_train/reward_func/reward.py', 'name': 'compute_score'}, 'trainer': {'balance_batch': True, 'total_epochs': 15, 'total_training_steps': None, 'project_name': 'readctrl-verl', 'experiment_name': 'qwen3-4b-instruct-optimized-multiclinsum-gs', 'logger': ['console', 'wandb'], 'log_val_generations': 0, 'rollout_data_dir': None, 'validation_data_dir': None, 'nnodes': 1, 'n_gpus_per_node': 2, 'save_freq': 20, 'esi_redundant_time': 0, 'resume_mode': 'auto', 'resume_from_path': None, 'val_before_train': True, 'val_only': False, 'test_freq': 5, 'critic_warmup': 0, 'default_hdfs_dir': None, 'del_local_ckpt_after_load': False, 'default_local_dir': '/home/mshahidul/readctrl/code/RL_model/train_v2', 'max_actor_ckpt_to_keep': 1, 'max_critic_ckpt_to_keep': 1, 'ray_wait_register_center_timeout': 300, 'device': 'cuda', 'use_legacy_worker_impl': 'auto', 'remove_previous_ckpt_in_save': True}, 'global_profiler': {'_target_': 'verl.utils.profiler.ProfilerConfig', 'tool': None, 'steps': None, 'profile_continuous_steps': False, 'save_path': 'outputs/profile', 'global_tool_config': {'nsys': {'_target_': 'verl.utils.profiler.config.NsightToolConfig', 'discrete': False, 'controller_nsight_options': {'trace': 'cuda,nvtx,cublas,ucx', 'cuda-memory-usage': 'true', 'cuda-graph-trace': 'graph'}, 'worker_nsight_options': {'trace': 'cuda,nvtx,cublas,ucx', 'cuda-memory-usage': 'true', 'cuda-graph-trace': 'graph', 'capture-range': 'cudaProfilerApi', 'capture-range-end': None, 'kill': 'none'}}, 'torch_memory': {'trace_alloc_max_entries': 100000, 'stack_depth': 32, 'context': 'all', 'stacks': 'all', 'kw_args': {}}}}, 'transfer_queue': {'enable': False}, 'ray_kwargs': {'ray_init': {'num_cpus': None}, 'timeline_json_file': None}, '_wandb': {}} +2026-02-07 11:00:43,146 INFO MainThread:391957 [wandb_init.py:init():892] starting backend +2026-02-07 11:00:43,440 INFO MainThread:391957 [wandb_init.py:init():895] sending inform_init request +2026-02-07 11:00:43,449 INFO MainThread:391957 [wandb_init.py:init():903] backend started and connected +2026-02-07 11:00:43,461 INFO MainThread:391957 [wandb_init.py:init():973] updated telemetry +2026-02-07 11:00:43,485 INFO MainThread:391957 [wandb_init.py:init():997] communicating run to backend with 90.0 second timeout +2026-02-07 11:00:45,512 INFO MainThread:391957 [wandb_init.py:init():1042] starting run threads in backend +2026-02-07 11:00:46,454 INFO MainThread:391957 [wandb_run.py:_console_start():2529] atexit reg +2026-02-07 11:00:46,455 INFO MainThread:391957 [wandb_run.py:_redirect():2377] redirect: wrap_raw +2026-02-07 11:00:46,455 INFO MainThread:391957 [wandb_run.py:_redirect():2446] Wrapping output streams. +2026-02-07 11:00:46,456 INFO MainThread:391957 [wandb_run.py:_redirect():2469] Redirects installed. +2026-02-07 11:00:46,470 INFO MainThread:391957 [wandb_init.py:init():1082] run started, returning control to user process +2026-02-07 11:05:04,088 INFO MainThread:391957 [wandb_run.py:_finish():2295] finishing run shahidulshakib034-khulna-university-of-engineering-techn/readctrl-verl/guu1a2ui +2026-02-07 11:05:04,090 INFO MainThread:391957 [wandb_run.py:_atexit_cleanup():2494] got exitcode: 0 +2026-02-07 11:05:04,091 INFO MainThread:391957 [wandb_run.py:_restore():2476] restore +2026-02-07 11:05:04,092 INFO MainThread:391957 [wandb_run.py:_restore():2482] restore done diff --git a/code/RL_model/verl/verl_train/wandb/run-20260207_111840-r3wx8thu/files/output.log b/code/RL_model/verl/verl_train/wandb/run-20260207_111840-r3wx8thu/files/output.log new file mode 100644 index 0000000000000000000000000000000000000000..d78af0c289eb892ae52257239a41a200ae2d6cd8 --- /dev/null +++ b/code/RL_model/verl/verl_train/wandb/run-20260207_111840-r3wx8thu/files/output.log @@ -0,0 +1,6 @@ +wandb: Detected [openai] in use. +wandb: Use W&B Weave for improved LLM call tracing. Install Weave with `pip install weave` then add `import weave` to the top of your script. +wandb: For more information, check out the docs at: https://weave-docs.wandb.ai/ +Checkpoint tracker file does not exist: /home/mshahidul/readctrl/code/RL_model/train_v2/latest_checkpointed_iteration.txt +Training from scratch +test_gen_batch meta info: {'eos_token_id': 151645, 'pad_token_id': 151643, 'recompute_log_prob': False, 'do_sample': False, 'validate': True, 'global_steps': 0} diff --git a/code/RL_model/verl/verl_train/wandb/run-20260207_111840-r3wx8thu/files/requirements.txt b/code/RL_model/verl/verl_train/wandb/run-20260207_111840-r3wx8thu/files/requirements.txt new file mode 100644 index 0000000000000000000000000000000000000000..c168a7097ef00c84a34fe1db69f95d6eaafb03f2 --- /dev/null +++ b/code/RL_model/verl/verl_train/wandb/run-20260207_111840-r3wx8thu/files/requirements.txt @@ -0,0 +1,269 @@ +verl==0.8.0.dev0 +psutil==7.1.3 +colorama==0.4.6 +annotated-doc==0.0.4 +sentry-sdk==2.51.0 +requests==2.32.5 +nvidia-cufile-cu12==1.13.1.3 +ml_dtypes==0.5.4 +xformers==0.0.32.post1 +sglang==0.5.2 +multidict==6.7.1 +typing_extensions==4.15.0 +nvidia-cusparselt-cu12==0.7.1 +openai-harmony==0.0.4 +transformers==4.56.1 +Werkzeug==3.1.5 +identify==2.6.16 +pytest==9.0.2 +nvidia-cuda-runtime-cu12==12.8.90 +GitPython==3.1.46 +cupy-cuda12x==13.6.0 +tokenizers==0.22.2 +pybind11==3.0.1 +google-api-core==2.29.0 +partial-json-parser==0.2.1.1.post7 +aiohttp-cors==0.8.1 +sniffio==1.3.1 +tensordict==0.10.0 +smart_open==7.5.0 +cffi==2.0.0 +wcwidth==0.5.3 +asttokens==3.0.1 +opencensus==0.11.4 +rpds-py==0.30.0 +py-spy==0.4.1 +gguf==0.17.1 +nvidia-nvjitlink-cu12==12.8.93 +httpx==0.28.1 +cuda-python==13.1.1 +annotated-types==0.7.0 +regex==2026.1.15 +vllm==0.11.0 +idna==3.11 +fsspec==2025.10.0 +parso==0.8.5 +pydantic-extra-types==2.11.0 +MarkupSafe==3.0.3 +cryptography==46.0.4 +filelock==3.20.3 +modelscope==1.34.0 +outlines==0.1.11 +dnspython==2.8.0 +scipy==1.17.0 +zipp==3.23.0 +PyYAML==6.0.3 +onnx==1.20.1 +torchdata==0.11.0 +cuda-pathfinder==1.3.3 +verl==0.8.0.dev0 +httptools==0.7.1 +opencv-python-headless==4.13.0.90 +importlib_metadata==8.7.1 +peft==0.18.1 +opentelemetry-sdk==1.39.1 +python-json-logger==4.0.0 +cuda-bindings==13.1.1 +mdurl==0.1.2 +referencing==0.37.0 +xxhash==3.6.0 +interegular==0.3.3 +fastapi-cli==0.0.20 +uv==0.9.28 +tensorboard==2.20.0 +nvidia-cublas-cu12==12.8.4.1 +sentencepiece==0.2.1 +rich-toolkit==0.18.1 +numpy==2.2.0 +yarl==1.22.0 +opencv-fixer==0.2.5 +tqdm==4.67.2 +python-dotenv==1.2.1 +timm==1.0.16 +aiohappyeyeballs==2.6.1 +decord==0.6.0 +jiter==0.12.0 +airportsdata==20250909 +markdown-it-py==4.0.0 +nvidia-cusolver-cu12==11.7.3.90 +pyarrow==23.0.0 +opentelemetry-proto==1.39.1 +anyio==4.12.1 +pycryptodomex==3.23.0 +prometheus_client==0.24.1 +aiohttp==3.13.3 +urllib3==2.6.3 +pexpect==4.9.0 +pydantic-settings==2.12.0 +distro==1.9.0 +av==16.1.0 +cloudpickle==3.1.2 +mpmath==1.3.0 +certifi==2026.1.4 +antlr4-python3-runtime==4.9.3 +torchvision==0.23.0 +accelerate==1.12.0 +watchfiles==1.1.1 +ruff==0.14.14 +wheel==0.46.3 +omegaconf==2.3.0 +nvidia-cufft-cu12==11.3.3.83 +multiprocess==0.70.18 +frozendict==2.4.7 +sympy==1.14.0 +setproctitle==1.3.7 +setuptools==79.0.1 +py-cpuinfo==9.0.0 +ipython_pygments_lexers==1.1.1 +rich==14.3.2 +uvicorn==0.40.0 +openai==1.99.1 +outlines_core==0.2.11 +llvmlite==0.44.0 +nvidia-cuda-cupti-cu12==12.8.90 +attrs==25.4.0 +anthropic==0.77.0 +packaging==25.0 +fastrlock==0.8.3 +astor==0.8.1 +pluggy==1.6.0 +nvidia-cuda-nvrtc-cu12==12.8.93 +psutil==7.2.2 +virtualenv==20.36.1 +cbor2==5.8.0 +compressed-tensors==0.11.0 +nvidia-cusparse-cu12==12.5.8.93 +networkx==3.6.1 +httpcore==1.0.9 +onnxscript==0.3.1 +smmap==5.0.2 +opencv-python==4.13.0.90 +traitlets==5.14.3 +python-multipart==0.0.22 +pyvers==0.1.0 +huggingface-hub==0.36.0 +pillow==12.1.0 +jsonschema==4.26.0 +cfgv==3.5.0 +optree==0.18.0 +email-validator==2.3.0 +tabulate==0.9.0 +pre_commit==4.5.1 +msgpack==1.1.2 +depyf==0.19.0 +numba==0.61.2 +six==1.17.0 +aiosignal==1.4.0 +nvidia-nvtx-cu12==12.8.90 +propcache==0.4.1 +torch_memory_saver==0.0.8 +h11==0.16.0 +frozenlist==1.8.0 +websockets==16.0 +nvidia-cudnn-frontend==1.18.0 +build==1.4.0 +google-auth==2.48.0 +pycountry==24.6.1 +stack-data==0.6.3 +typing-inspection==0.4.2 +googleapis-common-protos==1.72.0 +pandas==3.0.0 +typer==0.21.1 +protobuf==6.33.5 +fastapi==0.128.0 +blake3==1.0.8 +opentelemetry-semantic-conventions==0.60b1 +opentelemetry-exporter-prometheus==0.60b1 +nvidia-cudnn-cu12==9.10.2.21 +Markdown==3.10.1 +liger_kernel==0.6.4 +nodeenv==1.10.0 +prompt_toolkit==3.0.52 +torchaudio==2.8.0 +codetiming==1.4.0 +platformdirs==4.5.1 +jsonschema-specifications==2025.9.1 +hydra-core==1.3.2 +tensorboard-data-server==0.7.2 +lm-format-enforcer==0.11.3 +pyasn1_modules==0.4.2 +tiktoken==0.12.0 +starlette==0.50.0 +pyproject_hooks==1.2.0 +flash_attn==2.8.1 +rsa==4.9.1 +ray==2.53.0 +nest-asyncio==1.6.0 +lark==1.2.2 +fastar==0.8.0 +orjson==3.11.6 +prometheus-fastapi-instrumentator==7.1.0 +opentelemetry-api==1.39.1 +mathruler==0.1.0 +pydantic_core==2.41.5 +fastapi-cloud-cli==0.11.0 +pynvml==13.0.1 +loguru==0.7.3 +torch==2.8.0 +msgspec==0.20.0 +nvidia-curand-cu12==10.3.9.90 +blobfile==3.0.0 +gitdb==4.0.12 +llguidance==0.7.30 +hf_transfer==0.1.9 +nvidia-nccl-cu12==2.27.3 +qwen-vl-utils==0.0.14 +ptyprocess==0.7.0 +ipdb==0.13.13 +opencensus-context==0.1.3 +jedi==0.19.2 +click==8.3.1 +datasets==4.5.0 +soxr==1.0.0 +sgl-kernel==0.3.9.post2 +colorful==0.5.8 +pyasn1==0.6.2 +charset-normalizer==3.4.4 +nvidia-ml-py==13.590.48 +hf-xet==1.2.0 +dill==0.4.0 +absl-py==2.4.0 +pydantic==2.12.5 +wrapt==2.1.0 +flashinfer-python==0.3.1 +python-dateutil==2.9.0.post0 +torchao==0.9.0 +cachetools==7.0.0 +soundfile==0.13.1 +diskcache==5.6.3 +onnx-ir==0.1.15 +docstring_parser==0.17.0 +matplotlib-inline==0.2.1 +Pygments==2.19.2 +wandb==0.24.1 +pure_eval==0.2.3 +ninja==1.13.0 +proto-plus==1.27.0 +pyzmq==27.1.0 +iniconfig==2.3.0 +Jinja2==3.1.6 +megatron-core==0.13.1 +uvloop==0.22.1 +pycparser==3.0 +pylatexenc==2.10 +decorator==5.2.1 +shellingham==1.5.4 +lxml==6.0.2 +safetensors==0.7.0 +xgrammar==0.1.25 +pybase64==1.4.3 +ipython==9.9.0 +mistral_common==1.9.0 +rignore==0.7.6 +einops==0.8.2 +distlib==0.4.0 +triton==3.4.0 +executing==2.2.1 +grpcio==1.76.0 +pip==25.3 +verl==0.8.0.dev0 diff --git a/code/RL_model/verl/verl_train/wandb/run-20260207_111840-r3wx8thu/files/wandb-metadata.json b/code/RL_model/verl/verl_train/wandb/run-20260207_111840-r3wx8thu/files/wandb-metadata.json new file mode 100644 index 0000000000000000000000000000000000000000..e73d2539857a19b6106780c6c8e8819bd013c3b2 --- /dev/null +++ b/code/RL_model/verl/verl_train/wandb/run-20260207_111840-r3wx8thu/files/wandb-metadata.json @@ -0,0 +1,93 @@ +{ + "os": "Linux-5.15.0-160-generic-x86_64-with-glibc2.35", + "python": "CPython 3.12.12", + "startedAt": "2026-02-07T16:18:40.056002Z", + "args": [ + "--node-ip-address=172.16.34.29", + "--node-manager-port=33377", + "--object-store-name=/tmp/ray/session_2026-02-07_11-11-51_145730_485767/sockets/plasma_store", + "--raylet-name=/tmp/ray/session_2026-02-07_11-11-51_145730_485767/sockets/raylet", + "--redis-address=None", + "--metrics-agent-port=61864", + "--logging-rotate-bytes=536870912", + "--logging-rotate-backup-count=5", + "--runtime-env-agent-port=38528", + "--gcs-address=172.16.34.29:64074", + "--session-name=session_2026-02-07_11-11-51_145730_485767", + "--temp-dir=/tmp/ray", + "--webui=127.0.0.1:8297", + "--cluster-id=b9c6d8260ff7765ecb32ce5e2f2a6f96c20bb8e9e690a94355ef429e", + "--startup-token=128", + "--worker-launch-time-ms=1770480725136", + "--node-id=1722aff056e4c15ae7bfdb475e188eccdfc4f0fb5498c83483b19c28", + "--runtime-env-hash=1096984665" + ], + "program": "/home/mshahidul/miniconda3/envs/verl2/lib/python3.12/site-packages/ray/_private/workers/default_worker.py", + "git": { + "remote": "https://github.com/verl-project/verl", + "commit": "d9939add7a2a01923a9088891f913a5d20c4e622" + }, + "email": "shahidulshakib034@gmail.com", + "root": "/data/home_beta/mshahidul/readctrl/code/RL_model/verl/verl_train", + "host": "gamma", + "executable": "/home/mshahidul/miniconda3/envs/verl2/bin/python3", + "cpu_count": 64, + "cpu_count_logical": 128, + "gpu": "NVIDIA A100 80GB PCIe", + "gpu_count": 6, + "disk": { + "/": { + "total": "3766429188096", + "used": "188225777664" + } + }, + "memory": { + "total": "1081814863872" + }, + "gpu_nvidia": [ + { + "name": "NVIDIA A100 80GB PCIe", + "memoryTotal": "85899345920", + "cudaCores": 6912, + "architecture": "Ampere", + "uuid": "GPU-4a3678c7-34a9-356f-f7b7-7f7e2f44b596" + }, + { + "name": "NVIDIA A100 80GB PCIe", + "memoryTotal": "85899345920", + "cudaCores": 6912, + "architecture": "Ampere", + "uuid": "GPU-df506764-0db5-91b4-8ec9-154a3bb8123f" + }, + { + "name": "NVIDIA A100 80GB PCIe", + "memoryTotal": "85899345920", + "cudaCores": 6912, + "architecture": "Ampere", + "uuid": "GPU-2c3dbd62-b384-2996-a0f6-b32dcfcc3538" + }, + { + "name": "NVIDIA A100 80GB PCIe", + "memoryTotal": "85899345920", + "cudaCores": 6912, + "architecture": "Ampere", + "uuid": "GPU-1ff3dabe-4b9a-ea62-5cc3-01f12f32d328" + }, + { + "name": "NVIDIA H100 PCIe", + "memoryTotal": "85520809984", + "cudaCores": 14592, + "architecture": "Hopper", + "uuid": "GPU-eefc4b8c-0e79-c1d6-a9ff-8325040572eb" + }, + { + "name": "NVIDIA H100 PCIe", + "memoryTotal": "85520809984", + "cudaCores": 14592, + "architecture": "Hopper", + "uuid": "GPU-d42b6057-13e8-1e88-6aa1-9307df72dece" + } + ], + "cudaVersion": "13.0", + "writerId": "88p3jnmc3bcmts4akvh8cpqwqwpmfzgt" +} \ No newline at end of file diff --git a/code/RL_model/verl/verl_train/wandb/run-20260207_111840-r3wx8thu/logs/debug-core.log b/code/RL_model/verl/verl_train/wandb/run-20260207_111840-r3wx8thu/logs/debug-core.log new file mode 100644 index 0000000000000000000000000000000000000000..5133d53a4b4e3b5cce309ffedbbffbc583d6dd14 --- /dev/null +++ b/code/RL_model/verl/verl_train/wandb/run-20260207_111840-r3wx8thu/logs/debug-core.log @@ -0,0 +1,7 @@ +{"time":"2026-02-07T11:18:40.230438437-05:00","level":"INFO","msg":"main: starting server","port-filename":"/tmp/tmpm8cmdcpl/port-494494.txt","pid":494494,"log-level":0,"disable-analytics":false,"shutdown-on-parent-exit":false,"enable-dcgm-profiling":false} +{"time":"2026-02-07T11:18:40.231936108-05:00","level":"INFO","msg":"server: will exit if parent process dies","ppid":494494} +{"time":"2026-02-07T11:18:40.231930554-05:00","level":"INFO","msg":"server: accepting connections","addr":{"Name":"/tmp/wandb-494494-521925-457192743/socket","Net":"unix"}} +{"time":"2026-02-07T11:18:40.387955627-05:00","level":"INFO","msg":"connection: ManageConnectionData: new connection created","id":"1(@)"} +{"time":"2026-02-07T11:18:40.40286835-05:00","level":"INFO","msg":"handleInformInit: received","streamId":"r3wx8thu","id":"1(@)"} +{"time":"2026-02-07T11:18:41.641271581-05:00","level":"INFO","msg":"handleInformInit: stream started","streamId":"r3wx8thu","id":"1(@)"} +{"time":"2026-02-07T11:18:47.785157372-05:00","level":"INFO","msg":"connection: cancelling request","id":"1(@)","requestId":"q1cc73tmhltw"} diff --git a/code/RL_model/verl/verl_train/wandb/run-20260207_111840-r3wx8thu/logs/debug-internal.log b/code/RL_model/verl/verl_train/wandb/run-20260207_111840-r3wx8thu/logs/debug-internal.log new file mode 100644 index 0000000000000000000000000000000000000000..699315f1b00bc19cd1b4c32f78d48476349d4f24 --- /dev/null +++ b/code/RL_model/verl/verl_train/wandb/run-20260207_111840-r3wx8thu/logs/debug-internal.log @@ -0,0 +1,6 @@ +{"time":"2026-02-07T11:18:40.40454051-05:00","level":"INFO","msg":"stream: starting","core version":"0.24.1"} +{"time":"2026-02-07T11:18:41.638510241-05:00","level":"INFO","msg":"stream: created new stream","id":"r3wx8thu"} +{"time":"2026-02-07T11:18:41.638679539-05:00","level":"INFO","msg":"handler: started","stream_id":"r3wx8thu"} +{"time":"2026-02-07T11:18:41.641242172-05:00","level":"INFO","msg":"stream: started","id":"r3wx8thu"} +{"time":"2026-02-07T11:18:41.641416143-05:00","level":"INFO","msg":"writer: started","stream_id":"r3wx8thu"} +{"time":"2026-02-07T11:18:41.641421939-05:00","level":"INFO","msg":"sender: started","stream_id":"r3wx8thu"} diff --git a/code/RL_model/verl/verl_train/wandb/run-20260207_111840-r3wx8thu/logs/debug.log b/code/RL_model/verl/verl_train/wandb/run-20260207_111840-r3wx8thu/logs/debug.log new file mode 100644 index 0000000000000000000000000000000000000000..6c0eb6969363c42037a4639b86860e747238b620 --- /dev/null +++ b/code/RL_model/verl/verl_train/wandb/run-20260207_111840-r3wx8thu/logs/debug.log @@ -0,0 +1,19 @@ +2026-02-07 11:18:40,082 INFO MainThread:494494 [wandb_setup.py:_flush():81] Current SDK version is 0.24.1 +2026-02-07 11:18:40,082 INFO MainThread:494494 [wandb_setup.py:_flush():81] Configure stats pid to 494494 +2026-02-07 11:18:40,083 INFO MainThread:494494 [wandb_setup.py:_flush():81] Loading settings from environment variables +2026-02-07 11:18:40,083 INFO MainThread:494494 [wandb_init.py:setup_run_log_directory():717] Logging user logs to /data/home_beta/mshahidul/readctrl/code/RL_model/verl/verl_train/wandb/run-20260207_111840-r3wx8thu/logs/debug.log +2026-02-07 11:18:40,083 INFO MainThread:494494 [wandb_init.py:setup_run_log_directory():718] Logging internal logs to /data/home_beta/mshahidul/readctrl/code/RL_model/verl/verl_train/wandb/run-20260207_111840-r3wx8thu/logs/debug-internal.log +2026-02-07 11:18:40,084 INFO MainThread:494494 [wandb_init.py:init():844] calling init triggers +2026-02-07 11:18:40,085 INFO MainThread:494494 [wandb_init.py:init():849] wandb.init called with sweep_config: {} +config: {'actor_rollout_ref': {'actor': {'optim': {'_target_': 'verl.workers.config.FSDPOptimizerConfig', 'optimizer': 'AdamW', 'optimizer_impl': 'torch.optim', 'lr': 1e-06, 'lr_warmup_steps_ratio': 0.0, 'total_training_steps': 6045, 'weight_decay': 0.01, 'lr_warmup_steps': -1, 'betas': [0.9, 0.999], 'clip_grad': 1.0, 'min_lr_ratio': 0.0, 'num_cycles': 0.5, 'lr_scheduler_type': 'constant', 'warmup_style': None, 'override_optimizer_config': None}, 'fsdp_config': {'_target_': 'verl.workers.config.FSDPEngineConfig', 'wrap_policy': {'min_num_params': 0}, 'param_offload': False, 'optimizer_offload': False, 'offload_policy': False, 'reshard_after_forward': True, 'fsdp_size': -1, 'forward_prefetch': False, 'model_dtype': 'fp32', 'use_orig_params': False, 'seed': 42, 'full_determinism': False, 'ulysses_sequence_parallel_size': 1, 'entropy_from_logits_with_chunking': False, 'use_torch_compile': True, 'entropy_checkpointing': False, 'forward_only': False, 'strategy': 'fsdp', 'dtype': 'bfloat16'}, '_target_': 'verl.workers.config.FSDPActorConfig', 'rollout_n': 3, 'strategy': 'fsdp', 'ppo_mini_batch_size': 4, 'ppo_micro_batch_size': None, 'ppo_micro_batch_size_per_gpu': 2, 'use_dynamic_bsz': False, 'ppo_max_token_len_per_gpu': 16384, 'clip_ratio': 0.2, 'clip_ratio_low': 0.2, 'clip_ratio_high': 0.2, 'tau_pos': 1.0, 'tau_neg': 1.05, 'freeze_vision_tower': False, 'policy_loss': {'_target_': 'verl.workers.config.PolicyLossConfig', 'loss_mode': 'vanilla', 'clip_cov_ratio': 0.0002, 'clip_cov_lb': 1.0, 'clip_cov_ub': 5.0, 'kl_cov_ratio': 0.0002, 'ppo_kl_coef': 0.1}, 'clip_ratio_c': 3.0, 'loss_agg_mode': 'token-mean', 'loss_scale_factor': None, 'entropy_coeff': 0, 'calculate_entropy': False, 'use_kl_loss': True, 'use_prefix_grouper': False, 'use_torch_compile': True, 'kl_loss_coef': 0.001, 'kl_loss_type': 'low_var_kl', 'ppo_epochs': 1, 'shuffle': False, 'data_loader_seed': 42, 'checkpoint': {'_target_': 'verl.trainer.config.CheckpointConfig', 'save_contents': ['model', 'optimizer', 'extra'], 'load_contents': ['model', 'optimizer', 'extra'], 'async_save': False}, 'use_fused_kernels': False, 'profiler': {'_target_': 'verl.utils.profiler.ProfilerConfig', 'tool': None, 'enable': False, 'all_ranks': False, 'ranks': [], 'save_path': 'outputs/profile', 'tool_config': {'nsys': {'_target_': 'verl.utils.profiler.config.NsightToolConfig', 'discrete': False}, 'npu': {'_target_': 'verl.utils.profiler.config.NPUToolConfig', 'contents': [], 'level': 'level0', 'analysis': True, 'discrete': False}, 'torch': {'_target_': 'verl.utils.profiler.config.TorchProfilerToolConfig', 'contents': [], 'discrete': False}, 'torch_memory': {'_target_': 'verl.utils.profiler.config.TorchMemoryToolConfig', 'trace_alloc_max_entries': 100000, 'stack_depth': 32}}}, 'router_replay': {'_target_': 'verl.workers.config.RouterReplayConfig', 'mode': 'disabled', 'record_file': None, 'replay_file': None}, 'grad_clip': 1.0, 'ulysses_sequence_parallel_size': 1, 'entropy_from_logits_with_chunking': False, 'entropy_checkpointing': False, 'use_remove_padding': True, 'calculate_sum_pi_squared': False, 'sum_pi_squared_checkpointing': False}, 'ref': {'rollout_n': 3, 'strategy': 'fsdp', 'use_torch_compile': True, 'log_prob_micro_batch_size': None, 'log_prob_micro_batch_size_per_gpu': 32, 'log_prob_use_dynamic_bsz': False, 'log_prob_max_token_len_per_gpu': 16384, 'profiler': {'_target_': 'verl.utils.profiler.ProfilerConfig', 'tool': None, 'enable': False, 'all_ranks': False, 'ranks': [], 'save_path': 'outputs/profile', 'tool_config': {'nsys': {'_target_': 'verl.utils.profiler.config.NsightToolConfig', 'discrete': False}, 'npu': {'_target_': 'verl.utils.profiler.config.NPUToolConfig', 'contents': [], 'level': 'level0', 'analysis': True, 'discrete': False}, 'torch': {'_target_': 'verl.utils.profiler.config.TorchProfilerToolConfig', 'contents': [], 'discrete': False}, 'torch_memory': {'_target_': 'verl.utils.profiler.config.TorchMemoryToolConfig', 'trace_alloc_max_entries': 100000, 'stack_depth': 32}}}, 'router_replay': {'_target_': 'verl.workers.config.RouterReplayConfig', 'mode': 'disabled', 'record_file': None, 'replay_file': None}, 'fsdp_config': {'_target_': 'verl.workers.config.FSDPEngineConfig', 'wrap_policy': {'min_num_params': 0}, 'param_offload': False, 'optimizer_offload': False, 'offload_policy': False, 'reshard_after_forward': True, 'fsdp_size': -1, 'forward_prefetch': False, 'model_dtype': 'fp32', 'use_orig_params': False, 'seed': 42, 'full_determinism': False, 'ulysses_sequence_parallel_size': 1, 'entropy_from_logits_with_chunking': False, 'use_torch_compile': True, 'entropy_checkpointing': False, 'forward_only': True, 'strategy': 'fsdp', 'dtype': 'bfloat16'}, '_target_': 'verl.workers.config.FSDPActorConfig', 'ulysses_sequence_parallel_size': 1, 'entropy_from_logits_with_chunking': False, 'entropy_checkpointing': False}, 'rollout': {'_target_': 'verl.workers.config.RolloutConfig', 'name': 'vllm', 'mode': 'async', 'temperature': 1.0, 'top_k': -1, 'top_p': 1, 'prompt_length': 1024, 'response_length': 2048, 'dtype': 'bfloat16', 'gpu_memory_utilization': 0.6, 'ignore_eos': False, 'enforce_eager': False, 'cudagraph_capture_sizes': None, 'free_cache_engine': True, 'tensor_model_parallel_size': 1, 'data_parallel_size': 1, 'expert_parallel_size': 1, 'pipeline_model_parallel_size': 1, 'max_num_batched_tokens': 8192, 'max_model_len': 8192, 'max_num_seqs': 1024, 'enable_chunked_prefill': True, 'enable_prefix_caching': True, 'logprobs_mode': 'processed_logprobs', 'scheduling_policy': 'fcfs', 'load_format': 'dummy', 'log_prob_micro_batch_size': None, 'log_prob_micro_batch_size_per_gpu': 2, 'log_prob_use_dynamic_bsz': False, 'log_prob_max_token_len_per_gpu': 16384, 'disable_log_stats': True, 'do_sample': True, 'n': 3, 'over_sample_rate': 0, 'multi_stage_wake_up': False, 'engine_kwargs': {'vllm': {}, 'sglang': {}, 'trtllm': {}}, 'val_kwargs': {'_target_': 'verl.workers.config.SamplingConfig', 'top_k': -1, 'top_p': 1.0, 'temperature': 0, 'n': 1, 'do_sample': False}, 'multi_turn': {'_target_': 'verl.workers.config.MultiTurnConfig', 'enable': False, 'max_assistant_turns': None, 'tool_config_path': None, 'max_user_turns': None, 'max_parallel_calls': 1, 'max_tool_response_length': 256, 'tool_response_truncate_side': 'middle', 'interaction_config_path': None, 'use_inference_chat_template': False, 'tokenization_sanity_check_mode': 'strict', 'format': 'hermes', 'num_repeat_rollouts': None}, 'calculate_log_probs': False, 'agent': {'_target_': 'verl.workers.config.AgentLoopConfig', 'num_workers': 8, 'default_agent_loop': 'single_turn_agent', 'agent_loop_config_path': None, 'custom_async_server': {'_target_': 'verl.workers.config.CustomAsyncServerConfig', 'path': None, 'name': None}}, 'checkpoint_engine': {'_target_': 'verl.workers.config.CheckpointEngineConfig', 'backend': 'naive', 'update_weights_bucket_megabytes': 2048, 'engine_kwargs': {}}, 'trace': {'_target_': 'verl.workers.config.TraceConfig', 'backend': None, 'token2text': False, 'max_samples_per_step_per_worker': None}, 'skip_rollout': False, 'skip_dump_dir': '/tmp/rollout_dump', 'skip_tokenizer_init': True, 'enable_rollout_routing_replay': False, 'profiler': {'_target_': 'verl.utils.profiler.ProfilerConfig', 'tool': None, 'enable': False, 'all_ranks': False, 'ranks': [], 'save_path': 'outputs/profile', 'tool_config': {'nsys': {'_target_': 'verl.utils.profiler.config.NsightToolConfig', 'discrete': False}, 'npu': {'_target_': 'verl.utils.profiler.config.NPUToolConfig', 'contents': [], 'level': 'level0', 'analysis': True, 'discrete': False}, 'torch': {'_target_': 'verl.utils.profiler.config.TorchProfilerToolConfig', 'contents': [], 'discrete': False}, 'torch_memory': {'_target_': 'verl.utils.profiler.config.TorchMemoryToolConfig', 'trace_alloc_max_entries': 100000, 'stack_depth': 32}}}, 'prometheus': {'_target_': 'verl.workers.config.PrometheusConfig', 'enable': False, 'port': 9090, 'file': '/tmp/ray/session_latest/metrics/prometheus/prometheus.yml', 'served_model_name': 'Qwen/Qwen3-4B-Instruct-2507'}, 'quantization': None, 'quantization_config_file': None, 'mtp': {'_target_': 'verl.workers.config.MtpConfig', 'enable': False, 'enable_train': False, 'enable_rollout': False, 'detach_encoder': False, 'mtp_loss_scaling_factor': 0.1, 'speculative_algorithm': 'EAGLE', 'speculative_num_steps': 3, 'speculative_eagle_topk': 1, 'speculative_num_draft_tokens': 4, 'method': 'mtp', 'num_speculative_tokens': 1}, 'layered_summon': False}, 'model': {'_target_': 'verl.workers.config.HFModelConfig', 'path': 'Qwen/Qwen3-4B-Instruct-2507', 'hf_config_path': None, 'tokenizer_path': None, 'use_shm': False, 'trust_remote_code': False, 'custom_chat_template': None, 'external_lib': None, 'override_config': {}, 'enable_gradient_checkpointing': True, 'enable_activation_offload': False, 'use_remove_padding': True, 'lora_rank': 0, 'lora_alpha': 16, 'target_modules': 'all-linear', 'exclude_modules': None, 'lora_adapter_path': None, 'use_liger': False, 'use_fused_kernels': False, 'fused_kernel_options': {'impl_backend': 'torch'}, 'tiled_mlp': {'enabled': False, 'num_shards': 4}, 'mtp': {'_target_': 'verl.workers.config.MtpConfig', 'enable': False, 'enable_train': False, 'enable_rollout': False, 'detach_encoder': False, 'mtp_loss_scaling_factor': 0.1, 'speculative_algorithm': 'EAGLE', 'speculative_num_steps': 3, 'speculative_eagle_topk': 1, 'speculative_num_draft_tokens': 4, 'method': 'mtp', 'num_speculative_tokens': 1}}, 'hybrid_engine': True, 'nccl_timeout': 600}, 'data': {'tokenizer': None, 'use_shm': False, 'train_files': '/home/mshahidul/readctrl/code/RL_model/verl/verl_train/dataset/train.parquet', 'val_files': '/home/mshahidul/readctrl/code/RL_model/verl/verl_train/dataset/test.parquet', 'train_max_samples': -1, 'val_max_samples': -1, 'prompt_key': 'prompt', 'reward_fn_key': 'data_source', 'max_prompt_length': 1024, 'max_response_length': 2048, 'train_batch_size': 8, 'val_batch_size': None, 'tool_config_path': None, 'return_raw_input_ids': False, 'return_raw_chat': True, 'return_full_prompt': False, 'shuffle': True, 'seed': None, 'dataloader_num_workers': 8, 'image_patch_size': 14, 'validation_shuffle': False, 'filter_overlong_prompts': True, 'filter_overlong_prompts_workers': 1, 'truncation': 'error', 'image_key': 'images', 'video_key': 'videos', 'trust_remote_code': False, 'custom_cls': {'path': None, 'name': None}, 'return_multi_modal_inputs': True, 'sampler': {'class_path': None, 'class_name': None}, 'datagen': {'path': None, 'name': None}, 'apply_chat_template_kwargs': {}}, 'reward_manager': {'_target_': 'verl.trainer.config.config.RewardManagerConfig', 'source': 'register', 'name': 'naive', 'module': {'_target_': 'verl.trainer.config.config.ModuleConfig', 'path': None, 'name': 'custom_reward_manager'}}, 'critic': {'optim': {'_target_': 'verl.workers.config.FSDPOptimizerConfig', 'optimizer': 'AdamW', 'optimizer_impl': 'torch.optim', 'lr': 1e-05, 'lr_warmup_steps_ratio': 0.0, 'total_training_steps': 6045, 'weight_decay': 0.01, 'lr_warmup_steps': -1, 'betas': [0.9, 0.999], 'clip_grad': 1.0, 'min_lr_ratio': 0.0, 'num_cycles': 0.5, 'lr_scheduler_type': 'constant', 'warmup_style': None, 'override_optimizer_config': None}, 'model': {'fsdp_config': {'_target_': 'verl.workers.config.FSDPEngineConfig', 'wrap_policy': {'min_num_params': 0}, 'param_offload': False, 'optimizer_offload': False, 'offload_policy': False, 'reshard_after_forward': True, 'fsdp_size': -1, 'forward_prefetch': False, 'model_dtype': 'fp32', 'use_orig_params': False, 'seed': 42, 'full_determinism': False, 'ulysses_sequence_parallel_size': 1, 'entropy_from_logits_with_chunking': False, 'use_torch_compile': True, 'entropy_checkpointing': False, 'forward_only': False, 'strategy': 'fsdp', 'dtype': 'bfloat16'}, 'path': '~/models/deepseek-llm-7b-chat', 'tokenizer_path': 'Qwen/Qwen3-4B-Instruct-2507', 'override_config': {}, 'external_lib': None, 'trust_remote_code': False, '_target_': 'verl.workers.config.FSDPCriticModelCfg', 'use_shm': False, 'enable_gradient_checkpointing': True, 'enable_activation_offload': False, 'use_remove_padding': False, 'lora_rank': 0, 'lora_alpha': 16, 'target_modules': 'all-linear', 'tiled_mlp': {'enabled': False, 'num_shards': 4}}, '_target_': 'verl.workers.config.FSDPCriticConfig', 'rollout_n': 3, 'strategy': 'fsdp', 'enable': None, 'ppo_mini_batch_size': 4, 'ppo_micro_batch_size': None, 'ppo_micro_batch_size_per_gpu': None, 'use_dynamic_bsz': False, 'ppo_max_token_len_per_gpu': 32768, 'forward_max_token_len_per_gpu': 32768, 'ppo_epochs': 1, 'shuffle': False, 'data_loader_seed': 42, 'cliprange_value': 0.5, 'loss_agg_mode': 'token-mean', 'checkpoint': {'_target_': 'verl.trainer.config.CheckpointConfig', 'save_contents': ['model', 'optimizer', 'extra'], 'load_contents': ['model', 'optimizer', 'extra'], 'async_save': False}, 'profiler': {'_target_': 'verl.utils.profiler.ProfilerConfig', 'tool': None, 'enable': False, 'all_ranks': False, 'ranks': [], 'save_path': 'outputs/profile', 'tool_config': {'nsys': {'_target_': 'verl.utils.profiler.config.NsightToolConfig', 'discrete': False}, 'npu': {'_target_': 'verl.utils.profiler.config.NPUToolConfig', 'contents': [], 'level': 'level0', 'analysis': True, 'discrete': False}, 'torch': {'_target_': 'verl.utils.profiler.config.TorchProfilerToolConfig', 'contents': [], 'discrete': False}, 'torch_memory': {'_target_': 'verl.utils.profiler.config.TorchMemoryToolConfig', 'trace_alloc_max_entries': 100000, 'stack_depth': 32}}}, 'forward_micro_batch_size': None, 'forward_micro_batch_size_per_gpu': None, 'ulysses_sequence_parallel_size': 1, 'grad_clip': 1.0}, 'reward_model': {'enable': False, 'enable_resource_pool': False, 'n_gpus_per_node': 8, 'nnodes': 0, 'strategy': 'fsdp', 'model': {'input_tokenizer': 'Qwen/Qwen3-4B-Instruct-2507', 'path': '~/models/FsfairX-LLaMA3-RM-v0.1', 'external_lib': None, 'trust_remote_code': False, 'override_config': {}, 'use_shm': False, 'use_remove_padding': False, 'use_fused_kernels': False, 'fsdp_config': {'_target_': 'verl.workers.config.FSDPEngineConfig', 'wrap_policy': {'min_num_params': 0}, 'param_offload': False, 'reshard_after_forward': True, 'fsdp_size': -1, 'forward_prefetch': False}}, 'micro_batch_size': None, 'micro_batch_size_per_gpu': None, 'max_length': None, 'use_dynamic_bsz': False, 'forward_max_token_len_per_gpu': 32768, 'reward_manager': 'naive', 'reward_loop_source': 'register', 'reward_loop_module_path': None, 'reward_loop_class_name': None, 'launch_reward_fn_async': False, 'sandbox_fusion': {'url': None, 'max_concurrent': 64, 'memory_limit_mb': 1024}, 'profiler': {'_target_': 'verl.utils.profiler.ProfilerConfig', 'tool': None, 'enable': False, 'all_ranks': False, 'ranks': [], 'save_path': 'outputs/profile', 'tool_config': {'nsys': {'_target_': 'verl.utils.profiler.config.NsightToolConfig', 'discrete': False}, 'npu': {'_target_': 'verl.utils.profiler.config.NPUToolConfig', 'contents': [], 'level': 'level0', 'analysis': True, 'discrete': False}, 'torch': {'_target_': 'verl.utils.profiler.config.TorchProfilerToolConfig', 'contents': [], 'discrete': False}, 'torch_memory': {'_target_': 'verl.utils.profiler.config.TorchMemoryToolConfig', 'trace_alloc_max_entries': 100000, 'stack_depth': 32}}}, 'ulysses_sequence_parallel_size': 1, 'use_reward_loop': True, 'num_workers': 1, 'rollout': {'_target_': 'verl.workers.config.RolloutConfig', 'name': '???', 'dtype': 'bfloat16', 'gpu_memory_utilization': 0.5, 'enforce_eager': True, 'cudagraph_capture_sizes': None, 'free_cache_engine': True, 'data_parallel_size': 1, 'expert_parallel_size': 1, 'tensor_model_parallel_size': 2, 'max_num_batched_tokens': 8192, 'max_model_len': None, 'max_num_seqs': 1024, 'load_format': 'auto', 'engine_kwargs': {}, 'limit_images': None, 'enable_chunked_prefill': True, 'enable_prefix_caching': True, 'disable_log_stats': True, 'skip_tokenizer_init': False, 'prompt_length': 2048, 'response_length': 2048}}, 'algorithm': {'rollout_correction': {'rollout_is': None, 'rollout_is_threshold': 2.0, 'rollout_rs': None, 'rollout_rs_threshold': None, 'bypass_mode': False, 'loss_type': 'ppo_clip', 'rollout_is_batch_normalize': False}, '_target_': 'verl.trainer.config.AlgoConfig', 'gamma': 1.0, 'lam': 1.0, 'adv_estimator': 'grpo', 'norm_adv_by_std_in_grpo': True, 'use_kl_in_reward': False, 'kl_penalty': 'kl', 'kl_ctrl': {'_target_': 'verl.trainer.config.KLControlConfig', 'type': 'fixed', 'kl_coef': 0.001, 'horizon': 10000, 'target_kl': 0.1}, 'use_pf_ppo': False, 'pf_ppo': {'reweight_method': 'pow', 'weight_pow': 2.0}}, 'custom_reward_function': {'path': '/home/mshahidul/readctrl/code/RL_model/verl/verl_train/reward_func/reward.py', 'name': 'compute_score'}, 'trainer': {'balance_batch': True, 'total_epochs': 15, 'total_training_steps': None, 'project_name': 'readctrl-verl', 'experiment_name': 'qwen3-4b-instruct-optimized-multiclinsum-gs', 'logger': ['console', 'wandb'], 'log_val_generations': 0, 'rollout_data_dir': None, 'validation_data_dir': None, 'nnodes': 1, 'n_gpus_per_node': 2, 'save_freq': 100, 'esi_redundant_time': 0, 'resume_mode': 'auto', 'resume_from_path': None, 'val_before_train': True, 'val_only': False, 'test_freq': 1, 'critic_warmup': 0, 'default_hdfs_dir': None, 'del_local_ckpt_after_load': False, 'default_local_dir': '/home/mshahidul/readctrl/code/RL_model/train_v2', 'max_actor_ckpt_to_keep': 1, 'max_critic_ckpt_to_keep': 1, 'ray_wait_register_center_timeout': 300, 'device': 'cuda', 'use_legacy_worker_impl': 'auto', 'remove_previous_ckpt_in_save': True}, 'global_profiler': {'_target_': 'verl.utils.profiler.ProfilerConfig', 'tool': None, 'steps': None, 'profile_continuous_steps': False, 'save_path': 'outputs/profile', 'global_tool_config': {'nsys': {'_target_': 'verl.utils.profiler.config.NsightToolConfig', 'discrete': False, 'controller_nsight_options': {'trace': 'cuda,nvtx,cublas,ucx', 'cuda-memory-usage': 'true', 'cuda-graph-trace': 'graph'}, 'worker_nsight_options': {'trace': 'cuda,nvtx,cublas,ucx', 'cuda-memory-usage': 'true', 'cuda-graph-trace': 'graph', 'capture-range': 'cudaProfilerApi', 'capture-range-end': None, 'kill': 'none'}}, 'torch_memory': {'trace_alloc_max_entries': 100000, 'stack_depth': 32, 'context': 'all', 'stacks': 'all', 'kw_args': {}}}}, 'transfer_queue': {'enable': False}, 'ray_kwargs': {'ray_init': {'num_cpus': None}, 'timeline_json_file': None}, '_wandb': {}} +2026-02-07 11:18:40,085 INFO MainThread:494494 [wandb_init.py:init():892] starting backend +2026-02-07 11:18:40,388 INFO MainThread:494494 [wandb_init.py:init():895] sending inform_init request +2026-02-07 11:18:40,397 INFO MainThread:494494 [wandb_init.py:init():903] backend started and connected +2026-02-07 11:18:40,411 INFO MainThread:494494 [wandb_init.py:init():973] updated telemetry +2026-02-07 11:18:40,474 INFO MainThread:494494 [wandb_init.py:init():997] communicating run to backend with 90.0 second timeout +2026-02-07 11:18:41,922 INFO MainThread:494494 [wandb_init.py:init():1042] starting run threads in backend +2026-02-07 11:18:42,759 INFO MainThread:494494 [wandb_run.py:_console_start():2529] atexit reg +2026-02-07 11:18:42,759 INFO MainThread:494494 [wandb_run.py:_redirect():2377] redirect: wrap_raw +2026-02-07 11:18:42,760 INFO MainThread:494494 [wandb_run.py:_redirect():2446] Wrapping output streams. +2026-02-07 11:18:42,760 INFO MainThread:494494 [wandb_run.py:_redirect():2469] Redirects installed. +2026-02-07 11:18:42,773 INFO MainThread:494494 [wandb_init.py:init():1082] run started, returning control to user process diff --git a/code/RL_model/verl/verl_train/wandb/run-20260207_113041-bhf8tuxa/files/output.log b/code/RL_model/verl/verl_train/wandb/run-20260207_113041-bhf8tuxa/files/output.log new file mode 100644 index 0000000000000000000000000000000000000000..d78af0c289eb892ae52257239a41a200ae2d6cd8 --- /dev/null +++ b/code/RL_model/verl/verl_train/wandb/run-20260207_113041-bhf8tuxa/files/output.log @@ -0,0 +1,6 @@ +wandb: Detected [openai] in use. +wandb: Use W&B Weave for improved LLM call tracing. Install Weave with `pip install weave` then add `import weave` to the top of your script. +wandb: For more information, check out the docs at: https://weave-docs.wandb.ai/ +Checkpoint tracker file does not exist: /home/mshahidul/readctrl/code/RL_model/train_v2/latest_checkpointed_iteration.txt +Training from scratch +test_gen_batch meta info: {'eos_token_id': 151645, 'pad_token_id': 151643, 'recompute_log_prob': False, 'do_sample': False, 'validate': True, 'global_steps': 0} diff --git a/code/RL_model/verl/verl_train/wandb/run-20260207_113041-bhf8tuxa/files/requirements.txt b/code/RL_model/verl/verl_train/wandb/run-20260207_113041-bhf8tuxa/files/requirements.txt new file mode 100644 index 0000000000000000000000000000000000000000..c168a7097ef00c84a34fe1db69f95d6eaafb03f2 --- /dev/null +++ b/code/RL_model/verl/verl_train/wandb/run-20260207_113041-bhf8tuxa/files/requirements.txt @@ -0,0 +1,269 @@ +verl==0.8.0.dev0 +psutil==7.1.3 +colorama==0.4.6 +annotated-doc==0.0.4 +sentry-sdk==2.51.0 +requests==2.32.5 +nvidia-cufile-cu12==1.13.1.3 +ml_dtypes==0.5.4 +xformers==0.0.32.post1 +sglang==0.5.2 +multidict==6.7.1 +typing_extensions==4.15.0 +nvidia-cusparselt-cu12==0.7.1 +openai-harmony==0.0.4 +transformers==4.56.1 +Werkzeug==3.1.5 +identify==2.6.16 +pytest==9.0.2 +nvidia-cuda-runtime-cu12==12.8.90 +GitPython==3.1.46 +cupy-cuda12x==13.6.0 +tokenizers==0.22.2 +pybind11==3.0.1 +google-api-core==2.29.0 +partial-json-parser==0.2.1.1.post7 +aiohttp-cors==0.8.1 +sniffio==1.3.1 +tensordict==0.10.0 +smart_open==7.5.0 +cffi==2.0.0 +wcwidth==0.5.3 +asttokens==3.0.1 +opencensus==0.11.4 +rpds-py==0.30.0 +py-spy==0.4.1 +gguf==0.17.1 +nvidia-nvjitlink-cu12==12.8.93 +httpx==0.28.1 +cuda-python==13.1.1 +annotated-types==0.7.0 +regex==2026.1.15 +vllm==0.11.0 +idna==3.11 +fsspec==2025.10.0 +parso==0.8.5 +pydantic-extra-types==2.11.0 +MarkupSafe==3.0.3 +cryptography==46.0.4 +filelock==3.20.3 +modelscope==1.34.0 +outlines==0.1.11 +dnspython==2.8.0 +scipy==1.17.0 +zipp==3.23.0 +PyYAML==6.0.3 +onnx==1.20.1 +torchdata==0.11.0 +cuda-pathfinder==1.3.3 +verl==0.8.0.dev0 +httptools==0.7.1 +opencv-python-headless==4.13.0.90 +importlib_metadata==8.7.1 +peft==0.18.1 +opentelemetry-sdk==1.39.1 +python-json-logger==4.0.0 +cuda-bindings==13.1.1 +mdurl==0.1.2 +referencing==0.37.0 +xxhash==3.6.0 +interegular==0.3.3 +fastapi-cli==0.0.20 +uv==0.9.28 +tensorboard==2.20.0 +nvidia-cublas-cu12==12.8.4.1 +sentencepiece==0.2.1 +rich-toolkit==0.18.1 +numpy==2.2.0 +yarl==1.22.0 +opencv-fixer==0.2.5 +tqdm==4.67.2 +python-dotenv==1.2.1 +timm==1.0.16 +aiohappyeyeballs==2.6.1 +decord==0.6.0 +jiter==0.12.0 +airportsdata==20250909 +markdown-it-py==4.0.0 +nvidia-cusolver-cu12==11.7.3.90 +pyarrow==23.0.0 +opentelemetry-proto==1.39.1 +anyio==4.12.1 +pycryptodomex==3.23.0 +prometheus_client==0.24.1 +aiohttp==3.13.3 +urllib3==2.6.3 +pexpect==4.9.0 +pydantic-settings==2.12.0 +distro==1.9.0 +av==16.1.0 +cloudpickle==3.1.2 +mpmath==1.3.0 +certifi==2026.1.4 +antlr4-python3-runtime==4.9.3 +torchvision==0.23.0 +accelerate==1.12.0 +watchfiles==1.1.1 +ruff==0.14.14 +wheel==0.46.3 +omegaconf==2.3.0 +nvidia-cufft-cu12==11.3.3.83 +multiprocess==0.70.18 +frozendict==2.4.7 +sympy==1.14.0 +setproctitle==1.3.7 +setuptools==79.0.1 +py-cpuinfo==9.0.0 +ipython_pygments_lexers==1.1.1 +rich==14.3.2 +uvicorn==0.40.0 +openai==1.99.1 +outlines_core==0.2.11 +llvmlite==0.44.0 +nvidia-cuda-cupti-cu12==12.8.90 +attrs==25.4.0 +anthropic==0.77.0 +packaging==25.0 +fastrlock==0.8.3 +astor==0.8.1 +pluggy==1.6.0 +nvidia-cuda-nvrtc-cu12==12.8.93 +psutil==7.2.2 +virtualenv==20.36.1 +cbor2==5.8.0 +compressed-tensors==0.11.0 +nvidia-cusparse-cu12==12.5.8.93 +networkx==3.6.1 +httpcore==1.0.9 +onnxscript==0.3.1 +smmap==5.0.2 +opencv-python==4.13.0.90 +traitlets==5.14.3 +python-multipart==0.0.22 +pyvers==0.1.0 +huggingface-hub==0.36.0 +pillow==12.1.0 +jsonschema==4.26.0 +cfgv==3.5.0 +optree==0.18.0 +email-validator==2.3.0 +tabulate==0.9.0 +pre_commit==4.5.1 +msgpack==1.1.2 +depyf==0.19.0 +numba==0.61.2 +six==1.17.0 +aiosignal==1.4.0 +nvidia-nvtx-cu12==12.8.90 +propcache==0.4.1 +torch_memory_saver==0.0.8 +h11==0.16.0 +frozenlist==1.8.0 +websockets==16.0 +nvidia-cudnn-frontend==1.18.0 +build==1.4.0 +google-auth==2.48.0 +pycountry==24.6.1 +stack-data==0.6.3 +typing-inspection==0.4.2 +googleapis-common-protos==1.72.0 +pandas==3.0.0 +typer==0.21.1 +protobuf==6.33.5 +fastapi==0.128.0 +blake3==1.0.8 +opentelemetry-semantic-conventions==0.60b1 +opentelemetry-exporter-prometheus==0.60b1 +nvidia-cudnn-cu12==9.10.2.21 +Markdown==3.10.1 +liger_kernel==0.6.4 +nodeenv==1.10.0 +prompt_toolkit==3.0.52 +torchaudio==2.8.0 +codetiming==1.4.0 +platformdirs==4.5.1 +jsonschema-specifications==2025.9.1 +hydra-core==1.3.2 +tensorboard-data-server==0.7.2 +lm-format-enforcer==0.11.3 +pyasn1_modules==0.4.2 +tiktoken==0.12.0 +starlette==0.50.0 +pyproject_hooks==1.2.0 +flash_attn==2.8.1 +rsa==4.9.1 +ray==2.53.0 +nest-asyncio==1.6.0 +lark==1.2.2 +fastar==0.8.0 +orjson==3.11.6 +prometheus-fastapi-instrumentator==7.1.0 +opentelemetry-api==1.39.1 +mathruler==0.1.0 +pydantic_core==2.41.5 +fastapi-cloud-cli==0.11.0 +pynvml==13.0.1 +loguru==0.7.3 +torch==2.8.0 +msgspec==0.20.0 +nvidia-curand-cu12==10.3.9.90 +blobfile==3.0.0 +gitdb==4.0.12 +llguidance==0.7.30 +hf_transfer==0.1.9 +nvidia-nccl-cu12==2.27.3 +qwen-vl-utils==0.0.14 +ptyprocess==0.7.0 +ipdb==0.13.13 +opencensus-context==0.1.3 +jedi==0.19.2 +click==8.3.1 +datasets==4.5.0 +soxr==1.0.0 +sgl-kernel==0.3.9.post2 +colorful==0.5.8 +pyasn1==0.6.2 +charset-normalizer==3.4.4 +nvidia-ml-py==13.590.48 +hf-xet==1.2.0 +dill==0.4.0 +absl-py==2.4.0 +pydantic==2.12.5 +wrapt==2.1.0 +flashinfer-python==0.3.1 +python-dateutil==2.9.0.post0 +torchao==0.9.0 +cachetools==7.0.0 +soundfile==0.13.1 +diskcache==5.6.3 +onnx-ir==0.1.15 +docstring_parser==0.17.0 +matplotlib-inline==0.2.1 +Pygments==2.19.2 +wandb==0.24.1 +pure_eval==0.2.3 +ninja==1.13.0 +proto-plus==1.27.0 +pyzmq==27.1.0 +iniconfig==2.3.0 +Jinja2==3.1.6 +megatron-core==0.13.1 +uvloop==0.22.1 +pycparser==3.0 +pylatexenc==2.10 +decorator==5.2.1 +shellingham==1.5.4 +lxml==6.0.2 +safetensors==0.7.0 +xgrammar==0.1.25 +pybase64==1.4.3 +ipython==9.9.0 +mistral_common==1.9.0 +rignore==0.7.6 +einops==0.8.2 +distlib==0.4.0 +triton==3.4.0 +executing==2.2.1 +grpcio==1.76.0 +pip==25.3 +verl==0.8.0.dev0 diff --git a/code/RL_model/verl/verl_train/wandb/run-20260207_113041-bhf8tuxa/files/wandb-metadata.json b/code/RL_model/verl/verl_train/wandb/run-20260207_113041-bhf8tuxa/files/wandb-metadata.json new file mode 100644 index 0000000000000000000000000000000000000000..e37ed81dbf43490c68dcddbcc97e81a0e92a0b1c --- /dev/null +++ b/code/RL_model/verl/verl_train/wandb/run-20260207_113041-bhf8tuxa/files/wandb-metadata.json @@ -0,0 +1,93 @@ +{ + "os": "Linux-5.15.0-160-generic-x86_64-with-glibc2.35", + "python": "CPython 3.12.12", + "startedAt": "2026-02-07T16:30:41.971066Z", + "args": [ + "--node-ip-address=172.16.34.29", + "--node-manager-port=38731", + "--object-store-name=/tmp/ray/session_2026-02-07_11-23-21_176789_560668/sockets/plasma_store", + "--raylet-name=/tmp/ray/session_2026-02-07_11-23-21_176789_560668/sockets/raylet", + "--redis-address=None", + "--metrics-agent-port=57787", + "--logging-rotate-bytes=536870912", + "--logging-rotate-backup-count=5", + "--runtime-env-agent-port=61716", + "--gcs-address=172.16.34.29:60023", + "--session-name=session_2026-02-07_11-23-21_176789_560668", + "--temp-dir=/tmp/ray", + "--webui=127.0.0.1:8297", + "--cluster-id=370a1f724330a4aafe3fbd01e4e14a241970e55ca223570d10827c4e", + "--startup-token=128", + "--worker-launch-time-ms=1770481416707", + "--node-id=3167b7b31cc1bbafa7cd7475e85c6e83f52bf93321eadd37e0235a0a", + "--runtime-env-hash=-2086329310" + ], + "program": "/home/mshahidul/miniconda3/envs/verl2/lib/python3.12/site-packages/ray/_private/workers/default_worker.py", + "git": { + "remote": "https://github.com/verl-project/verl", + "commit": "d9939add7a2a01923a9088891f913a5d20c4e622" + }, + "email": "shahidulshakib034@gmail.com", + "root": "/data/home_beta/mshahidul/readctrl/code/RL_model/verl/verl_train", + "host": "gamma", + "executable": "/home/mshahidul/miniconda3/envs/verl2/bin/python3", + "cpu_count": 64, + "cpu_count_logical": 128, + "gpu": "NVIDIA A100 80GB PCIe", + "gpu_count": 6, + "disk": { + "/": { + "total": "3766429188096", + "used": "188255096832" + } + }, + "memory": { + "total": "1081814863872" + }, + "gpu_nvidia": [ + { + "name": "NVIDIA A100 80GB PCIe", + "memoryTotal": "85899345920", + "cudaCores": 6912, + "architecture": "Ampere", + "uuid": "GPU-4a3678c7-34a9-356f-f7b7-7f7e2f44b596" + }, + { + "name": "NVIDIA A100 80GB PCIe", + "memoryTotal": "85899345920", + "cudaCores": 6912, + "architecture": "Ampere", + "uuid": "GPU-df506764-0db5-91b4-8ec9-154a3bb8123f" + }, + { + "name": "NVIDIA A100 80GB PCIe", + "memoryTotal": "85899345920", + "cudaCores": 6912, + "architecture": "Ampere", + "uuid": "GPU-2c3dbd62-b384-2996-a0f6-b32dcfcc3538" + }, + { + "name": "NVIDIA A100 80GB PCIe", + "memoryTotal": "85899345920", + "cudaCores": 6912, + "architecture": "Ampere", + "uuid": "GPU-1ff3dabe-4b9a-ea62-5cc3-01f12f32d328" + }, + { + "name": "NVIDIA H100 PCIe", + "memoryTotal": "85520809984", + "cudaCores": 14592, + "architecture": "Hopper", + "uuid": "GPU-eefc4b8c-0e79-c1d6-a9ff-8325040572eb" + }, + { + "name": "NVIDIA H100 PCIe", + "memoryTotal": "85520809984", + "cudaCores": 14592, + "architecture": "Hopper", + "uuid": "GPU-d42b6057-13e8-1e88-6aa1-9307df72dece" + } + ], + "cudaVersion": "13.0", + "writerId": "zs579pc3uw4lpzesyrt3i5jvsf78epwk" +} \ No newline at end of file diff --git a/code/RL_model/verl/verl_train/wandb/run-20260207_113041-bhf8tuxa/logs/debug-core.log b/code/RL_model/verl/verl_train/wandb/run-20260207_113041-bhf8tuxa/logs/debug-core.log new file mode 100644 index 0000000000000000000000000000000000000000..1d02d5242e21387cce05abcfb67aa63854f41379 --- /dev/null +++ b/code/RL_model/verl/verl_train/wandb/run-20260207_113041-bhf8tuxa/logs/debug-core.log @@ -0,0 +1,7 @@ +{"time":"2026-02-07T11:30:42.133472663-05:00","level":"INFO","msg":"main: starting server","port-filename":"/tmp/tmpxfufmriw/port-572506.txt","pid":572506,"log-level":0,"disable-analytics":false,"shutdown-on-parent-exit":false,"enable-dcgm-profiling":false} +{"time":"2026-02-07T11:30:42.134859961-05:00","level":"INFO","msg":"server: will exit if parent process dies","ppid":572506} +{"time":"2026-02-07T11:30:42.134811243-05:00","level":"INFO","msg":"server: accepting connections","addr":{"Name":"/tmp/wandb-572506-600660-515315493/socket","Net":"unix"}} +{"time":"2026-02-07T11:30:42.295094179-05:00","level":"INFO","msg":"connection: ManageConnectionData: new connection created","id":"1(@)"} +{"time":"2026-02-07T11:30:42.309590652-05:00","level":"INFO","msg":"handleInformInit: received","streamId":"bhf8tuxa","id":"1(@)"} +{"time":"2026-02-07T11:30:43.80865873-05:00","level":"INFO","msg":"handleInformInit: stream started","streamId":"bhf8tuxa","id":"1(@)"} +{"time":"2026-02-07T11:30:50.138881667-05:00","level":"INFO","msg":"connection: cancelling request","id":"1(@)","requestId":"q0s6lmd8t1ko"} diff --git a/code/RL_model/verl/verl_train/wandb/run-20260207_113041-bhf8tuxa/logs/debug-internal.log b/code/RL_model/verl/verl_train/wandb/run-20260207_113041-bhf8tuxa/logs/debug-internal.log new file mode 100644 index 0000000000000000000000000000000000000000..cb6e8886d491345e08232af5c0d7b703fa4575fe --- /dev/null +++ b/code/RL_model/verl/verl_train/wandb/run-20260207_113041-bhf8tuxa/logs/debug-internal.log @@ -0,0 +1,6 @@ +{"time":"2026-02-07T11:30:42.310964076-05:00","level":"INFO","msg":"stream: starting","core version":"0.24.1"} +{"time":"2026-02-07T11:30:43.754825568-05:00","level":"INFO","msg":"stream: created new stream","id":"bhf8tuxa"} +{"time":"2026-02-07T11:30:43.755004364-05:00","level":"INFO","msg":"handler: started","stream_id":"bhf8tuxa"} +{"time":"2026-02-07T11:30:43.808608409-05:00","level":"INFO","msg":"stream: started","id":"bhf8tuxa"} +{"time":"2026-02-07T11:30:43.80864944-05:00","level":"INFO","msg":"writer: started","stream_id":"bhf8tuxa"} +{"time":"2026-02-07T11:30:43.80868108-05:00","level":"INFO","msg":"sender: started","stream_id":"bhf8tuxa"} diff --git a/code/RL_model/verl/verl_train/wandb/run-20260207_113041-bhf8tuxa/logs/debug.log b/code/RL_model/verl/verl_train/wandb/run-20260207_113041-bhf8tuxa/logs/debug.log new file mode 100644 index 0000000000000000000000000000000000000000..4c9ceb586a4aa3f2f0ec8463aea5cd731ed66a59 --- /dev/null +++ b/code/RL_model/verl/verl_train/wandb/run-20260207_113041-bhf8tuxa/logs/debug.log @@ -0,0 +1,21 @@ +2026-02-07 11:30:41,993 INFO MainThread:572506 [wandb_setup.py:_flush():81] Current SDK version is 0.24.1 +2026-02-07 11:30:41,993 INFO MainThread:572506 [wandb_setup.py:_flush():81] Configure stats pid to 572506 +2026-02-07 11:30:41,993 INFO MainThread:572506 [wandb_setup.py:_flush():81] Loading settings from environment variables +2026-02-07 11:30:41,994 INFO MainThread:572506 [wandb_init.py:setup_run_log_directory():717] Logging user logs to /data/home_beta/mshahidul/readctrl/code/RL_model/verl/verl_train/wandb/run-20260207_113041-bhf8tuxa/logs/debug.log +2026-02-07 11:30:41,994 INFO MainThread:572506 [wandb_init.py:setup_run_log_directory():718] Logging internal logs to /data/home_beta/mshahidul/readctrl/code/RL_model/verl/verl_train/wandb/run-20260207_113041-bhf8tuxa/logs/debug-internal.log +2026-02-07 11:30:41,994 INFO MainThread:572506 [wandb_init.py:init():844] calling init triggers +2026-02-07 11:30:41,996 INFO MainThread:572506 [wandb_init.py:init():849] wandb.init called with sweep_config: {} +config: {'actor_rollout_ref': {'actor': {'optim': {'_target_': 'verl.workers.config.FSDPOptimizerConfig', 'optimizer': 'AdamW', 'optimizer_impl': 'torch.optim', 'lr': 1e-06, 'lr_warmup_steps_ratio': 0.0, 'total_training_steps': 6045, 'weight_decay': 0.01, 'lr_warmup_steps': -1, 'betas': [0.9, 0.999], 'clip_grad': 1.0, 'min_lr_ratio': 0.0, 'num_cycles': 0.5, 'lr_scheduler_type': 'constant', 'warmup_style': None, 'override_optimizer_config': None}, 'fsdp_config': {'_target_': 'verl.workers.config.FSDPEngineConfig', 'wrap_policy': {'min_num_params': 0}, 'param_offload': False, 'optimizer_offload': False, 'offload_policy': False, 'reshard_after_forward': True, 'fsdp_size': -1, 'forward_prefetch': False, 'model_dtype': 'fp32', 'use_orig_params': False, 'seed': 42, 'full_determinism': False, 'ulysses_sequence_parallel_size': 1, 'entropy_from_logits_with_chunking': False, 'use_torch_compile': True, 'entropy_checkpointing': False, 'forward_only': False, 'strategy': 'fsdp', 'dtype': 'bfloat16'}, '_target_': 'verl.workers.config.FSDPActorConfig', 'rollout_n': 3, 'strategy': 'fsdp', 'ppo_mini_batch_size': 4, 'ppo_micro_batch_size': None, 'ppo_micro_batch_size_per_gpu': 2, 'use_dynamic_bsz': False, 'ppo_max_token_len_per_gpu': 16384, 'clip_ratio': 0.2, 'clip_ratio_low': 0.2, 'clip_ratio_high': 0.2, 'tau_pos': 1.0, 'tau_neg': 1.05, 'freeze_vision_tower': False, 'policy_loss': {'_target_': 'verl.workers.config.PolicyLossConfig', 'loss_mode': 'vanilla', 'clip_cov_ratio': 0.0002, 'clip_cov_lb': 1.0, 'clip_cov_ub': 5.0, 'kl_cov_ratio': 0.0002, 'ppo_kl_coef': 0.1}, 'clip_ratio_c': 3.0, 'loss_agg_mode': 'token-mean', 'loss_scale_factor': None, 'entropy_coeff': 0, 'calculate_entropy': False, 'use_kl_loss': True, 'use_prefix_grouper': False, 'use_torch_compile': True, 'kl_loss_coef': 0.001, 'kl_loss_type': 'low_var_kl', 'ppo_epochs': 1, 'shuffle': False, 'data_loader_seed': 42, 'checkpoint': {'_target_': 'verl.trainer.config.CheckpointConfig', 'save_contents': ['model', 'optimizer', 'extra'], 'load_contents': ['model', 'optimizer', 'extra'], 'async_save': False}, 'use_fused_kernels': False, 'profiler': {'_target_': 'verl.utils.profiler.ProfilerConfig', 'tool': None, 'enable': False, 'all_ranks': False, 'ranks': [], 'save_path': 'outputs/profile', 'tool_config': {'nsys': {'_target_': 'verl.utils.profiler.config.NsightToolConfig', 'discrete': False}, 'npu': {'_target_': 'verl.utils.profiler.config.NPUToolConfig', 'contents': [], 'level': 'level0', 'analysis': True, 'discrete': False}, 'torch': {'_target_': 'verl.utils.profiler.config.TorchProfilerToolConfig', 'contents': [], 'discrete': False}, 'torch_memory': {'_target_': 'verl.utils.profiler.config.TorchMemoryToolConfig', 'trace_alloc_max_entries': 100000, 'stack_depth': 32}}}, 'router_replay': {'_target_': 'verl.workers.config.RouterReplayConfig', 'mode': 'disabled', 'record_file': None, 'replay_file': None}, 'grad_clip': 1.0, 'ulysses_sequence_parallel_size': 1, 'entropy_from_logits_with_chunking': False, 'entropy_checkpointing': False, 'use_remove_padding': True, 'calculate_sum_pi_squared': False, 'sum_pi_squared_checkpointing': False}, 'ref': {'rollout_n': 3, 'strategy': 'fsdp', 'use_torch_compile': True, 'log_prob_micro_batch_size': None, 'log_prob_micro_batch_size_per_gpu': 32, 'log_prob_use_dynamic_bsz': False, 'log_prob_max_token_len_per_gpu': 16384, 'profiler': {'_target_': 'verl.utils.profiler.ProfilerConfig', 'tool': None, 'enable': False, 'all_ranks': False, 'ranks': [], 'save_path': 'outputs/profile', 'tool_config': {'nsys': {'_target_': 'verl.utils.profiler.config.NsightToolConfig', 'discrete': False}, 'npu': {'_target_': 'verl.utils.profiler.config.NPUToolConfig', 'contents': [], 'level': 'level0', 'analysis': True, 'discrete': False}, 'torch': {'_target_': 'verl.utils.profiler.config.TorchProfilerToolConfig', 'contents': [], 'discrete': False}, 'torch_memory': {'_target_': 'verl.utils.profiler.config.TorchMemoryToolConfig', 'trace_alloc_max_entries': 100000, 'stack_depth': 32}}}, 'router_replay': {'_target_': 'verl.workers.config.RouterReplayConfig', 'mode': 'disabled', 'record_file': None, 'replay_file': None}, 'fsdp_config': {'_target_': 'verl.workers.config.FSDPEngineConfig', 'wrap_policy': {'min_num_params': 0}, 'param_offload': False, 'optimizer_offload': False, 'offload_policy': False, 'reshard_after_forward': True, 'fsdp_size': -1, 'forward_prefetch': False, 'model_dtype': 'fp32', 'use_orig_params': False, 'seed': 42, 'full_determinism': False, 'ulysses_sequence_parallel_size': 1, 'entropy_from_logits_with_chunking': False, 'use_torch_compile': True, 'entropy_checkpointing': False, 'forward_only': True, 'strategy': 'fsdp', 'dtype': 'bfloat16'}, '_target_': 'verl.workers.config.FSDPActorConfig', 'ulysses_sequence_parallel_size': 1, 'entropy_from_logits_with_chunking': False, 'entropy_checkpointing': False}, 'rollout': {'_target_': 'verl.workers.config.RolloutConfig', 'name': 'vllm', 'mode': 'async', 'temperature': 1.0, 'top_k': -1, 'top_p': 1, 'prompt_length': 1024, 'response_length': 2048, 'dtype': 'bfloat16', 'gpu_memory_utilization': 0.6, 'ignore_eos': False, 'enforce_eager': False, 'cudagraph_capture_sizes': None, 'free_cache_engine': True, 'tensor_model_parallel_size': 1, 'data_parallel_size': 1, 'expert_parallel_size': 1, 'pipeline_model_parallel_size': 1, 'max_num_batched_tokens': 8192, 'max_model_len': 8192, 'max_num_seqs': 1024, 'enable_chunked_prefill': True, 'enable_prefix_caching': True, 'logprobs_mode': 'processed_logprobs', 'scheduling_policy': 'fcfs', 'load_format': 'dummy', 'log_prob_micro_batch_size': None, 'log_prob_micro_batch_size_per_gpu': 2, 'log_prob_use_dynamic_bsz': False, 'log_prob_max_token_len_per_gpu': 16384, 'disable_log_stats': True, 'do_sample': True, 'n': 3, 'over_sample_rate': 0, 'multi_stage_wake_up': False, 'engine_kwargs': {'vllm': {}, 'sglang': {}, 'trtllm': {}}, 'val_kwargs': {'_target_': 'verl.workers.config.SamplingConfig', 'top_k': -1, 'top_p': 1.0, 'temperature': 0, 'n': 1, 'do_sample': False}, 'multi_turn': {'_target_': 'verl.workers.config.MultiTurnConfig', 'enable': False, 'max_assistant_turns': None, 'tool_config_path': None, 'max_user_turns': None, 'max_parallel_calls': 1, 'max_tool_response_length': 256, 'tool_response_truncate_side': 'middle', 'interaction_config_path': None, 'use_inference_chat_template': False, 'tokenization_sanity_check_mode': 'strict', 'format': 'hermes', 'num_repeat_rollouts': None}, 'calculate_log_probs': False, 'agent': {'_target_': 'verl.workers.config.AgentLoopConfig', 'num_workers': 8, 'default_agent_loop': 'single_turn_agent', 'agent_loop_config_path': None, 'custom_async_server': {'_target_': 'verl.workers.config.CustomAsyncServerConfig', 'path': None, 'name': None}}, 'checkpoint_engine': {'_target_': 'verl.workers.config.CheckpointEngineConfig', 'backend': 'naive', 'update_weights_bucket_megabytes': 2048, 'engine_kwargs': {}}, 'trace': {'_target_': 'verl.workers.config.TraceConfig', 'backend': None, 'token2text': False, 'max_samples_per_step_per_worker': None}, 'skip_rollout': False, 'skip_dump_dir': '/tmp/rollout_dump', 'skip_tokenizer_init': True, 'enable_rollout_routing_replay': False, 'profiler': {'_target_': 'verl.utils.profiler.ProfilerConfig', 'tool': None, 'enable': False, 'all_ranks': False, 'ranks': [], 'save_path': 'outputs/profile', 'tool_config': {'nsys': {'_target_': 'verl.utils.profiler.config.NsightToolConfig', 'discrete': False}, 'npu': {'_target_': 'verl.utils.profiler.config.NPUToolConfig', 'contents': [], 'level': 'level0', 'analysis': True, 'discrete': False}, 'torch': {'_target_': 'verl.utils.profiler.config.TorchProfilerToolConfig', 'contents': [], 'discrete': False}, 'torch_memory': {'_target_': 'verl.utils.profiler.config.TorchMemoryToolConfig', 'trace_alloc_max_entries': 100000, 'stack_depth': 32}}}, 'prometheus': {'_target_': 'verl.workers.config.PrometheusConfig', 'enable': False, 'port': 9090, 'file': '/tmp/ray/session_latest/metrics/prometheus/prometheus.yml', 'served_model_name': 'Qwen/Qwen3-4B-Instruct-2507'}, 'quantization': None, 'quantization_config_file': None, 'mtp': {'_target_': 'verl.workers.config.MtpConfig', 'enable': False, 'enable_train': False, 'enable_rollout': False, 'detach_encoder': False, 'mtp_loss_scaling_factor': 0.1, 'speculative_algorithm': 'EAGLE', 'speculative_num_steps': 3, 'speculative_eagle_topk': 1, 'speculative_num_draft_tokens': 4, 'method': 'mtp', 'num_speculative_tokens': 1}, 'layered_summon': False}, 'model': {'_target_': 'verl.workers.config.HFModelConfig', 'path': 'Qwen/Qwen3-4B-Instruct-2507', 'hf_config_path': None, 'tokenizer_path': None, 'use_shm': False, 'trust_remote_code': False, 'custom_chat_template': None, 'external_lib': None, 'override_config': {}, 'enable_gradient_checkpointing': True, 'enable_activation_offload': False, 'use_remove_padding': True, 'lora_rank': 0, 'lora_alpha': 16, 'target_modules': 'all-linear', 'exclude_modules': None, 'lora_adapter_path': None, 'use_liger': False, 'use_fused_kernels': False, 'fused_kernel_options': {'impl_backend': 'torch'}, 'tiled_mlp': {'enabled': False, 'num_shards': 4}, 'mtp': {'_target_': 'verl.workers.config.MtpConfig', 'enable': False, 'enable_train': False, 'enable_rollout': False, 'detach_encoder': False, 'mtp_loss_scaling_factor': 0.1, 'speculative_algorithm': 'EAGLE', 'speculative_num_steps': 3, 'speculative_eagle_topk': 1, 'speculative_num_draft_tokens': 4, 'method': 'mtp', 'num_speculative_tokens': 1}}, 'hybrid_engine': True, 'nccl_timeout': 600}, 'data': {'tokenizer': None, 'use_shm': False, 'train_files': '/home/mshahidul/readctrl/code/RL_model/verl/verl_train/dataset/train.parquet', 'val_files': '/home/mshahidul/readctrl/code/RL_model/verl/verl_train/dataset/test.parquet', 'train_max_samples': -1, 'val_max_samples': -1, 'prompt_key': 'prompt', 'reward_fn_key': 'data_source', 'max_prompt_length': 1024, 'max_response_length': 2048, 'train_batch_size': 8, 'val_batch_size': None, 'tool_config_path': None, 'return_raw_input_ids': False, 'return_raw_chat': True, 'return_full_prompt': False, 'shuffle': True, 'seed': None, 'dataloader_num_workers': 8, 'image_patch_size': 14, 'validation_shuffle': False, 'filter_overlong_prompts': True, 'filter_overlong_prompts_workers': 1, 'truncation': 'error', 'image_key': 'images', 'video_key': 'videos', 'trust_remote_code': False, 'custom_cls': {'path': None, 'name': None}, 'return_multi_modal_inputs': True, 'sampler': {'class_path': None, 'class_name': None}, 'datagen': {'path': None, 'name': None}, 'apply_chat_template_kwargs': {}}, 'reward_manager': {'_target_': 'verl.trainer.config.config.RewardManagerConfig', 'source': 'register', 'name': 'naive', 'module': {'_target_': 'verl.trainer.config.config.ModuleConfig', 'path': None, 'name': 'custom_reward_manager'}}, 'critic': {'optim': {'_target_': 'verl.workers.config.FSDPOptimizerConfig', 'optimizer': 'AdamW', 'optimizer_impl': 'torch.optim', 'lr': 1e-05, 'lr_warmup_steps_ratio': 0.0, 'total_training_steps': 6045, 'weight_decay': 0.01, 'lr_warmup_steps': -1, 'betas': [0.9, 0.999], 'clip_grad': 1.0, 'min_lr_ratio': 0.0, 'num_cycles': 0.5, 'lr_scheduler_type': 'constant', 'warmup_style': None, 'override_optimizer_config': None}, 'model': {'fsdp_config': {'_target_': 'verl.workers.config.FSDPEngineConfig', 'wrap_policy': {'min_num_params': 0}, 'param_offload': False, 'optimizer_offload': False, 'offload_policy': False, 'reshard_after_forward': True, 'fsdp_size': -1, 'forward_prefetch': False, 'model_dtype': 'fp32', 'use_orig_params': False, 'seed': 42, 'full_determinism': False, 'ulysses_sequence_parallel_size': 1, 'entropy_from_logits_with_chunking': False, 'use_torch_compile': True, 'entropy_checkpointing': False, 'forward_only': False, 'strategy': 'fsdp', 'dtype': 'bfloat16'}, 'path': '~/models/deepseek-llm-7b-chat', 'tokenizer_path': 'Qwen/Qwen3-4B-Instruct-2507', 'override_config': {}, 'external_lib': None, 'trust_remote_code': False, '_target_': 'verl.workers.config.FSDPCriticModelCfg', 'use_shm': False, 'enable_gradient_checkpointing': True, 'enable_activation_offload': False, 'use_remove_padding': False, 'lora_rank': 0, 'lora_alpha': 16, 'target_modules': 'all-linear', 'tiled_mlp': {'enabled': False, 'num_shards': 4}}, '_target_': 'verl.workers.config.FSDPCriticConfig', 'rollout_n': 3, 'strategy': 'fsdp', 'enable': None, 'ppo_mini_batch_size': 4, 'ppo_micro_batch_size': None, 'ppo_micro_batch_size_per_gpu': None, 'use_dynamic_bsz': False, 'ppo_max_token_len_per_gpu': 32768, 'forward_max_token_len_per_gpu': 32768, 'ppo_epochs': 1, 'shuffle': False, 'data_loader_seed': 42, 'cliprange_value': 0.5, 'loss_agg_mode': 'token-mean', 'checkpoint': {'_target_': 'verl.trainer.config.CheckpointConfig', 'save_contents': ['model', 'optimizer', 'extra'], 'load_contents': ['model', 'optimizer', 'extra'], 'async_save': False}, 'profiler': {'_target_': 'verl.utils.profiler.ProfilerConfig', 'tool': None, 'enable': False, 'all_ranks': False, 'ranks': [], 'save_path': 'outputs/profile', 'tool_config': {'nsys': {'_target_': 'verl.utils.profiler.config.NsightToolConfig', 'discrete': False}, 'npu': {'_target_': 'verl.utils.profiler.config.NPUToolConfig', 'contents': [], 'level': 'level0', 'analysis': True, 'discrete': False}, 'torch': {'_target_': 'verl.utils.profiler.config.TorchProfilerToolConfig', 'contents': [], 'discrete': False}, 'torch_memory': {'_target_': 'verl.utils.profiler.config.TorchMemoryToolConfig', 'trace_alloc_max_entries': 100000, 'stack_depth': 32}}}, 'forward_micro_batch_size': None, 'forward_micro_batch_size_per_gpu': None, 'ulysses_sequence_parallel_size': 1, 'grad_clip': 1.0}, 'reward_model': {'enable': False, 'enable_resource_pool': False, 'n_gpus_per_node': 8, 'nnodes': 0, 'strategy': 'fsdp', 'model': {'input_tokenizer': 'Qwen/Qwen3-4B-Instruct-2507', 'path': '~/models/FsfairX-LLaMA3-RM-v0.1', 'external_lib': None, 'trust_remote_code': False, 'override_config': {}, 'use_shm': False, 'use_remove_padding': False, 'use_fused_kernels': False, 'fsdp_config': {'_target_': 'verl.workers.config.FSDPEngineConfig', 'wrap_policy': {'min_num_params': 0}, 'param_offload': False, 'reshard_after_forward': True, 'fsdp_size': -1, 'forward_prefetch': False}}, 'micro_batch_size': None, 'micro_batch_size_per_gpu': None, 'max_length': None, 'use_dynamic_bsz': False, 'forward_max_token_len_per_gpu': 32768, 'reward_manager': 'naive', 'reward_loop_source': 'register', 'reward_loop_module_path': None, 'reward_loop_class_name': None, 'launch_reward_fn_async': False, 'sandbox_fusion': {'url': None, 'max_concurrent': 64, 'memory_limit_mb': 1024}, 'profiler': {'_target_': 'verl.utils.profiler.ProfilerConfig', 'tool': None, 'enable': False, 'all_ranks': False, 'ranks': [], 'save_path': 'outputs/profile', 'tool_config': {'nsys': {'_target_': 'verl.utils.profiler.config.NsightToolConfig', 'discrete': False}, 'npu': {'_target_': 'verl.utils.profiler.config.NPUToolConfig', 'contents': [], 'level': 'level0', 'analysis': True, 'discrete': False}, 'torch': {'_target_': 'verl.utils.profiler.config.TorchProfilerToolConfig', 'contents': [], 'discrete': False}, 'torch_memory': {'_target_': 'verl.utils.profiler.config.TorchMemoryToolConfig', 'trace_alloc_max_entries': 100000, 'stack_depth': 32}}}, 'ulysses_sequence_parallel_size': 1, 'use_reward_loop': True, 'num_workers': 1, 'rollout': {'_target_': 'verl.workers.config.RolloutConfig', 'name': '???', 'dtype': 'bfloat16', 'gpu_memory_utilization': 0.5, 'enforce_eager': True, 'cudagraph_capture_sizes': None, 'free_cache_engine': True, 'data_parallel_size': 1, 'expert_parallel_size': 1, 'tensor_model_parallel_size': 2, 'max_num_batched_tokens': 8192, 'max_model_len': None, 'max_num_seqs': 1024, 'load_format': 'auto', 'engine_kwargs': {}, 'limit_images': None, 'enable_chunked_prefill': True, 'enable_prefix_caching': True, 'disable_log_stats': True, 'skip_tokenizer_init': False, 'prompt_length': 2048, 'response_length': 2048}}, 'algorithm': {'rollout_correction': {'rollout_is': None, 'rollout_is_threshold': 2.0, 'rollout_rs': None, 'rollout_rs_threshold': None, 'bypass_mode': False, 'loss_type': 'ppo_clip', 'rollout_is_batch_normalize': False}, '_target_': 'verl.trainer.config.AlgoConfig', 'gamma': 1.0, 'lam': 1.0, 'adv_estimator': 'grpo', 'norm_adv_by_std_in_grpo': True, 'use_kl_in_reward': False, 'kl_penalty': 'kl', 'kl_ctrl': {'_target_': 'verl.trainer.config.KLControlConfig', 'type': 'fixed', 'kl_coef': 0.001, 'horizon': 10000, 'target_kl': 0.1}, 'use_pf_ppo': False, 'pf_ppo': {'reweight_method': 'pow', 'weight_pow': 2.0}}, 'custom_reward_function': {'path': '/home/mshahidul/readctrl/code/RL_model/verl/verl_train/reward_func/reward.py', 'name': 'compute_score'}, 'trainer': {'balance_batch': True, 'total_epochs': 15, 'total_training_steps': None, 'project_name': 'readctrl-verl', 'experiment_name': 'qwen3-4b-instruct-optimized-multiclinsum-gs', 'logger': ['console', 'wandb'], 'log_val_generations': 0, 'rollout_data_dir': None, 'validation_data_dir': None, 'nnodes': 1, 'n_gpus_per_node': 2, 'save_freq': 100, 'esi_redundant_time': 0, 'resume_mode': 'auto', 'resume_from_path': None, 'val_before_train': True, 'val_only': False, 'test_freq': 1, 'critic_warmup': 0, 'default_hdfs_dir': None, 'del_local_ckpt_after_load': False, 'default_local_dir': '/home/mshahidul/readctrl/code/RL_model/train_v2', 'max_actor_ckpt_to_keep': 1, 'max_critic_ckpt_to_keep': 1, 'ray_wait_register_center_timeout': 300, 'device': 'cuda', 'use_legacy_worker_impl': 'auto', 'remove_previous_ckpt_in_save': True}, 'global_profiler': {'_target_': 'verl.utils.profiler.ProfilerConfig', 'tool': None, 'steps': None, 'profile_continuous_steps': False, 'save_path': 'outputs/profile', 'global_tool_config': {'nsys': {'_target_': 'verl.utils.profiler.config.NsightToolConfig', 'discrete': False, 'controller_nsight_options': {'trace': 'cuda,nvtx,cublas,ucx', 'cuda-memory-usage': 'true', 'cuda-graph-trace': 'graph'}, 'worker_nsight_options': {'trace': 'cuda,nvtx,cublas,ucx', 'cuda-memory-usage': 'true', 'cuda-graph-trace': 'graph', 'capture-range': 'cudaProfilerApi', 'capture-range-end': None, 'kill': 'none'}}, 'torch_memory': {'trace_alloc_max_entries': 100000, 'stack_depth': 32, 'context': 'all', 'stacks': 'all', 'kw_args': {}}}}, 'transfer_queue': {'enable': False}, 'ray_kwargs': {'ray_init': {'num_cpus': None}, 'timeline_json_file': None}, '_wandb': {}} +2026-02-07 11:30:41,997 INFO MainThread:572506 [wandb_init.py:init():892] starting backend +2026-02-07 11:30:42,295 INFO MainThread:572506 [wandb_init.py:init():895] sending inform_init request +2026-02-07 11:30:42,304 INFO MainThread:572506 [wandb_init.py:init():903] backend started and connected +2026-02-07 11:30:42,318 INFO MainThread:572506 [wandb_init.py:init():973] updated telemetry +2026-02-07 11:30:42,346 INFO MainThread:572506 [wandb_init.py:init():997] communicating run to backend with 90.0 second timeout +2026-02-07 11:30:44,107 INFO MainThread:572506 [wandb_init.py:init():1042] starting run threads in backend +2026-02-07 11:30:45,108 INFO MainThread:572506 [wandb_run.py:_console_start():2529] atexit reg +2026-02-07 11:30:45,109 INFO MainThread:572506 [wandb_run.py:_redirect():2377] redirect: wrap_raw +2026-02-07 11:30:45,109 INFO MainThread:572506 [wandb_run.py:_redirect():2446] Wrapping output streams. +2026-02-07 11:30:45,109 INFO MainThread:572506 [wandb_run.py:_redirect():2469] Redirects installed. +2026-02-07 11:30:45,121 INFO MainThread:572506 [wandb_init.py:init():1082] run started, returning control to user process +2026-02-07 12:19:31,904 INFO wandb-AsyncioManager-main:572506 [service_client.py:_forward_responses():94] Reached EOF. +2026-02-07 12:19:31,914 INFO wandb-AsyncioManager-main:572506 [mailbox.py:close():154] Closing mailbox, abandoning 1 handles. diff --git a/code/RL_model/verl/verl_train/wandb/run-20260207_122607-4jfbiq6q/files/output.log b/code/RL_model/verl/verl_train/wandb/run-20260207_122607-4jfbiq6q/files/output.log new file mode 100644 index 0000000000000000000000000000000000000000..d78af0c289eb892ae52257239a41a200ae2d6cd8 --- /dev/null +++ b/code/RL_model/verl/verl_train/wandb/run-20260207_122607-4jfbiq6q/files/output.log @@ -0,0 +1,6 @@ +wandb: Detected [openai] in use. +wandb: Use W&B Weave for improved LLM call tracing. Install Weave with `pip install weave` then add `import weave` to the top of your script. +wandb: For more information, check out the docs at: https://weave-docs.wandb.ai/ +Checkpoint tracker file does not exist: /home/mshahidul/readctrl/code/RL_model/train_v2/latest_checkpointed_iteration.txt +Training from scratch +test_gen_batch meta info: {'eos_token_id': 151645, 'pad_token_id': 151643, 'recompute_log_prob': False, 'do_sample': False, 'validate': True, 'global_steps': 0} diff --git a/code/RL_model/verl/verl_train/wandb/run-20260207_122607-4jfbiq6q/files/requirements.txt b/code/RL_model/verl/verl_train/wandb/run-20260207_122607-4jfbiq6q/files/requirements.txt new file mode 100644 index 0000000000000000000000000000000000000000..c168a7097ef00c84a34fe1db69f95d6eaafb03f2 --- /dev/null +++ b/code/RL_model/verl/verl_train/wandb/run-20260207_122607-4jfbiq6q/files/requirements.txt @@ -0,0 +1,269 @@ +verl==0.8.0.dev0 +psutil==7.1.3 +colorama==0.4.6 +annotated-doc==0.0.4 +sentry-sdk==2.51.0 +requests==2.32.5 +nvidia-cufile-cu12==1.13.1.3 +ml_dtypes==0.5.4 +xformers==0.0.32.post1 +sglang==0.5.2 +multidict==6.7.1 +typing_extensions==4.15.0 +nvidia-cusparselt-cu12==0.7.1 +openai-harmony==0.0.4 +transformers==4.56.1 +Werkzeug==3.1.5 +identify==2.6.16 +pytest==9.0.2 +nvidia-cuda-runtime-cu12==12.8.90 +GitPython==3.1.46 +cupy-cuda12x==13.6.0 +tokenizers==0.22.2 +pybind11==3.0.1 +google-api-core==2.29.0 +partial-json-parser==0.2.1.1.post7 +aiohttp-cors==0.8.1 +sniffio==1.3.1 +tensordict==0.10.0 +smart_open==7.5.0 +cffi==2.0.0 +wcwidth==0.5.3 +asttokens==3.0.1 +opencensus==0.11.4 +rpds-py==0.30.0 +py-spy==0.4.1 +gguf==0.17.1 +nvidia-nvjitlink-cu12==12.8.93 +httpx==0.28.1 +cuda-python==13.1.1 +annotated-types==0.7.0 +regex==2026.1.15 +vllm==0.11.0 +idna==3.11 +fsspec==2025.10.0 +parso==0.8.5 +pydantic-extra-types==2.11.0 +MarkupSafe==3.0.3 +cryptography==46.0.4 +filelock==3.20.3 +modelscope==1.34.0 +outlines==0.1.11 +dnspython==2.8.0 +scipy==1.17.0 +zipp==3.23.0 +PyYAML==6.0.3 +onnx==1.20.1 +torchdata==0.11.0 +cuda-pathfinder==1.3.3 +verl==0.8.0.dev0 +httptools==0.7.1 +opencv-python-headless==4.13.0.90 +importlib_metadata==8.7.1 +peft==0.18.1 +opentelemetry-sdk==1.39.1 +python-json-logger==4.0.0 +cuda-bindings==13.1.1 +mdurl==0.1.2 +referencing==0.37.0 +xxhash==3.6.0 +interegular==0.3.3 +fastapi-cli==0.0.20 +uv==0.9.28 +tensorboard==2.20.0 +nvidia-cublas-cu12==12.8.4.1 +sentencepiece==0.2.1 +rich-toolkit==0.18.1 +numpy==2.2.0 +yarl==1.22.0 +opencv-fixer==0.2.5 +tqdm==4.67.2 +python-dotenv==1.2.1 +timm==1.0.16 +aiohappyeyeballs==2.6.1 +decord==0.6.0 +jiter==0.12.0 +airportsdata==20250909 +markdown-it-py==4.0.0 +nvidia-cusolver-cu12==11.7.3.90 +pyarrow==23.0.0 +opentelemetry-proto==1.39.1 +anyio==4.12.1 +pycryptodomex==3.23.0 +prometheus_client==0.24.1 +aiohttp==3.13.3 +urllib3==2.6.3 +pexpect==4.9.0 +pydantic-settings==2.12.0 +distro==1.9.0 +av==16.1.0 +cloudpickle==3.1.2 +mpmath==1.3.0 +certifi==2026.1.4 +antlr4-python3-runtime==4.9.3 +torchvision==0.23.0 +accelerate==1.12.0 +watchfiles==1.1.1 +ruff==0.14.14 +wheel==0.46.3 +omegaconf==2.3.0 +nvidia-cufft-cu12==11.3.3.83 +multiprocess==0.70.18 +frozendict==2.4.7 +sympy==1.14.0 +setproctitle==1.3.7 +setuptools==79.0.1 +py-cpuinfo==9.0.0 +ipython_pygments_lexers==1.1.1 +rich==14.3.2 +uvicorn==0.40.0 +openai==1.99.1 +outlines_core==0.2.11 +llvmlite==0.44.0 +nvidia-cuda-cupti-cu12==12.8.90 +attrs==25.4.0 +anthropic==0.77.0 +packaging==25.0 +fastrlock==0.8.3 +astor==0.8.1 +pluggy==1.6.0 +nvidia-cuda-nvrtc-cu12==12.8.93 +psutil==7.2.2 +virtualenv==20.36.1 +cbor2==5.8.0 +compressed-tensors==0.11.0 +nvidia-cusparse-cu12==12.5.8.93 +networkx==3.6.1 +httpcore==1.0.9 +onnxscript==0.3.1 +smmap==5.0.2 +opencv-python==4.13.0.90 +traitlets==5.14.3 +python-multipart==0.0.22 +pyvers==0.1.0 +huggingface-hub==0.36.0 +pillow==12.1.0 +jsonschema==4.26.0 +cfgv==3.5.0 +optree==0.18.0 +email-validator==2.3.0 +tabulate==0.9.0 +pre_commit==4.5.1 +msgpack==1.1.2 +depyf==0.19.0 +numba==0.61.2 +six==1.17.0 +aiosignal==1.4.0 +nvidia-nvtx-cu12==12.8.90 +propcache==0.4.1 +torch_memory_saver==0.0.8 +h11==0.16.0 +frozenlist==1.8.0 +websockets==16.0 +nvidia-cudnn-frontend==1.18.0 +build==1.4.0 +google-auth==2.48.0 +pycountry==24.6.1 +stack-data==0.6.3 +typing-inspection==0.4.2 +googleapis-common-protos==1.72.0 +pandas==3.0.0 +typer==0.21.1 +protobuf==6.33.5 +fastapi==0.128.0 +blake3==1.0.8 +opentelemetry-semantic-conventions==0.60b1 +opentelemetry-exporter-prometheus==0.60b1 +nvidia-cudnn-cu12==9.10.2.21 +Markdown==3.10.1 +liger_kernel==0.6.4 +nodeenv==1.10.0 +prompt_toolkit==3.0.52 +torchaudio==2.8.0 +codetiming==1.4.0 +platformdirs==4.5.1 +jsonschema-specifications==2025.9.1 +hydra-core==1.3.2 +tensorboard-data-server==0.7.2 +lm-format-enforcer==0.11.3 +pyasn1_modules==0.4.2 +tiktoken==0.12.0 +starlette==0.50.0 +pyproject_hooks==1.2.0 +flash_attn==2.8.1 +rsa==4.9.1 +ray==2.53.0 +nest-asyncio==1.6.0 +lark==1.2.2 +fastar==0.8.0 +orjson==3.11.6 +prometheus-fastapi-instrumentator==7.1.0 +opentelemetry-api==1.39.1 +mathruler==0.1.0 +pydantic_core==2.41.5 +fastapi-cloud-cli==0.11.0 +pynvml==13.0.1 +loguru==0.7.3 +torch==2.8.0 +msgspec==0.20.0 +nvidia-curand-cu12==10.3.9.90 +blobfile==3.0.0 +gitdb==4.0.12 +llguidance==0.7.30 +hf_transfer==0.1.9 +nvidia-nccl-cu12==2.27.3 +qwen-vl-utils==0.0.14 +ptyprocess==0.7.0 +ipdb==0.13.13 +opencensus-context==0.1.3 +jedi==0.19.2 +click==8.3.1 +datasets==4.5.0 +soxr==1.0.0 +sgl-kernel==0.3.9.post2 +colorful==0.5.8 +pyasn1==0.6.2 +charset-normalizer==3.4.4 +nvidia-ml-py==13.590.48 +hf-xet==1.2.0 +dill==0.4.0 +absl-py==2.4.0 +pydantic==2.12.5 +wrapt==2.1.0 +flashinfer-python==0.3.1 +python-dateutil==2.9.0.post0 +torchao==0.9.0 +cachetools==7.0.0 +soundfile==0.13.1 +diskcache==5.6.3 +onnx-ir==0.1.15 +docstring_parser==0.17.0 +matplotlib-inline==0.2.1 +Pygments==2.19.2 +wandb==0.24.1 +pure_eval==0.2.3 +ninja==1.13.0 +proto-plus==1.27.0 +pyzmq==27.1.0 +iniconfig==2.3.0 +Jinja2==3.1.6 +megatron-core==0.13.1 +uvloop==0.22.1 +pycparser==3.0 +pylatexenc==2.10 +decorator==5.2.1 +shellingham==1.5.4 +lxml==6.0.2 +safetensors==0.7.0 +xgrammar==0.1.25 +pybase64==1.4.3 +ipython==9.9.0 +mistral_common==1.9.0 +rignore==0.7.6 +einops==0.8.2 +distlib==0.4.0 +triton==3.4.0 +executing==2.2.1 +grpcio==1.76.0 +pip==25.3 +verl==0.8.0.dev0 diff --git a/code/RL_model/verl/verl_train/wandb/run-20260207_122607-4jfbiq6q/files/wandb-metadata.json b/code/RL_model/verl/verl_train/wandb/run-20260207_122607-4jfbiq6q/files/wandb-metadata.json new file mode 100644 index 0000000000000000000000000000000000000000..50eb1b26b7ac445b86f4d41e8f002c070f0b24a6 --- /dev/null +++ b/code/RL_model/verl/verl_train/wandb/run-20260207_122607-4jfbiq6q/files/wandb-metadata.json @@ -0,0 +1,93 @@ +{ + "os": "Linux-5.15.0-160-generic-x86_64-with-glibc2.35", + "python": "CPython 3.12.12", + "startedAt": "2026-02-07T17:26:07.697458Z", + "args": [ + "--node-ip-address=172.16.34.29", + "--node-manager-port=39565", + "--object-store-name=/tmp/ray/session_2026-02-07_12-20-13_689088_750591/sockets/plasma_store", + "--raylet-name=/tmp/ray/session_2026-02-07_12-20-13_689088_750591/sockets/raylet", + "--redis-address=None", + "--metrics-agent-port=55729", + "--logging-rotate-bytes=536870912", + "--logging-rotate-backup-count=5", + "--runtime-env-agent-port=64966", + "--gcs-address=172.16.34.29:59877", + "--session-name=session_2026-02-07_12-20-13_689088_750591", + "--temp-dir=/tmp/ray", + "--webui=127.0.0.1:8297", + "--cluster-id=de0d59f03571a90ef7c45248c40006861cdf675236b4d421c9d82ff5", + "--startup-token=128", + "--worker-launch-time-ms=1770484828189", + "--node-id=bfe68127d86b81da9c46ebcb76220fe364225b2883247c2544a9a01d", + "--runtime-env-hash=-2086329310" + ], + "program": "/home/mshahidul/miniconda3/envs/verl2/lib/python3.12/site-packages/ray/_private/workers/default_worker.py", + "git": { + "remote": "https://github.com/verl-project/verl", + "commit": "d9939add7a2a01923a9088891f913a5d20c4e622" + }, + "email": "shahidulshakib034@gmail.com", + "root": "/data/home_beta/mshahidul/readctrl/code/RL_model/verl/verl_train", + "host": "gamma", + "executable": "/home/mshahidul/miniconda3/envs/verl2/bin/python3", + "cpu_count": 64, + "cpu_count_logical": 128, + "gpu": "NVIDIA A100 80GB PCIe", + "gpu_count": 6, + "disk": { + "/": { + "total": "3766429188096", + "used": "188373913600" + } + }, + "memory": { + "total": "1081814863872" + }, + "gpu_nvidia": [ + { + "name": "NVIDIA A100 80GB PCIe", + "memoryTotal": "85899345920", + "cudaCores": 6912, + "architecture": "Ampere", + "uuid": "GPU-4a3678c7-34a9-356f-f7b7-7f7e2f44b596" + }, + { + "name": "NVIDIA A100 80GB PCIe", + "memoryTotal": "85899345920", + "cudaCores": 6912, + "architecture": "Ampere", + "uuid": "GPU-df506764-0db5-91b4-8ec9-154a3bb8123f" + }, + { + "name": "NVIDIA A100 80GB PCIe", + "memoryTotal": "85899345920", + "cudaCores": 6912, + "architecture": "Ampere", + "uuid": "GPU-2c3dbd62-b384-2996-a0f6-b32dcfcc3538" + }, + { + "name": "NVIDIA A100 80GB PCIe", + "memoryTotal": "85899345920", + "cudaCores": 6912, + "architecture": "Ampere", + "uuid": "GPU-1ff3dabe-4b9a-ea62-5cc3-01f12f32d328" + }, + { + "name": "NVIDIA H100 PCIe", + "memoryTotal": "85520809984", + "cudaCores": 14592, + "architecture": "Hopper", + "uuid": "GPU-eefc4b8c-0e79-c1d6-a9ff-8325040572eb" + }, + { + "name": "NVIDIA H100 PCIe", + "memoryTotal": "85520809984", + "cudaCores": 14592, + "architecture": "Hopper", + "uuid": "GPU-d42b6057-13e8-1e88-6aa1-9307df72dece" + } + ], + "cudaVersion": "13.0", + "writerId": "qzshr3cxtxqmv96cwqiw3741i4bjegop" +} \ No newline at end of file diff --git a/code/RL_model/verl/verl_train/wandb/run-20260207_122607-4jfbiq6q/files/wandb-summary.json b/code/RL_model/verl/verl_train/wandb/run-20260207_122607-4jfbiq6q/files/wandb-summary.json new file mode 100644 index 0000000000000000000000000000000000000000..ec6d562821d7b3e629a4dc356aaa6b9124950a46 --- /dev/null +++ b/code/RL_model/verl/verl_train/wandb/run-20260207_122607-4jfbiq6q/files/wandb-summary.json @@ -0,0 +1 @@ +{"_wandb":{"runtime":1223},"_runtime":1223} \ No newline at end of file diff --git a/code/RL_model/verl/verl_train/wandb/run-20260207_122607-4jfbiq6q/logs/debug-core.log b/code/RL_model/verl/verl_train/wandb/run-20260207_122607-4jfbiq6q/logs/debug-core.log new file mode 100644 index 0000000000000000000000000000000000000000..ee346208ba57b683b1615796dd8f5cbb2a7e2f64 --- /dev/null +++ b/code/RL_model/verl/verl_train/wandb/run-20260207_122607-4jfbiq6q/logs/debug-core.log @@ -0,0 +1,8 @@ +{"time":"2026-02-07T12:26:07.836102374-05:00","level":"INFO","msg":"main: starting server","port-filename":"/tmp/tmp1896t4x6/port-759076.txt","pid":759076,"log-level":0,"disable-analytics":false,"shutdown-on-parent-exit":false,"enable-dcgm-profiling":false} +{"time":"2026-02-07T12:26:07.837420986-05:00","level":"INFO","msg":"server: will exit if parent process dies","ppid":759076} +{"time":"2026-02-07T12:26:07.83733235-05:00","level":"INFO","msg":"server: accepting connections","addr":{"Name":"/tmp/wandb-759076-769345-1309437185/socket","Net":"unix"}} +{"time":"2026-02-07T12:26:08.006628386-05:00","level":"INFO","msg":"connection: ManageConnectionData: new connection created","id":"1(@)"} +{"time":"2026-02-07T12:26:08.020850136-05:00","level":"INFO","msg":"handleInformInit: received","streamId":"4jfbiq6q","id":"1(@)"} +{"time":"2026-02-07T12:26:09.659024492-05:00","level":"INFO","msg":"handleInformInit: stream started","streamId":"4jfbiq6q","id":"1(@)"} +{"time":"2026-02-07T12:26:16.138163252-05:00","level":"INFO","msg":"connection: cancelling request","id":"1(@)","requestId":"dv9r2vfji77v"} +{"time":"2026-02-07T12:46:33.378450347-05:00","level":"INFO","msg":"connection: cancelling request","id":"1(@)","requestId":"dv9r2vfji77v"} diff --git a/code/RL_model/verl/verl_train/wandb/run-20260207_122607-4jfbiq6q/logs/debug-internal.log b/code/RL_model/verl/verl_train/wandb/run-20260207_122607-4jfbiq6q/logs/debug-internal.log new file mode 100644 index 0000000000000000000000000000000000000000..5f1c4b44a01bc744776a5f6cdc4ce2d39dee3bf8 --- /dev/null +++ b/code/RL_model/verl/verl_train/wandb/run-20260207_122607-4jfbiq6q/logs/debug-internal.log @@ -0,0 +1,6 @@ +{"time":"2026-02-07T12:26:08.021962867-05:00","level":"INFO","msg":"stream: starting","core version":"0.24.1"} +{"time":"2026-02-07T12:26:09.656270809-05:00","level":"INFO","msg":"stream: created new stream","id":"4jfbiq6q"} +{"time":"2026-02-07T12:26:09.656429545-05:00","level":"INFO","msg":"handler: started","stream_id":"4jfbiq6q"} +{"time":"2026-02-07T12:26:09.65899053-05:00","level":"INFO","msg":"stream: started","id":"4jfbiq6q"} +{"time":"2026-02-07T12:26:09.659226983-05:00","level":"INFO","msg":"writer: started","stream_id":"4jfbiq6q"} +{"time":"2026-02-07T12:26:09.659263272-05:00","level":"INFO","msg":"sender: started","stream_id":"4jfbiq6q"} diff --git a/code/RL_model/verl/verl_train/wandb/run-20260207_122607-4jfbiq6q/logs/debug.log b/code/RL_model/verl/verl_train/wandb/run-20260207_122607-4jfbiq6q/logs/debug.log new file mode 100644 index 0000000000000000000000000000000000000000..550bd97b81b4f961b6ab0608e6c312df0119598b --- /dev/null +++ b/code/RL_model/verl/verl_train/wandb/run-20260207_122607-4jfbiq6q/logs/debug.log @@ -0,0 +1,23 @@ +2026-02-07 12:26:07,717 INFO MainThread:759076 [wandb_setup.py:_flush():81] Current SDK version is 0.24.1 +2026-02-07 12:26:07,718 INFO MainThread:759076 [wandb_setup.py:_flush():81] Configure stats pid to 759076 +2026-02-07 12:26:07,718 INFO MainThread:759076 [wandb_setup.py:_flush():81] Loading settings from environment variables +2026-02-07 12:26:07,718 INFO MainThread:759076 [wandb_init.py:setup_run_log_directory():717] Logging user logs to /data/home_beta/mshahidul/readctrl/code/RL_model/verl/verl_train/wandb/run-20260207_122607-4jfbiq6q/logs/debug.log +2026-02-07 12:26:07,718 INFO MainThread:759076 [wandb_init.py:setup_run_log_directory():718] Logging internal logs to /data/home_beta/mshahidul/readctrl/code/RL_model/verl/verl_train/wandb/run-20260207_122607-4jfbiq6q/logs/debug-internal.log +2026-02-07 12:26:07,719 INFO MainThread:759076 [wandb_init.py:init():844] calling init triggers +2026-02-07 12:26:07,720 INFO MainThread:759076 [wandb_init.py:init():849] wandb.init called with sweep_config: {} +config: {'actor_rollout_ref': {'actor': {'optim': {'_target_': 'verl.workers.config.FSDPOptimizerConfig', 'optimizer': 'AdamW', 'optimizer_impl': 'torch.optim', 'lr': 1e-06, 'lr_warmup_steps_ratio': 0.0, 'total_training_steps': 6045, 'weight_decay': 0.01, 'lr_warmup_steps': -1, 'betas': [0.9, 0.999], 'clip_grad': 1.0, 'min_lr_ratio': 0.0, 'num_cycles': 0.5, 'lr_scheduler_type': 'constant', 'warmup_style': None, 'override_optimizer_config': None}, 'fsdp_config': {'_target_': 'verl.workers.config.FSDPEngineConfig', 'wrap_policy': {'min_num_params': 0}, 'param_offload': False, 'optimizer_offload': False, 'offload_policy': False, 'reshard_after_forward': True, 'fsdp_size': -1, 'forward_prefetch': False, 'model_dtype': 'fp32', 'use_orig_params': False, 'seed': 42, 'full_determinism': False, 'ulysses_sequence_parallel_size': 1, 'entropy_from_logits_with_chunking': False, 'use_torch_compile': True, 'entropy_checkpointing': False, 'forward_only': False, 'strategy': 'fsdp', 'dtype': 'bfloat16'}, '_target_': 'verl.workers.config.FSDPActorConfig', 'rollout_n': 3, 'strategy': 'fsdp', 'ppo_mini_batch_size': 4, 'ppo_micro_batch_size': None, 'ppo_micro_batch_size_per_gpu': 2, 'use_dynamic_bsz': False, 'ppo_max_token_len_per_gpu': 16384, 'clip_ratio': 0.2, 'clip_ratio_low': 0.2, 'clip_ratio_high': 0.2, 'tau_pos': 1.0, 'tau_neg': 1.05, 'freeze_vision_tower': False, 'policy_loss': {'_target_': 'verl.workers.config.PolicyLossConfig', 'loss_mode': 'vanilla', 'clip_cov_ratio': 0.0002, 'clip_cov_lb': 1.0, 'clip_cov_ub': 5.0, 'kl_cov_ratio': 0.0002, 'ppo_kl_coef': 0.1}, 'clip_ratio_c': 3.0, 'loss_agg_mode': 'token-mean', 'loss_scale_factor': None, 'entropy_coeff': 0, 'calculate_entropy': False, 'use_kl_loss': True, 'use_prefix_grouper': False, 'use_torch_compile': True, 'kl_loss_coef': 0.001, 'kl_loss_type': 'low_var_kl', 'ppo_epochs': 1, 'shuffle': False, 'data_loader_seed': 42, 'checkpoint': {'_target_': 'verl.trainer.config.CheckpointConfig', 'save_contents': ['model', 'optimizer', 'extra'], 'load_contents': ['model', 'optimizer', 'extra'], 'async_save': False}, 'use_fused_kernels': False, 'profiler': {'_target_': 'verl.utils.profiler.ProfilerConfig', 'tool': None, 'enable': False, 'all_ranks': False, 'ranks': [], 'save_path': 'outputs/profile', 'tool_config': {'nsys': {'_target_': 'verl.utils.profiler.config.NsightToolConfig', 'discrete': False}, 'npu': {'_target_': 'verl.utils.profiler.config.NPUToolConfig', 'contents': [], 'level': 'level0', 'analysis': True, 'discrete': False}, 'torch': {'_target_': 'verl.utils.profiler.config.TorchProfilerToolConfig', 'contents': [], 'discrete': False}, 'torch_memory': {'_target_': 'verl.utils.profiler.config.TorchMemoryToolConfig', 'trace_alloc_max_entries': 100000, 'stack_depth': 32}}}, 'router_replay': {'_target_': 'verl.workers.config.RouterReplayConfig', 'mode': 'disabled', 'record_file': None, 'replay_file': None}, 'grad_clip': 1.0, 'ulysses_sequence_parallel_size': 1, 'entropy_from_logits_with_chunking': False, 'entropy_checkpointing': False, 'use_remove_padding': True, 'calculate_sum_pi_squared': False, 'sum_pi_squared_checkpointing': False}, 'ref': {'rollout_n': 3, 'strategy': 'fsdp', 'use_torch_compile': True, 'log_prob_micro_batch_size': None, 'log_prob_micro_batch_size_per_gpu': 32, 'log_prob_use_dynamic_bsz': False, 'log_prob_max_token_len_per_gpu': 16384, 'profiler': {'_target_': 'verl.utils.profiler.ProfilerConfig', 'tool': None, 'enable': False, 'all_ranks': False, 'ranks': [], 'save_path': 'outputs/profile', 'tool_config': {'nsys': {'_target_': 'verl.utils.profiler.config.NsightToolConfig', 'discrete': False}, 'npu': {'_target_': 'verl.utils.profiler.config.NPUToolConfig', 'contents': [], 'level': 'level0', 'analysis': True, 'discrete': False}, 'torch': {'_target_': 'verl.utils.profiler.config.TorchProfilerToolConfig', 'contents': [], 'discrete': False}, 'torch_memory': {'_target_': 'verl.utils.profiler.config.TorchMemoryToolConfig', 'trace_alloc_max_entries': 100000, 'stack_depth': 32}}}, 'router_replay': {'_target_': 'verl.workers.config.RouterReplayConfig', 'mode': 'disabled', 'record_file': None, 'replay_file': None}, 'fsdp_config': {'_target_': 'verl.workers.config.FSDPEngineConfig', 'wrap_policy': {'min_num_params': 0}, 'param_offload': False, 'optimizer_offload': False, 'offload_policy': False, 'reshard_after_forward': True, 'fsdp_size': -1, 'forward_prefetch': False, 'model_dtype': 'fp32', 'use_orig_params': False, 'seed': 42, 'full_determinism': False, 'ulysses_sequence_parallel_size': 1, 'entropy_from_logits_with_chunking': False, 'use_torch_compile': True, 'entropy_checkpointing': False, 'forward_only': True, 'strategy': 'fsdp', 'dtype': 'bfloat16'}, '_target_': 'verl.workers.config.FSDPActorConfig', 'ulysses_sequence_parallel_size': 1, 'entropy_from_logits_with_chunking': False, 'entropy_checkpointing': False}, 'rollout': {'_target_': 'verl.workers.config.RolloutConfig', 'name': 'vllm', 'mode': 'async', 'temperature': 1.0, 'top_k': -1, 'top_p': 1, 'prompt_length': 1024, 'response_length': 2048, 'dtype': 'bfloat16', 'gpu_memory_utilization': 0.6, 'ignore_eos': False, 'enforce_eager': False, 'cudagraph_capture_sizes': None, 'free_cache_engine': True, 'tensor_model_parallel_size': 1, 'data_parallel_size': 1, 'expert_parallel_size': 1, 'pipeline_model_parallel_size': 1, 'max_num_batched_tokens': 8192, 'max_model_len': 8192, 'max_num_seqs': 1024, 'enable_chunked_prefill': True, 'enable_prefix_caching': True, 'logprobs_mode': 'processed_logprobs', 'scheduling_policy': 'fcfs', 'load_format': 'dummy', 'log_prob_micro_batch_size': None, 'log_prob_micro_batch_size_per_gpu': 2, 'log_prob_use_dynamic_bsz': False, 'log_prob_max_token_len_per_gpu': 16384, 'disable_log_stats': True, 'do_sample': True, 'n': 3, 'over_sample_rate': 0, 'multi_stage_wake_up': False, 'engine_kwargs': {'vllm': {}, 'sglang': {}, 'trtllm': {}}, 'val_kwargs': {'_target_': 'verl.workers.config.SamplingConfig', 'top_k': -1, 'top_p': 1.0, 'temperature': 0, 'n': 1, 'do_sample': False}, 'multi_turn': {'_target_': 'verl.workers.config.MultiTurnConfig', 'enable': False, 'max_assistant_turns': None, 'tool_config_path': None, 'max_user_turns': None, 'max_parallel_calls': 1, 'max_tool_response_length': 256, 'tool_response_truncate_side': 'middle', 'interaction_config_path': None, 'use_inference_chat_template': False, 'tokenization_sanity_check_mode': 'strict', 'format': 'hermes', 'num_repeat_rollouts': None}, 'calculate_log_probs': False, 'agent': {'_target_': 'verl.workers.config.AgentLoopConfig', 'num_workers': 8, 'default_agent_loop': 'single_turn_agent', 'agent_loop_config_path': None, 'custom_async_server': {'_target_': 'verl.workers.config.CustomAsyncServerConfig', 'path': None, 'name': None}}, 'checkpoint_engine': {'_target_': 'verl.workers.config.CheckpointEngineConfig', 'backend': 'naive', 'update_weights_bucket_megabytes': 2048, 'engine_kwargs': {}}, 'trace': {'_target_': 'verl.workers.config.TraceConfig', 'backend': None, 'token2text': False, 'max_samples_per_step_per_worker': None}, 'skip_rollout': False, 'skip_dump_dir': '/tmp/rollout_dump', 'skip_tokenizer_init': True, 'enable_rollout_routing_replay': False, 'profiler': {'_target_': 'verl.utils.profiler.ProfilerConfig', 'tool': None, 'enable': False, 'all_ranks': False, 'ranks': [], 'save_path': 'outputs/profile', 'tool_config': {'nsys': {'_target_': 'verl.utils.profiler.config.NsightToolConfig', 'discrete': False}, 'npu': {'_target_': 'verl.utils.profiler.config.NPUToolConfig', 'contents': [], 'level': 'level0', 'analysis': True, 'discrete': False}, 'torch': {'_target_': 'verl.utils.profiler.config.TorchProfilerToolConfig', 'contents': [], 'discrete': False}, 'torch_memory': {'_target_': 'verl.utils.profiler.config.TorchMemoryToolConfig', 'trace_alloc_max_entries': 100000, 'stack_depth': 32}}}, 'prometheus': {'_target_': 'verl.workers.config.PrometheusConfig', 'enable': False, 'port': 9090, 'file': '/tmp/ray/session_latest/metrics/prometheus/prometheus.yml', 'served_model_name': 'Qwen/Qwen3-4B-Instruct-2507'}, 'quantization': None, 'quantization_config_file': None, 'mtp': {'_target_': 'verl.workers.config.MtpConfig', 'enable': False, 'enable_train': False, 'enable_rollout': False, 'detach_encoder': False, 'mtp_loss_scaling_factor': 0.1, 'speculative_algorithm': 'EAGLE', 'speculative_num_steps': 3, 'speculative_eagle_topk': 1, 'speculative_num_draft_tokens': 4, 'method': 'mtp', 'num_speculative_tokens': 1}, 'layered_summon': False}, 'model': {'_target_': 'verl.workers.config.HFModelConfig', 'path': 'Qwen/Qwen3-4B-Instruct-2507', 'hf_config_path': None, 'tokenizer_path': None, 'use_shm': False, 'trust_remote_code': False, 'custom_chat_template': None, 'external_lib': None, 'override_config': {}, 'enable_gradient_checkpointing': True, 'enable_activation_offload': False, 'use_remove_padding': True, 'lora_rank': 0, 'lora_alpha': 16, 'target_modules': 'all-linear', 'exclude_modules': None, 'lora_adapter_path': None, 'use_liger': False, 'use_fused_kernels': False, 'fused_kernel_options': {'impl_backend': 'torch'}, 'tiled_mlp': {'enabled': False, 'num_shards': 4}, 'mtp': {'_target_': 'verl.workers.config.MtpConfig', 'enable': False, 'enable_train': False, 'enable_rollout': False, 'detach_encoder': False, 'mtp_loss_scaling_factor': 0.1, 'speculative_algorithm': 'EAGLE', 'speculative_num_steps': 3, 'speculative_eagle_topk': 1, 'speculative_num_draft_tokens': 4, 'method': 'mtp', 'num_speculative_tokens': 1}}, 'hybrid_engine': True, 'nccl_timeout': 600}, 'data': {'tokenizer': None, 'use_shm': False, 'train_files': '/home/mshahidul/readctrl/code/RL_model/verl/verl_train/dataset/train.parquet', 'val_files': '/home/mshahidul/readctrl/code/RL_model/verl/verl_train/dataset/test.parquet', 'train_max_samples': -1, 'val_max_samples': -1, 'prompt_key': 'prompt', 'reward_fn_key': 'data_source', 'max_prompt_length': 1024, 'max_response_length': 2048, 'train_batch_size': 8, 'val_batch_size': None, 'tool_config_path': None, 'return_raw_input_ids': False, 'return_raw_chat': True, 'return_full_prompt': False, 'shuffle': True, 'seed': None, 'dataloader_num_workers': 8, 'image_patch_size': 14, 'validation_shuffle': False, 'filter_overlong_prompts': True, 'filter_overlong_prompts_workers': 1, 'truncation': 'error', 'image_key': 'images', 'video_key': 'videos', 'trust_remote_code': False, 'custom_cls': {'path': None, 'name': None}, 'return_multi_modal_inputs': True, 'sampler': {'class_path': None, 'class_name': None}, 'datagen': {'path': None, 'name': None}, 'apply_chat_template_kwargs': {}}, 'reward_manager': {'_target_': 'verl.trainer.config.config.RewardManagerConfig', 'source': 'register', 'name': 'naive', 'module': {'_target_': 'verl.trainer.config.config.ModuleConfig', 'path': None, 'name': 'custom_reward_manager'}}, 'critic': {'optim': {'_target_': 'verl.workers.config.FSDPOptimizerConfig', 'optimizer': 'AdamW', 'optimizer_impl': 'torch.optim', 'lr': 1e-05, 'lr_warmup_steps_ratio': 0.0, 'total_training_steps': 6045, 'weight_decay': 0.01, 'lr_warmup_steps': -1, 'betas': [0.9, 0.999], 'clip_grad': 1.0, 'min_lr_ratio': 0.0, 'num_cycles': 0.5, 'lr_scheduler_type': 'constant', 'warmup_style': None, 'override_optimizer_config': None}, 'model': {'fsdp_config': {'_target_': 'verl.workers.config.FSDPEngineConfig', 'wrap_policy': {'min_num_params': 0}, 'param_offload': False, 'optimizer_offload': False, 'offload_policy': False, 'reshard_after_forward': True, 'fsdp_size': -1, 'forward_prefetch': False, 'model_dtype': 'fp32', 'use_orig_params': False, 'seed': 42, 'full_determinism': False, 'ulysses_sequence_parallel_size': 1, 'entropy_from_logits_with_chunking': False, 'use_torch_compile': True, 'entropy_checkpointing': False, 'forward_only': False, 'strategy': 'fsdp', 'dtype': 'bfloat16'}, 'path': '~/models/deepseek-llm-7b-chat', 'tokenizer_path': 'Qwen/Qwen3-4B-Instruct-2507', 'override_config': {}, 'external_lib': None, 'trust_remote_code': False, '_target_': 'verl.workers.config.FSDPCriticModelCfg', 'use_shm': False, 'enable_gradient_checkpointing': True, 'enable_activation_offload': False, 'use_remove_padding': False, 'lora_rank': 0, 'lora_alpha': 16, 'target_modules': 'all-linear', 'tiled_mlp': {'enabled': False, 'num_shards': 4}}, '_target_': 'verl.workers.config.FSDPCriticConfig', 'rollout_n': 3, 'strategy': 'fsdp', 'enable': None, 'ppo_mini_batch_size': 4, 'ppo_micro_batch_size': None, 'ppo_micro_batch_size_per_gpu': None, 'use_dynamic_bsz': False, 'ppo_max_token_len_per_gpu': 32768, 'forward_max_token_len_per_gpu': 32768, 'ppo_epochs': 1, 'shuffle': False, 'data_loader_seed': 42, 'cliprange_value': 0.5, 'loss_agg_mode': 'token-mean', 'checkpoint': {'_target_': 'verl.trainer.config.CheckpointConfig', 'save_contents': ['model', 'optimizer', 'extra'], 'load_contents': ['model', 'optimizer', 'extra'], 'async_save': False}, 'profiler': {'_target_': 'verl.utils.profiler.ProfilerConfig', 'tool': None, 'enable': False, 'all_ranks': False, 'ranks': [], 'save_path': 'outputs/profile', 'tool_config': {'nsys': {'_target_': 'verl.utils.profiler.config.NsightToolConfig', 'discrete': False}, 'npu': {'_target_': 'verl.utils.profiler.config.NPUToolConfig', 'contents': [], 'level': 'level0', 'analysis': True, 'discrete': False}, 'torch': {'_target_': 'verl.utils.profiler.config.TorchProfilerToolConfig', 'contents': [], 'discrete': False}, 'torch_memory': {'_target_': 'verl.utils.profiler.config.TorchMemoryToolConfig', 'trace_alloc_max_entries': 100000, 'stack_depth': 32}}}, 'forward_micro_batch_size': None, 'forward_micro_batch_size_per_gpu': None, 'ulysses_sequence_parallel_size': 1, 'grad_clip': 1.0}, 'reward_model': {'enable': False, 'enable_resource_pool': False, 'n_gpus_per_node': 8, 'nnodes': 0, 'strategy': 'fsdp', 'model': {'input_tokenizer': 'Qwen/Qwen3-4B-Instruct-2507', 'path': '~/models/FsfairX-LLaMA3-RM-v0.1', 'external_lib': None, 'trust_remote_code': False, 'override_config': {}, 'use_shm': False, 'use_remove_padding': False, 'use_fused_kernels': False, 'fsdp_config': {'_target_': 'verl.workers.config.FSDPEngineConfig', 'wrap_policy': {'min_num_params': 0}, 'param_offload': False, 'reshard_after_forward': True, 'fsdp_size': -1, 'forward_prefetch': False}}, 'micro_batch_size': None, 'micro_batch_size_per_gpu': None, 'max_length': None, 'use_dynamic_bsz': False, 'forward_max_token_len_per_gpu': 32768, 'reward_manager': 'naive', 'reward_loop_source': 'register', 'reward_loop_module_path': None, 'reward_loop_class_name': None, 'launch_reward_fn_async': False, 'sandbox_fusion': {'url': None, 'max_concurrent': 64, 'memory_limit_mb': 1024}, 'profiler': {'_target_': 'verl.utils.profiler.ProfilerConfig', 'tool': None, 'enable': False, 'all_ranks': False, 'ranks': [], 'save_path': 'outputs/profile', 'tool_config': {'nsys': {'_target_': 'verl.utils.profiler.config.NsightToolConfig', 'discrete': False}, 'npu': {'_target_': 'verl.utils.profiler.config.NPUToolConfig', 'contents': [], 'level': 'level0', 'analysis': True, 'discrete': False}, 'torch': {'_target_': 'verl.utils.profiler.config.TorchProfilerToolConfig', 'contents': [], 'discrete': False}, 'torch_memory': {'_target_': 'verl.utils.profiler.config.TorchMemoryToolConfig', 'trace_alloc_max_entries': 100000, 'stack_depth': 32}}}, 'ulysses_sequence_parallel_size': 1, 'use_reward_loop': True, 'num_workers': 1, 'rollout': {'_target_': 'verl.workers.config.RolloutConfig', 'name': '???', 'dtype': 'bfloat16', 'gpu_memory_utilization': 0.5, 'enforce_eager': True, 'cudagraph_capture_sizes': None, 'free_cache_engine': True, 'data_parallel_size': 1, 'expert_parallel_size': 1, 'tensor_model_parallel_size': 2, 'max_num_batched_tokens': 8192, 'max_model_len': None, 'max_num_seqs': 1024, 'load_format': 'auto', 'engine_kwargs': {}, 'limit_images': None, 'enable_chunked_prefill': True, 'enable_prefix_caching': True, 'disable_log_stats': True, 'skip_tokenizer_init': False, 'prompt_length': 2048, 'response_length': 2048}}, 'algorithm': {'rollout_correction': {'rollout_is': None, 'rollout_is_threshold': 2.0, 'rollout_rs': None, 'rollout_rs_threshold': None, 'bypass_mode': False, 'loss_type': 'ppo_clip', 'rollout_is_batch_normalize': False}, '_target_': 'verl.trainer.config.AlgoConfig', 'gamma': 1.0, 'lam': 1.0, 'adv_estimator': 'grpo', 'norm_adv_by_std_in_grpo': True, 'use_kl_in_reward': False, 'kl_penalty': 'kl', 'kl_ctrl': {'_target_': 'verl.trainer.config.KLControlConfig', 'type': 'fixed', 'kl_coef': 0.001, 'horizon': 10000, 'target_kl': 0.1}, 'use_pf_ppo': False, 'pf_ppo': {'reweight_method': 'pow', 'weight_pow': 2.0}}, 'custom_reward_function': {'path': '/home/mshahidul/readctrl/code/RL_model/verl/verl_train/reward_func/reward.py', 'name': 'compute_score'}, 'trainer': {'balance_batch': True, 'total_epochs': 15, 'total_training_steps': None, 'project_name': 'readctrl-verl', 'experiment_name': 'qwen3-4b-instruct-optimized-multiclinsum-gs', 'logger': ['console', 'wandb'], 'log_val_generations': 0, 'rollout_data_dir': None, 'validation_data_dir': None, 'nnodes': 1, 'n_gpus_per_node': 2, 'save_freq': 100, 'esi_redundant_time': 0, 'resume_mode': 'auto', 'resume_from_path': None, 'val_before_train': True, 'val_only': False, 'test_freq': 1, 'critic_warmup': 0, 'default_hdfs_dir': None, 'del_local_ckpt_after_load': False, 'default_local_dir': '/home/mshahidul/readctrl/code/RL_model/train_v2', 'max_actor_ckpt_to_keep': 1, 'max_critic_ckpt_to_keep': 1, 'ray_wait_register_center_timeout': 300, 'device': 'cuda', 'use_legacy_worker_impl': 'auto', 'remove_previous_ckpt_in_save': True}, 'global_profiler': {'_target_': 'verl.utils.profiler.ProfilerConfig', 'tool': None, 'steps': None, 'profile_continuous_steps': False, 'save_path': 'outputs/profile', 'global_tool_config': {'nsys': {'_target_': 'verl.utils.profiler.config.NsightToolConfig', 'discrete': False, 'controller_nsight_options': {'trace': 'cuda,nvtx,cublas,ucx', 'cuda-memory-usage': 'true', 'cuda-graph-trace': 'graph'}, 'worker_nsight_options': {'trace': 'cuda,nvtx,cublas,ucx', 'cuda-memory-usage': 'true', 'cuda-graph-trace': 'graph', 'capture-range': 'cudaProfilerApi', 'capture-range-end': None, 'kill': 'none'}}, 'torch_memory': {'trace_alloc_max_entries': 100000, 'stack_depth': 32, 'context': 'all', 'stacks': 'all', 'kw_args': {}}}}, 'transfer_queue': {'enable': False}, 'ray_kwargs': {'ray_init': {'num_cpus': None}, 'timeline_json_file': None}, '_wandb': {}} +2026-02-07 12:26:07,720 INFO MainThread:759076 [wandb_init.py:init():892] starting backend +2026-02-07 12:26:08,007 INFO MainThread:759076 [wandb_init.py:init():895] sending inform_init request +2026-02-07 12:26:08,018 INFO MainThread:759076 [wandb_init.py:init():903] backend started and connected +2026-02-07 12:26:08,029 INFO MainThread:759076 [wandb_init.py:init():973] updated telemetry +2026-02-07 12:26:08,052 INFO MainThread:759076 [wandb_init.py:init():997] communicating run to backend with 90.0 second timeout +2026-02-07 12:26:09,897 INFO MainThread:759076 [wandb_init.py:init():1042] starting run threads in backend +2026-02-07 12:26:11,112 INFO MainThread:759076 [wandb_run.py:_console_start():2529] atexit reg +2026-02-07 12:26:11,114 INFO MainThread:759076 [wandb_run.py:_redirect():2377] redirect: wrap_raw +2026-02-07 12:26:11,115 INFO MainThread:759076 [wandb_run.py:_redirect():2446] Wrapping output streams. +2026-02-07 12:26:11,115 INFO MainThread:759076 [wandb_run.py:_redirect():2469] Redirects installed. +2026-02-07 12:26:11,127 INFO MainThread:759076 [wandb_init.py:init():1082] run started, returning control to user process +2026-02-07 12:46:33,375 INFO MainThread:759076 [wandb_run.py:_finish():2295] finishing run shahidulshakib034-khulna-university-of-engineering-techn/readctrl-verl/4jfbiq6q +2026-02-07 12:46:33,376 INFO MainThread:759076 [wandb_run.py:_atexit_cleanup():2494] got exitcode: 0 +2026-02-07 12:46:33,377 INFO MainThread:759076 [wandb_run.py:_restore():2476] restore +2026-02-07 12:46:33,377 INFO MainThread:759076 [wandb_run.py:_restore():2482] restore done diff --git a/code/RL_model/verl/verl_train/wandb/run-20260207_134018-vq0iy4i3/files/output.log b/code/RL_model/verl/verl_train/wandb/run-20260207_134018-vq0iy4i3/files/output.log new file mode 100644 index 0000000000000000000000000000000000000000..4d4500c4494be5ebb4275048fa0eb1f31b675ed8 --- /dev/null +++ b/code/RL_model/verl/verl_train/wandb/run-20260207_134018-vq0iy4i3/files/output.log @@ -0,0 +1,14 @@ +wandb: Detected [openai] in use. +wandb: Use W&B Weave for improved LLM call tracing. Install Weave with `pip install weave` then add `import weave` to the top of your script. +wandb: For more information, check out the docs at: https://weave-docs.wandb.ai/ +Checkpoint tracker file does not exist: /home/mshahidul/readctrl/code/RL_model/train_v2/latest_checkpointed_iteration.txt +Training from scratch +test_gen_batch meta info: {'eos_token_id': 151645, 'pad_token_id': 151643, 'recompute_log_prob': False, 'do_sample': False, 'validate': True, 'global_steps': 0} +validation generation end +("Initial validation metrics: {'val-aux/multiclinsum/reward/mean@1': " + "np.float64(-5.449668711774489), 'val-core/multiclinsum/acc/mean@1': " + "np.float64(-5.449668725530857), 'val-aux/num_turns/min': np.int32(2), " + "'val-aux/num_turns/max': np.int32(2), 'val-aux/num_turns/mean': " + 'np.float64(2.0)}') +step:0 - val-aux/multiclinsum/reward/mean@1:np.float64(-5.449668711774489) - val-core/multiclinsum/acc/mean@1:np.float64(-5.449668725530857) - val-aux/num_turns/min:np.int32(2) - val-aux/num_turns/max:np.int32(2) - val-aux/num_turns/mean:np.float64(2.0) +Training Progress: 0%| | 0/90 [00:00, ?it/s] diff --git a/code/RL_model/verl/verl_train/wandb/run-20260207_134018-vq0iy4i3/files/requirements.txt b/code/RL_model/verl/verl_train/wandb/run-20260207_134018-vq0iy4i3/files/requirements.txt new file mode 100644 index 0000000000000000000000000000000000000000..c168a7097ef00c84a34fe1db69f95d6eaafb03f2 --- /dev/null +++ b/code/RL_model/verl/verl_train/wandb/run-20260207_134018-vq0iy4i3/files/requirements.txt @@ -0,0 +1,269 @@ +verl==0.8.0.dev0 +psutil==7.1.3 +colorama==0.4.6 +annotated-doc==0.0.4 +sentry-sdk==2.51.0 +requests==2.32.5 +nvidia-cufile-cu12==1.13.1.3 +ml_dtypes==0.5.4 +xformers==0.0.32.post1 +sglang==0.5.2 +multidict==6.7.1 +typing_extensions==4.15.0 +nvidia-cusparselt-cu12==0.7.1 +openai-harmony==0.0.4 +transformers==4.56.1 +Werkzeug==3.1.5 +identify==2.6.16 +pytest==9.0.2 +nvidia-cuda-runtime-cu12==12.8.90 +GitPython==3.1.46 +cupy-cuda12x==13.6.0 +tokenizers==0.22.2 +pybind11==3.0.1 +google-api-core==2.29.0 +partial-json-parser==0.2.1.1.post7 +aiohttp-cors==0.8.1 +sniffio==1.3.1 +tensordict==0.10.0 +smart_open==7.5.0 +cffi==2.0.0 +wcwidth==0.5.3 +asttokens==3.0.1 +opencensus==0.11.4 +rpds-py==0.30.0 +py-spy==0.4.1 +gguf==0.17.1 +nvidia-nvjitlink-cu12==12.8.93 +httpx==0.28.1 +cuda-python==13.1.1 +annotated-types==0.7.0 +regex==2026.1.15 +vllm==0.11.0 +idna==3.11 +fsspec==2025.10.0 +parso==0.8.5 +pydantic-extra-types==2.11.0 +MarkupSafe==3.0.3 +cryptography==46.0.4 +filelock==3.20.3 +modelscope==1.34.0 +outlines==0.1.11 +dnspython==2.8.0 +scipy==1.17.0 +zipp==3.23.0 +PyYAML==6.0.3 +onnx==1.20.1 +torchdata==0.11.0 +cuda-pathfinder==1.3.3 +verl==0.8.0.dev0 +httptools==0.7.1 +opencv-python-headless==4.13.0.90 +importlib_metadata==8.7.1 +peft==0.18.1 +opentelemetry-sdk==1.39.1 +python-json-logger==4.0.0 +cuda-bindings==13.1.1 +mdurl==0.1.2 +referencing==0.37.0 +xxhash==3.6.0 +interegular==0.3.3 +fastapi-cli==0.0.20 +uv==0.9.28 +tensorboard==2.20.0 +nvidia-cublas-cu12==12.8.4.1 +sentencepiece==0.2.1 +rich-toolkit==0.18.1 +numpy==2.2.0 +yarl==1.22.0 +opencv-fixer==0.2.5 +tqdm==4.67.2 +python-dotenv==1.2.1 +timm==1.0.16 +aiohappyeyeballs==2.6.1 +decord==0.6.0 +jiter==0.12.0 +airportsdata==20250909 +markdown-it-py==4.0.0 +nvidia-cusolver-cu12==11.7.3.90 +pyarrow==23.0.0 +opentelemetry-proto==1.39.1 +anyio==4.12.1 +pycryptodomex==3.23.0 +prometheus_client==0.24.1 +aiohttp==3.13.3 +urllib3==2.6.3 +pexpect==4.9.0 +pydantic-settings==2.12.0 +distro==1.9.0 +av==16.1.0 +cloudpickle==3.1.2 +mpmath==1.3.0 +certifi==2026.1.4 +antlr4-python3-runtime==4.9.3 +torchvision==0.23.0 +accelerate==1.12.0 +watchfiles==1.1.1 +ruff==0.14.14 +wheel==0.46.3 +omegaconf==2.3.0 +nvidia-cufft-cu12==11.3.3.83 +multiprocess==0.70.18 +frozendict==2.4.7 +sympy==1.14.0 +setproctitle==1.3.7 +setuptools==79.0.1 +py-cpuinfo==9.0.0 +ipython_pygments_lexers==1.1.1 +rich==14.3.2 +uvicorn==0.40.0 +openai==1.99.1 +outlines_core==0.2.11 +llvmlite==0.44.0 +nvidia-cuda-cupti-cu12==12.8.90 +attrs==25.4.0 +anthropic==0.77.0 +packaging==25.0 +fastrlock==0.8.3 +astor==0.8.1 +pluggy==1.6.0 +nvidia-cuda-nvrtc-cu12==12.8.93 +psutil==7.2.2 +virtualenv==20.36.1 +cbor2==5.8.0 +compressed-tensors==0.11.0 +nvidia-cusparse-cu12==12.5.8.93 +networkx==3.6.1 +httpcore==1.0.9 +onnxscript==0.3.1 +smmap==5.0.2 +opencv-python==4.13.0.90 +traitlets==5.14.3 +python-multipart==0.0.22 +pyvers==0.1.0 +huggingface-hub==0.36.0 +pillow==12.1.0 +jsonschema==4.26.0 +cfgv==3.5.0 +optree==0.18.0 +email-validator==2.3.0 +tabulate==0.9.0 +pre_commit==4.5.1 +msgpack==1.1.2 +depyf==0.19.0 +numba==0.61.2 +six==1.17.0 +aiosignal==1.4.0 +nvidia-nvtx-cu12==12.8.90 +propcache==0.4.1 +torch_memory_saver==0.0.8 +h11==0.16.0 +frozenlist==1.8.0 +websockets==16.0 +nvidia-cudnn-frontend==1.18.0 +build==1.4.0 +google-auth==2.48.0 +pycountry==24.6.1 +stack-data==0.6.3 +typing-inspection==0.4.2 +googleapis-common-protos==1.72.0 +pandas==3.0.0 +typer==0.21.1 +protobuf==6.33.5 +fastapi==0.128.0 +blake3==1.0.8 +opentelemetry-semantic-conventions==0.60b1 +opentelemetry-exporter-prometheus==0.60b1 +nvidia-cudnn-cu12==9.10.2.21 +Markdown==3.10.1 +liger_kernel==0.6.4 +nodeenv==1.10.0 +prompt_toolkit==3.0.52 +torchaudio==2.8.0 +codetiming==1.4.0 +platformdirs==4.5.1 +jsonschema-specifications==2025.9.1 +hydra-core==1.3.2 +tensorboard-data-server==0.7.2 +lm-format-enforcer==0.11.3 +pyasn1_modules==0.4.2 +tiktoken==0.12.0 +starlette==0.50.0 +pyproject_hooks==1.2.0 +flash_attn==2.8.1 +rsa==4.9.1 +ray==2.53.0 +nest-asyncio==1.6.0 +lark==1.2.2 +fastar==0.8.0 +orjson==3.11.6 +prometheus-fastapi-instrumentator==7.1.0 +opentelemetry-api==1.39.1 +mathruler==0.1.0 +pydantic_core==2.41.5 +fastapi-cloud-cli==0.11.0 +pynvml==13.0.1 +loguru==0.7.3 +torch==2.8.0 +msgspec==0.20.0 +nvidia-curand-cu12==10.3.9.90 +blobfile==3.0.0 +gitdb==4.0.12 +llguidance==0.7.30 +hf_transfer==0.1.9 +nvidia-nccl-cu12==2.27.3 +qwen-vl-utils==0.0.14 +ptyprocess==0.7.0 +ipdb==0.13.13 +opencensus-context==0.1.3 +jedi==0.19.2 +click==8.3.1 +datasets==4.5.0 +soxr==1.0.0 +sgl-kernel==0.3.9.post2 +colorful==0.5.8 +pyasn1==0.6.2 +charset-normalizer==3.4.4 +nvidia-ml-py==13.590.48 +hf-xet==1.2.0 +dill==0.4.0 +absl-py==2.4.0 +pydantic==2.12.5 +wrapt==2.1.0 +flashinfer-python==0.3.1 +python-dateutil==2.9.0.post0 +torchao==0.9.0 +cachetools==7.0.0 +soundfile==0.13.1 +diskcache==5.6.3 +onnx-ir==0.1.15 +docstring_parser==0.17.0 +matplotlib-inline==0.2.1 +Pygments==2.19.2 +wandb==0.24.1 +pure_eval==0.2.3 +ninja==1.13.0 +proto-plus==1.27.0 +pyzmq==27.1.0 +iniconfig==2.3.0 +Jinja2==3.1.6 +megatron-core==0.13.1 +uvloop==0.22.1 +pycparser==3.0 +pylatexenc==2.10 +decorator==5.2.1 +shellingham==1.5.4 +lxml==6.0.2 +safetensors==0.7.0 +xgrammar==0.1.25 +pybase64==1.4.3 +ipython==9.9.0 +mistral_common==1.9.0 +rignore==0.7.6 +einops==0.8.2 +distlib==0.4.0 +triton==3.4.0 +executing==2.2.1 +grpcio==1.76.0 +pip==25.3 +verl==0.8.0.dev0 diff --git a/code/RL_model/verl/verl_train/wandb/run-20260207_134018-vq0iy4i3/files/wandb-metadata.json b/code/RL_model/verl/verl_train/wandb/run-20260207_134018-vq0iy4i3/files/wandb-metadata.json new file mode 100644 index 0000000000000000000000000000000000000000..48ac49de1d8177e5ff9820d556ee467bc705a559 --- /dev/null +++ b/code/RL_model/verl/verl_train/wandb/run-20260207_134018-vq0iy4i3/files/wandb-metadata.json @@ -0,0 +1,93 @@ +{ + "os": "Linux-5.15.0-160-generic-x86_64-with-glibc2.35", + "python": "CPython 3.12.12", + "startedAt": "2026-02-07T18:40:18.940694Z", + "args": [ + "--node-ip-address=172.16.34.29", + "--node-manager-port=41791", + "--object-store-name=/tmp/ray/session_2026-02-07_13-35-02_732437_976760/sockets/plasma_store", + "--raylet-name=/tmp/ray/session_2026-02-07_13-35-02_732437_976760/sockets/raylet", + "--redis-address=None", + "--metrics-agent-port=64627", + "--logging-rotate-bytes=536870912", + "--logging-rotate-backup-count=5", + "--runtime-env-agent-port=57123", + "--gcs-address=172.16.34.29:63934", + "--session-name=session_2026-02-07_13-35-02_732437_976760", + "--temp-dir=/tmp/ray", + "--webui=127.0.0.1:8297", + "--cluster-id=1ca88028e1332d4f568376deb6a3b01bef3fb4a9ec19d630a6618a8b", + "--startup-token=128", + "--worker-launch-time-ms=1770489314378", + "--node-id=96c9780356cd34598e101ac5c4fe132c1a4b6d624db5e0834ed90b48", + "--runtime-env-hash=1096984665" + ], + "program": "/home/mshahidul/miniconda3/envs/verl2/lib/python3.12/site-packages/ray/_private/workers/default_worker.py", + "git": { + "remote": "https://github.com/verl-project/verl", + "commit": "d9939add7a2a01923a9088891f913a5d20c4e622" + }, + "email": "shahidulshakib034@gmail.com", + "root": "/data/home_beta/mshahidul/readctrl/code/RL_model/verl/verl_train", + "host": "gamma", + "executable": "/home/mshahidul/miniconda3/envs/verl2/bin/python3", + "cpu_count": 64, + "cpu_count_logical": 128, + "gpu": "NVIDIA A100 80GB PCIe", + "gpu_count": 6, + "disk": { + "/": { + "total": "3766429188096", + "used": "188492046336" + } + }, + "memory": { + "total": "1081814863872" + }, + "gpu_nvidia": [ + { + "name": "NVIDIA A100 80GB PCIe", + "memoryTotal": "85899345920", + "cudaCores": 6912, + "architecture": "Ampere", + "uuid": "GPU-4a3678c7-34a9-356f-f7b7-7f7e2f44b596" + }, + { + "name": "NVIDIA A100 80GB PCIe", + "memoryTotal": "85899345920", + "cudaCores": 6912, + "architecture": "Ampere", + "uuid": "GPU-df506764-0db5-91b4-8ec9-154a3bb8123f" + }, + { + "name": "NVIDIA A100 80GB PCIe", + "memoryTotal": "85899345920", + "cudaCores": 6912, + "architecture": "Ampere", + "uuid": "GPU-2c3dbd62-b384-2996-a0f6-b32dcfcc3538" + }, + { + "name": "NVIDIA A100 80GB PCIe", + "memoryTotal": "85899345920", + "cudaCores": 6912, + "architecture": "Ampere", + "uuid": "GPU-1ff3dabe-4b9a-ea62-5cc3-01f12f32d328" + }, + { + "name": "NVIDIA H100 PCIe", + "memoryTotal": "85520809984", + "cudaCores": 14592, + "architecture": "Hopper", + "uuid": "GPU-eefc4b8c-0e79-c1d6-a9ff-8325040572eb" + }, + { + "name": "NVIDIA H100 PCIe", + "memoryTotal": "85520809984", + "cudaCores": 14592, + "architecture": "Hopper", + "uuid": "GPU-d42b6057-13e8-1e88-6aa1-9307df72dece" + } + ], + "cudaVersion": "13.0", + "writerId": "izo95pui2vf5pq0kfph35rscarkhzb28" +} \ No newline at end of file diff --git a/code/RL_model/verl/verl_train/wandb/run-20260207_134018-vq0iy4i3/logs/debug-core.log b/code/RL_model/verl/verl_train/wandb/run-20260207_134018-vq0iy4i3/logs/debug-core.log new file mode 100644 index 0000000000000000000000000000000000000000..7c50d4676f1545e16cab18ee2033d0a219192d1a --- /dev/null +++ b/code/RL_model/verl/verl_train/wandb/run-20260207_134018-vq0iy4i3/logs/debug-core.log @@ -0,0 +1,7 @@ +{"time":"2026-02-07T13:40:19.078017031-05:00","level":"INFO","msg":"main: starting server","port-filename":"/tmp/tmps4xovg1g/port-985279.txt","pid":985279,"log-level":0,"disable-analytics":false,"shutdown-on-parent-exit":false,"enable-dcgm-profiling":false} +{"time":"2026-02-07T13:40:19.079087888-05:00","level":"INFO","msg":"server: will exit if parent process dies","ppid":985279} +{"time":"2026-02-07T13:40:19.079081795-05:00","level":"INFO","msg":"server: accepting connections","addr":{"Name":"/tmp/wandb-985279-993946-699035934/socket","Net":"unix"}} +{"time":"2026-02-07T13:40:19.241748003-05:00","level":"INFO","msg":"connection: ManageConnectionData: new connection created","id":"1(@)"} +{"time":"2026-02-07T13:40:19.255122953-05:00","level":"INFO","msg":"handleInformInit: received","streamId":"vq0iy4i3","id":"1(@)"} +{"time":"2026-02-07T13:40:19.870023218-05:00","level":"INFO","msg":"handleInformInit: stream started","streamId":"vq0iy4i3","id":"1(@)"} +{"time":"2026-02-07T13:40:25.893320003-05:00","level":"INFO","msg":"connection: cancelling request","id":"1(@)","requestId":"vc0gpm2d6b5f"} diff --git a/code/RL_model/verl/verl_train/wandb/run-20260207_134018-vq0iy4i3/logs/debug-internal.log b/code/RL_model/verl/verl_train/wandb/run-20260207_134018-vq0iy4i3/logs/debug-internal.log new file mode 100644 index 0000000000000000000000000000000000000000..2a73f54d9375d41523e7bbdea207d5166fb3a1c3 --- /dev/null +++ b/code/RL_model/verl/verl_train/wandb/run-20260207_134018-vq0iy4i3/logs/debug-internal.log @@ -0,0 +1,6 @@ +{"time":"2026-02-07T13:40:19.256642832-05:00","level":"INFO","msg":"stream: starting","core version":"0.24.1"} +{"time":"2026-02-07T13:40:19.866928726-05:00","level":"INFO","msg":"stream: created new stream","id":"vq0iy4i3"} +{"time":"2026-02-07T13:40:19.867085963-05:00","level":"INFO","msg":"handler: started","stream_id":"vq0iy4i3"} +{"time":"2026-02-07T13:40:19.869994951-05:00","level":"INFO","msg":"stream: started","id":"vq0iy4i3"} +{"time":"2026-02-07T13:40:19.870044082-05:00","level":"INFO","msg":"writer: started","stream_id":"vq0iy4i3"} +{"time":"2026-02-07T13:40:19.870047092-05:00","level":"INFO","msg":"sender: started","stream_id":"vq0iy4i3"} diff --git a/code/RL_model/verl/verl_train/wandb/run-20260207_134018-vq0iy4i3/logs/debug.log b/code/RL_model/verl/verl_train/wandb/run-20260207_134018-vq0iy4i3/logs/debug.log new file mode 100644 index 0000000000000000000000000000000000000000..19ed70d6630193c1ad7bc9b74d3a21e9fd2d1238 --- /dev/null +++ b/code/RL_model/verl/verl_train/wandb/run-20260207_134018-vq0iy4i3/logs/debug.log @@ -0,0 +1,19 @@ +2026-02-07 13:40:18,960 INFO MainThread:985279 [wandb_setup.py:_flush():81] Current SDK version is 0.24.1 +2026-02-07 13:40:18,961 INFO MainThread:985279 [wandb_setup.py:_flush():81] Configure stats pid to 985279 +2026-02-07 13:40:18,961 INFO MainThread:985279 [wandb_setup.py:_flush():81] Loading settings from environment variables +2026-02-07 13:40:18,961 INFO MainThread:985279 [wandb_init.py:setup_run_log_directory():717] Logging user logs to /data/home_beta/mshahidul/readctrl/code/RL_model/verl/verl_train/wandb/run-20260207_134018-vq0iy4i3/logs/debug.log +2026-02-07 13:40:18,961 INFO MainThread:985279 [wandb_init.py:setup_run_log_directory():718] Logging internal logs to /data/home_beta/mshahidul/readctrl/code/RL_model/verl/verl_train/wandb/run-20260207_134018-vq0iy4i3/logs/debug-internal.log +2026-02-07 13:40:18,962 INFO MainThread:985279 [wandb_init.py:init():844] calling init triggers +2026-02-07 13:40:18,963 INFO MainThread:985279 [wandb_init.py:init():849] wandb.init called with sweep_config: {} +config: {'actor_rollout_ref': {'actor': {'optim': {'_target_': 'verl.workers.config.FSDPOptimizerConfig', 'optimizer': 'AdamW', 'optimizer_impl': 'torch.optim', 'lr': 1e-06, 'lr_warmup_steps_ratio': 0.0, 'total_training_steps': 90, 'weight_decay': 0.01, 'lr_warmup_steps': -1, 'betas': [0.9, 0.999], 'clip_grad': 1.0, 'min_lr_ratio': 0.0, 'num_cycles': 0.5, 'lr_scheduler_type': 'constant', 'warmup_style': None, 'override_optimizer_config': None}, 'fsdp_config': {'_target_': 'verl.workers.config.FSDPEngineConfig', 'wrap_policy': {'min_num_params': 0}, 'param_offload': False, 'optimizer_offload': False, 'offload_policy': False, 'reshard_after_forward': True, 'fsdp_size': -1, 'forward_prefetch': False, 'model_dtype': 'fp32', 'use_orig_params': False, 'seed': 42, 'full_determinism': False, 'ulysses_sequence_parallel_size': 1, 'entropy_from_logits_with_chunking': False, 'use_torch_compile': True, 'entropy_checkpointing': False, 'forward_only': False, 'strategy': 'fsdp', 'dtype': 'bfloat16'}, '_target_': 'verl.workers.config.FSDPActorConfig', 'rollout_n': 3, 'strategy': 'fsdp', 'ppo_mini_batch_size': 256, 'ppo_micro_batch_size': None, 'ppo_micro_batch_size_per_gpu': 32, 'use_dynamic_bsz': False, 'ppo_max_token_len_per_gpu': 16384, 'clip_ratio': 0.2, 'clip_ratio_low': 0.2, 'clip_ratio_high': 0.2, 'tau_pos': 1.0, 'tau_neg': 1.05, 'freeze_vision_tower': False, 'policy_loss': {'_target_': 'verl.workers.config.PolicyLossConfig', 'loss_mode': 'vanilla', 'clip_cov_ratio': 0.0002, 'clip_cov_lb': 1.0, 'clip_cov_ub': 5.0, 'kl_cov_ratio': 0.0002, 'ppo_kl_coef': 0.1}, 'clip_ratio_c': 3.0, 'loss_agg_mode': 'token-mean', 'loss_scale_factor': None, 'entropy_coeff': 0, 'calculate_entropy': False, 'use_kl_loss': True, 'use_prefix_grouper': False, 'use_torch_compile': True, 'kl_loss_coef': 0.001, 'kl_loss_type': 'low_var_kl', 'ppo_epochs': 1, 'shuffle': False, 'data_loader_seed': 42, 'checkpoint': {'_target_': 'verl.trainer.config.CheckpointConfig', 'save_contents': ['model', 'optimizer', 'extra'], 'load_contents': ['model', 'optimizer', 'extra'], 'async_save': False}, 'use_fused_kernels': False, 'profiler': {'_target_': 'verl.utils.profiler.ProfilerConfig', 'tool': None, 'enable': False, 'all_ranks': False, 'ranks': [], 'save_path': 'outputs/profile', 'tool_config': {'nsys': {'_target_': 'verl.utils.profiler.config.NsightToolConfig', 'discrete': False}, 'npu': {'_target_': 'verl.utils.profiler.config.NPUToolConfig', 'contents': [], 'level': 'level0', 'analysis': True, 'discrete': False}, 'torch': {'_target_': 'verl.utils.profiler.config.TorchProfilerToolConfig', 'contents': [], 'discrete': False}, 'torch_memory': {'_target_': 'verl.utils.profiler.config.TorchMemoryToolConfig', 'trace_alloc_max_entries': 100000, 'stack_depth': 32}}}, 'router_replay': {'_target_': 'verl.workers.config.RouterReplayConfig', 'mode': 'disabled', 'record_file': None, 'replay_file': None}, 'grad_clip': 1.0, 'ulysses_sequence_parallel_size': 1, 'entropy_from_logits_with_chunking': False, 'entropy_checkpointing': False, 'use_remove_padding': True, 'calculate_sum_pi_squared': False, 'sum_pi_squared_checkpointing': False}, 'ref': {'rollout_n': 3, 'strategy': 'fsdp', 'use_torch_compile': True, 'log_prob_micro_batch_size': None, 'log_prob_micro_batch_size_per_gpu': 32, 'log_prob_use_dynamic_bsz': False, 'log_prob_max_token_len_per_gpu': 16384, 'profiler': {'_target_': 'verl.utils.profiler.ProfilerConfig', 'tool': None, 'enable': False, 'all_ranks': False, 'ranks': [], 'save_path': 'outputs/profile', 'tool_config': {'nsys': {'_target_': 'verl.utils.profiler.config.NsightToolConfig', 'discrete': False}, 'npu': {'_target_': 'verl.utils.profiler.config.NPUToolConfig', 'contents': [], 'level': 'level0', 'analysis': True, 'discrete': False}, 'torch': {'_target_': 'verl.utils.profiler.config.TorchProfilerToolConfig', 'contents': [], 'discrete': False}, 'torch_memory': {'_target_': 'verl.utils.profiler.config.TorchMemoryToolConfig', 'trace_alloc_max_entries': 100000, 'stack_depth': 32}}}, 'router_replay': {'_target_': 'verl.workers.config.RouterReplayConfig', 'mode': 'disabled', 'record_file': None, 'replay_file': None}, 'fsdp_config': {'_target_': 'verl.workers.config.FSDPEngineConfig', 'wrap_policy': {'min_num_params': 0}, 'param_offload': False, 'optimizer_offload': False, 'offload_policy': False, 'reshard_after_forward': True, 'fsdp_size': -1, 'forward_prefetch': False, 'model_dtype': 'fp32', 'use_orig_params': False, 'seed': 42, 'full_determinism': False, 'ulysses_sequence_parallel_size': 1, 'entropy_from_logits_with_chunking': False, 'use_torch_compile': True, 'entropy_checkpointing': False, 'forward_only': True, 'strategy': 'fsdp', 'dtype': 'bfloat16'}, '_target_': 'verl.workers.config.FSDPActorConfig', 'ulysses_sequence_parallel_size': 1, 'entropy_from_logits_with_chunking': False, 'entropy_checkpointing': False}, 'rollout': {'_target_': 'verl.workers.config.RolloutConfig', 'name': 'vllm', 'mode': 'async', 'temperature': 1.0, 'top_k': -1, 'top_p': 1, 'prompt_length': 1024, 'response_length': 2048, 'dtype': 'bfloat16', 'gpu_memory_utilization': 0.6, 'ignore_eos': False, 'enforce_eager': False, 'cudagraph_capture_sizes': None, 'free_cache_engine': True, 'tensor_model_parallel_size': 1, 'data_parallel_size': 1, 'expert_parallel_size': 1, 'pipeline_model_parallel_size': 1, 'max_num_batched_tokens': 8192, 'max_model_len': 8192, 'max_num_seqs': 1024, 'enable_chunked_prefill': True, 'enable_prefix_caching': True, 'logprobs_mode': 'processed_logprobs', 'scheduling_policy': 'fcfs', 'load_format': 'dummy', 'log_prob_micro_batch_size': None, 'log_prob_micro_batch_size_per_gpu': 32, 'log_prob_use_dynamic_bsz': False, 'log_prob_max_token_len_per_gpu': 16384, 'disable_log_stats': True, 'do_sample': True, 'n': 3, 'over_sample_rate': 0, 'multi_stage_wake_up': False, 'engine_kwargs': {'vllm': {}, 'sglang': {}, 'trtllm': {}}, 'val_kwargs': {'_target_': 'verl.workers.config.SamplingConfig', 'top_k': -1, 'top_p': 1.0, 'temperature': 0, 'n': 1, 'do_sample': False}, 'multi_turn': {'_target_': 'verl.workers.config.MultiTurnConfig', 'enable': False, 'max_assistant_turns': None, 'tool_config_path': None, 'max_user_turns': None, 'max_parallel_calls': 1, 'max_tool_response_length': 256, 'tool_response_truncate_side': 'middle', 'interaction_config_path': None, 'use_inference_chat_template': False, 'tokenization_sanity_check_mode': 'strict', 'format': 'hermes', 'num_repeat_rollouts': None}, 'calculate_log_probs': False, 'agent': {'_target_': 'verl.workers.config.AgentLoopConfig', 'num_workers': 8, 'default_agent_loop': 'single_turn_agent', 'agent_loop_config_path': None, 'custom_async_server': {'_target_': 'verl.workers.config.CustomAsyncServerConfig', 'path': None, 'name': None}}, 'checkpoint_engine': {'_target_': 'verl.workers.config.CheckpointEngineConfig', 'backend': 'naive', 'update_weights_bucket_megabytes': 2048, 'engine_kwargs': {}}, 'trace': {'_target_': 'verl.workers.config.TraceConfig', 'backend': None, 'token2text': False, 'max_samples_per_step_per_worker': None}, 'skip_rollout': False, 'skip_dump_dir': '/tmp/rollout_dump', 'skip_tokenizer_init': True, 'enable_rollout_routing_replay': False, 'profiler': {'_target_': 'verl.utils.profiler.ProfilerConfig', 'tool': None, 'enable': False, 'all_ranks': False, 'ranks': [], 'save_path': 'outputs/profile', 'tool_config': {'nsys': {'_target_': 'verl.utils.profiler.config.NsightToolConfig', 'discrete': False}, 'npu': {'_target_': 'verl.utils.profiler.config.NPUToolConfig', 'contents': [], 'level': 'level0', 'analysis': True, 'discrete': False}, 'torch': {'_target_': 'verl.utils.profiler.config.TorchProfilerToolConfig', 'contents': [], 'discrete': False}, 'torch_memory': {'_target_': 'verl.utils.profiler.config.TorchMemoryToolConfig', 'trace_alloc_max_entries': 100000, 'stack_depth': 32}}}, 'prometheus': {'_target_': 'verl.workers.config.PrometheusConfig', 'enable': False, 'port': 9090, 'file': '/tmp/ray/session_latest/metrics/prometheus/prometheus.yml', 'served_model_name': 'Qwen/Qwen3-4B-Instruct-2507'}, 'quantization': None, 'quantization_config_file': None, 'mtp': {'_target_': 'verl.workers.config.MtpConfig', 'enable': False, 'enable_train': False, 'enable_rollout': False, 'detach_encoder': False, 'mtp_loss_scaling_factor': 0.1, 'speculative_algorithm': 'EAGLE', 'speculative_num_steps': 3, 'speculative_eagle_topk': 1, 'speculative_num_draft_tokens': 4, 'method': 'mtp', 'num_speculative_tokens': 1}, 'layered_summon': False}, 'model': {'_target_': 'verl.workers.config.HFModelConfig', 'path': 'Qwen/Qwen3-4B-Instruct-2507', 'hf_config_path': None, 'tokenizer_path': None, 'use_shm': False, 'trust_remote_code': False, 'custom_chat_template': None, 'external_lib': None, 'override_config': {}, 'enable_gradient_checkpointing': True, 'enable_activation_offload': False, 'use_remove_padding': True, 'lora_rank': 0, 'lora_alpha': 16, 'target_modules': 'all-linear', 'exclude_modules': None, 'lora_adapter_path': None, 'use_liger': False, 'use_fused_kernels': False, 'fused_kernel_options': {'impl_backend': 'torch'}, 'tiled_mlp': {'enabled': False, 'num_shards': 4}, 'mtp': {'_target_': 'verl.workers.config.MtpConfig', 'enable': False, 'enable_train': False, 'enable_rollout': False, 'detach_encoder': False, 'mtp_loss_scaling_factor': 0.1, 'speculative_algorithm': 'EAGLE', 'speculative_num_steps': 3, 'speculative_eagle_topk': 1, 'speculative_num_draft_tokens': 4, 'method': 'mtp', 'num_speculative_tokens': 1}}, 'hybrid_engine': True, 'nccl_timeout': 600}, 'data': {'tokenizer': None, 'use_shm': False, 'train_files': '/home/mshahidul/readctrl/code/RL_model/verl/verl_train/dataset/train.parquet', 'val_files': '/home/mshahidul/readctrl/code/RL_model/verl/verl_train/dataset/test.parquet', 'train_max_samples': -1, 'val_max_samples': -1, 'prompt_key': 'prompt', 'reward_fn_key': 'data_source', 'max_prompt_length': 1024, 'max_response_length': 2048, 'train_batch_size': 512, 'val_batch_size': None, 'tool_config_path': None, 'return_raw_input_ids': False, 'return_raw_chat': True, 'return_full_prompt': False, 'shuffle': True, 'seed': None, 'dataloader_num_workers': 8, 'image_patch_size': 14, 'validation_shuffle': False, 'filter_overlong_prompts': True, 'filter_overlong_prompts_workers': 1, 'truncation': 'error', 'image_key': 'images', 'video_key': 'videos', 'trust_remote_code': False, 'custom_cls': {'path': None, 'name': None}, 'return_multi_modal_inputs': True, 'sampler': {'class_path': None, 'class_name': None}, 'datagen': {'path': None, 'name': None}, 'apply_chat_template_kwargs': {}}, 'reward_manager': {'_target_': 'verl.trainer.config.config.RewardManagerConfig', 'source': 'register', 'name': 'naive', 'module': {'_target_': 'verl.trainer.config.config.ModuleConfig', 'path': None, 'name': 'custom_reward_manager'}}, 'critic': {'optim': {'_target_': 'verl.workers.config.FSDPOptimizerConfig', 'optimizer': 'AdamW', 'optimizer_impl': 'torch.optim', 'lr': 1e-05, 'lr_warmup_steps_ratio': 0.0, 'total_training_steps': 90, 'weight_decay': 0.01, 'lr_warmup_steps': -1, 'betas': [0.9, 0.999], 'clip_grad': 1.0, 'min_lr_ratio': 0.0, 'num_cycles': 0.5, 'lr_scheduler_type': 'constant', 'warmup_style': None, 'override_optimizer_config': None}, 'model': {'fsdp_config': {'_target_': 'verl.workers.config.FSDPEngineConfig', 'wrap_policy': {'min_num_params': 0}, 'param_offload': False, 'optimizer_offload': False, 'offload_policy': False, 'reshard_after_forward': True, 'fsdp_size': -1, 'forward_prefetch': False, 'model_dtype': 'fp32', 'use_orig_params': False, 'seed': 42, 'full_determinism': False, 'ulysses_sequence_parallel_size': 1, 'entropy_from_logits_with_chunking': False, 'use_torch_compile': True, 'entropy_checkpointing': False, 'forward_only': False, 'strategy': 'fsdp', 'dtype': 'bfloat16'}, 'path': '~/models/deepseek-llm-7b-chat', 'tokenizer_path': 'Qwen/Qwen3-4B-Instruct-2507', 'override_config': {}, 'external_lib': None, 'trust_remote_code': False, '_target_': 'verl.workers.config.FSDPCriticModelCfg', 'use_shm': False, 'enable_gradient_checkpointing': True, 'enable_activation_offload': False, 'use_remove_padding': False, 'lora_rank': 0, 'lora_alpha': 16, 'target_modules': 'all-linear', 'tiled_mlp': {'enabled': False, 'num_shards': 4}}, '_target_': 'verl.workers.config.FSDPCriticConfig', 'rollout_n': 3, 'strategy': 'fsdp', 'enable': None, 'ppo_mini_batch_size': 256, 'ppo_micro_batch_size': None, 'ppo_micro_batch_size_per_gpu': None, 'use_dynamic_bsz': False, 'ppo_max_token_len_per_gpu': 32768, 'forward_max_token_len_per_gpu': 32768, 'ppo_epochs': 1, 'shuffle': False, 'data_loader_seed': 42, 'cliprange_value': 0.5, 'loss_agg_mode': 'token-mean', 'checkpoint': {'_target_': 'verl.trainer.config.CheckpointConfig', 'save_contents': ['model', 'optimizer', 'extra'], 'load_contents': ['model', 'optimizer', 'extra'], 'async_save': False}, 'profiler': {'_target_': 'verl.utils.profiler.ProfilerConfig', 'tool': None, 'enable': False, 'all_ranks': False, 'ranks': [], 'save_path': 'outputs/profile', 'tool_config': {'nsys': {'_target_': 'verl.utils.profiler.config.NsightToolConfig', 'discrete': False}, 'npu': {'_target_': 'verl.utils.profiler.config.NPUToolConfig', 'contents': [], 'level': 'level0', 'analysis': True, 'discrete': False}, 'torch': {'_target_': 'verl.utils.profiler.config.TorchProfilerToolConfig', 'contents': [], 'discrete': False}, 'torch_memory': {'_target_': 'verl.utils.profiler.config.TorchMemoryToolConfig', 'trace_alloc_max_entries': 100000, 'stack_depth': 32}}}, 'forward_micro_batch_size': None, 'forward_micro_batch_size_per_gpu': None, 'ulysses_sequence_parallel_size': 1, 'grad_clip': 1.0}, 'reward_model': {'enable': False, 'enable_resource_pool': False, 'n_gpus_per_node': 8, 'nnodes': 0, 'strategy': 'fsdp', 'model': {'input_tokenizer': 'Qwen/Qwen3-4B-Instruct-2507', 'path': '~/models/FsfairX-LLaMA3-RM-v0.1', 'external_lib': None, 'trust_remote_code': False, 'override_config': {}, 'use_shm': False, 'use_remove_padding': False, 'use_fused_kernels': False, 'fsdp_config': {'_target_': 'verl.workers.config.FSDPEngineConfig', 'wrap_policy': {'min_num_params': 0}, 'param_offload': False, 'reshard_after_forward': True, 'fsdp_size': -1, 'forward_prefetch': False}}, 'micro_batch_size': None, 'micro_batch_size_per_gpu': None, 'max_length': None, 'use_dynamic_bsz': False, 'forward_max_token_len_per_gpu': 32768, 'reward_manager': 'naive', 'reward_loop_source': 'register', 'reward_loop_module_path': None, 'reward_loop_class_name': None, 'launch_reward_fn_async': False, 'sandbox_fusion': {'url': None, 'max_concurrent': 64, 'memory_limit_mb': 1024}, 'profiler': {'_target_': 'verl.utils.profiler.ProfilerConfig', 'tool': None, 'enable': False, 'all_ranks': False, 'ranks': [], 'save_path': 'outputs/profile', 'tool_config': {'nsys': {'_target_': 'verl.utils.profiler.config.NsightToolConfig', 'discrete': False}, 'npu': {'_target_': 'verl.utils.profiler.config.NPUToolConfig', 'contents': [], 'level': 'level0', 'analysis': True, 'discrete': False}, 'torch': {'_target_': 'verl.utils.profiler.config.TorchProfilerToolConfig', 'contents': [], 'discrete': False}, 'torch_memory': {'_target_': 'verl.utils.profiler.config.TorchMemoryToolConfig', 'trace_alloc_max_entries': 100000, 'stack_depth': 32}}}, 'ulysses_sequence_parallel_size': 1, 'use_reward_loop': True, 'num_workers': 1, 'rollout': {'_target_': 'verl.workers.config.RolloutConfig', 'name': '???', 'dtype': 'bfloat16', 'gpu_memory_utilization': 0.5, 'enforce_eager': True, 'cudagraph_capture_sizes': None, 'free_cache_engine': True, 'data_parallel_size': 1, 'expert_parallel_size': 1, 'tensor_model_parallel_size': 2, 'max_num_batched_tokens': 8192, 'max_model_len': None, 'max_num_seqs': 1024, 'load_format': 'auto', 'engine_kwargs': {}, 'limit_images': None, 'enable_chunked_prefill': True, 'enable_prefix_caching': True, 'disable_log_stats': True, 'skip_tokenizer_init': False, 'prompt_length': 2048, 'response_length': 2048}}, 'algorithm': {'rollout_correction': {'rollout_is': None, 'rollout_is_threshold': 2.0, 'rollout_rs': None, 'rollout_rs_threshold': None, 'bypass_mode': False, 'loss_type': 'ppo_clip', 'rollout_is_batch_normalize': False}, '_target_': 'verl.trainer.config.AlgoConfig', 'gamma': 1.0, 'lam': 1.0, 'adv_estimator': 'grpo', 'norm_adv_by_std_in_grpo': True, 'use_kl_in_reward': False, 'kl_penalty': 'kl', 'kl_ctrl': {'_target_': 'verl.trainer.config.KLControlConfig', 'type': 'fixed', 'kl_coef': 0.001, 'horizon': 10000, 'target_kl': 0.1}, 'use_pf_ppo': False, 'pf_ppo': {'reweight_method': 'pow', 'weight_pow': 2.0}}, 'custom_reward_function': {'path': '/home/mshahidul/readctrl/code/RL_model/verl/verl_train/reward_func/reward.py', 'name': 'compute_score'}, 'trainer': {'balance_batch': True, 'total_epochs': 15, 'total_training_steps': None, 'project_name': 'readctrl-verl', 'experiment_name': 'qwen3-4b-instruct-en', 'logger': ['console', 'wandb'], 'log_val_generations': 0, 'rollout_data_dir': None, 'validation_data_dir': None, 'nnodes': 1, 'n_gpus_per_node': 2, 'save_freq': 20, 'esi_redundant_time': 0, 'resume_mode': 'auto', 'resume_from_path': None, 'val_before_train': True, 'val_only': False, 'test_freq': 5, 'critic_warmup': 0, 'default_hdfs_dir': None, 'del_local_ckpt_after_load': False, 'default_local_dir': '/home/mshahidul/readctrl/code/RL_model/train_v2', 'max_actor_ckpt_to_keep': 1, 'max_critic_ckpt_to_keep': 1, 'ray_wait_register_center_timeout': 300, 'device': 'cuda', 'use_legacy_worker_impl': 'auto', 'remove_previous_ckpt_in_save': True}, 'global_profiler': {'_target_': 'verl.utils.profiler.ProfilerConfig', 'tool': None, 'steps': None, 'profile_continuous_steps': False, 'save_path': 'outputs/profile', 'global_tool_config': {'nsys': {'_target_': 'verl.utils.profiler.config.NsightToolConfig', 'discrete': False, 'controller_nsight_options': {'trace': 'cuda,nvtx,cublas,ucx', 'cuda-memory-usage': 'true', 'cuda-graph-trace': 'graph'}, 'worker_nsight_options': {'trace': 'cuda,nvtx,cublas,ucx', 'cuda-memory-usage': 'true', 'cuda-graph-trace': 'graph', 'capture-range': 'cudaProfilerApi', 'capture-range-end': None, 'kill': 'none'}}, 'torch_memory': {'trace_alloc_max_entries': 100000, 'stack_depth': 32, 'context': 'all', 'stacks': 'all', 'kw_args': {}}}}, 'transfer_queue': {'enable': False}, 'ray_kwargs': {'ray_init': {'num_cpus': None}, 'timeline_json_file': None}, '_wandb': {}} +2026-02-07 13:40:18,964 INFO MainThread:985279 [wandb_init.py:init():892] starting backend +2026-02-07 13:40:19,242 INFO MainThread:985279 [wandb_init.py:init():895] sending inform_init request +2026-02-07 13:40:19,250 INFO MainThread:985279 [wandb_init.py:init():903] backend started and connected +2026-02-07 13:40:19,260 INFO MainThread:985279 [wandb_init.py:init():973] updated telemetry +2026-02-07 13:40:19,283 INFO MainThread:985279 [wandb_init.py:init():997] communicating run to backend with 90.0 second timeout +2026-02-07 13:40:20,120 INFO MainThread:985279 [wandb_init.py:init():1042] starting run threads in backend +2026-02-07 13:40:20,863 INFO MainThread:985279 [wandb_run.py:_console_start():2529] atexit reg +2026-02-07 13:40:20,863 INFO MainThread:985279 [wandb_run.py:_redirect():2377] redirect: wrap_raw +2026-02-07 13:40:20,863 INFO MainThread:985279 [wandb_run.py:_redirect():2446] Wrapping output streams. +2026-02-07 13:40:20,864 INFO MainThread:985279 [wandb_run.py:_redirect():2469] Redirects installed. +2026-02-07 13:40:20,871 INFO MainThread:985279 [wandb_init.py:init():1082] run started, returning control to user process diff --git a/code/RL_model/verl/verl_train/wandb/run-20260209_134931-1bt9yf1w/files/output.log b/code/RL_model/verl/verl_train/wandb/run-20260209_134931-1bt9yf1w/files/output.log new file mode 100644 index 0000000000000000000000000000000000000000..048f584926303d4a7bed90e510fd3f45631a398c --- /dev/null +++ b/code/RL_model/verl/verl_train/wandb/run-20260209_134931-1bt9yf1w/files/output.log @@ -0,0 +1,14 @@ +wandb: Detected [dspy, litellm, openai] in use. +wandb: Use W&B Weave for improved LLM call tracing. Install Weave with `pip install weave` then add `import weave` to the top of your script. +wandb: For more information, check out the docs at: https://weave-docs.wandb.ai/ +Checkpoint tracker file does not exist: /home/mshahidul/readctrl/code/RL_model/train_v2/latest_checkpointed_iteration.txt +Training from scratch +test_gen_batch meta info: {'eos_token_id': 151645, 'pad_token_id': 151643, 'recompute_log_prob': False, 'do_sample': False, 'validate': True, 'global_steps': 0} +validation generation end +("Initial validation metrics: {'val-aux/multiclinsum/reward/mean@1': " + "np.float64(0.697760713885875), 'val-core/multiclinsum/acc/mean@1': " + "np.float64(0.6977607163442949), 'val-aux/num_turns/min': np.int32(2), " + "'val-aux/num_turns/max': np.int32(2), 'val-aux/num_turns/mean': " + 'np.float64(2.0)}') +step:0 - val-aux/multiclinsum/reward/mean@1:np.float64(0.697760713885875) - val-core/multiclinsum/acc/mean@1:np.float64(0.6977607163442949) - val-aux/num_turns/min:np.int32(2) - val-aux/num_turns/max:np.int32(2) - val-aux/num_turns/mean:np.float64(2.0) +Training Progress: 0%| | 0/90 [00:00, ?it/s] diff --git a/code/RL_model/verl/verl_train/wandb/run-20260209_134931-1bt9yf1w/files/requirements.txt b/code/RL_model/verl/verl_train/wandb/run-20260209_134931-1bt9yf1w/files/requirements.txt new file mode 100644 index 0000000000000000000000000000000000000000..dd2de633cd2e89d34a77b40967a7fc9ed3177c34 --- /dev/null +++ b/code/RL_model/verl/verl_train/wandb/run-20260209_134931-1bt9yf1w/files/requirements.txt @@ -0,0 +1,283 @@ +verl==0.8.0.dev0 +psutil==7.1.3 +colorama==0.4.6 +annotated-doc==0.0.4 +sentry-sdk==2.51.0 +requests==2.32.5 +nvidia-cufile-cu12==1.13.1.3 +ml_dtypes==0.5.4 +xformers==0.0.32.post1 +sglang==0.5.2 +multidict==6.7.1 +typing_extensions==4.15.0 +nvidia-cusparselt-cu12==0.7.1 +openai-harmony==0.0.4 +transformers==4.56.1 +Werkzeug==3.1.5 +identify==2.6.16 +gepa==0.0.26 +pytest==9.0.2 +nvidia-cuda-runtime-cu12==12.8.90 +GitPython==3.1.46 +cupy-cuda12x==13.6.0 +tokenizers==0.22.2 +pybind11==3.0.1 +google-api-core==2.29.0 +partial-json-parser==0.2.1.1.post7 +aiohttp-cors==0.8.1 +sniffio==1.3.1 +tensordict==0.10.0 +smart_open==7.5.0 +cffi==2.0.0 +wcwidth==0.5.3 +asttokens==3.0.1 +opencensus==0.11.4 +rpds-py==0.30.0 +py-spy==0.4.1 +litellm==1.81.9 +gguf==0.17.1 +nvidia-nvjitlink-cu12==12.8.93 +httpx==0.28.1 +cuda-python==13.1.1 +annotated-types==0.7.0 +regex==2026.1.15 +vllm==0.11.0 +idna==3.11 +fsspec==2025.10.0 +parso==0.8.5 +pydantic-extra-types==2.11.0 +MarkupSafe==3.0.3 +cryptography==46.0.4 +openai==2.17.0 +filelock==3.20.3 +modelscope==1.34.0 +outlines==0.1.11 +dnspython==2.8.0 +scipy==1.17.0 +zipp==3.23.0 +PyYAML==6.0.3 +onnx==1.20.1 +torchdata==0.11.0 +cuda-pathfinder==1.3.3 +asyncer==0.0.8 +verl==0.8.0.dev0 +httptools==0.7.1 +opencv-python-headless==4.13.0.90 +importlib_metadata==8.7.1 +peft==0.18.1 +opentelemetry-sdk==1.39.1 +python-json-logger==4.0.0 +alembic==1.18.3 +cuda-bindings==13.1.1 +mdurl==0.1.2 +referencing==0.37.0 +xxhash==3.6.0 +interegular==0.3.3 +fastapi-cli==0.0.20 +uv==0.9.28 +tensorboard==2.20.0 +nvidia-cublas-cu12==12.8.4.1 +sentencepiece==0.2.1 +rich-toolkit==0.18.1 +numpy==2.2.0 +yarl==1.22.0 +opencv-fixer==0.2.5 +tqdm==4.67.2 +python-dotenv==1.2.1 +Mako==1.3.10 +timm==1.0.16 +aiohappyeyeballs==2.6.1 +decord==0.6.0 +jiter==0.12.0 +airportsdata==20250909 +markdown-it-py==4.0.0 +nvidia-cusolver-cu12==11.7.3.90 +pyarrow==23.0.0 +opentelemetry-proto==1.39.1 +anyio==4.12.1 +pycryptodomex==3.23.0 +prometheus_client==0.24.1 +aiohttp==3.13.3 +urllib3==2.6.3 +pexpect==4.9.0 +pydantic-settings==2.12.0 +distro==1.9.0 +av==16.1.0 +cloudpickle==3.1.2 +mpmath==1.3.0 +certifi==2026.1.4 +antlr4-python3-runtime==4.9.3 +torchvision==0.23.0 +accelerate==1.12.0 +watchfiles==1.1.1 +ruff==0.14.14 +wheel==0.46.3 +omegaconf==2.3.0 +nvidia-cufft-cu12==11.3.3.83 +multiprocess==0.70.18 +frozendict==2.4.7 +sympy==1.14.0 +setproctitle==1.3.7 +optuna==4.7.0 +setuptools==79.0.1 +py-cpuinfo==9.0.0 +ipython_pygments_lexers==1.1.1 +rich==14.3.2 +uvicorn==0.40.0 +outlines_core==0.2.11 +llvmlite==0.44.0 +nvidia-cuda-cupti-cu12==12.8.90 +attrs==25.4.0 +anthropic==0.77.0 +packaging==25.0 +fastrlock==0.8.3 +astor==0.8.1 +pluggy==1.6.0 +nvidia-cuda-nvrtc-cu12==12.8.93 +psutil==7.2.2 +virtualenv==20.36.1 +cbor2==5.8.0 +tenacity==9.1.4 +compressed-tensors==0.11.0 +SQLAlchemy==2.0.46 +nvidia-cusparse-cu12==12.5.8.93 +networkx==3.6.1 +httpcore==1.0.9 +onnxscript==0.3.1 +smmap==5.0.2 +opencv-python==4.13.0.90 +traitlets==5.14.3 +python-multipart==0.0.22 +pyvers==0.1.0 +huggingface-hub==0.36.0 +pillow==12.1.0 +jsonschema==4.26.0 +cfgv==3.5.0 +optree==0.18.0 +email-validator==2.3.0 +tabulate==0.9.0 +pre_commit==4.5.1 +msgpack==1.1.2 +depyf==0.19.0 +numba==0.61.2 +six==1.17.0 +aiosignal==1.4.0 +nvidia-nvtx-cu12==12.8.90 +propcache==0.4.1 +torch_memory_saver==0.0.8 +h11==0.16.0 +frozenlist==1.8.0 +websockets==16.0 +nvidia-cudnn-frontend==1.18.0 +build==1.4.0 +google-auth==2.48.0 +pycountry==24.6.1 +colorlog==6.10.1 +stack-data==0.6.3 +typing-inspection==0.4.2 +googleapis-common-protos==1.72.0 +pandas==3.0.0 +typer==0.21.1 +protobuf==6.33.5 +fastapi==0.128.0 +blake3==1.0.8 +opentelemetry-semantic-conventions==0.60b1 +opentelemetry-exporter-prometheus==0.60b1 +nvidia-cudnn-cu12==9.10.2.21 +Markdown==3.10.1 +liger_kernel==0.6.4 +json_repair==0.57.1 +nodeenv==1.10.0 +prompt_toolkit==3.0.52 +torchaudio==2.8.0 +codetiming==1.4.0 +platformdirs==4.5.1 +jsonschema-specifications==2025.9.1 +hydra-core==1.3.2 +tensorboard-data-server==0.7.2 +lm-format-enforcer==0.11.3 +pyasn1_modules==0.4.2 +tiktoken==0.12.0 +starlette==0.50.0 +pyproject_hooks==1.2.0 +flash_attn==2.8.1 +rsa==4.9.1 +ray==2.53.0 +nest-asyncio==1.6.0 +lark==1.2.2 +fastar==0.8.0 +orjson==3.11.6 +prometheus-fastapi-instrumentator==7.1.0 +opentelemetry-api==1.39.1 +mathruler==0.1.0 +pydantic_core==2.41.5 +fastapi-cloud-cli==0.11.0 +pynvml==13.0.1 +loguru==0.7.3 +torch==2.8.0 +msgspec==0.20.0 +nvidia-curand-cu12==10.3.9.90 +blobfile==3.0.0 +gitdb==4.0.12 +llguidance==0.7.30 +hf_transfer==0.1.9 +nvidia-nccl-cu12==2.27.3 +qwen-vl-utils==0.0.14 +ptyprocess==0.7.0 +ipdb==0.13.13 +opencensus-context==0.1.3 +jedi==0.19.2 +click==8.3.1 +datasets==4.5.0 +soxr==1.0.0 +sgl-kernel==0.3.9.post2 +colorful==0.5.8 +pyasn1==0.6.2 +charset-normalizer==3.4.4 +nvidia-ml-py==13.590.48 +hf-xet==1.2.0 +dill==0.4.0 +absl-py==2.4.0 +pydantic==2.12.5 +dspy==3.1.3 +wrapt==2.1.0 +flashinfer-python==0.3.1 +python-dateutil==2.9.0.post0 +torchao==0.9.0 +cachetools==7.0.0 +soundfile==0.13.1 +diskcache==5.6.3 +onnx-ir==0.1.15 +docstring_parser==0.17.0 +matplotlib-inline==0.2.1 +Pygments==2.19.2 +wandb==0.24.1 +pure_eval==0.2.3 +ninja==1.13.0 +proto-plus==1.27.0 +pyzmq==27.1.0 +iniconfig==2.3.0 +Jinja2==3.1.6 +megatron-core==0.13.1 +uvloop==0.22.1 +fastuuid==0.14.0 +pycparser==3.0 +pylatexenc==2.10 +decorator==5.2.1 +shellingham==1.5.4 +lxml==6.0.2 +safetensors==0.7.0 +xgrammar==0.1.25 +pybase64==1.4.3 +ipython==9.9.0 +greenlet==3.3.1 +mistral_common==1.9.0 +rignore==0.7.6 +einops==0.8.2 +distlib==0.4.0 +triton==3.4.0 +executing==2.2.1 +grpcio==1.76.0 +pip==25.3 +verl==0.8.0.dev0 +verl==0.8.0.dev0 diff --git a/code/RL_model/verl/verl_train/wandb/run-20260209_134931-1bt9yf1w/files/wandb-metadata.json b/code/RL_model/verl/verl_train/wandb/run-20260209_134931-1bt9yf1w/files/wandb-metadata.json new file mode 100644 index 0000000000000000000000000000000000000000..cd079d4e2641308d90b573fb3dc85cbc81afc342 --- /dev/null +++ b/code/RL_model/verl/verl_train/wandb/run-20260209_134931-1bt9yf1w/files/wandb-metadata.json @@ -0,0 +1,93 @@ +{ + "os": "Linux-5.15.0-160-generic-x86_64-with-glibc2.35", + "python": "CPython 3.12.12", + "startedAt": "2026-02-09T18:49:31.389571Z", + "args": [ + "--node-ip-address=172.16.34.29", + "--node-manager-port=35681", + "--object-store-name=/tmp/ray/session_2026-02-09_13-42-08_618732_2570965/sockets/plasma_store", + "--raylet-name=/tmp/ray/session_2026-02-09_13-42-08_618732_2570965/sockets/raylet", + "--redis-address=None", + "--metrics-agent-port=62828", + "--logging-rotate-bytes=536870912", + "--logging-rotate-backup-count=5", + "--runtime-env-agent-port=53235", + "--gcs-address=172.16.34.29:55755", + "--session-name=session_2026-02-09_13-42-08_618732_2570965", + "--temp-dir=/tmp/ray", + "--webui=127.0.0.1:8301", + "--cluster-id=0daa93ba39ae3cc2ee93bd203e3ccd51fd8a622c46e34753d7113d46", + "--startup-token=128", + "--worker-launch-time-ms=1770662543316", + "--node-id=ecebb65123f76ead3c1959af07a6e81b5d58be19b8637e533386a674", + "--runtime-env-hash=1096984665" + ], + "program": "/home/mshahidul/miniconda3/envs/verl2/lib/python3.12/site-packages/ray/_private/workers/default_worker.py", + "git": { + "remote": "https://github.com/verl-project/verl", + "commit": "d9939add7a2a01923a9088891f913a5d20c4e622" + }, + "email": "shahidulshakib034@gmail.com", + "root": "/data/home_beta/mshahidul/readctrl/code/RL_model/verl/verl_train", + "host": "gamma", + "executable": "/home/mshahidul/miniconda3/envs/verl2/bin/python3", + "cpu_count": 64, + "cpu_count_logical": 128, + "gpu": "NVIDIA A100 80GB PCIe", + "gpu_count": 6, + "disk": { + "/": { + "total": "3766429188096", + "used": "190310715392" + } + }, + "memory": { + "total": "1081814863872" + }, + "gpu_nvidia": [ + { + "name": "NVIDIA A100 80GB PCIe", + "memoryTotal": "85899345920", + "cudaCores": 6912, + "architecture": "Ampere", + "uuid": "GPU-4a3678c7-34a9-356f-f7b7-7f7e2f44b596" + }, + { + "name": "NVIDIA A100 80GB PCIe", + "memoryTotal": "85899345920", + "cudaCores": 6912, + "architecture": "Ampere", + "uuid": "GPU-df506764-0db5-91b4-8ec9-154a3bb8123f" + }, + { + "name": "NVIDIA A100 80GB PCIe", + "memoryTotal": "85899345920", + "cudaCores": 6912, + "architecture": "Ampere", + "uuid": "GPU-2c3dbd62-b384-2996-a0f6-b32dcfcc3538" + }, + { + "name": "NVIDIA A100 80GB PCIe", + "memoryTotal": "85899345920", + "cudaCores": 6912, + "architecture": "Ampere", + "uuid": "GPU-1ff3dabe-4b9a-ea62-5cc3-01f12f32d328" + }, + { + "name": "NVIDIA H100 PCIe", + "memoryTotal": "85520809984", + "cudaCores": 14592, + "architecture": "Hopper", + "uuid": "GPU-eefc4b8c-0e79-c1d6-a9ff-8325040572eb" + }, + { + "name": "NVIDIA H100 PCIe", + "memoryTotal": "85520809984", + "cudaCores": 14592, + "architecture": "Hopper", + "uuid": "GPU-d42b6057-13e8-1e88-6aa1-9307df72dece" + } + ], + "cudaVersion": "13.0", + "writerId": "1q91yficctuqr3ffuibg1yb0j0pr86f1" +} \ No newline at end of file diff --git a/code/RL_model/verl/verl_train/wandb/run-20260209_134931-1bt9yf1w/logs/debug-core.log b/code/RL_model/verl/verl_train/wandb/run-20260209_134931-1bt9yf1w/logs/debug-core.log new file mode 100644 index 0000000000000000000000000000000000000000..fbbdf5c6919f59aa0b7374f8dae2b8cd10dc043f --- /dev/null +++ b/code/RL_model/verl/verl_train/wandb/run-20260209_134931-1bt9yf1w/logs/debug-core.log @@ -0,0 +1,7 @@ +{"time":"2026-02-09T13:49:31.574266092-05:00","level":"INFO","msg":"main: starting server","port-filename":"/tmp/tmp2t9bobxg/port-2582064.txt","pid":2582064,"log-level":0,"disable-analytics":false,"shutdown-on-parent-exit":false,"enable-dcgm-profiling":false} +{"time":"2026-02-09T13:49:31.57548892-05:00","level":"INFO","msg":"server: will exit if parent process dies","ppid":2582064} +{"time":"2026-02-09T13:49:31.575491107-05:00","level":"INFO","msg":"server: accepting connections","addr":{"Name":"/tmp/wandb-2582064-2595313-1678983126/socket","Net":"unix"}} +{"time":"2026-02-09T13:49:31.733956428-05:00","level":"INFO","msg":"connection: ManageConnectionData: new connection created","id":"1(@)"} +{"time":"2026-02-09T13:49:31.753673265-05:00","level":"INFO","msg":"handleInformInit: received","streamId":"1bt9yf1w","id":"1(@)"} +{"time":"2026-02-09T13:49:33.286947279-05:00","level":"INFO","msg":"handleInformInit: stream started","streamId":"1bt9yf1w","id":"1(@)"} +{"time":"2026-02-09T13:49:39.699760308-05:00","level":"INFO","msg":"connection: cancelling request","id":"1(@)","requestId":"xf46bdycpcib"} diff --git a/code/RL_model/verl/verl_train/wandb/run-20260209_134931-1bt9yf1w/logs/debug-internal.log b/code/RL_model/verl/verl_train/wandb/run-20260209_134931-1bt9yf1w/logs/debug-internal.log new file mode 100644 index 0000000000000000000000000000000000000000..5d8b973f1e5f786b9b02456aaa4ff517134fd56c --- /dev/null +++ b/code/RL_model/verl/verl_train/wandb/run-20260209_134931-1bt9yf1w/logs/debug-internal.log @@ -0,0 +1,8 @@ +{"time":"2026-02-09T13:49:31.75524847-05:00","level":"INFO","msg":"stream: starting","core version":"0.24.1"} +{"time":"2026-02-09T13:49:33.283741989-05:00","level":"INFO","msg":"stream: created new stream","id":"1bt9yf1w"} +{"time":"2026-02-09T13:49:33.284008148-05:00","level":"INFO","msg":"handler: started","stream_id":"1bt9yf1w"} +{"time":"2026-02-09T13:49:33.286895283-05:00","level":"INFO","msg":"stream: started","id":"1bt9yf1w"} +{"time":"2026-02-09T13:49:33.286912602-05:00","level":"INFO","msg":"writer: started","stream_id":"1bt9yf1w"} +{"time":"2026-02-09T13:49:33.286920547-05:00","level":"INFO","msg":"sender: started","stream_id":"1bt9yf1w"} +{"time":"2026-02-09T14:58:53.807356305-05:00","level":"INFO","msg":"api: retrying HTTP error","status":502,"url":"https://api.wandb.ai/files/shahidulshakib034-khulna-university-of-engineering-techn/readctrl-verl/1bt9yf1w/file_stream","body":"\n\n\nPlease try again in 30 seconds.\n
\n\n"} +{"time":"2026-02-09T15:16:49.160385314-05:00","level":"INFO","msg":"api: retrying HTTP error","status":502,"url":"https://api.wandb.ai/files/shahidulshakib034-khulna-university-of-engineering-techn/readctrl-verl/1bt9yf1w/file_stream","body":"\n\n\nPlease try again in 30 seconds.\n
\n\n"} diff --git a/code/RL_model/verl/verl_train/wandb/run-20260209_134931-1bt9yf1w/logs/debug.log b/code/RL_model/verl/verl_train/wandb/run-20260209_134931-1bt9yf1w/logs/debug.log new file mode 100644 index 0000000000000000000000000000000000000000..72d85dbd24168bfa806ea75fac99f7383aa132ef --- /dev/null +++ b/code/RL_model/verl/verl_train/wandb/run-20260209_134931-1bt9yf1w/logs/debug.log @@ -0,0 +1,19 @@ +2026-02-09 13:49:31,410 INFO MainThread:2582064 [wandb_setup.py:_flush():81] Current SDK version is 0.24.1 +2026-02-09 13:49:31,411 INFO MainThread:2582064 [wandb_setup.py:_flush():81] Configure stats pid to 2582064 +2026-02-09 13:49:31,411 INFO MainThread:2582064 [wandb_setup.py:_flush():81] Loading settings from environment variables +2026-02-09 13:49:31,411 INFO MainThread:2582064 [wandb_init.py:setup_run_log_directory():717] Logging user logs to /data/home_beta/mshahidul/readctrl/code/RL_model/verl/verl_train/wandb/run-20260209_134931-1bt9yf1w/logs/debug.log +2026-02-09 13:49:31,411 INFO MainThread:2582064 [wandb_init.py:setup_run_log_directory():718] Logging internal logs to /data/home_beta/mshahidul/readctrl/code/RL_model/verl/verl_train/wandb/run-20260209_134931-1bt9yf1w/logs/debug-internal.log +2026-02-09 13:49:31,411 INFO MainThread:2582064 [wandb_init.py:init():844] calling init triggers +2026-02-09 13:49:31,413 INFO MainThread:2582064 [wandb_init.py:init():849] wandb.init called with sweep_config: {} +config: {'actor_rollout_ref': {'actor': {'optim': {'_target_': 'verl.workers.config.FSDPOptimizerConfig', 'optimizer': 'AdamW', 'optimizer_impl': 'torch.optim', 'lr': 1e-06, 'lr_warmup_steps_ratio': 0.0, 'total_training_steps': 90, 'weight_decay': 0.01, 'lr_warmup_steps': -1, 'betas': [0.9, 0.999], 'clip_grad': 1.0, 'min_lr_ratio': 0.0, 'num_cycles': 0.5, 'lr_scheduler_type': 'constant', 'warmup_style': None, 'override_optimizer_config': None}, 'fsdp_config': {'_target_': 'verl.workers.config.FSDPEngineConfig', 'wrap_policy': {'min_num_params': 0}, 'param_offload': False, 'optimizer_offload': False, 'offload_policy': False, 'reshard_after_forward': True, 'fsdp_size': -1, 'forward_prefetch': False, 'model_dtype': 'fp32', 'use_orig_params': False, 'seed': 42, 'full_determinism': False, 'ulysses_sequence_parallel_size': 1, 'entropy_from_logits_with_chunking': False, 'use_torch_compile': True, 'entropy_checkpointing': False, 'forward_only': False, 'strategy': 'fsdp', 'dtype': 'bfloat16'}, '_target_': 'verl.workers.config.FSDPActorConfig', 'rollout_n': 3, 'strategy': 'fsdp', 'ppo_mini_batch_size': 256, 'ppo_micro_batch_size': None, 'ppo_micro_batch_size_per_gpu': 32, 'use_dynamic_bsz': False, 'ppo_max_token_len_per_gpu': 16384, 'clip_ratio': 0.2, 'clip_ratio_low': 0.2, 'clip_ratio_high': 0.2, 'tau_pos': 1.0, 'tau_neg': 1.05, 'freeze_vision_tower': False, 'policy_loss': {'_target_': 'verl.workers.config.PolicyLossConfig', 'loss_mode': 'vanilla', 'clip_cov_ratio': 0.0002, 'clip_cov_lb': 1.0, 'clip_cov_ub': 5.0, 'kl_cov_ratio': 0.0002, 'ppo_kl_coef': 0.1}, 'clip_ratio_c': 3.0, 'loss_agg_mode': 'token-mean', 'loss_scale_factor': None, 'entropy_coeff': 0, 'calculate_entropy': False, 'use_kl_loss': True, 'use_prefix_grouper': False, 'use_torch_compile': True, 'kl_loss_coef': 0.001, 'kl_loss_type': 'low_var_kl', 'ppo_epochs': 1, 'shuffle': False, 'data_loader_seed': 42, 'checkpoint': {'_target_': 'verl.trainer.config.CheckpointConfig', 'save_contents': ['model', 'optimizer', 'extra'], 'load_contents': ['model', 'optimizer', 'extra'], 'async_save': False}, 'use_fused_kernels': False, 'profiler': {'_target_': 'verl.utils.profiler.ProfilerConfig', 'tool': None, 'enable': False, 'all_ranks': False, 'ranks': [], 'save_path': 'outputs/profile', 'tool_config': {'nsys': {'_target_': 'verl.utils.profiler.config.NsightToolConfig', 'discrete': False}, 'npu': {'_target_': 'verl.utils.profiler.config.NPUToolConfig', 'contents': [], 'level': 'level0', 'analysis': True, 'discrete': False}, 'torch': {'_target_': 'verl.utils.profiler.config.TorchProfilerToolConfig', 'contents': [], 'discrete': False}, 'torch_memory': {'_target_': 'verl.utils.profiler.config.TorchMemoryToolConfig', 'trace_alloc_max_entries': 100000, 'stack_depth': 32}}}, 'router_replay': {'_target_': 'verl.workers.config.RouterReplayConfig', 'mode': 'disabled', 'record_file': None, 'replay_file': None}, 'grad_clip': 1.0, 'ulysses_sequence_parallel_size': 1, 'entropy_from_logits_with_chunking': False, 'entropy_checkpointing': False, 'use_remove_padding': True, 'calculate_sum_pi_squared': False, 'sum_pi_squared_checkpointing': False}, 'ref': {'rollout_n': 3, 'strategy': 'fsdp', 'use_torch_compile': True, 'log_prob_micro_batch_size': None, 'log_prob_micro_batch_size_per_gpu': 32, 'log_prob_use_dynamic_bsz': False, 'log_prob_max_token_len_per_gpu': 16384, 'profiler': {'_target_': 'verl.utils.profiler.ProfilerConfig', 'tool': None, 'enable': False, 'all_ranks': False, 'ranks': [], 'save_path': 'outputs/profile', 'tool_config': {'nsys': {'_target_': 'verl.utils.profiler.config.NsightToolConfig', 'discrete': False}, 'npu': {'_target_': 'verl.utils.profiler.config.NPUToolConfig', 'contents': [], 'level': 'level0', 'analysis': True, 'discrete': False}, 'torch': {'_target_': 'verl.utils.profiler.config.TorchProfilerToolConfig', 'contents': [], 'discrete': False}, 'torch_memory': {'_target_': 'verl.utils.profiler.config.TorchMemoryToolConfig', 'trace_alloc_max_entries': 100000, 'stack_depth': 32}}}, 'router_replay': {'_target_': 'verl.workers.config.RouterReplayConfig', 'mode': 'disabled', 'record_file': None, 'replay_file': None}, 'fsdp_config': {'_target_': 'verl.workers.config.FSDPEngineConfig', 'wrap_policy': {'min_num_params': 0}, 'param_offload': False, 'optimizer_offload': False, 'offload_policy': False, 'reshard_after_forward': True, 'fsdp_size': -1, 'forward_prefetch': False, 'model_dtype': 'fp32', 'use_orig_params': False, 'seed': 42, 'full_determinism': False, 'ulysses_sequence_parallel_size': 1, 'entropy_from_logits_with_chunking': False, 'use_torch_compile': True, 'entropy_checkpointing': False, 'forward_only': True, 'strategy': 'fsdp', 'dtype': 'bfloat16'}, '_target_': 'verl.workers.config.FSDPActorConfig', 'ulysses_sequence_parallel_size': 1, 'entropy_from_logits_with_chunking': False, 'entropy_checkpointing': False}, 'rollout': {'_target_': 'verl.workers.config.RolloutConfig', 'name': 'vllm', 'mode': 'async', 'temperature': 1.0, 'top_k': -1, 'top_p': 1, 'prompt_length': 1024, 'response_length': 2048, 'dtype': 'bfloat16', 'gpu_memory_utilization': 0.6, 'ignore_eos': False, 'enforce_eager': False, 'cudagraph_capture_sizes': None, 'free_cache_engine': True, 'tensor_model_parallel_size': 1, 'data_parallel_size': 1, 'expert_parallel_size': 1, 'pipeline_model_parallel_size': 1, 'max_num_batched_tokens': 8192, 'max_model_len': 8192, 'max_num_seqs': 1024, 'enable_chunked_prefill': True, 'enable_prefix_caching': True, 'logprobs_mode': 'processed_logprobs', 'scheduling_policy': 'fcfs', 'load_format': 'dummy', 'log_prob_micro_batch_size': None, 'log_prob_micro_batch_size_per_gpu': 32, 'log_prob_use_dynamic_bsz': False, 'log_prob_max_token_len_per_gpu': 16384, 'disable_log_stats': True, 'do_sample': True, 'n': 3, 'over_sample_rate': 0, 'multi_stage_wake_up': False, 'engine_kwargs': {'vllm': {}, 'sglang': {}, 'trtllm': {}}, 'val_kwargs': {'_target_': 'verl.workers.config.SamplingConfig', 'top_k': -1, 'top_p': 1.0, 'temperature': 0, 'n': 1, 'do_sample': False}, 'multi_turn': {'_target_': 'verl.workers.config.MultiTurnConfig', 'enable': False, 'max_assistant_turns': None, 'tool_config_path': None, 'max_user_turns': None, 'max_parallel_calls': 1, 'max_tool_response_length': 256, 'tool_response_truncate_side': 'middle', 'interaction_config_path': None, 'use_inference_chat_template': False, 'tokenization_sanity_check_mode': 'strict', 'format': 'hermes', 'num_repeat_rollouts': None}, 'calculate_log_probs': False, 'agent': {'_target_': 'verl.workers.config.AgentLoopConfig', 'num_workers': 8, 'default_agent_loop': 'single_turn_agent', 'agent_loop_config_path': None, 'custom_async_server': {'_target_': 'verl.workers.config.CustomAsyncServerConfig', 'path': None, 'name': None}}, 'checkpoint_engine': {'_target_': 'verl.workers.config.CheckpointEngineConfig', 'backend': 'naive', 'update_weights_bucket_megabytes': 2048, 'engine_kwargs': {}}, 'trace': {'_target_': 'verl.workers.config.TraceConfig', 'backend': None, 'token2text': False, 'max_samples_per_step_per_worker': None}, 'skip_rollout': False, 'skip_dump_dir': '/tmp/rollout_dump', 'skip_tokenizer_init': True, 'enable_rollout_routing_replay': False, 'profiler': {'_target_': 'verl.utils.profiler.ProfilerConfig', 'tool': None, 'enable': False, 'all_ranks': False, 'ranks': [], 'save_path': 'outputs/profile', 'tool_config': {'nsys': {'_target_': 'verl.utils.profiler.config.NsightToolConfig', 'discrete': False}, 'npu': {'_target_': 'verl.utils.profiler.config.NPUToolConfig', 'contents': [], 'level': 'level0', 'analysis': True, 'discrete': False}, 'torch': {'_target_': 'verl.utils.profiler.config.TorchProfilerToolConfig', 'contents': [], 'discrete': False}, 'torch_memory': {'_target_': 'verl.utils.profiler.config.TorchMemoryToolConfig', 'trace_alloc_max_entries': 100000, 'stack_depth': 32}}}, 'prometheus': {'_target_': 'verl.workers.config.PrometheusConfig', 'enable': False, 'port': 9090, 'file': '/tmp/ray/session_latest/metrics/prometheus/prometheus.yml', 'served_model_name': 'Qwen/Qwen3-4B-Instruct-2507'}, 'quantization': None, 'quantization_config_file': None, 'mtp': {'_target_': 'verl.workers.config.MtpConfig', 'enable': False, 'enable_train': False, 'enable_rollout': False, 'detach_encoder': False, 'mtp_loss_scaling_factor': 0.1, 'speculative_algorithm': 'EAGLE', 'speculative_num_steps': 3, 'speculative_eagle_topk': 1, 'speculative_num_draft_tokens': 4, 'method': 'mtp', 'num_speculative_tokens': 1}, 'layered_summon': False}, 'model': {'_target_': 'verl.workers.config.HFModelConfig', 'path': 'Qwen/Qwen3-4B-Instruct-2507', 'hf_config_path': None, 'tokenizer_path': None, 'use_shm': False, 'trust_remote_code': False, 'custom_chat_template': None, 'external_lib': None, 'override_config': {}, 'enable_gradient_checkpointing': True, 'enable_activation_offload': False, 'use_remove_padding': True, 'lora_rank': 0, 'lora_alpha': 16, 'target_modules': 'all-linear', 'exclude_modules': None, 'lora_adapter_path': None, 'use_liger': False, 'use_fused_kernels': False, 'fused_kernel_options': {'impl_backend': 'torch'}, 'tiled_mlp': {'enabled': False, 'num_shards': 4}, 'mtp': {'_target_': 'verl.workers.config.MtpConfig', 'enable': False, 'enable_train': False, 'enable_rollout': False, 'detach_encoder': False, 'mtp_loss_scaling_factor': 0.1, 'speculative_algorithm': 'EAGLE', 'speculative_num_steps': 3, 'speculative_eagle_topk': 1, 'speculative_num_draft_tokens': 4, 'method': 'mtp', 'num_speculative_tokens': 1}}, 'hybrid_engine': True, 'nccl_timeout': 600}, 'data': {'tokenizer': None, 'use_shm': False, 'train_files': '/home/mshahidul/readctrl/code/RL_model/verl/verl_train/dataset/train.parquet', 'val_files': '/home/mshahidul/readctrl/code/RL_model/verl/verl_train/dataset/test.parquet', 'train_max_samples': -1, 'val_max_samples': -1, 'prompt_key': 'prompt', 'reward_fn_key': 'data_source', 'max_prompt_length': 1024, 'max_response_length': 2048, 'train_batch_size': 512, 'val_batch_size': None, 'tool_config_path': None, 'return_raw_input_ids': False, 'return_raw_chat': True, 'return_full_prompt': False, 'shuffle': True, 'seed': None, 'dataloader_num_workers': 8, 'image_patch_size': 14, 'validation_shuffle': False, 'filter_overlong_prompts': True, 'filter_overlong_prompts_workers': 1, 'truncation': 'error', 'image_key': 'images', 'video_key': 'videos', 'trust_remote_code': False, 'custom_cls': {'path': None, 'name': None}, 'return_multi_modal_inputs': True, 'sampler': {'class_path': None, 'class_name': None}, 'datagen': {'path': None, 'name': None}, 'apply_chat_template_kwargs': {}}, 'reward_manager': {'_target_': 'verl.trainer.config.config.RewardManagerConfig', 'source': 'register', 'name': 'naive', 'module': {'_target_': 'verl.trainer.config.config.ModuleConfig', 'path': None, 'name': 'custom_reward_manager'}}, 'critic': {'optim': {'_target_': 'verl.workers.config.FSDPOptimizerConfig', 'optimizer': 'AdamW', 'optimizer_impl': 'torch.optim', 'lr': 1e-05, 'lr_warmup_steps_ratio': 0.0, 'total_training_steps': 90, 'weight_decay': 0.01, 'lr_warmup_steps': -1, 'betas': [0.9, 0.999], 'clip_grad': 1.0, 'min_lr_ratio': 0.0, 'num_cycles': 0.5, 'lr_scheduler_type': 'constant', 'warmup_style': None, 'override_optimizer_config': None}, 'model': {'fsdp_config': {'_target_': 'verl.workers.config.FSDPEngineConfig', 'wrap_policy': {'min_num_params': 0}, 'param_offload': False, 'optimizer_offload': False, 'offload_policy': False, 'reshard_after_forward': True, 'fsdp_size': -1, 'forward_prefetch': False, 'model_dtype': 'fp32', 'use_orig_params': False, 'seed': 42, 'full_determinism': False, 'ulysses_sequence_parallel_size': 1, 'entropy_from_logits_with_chunking': False, 'use_torch_compile': True, 'entropy_checkpointing': False, 'forward_only': False, 'strategy': 'fsdp', 'dtype': 'bfloat16'}, 'path': '~/models/deepseek-llm-7b-chat', 'tokenizer_path': 'Qwen/Qwen3-4B-Instruct-2507', 'override_config': {}, 'external_lib': None, 'trust_remote_code': False, '_target_': 'verl.workers.config.FSDPCriticModelCfg', 'use_shm': False, 'enable_gradient_checkpointing': True, 'enable_activation_offload': False, 'use_remove_padding': False, 'lora_rank': 0, 'lora_alpha': 16, 'target_modules': 'all-linear', 'tiled_mlp': {'enabled': False, 'num_shards': 4}}, '_target_': 'verl.workers.config.FSDPCriticConfig', 'rollout_n': 3, 'strategy': 'fsdp', 'enable': None, 'ppo_mini_batch_size': 256, 'ppo_micro_batch_size': None, 'ppo_micro_batch_size_per_gpu': None, 'use_dynamic_bsz': False, 'ppo_max_token_len_per_gpu': 32768, 'forward_max_token_len_per_gpu': 32768, 'ppo_epochs': 1, 'shuffle': False, 'data_loader_seed': 42, 'cliprange_value': 0.5, 'loss_agg_mode': 'token-mean', 'checkpoint': {'_target_': 'verl.trainer.config.CheckpointConfig', 'save_contents': ['model', 'optimizer', 'extra'], 'load_contents': ['model', 'optimizer', 'extra'], 'async_save': False}, 'profiler': {'_target_': 'verl.utils.profiler.ProfilerConfig', 'tool': None, 'enable': False, 'all_ranks': False, 'ranks': [], 'save_path': 'outputs/profile', 'tool_config': {'nsys': {'_target_': 'verl.utils.profiler.config.NsightToolConfig', 'discrete': False}, 'npu': {'_target_': 'verl.utils.profiler.config.NPUToolConfig', 'contents': [], 'level': 'level0', 'analysis': True, 'discrete': False}, 'torch': {'_target_': 'verl.utils.profiler.config.TorchProfilerToolConfig', 'contents': [], 'discrete': False}, 'torch_memory': {'_target_': 'verl.utils.profiler.config.TorchMemoryToolConfig', 'trace_alloc_max_entries': 100000, 'stack_depth': 32}}}, 'forward_micro_batch_size': None, 'forward_micro_batch_size_per_gpu': None, 'ulysses_sequence_parallel_size': 1, 'grad_clip': 1.0}, 'reward_model': {'enable': False, 'enable_resource_pool': False, 'n_gpus_per_node': 8, 'nnodes': 0, 'strategy': 'fsdp', 'model': {'input_tokenizer': 'Qwen/Qwen3-4B-Instruct-2507', 'path': '~/models/FsfairX-LLaMA3-RM-v0.1', 'external_lib': None, 'trust_remote_code': False, 'override_config': {}, 'use_shm': False, 'use_remove_padding': False, 'use_fused_kernels': False, 'fsdp_config': {'_target_': 'verl.workers.config.FSDPEngineConfig', 'wrap_policy': {'min_num_params': 0}, 'param_offload': False, 'reshard_after_forward': True, 'fsdp_size': -1, 'forward_prefetch': False}}, 'micro_batch_size': None, 'micro_batch_size_per_gpu': None, 'max_length': None, 'use_dynamic_bsz': False, 'forward_max_token_len_per_gpu': 32768, 'reward_manager': 'naive', 'reward_loop_source': 'register', 'reward_loop_module_path': None, 'reward_loop_class_name': None, 'launch_reward_fn_async': False, 'sandbox_fusion': {'url': None, 'max_concurrent': 64, 'memory_limit_mb': 1024}, 'profiler': {'_target_': 'verl.utils.profiler.ProfilerConfig', 'tool': None, 'enable': False, 'all_ranks': False, 'ranks': [], 'save_path': 'outputs/profile', 'tool_config': {'nsys': {'_target_': 'verl.utils.profiler.config.NsightToolConfig', 'discrete': False}, 'npu': {'_target_': 'verl.utils.profiler.config.NPUToolConfig', 'contents': [], 'level': 'level0', 'analysis': True, 'discrete': False}, 'torch': {'_target_': 'verl.utils.profiler.config.TorchProfilerToolConfig', 'contents': [], 'discrete': False}, 'torch_memory': {'_target_': 'verl.utils.profiler.config.TorchMemoryToolConfig', 'trace_alloc_max_entries': 100000, 'stack_depth': 32}}}, 'ulysses_sequence_parallel_size': 1, 'use_reward_loop': True, 'num_workers': 1, 'rollout': {'_target_': 'verl.workers.config.RolloutConfig', 'name': '???', 'dtype': 'bfloat16', 'gpu_memory_utilization': 0.5, 'enforce_eager': True, 'cudagraph_capture_sizes': None, 'free_cache_engine': True, 'data_parallel_size': 1, 'expert_parallel_size': 1, 'tensor_model_parallel_size': 2, 'max_num_batched_tokens': 8192, 'max_model_len': None, 'max_num_seqs': 1024, 'load_format': 'auto', 'engine_kwargs': {}, 'limit_images': None, 'enable_chunked_prefill': True, 'enable_prefix_caching': True, 'disable_log_stats': True, 'skip_tokenizer_init': False, 'prompt_length': 2048, 'response_length': 2048}}, 'algorithm': {'rollout_correction': {'rollout_is': None, 'rollout_is_threshold': 2.0, 'rollout_rs': None, 'rollout_rs_threshold': None, 'bypass_mode': False, 'loss_type': 'ppo_clip', 'rollout_is_batch_normalize': False}, '_target_': 'verl.trainer.config.AlgoConfig', 'gamma': 1.0, 'lam': 1.0, 'adv_estimator': 'grpo', 'norm_adv_by_std_in_grpo': True, 'use_kl_in_reward': False, 'kl_penalty': 'kl', 'kl_ctrl': {'_target_': 'verl.trainer.config.KLControlConfig', 'type': 'fixed', 'kl_coef': 0.001, 'horizon': 10000, 'target_kl': 0.1}, 'use_pf_ppo': False, 'pf_ppo': {'reweight_method': 'pow', 'weight_pow': 2.0}}, 'custom_reward_function': {'path': '/home/mshahidul/readctrl/code/RL_model/verl/verl_train/reward_func/reward.py', 'name': 'compute_score'}, 'trainer': {'balance_batch': True, 'total_epochs': 15, 'total_training_steps': None, 'project_name': 'readctrl-verl', 'experiment_name': 'qwen3-4b-instruct-en', 'logger': ['console', 'wandb'], 'log_val_generations': 0, 'rollout_data_dir': None, 'validation_data_dir': None, 'nnodes': 1, 'n_gpus_per_node': 2, 'save_freq': 5, 'esi_redundant_time': 0, 'resume_mode': 'auto', 'resume_from_path': None, 'val_before_train': True, 'val_only': False, 'test_freq': 10, 'critic_warmup': 0, 'default_hdfs_dir': None, 'del_local_ckpt_after_load': False, 'default_local_dir': '/home/mshahidul/readctrl/code/RL_model/train_v2', 'max_actor_ckpt_to_keep': 1, 'max_critic_ckpt_to_keep': 1, 'ray_wait_register_center_timeout': 300, 'device': 'cuda', 'use_legacy_worker_impl': 'auto', 'remove_previous_ckpt_in_save': True}, 'global_profiler': {'_target_': 'verl.utils.profiler.ProfilerConfig', 'tool': None, 'steps': None, 'profile_continuous_steps': False, 'save_path': 'outputs/profile', 'global_tool_config': {'nsys': {'_target_': 'verl.utils.profiler.config.NsightToolConfig', 'discrete': False, 'controller_nsight_options': {'trace': 'cuda,nvtx,cublas,ucx', 'cuda-memory-usage': 'true', 'cuda-graph-trace': 'graph'}, 'worker_nsight_options': {'trace': 'cuda,nvtx,cublas,ucx', 'cuda-memory-usage': 'true', 'cuda-graph-trace': 'graph', 'capture-range': 'cudaProfilerApi', 'capture-range-end': None, 'kill': 'none'}}, 'torch_memory': {'trace_alloc_max_entries': 100000, 'stack_depth': 32, 'context': 'all', 'stacks': 'all', 'kw_args': {}}}}, 'transfer_queue': {'enable': False}, 'ray_kwargs': {'ray_init': {'num_cpus': None}, 'timeline_json_file': None}, '_wandb': {}} +2026-02-09 13:49:31,413 INFO MainThread:2582064 [wandb_init.py:init():892] starting backend +2026-02-09 13:49:31,734 INFO MainThread:2582064 [wandb_init.py:init():895] sending inform_init request +2026-02-09 13:49:31,746 INFO MainThread:2582064 [wandb_init.py:init():903] backend started and connected +2026-02-09 13:49:31,764 INFO MainThread:2582064 [wandb_init.py:init():973] updated telemetry +2026-02-09 13:49:31,790 INFO MainThread:2582064 [wandb_init.py:init():997] communicating run to backend with 90.0 second timeout +2026-02-09 13:49:33,768 INFO MainThread:2582064 [wandb_init.py:init():1042] starting run threads in backend +2026-02-09 13:49:34,669 INFO MainThread:2582064 [wandb_run.py:_console_start():2529] atexit reg +2026-02-09 13:49:34,669 INFO MainThread:2582064 [wandb_run.py:_redirect():2377] redirect: wrap_raw +2026-02-09 13:49:34,670 INFO MainThread:2582064 [wandb_run.py:_redirect():2446] Wrapping output streams. +2026-02-09 13:49:34,670 INFO MainThread:2582064 [wandb_run.py:_redirect():2469] Redirects installed. +2026-02-09 13:49:34,683 INFO MainThread:2582064 [wandb_init.py:init():1082] run started, returning control to user process diff --git a/code/RL_model/verl/verl_train/wandb/run-20260210_000542-03xcpt7l/files/output.log b/code/RL_model/verl/verl_train/wandb/run-20260210_000542-03xcpt7l/files/output.log new file mode 100644 index 0000000000000000000000000000000000000000..1363cb81f80a8d0bd6d71ec1070f9a51abcbc7eb --- /dev/null +++ b/code/RL_model/verl/verl_train/wandb/run-20260210_000542-03xcpt7l/files/output.log @@ -0,0 +1,6 @@ +wandb: Detected [dspy, litellm, openai] in use. +wandb: Use W&B Weave for improved LLM call tracing. Install Weave with `pip install weave` then add `import weave` to the top of your script. +wandb: For more information, check out the docs at: https://weave-docs.wandb.ai/ +Checkpoint tracker file does not exist: /home/mshahidul/readctrl/code/RL_model/train_v2/latest_checkpointed_iteration.txt +Training from scratch +test_gen_batch meta info: {'eos_token_id': 151645, 'pad_token_id': 151643, 'recompute_log_prob': False, 'do_sample': False, 'validate': True, 'global_steps': 0} diff --git a/code/RL_model/verl/verl_train/wandb/run-20260210_000542-03xcpt7l/files/requirements.txt b/code/RL_model/verl/verl_train/wandb/run-20260210_000542-03xcpt7l/files/requirements.txt new file mode 100644 index 0000000000000000000000000000000000000000..dd2de633cd2e89d34a77b40967a7fc9ed3177c34 --- /dev/null +++ b/code/RL_model/verl/verl_train/wandb/run-20260210_000542-03xcpt7l/files/requirements.txt @@ -0,0 +1,283 @@ +verl==0.8.0.dev0 +psutil==7.1.3 +colorama==0.4.6 +annotated-doc==0.0.4 +sentry-sdk==2.51.0 +requests==2.32.5 +nvidia-cufile-cu12==1.13.1.3 +ml_dtypes==0.5.4 +xformers==0.0.32.post1 +sglang==0.5.2 +multidict==6.7.1 +typing_extensions==4.15.0 +nvidia-cusparselt-cu12==0.7.1 +openai-harmony==0.0.4 +transformers==4.56.1 +Werkzeug==3.1.5 +identify==2.6.16 +gepa==0.0.26 +pytest==9.0.2 +nvidia-cuda-runtime-cu12==12.8.90 +GitPython==3.1.46 +cupy-cuda12x==13.6.0 +tokenizers==0.22.2 +pybind11==3.0.1 +google-api-core==2.29.0 +partial-json-parser==0.2.1.1.post7 +aiohttp-cors==0.8.1 +sniffio==1.3.1 +tensordict==0.10.0 +smart_open==7.5.0 +cffi==2.0.0 +wcwidth==0.5.3 +asttokens==3.0.1 +opencensus==0.11.4 +rpds-py==0.30.0 +py-spy==0.4.1 +litellm==1.81.9 +gguf==0.17.1 +nvidia-nvjitlink-cu12==12.8.93 +httpx==0.28.1 +cuda-python==13.1.1 +annotated-types==0.7.0 +regex==2026.1.15 +vllm==0.11.0 +idna==3.11 +fsspec==2025.10.0 +parso==0.8.5 +pydantic-extra-types==2.11.0 +MarkupSafe==3.0.3 +cryptography==46.0.4 +openai==2.17.0 +filelock==3.20.3 +modelscope==1.34.0 +outlines==0.1.11 +dnspython==2.8.0 +scipy==1.17.0 +zipp==3.23.0 +PyYAML==6.0.3 +onnx==1.20.1 +torchdata==0.11.0 +cuda-pathfinder==1.3.3 +asyncer==0.0.8 +verl==0.8.0.dev0 +httptools==0.7.1 +opencv-python-headless==4.13.0.90 +importlib_metadata==8.7.1 +peft==0.18.1 +opentelemetry-sdk==1.39.1 +python-json-logger==4.0.0 +alembic==1.18.3 +cuda-bindings==13.1.1 +mdurl==0.1.2 +referencing==0.37.0 +xxhash==3.6.0 +interegular==0.3.3 +fastapi-cli==0.0.20 +uv==0.9.28 +tensorboard==2.20.0 +nvidia-cublas-cu12==12.8.4.1 +sentencepiece==0.2.1 +rich-toolkit==0.18.1 +numpy==2.2.0 +yarl==1.22.0 +opencv-fixer==0.2.5 +tqdm==4.67.2 +python-dotenv==1.2.1 +Mako==1.3.10 +timm==1.0.16 +aiohappyeyeballs==2.6.1 +decord==0.6.0 +jiter==0.12.0 +airportsdata==20250909 +markdown-it-py==4.0.0 +nvidia-cusolver-cu12==11.7.3.90 +pyarrow==23.0.0 +opentelemetry-proto==1.39.1 +anyio==4.12.1 +pycryptodomex==3.23.0 +prometheus_client==0.24.1 +aiohttp==3.13.3 +urllib3==2.6.3 +pexpect==4.9.0 +pydantic-settings==2.12.0 +distro==1.9.0 +av==16.1.0 +cloudpickle==3.1.2 +mpmath==1.3.0 +certifi==2026.1.4 +antlr4-python3-runtime==4.9.3 +torchvision==0.23.0 +accelerate==1.12.0 +watchfiles==1.1.1 +ruff==0.14.14 +wheel==0.46.3 +omegaconf==2.3.0 +nvidia-cufft-cu12==11.3.3.83 +multiprocess==0.70.18 +frozendict==2.4.7 +sympy==1.14.0 +setproctitle==1.3.7 +optuna==4.7.0 +setuptools==79.0.1 +py-cpuinfo==9.0.0 +ipython_pygments_lexers==1.1.1 +rich==14.3.2 +uvicorn==0.40.0 +outlines_core==0.2.11 +llvmlite==0.44.0 +nvidia-cuda-cupti-cu12==12.8.90 +attrs==25.4.0 +anthropic==0.77.0 +packaging==25.0 +fastrlock==0.8.3 +astor==0.8.1 +pluggy==1.6.0 +nvidia-cuda-nvrtc-cu12==12.8.93 +psutil==7.2.2 +virtualenv==20.36.1 +cbor2==5.8.0 +tenacity==9.1.4 +compressed-tensors==0.11.0 +SQLAlchemy==2.0.46 +nvidia-cusparse-cu12==12.5.8.93 +networkx==3.6.1 +httpcore==1.0.9 +onnxscript==0.3.1 +smmap==5.0.2 +opencv-python==4.13.0.90 +traitlets==5.14.3 +python-multipart==0.0.22 +pyvers==0.1.0 +huggingface-hub==0.36.0 +pillow==12.1.0 +jsonschema==4.26.0 +cfgv==3.5.0 +optree==0.18.0 +email-validator==2.3.0 +tabulate==0.9.0 +pre_commit==4.5.1 +msgpack==1.1.2 +depyf==0.19.0 +numba==0.61.2 +six==1.17.0 +aiosignal==1.4.0 +nvidia-nvtx-cu12==12.8.90 +propcache==0.4.1 +torch_memory_saver==0.0.8 +h11==0.16.0 +frozenlist==1.8.0 +websockets==16.0 +nvidia-cudnn-frontend==1.18.0 +build==1.4.0 +google-auth==2.48.0 +pycountry==24.6.1 +colorlog==6.10.1 +stack-data==0.6.3 +typing-inspection==0.4.2 +googleapis-common-protos==1.72.0 +pandas==3.0.0 +typer==0.21.1 +protobuf==6.33.5 +fastapi==0.128.0 +blake3==1.0.8 +opentelemetry-semantic-conventions==0.60b1 +opentelemetry-exporter-prometheus==0.60b1 +nvidia-cudnn-cu12==9.10.2.21 +Markdown==3.10.1 +liger_kernel==0.6.4 +json_repair==0.57.1 +nodeenv==1.10.0 +prompt_toolkit==3.0.52 +torchaudio==2.8.0 +codetiming==1.4.0 +platformdirs==4.5.1 +jsonschema-specifications==2025.9.1 +hydra-core==1.3.2 +tensorboard-data-server==0.7.2 +lm-format-enforcer==0.11.3 +pyasn1_modules==0.4.2 +tiktoken==0.12.0 +starlette==0.50.0 +pyproject_hooks==1.2.0 +flash_attn==2.8.1 +rsa==4.9.1 +ray==2.53.0 +nest-asyncio==1.6.0 +lark==1.2.2 +fastar==0.8.0 +orjson==3.11.6 +prometheus-fastapi-instrumentator==7.1.0 +opentelemetry-api==1.39.1 +mathruler==0.1.0 +pydantic_core==2.41.5 +fastapi-cloud-cli==0.11.0 +pynvml==13.0.1 +loguru==0.7.3 +torch==2.8.0 +msgspec==0.20.0 +nvidia-curand-cu12==10.3.9.90 +blobfile==3.0.0 +gitdb==4.0.12 +llguidance==0.7.30 +hf_transfer==0.1.9 +nvidia-nccl-cu12==2.27.3 +qwen-vl-utils==0.0.14 +ptyprocess==0.7.0 +ipdb==0.13.13 +opencensus-context==0.1.3 +jedi==0.19.2 +click==8.3.1 +datasets==4.5.0 +soxr==1.0.0 +sgl-kernel==0.3.9.post2 +colorful==0.5.8 +pyasn1==0.6.2 +charset-normalizer==3.4.4 +nvidia-ml-py==13.590.48 +hf-xet==1.2.0 +dill==0.4.0 +absl-py==2.4.0 +pydantic==2.12.5 +dspy==3.1.3 +wrapt==2.1.0 +flashinfer-python==0.3.1 +python-dateutil==2.9.0.post0 +torchao==0.9.0 +cachetools==7.0.0 +soundfile==0.13.1 +diskcache==5.6.3 +onnx-ir==0.1.15 +docstring_parser==0.17.0 +matplotlib-inline==0.2.1 +Pygments==2.19.2 +wandb==0.24.1 +pure_eval==0.2.3 +ninja==1.13.0 +proto-plus==1.27.0 +pyzmq==27.1.0 +iniconfig==2.3.0 +Jinja2==3.1.6 +megatron-core==0.13.1 +uvloop==0.22.1 +fastuuid==0.14.0 +pycparser==3.0 +pylatexenc==2.10 +decorator==5.2.1 +shellingham==1.5.4 +lxml==6.0.2 +safetensors==0.7.0 +xgrammar==0.1.25 +pybase64==1.4.3 +ipython==9.9.0 +greenlet==3.3.1 +mistral_common==1.9.0 +rignore==0.7.6 +einops==0.8.2 +distlib==0.4.0 +triton==3.4.0 +executing==2.2.1 +grpcio==1.76.0 +pip==25.3 +verl==0.8.0.dev0 +verl==0.8.0.dev0 diff --git a/code/RL_model/verl/verl_train/wandb/run-20260210_000542-03xcpt7l/files/wandb-metadata.json b/code/RL_model/verl/verl_train/wandb/run-20260210_000542-03xcpt7l/files/wandb-metadata.json new file mode 100644 index 0000000000000000000000000000000000000000..b393367930f475c7911a90aa10198388f8bf25ce --- /dev/null +++ b/code/RL_model/verl/verl_train/wandb/run-20260210_000542-03xcpt7l/files/wandb-metadata.json @@ -0,0 +1,93 @@ +{ + "os": "Linux-5.15.0-160-generic-x86_64-with-glibc2.35", + "python": "CPython 3.12.12", + "startedAt": "2026-02-10T05:05:42.731761Z", + "args": [ + "--node-ip-address=172.16.34.29", + "--node-manager-port=39329", + "--object-store-name=/tmp/ray/session_2026-02-09_23-59-33_106640_3780904/sockets/plasma_store", + "--raylet-name=/tmp/ray/session_2026-02-09_23-59-33_106640_3780904/sockets/raylet", + "--redis-address=None", + "--metrics-agent-port=61322", + "--logging-rotate-bytes=536870912", + "--logging-rotate-backup-count=5", + "--runtime-env-agent-port=64277", + "--gcs-address=172.16.34.29:61670", + "--session-name=session_2026-02-09_23-59-33_106640_3780904", + "--temp-dir=/tmp/ray", + "--webui=127.0.0.1:8297", + "--cluster-id=f3d4f943538bba80589df6297cde07197630716a63e13a8e2b80f5d7", + "--startup-token=128", + "--worker-launch-time-ms=1770699585639", + "--node-id=df798a3a0fabc60d5df71d3c6e276eebc0a0a39646cff7b49cfc7423", + "--runtime-env-hash=1096984665" + ], + "program": "/home/mshahidul/miniconda3/envs/verl2/lib/python3.12/site-packages/ray/_private/workers/default_worker.py", + "git": { + "remote": "https://github.com/verl-project/verl", + "commit": "d9939add7a2a01923a9088891f913a5d20c4e622" + }, + "email": "shahidulshakib034@gmail.com", + "root": "/data/home_beta/mshahidul/readctrl/code/RL_model/verl/verl_train", + "host": "gamma", + "executable": "/home/mshahidul/miniconda3/envs/verl2/bin/python3", + "cpu_count": 64, + "cpu_count_logical": 128, + "gpu": "NVIDIA A100 80GB PCIe", + "gpu_count": 6, + "disk": { + "/": { + "total": "3766429188096", + "used": "191242661888" + } + }, + "memory": { + "total": "1081814863872" + }, + "gpu_nvidia": [ + { + "name": "NVIDIA A100 80GB PCIe", + "memoryTotal": "85899345920", + "cudaCores": 6912, + "architecture": "Ampere", + "uuid": "GPU-4a3678c7-34a9-356f-f7b7-7f7e2f44b596" + }, + { + "name": "NVIDIA A100 80GB PCIe", + "memoryTotal": "85899345920", + "cudaCores": 6912, + "architecture": "Ampere", + "uuid": "GPU-df506764-0db5-91b4-8ec9-154a3bb8123f" + }, + { + "name": "NVIDIA A100 80GB PCIe", + "memoryTotal": "85899345920", + "cudaCores": 6912, + "architecture": "Ampere", + "uuid": "GPU-2c3dbd62-b384-2996-a0f6-b32dcfcc3538" + }, + { + "name": "NVIDIA A100 80GB PCIe", + "memoryTotal": "85899345920", + "cudaCores": 6912, + "architecture": "Ampere", + "uuid": "GPU-1ff3dabe-4b9a-ea62-5cc3-01f12f32d328" + }, + { + "name": "NVIDIA H100 PCIe", + "memoryTotal": "85520809984", + "cudaCores": 14592, + "architecture": "Hopper", + "uuid": "GPU-eefc4b8c-0e79-c1d6-a9ff-8325040572eb" + }, + { + "name": "NVIDIA H100 PCIe", + "memoryTotal": "85520809984", + "cudaCores": 14592, + "architecture": "Hopper", + "uuid": "GPU-d42b6057-13e8-1e88-6aa1-9307df72dece" + } + ], + "cudaVersion": "13.0", + "writerId": "gc5oti9ohsskplcsqtnkl5dfkfh06kju" +} \ No newline at end of file diff --git a/code/RL_model/verl/verl_train/wandb/run-20260210_000542-03xcpt7l/files/wandb-summary.json b/code/RL_model/verl/verl_train/wandb/run-20260210_000542-03xcpt7l/files/wandb-summary.json new file mode 100644 index 0000000000000000000000000000000000000000..0fa02652fd51f44042f2441e36c10a2dbe3b1411 --- /dev/null +++ b/code/RL_model/verl/verl_train/wandb/run-20260210_000542-03xcpt7l/files/wandb-summary.json @@ -0,0 +1 @@ +{"_runtime":577,"_wandb":{"runtime":577}} \ No newline at end of file diff --git a/code/RL_model/verl/verl_train/wandb/run-20260210_000542-03xcpt7l/logs/debug-core.log b/code/RL_model/verl/verl_train/wandb/run-20260210_000542-03xcpt7l/logs/debug-core.log new file mode 100644 index 0000000000000000000000000000000000000000..d5b885a47eb034b0592ec229bf55244a06178349 --- /dev/null +++ b/code/RL_model/verl/verl_train/wandb/run-20260210_000542-03xcpt7l/logs/debug-core.log @@ -0,0 +1,8 @@ +{"time":"2026-02-10T00:05:42.88556399-05:00","level":"INFO","msg":"main: starting server","port-filename":"/tmp/tmp_998e929/port-3789424.txt","pid":3789424,"log-level":0,"disable-analytics":false,"shutdown-on-parent-exit":false,"enable-dcgm-profiling":false} +{"time":"2026-02-10T00:05:42.886575339-05:00","level":"INFO","msg":"server: will exit if parent process dies","ppid":3789424} +{"time":"2026-02-10T00:05:42.886570624-05:00","level":"INFO","msg":"server: accepting connections","addr":{"Name":"/tmp/wandb-3789424-3799227-3397557547/socket","Net":"unix"}} +{"time":"2026-02-10T00:05:43.043301577-05:00","level":"INFO","msg":"connection: ManageConnectionData: new connection created","id":"1(@)"} +{"time":"2026-02-10T00:05:43.055063051-05:00","level":"INFO","msg":"handleInformInit: received","streamId":"03xcpt7l","id":"1(@)"} +{"time":"2026-02-10T00:05:44.675181561-05:00","level":"INFO","msg":"handleInformInit: stream started","streamId":"03xcpt7l","id":"1(@)"} +{"time":"2026-02-10T00:05:50.862144117-05:00","level":"INFO","msg":"connection: cancelling request","id":"1(@)","requestId":"x7xtdc0sam17"} +{"time":"2026-02-10T00:15:22.684860046-05:00","level":"INFO","msg":"connection: cancelling request","id":"1(@)","requestId":"x7xtdc0sam17"} diff --git a/code/RL_model/verl/verl_train/wandb/run-20260210_000542-03xcpt7l/logs/debug-internal.log b/code/RL_model/verl/verl_train/wandb/run-20260210_000542-03xcpt7l/logs/debug-internal.log new file mode 100644 index 0000000000000000000000000000000000000000..87c54a2e32518f3ea0aa5a7bf5155a9f3dd66911 --- /dev/null +++ b/code/RL_model/verl/verl_train/wandb/run-20260210_000542-03xcpt7l/logs/debug-internal.log @@ -0,0 +1,6 @@ +{"time":"2026-02-10T00:05:43.05661957-05:00","level":"INFO","msg":"stream: starting","core version":"0.24.1"} +{"time":"2026-02-10T00:05:44.671346502-05:00","level":"INFO","msg":"stream: created new stream","id":"03xcpt7l"} +{"time":"2026-02-10T00:05:44.672455175-05:00","level":"INFO","msg":"handler: started","stream_id":"03xcpt7l"} +{"time":"2026-02-10T00:05:44.675146416-05:00","level":"INFO","msg":"stream: started","id":"03xcpt7l"} +{"time":"2026-02-10T00:05:44.675188611-05:00","level":"INFO","msg":"sender: started","stream_id":"03xcpt7l"} +{"time":"2026-02-10T00:05:44.675192997-05:00","level":"INFO","msg":"writer: started","stream_id":"03xcpt7l"} diff --git a/code/RL_model/verl/verl_train/wandb/run-20260210_000542-03xcpt7l/logs/debug.log b/code/RL_model/verl/verl_train/wandb/run-20260210_000542-03xcpt7l/logs/debug.log new file mode 100644 index 0000000000000000000000000000000000000000..078e4aa62d03ca59be227a0a321dc6a1e4b37eb2 --- /dev/null +++ b/code/RL_model/verl/verl_train/wandb/run-20260210_000542-03xcpt7l/logs/debug.log @@ -0,0 +1,27 @@ +2026-02-10 00:05:42,748 INFO MainThread:3789424 [wandb_setup.py:_flush():81] Current SDK version is 0.24.1 +2026-02-10 00:05:42,749 INFO MainThread:3789424 [wandb_setup.py:_flush():81] Configure stats pid to 3789424 +2026-02-10 00:05:42,749 INFO MainThread:3789424 [wandb_setup.py:_flush():81] Loading settings from environment variables +2026-02-10 00:05:42,749 INFO MainThread:3789424 [wandb_init.py:setup_run_log_directory():717] Logging user logs to /data/home_beta/mshahidul/readctrl/code/RL_model/verl/verl_train/wandb/run-20260210_000542-03xcpt7l/logs/debug.log +2026-02-10 00:05:42,749 INFO MainThread:3789424 [wandb_init.py:setup_run_log_directory():718] Logging internal logs to /data/home_beta/mshahidul/readctrl/code/RL_model/verl/verl_train/wandb/run-20260210_000542-03xcpt7l/logs/debug-internal.log +2026-02-10 00:05:42,749 INFO MainThread:3789424 [wandb_init.py:init():844] calling init triggers +2026-02-10 00:05:42,751 INFO MainThread:3789424 [wandb_init.py:init():849] wandb.init called with sweep_config: {} +config: {'actor_rollout_ref': {'actor': {'optim': {'_target_': 'verl.workers.config.FSDPOptimizerConfig', 'optimizer': 'AdamW', 'optimizer_impl': 'torch.optim', 'lr': 1e-06, 'lr_warmup_steps_ratio': 0.0, 'total_training_steps': 90, 'weight_decay': 0.01, 'lr_warmup_steps': -1, 'betas': [0.9, 0.999], 'clip_grad': 1.0, 'min_lr_ratio': 0.0, 'num_cycles': 0.5, 'lr_scheduler_type': 'constant', 'warmup_style': None, 'override_optimizer_config': None}, 'fsdp_config': {'_target_': 'verl.workers.config.FSDPEngineConfig', 'wrap_policy': {'min_num_params': 0}, 'param_offload': False, 'optimizer_offload': False, 'offload_policy': False, 'reshard_after_forward': True, 'fsdp_size': -1, 'forward_prefetch': False, 'model_dtype': 'fp32', 'use_orig_params': False, 'seed': 42, 'full_determinism': False, 'ulysses_sequence_parallel_size': 1, 'entropy_from_logits_with_chunking': False, 'use_torch_compile': True, 'entropy_checkpointing': False, 'forward_only': False, 'strategy': 'fsdp', 'dtype': 'bfloat16'}, '_target_': 'verl.workers.config.FSDPActorConfig', 'rollout_n': 3, 'strategy': 'fsdp', 'ppo_mini_batch_size': 256, 'ppo_micro_batch_size': None, 'ppo_micro_batch_size_per_gpu': 32, 'use_dynamic_bsz': False, 'ppo_max_token_len_per_gpu': 16384, 'clip_ratio': 0.2, 'clip_ratio_low': 0.2, 'clip_ratio_high': 0.2, 'tau_pos': 1.0, 'tau_neg': 1.05, 'freeze_vision_tower': False, 'policy_loss': {'_target_': 'verl.workers.config.PolicyLossConfig', 'loss_mode': 'vanilla', 'clip_cov_ratio': 0.0002, 'clip_cov_lb': 1.0, 'clip_cov_ub': 5.0, 'kl_cov_ratio': 0.0002, 'ppo_kl_coef': 0.1}, 'clip_ratio_c': 3.0, 'loss_agg_mode': 'token-mean', 'loss_scale_factor': None, 'entropy_coeff': 0, 'calculate_entropy': False, 'use_kl_loss': True, 'use_prefix_grouper': False, 'use_torch_compile': True, 'kl_loss_coef': 0.001, 'kl_loss_type': 'low_var_kl', 'ppo_epochs': 1, 'shuffle': False, 'data_loader_seed': 42, 'checkpoint': {'_target_': 'verl.trainer.config.CheckpointConfig', 'save_contents': ['model', 'optimizer', 'extra'], 'load_contents': ['model', 'optimizer', 'extra'], 'async_save': False}, 'use_fused_kernels': False, 'profiler': {'_target_': 'verl.utils.profiler.ProfilerConfig', 'tool': None, 'enable': False, 'all_ranks': False, 'ranks': [], 'save_path': 'outputs/profile', 'tool_config': {'nsys': {'_target_': 'verl.utils.profiler.config.NsightToolConfig', 'discrete': False}, 'npu': {'_target_': 'verl.utils.profiler.config.NPUToolConfig', 'contents': [], 'level': 'level0', 'analysis': True, 'discrete': False}, 'torch': {'_target_': 'verl.utils.profiler.config.TorchProfilerToolConfig', 'contents': [], 'discrete': False}, 'torch_memory': {'_target_': 'verl.utils.profiler.config.TorchMemoryToolConfig', 'trace_alloc_max_entries': 100000, 'stack_depth': 32}}}, 'router_replay': {'_target_': 'verl.workers.config.RouterReplayConfig', 'mode': 'disabled', 'record_file': None, 'replay_file': None}, 'grad_clip': 1.0, 'ulysses_sequence_parallel_size': 1, 'entropy_from_logits_with_chunking': False, 'entropy_checkpointing': False, 'use_remove_padding': True, 'calculate_sum_pi_squared': False, 'sum_pi_squared_checkpointing': False}, 'ref': {'rollout_n': 3, 'strategy': 'fsdp', 'use_torch_compile': True, 'log_prob_micro_batch_size': None, 'log_prob_micro_batch_size_per_gpu': 32, 'log_prob_use_dynamic_bsz': False, 'log_prob_max_token_len_per_gpu': 16384, 'profiler': {'_target_': 'verl.utils.profiler.ProfilerConfig', 'tool': None, 'enable': False, 'all_ranks': False, 'ranks': [], 'save_path': 'outputs/profile', 'tool_config': {'nsys': {'_target_': 'verl.utils.profiler.config.NsightToolConfig', 'discrete': False}, 'npu': {'_target_': 'verl.utils.profiler.config.NPUToolConfig', 'contents': [], 'level': 'level0', 'analysis': True, 'discrete': False}, 'torch': {'_target_': 'verl.utils.profiler.config.TorchProfilerToolConfig', 'contents': [], 'discrete': False}, 'torch_memory': {'_target_': 'verl.utils.profiler.config.TorchMemoryToolConfig', 'trace_alloc_max_entries': 100000, 'stack_depth': 32}}}, 'router_replay': {'_target_': 'verl.workers.config.RouterReplayConfig', 'mode': 'disabled', 'record_file': None, 'replay_file': None}, 'fsdp_config': {'_target_': 'verl.workers.config.FSDPEngineConfig', 'wrap_policy': {'min_num_params': 0}, 'param_offload': False, 'optimizer_offload': False, 'offload_policy': False, 'reshard_after_forward': True, 'fsdp_size': -1, 'forward_prefetch': False, 'model_dtype': 'fp32', 'use_orig_params': False, 'seed': 42, 'full_determinism': False, 'ulysses_sequence_parallel_size': 1, 'entropy_from_logits_with_chunking': False, 'use_torch_compile': True, 'entropy_checkpointing': False, 'forward_only': True, 'strategy': 'fsdp', 'dtype': 'bfloat16'}, '_target_': 'verl.workers.config.FSDPActorConfig', 'ulysses_sequence_parallel_size': 1, 'entropy_from_logits_with_chunking': False, 'entropy_checkpointing': False}, 'rollout': {'_target_': 'verl.workers.config.RolloutConfig', 'name': 'vllm', 'mode': 'async', 'temperature': 1.0, 'top_k': -1, 'top_p': 1, 'prompt_length': 1024, 'response_length': 2048, 'dtype': 'bfloat16', 'gpu_memory_utilization': 0.6, 'ignore_eos': False, 'enforce_eager': False, 'cudagraph_capture_sizes': None, 'free_cache_engine': True, 'tensor_model_parallel_size': 1, 'data_parallel_size': 1, 'expert_parallel_size': 1, 'pipeline_model_parallel_size': 1, 'max_num_batched_tokens': 8192, 'max_model_len': 8192, 'max_num_seqs': 1024, 'enable_chunked_prefill': True, 'enable_prefix_caching': True, 'logprobs_mode': 'processed_logprobs', 'scheduling_policy': 'fcfs', 'load_format': 'dummy', 'log_prob_micro_batch_size': None, 'log_prob_micro_batch_size_per_gpu': 32, 'log_prob_use_dynamic_bsz': False, 'log_prob_max_token_len_per_gpu': 16384, 'disable_log_stats': True, 'do_sample': True, 'n': 3, 'over_sample_rate': 0, 'multi_stage_wake_up': False, 'engine_kwargs': {'vllm': {}, 'sglang': {}, 'trtllm': {}}, 'val_kwargs': {'_target_': 'verl.workers.config.SamplingConfig', 'top_k': -1, 'top_p': 1.0, 'temperature': 0, 'n': 1, 'do_sample': False}, 'multi_turn': {'_target_': 'verl.workers.config.MultiTurnConfig', 'enable': False, 'max_assistant_turns': None, 'tool_config_path': None, 'max_user_turns': None, 'max_parallel_calls': 1, 'max_tool_response_length': 256, 'tool_response_truncate_side': 'middle', 'interaction_config_path': None, 'use_inference_chat_template': False, 'tokenization_sanity_check_mode': 'strict', 'format': 'hermes', 'num_repeat_rollouts': None}, 'calculate_log_probs': False, 'agent': {'_target_': 'verl.workers.config.AgentLoopConfig', 'num_workers': 8, 'default_agent_loop': 'single_turn_agent', 'agent_loop_config_path': None, 'custom_async_server': {'_target_': 'verl.workers.config.CustomAsyncServerConfig', 'path': None, 'name': None}}, 'checkpoint_engine': {'_target_': 'verl.workers.config.CheckpointEngineConfig', 'backend': 'naive', 'update_weights_bucket_megabytes': 2048, 'engine_kwargs': {}}, 'trace': {'_target_': 'verl.workers.config.TraceConfig', 'backend': None, 'token2text': False, 'max_samples_per_step_per_worker': None}, 'skip_rollout': False, 'skip_dump_dir': '/tmp/rollout_dump', 'skip_tokenizer_init': True, 'enable_rollout_routing_replay': False, 'profiler': {'_target_': 'verl.utils.profiler.ProfilerConfig', 'tool': None, 'enable': False, 'all_ranks': False, 'ranks': [], 'save_path': 'outputs/profile', 'tool_config': {'nsys': {'_target_': 'verl.utils.profiler.config.NsightToolConfig', 'discrete': False}, 'npu': {'_target_': 'verl.utils.profiler.config.NPUToolConfig', 'contents': [], 'level': 'level0', 'analysis': True, 'discrete': False}, 'torch': {'_target_': 'verl.utils.profiler.config.TorchProfilerToolConfig', 'contents': [], 'discrete': False}, 'torch_memory': {'_target_': 'verl.utils.profiler.config.TorchMemoryToolConfig', 'trace_alloc_max_entries': 100000, 'stack_depth': 32}}}, 'prometheus': {'_target_': 'verl.workers.config.PrometheusConfig', 'enable': False, 'port': 9090, 'file': '/tmp/ray/session_latest/metrics/prometheus/prometheus.yml', 'served_model_name': 'Qwen/Qwen3-4B-Instruct-2507'}, 'quantization': None, 'quantization_config_file': None, 'mtp': {'_target_': 'verl.workers.config.MtpConfig', 'enable': False, 'enable_train': False, 'enable_rollout': False, 'detach_encoder': False, 'mtp_loss_scaling_factor': 0.1, 'speculative_algorithm': 'EAGLE', 'speculative_num_steps': 3, 'speculative_eagle_topk': 1, 'speculative_num_draft_tokens': 4, 'method': 'mtp', 'num_speculative_tokens': 1}, 'layered_summon': False}, 'model': {'_target_': 'verl.workers.config.HFModelConfig', 'path': 'Qwen/Qwen3-4B-Instruct-2507', 'hf_config_path': None, 'tokenizer_path': None, 'use_shm': False, 'trust_remote_code': False, 'custom_chat_template': None, 'external_lib': None, 'override_config': {}, 'enable_gradient_checkpointing': True, 'enable_activation_offload': False, 'use_remove_padding': True, 'lora_rank': 0, 'lora_alpha': 16, 'target_modules': 'all-linear', 'exclude_modules': None, 'lora_adapter_path': None, 'use_liger': False, 'use_fused_kernels': False, 'fused_kernel_options': {'impl_backend': 'torch'}, 'tiled_mlp': {'enabled': False, 'num_shards': 4}, 'mtp': {'_target_': 'verl.workers.config.MtpConfig', 'enable': False, 'enable_train': False, 'enable_rollout': False, 'detach_encoder': False, 'mtp_loss_scaling_factor': 0.1, 'speculative_algorithm': 'EAGLE', 'speculative_num_steps': 3, 'speculative_eagle_topk': 1, 'speculative_num_draft_tokens': 4, 'method': 'mtp', 'num_speculative_tokens': 1}}, 'hybrid_engine': True, 'nccl_timeout': 600}, 'data': {'tokenizer': None, 'use_shm': False, 'train_files': '/home/mshahidul/readctrl/code/RL_model/verl/verl_train/dataset/train.parquet', 'val_files': '/home/mshahidul/readctrl/code/RL_model/verl/verl_train/dataset/test.parquet', 'train_max_samples': -1, 'val_max_samples': -1, 'prompt_key': 'prompt', 'reward_fn_key': 'data_source', 'max_prompt_length': 1024, 'max_response_length': 2048, 'train_batch_size': 512, 'val_batch_size': None, 'tool_config_path': None, 'return_raw_input_ids': False, 'return_raw_chat': True, 'return_full_prompt': False, 'shuffle': True, 'seed': None, 'dataloader_num_workers': 8, 'image_patch_size': 14, 'validation_shuffle': False, 'filter_overlong_prompts': True, 'filter_overlong_prompts_workers': 1, 'truncation': 'error', 'image_key': 'images', 'video_key': 'videos', 'trust_remote_code': False, 'custom_cls': {'path': None, 'name': None}, 'return_multi_modal_inputs': True, 'sampler': {'class_path': None, 'class_name': None}, 'datagen': {'path': None, 'name': None}, 'apply_chat_template_kwargs': {}}, 'reward_manager': {'_target_': 'verl.trainer.config.config.RewardManagerConfig', 'source': 'register', 'name': 'naive', 'module': {'_target_': 'verl.trainer.config.config.ModuleConfig', 'path': None, 'name': 'custom_reward_manager'}}, 'critic': {'optim': {'_target_': 'verl.workers.config.FSDPOptimizerConfig', 'optimizer': 'AdamW', 'optimizer_impl': 'torch.optim', 'lr': 1e-05, 'lr_warmup_steps_ratio': 0.0, 'total_training_steps': 90, 'weight_decay': 0.01, 'lr_warmup_steps': -1, 'betas': [0.9, 0.999], 'clip_grad': 1.0, 'min_lr_ratio': 0.0, 'num_cycles': 0.5, 'lr_scheduler_type': 'constant', 'warmup_style': None, 'override_optimizer_config': None}, 'model': {'fsdp_config': {'_target_': 'verl.workers.config.FSDPEngineConfig', 'wrap_policy': {'min_num_params': 0}, 'param_offload': False, 'optimizer_offload': False, 'offload_policy': False, 'reshard_after_forward': True, 'fsdp_size': -1, 'forward_prefetch': False, 'model_dtype': 'fp32', 'use_orig_params': False, 'seed': 42, 'full_determinism': False, 'ulysses_sequence_parallel_size': 1, 'entropy_from_logits_with_chunking': False, 'use_torch_compile': True, 'entropy_checkpointing': False, 'forward_only': False, 'strategy': 'fsdp', 'dtype': 'bfloat16'}, 'path': '~/models/deepseek-llm-7b-chat', 'tokenizer_path': 'Qwen/Qwen3-4B-Instruct-2507', 'override_config': {}, 'external_lib': None, 'trust_remote_code': False, '_target_': 'verl.workers.config.FSDPCriticModelCfg', 'use_shm': False, 'enable_gradient_checkpointing': True, 'enable_activation_offload': False, 'use_remove_padding': False, 'lora_rank': 0, 'lora_alpha': 16, 'target_modules': 'all-linear', 'tiled_mlp': {'enabled': False, 'num_shards': 4}}, '_target_': 'verl.workers.config.FSDPCriticConfig', 'rollout_n': 3, 'strategy': 'fsdp', 'enable': None, 'ppo_mini_batch_size': 256, 'ppo_micro_batch_size': None, 'ppo_micro_batch_size_per_gpu': None, 'use_dynamic_bsz': False, 'ppo_max_token_len_per_gpu': 32768, 'forward_max_token_len_per_gpu': 32768, 'ppo_epochs': 1, 'shuffle': False, 'data_loader_seed': 42, 'cliprange_value': 0.5, 'loss_agg_mode': 'token-mean', 'checkpoint': {'_target_': 'verl.trainer.config.CheckpointConfig', 'save_contents': ['model', 'optimizer', 'extra'], 'load_contents': ['model', 'optimizer', 'extra'], 'async_save': False}, 'profiler': {'_target_': 'verl.utils.profiler.ProfilerConfig', 'tool': None, 'enable': False, 'all_ranks': False, 'ranks': [], 'save_path': 'outputs/profile', 'tool_config': {'nsys': {'_target_': 'verl.utils.profiler.config.NsightToolConfig', 'discrete': False}, 'npu': {'_target_': 'verl.utils.profiler.config.NPUToolConfig', 'contents': [], 'level': 'level0', 'analysis': True, 'discrete': False}, 'torch': {'_target_': 'verl.utils.profiler.config.TorchProfilerToolConfig', 'contents': [], 'discrete': False}, 'torch_memory': {'_target_': 'verl.utils.profiler.config.TorchMemoryToolConfig', 'trace_alloc_max_entries': 100000, 'stack_depth': 32}}}, 'forward_micro_batch_size': None, 'forward_micro_batch_size_per_gpu': None, 'ulysses_sequence_parallel_size': 1, 'grad_clip': 1.0}, 'reward_model': {'enable': False, 'enable_resource_pool': False, 'n_gpus_per_node': 8, 'nnodes': 0, 'strategy': 'fsdp', 'model': {'input_tokenizer': 'Qwen/Qwen3-4B-Instruct-2507', 'path': '~/models/FsfairX-LLaMA3-RM-v0.1', 'external_lib': None, 'trust_remote_code': False, 'override_config': {}, 'use_shm': False, 'use_remove_padding': False, 'use_fused_kernels': False, 'fsdp_config': {'_target_': 'verl.workers.config.FSDPEngineConfig', 'wrap_policy': {'min_num_params': 0}, 'param_offload': False, 'reshard_after_forward': True, 'fsdp_size': -1, 'forward_prefetch': False}}, 'micro_batch_size': None, 'micro_batch_size_per_gpu': None, 'max_length': None, 'use_dynamic_bsz': False, 'forward_max_token_len_per_gpu': 32768, 'reward_manager': 'naive', 'reward_loop_source': 'register', 'reward_loop_module_path': None, 'reward_loop_class_name': None, 'launch_reward_fn_async': False, 'sandbox_fusion': {'url': None, 'max_concurrent': 64, 'memory_limit_mb': 1024}, 'profiler': {'_target_': 'verl.utils.profiler.ProfilerConfig', 'tool': None, 'enable': False, 'all_ranks': False, 'ranks': [], 'save_path': 'outputs/profile', 'tool_config': {'nsys': {'_target_': 'verl.utils.profiler.config.NsightToolConfig', 'discrete': False}, 'npu': {'_target_': 'verl.utils.profiler.config.NPUToolConfig', 'contents': [], 'level': 'level0', 'analysis': True, 'discrete': False}, 'torch': {'_target_': 'verl.utils.profiler.config.TorchProfilerToolConfig', 'contents': [], 'discrete': False}, 'torch_memory': {'_target_': 'verl.utils.profiler.config.TorchMemoryToolConfig', 'trace_alloc_max_entries': 100000, 'stack_depth': 32}}}, 'ulysses_sequence_parallel_size': 1, 'use_reward_loop': True, 'num_workers': 1, 'rollout': {'_target_': 'verl.workers.config.RolloutConfig', 'name': '???', 'dtype': 'bfloat16', 'gpu_memory_utilization': 0.5, 'enforce_eager': True, 'cudagraph_capture_sizes': None, 'free_cache_engine': True, 'data_parallel_size': 1, 'expert_parallel_size': 1, 'tensor_model_parallel_size': 2, 'max_num_batched_tokens': 8192, 'max_model_len': None, 'max_num_seqs': 1024, 'load_format': 'auto', 'engine_kwargs': {}, 'limit_images': None, 'enable_chunked_prefill': True, 'enable_prefix_caching': True, 'disable_log_stats': True, 'skip_tokenizer_init': False, 'prompt_length': 2048, 'response_length': 2048}}, 'algorithm': {'rollout_correction': {'rollout_is': None, 'rollout_is_threshold': 2.0, 'rollout_rs': None, 'rollout_rs_threshold': None, 'bypass_mode': False, 'loss_type': 'ppo_clip', 'rollout_is_batch_normalize': False}, '_target_': 'verl.trainer.config.AlgoConfig', 'gamma': 1.0, 'lam': 1.0, 'adv_estimator': 'grpo', 'norm_adv_by_std_in_grpo': True, 'use_kl_in_reward': False, 'kl_penalty': 'kl', 'kl_ctrl': {'_target_': 'verl.trainer.config.KLControlConfig', 'type': 'fixed', 'kl_coef': 0.001, 'horizon': 10000, 'target_kl': 0.1}, 'use_pf_ppo': False, 'pf_ppo': {'reweight_method': 'pow', 'weight_pow': 2.0}}, 'custom_reward_function': {'path': '/home/mshahidul/readctrl/code/RL_model/verl/verl_train/reward_func/reward.py', 'name': 'compute_score'}, 'trainer': {'balance_batch': True, 'total_epochs': 15, 'total_training_steps': None, 'project_name': 'readctrl-verl', 'experiment_name': 'qwen3-4b-instruct-en', 'logger': ['console', 'wandb'], 'log_val_generations': 0, 'rollout_data_dir': None, 'validation_data_dir': None, 'nnodes': 1, 'n_gpus_per_node': 2, 'save_freq': 5, 'esi_redundant_time': 0, 'resume_mode': 'auto', 'resume_from_path': None, 'val_before_train': True, 'val_only': False, 'test_freq': 10, 'critic_warmup': 0, 'default_hdfs_dir': None, 'del_local_ckpt_after_load': False, 'default_local_dir': '/home/mshahidul/readctrl/code/RL_model/train_v2', 'max_actor_ckpt_to_keep': 1, 'max_critic_ckpt_to_keep': 1, 'ray_wait_register_center_timeout': 300, 'device': 'cuda', 'use_legacy_worker_impl': 'auto', 'remove_previous_ckpt_in_save': True}, 'global_profiler': {'_target_': 'verl.utils.profiler.ProfilerConfig', 'tool': None, 'steps': None, 'profile_continuous_steps': False, 'save_path': 'outputs/profile', 'global_tool_config': {'nsys': {'_target_': 'verl.utils.profiler.config.NsightToolConfig', 'discrete': False, 'controller_nsight_options': {'trace': 'cuda,nvtx,cublas,ucx', 'cuda-memory-usage': 'true', 'cuda-graph-trace': 'graph'}, 'worker_nsight_options': {'trace': 'cuda,nvtx,cublas,ucx', 'cuda-memory-usage': 'true', 'cuda-graph-trace': 'graph', 'capture-range': 'cudaProfilerApi', 'capture-range-end': None, 'kill': 'none'}}, 'torch_memory': {'trace_alloc_max_entries': 100000, 'stack_depth': 32, 'context': 'all', 'stacks': 'all', 'kw_args': {}}}}, 'transfer_queue': {'enable': False}, 'ray_kwargs': {'ray_init': {'num_cpus': None}, 'timeline_json_file': None}, '_wandb': {}} +2026-02-10 00:05:42,751 INFO MainThread:3789424 [wandb_init.py:init():892] starting backend +2026-02-10 00:05:43,043 INFO MainThread:3789424 [wandb_init.py:init():895] sending inform_init request +2026-02-10 00:05:43,049 INFO MainThread:3789424 [wandb_init.py:init():903] backend started and connected +2026-02-10 00:05:43,057 INFO MainThread:3789424 [wandb_init.py:init():973] updated telemetry +2026-02-10 00:05:43,076 INFO MainThread:3789424 [wandb_init.py:init():997] communicating run to backend with 90.0 second timeout +2026-02-10 00:05:45,011 INFO MainThread:3789424 [wandb_init.py:init():1042] starting run threads in backend +2026-02-10 00:05:45,832 INFO MainThread:3789424 [wandb_run.py:_console_start():2529] atexit reg +2026-02-10 00:05:45,832 INFO MainThread:3789424 [wandb_run.py:_redirect():2377] redirect: wrap_raw +2026-02-10 00:05:45,832 INFO MainThread:3789424 [wandb_run.py:_redirect():2446] Wrapping output streams. +2026-02-10 00:05:45,833 INFO MainThread:3789424 [wandb_run.py:_redirect():2469] Redirects installed. +2026-02-10 00:05:45,846 INFO MainThread:3789424 [wandb_init.py:init():1082] run started, returning control to user process +2026-02-10 00:15:22,681 INFO MainThread:3789424 [wandb_run.py:_finish():2295] finishing run shahidulshakib034-khulna-university-of-engineering-techn/readctrl-verl/03xcpt7l +2026-02-10 00:15:22,683 INFO MainThread:3789424 [wandb_run.py:_atexit_cleanup():2494] got exitcode: 0 +2026-02-10 00:15:22,684 INFO MainThread:3789424 [wandb_run.py:_restore():2476] restore +2026-02-10 00:15:22,684 INFO MainThread:3789424 [wandb_run.py:_restore():2482] restore done +2026-02-10 00:15:22,868 INFO wandb-AsyncioManager-main:3789424 [service_client.py:_forward_responses():94] Reached EOF. +2026-02-10 00:15:22,869 INFO wandb-AsyncioManager-main:3789424 [mailbox.py:close():154] Closing mailbox, abandoning 2 handles. +2026-02-10 00:15:22,870 INFO MainThread:3789424 [wandb_run.py:_restore():2476] restore +2026-02-10 00:15:22,870 INFO MainThread:3789424 [wandb_run.py:_restore():2482] restore done diff --git a/code/RL_model/verl/verl_train/wandb/run-20260210_002512-y8zrft04/files/config.yaml b/code/RL_model/verl/verl_train/wandb/run-20260210_002512-y8zrft04/files/config.yaml new file mode 100644 index 0000000000000000000000000000000000000000..91d98a525ec841645630e1c56c8129ef38be557d --- /dev/null +++ b/code/RL_model/verl/verl_train/wandb/run-20260210_002512-y8zrft04/files/config.yaml @@ -0,0 +1,819 @@ +_wandb: + value: + cli_version: 0.24.1 + e: + b67cw9b3932w1ydhiiskcdzjmplv9v6z: + args: + - --node-ip-address=172.16.34.29 + - --node-manager-port=41799 + - --object-store-name=/tmp/ray/session_2026-02-10_00-19-11_291622_3842721/sockets/plasma_store + - --raylet-name=/tmp/ray/session_2026-02-10_00-19-11_291622_3842721/sockets/raylet + - --redis-address=None + - --metrics-agent-port=51842 + - --logging-rotate-bytes=536870912 + - --logging-rotate-backup-count=5 + - --runtime-env-agent-port=52235 + - --gcs-address=172.16.34.29:58975 + - --session-name=session_2026-02-10_00-19-11_291622_3842721 + - --temp-dir=/tmp/ray + - --webui=127.0.0.1:8297 + - --cluster-id=dc298598f37a0d21538dce20236cc3100f2fd8ec5710337dfdeabc00 + - --startup-token=128 + - --worker-launch-time-ms=1770700763686 + - --node-id=057bfcf3bc01161d491e5fe6ef31ed0729d687cd30c5819b159d5cf2 + - --runtime-env-hash=1096984665 + cpu_count: 64 + cpu_count_logical: 128 + cudaVersion: "13.0" + disk: + /: + total: "3766429188096" + used: "191265169408" + email: shahidulshakib034@gmail.com + executable: /home/mshahidul/miniconda3/envs/verl2/bin/python3 + git: + commit: d9939add7a2a01923a9088891f913a5d20c4e622 + remote: https://github.com/verl-project/verl + gpu: NVIDIA A100 80GB PCIe + gpu_count: 6 + gpu_nvidia: + - architecture: Ampere + cudaCores: 6912 + memoryTotal: "85899345920" + name: NVIDIA A100 80GB PCIe + uuid: GPU-4a3678c7-34a9-356f-f7b7-7f7e2f44b596 + - architecture: Ampere + cudaCores: 6912 + memoryTotal: "85899345920" + name: NVIDIA A100 80GB PCIe + uuid: GPU-df506764-0db5-91b4-8ec9-154a3bb8123f + - architecture: Ampere + cudaCores: 6912 + memoryTotal: "85899345920" + name: NVIDIA A100 80GB PCIe + uuid: GPU-2c3dbd62-b384-2996-a0f6-b32dcfcc3538 + - architecture: Ampere + cudaCores: 6912 + memoryTotal: "85899345920" + name: NVIDIA A100 80GB PCIe + uuid: GPU-1ff3dabe-4b9a-ea62-5cc3-01f12f32d328 + - architecture: Hopper + cudaCores: 14592 + memoryTotal: "85520809984" + name: NVIDIA H100 PCIe + uuid: GPU-eefc4b8c-0e79-c1d6-a9ff-8325040572eb + - architecture: Hopper + cudaCores: 14592 + memoryTotal: "85520809984" + name: NVIDIA H100 PCIe + uuid: GPU-d42b6057-13e8-1e88-6aa1-9307df72dece + host: gamma + memory: + total: "1081814863872" + os: Linux-5.15.0-160-generic-x86_64-with-glibc2.35 + program: /home/mshahidul/miniconda3/envs/verl2/lib/python3.12/site-packages/ray/_private/workers/default_worker.py + python: CPython 3.12.12 + root: /data/home_beta/mshahidul/readctrl/code/RL_model/verl/verl_train + startedAt: "2026-02-10T05:25:12.620990Z" + writerId: b67cw9b3932w1ydhiiskcdzjmplv9v6z + m: [] + python_version: 3.12.12 + t: + "1": + - 1 + - 11 + - 30 + - 35 + - 41 + - 49 + - 50 + - 51 + - 71 + - 95 + - 98 + - 105 + - 107 + "2": + - 1 + - 11 + - 30 + - 35 + - 41 + - 49 + - 50 + - 51 + - 71 + - 95 + - 98 + - 105 + - 107 + "3": + - 2 + - 13 + - 16 + - 61 + "4": 3.12.12 + "5": 0.24.1 + "6": 4.56.1 + "12": 0.24.1 + "13": linux-x86_64 +actor_rollout_ref: + value: + actor: + _target_: verl.workers.config.FSDPActorConfig + calculate_entropy: false + calculate_sum_pi_squared: false + checkpoint: + _target_: verl.trainer.config.CheckpointConfig + async_save: false + load_contents: + - model + - optimizer + - extra + save_contents: + - model + - optimizer + - extra + clip_ratio: 0.2 + clip_ratio_c: 3 + clip_ratio_high: 0.2 + clip_ratio_low: 0.2 + data_loader_seed: 42 + entropy_checkpointing: false + entropy_coeff: 0 + entropy_from_logits_with_chunking: false + freeze_vision_tower: false + fsdp_config: + _target_: verl.workers.config.FSDPEngineConfig + dtype: bfloat16 + entropy_checkpointing: false + entropy_from_logits_with_chunking: false + forward_only: false + forward_prefetch: false + fsdp_size: -1 + full_determinism: false + model_dtype: fp32 + offload_policy: false + optimizer_offload: false + param_offload: false + reshard_after_forward: true + seed: 42 + strategy: fsdp + ulysses_sequence_parallel_size: 1 + use_orig_params: false + use_torch_compile: true + wrap_policy: + min_num_params: 0 + grad_clip: 1 + kl_loss_coef: 0.001 + kl_loss_type: low_var_kl + loss_agg_mode: token-mean + loss_scale_factor: null + optim: + _target_: verl.workers.config.FSDPOptimizerConfig + betas: + - 0.9 + - 0.999 + clip_grad: 1 + lr: 1e-06 + lr_scheduler_type: constant + lr_warmup_steps: -1 + lr_warmup_steps_ratio: 0 + min_lr_ratio: 0 + num_cycles: 0.5 + optimizer: AdamW + optimizer_impl: torch.optim + override_optimizer_config: null + total_training_steps: 90 + warmup_style: null + weight_decay: 0.01 + policy_loss: + _target_: verl.workers.config.PolicyLossConfig + clip_cov_lb: 1 + clip_cov_ratio: 0.0002 + clip_cov_ub: 5 + kl_cov_ratio: 0.0002 + loss_mode: vanilla + ppo_kl_coef: 0.1 + ppo_epochs: 1 + ppo_max_token_len_per_gpu: 16384 + ppo_micro_batch_size: null + ppo_micro_batch_size_per_gpu: 32 + ppo_mini_batch_size: 256 + profiler: + _target_: verl.utils.profiler.ProfilerConfig + all_ranks: false + enable: false + ranks: [] + save_path: outputs/profile + tool: null + tool_config: + npu: + _target_: verl.utils.profiler.config.NPUToolConfig + analysis: true + contents: [] + discrete: false + level: level0 + nsys: + _target_: verl.utils.profiler.config.NsightToolConfig + discrete: false + torch: + _target_: verl.utils.profiler.config.TorchProfilerToolConfig + contents: [] + discrete: false + torch_memory: + _target_: verl.utils.profiler.config.TorchMemoryToolConfig + stack_depth: 32 + trace_alloc_max_entries: 100000 + rollout_n: 3 + router_replay: + _target_: verl.workers.config.RouterReplayConfig + mode: disabled + record_file: null + replay_file: null + shuffle: false + strategy: fsdp + sum_pi_squared_checkpointing: false + tau_neg: 1.05 + tau_pos: 1 + ulysses_sequence_parallel_size: 1 + use_dynamic_bsz: false + use_fused_kernels: false + use_kl_loss: true + use_prefix_grouper: false + use_remove_padding: true + use_torch_compile: true + hybrid_engine: true + model: + _target_: verl.workers.config.HFModelConfig + custom_chat_template: null + enable_activation_offload: false + enable_gradient_checkpointing: true + exclude_modules: null + external_lib: null + fused_kernel_options: + impl_backend: torch + hf_config_path: null + lora_adapter_path: null + lora_alpha: 16 + lora_rank: 0 + mtp: + _target_: verl.workers.config.MtpConfig + detach_encoder: false + enable: false + enable_rollout: false + enable_train: false + method: mtp + mtp_loss_scaling_factor: 0.1 + num_speculative_tokens: 1 + speculative_algorithm: EAGLE + speculative_eagle_topk: 1 + speculative_num_draft_tokens: 4 + speculative_num_steps: 3 + path: Qwen/Qwen3-4B-Instruct-2507 + target_modules: all-linear + tiled_mlp: + enabled: false + num_shards: 4 + tokenizer_path: null + trust_remote_code: false + use_fused_kernels: false + use_liger: false + use_remove_padding: true + use_shm: false + nccl_timeout: 600 + ref: + _target_: verl.workers.config.FSDPActorConfig + entropy_checkpointing: false + entropy_from_logits_with_chunking: false + fsdp_config: + _target_: verl.workers.config.FSDPEngineConfig + dtype: bfloat16 + entropy_checkpointing: false + entropy_from_logits_with_chunking: false + forward_only: true + forward_prefetch: false + fsdp_size: -1 + full_determinism: false + model_dtype: fp32 + offload_policy: false + optimizer_offload: false + param_offload: false + reshard_after_forward: true + seed: 42 + strategy: fsdp + ulysses_sequence_parallel_size: 1 + use_orig_params: false + use_torch_compile: true + wrap_policy: + min_num_params: 0 + log_prob_max_token_len_per_gpu: 16384 + log_prob_micro_batch_size: null + log_prob_micro_batch_size_per_gpu: 32 + log_prob_use_dynamic_bsz: false + profiler: + _target_: verl.utils.profiler.ProfilerConfig + all_ranks: false + enable: false + ranks: [] + save_path: outputs/profile + tool: null + tool_config: + npu: + _target_: verl.utils.profiler.config.NPUToolConfig + analysis: true + contents: [] + discrete: false + level: level0 + nsys: + _target_: verl.utils.profiler.config.NsightToolConfig + discrete: false + torch: + _target_: verl.utils.profiler.config.TorchProfilerToolConfig + contents: [] + discrete: false + torch_memory: + _target_: verl.utils.profiler.config.TorchMemoryToolConfig + stack_depth: 32 + trace_alloc_max_entries: 100000 + rollout_n: 3 + router_replay: + _target_: verl.workers.config.RouterReplayConfig + mode: disabled + record_file: null + replay_file: null + strategy: fsdp + ulysses_sequence_parallel_size: 1 + use_torch_compile: true + rollout: + _target_: verl.workers.config.RolloutConfig + agent: + _target_: verl.workers.config.AgentLoopConfig + agent_loop_config_path: null + custom_async_server: + _target_: verl.workers.config.CustomAsyncServerConfig + name: null + path: null + default_agent_loop: single_turn_agent + num_workers: 8 + calculate_log_probs: false + checkpoint_engine: + _target_: verl.workers.config.CheckpointEngineConfig + backend: naive + update_weights_bucket_megabytes: 2048 + cudagraph_capture_sizes: null + data_parallel_size: 1 + disable_log_stats: true + do_sample: true + dtype: bfloat16 + enable_chunked_prefill: true + enable_prefix_caching: true + enable_rollout_routing_replay: false + enforce_eager: false + expert_parallel_size: 1 + free_cache_engine: true + gpu_memory_utilization: 0.6 + ignore_eos: false + layered_summon: false + load_format: dummy + log_prob_max_token_len_per_gpu: 16384 + log_prob_micro_batch_size: null + log_prob_micro_batch_size_per_gpu: 32 + log_prob_use_dynamic_bsz: false + logprobs_mode: processed_logprobs + max_model_len: 8192 + max_num_batched_tokens: 8192 + max_num_seqs: 1024 + mode: async + mtp: + _target_: verl.workers.config.MtpConfig + detach_encoder: false + enable: false + enable_rollout: false + enable_train: false + method: mtp + mtp_loss_scaling_factor: 0.1 + num_speculative_tokens: 1 + speculative_algorithm: EAGLE + speculative_eagle_topk: 1 + speculative_num_draft_tokens: 4 + speculative_num_steps: 3 + multi_stage_wake_up: false + multi_turn: + _target_: verl.workers.config.MultiTurnConfig + enable: false + format: hermes + interaction_config_path: null + max_assistant_turns: null + max_parallel_calls: 1 + max_tool_response_length: 256 + max_user_turns: null + num_repeat_rollouts: null + tokenization_sanity_check_mode: strict + tool_config_path: null + tool_response_truncate_side: middle + use_inference_chat_template: false + "n": 3 + name: vllm + over_sample_rate: 0 + pipeline_model_parallel_size: 1 + profiler: + _target_: verl.utils.profiler.ProfilerConfig + all_ranks: false + enable: false + ranks: [] + save_path: outputs/profile + tool: null + tool_config: + npu: + _target_: verl.utils.profiler.config.NPUToolConfig + analysis: true + contents: [] + discrete: false + level: level0 + nsys: + _target_: verl.utils.profiler.config.NsightToolConfig + discrete: false + torch: + _target_: verl.utils.profiler.config.TorchProfilerToolConfig + contents: [] + discrete: false + torch_memory: + _target_: verl.utils.profiler.config.TorchMemoryToolConfig + stack_depth: 32 + trace_alloc_max_entries: 100000 + prometheus: + _target_: verl.workers.config.PrometheusConfig + enable: false + file: /tmp/ray/session_latest/metrics/prometheus/prometheus.yml + port: 9090 + served_model_name: Qwen/Qwen3-4B-Instruct-2507 + prompt_length: 1024 + quantization: null + quantization_config_file: null + response_length: 2048 + scheduling_policy: fcfs + skip_dump_dir: /tmp/rollout_dump + skip_rollout: false + skip_tokenizer_init: true + temperature: 1 + tensor_model_parallel_size: 1 + top_k: -1 + top_p: 1 + trace: + _target_: verl.workers.config.TraceConfig + backend: null + max_samples_per_step_per_worker: null + token2text: false + val_kwargs: + _target_: verl.workers.config.SamplingConfig + do_sample: false + "n": 1 + temperature: 0 + top_k: -1 + top_p: 1 +algorithm: + value: + _target_: verl.trainer.config.AlgoConfig + adv_estimator: grpo + gamma: 1 + kl_ctrl: + _target_: verl.trainer.config.KLControlConfig + horizon: 10000 + kl_coef: 0.001 + target_kl: 0.1 + type: fixed + kl_penalty: kl + lam: 1 + norm_adv_by_std_in_grpo: true + pf_ppo: + reweight_method: pow + weight_pow: 2 + rollout_correction: + bypass_mode: false + loss_type: ppo_clip + rollout_is: null + rollout_is_batch_normalize: false + rollout_is_threshold: 2 + rollout_rs: null + rollout_rs_threshold: null + use_kl_in_reward: false + use_pf_ppo: false +critic: + value: + _target_: verl.workers.config.FSDPCriticConfig + checkpoint: + _target_: verl.trainer.config.CheckpointConfig + async_save: false + load_contents: + - model + - optimizer + - extra + save_contents: + - model + - optimizer + - extra + cliprange_value: 0.5 + data_loader_seed: 42 + enable: null + forward_max_token_len_per_gpu: 32768 + forward_micro_batch_size: null + forward_micro_batch_size_per_gpu: null + grad_clip: 1 + loss_agg_mode: token-mean + model: + _target_: verl.workers.config.FSDPCriticModelCfg + enable_activation_offload: false + enable_gradient_checkpointing: true + external_lib: null + fsdp_config: + _target_: verl.workers.config.FSDPEngineConfig + dtype: bfloat16 + entropy_checkpointing: false + entropy_from_logits_with_chunking: false + forward_only: false + forward_prefetch: false + fsdp_size: -1 + full_determinism: false + model_dtype: fp32 + offload_policy: false + optimizer_offload: false + param_offload: false + reshard_after_forward: true + seed: 42 + strategy: fsdp + ulysses_sequence_parallel_size: 1 + use_orig_params: false + use_torch_compile: true + wrap_policy: + min_num_params: 0 + lora_alpha: 16 + lora_rank: 0 + path: ~/models/deepseek-llm-7b-chat + target_modules: all-linear + tiled_mlp: + enabled: false + num_shards: 4 + tokenizer_path: Qwen/Qwen3-4B-Instruct-2507 + trust_remote_code: false + use_remove_padding: false + use_shm: false + optim: + _target_: verl.workers.config.FSDPOptimizerConfig + betas: + - 0.9 + - 0.999 + clip_grad: 1 + lr: 1e-05 + lr_scheduler_type: constant + lr_warmup_steps: -1 + lr_warmup_steps_ratio: 0 + min_lr_ratio: 0 + num_cycles: 0.5 + optimizer: AdamW + optimizer_impl: torch.optim + override_optimizer_config: null + total_training_steps: 90 + warmup_style: null + weight_decay: 0.01 + ppo_epochs: 1 + ppo_max_token_len_per_gpu: 32768 + ppo_micro_batch_size: null + ppo_micro_batch_size_per_gpu: null + ppo_mini_batch_size: 256 + profiler: + _target_: verl.utils.profiler.ProfilerConfig + all_ranks: false + enable: false + ranks: [] + save_path: outputs/profile + tool: null + tool_config: + npu: + _target_: verl.utils.profiler.config.NPUToolConfig + analysis: true + contents: [] + discrete: false + level: level0 + nsys: + _target_: verl.utils.profiler.config.NsightToolConfig + discrete: false + torch: + _target_: verl.utils.profiler.config.TorchProfilerToolConfig + contents: [] + discrete: false + torch_memory: + _target_: verl.utils.profiler.config.TorchMemoryToolConfig + stack_depth: 32 + trace_alloc_max_entries: 100000 + rollout_n: 3 + shuffle: false + strategy: fsdp + ulysses_sequence_parallel_size: 1 + use_dynamic_bsz: false +custom_reward_function: + value: + name: compute_score + path: /home/mshahidul/readctrl/code/RL_model/verl/verl_train/reward_func/reward.py +data: + value: + custom_cls: + name: null + path: null + datagen: + name: null + path: null + dataloader_num_workers: 8 + filter_overlong_prompts: true + filter_overlong_prompts_workers: 1 + image_key: images + image_patch_size: 14 + max_prompt_length: 1024 + max_response_length: 2048 + prompt_key: prompt + return_full_prompt: false + return_multi_modal_inputs: true + return_raw_chat: true + return_raw_input_ids: false + reward_fn_key: data_source + sampler: + class_name: null + class_path: null + seed: null + shuffle: true + tokenizer: null + tool_config_path: null + train_batch_size: 512 + train_files: /home/mshahidul/readctrl/code/RL_model/verl/verl_train/dataset/train.parquet + train_max_samples: -1 + truncation: error + trust_remote_code: false + use_shm: false + val_batch_size: null + val_files: /home/mshahidul/readctrl/code/RL_model/verl/verl_train/dataset/test.parquet + val_max_samples: -1 + validation_shuffle: false + video_key: videos +global_profiler: + value: + _target_: verl.utils.profiler.ProfilerConfig + global_tool_config: + nsys: + _target_: verl.utils.profiler.config.NsightToolConfig + controller_nsight_options: + cuda-graph-trace: graph + cuda-memory-usage: "true" + trace: cuda,nvtx,cublas,ucx + discrete: false + worker_nsight_options: + capture-range: cudaProfilerApi + capture-range-end: null + cuda-graph-trace: graph + cuda-memory-usage: "true" + kill: none + trace: cuda,nvtx,cublas,ucx + torch_memory: + context: all + stack_depth: 32 + stacks: all + trace_alloc_max_entries: 100000 + profile_continuous_steps: false + save_path: outputs/profile + steps: null + tool: null +ray_kwargs: + value: + ray_init: + num_cpus: null + timeline_json_file: null +reward_manager: + value: + _target_: verl.trainer.config.config.RewardManagerConfig + module: + _target_: verl.trainer.config.config.ModuleConfig + name: custom_reward_manager + path: null + name: naive + source: register +reward_model: + value: + enable: false + enable_resource_pool: false + forward_max_token_len_per_gpu: 32768 + launch_reward_fn_async: false + max_length: null + micro_batch_size: null + micro_batch_size_per_gpu: null + model: + external_lib: null + fsdp_config: + _target_: verl.workers.config.FSDPEngineConfig + forward_prefetch: false + fsdp_size: -1 + param_offload: false + reshard_after_forward: true + wrap_policy: + min_num_params: 0 + input_tokenizer: Qwen/Qwen3-4B-Instruct-2507 + path: ~/models/FsfairX-LLaMA3-RM-v0.1 + trust_remote_code: false + use_fused_kernels: false + use_remove_padding: false + use_shm: false + n_gpus_per_node: 8 + nnodes: 0 + num_workers: 1 + profiler: + _target_: verl.utils.profiler.ProfilerConfig + all_ranks: false + enable: false + ranks: [] + save_path: outputs/profile + tool: null + tool_config: + npu: + _target_: verl.utils.profiler.config.NPUToolConfig + analysis: true + contents: [] + discrete: false + level: level0 + nsys: + _target_: verl.utils.profiler.config.NsightToolConfig + discrete: false + torch: + _target_: verl.utils.profiler.config.TorchProfilerToolConfig + contents: [] + discrete: false + torch_memory: + _target_: verl.utils.profiler.config.TorchMemoryToolConfig + stack_depth: 32 + trace_alloc_max_entries: 100000 + reward_loop_class_name: null + reward_loop_module_path: null + reward_loop_source: register + reward_manager: naive + rollout: + _target_: verl.workers.config.RolloutConfig + cudagraph_capture_sizes: null + data_parallel_size: 1 + disable_log_stats: true + dtype: bfloat16 + enable_chunked_prefill: true + enable_prefix_caching: true + enforce_eager: true + expert_parallel_size: 1 + free_cache_engine: true + gpu_memory_utilization: 0.5 + limit_images: null + load_format: auto + max_model_len: null + max_num_batched_tokens: 8192 + max_num_seqs: 1024 + name: ??? + prompt_length: 2048 + response_length: 2048 + skip_tokenizer_init: false + tensor_model_parallel_size: 2 + sandbox_fusion: + max_concurrent: 64 + memory_limit_mb: 1024 + url: null + strategy: fsdp + ulysses_sequence_parallel_size: 1 + use_dynamic_bsz: false + use_reward_loop: true +trainer: + value: + balance_batch: true + critic_warmup: 0 + default_hdfs_dir: null + default_local_dir: /home/mshahidul/readctrl/code/RL_model/train_v2 + del_local_ckpt_after_load: false + device: cuda + esi_redundant_time: 0 + experiment_name: qwen3-4b-instruct-en + log_val_generations: 0 + logger: + - console + - wandb + max_actor_ckpt_to_keep: 1 + max_critic_ckpt_to_keep: 1 + n_gpus_per_node: 2 + nnodes: 1 + project_name: readctrl-verl + ray_wait_register_center_timeout: 300 + remove_previous_ckpt_in_save: true + resume_from_path: null + resume_mode: auto + rollout_data_dir: null + save_freq: 5 + test_freq: 10 + total_epochs: 15 + total_training_steps: null + use_legacy_worker_impl: auto + val_before_train: true + val_only: false + validation_data_dir: null +transfer_queue: + value: + enable: false diff --git a/code/RL_model/verl/verl_train/wandb/run-20260210_002512-y8zrft04/files/output.log b/code/RL_model/verl/verl_train/wandb/run-20260210_002512-y8zrft04/files/output.log new file mode 100644 index 0000000000000000000000000000000000000000..a561ec1558cd07387a1606fafd18f7d663606cd6 --- /dev/null +++ b/code/RL_model/verl/verl_train/wandb/run-20260210_002512-y8zrft04/files/output.log @@ -0,0 +1,14 @@ +wandb: Detected [dspy, litellm, openai] in use. +wandb: Use W&B Weave for improved LLM call tracing. Install Weave with `pip install weave` then add `import weave` to the top of your script. +wandb: For more information, check out the docs at: https://weave-docs.wandb.ai/ +Checkpoint tracker file does not exist: /home/mshahidul/readctrl/code/RL_model/train_v2/latest_checkpointed_iteration.txt +Training from scratch +test_gen_batch meta info: {'eos_token_id': 151645, 'pad_token_id': 151643, 'recompute_log_prob': False, 'do_sample': False, 'validate': True, 'global_steps': 0} +validation generation end +("Initial validation metrics: {'val-aux/multiclinsum/reward/mean@1': " + "np.float64(0.6505448959575766), 'val-core/multiclinsum/acc/mean@1': " + "np.float64(0.6505449011414762), 'val-aux/num_turns/min': np.int32(2), " + "'val-aux/num_turns/max': np.int32(2), 'val-aux/num_turns/mean': " + 'np.float64(2.0)}') +step:0 - val-aux/multiclinsum/reward/mean@1:np.float64(0.6505448959575766) - val-core/multiclinsum/acc/mean@1:np.float64(0.6505449011414762) - val-aux/num_turns/min:np.int32(2) - val-aux/num_turns/max:np.int32(2) - val-aux/num_turns/mean:np.float64(2.0) +Training Progress: 0%| | 0/90 [00:00, ?it/s] diff --git a/code/RL_model/verl/verl_train/wandb/run-20260210_002512-y8zrft04/files/requirements.txt b/code/RL_model/verl/verl_train/wandb/run-20260210_002512-y8zrft04/files/requirements.txt new file mode 100644 index 0000000000000000000000000000000000000000..dd2de633cd2e89d34a77b40967a7fc9ed3177c34 --- /dev/null +++ b/code/RL_model/verl/verl_train/wandb/run-20260210_002512-y8zrft04/files/requirements.txt @@ -0,0 +1,283 @@ +verl==0.8.0.dev0 +psutil==7.1.3 +colorama==0.4.6 +annotated-doc==0.0.4 +sentry-sdk==2.51.0 +requests==2.32.5 +nvidia-cufile-cu12==1.13.1.3 +ml_dtypes==0.5.4 +xformers==0.0.32.post1 +sglang==0.5.2 +multidict==6.7.1 +typing_extensions==4.15.0 +nvidia-cusparselt-cu12==0.7.1 +openai-harmony==0.0.4 +transformers==4.56.1 +Werkzeug==3.1.5 +identify==2.6.16 +gepa==0.0.26 +pytest==9.0.2 +nvidia-cuda-runtime-cu12==12.8.90 +GitPython==3.1.46 +cupy-cuda12x==13.6.0 +tokenizers==0.22.2 +pybind11==3.0.1 +google-api-core==2.29.0 +partial-json-parser==0.2.1.1.post7 +aiohttp-cors==0.8.1 +sniffio==1.3.1 +tensordict==0.10.0 +smart_open==7.5.0 +cffi==2.0.0 +wcwidth==0.5.3 +asttokens==3.0.1 +opencensus==0.11.4 +rpds-py==0.30.0 +py-spy==0.4.1 +litellm==1.81.9 +gguf==0.17.1 +nvidia-nvjitlink-cu12==12.8.93 +httpx==0.28.1 +cuda-python==13.1.1 +annotated-types==0.7.0 +regex==2026.1.15 +vllm==0.11.0 +idna==3.11 +fsspec==2025.10.0 +parso==0.8.5 +pydantic-extra-types==2.11.0 +MarkupSafe==3.0.3 +cryptography==46.0.4 +openai==2.17.0 +filelock==3.20.3 +modelscope==1.34.0 +outlines==0.1.11 +dnspython==2.8.0 +scipy==1.17.0 +zipp==3.23.0 +PyYAML==6.0.3 +onnx==1.20.1 +torchdata==0.11.0 +cuda-pathfinder==1.3.3 +asyncer==0.0.8 +verl==0.8.0.dev0 +httptools==0.7.1 +opencv-python-headless==4.13.0.90 +importlib_metadata==8.7.1 +peft==0.18.1 +opentelemetry-sdk==1.39.1 +python-json-logger==4.0.0 +alembic==1.18.3 +cuda-bindings==13.1.1 +mdurl==0.1.2 +referencing==0.37.0 +xxhash==3.6.0 +interegular==0.3.3 +fastapi-cli==0.0.20 +uv==0.9.28 +tensorboard==2.20.0 +nvidia-cublas-cu12==12.8.4.1 +sentencepiece==0.2.1 +rich-toolkit==0.18.1 +numpy==2.2.0 +yarl==1.22.0 +opencv-fixer==0.2.5 +tqdm==4.67.2 +python-dotenv==1.2.1 +Mako==1.3.10 +timm==1.0.16 +aiohappyeyeballs==2.6.1 +decord==0.6.0 +jiter==0.12.0 +airportsdata==20250909 +markdown-it-py==4.0.0 +nvidia-cusolver-cu12==11.7.3.90 +pyarrow==23.0.0 +opentelemetry-proto==1.39.1 +anyio==4.12.1 +pycryptodomex==3.23.0 +prometheus_client==0.24.1 +aiohttp==3.13.3 +urllib3==2.6.3 +pexpect==4.9.0 +pydantic-settings==2.12.0 +distro==1.9.0 +av==16.1.0 +cloudpickle==3.1.2 +mpmath==1.3.0 +certifi==2026.1.4 +antlr4-python3-runtime==4.9.3 +torchvision==0.23.0 +accelerate==1.12.0 +watchfiles==1.1.1 +ruff==0.14.14 +wheel==0.46.3 +omegaconf==2.3.0 +nvidia-cufft-cu12==11.3.3.83 +multiprocess==0.70.18 +frozendict==2.4.7 +sympy==1.14.0 +setproctitle==1.3.7 +optuna==4.7.0 +setuptools==79.0.1 +py-cpuinfo==9.0.0 +ipython_pygments_lexers==1.1.1 +rich==14.3.2 +uvicorn==0.40.0 +outlines_core==0.2.11 +llvmlite==0.44.0 +nvidia-cuda-cupti-cu12==12.8.90 +attrs==25.4.0 +anthropic==0.77.0 +packaging==25.0 +fastrlock==0.8.3 +astor==0.8.1 +pluggy==1.6.0 +nvidia-cuda-nvrtc-cu12==12.8.93 +psutil==7.2.2 +virtualenv==20.36.1 +cbor2==5.8.0 +tenacity==9.1.4 +compressed-tensors==0.11.0 +SQLAlchemy==2.0.46 +nvidia-cusparse-cu12==12.5.8.93 +networkx==3.6.1 +httpcore==1.0.9 +onnxscript==0.3.1 +smmap==5.0.2 +opencv-python==4.13.0.90 +traitlets==5.14.3 +python-multipart==0.0.22 +pyvers==0.1.0 +huggingface-hub==0.36.0 +pillow==12.1.0 +jsonschema==4.26.0 +cfgv==3.5.0 +optree==0.18.0 +email-validator==2.3.0 +tabulate==0.9.0 +pre_commit==4.5.1 +msgpack==1.1.2 +depyf==0.19.0 +numba==0.61.2 +six==1.17.0 +aiosignal==1.4.0 +nvidia-nvtx-cu12==12.8.90 +propcache==0.4.1 +torch_memory_saver==0.0.8 +h11==0.16.0 +frozenlist==1.8.0 +websockets==16.0 +nvidia-cudnn-frontend==1.18.0 +build==1.4.0 +google-auth==2.48.0 +pycountry==24.6.1 +colorlog==6.10.1 +stack-data==0.6.3 +typing-inspection==0.4.2 +googleapis-common-protos==1.72.0 +pandas==3.0.0 +typer==0.21.1 +protobuf==6.33.5 +fastapi==0.128.0 +blake3==1.0.8 +opentelemetry-semantic-conventions==0.60b1 +opentelemetry-exporter-prometheus==0.60b1 +nvidia-cudnn-cu12==9.10.2.21 +Markdown==3.10.1 +liger_kernel==0.6.4 +json_repair==0.57.1 +nodeenv==1.10.0 +prompt_toolkit==3.0.52 +torchaudio==2.8.0 +codetiming==1.4.0 +platformdirs==4.5.1 +jsonschema-specifications==2025.9.1 +hydra-core==1.3.2 +tensorboard-data-server==0.7.2 +lm-format-enforcer==0.11.3 +pyasn1_modules==0.4.2 +tiktoken==0.12.0 +starlette==0.50.0 +pyproject_hooks==1.2.0 +flash_attn==2.8.1 +rsa==4.9.1 +ray==2.53.0 +nest-asyncio==1.6.0 +lark==1.2.2 +fastar==0.8.0 +orjson==3.11.6 +prometheus-fastapi-instrumentator==7.1.0 +opentelemetry-api==1.39.1 +mathruler==0.1.0 +pydantic_core==2.41.5 +fastapi-cloud-cli==0.11.0 +pynvml==13.0.1 +loguru==0.7.3 +torch==2.8.0 +msgspec==0.20.0 +nvidia-curand-cu12==10.3.9.90 +blobfile==3.0.0 +gitdb==4.0.12 +llguidance==0.7.30 +hf_transfer==0.1.9 +nvidia-nccl-cu12==2.27.3 +qwen-vl-utils==0.0.14 +ptyprocess==0.7.0 +ipdb==0.13.13 +opencensus-context==0.1.3 +jedi==0.19.2 +click==8.3.1 +datasets==4.5.0 +soxr==1.0.0 +sgl-kernel==0.3.9.post2 +colorful==0.5.8 +pyasn1==0.6.2 +charset-normalizer==3.4.4 +nvidia-ml-py==13.590.48 +hf-xet==1.2.0 +dill==0.4.0 +absl-py==2.4.0 +pydantic==2.12.5 +dspy==3.1.3 +wrapt==2.1.0 +flashinfer-python==0.3.1 +python-dateutil==2.9.0.post0 +torchao==0.9.0 +cachetools==7.0.0 +soundfile==0.13.1 +diskcache==5.6.3 +onnx-ir==0.1.15 +docstring_parser==0.17.0 +matplotlib-inline==0.2.1 +Pygments==2.19.2 +wandb==0.24.1 +pure_eval==0.2.3 +ninja==1.13.0 +proto-plus==1.27.0 +pyzmq==27.1.0 +iniconfig==2.3.0 +Jinja2==3.1.6 +megatron-core==0.13.1 +uvloop==0.22.1 +fastuuid==0.14.0 +pycparser==3.0 +pylatexenc==2.10 +decorator==5.2.1 +shellingham==1.5.4 +lxml==6.0.2 +safetensors==0.7.0 +xgrammar==0.1.25 +pybase64==1.4.3 +ipython==9.9.0 +greenlet==3.3.1 +mistral_common==1.9.0 +rignore==0.7.6 +einops==0.8.2 +distlib==0.4.0 +triton==3.4.0 +executing==2.2.1 +grpcio==1.76.0 +pip==25.3 +verl==0.8.0.dev0 +verl==0.8.0.dev0 diff --git a/code/RL_model/verl/verl_train/wandb/run-20260210_002512-y8zrft04/files/wandb-metadata.json b/code/RL_model/verl/verl_train/wandb/run-20260210_002512-y8zrft04/files/wandb-metadata.json new file mode 100644 index 0000000000000000000000000000000000000000..948ead216ad7a22cb7f272ac30d720580a4ab731 --- /dev/null +++ b/code/RL_model/verl/verl_train/wandb/run-20260210_002512-y8zrft04/files/wandb-metadata.json @@ -0,0 +1,93 @@ +{ + "os": "Linux-5.15.0-160-generic-x86_64-with-glibc2.35", + "python": "CPython 3.12.12", + "startedAt": "2026-02-10T05:25:12.620990Z", + "args": [ + "--node-ip-address=172.16.34.29", + "--node-manager-port=41799", + "--object-store-name=/tmp/ray/session_2026-02-10_00-19-11_291622_3842721/sockets/plasma_store", + "--raylet-name=/tmp/ray/session_2026-02-10_00-19-11_291622_3842721/sockets/raylet", + "--redis-address=None", + "--metrics-agent-port=51842", + "--logging-rotate-bytes=536870912", + "--logging-rotate-backup-count=5", + "--runtime-env-agent-port=52235", + "--gcs-address=172.16.34.29:58975", + "--session-name=session_2026-02-10_00-19-11_291622_3842721", + "--temp-dir=/tmp/ray", + "--webui=127.0.0.1:8297", + "--cluster-id=dc298598f37a0d21538dce20236cc3100f2fd8ec5710337dfdeabc00", + "--startup-token=128", + "--worker-launch-time-ms=1770700763686", + "--node-id=057bfcf3bc01161d491e5fe6ef31ed0729d687cd30c5819b159d5cf2", + "--runtime-env-hash=1096984665" + ], + "program": "/home/mshahidul/miniconda3/envs/verl2/lib/python3.12/site-packages/ray/_private/workers/default_worker.py", + "git": { + "remote": "https://github.com/verl-project/verl", + "commit": "d9939add7a2a01923a9088891f913a5d20c4e622" + }, + "email": "shahidulshakib034@gmail.com", + "root": "/data/home_beta/mshahidul/readctrl/code/RL_model/verl/verl_train", + "host": "gamma", + "executable": "/home/mshahidul/miniconda3/envs/verl2/bin/python3", + "cpu_count": 64, + "cpu_count_logical": 128, + "gpu": "NVIDIA A100 80GB PCIe", + "gpu_count": 6, + "disk": { + "/": { + "total": "3766429188096", + "used": "191265169408" + } + }, + "memory": { + "total": "1081814863872" + }, + "gpu_nvidia": [ + { + "name": "NVIDIA A100 80GB PCIe", + "memoryTotal": "85899345920", + "cudaCores": 6912, + "architecture": "Ampere", + "uuid": "GPU-4a3678c7-34a9-356f-f7b7-7f7e2f44b596" + }, + { + "name": "NVIDIA A100 80GB PCIe", + "memoryTotal": "85899345920", + "cudaCores": 6912, + "architecture": "Ampere", + "uuid": "GPU-df506764-0db5-91b4-8ec9-154a3bb8123f" + }, + { + "name": "NVIDIA A100 80GB PCIe", + "memoryTotal": "85899345920", + "cudaCores": 6912, + "architecture": "Ampere", + "uuid": "GPU-2c3dbd62-b384-2996-a0f6-b32dcfcc3538" + }, + { + "name": "NVIDIA A100 80GB PCIe", + "memoryTotal": "85899345920", + "cudaCores": 6912, + "architecture": "Ampere", + "uuid": "GPU-1ff3dabe-4b9a-ea62-5cc3-01f12f32d328" + }, + { + "name": "NVIDIA H100 PCIe", + "memoryTotal": "85520809984", + "cudaCores": 14592, + "architecture": "Hopper", + "uuid": "GPU-eefc4b8c-0e79-c1d6-a9ff-8325040572eb" + }, + { + "name": "NVIDIA H100 PCIe", + "memoryTotal": "85520809984", + "cudaCores": 14592, + "architecture": "Hopper", + "uuid": "GPU-d42b6057-13e8-1e88-6aa1-9307df72dece" + } + ], + "cudaVersion": "13.0", + "writerId": "b67cw9b3932w1ydhiiskcdzjmplv9v6z" +} \ No newline at end of file diff --git a/code/RL_model/verl/verl_train/wandb/run-20260210_002512-y8zrft04/files/wandb-summary.json b/code/RL_model/verl/verl_train/wandb/run-20260210_002512-y8zrft04/files/wandb-summary.json new file mode 100644 index 0000000000000000000000000000000000000000..d688f146e93899b0fc65189543550cf9f5411b4b --- /dev/null +++ b/code/RL_model/verl/verl_train/wandb/run-20260210_002512-y8zrft04/files/wandb-summary.json @@ -0,0 +1 @@ +{"_step":0,"_timestamp":1.7707018355185583e+09,"_wandb":{"runtime":8351},"val-aux/num_turns/mean":2,"val-core/multiclinsum/acc/mean@1":0.6505449011414762,"val-aux/multiclinsum/reward/mean@1":0.6505448959575766,"_runtime":8351.856333513,"val-aux/num_turns/min":2,"val-aux/num_turns/max":2} \ No newline at end of file diff --git a/code/RL_model/verl/verl_train/wandb/run-20260210_002512-y8zrft04/logs/debug-core.log b/code/RL_model/verl/verl_train/wandb/run-20260210_002512-y8zrft04/logs/debug-core.log new file mode 100644 index 0000000000000000000000000000000000000000..dd7a7f5bce9e53e7afa75a28128991b084876dfa --- /dev/null +++ b/code/RL_model/verl/verl_train/wandb/run-20260210_002512-y8zrft04/logs/debug-core.log @@ -0,0 +1,11 @@ +{"time":"2026-02-10T00:25:12.759037579-05:00","level":"INFO","msg":"main: starting server","port-filename":"/tmp/tmps6u_dtz1/port-3851482.txt","pid":3851482,"log-level":0,"disable-analytics":false,"shutdown-on-parent-exit":false,"enable-dcgm-profiling":false} +{"time":"2026-02-10T00:25:12.760015843-05:00","level":"INFO","msg":"server: will exit if parent process dies","ppid":3851482} +{"time":"2026-02-10T00:25:12.760005502-05:00","level":"INFO","msg":"server: accepting connections","addr":{"Name":"/tmp/wandb-3851482-3861314-2845526687/socket","Net":"unix"}} +{"time":"2026-02-10T00:25:12.923307176-05:00","level":"INFO","msg":"connection: ManageConnectionData: new connection created","id":"1(@)"} +{"time":"2026-02-10T00:25:12.939263644-05:00","level":"INFO","msg":"handleInformInit: received","streamId":"y8zrft04","id":"1(@)"} +{"time":"2026-02-10T00:25:13.551913005-05:00","level":"INFO","msg":"handleInformInit: stream started","streamId":"y8zrft04","id":"1(@)"} +{"time":"2026-02-10T00:25:19.643275682-05:00","level":"INFO","msg":"connection: cancelling request","id":"1(@)","requestId":"rq4c54y5yw8a"} +{"time":"2026-02-10T02:44:25.679362379-05:00","level":"INFO","msg":"connection: cancelling request","id":"1(@)","requestId":"rq4c54y5yw8a"} +{"time":"2026-02-10T02:44:26.425486617-05:00","level":"INFO","msg":"connection: cancelling request","id":"1(@)","requestId":"rq4c54y5yw8a"} +{"time":"2026-02-10T02:44:26.430696861-05:00","level":"INFO","msg":"handleInformFinish: finish message received","streamId":"y8zrft04","id":"1(@)"} +{"time":"2026-02-10T02:44:26.434586373-05:00","level":"INFO","msg":"handleInformFinish: stream closed","streamId":"y8zrft04","id":"1(@)"} diff --git a/code/RL_model/verl/verl_train/wandb/run-20260210_002512-y8zrft04/logs/debug-internal.log b/code/RL_model/verl/verl_train/wandb/run-20260210_002512-y8zrft04/logs/debug-internal.log new file mode 100644 index 0000000000000000000000000000000000000000..9518c3cd698a1acdf87cfa6a09a6e4451c57c2a0 --- /dev/null +++ b/code/RL_model/verl/verl_train/wandb/run-20260210_002512-y8zrft04/logs/debug-internal.log @@ -0,0 +1,12 @@ +{"time":"2026-02-10T00:25:12.940293373-05:00","level":"INFO","msg":"stream: starting","core version":"0.24.1"} +{"time":"2026-02-10T00:25:13.549978748-05:00","level":"INFO","msg":"stream: created new stream","id":"y8zrft04"} +{"time":"2026-02-10T00:25:13.550105809-05:00","level":"INFO","msg":"handler: started","stream_id":"y8zrft04"} +{"time":"2026-02-10T00:25:13.551890555-05:00","level":"INFO","msg":"stream: started","id":"y8zrft04"} +{"time":"2026-02-10T00:25:13.551928596-05:00","level":"INFO","msg":"writer: started","stream_id":"y8zrft04"} +{"time":"2026-02-10T00:25:13.551935087-05:00","level":"INFO","msg":"sender: started","stream_id":"y8zrft04"} +{"time":"2026-02-10T02:44:26.239366453-05:00","level":"INFO","msg":"fileTransfer: Close: file transfer manager closed"} +{"time":"2026-02-10T02:44:26.419330403-05:00","level":"INFO","msg":"handler: operation stats","stats":{}} +{"time":"2026-02-10T02:44:26.430714884-05:00","level":"INFO","msg":"stream: closing","id":"y8zrft04"} +{"time":"2026-02-10T02:44:26.430729866-05:00","level":"INFO","msg":"handler: closed","stream_id":"y8zrft04"} +{"time":"2026-02-10T02:44:26.43342045-05:00","level":"INFO","msg":"sender: closed","stream_id":"y8zrft04"} +{"time":"2026-02-10T02:44:26.433430718-05:00","level":"INFO","msg":"stream: closed","id":"y8zrft04"} diff --git a/code/RL_model/verl/verl_train/wandb/run-20260210_002512-y8zrft04/logs/debug.log b/code/RL_model/verl/verl_train/wandb/run-20260210_002512-y8zrft04/logs/debug.log new file mode 100644 index 0000000000000000000000000000000000000000..90ff43848a4247a961c485db7be22f2620e9112a --- /dev/null +++ b/code/RL_model/verl/verl_train/wandb/run-20260210_002512-y8zrft04/logs/debug.log @@ -0,0 +1,24 @@ +2026-02-10 00:25:12,638 INFO MainThread:3851482 [wandb_setup.py:_flush():81] Current SDK version is 0.24.1 +2026-02-10 00:25:12,638 INFO MainThread:3851482 [wandb_setup.py:_flush():81] Configure stats pid to 3851482 +2026-02-10 00:25:12,638 INFO MainThread:3851482 [wandb_setup.py:_flush():81] Loading settings from environment variables +2026-02-10 00:25:12,638 INFO MainThread:3851482 [wandb_init.py:setup_run_log_directory():717] Logging user logs to /data/home_beta/mshahidul/readctrl/code/RL_model/verl/verl_train/wandb/run-20260210_002512-y8zrft04/logs/debug.log +2026-02-10 00:25:12,639 INFO MainThread:3851482 [wandb_init.py:setup_run_log_directory():718] Logging internal logs to /data/home_beta/mshahidul/readctrl/code/RL_model/verl/verl_train/wandb/run-20260210_002512-y8zrft04/logs/debug-internal.log +2026-02-10 00:25:12,639 INFO MainThread:3851482 [wandb_init.py:init():844] calling init triggers +2026-02-10 00:25:12,640 INFO MainThread:3851482 [wandb_init.py:init():849] wandb.init called with sweep_config: {} +config: {'actor_rollout_ref': {'actor': {'optim': {'_target_': 'verl.workers.config.FSDPOptimizerConfig', 'optimizer': 'AdamW', 'optimizer_impl': 'torch.optim', 'lr': 1e-06, 'lr_warmup_steps_ratio': 0.0, 'total_training_steps': 90, 'weight_decay': 0.01, 'lr_warmup_steps': -1, 'betas': [0.9, 0.999], 'clip_grad': 1.0, 'min_lr_ratio': 0.0, 'num_cycles': 0.5, 'lr_scheduler_type': 'constant', 'warmup_style': None, 'override_optimizer_config': None}, 'fsdp_config': {'_target_': 'verl.workers.config.FSDPEngineConfig', 'wrap_policy': {'min_num_params': 0}, 'param_offload': False, 'optimizer_offload': False, 'offload_policy': False, 'reshard_after_forward': True, 'fsdp_size': -1, 'forward_prefetch': False, 'model_dtype': 'fp32', 'use_orig_params': False, 'seed': 42, 'full_determinism': False, 'ulysses_sequence_parallel_size': 1, 'entropy_from_logits_with_chunking': False, 'use_torch_compile': True, 'entropy_checkpointing': False, 'forward_only': False, 'strategy': 'fsdp', 'dtype': 'bfloat16'}, '_target_': 'verl.workers.config.FSDPActorConfig', 'rollout_n': 3, 'strategy': 'fsdp', 'ppo_mini_batch_size': 256, 'ppo_micro_batch_size': None, 'ppo_micro_batch_size_per_gpu': 32, 'use_dynamic_bsz': False, 'ppo_max_token_len_per_gpu': 16384, 'clip_ratio': 0.2, 'clip_ratio_low': 0.2, 'clip_ratio_high': 0.2, 'tau_pos': 1.0, 'tau_neg': 1.05, 'freeze_vision_tower': False, 'policy_loss': {'_target_': 'verl.workers.config.PolicyLossConfig', 'loss_mode': 'vanilla', 'clip_cov_ratio': 0.0002, 'clip_cov_lb': 1.0, 'clip_cov_ub': 5.0, 'kl_cov_ratio': 0.0002, 'ppo_kl_coef': 0.1}, 'clip_ratio_c': 3.0, 'loss_agg_mode': 'token-mean', 'loss_scale_factor': None, 'entropy_coeff': 0, 'calculate_entropy': False, 'use_kl_loss': True, 'use_prefix_grouper': False, 'use_torch_compile': True, 'kl_loss_coef': 0.001, 'kl_loss_type': 'low_var_kl', 'ppo_epochs': 1, 'shuffle': False, 'data_loader_seed': 42, 'checkpoint': {'_target_': 'verl.trainer.config.CheckpointConfig', 'save_contents': ['model', 'optimizer', 'extra'], 'load_contents': ['model', 'optimizer', 'extra'], 'async_save': False}, 'use_fused_kernels': False, 'profiler': {'_target_': 'verl.utils.profiler.ProfilerConfig', 'tool': None, 'enable': False, 'all_ranks': False, 'ranks': [], 'save_path': 'outputs/profile', 'tool_config': {'nsys': {'_target_': 'verl.utils.profiler.config.NsightToolConfig', 'discrete': False}, 'npu': {'_target_': 'verl.utils.profiler.config.NPUToolConfig', 'contents': [], 'level': 'level0', 'analysis': True, 'discrete': False}, 'torch': {'_target_': 'verl.utils.profiler.config.TorchProfilerToolConfig', 'contents': [], 'discrete': False}, 'torch_memory': {'_target_': 'verl.utils.profiler.config.TorchMemoryToolConfig', 'trace_alloc_max_entries': 100000, 'stack_depth': 32}}}, 'router_replay': {'_target_': 'verl.workers.config.RouterReplayConfig', 'mode': 'disabled', 'record_file': None, 'replay_file': None}, 'grad_clip': 1.0, 'ulysses_sequence_parallel_size': 1, 'entropy_from_logits_with_chunking': False, 'entropy_checkpointing': False, 'use_remove_padding': True, 'calculate_sum_pi_squared': False, 'sum_pi_squared_checkpointing': False}, 'ref': {'rollout_n': 3, 'strategy': 'fsdp', 'use_torch_compile': True, 'log_prob_micro_batch_size': None, 'log_prob_micro_batch_size_per_gpu': 32, 'log_prob_use_dynamic_bsz': False, 'log_prob_max_token_len_per_gpu': 16384, 'profiler': {'_target_': 'verl.utils.profiler.ProfilerConfig', 'tool': None, 'enable': False, 'all_ranks': False, 'ranks': [], 'save_path': 'outputs/profile', 'tool_config': {'nsys': {'_target_': 'verl.utils.profiler.config.NsightToolConfig', 'discrete': False}, 'npu': {'_target_': 'verl.utils.profiler.config.NPUToolConfig', 'contents': [], 'level': 'level0', 'analysis': True, 'discrete': False}, 'torch': {'_target_': 'verl.utils.profiler.config.TorchProfilerToolConfig', 'contents': [], 'discrete': False}, 'torch_memory': {'_target_': 'verl.utils.profiler.config.TorchMemoryToolConfig', 'trace_alloc_max_entries': 100000, 'stack_depth': 32}}}, 'router_replay': {'_target_': 'verl.workers.config.RouterReplayConfig', 'mode': 'disabled', 'record_file': None, 'replay_file': None}, 'fsdp_config': {'_target_': 'verl.workers.config.FSDPEngineConfig', 'wrap_policy': {'min_num_params': 0}, 'param_offload': False, 'optimizer_offload': False, 'offload_policy': False, 'reshard_after_forward': True, 'fsdp_size': -1, 'forward_prefetch': False, 'model_dtype': 'fp32', 'use_orig_params': False, 'seed': 42, 'full_determinism': False, 'ulysses_sequence_parallel_size': 1, 'entropy_from_logits_with_chunking': False, 'use_torch_compile': True, 'entropy_checkpointing': False, 'forward_only': True, 'strategy': 'fsdp', 'dtype': 'bfloat16'}, '_target_': 'verl.workers.config.FSDPActorConfig', 'ulysses_sequence_parallel_size': 1, 'entropy_from_logits_with_chunking': False, 'entropy_checkpointing': False}, 'rollout': {'_target_': 'verl.workers.config.RolloutConfig', 'name': 'vllm', 'mode': 'async', 'temperature': 1.0, 'top_k': -1, 'top_p': 1, 'prompt_length': 1024, 'response_length': 2048, 'dtype': 'bfloat16', 'gpu_memory_utilization': 0.6, 'ignore_eos': False, 'enforce_eager': False, 'cudagraph_capture_sizes': None, 'free_cache_engine': True, 'tensor_model_parallel_size': 1, 'data_parallel_size': 1, 'expert_parallel_size': 1, 'pipeline_model_parallel_size': 1, 'max_num_batched_tokens': 8192, 'max_model_len': 8192, 'max_num_seqs': 1024, 'enable_chunked_prefill': True, 'enable_prefix_caching': True, 'logprobs_mode': 'processed_logprobs', 'scheduling_policy': 'fcfs', 'load_format': 'dummy', 'log_prob_micro_batch_size': None, 'log_prob_micro_batch_size_per_gpu': 32, 'log_prob_use_dynamic_bsz': False, 'log_prob_max_token_len_per_gpu': 16384, 'disable_log_stats': True, 'do_sample': True, 'n': 3, 'over_sample_rate': 0, 'multi_stage_wake_up': False, 'engine_kwargs': {'vllm': {}, 'sglang': {}, 'trtllm': {}}, 'val_kwargs': {'_target_': 'verl.workers.config.SamplingConfig', 'top_k': -1, 'top_p': 1.0, 'temperature': 0, 'n': 1, 'do_sample': False}, 'multi_turn': {'_target_': 'verl.workers.config.MultiTurnConfig', 'enable': False, 'max_assistant_turns': None, 'tool_config_path': None, 'max_user_turns': None, 'max_parallel_calls': 1, 'max_tool_response_length': 256, 'tool_response_truncate_side': 'middle', 'interaction_config_path': None, 'use_inference_chat_template': False, 'tokenization_sanity_check_mode': 'strict', 'format': 'hermes', 'num_repeat_rollouts': None}, 'calculate_log_probs': False, 'agent': {'_target_': 'verl.workers.config.AgentLoopConfig', 'num_workers': 8, 'default_agent_loop': 'single_turn_agent', 'agent_loop_config_path': None, 'custom_async_server': {'_target_': 'verl.workers.config.CustomAsyncServerConfig', 'path': None, 'name': None}}, 'checkpoint_engine': {'_target_': 'verl.workers.config.CheckpointEngineConfig', 'backend': 'naive', 'update_weights_bucket_megabytes': 2048, 'engine_kwargs': {}}, 'trace': {'_target_': 'verl.workers.config.TraceConfig', 'backend': None, 'token2text': False, 'max_samples_per_step_per_worker': None}, 'skip_rollout': False, 'skip_dump_dir': '/tmp/rollout_dump', 'skip_tokenizer_init': True, 'enable_rollout_routing_replay': False, 'profiler': {'_target_': 'verl.utils.profiler.ProfilerConfig', 'tool': None, 'enable': False, 'all_ranks': False, 'ranks': [], 'save_path': 'outputs/profile', 'tool_config': {'nsys': {'_target_': 'verl.utils.profiler.config.NsightToolConfig', 'discrete': False}, 'npu': {'_target_': 'verl.utils.profiler.config.NPUToolConfig', 'contents': [], 'level': 'level0', 'analysis': True, 'discrete': False}, 'torch': {'_target_': 'verl.utils.profiler.config.TorchProfilerToolConfig', 'contents': [], 'discrete': False}, 'torch_memory': {'_target_': 'verl.utils.profiler.config.TorchMemoryToolConfig', 'trace_alloc_max_entries': 100000, 'stack_depth': 32}}}, 'prometheus': {'_target_': 'verl.workers.config.PrometheusConfig', 'enable': False, 'port': 9090, 'file': '/tmp/ray/session_latest/metrics/prometheus/prometheus.yml', 'served_model_name': 'Qwen/Qwen3-4B-Instruct-2507'}, 'quantization': None, 'quantization_config_file': None, 'mtp': {'_target_': 'verl.workers.config.MtpConfig', 'enable': False, 'enable_train': False, 'enable_rollout': False, 'detach_encoder': False, 'mtp_loss_scaling_factor': 0.1, 'speculative_algorithm': 'EAGLE', 'speculative_num_steps': 3, 'speculative_eagle_topk': 1, 'speculative_num_draft_tokens': 4, 'method': 'mtp', 'num_speculative_tokens': 1}, 'layered_summon': False}, 'model': {'_target_': 'verl.workers.config.HFModelConfig', 'path': 'Qwen/Qwen3-4B-Instruct-2507', 'hf_config_path': None, 'tokenizer_path': None, 'use_shm': False, 'trust_remote_code': False, 'custom_chat_template': None, 'external_lib': None, 'override_config': {}, 'enable_gradient_checkpointing': True, 'enable_activation_offload': False, 'use_remove_padding': True, 'lora_rank': 0, 'lora_alpha': 16, 'target_modules': 'all-linear', 'exclude_modules': None, 'lora_adapter_path': None, 'use_liger': False, 'use_fused_kernels': False, 'fused_kernel_options': {'impl_backend': 'torch'}, 'tiled_mlp': {'enabled': False, 'num_shards': 4}, 'mtp': {'_target_': 'verl.workers.config.MtpConfig', 'enable': False, 'enable_train': False, 'enable_rollout': False, 'detach_encoder': False, 'mtp_loss_scaling_factor': 0.1, 'speculative_algorithm': 'EAGLE', 'speculative_num_steps': 3, 'speculative_eagle_topk': 1, 'speculative_num_draft_tokens': 4, 'method': 'mtp', 'num_speculative_tokens': 1}}, 'hybrid_engine': True, 'nccl_timeout': 600}, 'data': {'tokenizer': None, 'use_shm': False, 'train_files': '/home/mshahidul/readctrl/code/RL_model/verl/verl_train/dataset/train.parquet', 'val_files': '/home/mshahidul/readctrl/code/RL_model/verl/verl_train/dataset/test.parquet', 'train_max_samples': -1, 'val_max_samples': -1, 'prompt_key': 'prompt', 'reward_fn_key': 'data_source', 'max_prompt_length': 1024, 'max_response_length': 2048, 'train_batch_size': 512, 'val_batch_size': None, 'tool_config_path': None, 'return_raw_input_ids': False, 'return_raw_chat': True, 'return_full_prompt': False, 'shuffle': True, 'seed': None, 'dataloader_num_workers': 8, 'image_patch_size': 14, 'validation_shuffle': False, 'filter_overlong_prompts': True, 'filter_overlong_prompts_workers': 1, 'truncation': 'error', 'image_key': 'images', 'video_key': 'videos', 'trust_remote_code': False, 'custom_cls': {'path': None, 'name': None}, 'return_multi_modal_inputs': True, 'sampler': {'class_path': None, 'class_name': None}, 'datagen': {'path': None, 'name': None}, 'apply_chat_template_kwargs': {}}, 'reward_manager': {'_target_': 'verl.trainer.config.config.RewardManagerConfig', 'source': 'register', 'name': 'naive', 'module': {'_target_': 'verl.trainer.config.config.ModuleConfig', 'path': None, 'name': 'custom_reward_manager'}}, 'critic': {'optim': {'_target_': 'verl.workers.config.FSDPOptimizerConfig', 'optimizer': 'AdamW', 'optimizer_impl': 'torch.optim', 'lr': 1e-05, 'lr_warmup_steps_ratio': 0.0, 'total_training_steps': 90, 'weight_decay': 0.01, 'lr_warmup_steps': -1, 'betas': [0.9, 0.999], 'clip_grad': 1.0, 'min_lr_ratio': 0.0, 'num_cycles': 0.5, 'lr_scheduler_type': 'constant', 'warmup_style': None, 'override_optimizer_config': None}, 'model': {'fsdp_config': {'_target_': 'verl.workers.config.FSDPEngineConfig', 'wrap_policy': {'min_num_params': 0}, 'param_offload': False, 'optimizer_offload': False, 'offload_policy': False, 'reshard_after_forward': True, 'fsdp_size': -1, 'forward_prefetch': False, 'model_dtype': 'fp32', 'use_orig_params': False, 'seed': 42, 'full_determinism': False, 'ulysses_sequence_parallel_size': 1, 'entropy_from_logits_with_chunking': False, 'use_torch_compile': True, 'entropy_checkpointing': False, 'forward_only': False, 'strategy': 'fsdp', 'dtype': 'bfloat16'}, 'path': '~/models/deepseek-llm-7b-chat', 'tokenizer_path': 'Qwen/Qwen3-4B-Instruct-2507', 'override_config': {}, 'external_lib': None, 'trust_remote_code': False, '_target_': 'verl.workers.config.FSDPCriticModelCfg', 'use_shm': False, 'enable_gradient_checkpointing': True, 'enable_activation_offload': False, 'use_remove_padding': False, 'lora_rank': 0, 'lora_alpha': 16, 'target_modules': 'all-linear', 'tiled_mlp': {'enabled': False, 'num_shards': 4}}, '_target_': 'verl.workers.config.FSDPCriticConfig', 'rollout_n': 3, 'strategy': 'fsdp', 'enable': None, 'ppo_mini_batch_size': 256, 'ppo_micro_batch_size': None, 'ppo_micro_batch_size_per_gpu': None, 'use_dynamic_bsz': False, 'ppo_max_token_len_per_gpu': 32768, 'forward_max_token_len_per_gpu': 32768, 'ppo_epochs': 1, 'shuffle': False, 'data_loader_seed': 42, 'cliprange_value': 0.5, 'loss_agg_mode': 'token-mean', 'checkpoint': {'_target_': 'verl.trainer.config.CheckpointConfig', 'save_contents': ['model', 'optimizer', 'extra'], 'load_contents': ['model', 'optimizer', 'extra'], 'async_save': False}, 'profiler': {'_target_': 'verl.utils.profiler.ProfilerConfig', 'tool': None, 'enable': False, 'all_ranks': False, 'ranks': [], 'save_path': 'outputs/profile', 'tool_config': {'nsys': {'_target_': 'verl.utils.profiler.config.NsightToolConfig', 'discrete': False}, 'npu': {'_target_': 'verl.utils.profiler.config.NPUToolConfig', 'contents': [], 'level': 'level0', 'analysis': True, 'discrete': False}, 'torch': {'_target_': 'verl.utils.profiler.config.TorchProfilerToolConfig', 'contents': [], 'discrete': False}, 'torch_memory': {'_target_': 'verl.utils.profiler.config.TorchMemoryToolConfig', 'trace_alloc_max_entries': 100000, 'stack_depth': 32}}}, 'forward_micro_batch_size': None, 'forward_micro_batch_size_per_gpu': None, 'ulysses_sequence_parallel_size': 1, 'grad_clip': 1.0}, 'reward_model': {'enable': False, 'enable_resource_pool': False, 'n_gpus_per_node': 8, 'nnodes': 0, 'strategy': 'fsdp', 'model': {'input_tokenizer': 'Qwen/Qwen3-4B-Instruct-2507', 'path': '~/models/FsfairX-LLaMA3-RM-v0.1', 'external_lib': None, 'trust_remote_code': False, 'override_config': {}, 'use_shm': False, 'use_remove_padding': False, 'use_fused_kernels': False, 'fsdp_config': {'_target_': 'verl.workers.config.FSDPEngineConfig', 'wrap_policy': {'min_num_params': 0}, 'param_offload': False, 'reshard_after_forward': True, 'fsdp_size': -1, 'forward_prefetch': False}}, 'micro_batch_size': None, 'micro_batch_size_per_gpu': None, 'max_length': None, 'use_dynamic_bsz': False, 'forward_max_token_len_per_gpu': 32768, 'reward_manager': 'naive', 'reward_loop_source': 'register', 'reward_loop_module_path': None, 'reward_loop_class_name': None, 'launch_reward_fn_async': False, 'sandbox_fusion': {'url': None, 'max_concurrent': 64, 'memory_limit_mb': 1024}, 'profiler': {'_target_': 'verl.utils.profiler.ProfilerConfig', 'tool': None, 'enable': False, 'all_ranks': False, 'ranks': [], 'save_path': 'outputs/profile', 'tool_config': {'nsys': {'_target_': 'verl.utils.profiler.config.NsightToolConfig', 'discrete': False}, 'npu': {'_target_': 'verl.utils.profiler.config.NPUToolConfig', 'contents': [], 'level': 'level0', 'analysis': True, 'discrete': False}, 'torch': {'_target_': 'verl.utils.profiler.config.TorchProfilerToolConfig', 'contents': [], 'discrete': False}, 'torch_memory': {'_target_': 'verl.utils.profiler.config.TorchMemoryToolConfig', 'trace_alloc_max_entries': 100000, 'stack_depth': 32}}}, 'ulysses_sequence_parallel_size': 1, 'use_reward_loop': True, 'num_workers': 1, 'rollout': {'_target_': 'verl.workers.config.RolloutConfig', 'name': '???', 'dtype': 'bfloat16', 'gpu_memory_utilization': 0.5, 'enforce_eager': True, 'cudagraph_capture_sizes': None, 'free_cache_engine': True, 'data_parallel_size': 1, 'expert_parallel_size': 1, 'tensor_model_parallel_size': 2, 'max_num_batched_tokens': 8192, 'max_model_len': None, 'max_num_seqs': 1024, 'load_format': 'auto', 'engine_kwargs': {}, 'limit_images': None, 'enable_chunked_prefill': True, 'enable_prefix_caching': True, 'disable_log_stats': True, 'skip_tokenizer_init': False, 'prompt_length': 2048, 'response_length': 2048}}, 'algorithm': {'rollout_correction': {'rollout_is': None, 'rollout_is_threshold': 2.0, 'rollout_rs': None, 'rollout_rs_threshold': None, 'bypass_mode': False, 'loss_type': 'ppo_clip', 'rollout_is_batch_normalize': False}, '_target_': 'verl.trainer.config.AlgoConfig', 'gamma': 1.0, 'lam': 1.0, 'adv_estimator': 'grpo', 'norm_adv_by_std_in_grpo': True, 'use_kl_in_reward': False, 'kl_penalty': 'kl', 'kl_ctrl': {'_target_': 'verl.trainer.config.KLControlConfig', 'type': 'fixed', 'kl_coef': 0.001, 'horizon': 10000, 'target_kl': 0.1}, 'use_pf_ppo': False, 'pf_ppo': {'reweight_method': 'pow', 'weight_pow': 2.0}}, 'custom_reward_function': {'path': '/home/mshahidul/readctrl/code/RL_model/verl/verl_train/reward_func/reward.py', 'name': 'compute_score'}, 'trainer': {'balance_batch': True, 'total_epochs': 15, 'total_training_steps': None, 'project_name': 'readctrl-verl', 'experiment_name': 'qwen3-4b-instruct-en', 'logger': ['console', 'wandb'], 'log_val_generations': 0, 'rollout_data_dir': None, 'validation_data_dir': None, 'nnodes': 1, 'n_gpus_per_node': 2, 'save_freq': 5, 'esi_redundant_time': 0, 'resume_mode': 'auto', 'resume_from_path': None, 'val_before_train': True, 'val_only': False, 'test_freq': 10, 'critic_warmup': 0, 'default_hdfs_dir': None, 'del_local_ckpt_after_load': False, 'default_local_dir': '/home/mshahidul/readctrl/code/RL_model/train_v2', 'max_actor_ckpt_to_keep': 1, 'max_critic_ckpt_to_keep': 1, 'ray_wait_register_center_timeout': 300, 'device': 'cuda', 'use_legacy_worker_impl': 'auto', 'remove_previous_ckpt_in_save': True}, 'global_profiler': {'_target_': 'verl.utils.profiler.ProfilerConfig', 'tool': None, 'steps': None, 'profile_continuous_steps': False, 'save_path': 'outputs/profile', 'global_tool_config': {'nsys': {'_target_': 'verl.utils.profiler.config.NsightToolConfig', 'discrete': False, 'controller_nsight_options': {'trace': 'cuda,nvtx,cublas,ucx', 'cuda-memory-usage': 'true', 'cuda-graph-trace': 'graph'}, 'worker_nsight_options': {'trace': 'cuda,nvtx,cublas,ucx', 'cuda-memory-usage': 'true', 'cuda-graph-trace': 'graph', 'capture-range': 'cudaProfilerApi', 'capture-range-end': None, 'kill': 'none'}}, 'torch_memory': {'trace_alloc_max_entries': 100000, 'stack_depth': 32, 'context': 'all', 'stacks': 'all', 'kw_args': {}}}}, 'transfer_queue': {'enable': False}, 'ray_kwargs': {'ray_init': {'num_cpus': None}, 'timeline_json_file': None}, '_wandb': {}} +2026-02-10 00:25:12,640 INFO MainThread:3851482 [wandb_init.py:init():892] starting backend +2026-02-10 00:25:12,924 INFO MainThread:3851482 [wandb_init.py:init():895] sending inform_init request +2026-02-10 00:25:12,935 INFO MainThread:3851482 [wandb_init.py:init():903] backend started and connected +2026-02-10 00:25:12,954 INFO MainThread:3851482 [wandb_init.py:init():973] updated telemetry +2026-02-10 00:25:12,975 INFO MainThread:3851482 [wandb_init.py:init():997] communicating run to backend with 90.0 second timeout +2026-02-10 00:25:13,820 INFO MainThread:3851482 [wandb_init.py:init():1042] starting run threads in backend +2026-02-10 00:25:14,616 INFO MainThread:3851482 [wandb_run.py:_console_start():2529] atexit reg +2026-02-10 00:25:14,617 INFO MainThread:3851482 [wandb_run.py:_redirect():2377] redirect: wrap_raw +2026-02-10 00:25:14,617 INFO MainThread:3851482 [wandb_run.py:_redirect():2446] Wrapping output streams. +2026-02-10 00:25:14,617 INFO MainThread:3851482 [wandb_run.py:_redirect():2469] Redirects installed. +2026-02-10 00:25:14,628 INFO MainThread:3851482 [wandb_init.py:init():1082] run started, returning control to user process +2026-02-10 02:44:25,674 INFO MainThread:3851482 [wandb_run.py:_finish():2295] finishing run shahidulshakib034-khulna-university-of-engineering-techn/readctrl-verl/y8zrft04 +2026-02-10 02:44:25,676 INFO MainThread:3851482 [wandb_run.py:_atexit_cleanup():2494] got exitcode: 0 +2026-02-10 02:44:25,677 INFO MainThread:3851482 [wandb_run.py:_restore():2476] restore +2026-02-10 02:44:25,678 INFO MainThread:3851482 [wandb_run.py:_restore():2482] restore done +2026-02-10 02:44:26,426 INFO MainThread:3851482 [wandb_run.py:_footer_sync_info():3871] logging synced files diff --git a/code/RL_model/verl/verl_train/wandb/run-20260210_104801-4ptnl9ej/files/config.yaml b/code/RL_model/verl/verl_train/wandb/run-20260210_104801-4ptnl9ej/files/config.yaml new file mode 100644 index 0000000000000000000000000000000000000000..b1ca123d34d976ffb623d58334cd9a4cc0638a97 --- /dev/null +++ b/code/RL_model/verl/verl_train/wandb/run-20260210_104801-4ptnl9ej/files/config.yaml @@ -0,0 +1,819 @@ +_wandb: + value: + cli_version: 0.24.1 + e: + s1zs8nbohjb0gcsqxia4r556f3absilj: + args: + - --node-ip-address=172.16.34.29 + - --node-manager-port=35027 + - --object-store-name=/tmp/ray/session_2026-02-10_10-43-37_364111_468038/sockets/plasma_store + - --raylet-name=/tmp/ray/session_2026-02-10_10-43-37_364111_468038/sockets/raylet + - --redis-address=None + - --metrics-agent-port=62875 + - --logging-rotate-bytes=536870912 + - --logging-rotate-backup-count=5 + - --runtime-env-agent-port=61020 + - --gcs-address=172.16.34.29:44687 + - --session-name=session_2026-02-10_10-43-37_364111_468038 + - --temp-dir=/tmp/ray + - --webui=127.0.0.1:8297 + - --cluster-id=1a2afab2d844e52e8ba1ba2a3a7177b31bfbebd4bc4a92c2f46ac3a2 + - --startup-token=128 + - --worker-launch-time-ms=1770738229009 + - --node-id=cb7e6953dd0f4e0cc6a0673aa459e295ee760b1101d6cf807e0c0643 + - --runtime-env-hash=1096984665 + cpu_count: 64 + cpu_count_logical: 128 + cudaVersion: "13.0" + disk: + /: + total: "3766429188096" + used: "191249948672" + email: shahidulshakib034@gmail.com + executable: /home/mshahidul/miniconda3/envs/verl2/bin/python3 + git: + commit: d9939add7a2a01923a9088891f913a5d20c4e622 + remote: https://github.com/verl-project/verl + gpu: NVIDIA A100 80GB PCIe + gpu_count: 6 + gpu_nvidia: + - architecture: Ampere + cudaCores: 6912 + memoryTotal: "85899345920" + name: NVIDIA A100 80GB PCIe + uuid: GPU-4a3678c7-34a9-356f-f7b7-7f7e2f44b596 + - architecture: Ampere + cudaCores: 6912 + memoryTotal: "85899345920" + name: NVIDIA A100 80GB PCIe + uuid: GPU-df506764-0db5-91b4-8ec9-154a3bb8123f + - architecture: Ampere + cudaCores: 6912 + memoryTotal: "85899345920" + name: NVIDIA A100 80GB PCIe + uuid: GPU-2c3dbd62-b384-2996-a0f6-b32dcfcc3538 + - architecture: Ampere + cudaCores: 6912 + memoryTotal: "85899345920" + name: NVIDIA A100 80GB PCIe + uuid: GPU-1ff3dabe-4b9a-ea62-5cc3-01f12f32d328 + - architecture: Hopper + cudaCores: 14592 + memoryTotal: "85520809984" + name: NVIDIA H100 PCIe + uuid: GPU-eefc4b8c-0e79-c1d6-a9ff-8325040572eb + - architecture: Hopper + cudaCores: 14592 + memoryTotal: "85520809984" + name: NVIDIA H100 PCIe + uuid: GPU-d42b6057-13e8-1e88-6aa1-9307df72dece + host: gamma + memory: + total: "1081814863872" + os: Linux-5.15.0-160-generic-x86_64-with-glibc2.35 + program: /home/mshahidul/miniconda3/envs/verl2/lib/python3.12/site-packages/ray/_private/workers/default_worker.py + python: CPython 3.12.12 + root: /data/home_beta/mshahidul/readctrl/code/RL_model/verl/verl_train + startedAt: "2026-02-10T15:48:01.036772Z" + writerId: s1zs8nbohjb0gcsqxia4r556f3absilj + m: [] + python_version: 3.12.12 + t: + "1": + - 1 + - 11 + - 30 + - 35 + - 41 + - 49 + - 50 + - 51 + - 71 + - 95 + - 98 + - 105 + - 107 + "2": + - 1 + - 11 + - 30 + - 35 + - 41 + - 49 + - 50 + - 51 + - 71 + - 95 + - 98 + - 105 + - 107 + "3": + - 2 + - 13 + - 16 + - 61 + "4": 3.12.12 + "5": 0.24.1 + "6": 4.56.1 + "12": 0.24.1 + "13": linux-x86_64 +actor_rollout_ref: + value: + actor: + _target_: verl.workers.config.FSDPActorConfig + calculate_entropy: false + calculate_sum_pi_squared: false + checkpoint: + _target_: verl.trainer.config.CheckpointConfig + async_save: false + load_contents: + - model + - optimizer + - extra + save_contents: + - model + - optimizer + - extra + clip_ratio: 0.2 + clip_ratio_c: 3 + clip_ratio_high: 0.2 + clip_ratio_low: 0.2 + data_loader_seed: 42 + entropy_checkpointing: false + entropy_coeff: 0 + entropy_from_logits_with_chunking: false + freeze_vision_tower: false + fsdp_config: + _target_: verl.workers.config.FSDPEngineConfig + dtype: bfloat16 + entropy_checkpointing: false + entropy_from_logits_with_chunking: false + forward_only: false + forward_prefetch: false + fsdp_size: -1 + full_determinism: false + model_dtype: fp32 + offload_policy: false + optimizer_offload: false + param_offload: false + reshard_after_forward: true + seed: 42 + strategy: fsdp + ulysses_sequence_parallel_size: 1 + use_orig_params: false + use_torch_compile: true + wrap_policy: + min_num_params: 0 + grad_clip: 1 + kl_loss_coef: 0.001 + kl_loss_type: low_var_kl + loss_agg_mode: token-mean + loss_scale_factor: null + optim: + _target_: verl.workers.config.FSDPOptimizerConfig + betas: + - 0.9 + - 0.999 + clip_grad: 1 + lr: 1e-06 + lr_scheduler_type: constant + lr_warmup_steps: -1 + lr_warmup_steps_ratio: 0 + min_lr_ratio: 0 + num_cycles: 0.5 + optimizer: AdamW + optimizer_impl: torch.optim + override_optimizer_config: null + total_training_steps: 90 + warmup_style: null + weight_decay: 0.01 + policy_loss: + _target_: verl.workers.config.PolicyLossConfig + clip_cov_lb: 1 + clip_cov_ratio: 0.0002 + clip_cov_ub: 5 + kl_cov_ratio: 0.0002 + loss_mode: vanilla + ppo_kl_coef: 0.1 + ppo_epochs: 1 + ppo_max_token_len_per_gpu: 16384 + ppo_micro_batch_size: null + ppo_micro_batch_size_per_gpu: 32 + ppo_mini_batch_size: 256 + profiler: + _target_: verl.utils.profiler.ProfilerConfig + all_ranks: false + enable: false + ranks: [] + save_path: outputs/profile + tool: null + tool_config: + npu: + _target_: verl.utils.profiler.config.NPUToolConfig + analysis: true + contents: [] + discrete: false + level: level0 + nsys: + _target_: verl.utils.profiler.config.NsightToolConfig + discrete: false + torch: + _target_: verl.utils.profiler.config.TorchProfilerToolConfig + contents: [] + discrete: false + torch_memory: + _target_: verl.utils.profiler.config.TorchMemoryToolConfig + stack_depth: 32 + trace_alloc_max_entries: 100000 + rollout_n: 3 + router_replay: + _target_: verl.workers.config.RouterReplayConfig + mode: disabled + record_file: null + replay_file: null + shuffle: false + strategy: fsdp + sum_pi_squared_checkpointing: false + tau_neg: 1.05 + tau_pos: 1 + ulysses_sequence_parallel_size: 1 + use_dynamic_bsz: false + use_fused_kernels: false + use_kl_loss: true + use_prefix_grouper: false + use_remove_padding: true + use_torch_compile: true + hybrid_engine: true + model: + _target_: verl.workers.config.HFModelConfig + custom_chat_template: null + enable_activation_offload: false + enable_gradient_checkpointing: true + exclude_modules: null + external_lib: null + fused_kernel_options: + impl_backend: torch + hf_config_path: null + lora_adapter_path: null + lora_alpha: 16 + lora_rank: 0 + mtp: + _target_: verl.workers.config.MtpConfig + detach_encoder: false + enable: false + enable_rollout: false + enable_train: false + method: mtp + mtp_loss_scaling_factor: 0.1 + num_speculative_tokens: 1 + speculative_algorithm: EAGLE + speculative_eagle_topk: 1 + speculative_num_draft_tokens: 4 + speculative_num_steps: 3 + path: Qwen/Qwen3-4B-Instruct-2507 + target_modules: all-linear + tiled_mlp: + enabled: false + num_shards: 4 + tokenizer_path: null + trust_remote_code: false + use_fused_kernels: false + use_liger: false + use_remove_padding: true + use_shm: false + nccl_timeout: 600 + ref: + _target_: verl.workers.config.FSDPActorConfig + entropy_checkpointing: false + entropy_from_logits_with_chunking: false + fsdp_config: + _target_: verl.workers.config.FSDPEngineConfig + dtype: bfloat16 + entropy_checkpointing: false + entropy_from_logits_with_chunking: false + forward_only: true + forward_prefetch: false + fsdp_size: -1 + full_determinism: false + model_dtype: fp32 + offload_policy: false + optimizer_offload: false + param_offload: false + reshard_after_forward: true + seed: 42 + strategy: fsdp + ulysses_sequence_parallel_size: 1 + use_orig_params: false + use_torch_compile: true + wrap_policy: + min_num_params: 0 + log_prob_max_token_len_per_gpu: 16384 + log_prob_micro_batch_size: null + log_prob_micro_batch_size_per_gpu: 32 + log_prob_use_dynamic_bsz: false + profiler: + _target_: verl.utils.profiler.ProfilerConfig + all_ranks: false + enable: false + ranks: [] + save_path: outputs/profile + tool: null + tool_config: + npu: + _target_: verl.utils.profiler.config.NPUToolConfig + analysis: true + contents: [] + discrete: false + level: level0 + nsys: + _target_: verl.utils.profiler.config.NsightToolConfig + discrete: false + torch: + _target_: verl.utils.profiler.config.TorchProfilerToolConfig + contents: [] + discrete: false + torch_memory: + _target_: verl.utils.profiler.config.TorchMemoryToolConfig + stack_depth: 32 + trace_alloc_max_entries: 100000 + rollout_n: 3 + router_replay: + _target_: verl.workers.config.RouterReplayConfig + mode: disabled + record_file: null + replay_file: null + strategy: fsdp + ulysses_sequence_parallel_size: 1 + use_torch_compile: true + rollout: + _target_: verl.workers.config.RolloutConfig + agent: + _target_: verl.workers.config.AgentLoopConfig + agent_loop_config_path: null + custom_async_server: + _target_: verl.workers.config.CustomAsyncServerConfig + name: null + path: null + default_agent_loop: single_turn_agent + num_workers: 8 + calculate_log_probs: false + checkpoint_engine: + _target_: verl.workers.config.CheckpointEngineConfig + backend: naive + update_weights_bucket_megabytes: 2048 + cudagraph_capture_sizes: null + data_parallel_size: 1 + disable_log_stats: true + do_sample: true + dtype: bfloat16 + enable_chunked_prefill: true + enable_prefix_caching: true + enable_rollout_routing_replay: false + enforce_eager: true + expert_parallel_size: 1 + free_cache_engine: true + gpu_memory_utilization: 0.6 + ignore_eos: false + layered_summon: false + load_format: dummy + log_prob_max_token_len_per_gpu: 16384 + log_prob_micro_batch_size: null + log_prob_micro_batch_size_per_gpu: 32 + log_prob_use_dynamic_bsz: false + logprobs_mode: processed_logprobs + max_model_len: 8192 + max_num_batched_tokens: 8192 + max_num_seqs: 1024 + mode: async + mtp: + _target_: verl.workers.config.MtpConfig + detach_encoder: false + enable: false + enable_rollout: false + enable_train: false + method: mtp + mtp_loss_scaling_factor: 0.1 + num_speculative_tokens: 1 + speculative_algorithm: EAGLE + speculative_eagle_topk: 1 + speculative_num_draft_tokens: 4 + speculative_num_steps: 3 + multi_stage_wake_up: false + multi_turn: + _target_: verl.workers.config.MultiTurnConfig + enable: false + format: hermes + interaction_config_path: null + max_assistant_turns: null + max_parallel_calls: 1 + max_tool_response_length: 256 + max_user_turns: null + num_repeat_rollouts: null + tokenization_sanity_check_mode: strict + tool_config_path: null + tool_response_truncate_side: middle + use_inference_chat_template: false + "n": 3 + name: vllm + over_sample_rate: 0 + pipeline_model_parallel_size: 1 + profiler: + _target_: verl.utils.profiler.ProfilerConfig + all_ranks: false + enable: false + ranks: [] + save_path: outputs/profile + tool: null + tool_config: + npu: + _target_: verl.utils.profiler.config.NPUToolConfig + analysis: true + contents: [] + discrete: false + level: level0 + nsys: + _target_: verl.utils.profiler.config.NsightToolConfig + discrete: false + torch: + _target_: verl.utils.profiler.config.TorchProfilerToolConfig + contents: [] + discrete: false + torch_memory: + _target_: verl.utils.profiler.config.TorchMemoryToolConfig + stack_depth: 32 + trace_alloc_max_entries: 100000 + prometheus: + _target_: verl.workers.config.PrometheusConfig + enable: false + file: /tmp/ray/session_latest/metrics/prometheus/prometheus.yml + port: 9090 + served_model_name: Qwen/Qwen3-4B-Instruct-2507 + prompt_length: 1024 + quantization: null + quantization_config_file: null + response_length: 2048 + scheduling_policy: fcfs + skip_dump_dir: /tmp/rollout_dump + skip_rollout: false + skip_tokenizer_init: true + temperature: 1 + tensor_model_parallel_size: 1 + top_k: -1 + top_p: 1 + trace: + _target_: verl.workers.config.TraceConfig + backend: null + max_samples_per_step_per_worker: null + token2text: false + val_kwargs: + _target_: verl.workers.config.SamplingConfig + do_sample: false + "n": 1 + temperature: 0 + top_k: -1 + top_p: 1 +algorithm: + value: + _target_: verl.trainer.config.AlgoConfig + adv_estimator: grpo + gamma: 1 + kl_ctrl: + _target_: verl.trainer.config.KLControlConfig + horizon: 10000 + kl_coef: 0.001 + target_kl: 0.1 + type: fixed + kl_penalty: kl + lam: 1 + norm_adv_by_std_in_grpo: true + pf_ppo: + reweight_method: pow + weight_pow: 2 + rollout_correction: + bypass_mode: false + loss_type: ppo_clip + rollout_is: null + rollout_is_batch_normalize: false + rollout_is_threshold: 2 + rollout_rs: null + rollout_rs_threshold: null + use_kl_in_reward: false + use_pf_ppo: false +critic: + value: + _target_: verl.workers.config.FSDPCriticConfig + checkpoint: + _target_: verl.trainer.config.CheckpointConfig + async_save: false + load_contents: + - model + - optimizer + - extra + save_contents: + - model + - optimizer + - extra + cliprange_value: 0.5 + data_loader_seed: 42 + enable: null + forward_max_token_len_per_gpu: 32768 + forward_micro_batch_size: null + forward_micro_batch_size_per_gpu: null + grad_clip: 1 + loss_agg_mode: token-mean + model: + _target_: verl.workers.config.FSDPCriticModelCfg + enable_activation_offload: false + enable_gradient_checkpointing: true + external_lib: null + fsdp_config: + _target_: verl.workers.config.FSDPEngineConfig + dtype: bfloat16 + entropy_checkpointing: false + entropy_from_logits_with_chunking: false + forward_only: false + forward_prefetch: false + fsdp_size: -1 + full_determinism: false + model_dtype: fp32 + offload_policy: false + optimizer_offload: false + param_offload: false + reshard_after_forward: true + seed: 42 + strategy: fsdp + ulysses_sequence_parallel_size: 1 + use_orig_params: false + use_torch_compile: true + wrap_policy: + min_num_params: 0 + lora_alpha: 16 + lora_rank: 0 + path: ~/models/deepseek-llm-7b-chat + target_modules: all-linear + tiled_mlp: + enabled: false + num_shards: 4 + tokenizer_path: Qwen/Qwen3-4B-Instruct-2507 + trust_remote_code: false + use_remove_padding: false + use_shm: false + optim: + _target_: verl.workers.config.FSDPOptimizerConfig + betas: + - 0.9 + - 0.999 + clip_grad: 1 + lr: 1e-05 + lr_scheduler_type: constant + lr_warmup_steps: -1 + lr_warmup_steps_ratio: 0 + min_lr_ratio: 0 + num_cycles: 0.5 + optimizer: AdamW + optimizer_impl: torch.optim + override_optimizer_config: null + total_training_steps: 90 + warmup_style: null + weight_decay: 0.01 + ppo_epochs: 1 + ppo_max_token_len_per_gpu: 32768 + ppo_micro_batch_size: null + ppo_micro_batch_size_per_gpu: null + ppo_mini_batch_size: 256 + profiler: + _target_: verl.utils.profiler.ProfilerConfig + all_ranks: false + enable: false + ranks: [] + save_path: outputs/profile + tool: null + tool_config: + npu: + _target_: verl.utils.profiler.config.NPUToolConfig + analysis: true + contents: [] + discrete: false + level: level0 + nsys: + _target_: verl.utils.profiler.config.NsightToolConfig + discrete: false + torch: + _target_: verl.utils.profiler.config.TorchProfilerToolConfig + contents: [] + discrete: false + torch_memory: + _target_: verl.utils.profiler.config.TorchMemoryToolConfig + stack_depth: 32 + trace_alloc_max_entries: 100000 + rollout_n: 3 + shuffle: false + strategy: fsdp + ulysses_sequence_parallel_size: 1 + use_dynamic_bsz: false +custom_reward_function: + value: + name: compute_score + path: /home/mshahidul/readctrl/code/RL_model/verl/verl_train/reward_func/reward.py +data: + value: + custom_cls: + name: null + path: null + datagen: + name: null + path: null + dataloader_num_workers: 8 + filter_overlong_prompts: true + filter_overlong_prompts_workers: 1 + image_key: images + image_patch_size: 14 + max_prompt_length: 1024 + max_response_length: 2048 + prompt_key: prompt + return_full_prompt: false + return_multi_modal_inputs: true + return_raw_chat: true + return_raw_input_ids: false + reward_fn_key: data_source + sampler: + class_name: null + class_path: null + seed: null + shuffle: true + tokenizer: null + tool_config_path: null + train_batch_size: 512 + train_files: /home/mshahidul/readctrl/code/RL_model/verl/verl_train/dataset/train.parquet + train_max_samples: -1 + truncation: error + trust_remote_code: false + use_shm: false + val_batch_size: null + val_files: /home/mshahidul/readctrl/code/RL_model/verl/verl_train/dataset/test.parquet + val_max_samples: -1 + validation_shuffle: false + video_key: videos +global_profiler: + value: + _target_: verl.utils.profiler.ProfilerConfig + global_tool_config: + nsys: + _target_: verl.utils.profiler.config.NsightToolConfig + controller_nsight_options: + cuda-graph-trace: graph + cuda-memory-usage: "true" + trace: cuda,nvtx,cublas,ucx + discrete: false + worker_nsight_options: + capture-range: cudaProfilerApi + capture-range-end: null + cuda-graph-trace: graph + cuda-memory-usage: "true" + kill: none + trace: cuda,nvtx,cublas,ucx + torch_memory: + context: all + stack_depth: 32 + stacks: all + trace_alloc_max_entries: 100000 + profile_continuous_steps: false + save_path: outputs/profile + steps: null + tool: null +ray_kwargs: + value: + ray_init: + num_cpus: null + timeline_json_file: null +reward_manager: + value: + _target_: verl.trainer.config.config.RewardManagerConfig + module: + _target_: verl.trainer.config.config.ModuleConfig + name: custom_reward_manager + path: null + name: naive + source: register +reward_model: + value: + enable: false + enable_resource_pool: false + forward_max_token_len_per_gpu: 32768 + launch_reward_fn_async: false + max_length: null + micro_batch_size: null + micro_batch_size_per_gpu: null + model: + external_lib: null + fsdp_config: + _target_: verl.workers.config.FSDPEngineConfig + forward_prefetch: false + fsdp_size: -1 + param_offload: false + reshard_after_forward: true + wrap_policy: + min_num_params: 0 + input_tokenizer: Qwen/Qwen3-4B-Instruct-2507 + path: ~/models/FsfairX-LLaMA3-RM-v0.1 + trust_remote_code: false + use_fused_kernels: false + use_remove_padding: false + use_shm: false + n_gpus_per_node: 8 + nnodes: 0 + num_workers: 1 + profiler: + _target_: verl.utils.profiler.ProfilerConfig + all_ranks: false + enable: false + ranks: [] + save_path: outputs/profile + tool: null + tool_config: + npu: + _target_: verl.utils.profiler.config.NPUToolConfig + analysis: true + contents: [] + discrete: false + level: level0 + nsys: + _target_: verl.utils.profiler.config.NsightToolConfig + discrete: false + torch: + _target_: verl.utils.profiler.config.TorchProfilerToolConfig + contents: [] + discrete: false + torch_memory: + _target_: verl.utils.profiler.config.TorchMemoryToolConfig + stack_depth: 32 + trace_alloc_max_entries: 100000 + reward_loop_class_name: null + reward_loop_module_path: null + reward_loop_source: register + reward_manager: naive + rollout: + _target_: verl.workers.config.RolloutConfig + cudagraph_capture_sizes: null + data_parallel_size: 1 + disable_log_stats: true + dtype: bfloat16 + enable_chunked_prefill: true + enable_prefix_caching: true + enforce_eager: true + expert_parallel_size: 1 + free_cache_engine: true + gpu_memory_utilization: 0.5 + limit_images: null + load_format: auto + max_model_len: null + max_num_batched_tokens: 8192 + max_num_seqs: 1024 + name: ??? + prompt_length: 2048 + response_length: 2048 + skip_tokenizer_init: false + tensor_model_parallel_size: 2 + sandbox_fusion: + max_concurrent: 64 + memory_limit_mb: 1024 + url: null + strategy: fsdp + ulysses_sequence_parallel_size: 1 + use_dynamic_bsz: false + use_reward_loop: true +trainer: + value: + balance_batch: true + critic_warmup: 0 + default_hdfs_dir: null + default_local_dir: /home/mshahidul/readctrl/code/RL_model/train_v2 + del_local_ckpt_after_load: false + device: cuda + esi_redundant_time: 0 + experiment_name: qwen3-4b-instruct-en + log_val_generations: 0 + logger: + - console + - wandb + max_actor_ckpt_to_keep: 1 + max_critic_ckpt_to_keep: 1 + n_gpus_per_node: 2 + nnodes: 1 + project_name: readctrl-verl + ray_wait_register_center_timeout: 300 + remove_previous_ckpt_in_save: true + resume_from_path: null + resume_mode: auto + rollout_data_dir: null + save_freq: 5 + test_freq: 10 + total_epochs: 15 + total_training_steps: null + use_legacy_worker_impl: auto + val_before_train: true + val_only: false + validation_data_dir: null +transfer_queue: + value: + enable: false diff --git a/code/RL_model/verl/verl_train/wandb/run-20260210_104801-4ptnl9ej/files/output.log b/code/RL_model/verl/verl_train/wandb/run-20260210_104801-4ptnl9ej/files/output.log new file mode 100644 index 0000000000000000000000000000000000000000..ddba19cc7af5793d0b8a760132d86d1d200df990 --- /dev/null +++ b/code/RL_model/verl/verl_train/wandb/run-20260210_104801-4ptnl9ej/files/output.log @@ -0,0 +1,14 @@ +wandb: Detected [dspy, litellm, openai] in use. +wandb: Use W&B Weave for improved LLM call tracing. Install Weave with `pip install weave` then add `import weave` to the top of your script. +wandb: For more information, check out the docs at: https://weave-docs.wandb.ai/ +Checkpoint tracker file does not exist: /home/mshahidul/readctrl/code/RL_model/train_v2/latest_checkpointed_iteration.txt +Training from scratch +test_gen_batch meta info: {'eos_token_id': 151645, 'pad_token_id': 151643, 'recompute_log_prob': False, 'do_sample': False, 'validate': True, 'global_steps': 0} +validation generation end +("Initial validation metrics: {'val-aux/multiclinsum/reward/mean@1': " + "np.float64(0.24793405689846323), 'val-core/multiclinsum/acc/mean@1': " + "np.float64(0.24793405881498298), 'val-aux/num_turns/min': np.int32(2), " + "'val-aux/num_turns/max': np.int32(2), 'val-aux/num_turns/mean': " + 'np.float64(2.0)}') +step:0 - val-aux/multiclinsum/reward/mean@1:np.float64(0.24793405689846323) - val-core/multiclinsum/acc/mean@1:np.float64(0.24793405881498298) - val-aux/num_turns/min:np.int32(2) - val-aux/num_turns/max:np.int32(2) - val-aux/num_turns/mean:np.float64(2.0) +Training Progress: 0%| | 0/90 [00:00, ?it/s] diff --git a/code/RL_model/verl/verl_train/wandb/run-20260210_104801-4ptnl9ej/files/requirements.txt b/code/RL_model/verl/verl_train/wandb/run-20260210_104801-4ptnl9ej/files/requirements.txt new file mode 100644 index 0000000000000000000000000000000000000000..dd2de633cd2e89d34a77b40967a7fc9ed3177c34 --- /dev/null +++ b/code/RL_model/verl/verl_train/wandb/run-20260210_104801-4ptnl9ej/files/requirements.txt @@ -0,0 +1,283 @@ +verl==0.8.0.dev0 +psutil==7.1.3 +colorama==0.4.6 +annotated-doc==0.0.4 +sentry-sdk==2.51.0 +requests==2.32.5 +nvidia-cufile-cu12==1.13.1.3 +ml_dtypes==0.5.4 +xformers==0.0.32.post1 +sglang==0.5.2 +multidict==6.7.1 +typing_extensions==4.15.0 +nvidia-cusparselt-cu12==0.7.1 +openai-harmony==0.0.4 +transformers==4.56.1 +Werkzeug==3.1.5 +identify==2.6.16 +gepa==0.0.26 +pytest==9.0.2 +nvidia-cuda-runtime-cu12==12.8.90 +GitPython==3.1.46 +cupy-cuda12x==13.6.0 +tokenizers==0.22.2 +pybind11==3.0.1 +google-api-core==2.29.0 +partial-json-parser==0.2.1.1.post7 +aiohttp-cors==0.8.1 +sniffio==1.3.1 +tensordict==0.10.0 +smart_open==7.5.0 +cffi==2.0.0 +wcwidth==0.5.3 +asttokens==3.0.1 +opencensus==0.11.4 +rpds-py==0.30.0 +py-spy==0.4.1 +litellm==1.81.9 +gguf==0.17.1 +nvidia-nvjitlink-cu12==12.8.93 +httpx==0.28.1 +cuda-python==13.1.1 +annotated-types==0.7.0 +regex==2026.1.15 +vllm==0.11.0 +idna==3.11 +fsspec==2025.10.0 +parso==0.8.5 +pydantic-extra-types==2.11.0 +MarkupSafe==3.0.3 +cryptography==46.0.4 +openai==2.17.0 +filelock==3.20.3 +modelscope==1.34.0 +outlines==0.1.11 +dnspython==2.8.0 +scipy==1.17.0 +zipp==3.23.0 +PyYAML==6.0.3 +onnx==1.20.1 +torchdata==0.11.0 +cuda-pathfinder==1.3.3 +asyncer==0.0.8 +verl==0.8.0.dev0 +httptools==0.7.1 +opencv-python-headless==4.13.0.90 +importlib_metadata==8.7.1 +peft==0.18.1 +opentelemetry-sdk==1.39.1 +python-json-logger==4.0.0 +alembic==1.18.3 +cuda-bindings==13.1.1 +mdurl==0.1.2 +referencing==0.37.0 +xxhash==3.6.0 +interegular==0.3.3 +fastapi-cli==0.0.20 +uv==0.9.28 +tensorboard==2.20.0 +nvidia-cublas-cu12==12.8.4.1 +sentencepiece==0.2.1 +rich-toolkit==0.18.1 +numpy==2.2.0 +yarl==1.22.0 +opencv-fixer==0.2.5 +tqdm==4.67.2 +python-dotenv==1.2.1 +Mako==1.3.10 +timm==1.0.16 +aiohappyeyeballs==2.6.1 +decord==0.6.0 +jiter==0.12.0 +airportsdata==20250909 +markdown-it-py==4.0.0 +nvidia-cusolver-cu12==11.7.3.90 +pyarrow==23.0.0 +opentelemetry-proto==1.39.1 +anyio==4.12.1 +pycryptodomex==3.23.0 +prometheus_client==0.24.1 +aiohttp==3.13.3 +urllib3==2.6.3 +pexpect==4.9.0 +pydantic-settings==2.12.0 +distro==1.9.0 +av==16.1.0 +cloudpickle==3.1.2 +mpmath==1.3.0 +certifi==2026.1.4 +antlr4-python3-runtime==4.9.3 +torchvision==0.23.0 +accelerate==1.12.0 +watchfiles==1.1.1 +ruff==0.14.14 +wheel==0.46.3 +omegaconf==2.3.0 +nvidia-cufft-cu12==11.3.3.83 +multiprocess==0.70.18 +frozendict==2.4.7 +sympy==1.14.0 +setproctitle==1.3.7 +optuna==4.7.0 +setuptools==79.0.1 +py-cpuinfo==9.0.0 +ipython_pygments_lexers==1.1.1 +rich==14.3.2 +uvicorn==0.40.0 +outlines_core==0.2.11 +llvmlite==0.44.0 +nvidia-cuda-cupti-cu12==12.8.90 +attrs==25.4.0 +anthropic==0.77.0 +packaging==25.0 +fastrlock==0.8.3 +astor==0.8.1 +pluggy==1.6.0 +nvidia-cuda-nvrtc-cu12==12.8.93 +psutil==7.2.2 +virtualenv==20.36.1 +cbor2==5.8.0 +tenacity==9.1.4 +compressed-tensors==0.11.0 +SQLAlchemy==2.0.46 +nvidia-cusparse-cu12==12.5.8.93 +networkx==3.6.1 +httpcore==1.0.9 +onnxscript==0.3.1 +smmap==5.0.2 +opencv-python==4.13.0.90 +traitlets==5.14.3 +python-multipart==0.0.22 +pyvers==0.1.0 +huggingface-hub==0.36.0 +pillow==12.1.0 +jsonschema==4.26.0 +cfgv==3.5.0 +optree==0.18.0 +email-validator==2.3.0 +tabulate==0.9.0 +pre_commit==4.5.1 +msgpack==1.1.2 +depyf==0.19.0 +numba==0.61.2 +six==1.17.0 +aiosignal==1.4.0 +nvidia-nvtx-cu12==12.8.90 +propcache==0.4.1 +torch_memory_saver==0.0.8 +h11==0.16.0 +frozenlist==1.8.0 +websockets==16.0 +nvidia-cudnn-frontend==1.18.0 +build==1.4.0 +google-auth==2.48.0 +pycountry==24.6.1 +colorlog==6.10.1 +stack-data==0.6.3 +typing-inspection==0.4.2 +googleapis-common-protos==1.72.0 +pandas==3.0.0 +typer==0.21.1 +protobuf==6.33.5 +fastapi==0.128.0 +blake3==1.0.8 +opentelemetry-semantic-conventions==0.60b1 +opentelemetry-exporter-prometheus==0.60b1 +nvidia-cudnn-cu12==9.10.2.21 +Markdown==3.10.1 +liger_kernel==0.6.4 +json_repair==0.57.1 +nodeenv==1.10.0 +prompt_toolkit==3.0.52 +torchaudio==2.8.0 +codetiming==1.4.0 +platformdirs==4.5.1 +jsonschema-specifications==2025.9.1 +hydra-core==1.3.2 +tensorboard-data-server==0.7.2 +lm-format-enforcer==0.11.3 +pyasn1_modules==0.4.2 +tiktoken==0.12.0 +starlette==0.50.0 +pyproject_hooks==1.2.0 +flash_attn==2.8.1 +rsa==4.9.1 +ray==2.53.0 +nest-asyncio==1.6.0 +lark==1.2.2 +fastar==0.8.0 +orjson==3.11.6 +prometheus-fastapi-instrumentator==7.1.0 +opentelemetry-api==1.39.1 +mathruler==0.1.0 +pydantic_core==2.41.5 +fastapi-cloud-cli==0.11.0 +pynvml==13.0.1 +loguru==0.7.3 +torch==2.8.0 +msgspec==0.20.0 +nvidia-curand-cu12==10.3.9.90 +blobfile==3.0.0 +gitdb==4.0.12 +llguidance==0.7.30 +hf_transfer==0.1.9 +nvidia-nccl-cu12==2.27.3 +qwen-vl-utils==0.0.14 +ptyprocess==0.7.0 +ipdb==0.13.13 +opencensus-context==0.1.3 +jedi==0.19.2 +click==8.3.1 +datasets==4.5.0 +soxr==1.0.0 +sgl-kernel==0.3.9.post2 +colorful==0.5.8 +pyasn1==0.6.2 +charset-normalizer==3.4.4 +nvidia-ml-py==13.590.48 +hf-xet==1.2.0 +dill==0.4.0 +absl-py==2.4.0 +pydantic==2.12.5 +dspy==3.1.3 +wrapt==2.1.0 +flashinfer-python==0.3.1 +python-dateutil==2.9.0.post0 +torchao==0.9.0 +cachetools==7.0.0 +soundfile==0.13.1 +diskcache==5.6.3 +onnx-ir==0.1.15 +docstring_parser==0.17.0 +matplotlib-inline==0.2.1 +Pygments==2.19.2 +wandb==0.24.1 +pure_eval==0.2.3 +ninja==1.13.0 +proto-plus==1.27.0 +pyzmq==27.1.0 +iniconfig==2.3.0 +Jinja2==3.1.6 +megatron-core==0.13.1 +uvloop==0.22.1 +fastuuid==0.14.0 +pycparser==3.0 +pylatexenc==2.10 +decorator==5.2.1 +shellingham==1.5.4 +lxml==6.0.2 +safetensors==0.7.0 +xgrammar==0.1.25 +pybase64==1.4.3 +ipython==9.9.0 +greenlet==3.3.1 +mistral_common==1.9.0 +rignore==0.7.6 +einops==0.8.2 +distlib==0.4.0 +triton==3.4.0 +executing==2.2.1 +grpcio==1.76.0 +pip==25.3 +verl==0.8.0.dev0 +verl==0.8.0.dev0 diff --git a/code/RL_model/verl/verl_train/wandb/run-20260210_104801-4ptnl9ej/files/wandb-metadata.json b/code/RL_model/verl/verl_train/wandb/run-20260210_104801-4ptnl9ej/files/wandb-metadata.json new file mode 100644 index 0000000000000000000000000000000000000000..d4a4437931aa388ef50bad12a13ae4b279459858 --- /dev/null +++ b/code/RL_model/verl/verl_train/wandb/run-20260210_104801-4ptnl9ej/files/wandb-metadata.json @@ -0,0 +1,93 @@ +{ + "os": "Linux-5.15.0-160-generic-x86_64-with-glibc2.35", + "python": "CPython 3.12.12", + "startedAt": "2026-02-10T15:48:01.036772Z", + "args": [ + "--node-ip-address=172.16.34.29", + "--node-manager-port=35027", + "--object-store-name=/tmp/ray/session_2026-02-10_10-43-37_364111_468038/sockets/plasma_store", + "--raylet-name=/tmp/ray/session_2026-02-10_10-43-37_364111_468038/sockets/raylet", + "--redis-address=None", + "--metrics-agent-port=62875", + "--logging-rotate-bytes=536870912", + "--logging-rotate-backup-count=5", + "--runtime-env-agent-port=61020", + "--gcs-address=172.16.34.29:44687", + "--session-name=session_2026-02-10_10-43-37_364111_468038", + "--temp-dir=/tmp/ray", + "--webui=127.0.0.1:8297", + "--cluster-id=1a2afab2d844e52e8ba1ba2a3a7177b31bfbebd4bc4a92c2f46ac3a2", + "--startup-token=128", + "--worker-launch-time-ms=1770738229009", + "--node-id=cb7e6953dd0f4e0cc6a0673aa459e295ee760b1101d6cf807e0c0643", + "--runtime-env-hash=1096984665" + ], + "program": "/home/mshahidul/miniconda3/envs/verl2/lib/python3.12/site-packages/ray/_private/workers/default_worker.py", + "git": { + "remote": "https://github.com/verl-project/verl", + "commit": "d9939add7a2a01923a9088891f913a5d20c4e622" + }, + "email": "shahidulshakib034@gmail.com", + "root": "/data/home_beta/mshahidul/readctrl/code/RL_model/verl/verl_train", + "host": "gamma", + "executable": "/home/mshahidul/miniconda3/envs/verl2/bin/python3", + "cpu_count": 64, + "cpu_count_logical": 128, + "gpu": "NVIDIA A100 80GB PCIe", + "gpu_count": 6, + "disk": { + "/": { + "total": "3766429188096", + "used": "191249948672" + } + }, + "memory": { + "total": "1081814863872" + }, + "gpu_nvidia": [ + { + "name": "NVIDIA A100 80GB PCIe", + "memoryTotal": "85899345920", + "cudaCores": 6912, + "architecture": "Ampere", + "uuid": "GPU-4a3678c7-34a9-356f-f7b7-7f7e2f44b596" + }, + { + "name": "NVIDIA A100 80GB PCIe", + "memoryTotal": "85899345920", + "cudaCores": 6912, + "architecture": "Ampere", + "uuid": "GPU-df506764-0db5-91b4-8ec9-154a3bb8123f" + }, + { + "name": "NVIDIA A100 80GB PCIe", + "memoryTotal": "85899345920", + "cudaCores": 6912, + "architecture": "Ampere", + "uuid": "GPU-2c3dbd62-b384-2996-a0f6-b32dcfcc3538" + }, + { + "name": "NVIDIA A100 80GB PCIe", + "memoryTotal": "85899345920", + "cudaCores": 6912, + "architecture": "Ampere", + "uuid": "GPU-1ff3dabe-4b9a-ea62-5cc3-01f12f32d328" + }, + { + "name": "NVIDIA H100 PCIe", + "memoryTotal": "85520809984", + "cudaCores": 14592, + "architecture": "Hopper", + "uuid": "GPU-eefc4b8c-0e79-c1d6-a9ff-8325040572eb" + }, + { + "name": "NVIDIA H100 PCIe", + "memoryTotal": "85520809984", + "cudaCores": 14592, + "architecture": "Hopper", + "uuid": "GPU-d42b6057-13e8-1e88-6aa1-9307df72dece" + } + ], + "cudaVersion": "13.0", + "writerId": "s1zs8nbohjb0gcsqxia4r556f3absilj" +} \ No newline at end of file diff --git a/code/RL_model/verl/verl_train/wandb/run-20260210_104801-4ptnl9ej/files/wandb-summary.json b/code/RL_model/verl/verl_train/wandb/run-20260210_104801-4ptnl9ej/files/wandb-summary.json new file mode 100644 index 0000000000000000000000000000000000000000..fabb8461c1b3e5480dfea281148e0b73285f4891 --- /dev/null +++ b/code/RL_model/verl/verl_train/wandb/run-20260210_104801-4ptnl9ej/files/wandb-summary.json @@ -0,0 +1 @@ +{"_wandb":{"runtime":8055},"_step":0,"val-aux/multiclinsum/reward/mean@1":0.24793405689846323,"_runtime":8055.728600123,"val-aux/num_turns/mean":2,"_timestamp":1.770739332735304e+09,"val-core/multiclinsum/acc/mean@1":0.24793405881498298,"val-aux/num_turns/min":2,"val-aux/num_turns/max":2} \ No newline at end of file diff --git a/code/RL_model/verl/verl_train/wandb/run-20260210_104801-4ptnl9ej/logs/debug-core.log b/code/RL_model/verl/verl_train/wandb/run-20260210_104801-4ptnl9ej/logs/debug-core.log new file mode 100644 index 0000000000000000000000000000000000000000..f6434eae8029ff5c1532ae279fa452d32bbc49d1 --- /dev/null +++ b/code/RL_model/verl/verl_train/wandb/run-20260210_104801-4ptnl9ej/logs/debug-core.log @@ -0,0 +1,12 @@ +{"time":"2026-02-10T10:48:01.317427867-05:00","level":"INFO","msg":"main: starting server","port-filename":"/tmp/tmpnhztpv97/port-488089.txt","pid":488089,"log-level":0,"disable-analytics":false,"shutdown-on-parent-exit":false,"enable-dcgm-profiling":false} +{"time":"2026-02-10T10:48:01.318345059-05:00","level":"INFO","msg":"server: will exit if parent process dies","ppid":488089} +{"time":"2026-02-10T10:48:01.318315108-05:00","level":"INFO","msg":"server: accepting connections","addr":{"Name":"/tmp/wandb-488089-496673-2876293546/socket","Net":"unix"}} +{"time":"2026-02-10T10:48:01.484474288-05:00","level":"INFO","msg":"connection: ManageConnectionData: new connection created","id":"1(@)"} +{"time":"2026-02-10T10:48:01.500510454-05:00","level":"INFO","msg":"handleInformInit: received","streamId":"4ptnl9ej","id":"1(@)"} +{"time":"2026-02-10T10:48:03.140082368-05:00","level":"INFO","msg":"handleInformInit: stream started","streamId":"4ptnl9ej","id":"1(@)"} +{"time":"2026-02-10T10:48:09.203382842-05:00","level":"INFO","msg":"connection: cancelling request","id":"1(@)","requestId":"g1976klca6vq"} +{"time":"2026-02-10T13:02:19.163693369-05:00","level":"INFO","msg":"connection: cancelling request","id":"1(@)","requestId":"g1976klca6vq"} +{"time":"2026-02-10T13:02:19.907024343-05:00","level":"INFO","msg":"connection: cancelling request","id":"1(@)","requestId":"g1976klca6vq"} +{"time":"2026-02-10T13:02:19.91072684-05:00","level":"INFO","msg":"handleInformFinish: finish message received","streamId":"4ptnl9ej","id":"1(@)"} +{"time":"2026-02-10T13:02:19.914211129-05:00","level":"INFO","msg":"handleInformFinish: stream closed","streamId":"4ptnl9ej","id":"1(@)"} +{"time":"2026-02-10T13:02:21.59068069-05:00","level":"INFO","msg":"server: parent process exited, terminating service process"} diff --git a/code/RL_model/verl/verl_train/wandb/run-20260210_104801-4ptnl9ej/logs/debug-internal.log b/code/RL_model/verl/verl_train/wandb/run-20260210_104801-4ptnl9ej/logs/debug-internal.log new file mode 100644 index 0000000000000000000000000000000000000000..feaa5f27e5daa14cf654ae56be2770ce6c91af79 --- /dev/null +++ b/code/RL_model/verl/verl_train/wandb/run-20260210_104801-4ptnl9ej/logs/debug-internal.log @@ -0,0 +1,12 @@ +{"time":"2026-02-10T10:48:01.50183243-05:00","level":"INFO","msg":"stream: starting","core version":"0.24.1"} +{"time":"2026-02-10T10:48:03.115597403-05:00","level":"INFO","msg":"stream: created new stream","id":"4ptnl9ej"} +{"time":"2026-02-10T10:48:03.115721566-05:00","level":"INFO","msg":"handler: started","stream_id":"4ptnl9ej"} +{"time":"2026-02-10T10:48:03.140048403-05:00","level":"INFO","msg":"stream: started","id":"4ptnl9ej"} +{"time":"2026-02-10T10:48:03.140111318-05:00","level":"INFO","msg":"sender: started","stream_id":"4ptnl9ej"} +{"time":"2026-02-10T10:48:03.140116773-05:00","level":"INFO","msg":"writer: started","stream_id":"4ptnl9ej"} +{"time":"2026-02-10T13:02:19.655384058-05:00","level":"INFO","msg":"fileTransfer: Close: file transfer manager closed"} +{"time":"2026-02-10T13:02:19.900931283-05:00","level":"INFO","msg":"handler: operation stats","stats":{}} +{"time":"2026-02-10T13:02:19.91079193-05:00","level":"INFO","msg":"stream: closing","id":"4ptnl9ej"} +{"time":"2026-02-10T13:02:19.91082422-05:00","level":"INFO","msg":"handler: closed","stream_id":"4ptnl9ej"} +{"time":"2026-02-10T13:02:19.913195344-05:00","level":"INFO","msg":"sender: closed","stream_id":"4ptnl9ej"} +{"time":"2026-02-10T13:02:19.913211494-05:00","level":"INFO","msg":"stream: closed","id":"4ptnl9ej"} diff --git a/code/RL_model/verl/verl_train/wandb/run-20260210_104801-4ptnl9ej/logs/debug.log b/code/RL_model/verl/verl_train/wandb/run-20260210_104801-4ptnl9ej/logs/debug.log new file mode 100644 index 0000000000000000000000000000000000000000..cf25075804e4fb4cb77c8b60f087a410d069af58 --- /dev/null +++ b/code/RL_model/verl/verl_train/wandb/run-20260210_104801-4ptnl9ej/logs/debug.log @@ -0,0 +1,24 @@ +2026-02-10 10:48:01,060 INFO MainThread:488089 [wandb_setup.py:_flush():81] Current SDK version is 0.24.1 +2026-02-10 10:48:01,061 INFO MainThread:488089 [wandb_setup.py:_flush():81] Configure stats pid to 488089 +2026-02-10 10:48:01,061 INFO MainThread:488089 [wandb_setup.py:_flush():81] Loading settings from environment variables +2026-02-10 10:48:01,061 INFO MainThread:488089 [wandb_init.py:setup_run_log_directory():717] Logging user logs to /data/home_beta/mshahidul/readctrl/code/RL_model/verl/verl_train/wandb/run-20260210_104801-4ptnl9ej/logs/debug.log +2026-02-10 10:48:01,061 INFO MainThread:488089 [wandb_init.py:setup_run_log_directory():718] Logging internal logs to /data/home_beta/mshahidul/readctrl/code/RL_model/verl/verl_train/wandb/run-20260210_104801-4ptnl9ej/logs/debug-internal.log +2026-02-10 10:48:01,062 INFO MainThread:488089 [wandb_init.py:init():844] calling init triggers +2026-02-10 10:48:01,063 INFO MainThread:488089 [wandb_init.py:init():849] wandb.init called with sweep_config: {} +config: {'actor_rollout_ref': {'actor': {'optim': {'_target_': 'verl.workers.config.FSDPOptimizerConfig', 'optimizer': 'AdamW', 'optimizer_impl': 'torch.optim', 'lr': 1e-06, 'lr_warmup_steps_ratio': 0.0, 'total_training_steps': 90, 'weight_decay': 0.01, 'lr_warmup_steps': -1, 'betas': [0.9, 0.999], 'clip_grad': 1.0, 'min_lr_ratio': 0.0, 'num_cycles': 0.5, 'lr_scheduler_type': 'constant', 'warmup_style': None, 'override_optimizer_config': None}, 'fsdp_config': {'_target_': 'verl.workers.config.FSDPEngineConfig', 'wrap_policy': {'min_num_params': 0}, 'param_offload': False, 'optimizer_offload': False, 'offload_policy': False, 'reshard_after_forward': True, 'fsdp_size': -1, 'forward_prefetch': False, 'model_dtype': 'fp32', 'use_orig_params': False, 'seed': 42, 'full_determinism': False, 'ulysses_sequence_parallel_size': 1, 'entropy_from_logits_with_chunking': False, 'use_torch_compile': True, 'entropy_checkpointing': False, 'forward_only': False, 'strategy': 'fsdp', 'dtype': 'bfloat16'}, '_target_': 'verl.workers.config.FSDPActorConfig', 'rollout_n': 3, 'strategy': 'fsdp', 'ppo_mini_batch_size': 256, 'ppo_micro_batch_size': None, 'ppo_micro_batch_size_per_gpu': 32, 'use_dynamic_bsz': False, 'ppo_max_token_len_per_gpu': 16384, 'clip_ratio': 0.2, 'clip_ratio_low': 0.2, 'clip_ratio_high': 0.2, 'tau_pos': 1.0, 'tau_neg': 1.05, 'freeze_vision_tower': False, 'policy_loss': {'_target_': 'verl.workers.config.PolicyLossConfig', 'loss_mode': 'vanilla', 'clip_cov_ratio': 0.0002, 'clip_cov_lb': 1.0, 'clip_cov_ub': 5.0, 'kl_cov_ratio': 0.0002, 'ppo_kl_coef': 0.1}, 'clip_ratio_c': 3.0, 'loss_agg_mode': 'token-mean', 'loss_scale_factor': None, 'entropy_coeff': 0, 'calculate_entropy': False, 'use_kl_loss': True, 'use_prefix_grouper': False, 'use_torch_compile': True, 'kl_loss_coef': 0.001, 'kl_loss_type': 'low_var_kl', 'ppo_epochs': 1, 'shuffle': False, 'data_loader_seed': 42, 'checkpoint': {'_target_': 'verl.trainer.config.CheckpointConfig', 'save_contents': ['model', 'optimizer', 'extra'], 'load_contents': ['model', 'optimizer', 'extra'], 'async_save': False}, 'use_fused_kernels': False, 'profiler': {'_target_': 'verl.utils.profiler.ProfilerConfig', 'tool': None, 'enable': False, 'all_ranks': False, 'ranks': [], 'save_path': 'outputs/profile', 'tool_config': {'nsys': {'_target_': 'verl.utils.profiler.config.NsightToolConfig', 'discrete': False}, 'npu': {'_target_': 'verl.utils.profiler.config.NPUToolConfig', 'contents': [], 'level': 'level0', 'analysis': True, 'discrete': False}, 'torch': {'_target_': 'verl.utils.profiler.config.TorchProfilerToolConfig', 'contents': [], 'discrete': False}, 'torch_memory': {'_target_': 'verl.utils.profiler.config.TorchMemoryToolConfig', 'trace_alloc_max_entries': 100000, 'stack_depth': 32}}}, 'router_replay': {'_target_': 'verl.workers.config.RouterReplayConfig', 'mode': 'disabled', 'record_file': None, 'replay_file': None}, 'grad_clip': 1.0, 'ulysses_sequence_parallel_size': 1, 'entropy_from_logits_with_chunking': False, 'entropy_checkpointing': False, 'use_remove_padding': True, 'calculate_sum_pi_squared': False, 'sum_pi_squared_checkpointing': False}, 'ref': {'rollout_n': 3, 'strategy': 'fsdp', 'use_torch_compile': True, 'log_prob_micro_batch_size': None, 'log_prob_micro_batch_size_per_gpu': 32, 'log_prob_use_dynamic_bsz': False, 'log_prob_max_token_len_per_gpu': 16384, 'profiler': {'_target_': 'verl.utils.profiler.ProfilerConfig', 'tool': None, 'enable': False, 'all_ranks': False, 'ranks': [], 'save_path': 'outputs/profile', 'tool_config': {'nsys': {'_target_': 'verl.utils.profiler.config.NsightToolConfig', 'discrete': False}, 'npu': {'_target_': 'verl.utils.profiler.config.NPUToolConfig', 'contents': [], 'level': 'level0', 'analysis': True, 'discrete': False}, 'torch': {'_target_': 'verl.utils.profiler.config.TorchProfilerToolConfig', 'contents': [], 'discrete': False}, 'torch_memory': {'_target_': 'verl.utils.profiler.config.TorchMemoryToolConfig', 'trace_alloc_max_entries': 100000, 'stack_depth': 32}}}, 'router_replay': {'_target_': 'verl.workers.config.RouterReplayConfig', 'mode': 'disabled', 'record_file': None, 'replay_file': None}, 'fsdp_config': {'_target_': 'verl.workers.config.FSDPEngineConfig', 'wrap_policy': {'min_num_params': 0}, 'param_offload': False, 'optimizer_offload': False, 'offload_policy': False, 'reshard_after_forward': True, 'fsdp_size': -1, 'forward_prefetch': False, 'model_dtype': 'fp32', 'use_orig_params': False, 'seed': 42, 'full_determinism': False, 'ulysses_sequence_parallel_size': 1, 'entropy_from_logits_with_chunking': False, 'use_torch_compile': True, 'entropy_checkpointing': False, 'forward_only': True, 'strategy': 'fsdp', 'dtype': 'bfloat16'}, '_target_': 'verl.workers.config.FSDPActorConfig', 'ulysses_sequence_parallel_size': 1, 'entropy_from_logits_with_chunking': False, 'entropy_checkpointing': False}, 'rollout': {'_target_': 'verl.workers.config.RolloutConfig', 'name': 'vllm', 'mode': 'async', 'temperature': 1.0, 'top_k': -1, 'top_p': 1, 'prompt_length': 1024, 'response_length': 2048, 'dtype': 'bfloat16', 'gpu_memory_utilization': 0.6, 'ignore_eos': False, 'enforce_eager': True, 'cudagraph_capture_sizes': None, 'free_cache_engine': True, 'tensor_model_parallel_size': 1, 'data_parallel_size': 1, 'expert_parallel_size': 1, 'pipeline_model_parallel_size': 1, 'max_num_batched_tokens': 8192, 'max_model_len': 8192, 'max_num_seqs': 1024, 'enable_chunked_prefill': True, 'enable_prefix_caching': True, 'logprobs_mode': 'processed_logprobs', 'scheduling_policy': 'fcfs', 'load_format': 'dummy', 'log_prob_micro_batch_size': None, 'log_prob_micro_batch_size_per_gpu': 32, 'log_prob_use_dynamic_bsz': False, 'log_prob_max_token_len_per_gpu': 16384, 'disable_log_stats': True, 'do_sample': True, 'n': 3, 'over_sample_rate': 0, 'multi_stage_wake_up': False, 'engine_kwargs': {'vllm': {}, 'sglang': {}, 'trtllm': {}}, 'val_kwargs': {'_target_': 'verl.workers.config.SamplingConfig', 'top_k': -1, 'top_p': 1.0, 'temperature': 0, 'n': 1, 'do_sample': False}, 'multi_turn': {'_target_': 'verl.workers.config.MultiTurnConfig', 'enable': False, 'max_assistant_turns': None, 'tool_config_path': None, 'max_user_turns': None, 'max_parallel_calls': 1, 'max_tool_response_length': 256, 'tool_response_truncate_side': 'middle', 'interaction_config_path': None, 'use_inference_chat_template': False, 'tokenization_sanity_check_mode': 'strict', 'format': 'hermes', 'num_repeat_rollouts': None}, 'calculate_log_probs': False, 'agent': {'_target_': 'verl.workers.config.AgentLoopConfig', 'num_workers': 8, 'default_agent_loop': 'single_turn_agent', 'agent_loop_config_path': None, 'custom_async_server': {'_target_': 'verl.workers.config.CustomAsyncServerConfig', 'path': None, 'name': None}}, 'checkpoint_engine': {'_target_': 'verl.workers.config.CheckpointEngineConfig', 'backend': 'naive', 'update_weights_bucket_megabytes': 2048, 'engine_kwargs': {}}, 'trace': {'_target_': 'verl.workers.config.TraceConfig', 'backend': None, 'token2text': False, 'max_samples_per_step_per_worker': None}, 'skip_rollout': False, 'skip_dump_dir': '/tmp/rollout_dump', 'skip_tokenizer_init': True, 'enable_rollout_routing_replay': False, 'profiler': {'_target_': 'verl.utils.profiler.ProfilerConfig', 'tool': None, 'enable': False, 'all_ranks': False, 'ranks': [], 'save_path': 'outputs/profile', 'tool_config': {'nsys': {'_target_': 'verl.utils.profiler.config.NsightToolConfig', 'discrete': False}, 'npu': {'_target_': 'verl.utils.profiler.config.NPUToolConfig', 'contents': [], 'level': 'level0', 'analysis': True, 'discrete': False}, 'torch': {'_target_': 'verl.utils.profiler.config.TorchProfilerToolConfig', 'contents': [], 'discrete': False}, 'torch_memory': {'_target_': 'verl.utils.profiler.config.TorchMemoryToolConfig', 'trace_alloc_max_entries': 100000, 'stack_depth': 32}}}, 'prometheus': {'_target_': 'verl.workers.config.PrometheusConfig', 'enable': False, 'port': 9090, 'file': '/tmp/ray/session_latest/metrics/prometheus/prometheus.yml', 'served_model_name': 'Qwen/Qwen3-4B-Instruct-2507'}, 'quantization': None, 'quantization_config_file': None, 'mtp': {'_target_': 'verl.workers.config.MtpConfig', 'enable': False, 'enable_train': False, 'enable_rollout': False, 'detach_encoder': False, 'mtp_loss_scaling_factor': 0.1, 'speculative_algorithm': 'EAGLE', 'speculative_num_steps': 3, 'speculative_eagle_topk': 1, 'speculative_num_draft_tokens': 4, 'method': 'mtp', 'num_speculative_tokens': 1}, 'layered_summon': False}, 'model': {'_target_': 'verl.workers.config.HFModelConfig', 'path': 'Qwen/Qwen3-4B-Instruct-2507', 'hf_config_path': None, 'tokenizer_path': None, 'use_shm': False, 'trust_remote_code': False, 'custom_chat_template': None, 'external_lib': None, 'override_config': {}, 'enable_gradient_checkpointing': True, 'enable_activation_offload': False, 'use_remove_padding': True, 'lora_rank': 0, 'lora_alpha': 16, 'target_modules': 'all-linear', 'exclude_modules': None, 'lora_adapter_path': None, 'use_liger': False, 'use_fused_kernels': False, 'fused_kernel_options': {'impl_backend': 'torch'}, 'tiled_mlp': {'enabled': False, 'num_shards': 4}, 'mtp': {'_target_': 'verl.workers.config.MtpConfig', 'enable': False, 'enable_train': False, 'enable_rollout': False, 'detach_encoder': False, 'mtp_loss_scaling_factor': 0.1, 'speculative_algorithm': 'EAGLE', 'speculative_num_steps': 3, 'speculative_eagle_topk': 1, 'speculative_num_draft_tokens': 4, 'method': 'mtp', 'num_speculative_tokens': 1}}, 'hybrid_engine': True, 'nccl_timeout': 600}, 'data': {'tokenizer': None, 'use_shm': False, 'train_files': '/home/mshahidul/readctrl/code/RL_model/verl/verl_train/dataset/train.parquet', 'val_files': '/home/mshahidul/readctrl/code/RL_model/verl/verl_train/dataset/test.parquet', 'train_max_samples': -1, 'val_max_samples': -1, 'prompt_key': 'prompt', 'reward_fn_key': 'data_source', 'max_prompt_length': 1024, 'max_response_length': 2048, 'train_batch_size': 512, 'val_batch_size': None, 'tool_config_path': None, 'return_raw_input_ids': False, 'return_raw_chat': True, 'return_full_prompt': False, 'shuffle': True, 'seed': None, 'dataloader_num_workers': 8, 'image_patch_size': 14, 'validation_shuffle': False, 'filter_overlong_prompts': True, 'filter_overlong_prompts_workers': 1, 'truncation': 'error', 'image_key': 'images', 'video_key': 'videos', 'trust_remote_code': False, 'custom_cls': {'path': None, 'name': None}, 'return_multi_modal_inputs': True, 'sampler': {'class_path': None, 'class_name': None}, 'datagen': {'path': None, 'name': None}, 'apply_chat_template_kwargs': {}}, 'reward_manager': {'_target_': 'verl.trainer.config.config.RewardManagerConfig', 'source': 'register', 'name': 'naive', 'module': {'_target_': 'verl.trainer.config.config.ModuleConfig', 'path': None, 'name': 'custom_reward_manager'}}, 'critic': {'optim': {'_target_': 'verl.workers.config.FSDPOptimizerConfig', 'optimizer': 'AdamW', 'optimizer_impl': 'torch.optim', 'lr': 1e-05, 'lr_warmup_steps_ratio': 0.0, 'total_training_steps': 90, 'weight_decay': 0.01, 'lr_warmup_steps': -1, 'betas': [0.9, 0.999], 'clip_grad': 1.0, 'min_lr_ratio': 0.0, 'num_cycles': 0.5, 'lr_scheduler_type': 'constant', 'warmup_style': None, 'override_optimizer_config': None}, 'model': {'fsdp_config': {'_target_': 'verl.workers.config.FSDPEngineConfig', 'wrap_policy': {'min_num_params': 0}, 'param_offload': False, 'optimizer_offload': False, 'offload_policy': False, 'reshard_after_forward': True, 'fsdp_size': -1, 'forward_prefetch': False, 'model_dtype': 'fp32', 'use_orig_params': False, 'seed': 42, 'full_determinism': False, 'ulysses_sequence_parallel_size': 1, 'entropy_from_logits_with_chunking': False, 'use_torch_compile': True, 'entropy_checkpointing': False, 'forward_only': False, 'strategy': 'fsdp', 'dtype': 'bfloat16'}, 'path': '~/models/deepseek-llm-7b-chat', 'tokenizer_path': 'Qwen/Qwen3-4B-Instruct-2507', 'override_config': {}, 'external_lib': None, 'trust_remote_code': False, '_target_': 'verl.workers.config.FSDPCriticModelCfg', 'use_shm': False, 'enable_gradient_checkpointing': True, 'enable_activation_offload': False, 'use_remove_padding': False, 'lora_rank': 0, 'lora_alpha': 16, 'target_modules': 'all-linear', 'tiled_mlp': {'enabled': False, 'num_shards': 4}}, '_target_': 'verl.workers.config.FSDPCriticConfig', 'rollout_n': 3, 'strategy': 'fsdp', 'enable': None, 'ppo_mini_batch_size': 256, 'ppo_micro_batch_size': None, 'ppo_micro_batch_size_per_gpu': None, 'use_dynamic_bsz': False, 'ppo_max_token_len_per_gpu': 32768, 'forward_max_token_len_per_gpu': 32768, 'ppo_epochs': 1, 'shuffle': False, 'data_loader_seed': 42, 'cliprange_value': 0.5, 'loss_agg_mode': 'token-mean', 'checkpoint': {'_target_': 'verl.trainer.config.CheckpointConfig', 'save_contents': ['model', 'optimizer', 'extra'], 'load_contents': ['model', 'optimizer', 'extra'], 'async_save': False}, 'profiler': {'_target_': 'verl.utils.profiler.ProfilerConfig', 'tool': None, 'enable': False, 'all_ranks': False, 'ranks': [], 'save_path': 'outputs/profile', 'tool_config': {'nsys': {'_target_': 'verl.utils.profiler.config.NsightToolConfig', 'discrete': False}, 'npu': {'_target_': 'verl.utils.profiler.config.NPUToolConfig', 'contents': [], 'level': 'level0', 'analysis': True, 'discrete': False}, 'torch': {'_target_': 'verl.utils.profiler.config.TorchProfilerToolConfig', 'contents': [], 'discrete': False}, 'torch_memory': {'_target_': 'verl.utils.profiler.config.TorchMemoryToolConfig', 'trace_alloc_max_entries': 100000, 'stack_depth': 32}}}, 'forward_micro_batch_size': None, 'forward_micro_batch_size_per_gpu': None, 'ulysses_sequence_parallel_size': 1, 'grad_clip': 1.0}, 'reward_model': {'enable': False, 'enable_resource_pool': False, 'n_gpus_per_node': 8, 'nnodes': 0, 'strategy': 'fsdp', 'model': {'input_tokenizer': 'Qwen/Qwen3-4B-Instruct-2507', 'path': '~/models/FsfairX-LLaMA3-RM-v0.1', 'external_lib': None, 'trust_remote_code': False, 'override_config': {}, 'use_shm': False, 'use_remove_padding': False, 'use_fused_kernels': False, 'fsdp_config': {'_target_': 'verl.workers.config.FSDPEngineConfig', 'wrap_policy': {'min_num_params': 0}, 'param_offload': False, 'reshard_after_forward': True, 'fsdp_size': -1, 'forward_prefetch': False}}, 'micro_batch_size': None, 'micro_batch_size_per_gpu': None, 'max_length': None, 'use_dynamic_bsz': False, 'forward_max_token_len_per_gpu': 32768, 'reward_manager': 'naive', 'reward_loop_source': 'register', 'reward_loop_module_path': None, 'reward_loop_class_name': None, 'launch_reward_fn_async': False, 'sandbox_fusion': {'url': None, 'max_concurrent': 64, 'memory_limit_mb': 1024}, 'profiler': {'_target_': 'verl.utils.profiler.ProfilerConfig', 'tool': None, 'enable': False, 'all_ranks': False, 'ranks': [], 'save_path': 'outputs/profile', 'tool_config': {'nsys': {'_target_': 'verl.utils.profiler.config.NsightToolConfig', 'discrete': False}, 'npu': {'_target_': 'verl.utils.profiler.config.NPUToolConfig', 'contents': [], 'level': 'level0', 'analysis': True, 'discrete': False}, 'torch': {'_target_': 'verl.utils.profiler.config.TorchProfilerToolConfig', 'contents': [], 'discrete': False}, 'torch_memory': {'_target_': 'verl.utils.profiler.config.TorchMemoryToolConfig', 'trace_alloc_max_entries': 100000, 'stack_depth': 32}}}, 'ulysses_sequence_parallel_size': 1, 'use_reward_loop': True, 'num_workers': 1, 'rollout': {'_target_': 'verl.workers.config.RolloutConfig', 'name': '???', 'dtype': 'bfloat16', 'gpu_memory_utilization': 0.5, 'enforce_eager': True, 'cudagraph_capture_sizes': None, 'free_cache_engine': True, 'data_parallel_size': 1, 'expert_parallel_size': 1, 'tensor_model_parallel_size': 2, 'max_num_batched_tokens': 8192, 'max_model_len': None, 'max_num_seqs': 1024, 'load_format': 'auto', 'engine_kwargs': {}, 'limit_images': None, 'enable_chunked_prefill': True, 'enable_prefix_caching': True, 'disable_log_stats': True, 'skip_tokenizer_init': False, 'prompt_length': 2048, 'response_length': 2048}}, 'algorithm': {'rollout_correction': {'rollout_is': None, 'rollout_is_threshold': 2.0, 'rollout_rs': None, 'rollout_rs_threshold': None, 'bypass_mode': False, 'loss_type': 'ppo_clip', 'rollout_is_batch_normalize': False}, '_target_': 'verl.trainer.config.AlgoConfig', 'gamma': 1.0, 'lam': 1.0, 'adv_estimator': 'grpo', 'norm_adv_by_std_in_grpo': True, 'use_kl_in_reward': False, 'kl_penalty': 'kl', 'kl_ctrl': {'_target_': 'verl.trainer.config.KLControlConfig', 'type': 'fixed', 'kl_coef': 0.001, 'horizon': 10000, 'target_kl': 0.1}, 'use_pf_ppo': False, 'pf_ppo': {'reweight_method': 'pow', 'weight_pow': 2.0}}, 'custom_reward_function': {'path': '/home/mshahidul/readctrl/code/RL_model/verl/verl_train/reward_func/reward.py', 'name': 'compute_score'}, 'trainer': {'balance_batch': True, 'total_epochs': 15, 'total_training_steps': None, 'project_name': 'readctrl-verl', 'experiment_name': 'qwen3-4b-instruct-en', 'logger': ['console', 'wandb'], 'log_val_generations': 0, 'rollout_data_dir': None, 'validation_data_dir': None, 'nnodes': 1, 'n_gpus_per_node': 2, 'save_freq': 5, 'esi_redundant_time': 0, 'resume_mode': 'auto', 'resume_from_path': None, 'val_before_train': True, 'val_only': False, 'test_freq': 10, 'critic_warmup': 0, 'default_hdfs_dir': None, 'del_local_ckpt_after_load': False, 'default_local_dir': '/home/mshahidul/readctrl/code/RL_model/train_v2', 'max_actor_ckpt_to_keep': 1, 'max_critic_ckpt_to_keep': 1, 'ray_wait_register_center_timeout': 300, 'device': 'cuda', 'use_legacy_worker_impl': 'auto', 'remove_previous_ckpt_in_save': True}, 'global_profiler': {'_target_': 'verl.utils.profiler.ProfilerConfig', 'tool': None, 'steps': None, 'profile_continuous_steps': False, 'save_path': 'outputs/profile', 'global_tool_config': {'nsys': {'_target_': 'verl.utils.profiler.config.NsightToolConfig', 'discrete': False, 'controller_nsight_options': {'trace': 'cuda,nvtx,cublas,ucx', 'cuda-memory-usage': 'true', 'cuda-graph-trace': 'graph'}, 'worker_nsight_options': {'trace': 'cuda,nvtx,cublas,ucx', 'cuda-memory-usage': 'true', 'cuda-graph-trace': 'graph', 'capture-range': 'cudaProfilerApi', 'capture-range-end': None, 'kill': 'none'}}, 'torch_memory': {'trace_alloc_max_entries': 100000, 'stack_depth': 32, 'context': 'all', 'stacks': 'all', 'kw_args': {}}}}, 'transfer_queue': {'enable': False}, 'ray_kwargs': {'ray_init': {'num_cpus': None}, 'timeline_json_file': None}, '_wandb': {}} +2026-02-10 10:48:01,063 INFO MainThread:488089 [wandb_init.py:init():892] starting backend +2026-02-10 10:48:01,485 INFO MainThread:488089 [wandb_init.py:init():895] sending inform_init request +2026-02-10 10:48:01,495 INFO MainThread:488089 [wandb_init.py:init():903] backend started and connected +2026-02-10 10:48:01,512 INFO MainThread:488089 [wandb_init.py:init():973] updated telemetry +2026-02-10 10:48:01,533 INFO MainThread:488089 [wandb_init.py:init():997] communicating run to backend with 90.0 second timeout +2026-02-10 10:48:03,432 INFO MainThread:488089 [wandb_init.py:init():1042] starting run threads in backend +2026-02-10 10:48:04,175 INFO MainThread:488089 [wandb_run.py:_console_start():2529] atexit reg +2026-02-10 10:48:04,175 INFO MainThread:488089 [wandb_run.py:_redirect():2377] redirect: wrap_raw +2026-02-10 10:48:04,175 INFO MainThread:488089 [wandb_run.py:_redirect():2446] Wrapping output streams. +2026-02-10 10:48:04,176 INFO MainThread:488089 [wandb_run.py:_redirect():2469] Redirects installed. +2026-02-10 10:48:04,187 INFO MainThread:488089 [wandb_init.py:init():1082] run started, returning control to user process +2026-02-10 13:02:19,159 INFO MainThread:488089 [wandb_run.py:_finish():2295] finishing run shahidulshakib034-khulna-university-of-engineering-techn/readctrl-verl/4ptnl9ej +2026-02-10 13:02:19,161 INFO MainThread:488089 [wandb_run.py:_atexit_cleanup():2494] got exitcode: 0 +2026-02-10 13:02:19,162 INFO MainThread:488089 [wandb_run.py:_restore():2476] restore +2026-02-10 13:02:19,162 INFO MainThread:488089 [wandb_run.py:_restore():2482] restore done +2026-02-10 13:02:19,907 INFO MainThread:488089 [wandb_run.py:_footer_sync_info():3871] logging synced files diff --git a/code/RL_model/verl/verl_train/wandb/run-20260210_131724-1211jgw0/files/output.log b/code/RL_model/verl/verl_train/wandb/run-20260210_131724-1211jgw0/files/output.log new file mode 100644 index 0000000000000000000000000000000000000000..c97fc30e8493b70d05a2791ebe1e6912220e2a49 --- /dev/null +++ b/code/RL_model/verl/verl_train/wandb/run-20260210_131724-1211jgw0/files/output.log @@ -0,0 +1,35 @@ +wandb: Detected [dspy, litellm, openai] in use. +wandb: Use W&B Weave for improved LLM call tracing. Install Weave with `pip install weave` then add `import weave` to the top of your script. +wandb: For more information, check out the docs at: https://weave-docs.wandb.ai/ +Checkpoint tracker file does not exist: /home/mshahidul/readctrl/code/RL_model/train_v2/latest_checkpointed_iteration.txt +Training from scratch +test_gen_batch meta info: {'eos_token_id': 151645, 'pad_token_id': 151643, 'recompute_log_prob': False, 'do_sample': False, 'validate': True, 'global_steps': 0} +validation generation end +("Initial validation metrics: {'val-aux/multiclinsum/reward/mean@1': " + "np.float64(0.20282732751904486), 'val-core/multiclinsum/acc/mean@1': " + "np.float64(0.202827329291985), 'val-aux/num_turns/min': np.int32(2), " + "'val-aux/num_turns/max': np.int32(2), 'val-aux/num_turns/mean': " + 'np.float64(2.0)}') +step:0 - val-aux/multiclinsum/reward/mean@1:np.float64(0.20282732751904486) - val-core/multiclinsum/acc/mean@1:np.float64(0.202827329291985) - val-aux/num_turns/min:np.int32(2) - val-aux/num_turns/max:np.int32(2) - val-aux/num_turns/mean:np.float64(2.0) +Training Progress: 16%|█▌ | 14/90 [27:09:18<135:52:49, 6436.44s/it] +step:1 - global_seqlen/min:904383 - global_seqlen/max:926987 - global_seqlen/minmax_diff:22604 - global_seqlen/balanced_min:915684 - global_seqlen/balanced_max:915686 - global_seqlen/mean:915685.0 - actor/entropy:0.34481707215309143 - perf/mfu/actor_infer:0 - actor/pg_loss:np.float64(0.0018514616240281638) - actor/kl_loss:np.float64(0.0011594261244075215) - actor/pg_clipfrac:np.float64(0.0029825566719713) - actor/ppo_kl:np.float64(1.9972493078057596e-05) - actor/pg_clipfrac_lower:np.float64(3.693853310930232e-06) - actor/kl_coef:np.float64(0.0010000000000000002) - actor/grad_norm:np.float64(0.6428714394569397) - perf/mfu/actor:np.float64(0.2094989122074124) - perf/max_memory_allocated_gb:np.float64(66.85039329528809) - perf/max_memory_reserved_gb:np.float64(77.42578125) - perf/cpu_memory_used_gb:np.float64(560.3104209899902) - actor/lr:np.float64(1e-06) - training/global_step:1 - training/epoch:0 - critic/score/mean:0.15831153094768524 - critic/score/max:1.67739999294281 - critic/score/min:-1.9347000122070312 - critic/rewards/mean:0.15831153094768524 - critic/rewards/max:1.67739999294281 - critic/rewards/min:-1.9347000122070312 - critic/advantages/mean:-0.0013581495732069016 - critic/advantages/max:1.1546990871429443 - critic/advantages/min:-1.1546989679336548 - critic/returns/mean:-0.0013581495732069016 - critic/returns/max:1.1546990871429443 - critic/returns/min:-1.1546989679336548 - response_length/mean:343.6282653808594 - response_length/max:1015.0 - response_length/min:55.0 - response_length/clip_ratio:0.0 - response_length_non_aborted/mean:343.6282653808594 - response_length_non_aborted/max:1015.0 - response_length_non_aborted/min:55.0 - response_length_non_aborted/clip_ratio:0.0 - response/aborted_ratio:0.0 - prompt_length/mean:848.669921875 - prompt_length/max:1024.0 - prompt_length/min:474.0 - prompt_length/clip_ratio:0.001953125 - num_turns/min:np.int32(2) - num_turns/max:np.int32(2) - num_turns/mean:np.float64(2.0) - timing_s/start_profile:0.0004279911518096924 - timing_s/agent_loop/num_preempted/min:np.int64(-1) - timing_s/agent_loop/num_preempted/max:np.int64(-1) - timing_s/agent_loop/num_preempted/mean:np.float64(-1.0) - timing_s/agent_loop/generate_sequences/min:np.float64(9.52629435993731) - timing_s/agent_loop/generate_sequences/max:np.float64(102.46318369172513) - timing_s/agent_loop/generate_sequences/mean:np.float64(53.09819213339991) - timing_s/agent_loop/tool_calls/min:np.float64(0.0) - timing_s/agent_loop/tool_calls/max:np.float64(0.0) - timing_s/agent_loop/tool_calls/mean:np.float64(0.0) - timing_s/agent_loop/slowest/generate_sequences:np.float64(102.46318369172513) - timing_s/agent_loop/slowest/tool_calls:np.float64(0.0) - timing_s/agent_loop/slowest/prompt_length:978 - timing_s/agent_loop/slowest/response_length:887 - timing_s/agent_loop/slowest/num_preempted:np.int64(-1) - timing_s/gen:10558.705292457715 - timing_s/reward:0.0002764500677585602 - timing_s/old_log_prob:103.06826664507389 - timing_s/ref:166.90196609310806 - timing_s/adv:0.11428205855190754 - timing_s/update_actor:484.51057759672403 - timing_s/update_weights:57.97787474654615 - timing_s/step:11372.068956425413 - timing_s/stop_profile:0.00012105889618396759 - timing_per_token_ms/update_actor:0.26456181852750893 - timing_per_token_ms/gen:20.004632876525804 - timing_per_token_ms/adv:6.240249570098207e-05 - timing_per_token_ms/ref:0.09113503338654017 - perf/total_num_tokens:1831370 - perf/time_per_step:11372.068956425413 - perf/throughput:80.52052827929981 +step:2 - global_seqlen/min:902855 - global_seqlen/max:906506 - global_seqlen/minmax_diff:3651 - global_seqlen/balanced_min:904680 - global_seqlen/balanced_max:904681 - global_seqlen/mean:904680.5 - actor/entropy:0.33920779824256897 - perf/mfu/actor_infer:0 - actor/pg_loss:np.float64(3.49809900702415e-05) - actor/kl_loss:np.float64(0.003172027438267833) - actor/pg_clipfrac:np.float64(0.0029588085550737255) - actor/ppo_kl:np.float64(7.039809323335551e-05) - actor/pg_clipfrac_lower:np.float64(3.2061146460667564e-06) - actor/kl_coef:np.float64(0.0010000000000000002) - actor/grad_norm:np.float64(0.6830487847328186) - perf/mfu/actor:np.float64(0.2596320834063999) - perf/max_memory_allocated_gb:np.float64(68.2978663444519) - perf/max_memory_reserved_gb:np.float64(77.42578125) - perf/cpu_memory_used_gb:np.float64(525.9886741638184) - actor/lr:np.float64(1e-06) - training/global_step:2 - training/epoch:0 - critic/score/mean:0.1422908455133438 - critic/score/max:1.67739999294281 - critic/score/min:-1.9347000122070312 - critic/rewards/mean:0.1422908455133438 - critic/rewards/max:1.67739999294281 - critic/rewards/min:-1.9347000122070312 - critic/advantages/mean:0.0008163226884789765 - critic/advantages/max:1.1546993255615234 - critic/advantages/min:-1.1546992063522339 - critic/returns/mean:0.0008163226884789765 - critic/returns/max:1.1546993255615234 - critic/returns/min:-1.1546992063522339 - response_length/mean:333.9791564941406 - response_length/max:895.0 - response_length/min:69.0 - response_length/clip_ratio:0.0 - response_length_non_aborted/mean:333.9791564941406 - response_length_non_aborted/max:895.0 - response_length_non_aborted/min:69.0 - response_length_non_aborted/clip_ratio:0.0 - response/aborted_ratio:0.0 - prompt_length/mean:843.990234375 - prompt_length/max:1022.0 - prompt_length/min:503.0 - prompt_length/clip_ratio:0.0 - num_turns/min:np.int32(2) - num_turns/max:np.int32(2) - num_turns/mean:np.float64(2.0) - timing_s/start_profile:9.107030928134918e-05 - timing_s/agent_loop/num_preempted/min:np.int64(-1) - timing_s/agent_loop/num_preempted/max:np.int64(-1) - timing_s/agent_loop/num_preempted/mean:np.float64(-1.0) - timing_s/agent_loop/generate_sequences/min:np.float64(10.415249353274703) - timing_s/agent_loop/generate_sequences/max:np.float64(95.41629525646567) - timing_s/agent_loop/generate_sequences/mean:np.float64(53.06463890697705) - timing_s/agent_loop/tool_calls/min:np.float64(0.0) - timing_s/agent_loop/tool_calls/max:np.float64(0.0) - timing_s/agent_loop/tool_calls/mean:np.float64(0.0) - timing_s/agent_loop/slowest/generate_sequences:np.float64(95.41629525646567) - timing_s/agent_loop/slowest/tool_calls:np.float64(0.0) - timing_s/agent_loop/slowest/prompt_length:967 - timing_s/agent_loop/slowest/response_length:813 - timing_s/agent_loop/slowest/num_preempted:np.int64(-1) - timing_s/gen:6260.377356123179 - timing_s/reward:0.00021571293473243713 - timing_s/old_log_prob:104.86580123566091 - timing_s/ref:110.49944491684437 - timing_s/adv:0.10402406379580498 - timing_s/update_actor:344.1254272479564 - timing_s/update_weights:37.53278676047921 - timing_s/step:6858.35935588181 - timing_s/stop_profile:0.00022448226809501648 - timing_per_token_ms/update_actor:0.1901916904630731 - timing_per_token_ms/gen:12.20365494222752 - timing_per_token_ms/adv:5.749215540503248e-05 - timing_per_token_ms/ref:0.0610709774980473 - perf/total_num_tokens:1809361 - perf/time_per_step:6858.35935588181 - perf/throughput:131.9091714294812 +step:3 - global_seqlen/min:897027 - global_seqlen/max:932561 - global_seqlen/minmax_diff:35534 - global_seqlen/balanced_min:914793 - global_seqlen/balanced_max:914795 - global_seqlen/mean:914794.0 - actor/entropy:0.34644219279289246 - perf/mfu/actor_infer:0 - actor/pg_loss:np.float64(0.006986460823100067) - actor/kl_loss:np.float64(0.002725735552909706) - actor/pg_clipfrac:np.float64(0.002998340171567785) - actor/ppo_kl:np.float64(3.007104322705345e-05) - actor/pg_clipfrac_lower:np.float64(0.0) - actor/kl_coef:np.float64(0.0010000000000000002) - actor/grad_norm:np.float64(0.6904629766941071) - perf/mfu/actor:np.float64(0.23226882580273517) - perf/max_memory_allocated_gb:np.float64(68.36006116867065) - perf/max_memory_reserved_gb:np.float64(77.42578125) - perf/cpu_memory_used_gb:np.float64(590.0709457397461) - actor/lr:np.float64(1e-06) - training/global_step:3 - training/epoch:0 - critic/score/mean:0.002129792235791683 - critic/score/max:1.67739999294281 - critic/score/min:-1.9347000122070312 - critic/rewards/mean:0.002129792235791683 - critic/rewards/max:1.67739999294281 - critic/rewards/min:-1.9347000122070312 - critic/advantages/mean:-0.006899530533701181 - critic/advantages/max:1.1546992063522339 - critic/advantages/min:-1.1546993255615234 - critic/returns/mean:-0.006899530533701181 - critic/returns/max:1.1546992063522339 - critic/returns/min:-1.1546993255615234 - response_length/mean:341.4368591308594 - response_length/max:975.0 - response_length/min:71.0 - response_length/clip_ratio:0.0 - response_length_non_aborted/mean:341.4368591308594 - response_length_non_aborted/max:975.0 - response_length_non_aborted/min:71.0 - response_length_non_aborted/clip_ratio:0.0 - response/aborted_ratio:0.0 - prompt_length/mean:849.701171875 - prompt_length/max:1023.0 - prompt_length/min:505.0 - prompt_length/clip_ratio:0.0 - num_turns/min:np.int32(2) - num_turns/max:np.int32(2) - num_turns/mean:np.float64(2.0) - timing_s/start_profile:0.00014372169971466064 - timing_s/agent_loop/num_preempted/min:np.int64(-1) - timing_s/agent_loop/num_preempted/max:np.int64(-1) - timing_s/agent_loop/num_preempted/mean:np.float64(-1.0) - timing_s/agent_loop/generate_sequences/min:np.float64(9.696604173630476) - timing_s/agent_loop/generate_sequences/max:np.float64(101.52453327178955) - timing_s/agent_loop/generate_sequences/mean:np.float64(56.159022753915146) - timing_s/agent_loop/tool_calls/min:np.float64(0.0) - timing_s/agent_loop/tool_calls/max:np.float64(0.0) - timing_s/agent_loop/tool_calls/mean:np.float64(0.0) - timing_s/agent_loop/slowest/generate_sequences:np.float64(101.52453327178955) - timing_s/agent_loop/slowest/tool_calls:np.float64(0.0) - timing_s/agent_loop/slowest/prompt_length:1018 - timing_s/agent_loop/slowest/response_length:793 - timing_s/agent_loop/slowest/num_preempted:np.int64(-1) - timing_s/gen:6959.941137799993 - timing_s/reward:0.0003918502479791641 - timing_s/old_log_prob:102.1553018912673 - timing_s/ref:105.36204122006893 - timing_s/adv:0.11357932351529598 - timing_s/update_actor:409.8651849385351 - timing_s/update_weights:42.74051162600517 - timing_s/step:7621.053854770958 - timing_s/stop_profile:0.00035617128014564514 - timing_per_token_ms/update_actor:0.224020481626757 - timing_per_token_ms/gen:13.271009535377251 - timing_per_token_ms/adv:6.207918040307216e-05 - timing_per_token_ms/ref:0.05758785104628415 - perf/total_num_tokens:1829588 - perf/time_per_step:7621.053854770958 - perf/throughput:120.03510504355215 +step:4 - global_seqlen/min:911273 - global_seqlen/max:918514 - global_seqlen/minmax_diff:7241 - global_seqlen/balanced_min:914893 - global_seqlen/balanced_max:914894 - global_seqlen/mean:914893.5 - actor/entropy:0.3310437500476837 - perf/mfu/actor_infer:0 - actor/pg_loss:np.float64(0.008646129320065175) - actor/kl_loss:np.float64(0.003855417583205659) - actor/pg_clipfrac:np.float64(0.002998319403559435) - actor/ppo_kl:np.float64(7.829349669445189e-05) - actor/pg_clipfrac_lower:np.float64(0.0) - actor/kl_coef:np.float64(0.0010000000000000002) - actor/grad_norm:np.float64(1.6705068349838257) - perf/mfu/actor:np.float64(0.2682557596163515) - perf/max_memory_allocated_gb:np.float64(68.36006116867065) - perf/max_memory_reserved_gb:np.float64(77.42578125) - perf/cpu_memory_used_gb:np.float64(582.3164710998535) - actor/lr:np.float64(1e-06) - training/global_step:4 - training/epoch:0 - critic/score/mean:-0.07396338135004044 - critic/score/max:0.6773999929428101 - critic/score/min:-1.9347000122070312 - critic/rewards/mean:-0.07396338135004044 - critic/rewards/max:0.6773999929428101 - critic/rewards/min:-1.9347000122070312 - critic/advantages/mean:-0.002212673658505082 - critic/advantages/max:1.1546955108642578 - critic/advantages/min:-1.154699683189392 - critic/returns/mean:-0.002212673658505082 - critic/returns/max:1.1546955108642578 - critic/returns/min:-1.154699683189392 - response_length/mean:338.583984375 - response_length/max:917.0 - response_length/min:50.0 - response_length/clip_ratio:0.0 - response_length_non_aborted/mean:338.583984375 - response_length_non_aborted/max:917.0 - response_length_non_aborted/min:50.0 - response_length_non_aborted/clip_ratio:0.0 - response/aborted_ratio:0.0 - prompt_length/mean:852.68359375 - prompt_length/max:1024.0 - prompt_length/min:500.0 - prompt_length/clip_ratio:0.001953125 - num_turns/min:np.int32(2) - num_turns/max:np.int32(2) - num_turns/mean:np.float64(2.0) - timing_s/start_profile:0.00013132765889167786 - timing_s/agent_loop/num_preempted/min:np.int64(-1) - timing_s/agent_loop/num_preempted/max:np.int64(-1) - timing_s/agent_loop/num_preempted/mean:np.float64(-1.0) - timing_s/agent_loop/generate_sequences/min:np.float64(9.1927550714463) - timing_s/agent_loop/generate_sequences/max:np.float64(111.37905073538423) - timing_s/agent_loop/generate_sequences/mean:np.float64(56.981789650533756) - timing_s/agent_loop/tool_calls/min:np.float64(0.0) - timing_s/agent_loop/tool_calls/max:np.float64(0.0) - timing_s/agent_loop/tool_calls/mean:np.float64(0.0) - timing_s/agent_loop/slowest/generate_sequences:np.float64(111.37905073538423) - timing_s/agent_loop/slowest/tool_calls:np.float64(0.0) - timing_s/agent_loop/slowest/prompt_length:967 - timing_s/agent_loop/slowest/response_length:765 - timing_s/agent_loop/slowest/num_preempted:np.int64(-1) - timing_s/gen:5966.30539980717 - timing_s/reward:0.000382985919713974 - timing_s/old_log_prob:96.8154194522649 - timing_s/ref:109.49904641509056 - timing_s/adv:0.11315538734197617 - timing_s/update_actor:355.71544171869755 - timing_s/update_weights:50.02524193003774 - timing_s/step:6579.494083464146 - timing_s/stop_profile:0.00022345781326293945 - timing_per_token_ms/update_actor:0.19440264999078993 - timing_per_token_ms/gen:11.472230201623201 - timing_per_token_ms/adv:6.184074285257037e-05 - timing_per_token_ms/ref:0.05984250976484725 - perf/total_num_tokens:1829787 - perf/time_per_step:6579.494083464146 - perf/throughput:139.05225666200505 +local_global_step_folder: /home/mshahidul/readctrl/code/RL_model/train_v2/global_step_5 +Warning: remove_previous_ckpt_in_save is deprecated, set max_actor_ckpt_to_keep=1 and max_critic_ckpt_to_keep=1 instead +step:5 - global_seqlen/min:895204 - global_seqlen/max:897413 - global_seqlen/minmax_diff:2209 - global_seqlen/balanced_min:896307 - global_seqlen/balanced_max:896310 - global_seqlen/mean:896308.5 - actor/entropy:0.3421386778354645 - perf/mfu/actor_infer:0 - actor/pg_loss:np.float64(0.009625122649595145) - actor/kl_loss:np.float64(0.005004712205845863) - actor/pg_clipfrac:np.float64(0.0029644252451059097) - actor/ppo_kl:np.float64(3.524208580074628e-05) - actor/pg_clipfrac_lower:np.float64(0.0) - actor/kl_coef:np.float64(0.0010000000000000002) - actor/grad_norm:np.float64(0.7243452668190002) - perf/mfu/actor:np.float64(0.26171028563049353) - perf/max_memory_allocated_gb:np.float64(68.36006116867065) - perf/max_memory_reserved_gb:np.float64(77.42578125) - perf/cpu_memory_used_gb:np.float64(583.3458938598633) - actor/lr:np.float64(1e-06) - training/global_step:5 - training/epoch:0 - critic/score/mean:-0.06008334830403328 - critic/score/max:0.6773999929428101 - critic/score/min:-1.9347000122070312 - critic/rewards/mean:-0.06008334830403328 - critic/rewards/max:0.6773999929428101 - critic/rewards/min:-1.9347000122070312 - critic/advantages/mean:-0.0010442143538966775 - critic/advantages/max:1.1546990871429443 - critic/advantages/min:-1.1546990871429443 - critic/returns/mean:-0.0010442143538966775 - critic/returns/max:1.1546990871429443 - critic/returns/min:-1.1546990871429443 - response_length/mean:320.80859375 - response_length/max:938.0 - response_length/min:49.0 - response_length/clip_ratio:0.0 - response_length_non_aborted/mean:320.80859375 - response_length_non_aborted/max:938.0 - response_length_non_aborted/min:49.0 - response_length_non_aborted/clip_ratio:0.0 - response/aborted_ratio:0.0 - prompt_length/mean:846.259765625 - prompt_length/max:1024.0 - prompt_length/min:499.0 - prompt_length/clip_ratio:0.0078125 - num_turns/min:np.int32(2) - num_turns/max:np.int32(2) - num_turns/mean:np.float64(2.0) - timing_s/start_profile:7.12275505065918e-05 - timing_s/agent_loop/num_preempted/min:np.int64(-1) - timing_s/agent_loop/num_preempted/max:np.int64(-1) - timing_s/agent_loop/num_preempted/mean:np.float64(-1.0) - timing_s/agent_loop/generate_sequences/min:np.float64(9.692853027954698) - timing_s/agent_loop/generate_sequences/max:np.float64(100.33804639987648) - timing_s/agent_loop/generate_sequences/mean:np.float64(53.73951493648807) - timing_s/agent_loop/tool_calls/min:np.float64(0.0) - timing_s/agent_loop/tool_calls/max:np.float64(0.0) - timing_s/agent_loop/tool_calls/mean:np.float64(0.0) - timing_s/agent_loop/slowest/generate_sequences:np.float64(100.33804639987648) - timing_s/agent_loop/slowest/tool_calls:np.float64(0.0) - timing_s/agent_loop/slowest/prompt_length:915 - timing_s/agent_loop/slowest/response_length:729 - timing_s/agent_loop/slowest/num_preempted:np.int64(-1) - timing_s/gen:5868.828779060394 - timing_s/reward:0.0002489425241947174 - timing_s/old_log_prob:93.80370813794434 - timing_s/ref:102.6779938004911 - timing_s/adv:0.11371846869587898 - timing_s/update_actor:344.207032084465 - timing_s/update_weights:55.44637464731932 - timing_s/step:6466.0121261347085 - timing_s/save_checkpoint:191.2808481361717 - timing_s/stop_profile:0.0005093328654766083 - timing_per_token_ms/update_actor:0.19201370514976987 - timing_per_token_ms/gen:11.910067698118754 - timing_per_token_ms/adv:6.343712499428433e-05 - timing_per_token_ms/ref:0.057278266244541416 - perf/total_num_tokens:1792617 - perf/time_per_step:6466.0121261347085 - perf/throughput:138.61843784320286 +step:6 - global_seqlen/min:881971 - global_seqlen/max:900551 - global_seqlen/minmax_diff:18580 - global_seqlen/balanced_min:891256 - global_seqlen/balanced_max:891266 - global_seqlen/mean:891261.0 - actor/entropy:0.32986292243003845 - perf/mfu/actor_infer:0 - actor/pg_loss:np.float64(0.01222455719259111) - actor/kl_loss:np.float64(0.006763544864952564) - actor/pg_clipfrac:np.float64(0.0031090279953787103) - actor/ppo_kl:np.float64(8.558833257173622e-05) - actor/pg_clipfrac_lower:np.float64(0.0) - actor/kl_coef:np.float64(0.0010000000000000002) - actor/grad_norm:np.float64(0.612917572259903) - perf/mfu/actor:np.float64(0.2437447657726975) - perf/max_memory_allocated_gb:np.float64(68.36006116867065) - perf/max_memory_reserved_gb:np.float64(77.42578125) - perf/cpu_memory_used_gb:np.float64(583.4901008605957) - actor/lr:np.float64(1e-06) - training/global_step:6 - training/epoch:0 - critic/score/mean:-0.05711016058921814 - critic/score/max:0.6773999929428101 - critic/score/min:-1.9347000122070312 - critic/rewards/mean:-0.05711016058921814 - critic/rewards/max:0.6773999929428101 - critic/rewards/min:-1.9347000122070312 - critic/advantages/mean:0.0003150680277030915 - critic/advantages/max:1.1546992063522339 - critic/advantages/min:-1.1546984910964966 - critic/returns/mean:0.0003150680277030915 - critic/returns/max:1.1546992063522339 - critic/returns/min:-1.1546984910964966 - response_length/mean:313.8671875 - response_length/max:996.0 - response_length/min:53.0 - response_length/clip_ratio:0.0 - response_length_non_aborted/mean:313.8671875 - response_length_non_aborted/max:996.0 - response_length_non_aborted/min:53.0 - response_length_non_aborted/clip_ratio:0.0 - response/aborted_ratio:0.0 - prompt_length/mean:846.62890625 - prompt_length/max:1024.0 - prompt_length/min:475.0 - prompt_length/clip_ratio:0.005859375 - num_turns/min:np.int32(2) - num_turns/max:np.int32(2) - num_turns/mean:np.float64(2.0) - timing_s/start_profile:7.897429168224335e-05 - timing_s/agent_loop/num_preempted/min:np.int64(-1) - timing_s/agent_loop/num_preempted/max:np.int64(-1) - timing_s/agent_loop/num_preempted/mean:np.float64(-1.0) - timing_s/agent_loop/generate_sequences/min:np.float64(9.867899587377906) - timing_s/agent_loop/generate_sequences/max:np.float64(97.00309281609952) - timing_s/agent_loop/generate_sequences/mean:np.float64(52.04364936840648) - timing_s/agent_loop/tool_calls/min:np.float64(0.0) - timing_s/agent_loop/tool_calls/max:np.float64(0.0) - timing_s/agent_loop/tool_calls/mean:np.float64(0.0) - timing_s/agent_loop/slowest/generate_sequences:np.float64(97.00309281609952) - timing_s/agent_loop/slowest/tool_calls:np.float64(0.0) - timing_s/agent_loop/slowest/prompt_length:1023 - timing_s/agent_loop/slowest/response_length:815 - timing_s/agent_loop/slowest/num_preempted:np.int64(-1) - timing_s/gen:5669.365844171494 - timing_s/reward:0.00017180852591991425 - timing_s/old_log_prob:100.31091501004994 - timing_s/ref:108.00576576776803 - timing_s/adv:0.1348793599754572 - timing_s/update_actor:364.133677829057 - timing_s/update_weights:44.11274326220155 - timing_s/step:6286.895022543147 - timing_s/stop_profile:0.00021255575120449066 - timing_per_token_ms/update_actor:0.20428004693858307 - timing_per_token_ms/gen:11.759730023172567 - timing_per_token_ms/adv:7.566771124028606e-05 - timing_per_token_ms/ref:0.060591547126917944 - perf/total_num_tokens:1782522 - perf/time_per_step:6286.895022543147 - perf/throughput:141.7648929724726 +step:7 - global_seqlen/min:876120 - global_seqlen/max:892045 - global_seqlen/minmax_diff:15925 - global_seqlen/balanced_min:884080 - global_seqlen/balanced_max:884085 - global_seqlen/mean:884082.5 - actor/entropy:0.32218098640441895 - perf/mfu/actor_infer:0 - actor/pg_loss:np.float64(0.00035182575114354325) - actor/kl_loss:np.float64(0.009961440227925777) - actor/pg_clipfrac:np.float64(0.003234748454512252) - actor/ppo_kl:np.float64(2.9654548105402984e-05) - actor/pg_clipfrac_lower:np.float64(7.33995041931242e-06) - actor/kl_coef:np.float64(0.0010000000000000002) - actor/grad_norm:np.float64(0.6822517216205597) - perf/mfu/actor:np.float64(0.22398201024750763) - perf/max_memory_allocated_gb:np.float64(68.36006116867065) - perf/max_memory_reserved_gb:np.float64(77.42578125) - perf/cpu_memory_used_gb:np.float64(584.4703979492188) - actor/lr:np.float64(1e-06) - training/global_step:7 - training/epoch:1 - critic/score/mean:-0.037312209606170654 - critic/score/max:0.6773999929428101 - critic/score/min:-1.9347000122070312 - critic/rewards/mean:-0.037312209606170654 - critic/rewards/max:0.6773999929428101 - critic/rewards/min:-1.9347000122070312 - critic/advantages/mean:-0.0016535327304154634 - critic/advantages/max:1.1546984910964966 - critic/advantages/min:-1.1546993255615234 - critic/returns/mean:-0.0016535327304154634 - critic/returns/max:1.1546984910964966 - critic/returns/min:-1.1546993255615234 - response_length/mean:310.9400939941406 - response_length/max:903.0 - response_length/min:54.0 - response_length/clip_ratio:0.0 - response_length_non_aborted/mean:310.9400939941406 - response_length_non_aborted/max:903.0 - response_length_non_aborted/min:54.0 - response_length_non_aborted/clip_ratio:0.0 - response/aborted_ratio:0.0 - prompt_length/mean:840.208984375 - prompt_length/max:1024.0 - prompt_length/min:500.0 - prompt_length/clip_ratio:0.00390625 - num_turns/min:np.int32(2) - num_turns/max:np.int32(2) - num_turns/mean:np.float64(2.0) - timing_s/start_profile:0.0005827322602272034 - timing_s/agent_loop/num_preempted/min:np.int64(-1) - timing_s/agent_loop/num_preempted/max:np.int64(-1) - timing_s/agent_loop/num_preempted/mean:np.float64(-1.0) - timing_s/agent_loop/generate_sequences/min:np.float64(8.906913017854095) - timing_s/agent_loop/generate_sequences/max:np.float64(92.80843842588365) - timing_s/agent_loop/generate_sequences/mean:np.float64(50.66800584747398) - timing_s/agent_loop/tool_calls/min:np.float64(0.0) - timing_s/agent_loop/tool_calls/max:np.float64(0.0) - timing_s/agent_loop/tool_calls/mean:np.float64(0.0) - timing_s/agent_loop/slowest/generate_sequences:np.float64(92.80843842588365) - timing_s/agent_loop/slowest/tool_calls:np.float64(0.0) - timing_s/agent_loop/slowest/prompt_length:1009 - timing_s/agent_loop/slowest/response_length:854 - timing_s/agent_loop/slowest/num_preempted:np.int64(-1) - timing_s/gen:5553.007653495297 - timing_s/reward:0.00030176714062690735 - timing_s/old_log_prob:89.42383965104818 - timing_s/ref:102.0808622892946 - timing_s/adv:0.13038942031562328 - timing_s/update_actor:394.95442538894713 - timing_s/update_weights:63.252722362056375 - timing_s/step:6203.772359175608 - timing_s/stop_profile:0.000279206782579422 - timing_per_token_ms/update_actor:0.22336966594686986 - timing_per_token_ms/gen:11.626803070106819 - timing_per_token_ms/adv:7.374279001994909e-05 - timing_per_token_ms/ref:0.05773265633540682 - perf/total_num_tokens:1768165 - perf/time_per_step:6203.772359175608 - perf/throughput:142.50724378891974 +step:8 - global_seqlen/min:901539 - global_seqlen/max:909991 - global_seqlen/minmax_diff:8452 - global_seqlen/balanced_min:905763 - global_seqlen/balanced_max:905767 - global_seqlen/mean:905765.0 - actor/entropy:0.3145217001438141 - perf/mfu/actor_infer:0 - actor/pg_loss:np.float64(0.0044407083466163864) - actor/kl_loss:np.float64(0.012944300950039178) - actor/pg_clipfrac:np.float64(0.0032360340483137406) - actor/ppo_kl:np.float64(3.551380960213161e-05) - actor/pg_clipfrac_lower:np.float64(0.0) - actor/kl_coef:np.float64(0.0010000000000000002) - actor/grad_norm:np.float64(0.7236647307872772) - perf/mfu/actor:np.float64(0.2622002649554521) - perf/max_memory_allocated_gb:np.float64(68.36006116867065) - perf/max_memory_reserved_gb:np.float64(77.42578125) - perf/cpu_memory_used_gb:np.float64(578.8945503234863) - actor/lr:np.float64(1e-06) - training/global_step:8 - training/epoch:1 - critic/score/mean:-0.033118799328804016 - critic/score/max:0.6773999929428101 - critic/score/min:-1.9347000122070312 - critic/rewards/mean:-0.033118799328804016 - critic/rewards/max:0.6773999929428101 - critic/rewards/min:-1.9347000122070312 - critic/advantages/mean:-0.0018672083970159292 - critic/advantages/max:1.1546947956085205 - critic/advantages/min:-1.1546993255615234 - critic/returns/mean:-0.0018672083970159292 - critic/returns/max:1.1546947956085205 - critic/returns/min:-1.1546993255615234 - response_length/mean:325.6197814941406 - response_length/max:1003.0 - response_length/min:64.0 - response_length/clip_ratio:0.0 - response_length_non_aborted/mean:325.6197814941406 - response_length_non_aborted/max:1003.0 - response_length_non_aborted/min:64.0 - response_length_non_aborted/clip_ratio:0.0 - response/aborted_ratio:0.0 - prompt_length/mean:853.76171875 - prompt_length/max:1024.0 - prompt_length/min:502.0 - prompt_length/clip_ratio:0.00390625 - num_turns/min:np.int32(2) - num_turns/max:np.int32(2) - num_turns/mean:np.float64(2.0) - timing_s/start_profile:5.797483026981354e-05 - timing_s/agent_loop/num_preempted/min:np.int64(-1) - timing_s/agent_loop/num_preempted/max:np.int64(-1) - timing_s/agent_loop/num_preempted/mean:np.float64(-1.0) - timing_s/agent_loop/generate_sequences/min:np.float64(9.67405105009675) - timing_s/agent_loop/generate_sequences/max:np.float64(95.09167942777276) - timing_s/agent_loop/generate_sequences/mean:np.float64(54.8818517910986) - timing_s/agent_loop/tool_calls/min:np.float64(0.0) - timing_s/agent_loop/tool_calls/max:np.float64(0.0) - timing_s/agent_loop/tool_calls/mean:np.float64(0.0) - timing_s/agent_loop/slowest/generate_sequences:np.float64(95.09167942777276) - timing_s/agent_loop/slowest/tool_calls:np.float64(0.0) - timing_s/agent_loop/slowest/prompt_length:848 - timing_s/agent_loop/slowest/response_length:642 - timing_s/agent_loop/slowest/num_preempted:np.int64(-1) - timing_s/gen:6187.22431800887 - timing_s/reward:0.00016247481107711792 - timing_s/old_log_prob:91.87148373574018 - timing_s/ref:100.93268248066306 - timing_s/adv:0.12321080267429352 - timing_s/update_actor:347.8723659925163 - timing_s/update_weights:57.924604373052716 - timing_s/step:6786.886287370697 - timing_s/stop_profile:0.0002636238932609558 - timing_per_token_ms/update_actor:0.19203235165441163 - timing_per_token_ms/gen:12.370687946881887 - timing_per_token_ms/adv:6.801477351978356e-05 - timing_per_token_ms/ref:0.05571681533326142 - perf/total_num_tokens:1811530 - perf/time_per_step:6786.886287370697 - perf/throughput:133.45810753975397 +step:9 - global_seqlen/min:883852 - global_seqlen/max:894389 - global_seqlen/minmax_diff:10537 - global_seqlen/balanced_min:889118 - global_seqlen/balanced_max:889123 - global_seqlen/mean:889120.5 - actor/entropy:0.305191308259964 - perf/mfu/actor_infer:0 - actor/pg_loss:np.float64(0.00927431862995337) - actor/kl_loss:np.float64(0.01769162520455817) - actor/pg_clipfrac:np.float64(0.0033205800306556434) - actor/ppo_kl:np.float64(5.7893916453129655e-05) - actor/pg_clipfrac_lower:np.float64(0.0) - actor/kl_coef:np.float64(0.0010000000000000002) - actor/grad_norm:np.float64(0.7476052939891815) - perf/mfu/actor:np.float64(0.24532816948028224) - perf/max_memory_allocated_gb:np.float64(68.36006116867065) - perf/max_memory_reserved_gb:np.float64(77.42578125) - perf/cpu_memory_used_gb:np.float64(586.248908996582) - actor/lr:np.float64(1e-06) - training/global_step:9 - training/epoch:1 - critic/score/mean:0.044463012367486954 - critic/score/max:0.6773999929428101 - critic/score/min:-1.9347000122070312 - critic/rewards/mean:0.044463012367486954 - critic/rewards/max:0.6773999929428101 - critic/rewards/min:-1.9347000122070312 - critic/advantages/mean:0.0010309863137081265 - critic/advantages/max:1.1546989679336548 - critic/advantages/min:-1.1546992063522339 - critic/returns/mean:0.0010309863137081265 - critic/returns/max:1.1546989679336548 - critic/returns/min:-1.1546992063522339 - response_length/mean:308.90234375 - response_length/max:889.0 - response_length/min:43.0 - response_length/clip_ratio:0.0 - response_length_non_aborted/mean:308.90234375 - response_length_non_aborted/max:889.0 - response_length_non_aborted/min:43.0 - response_length_non_aborted/clip_ratio:0.0 - response/aborted_ratio:0.0 - prompt_length/mean:848.806640625 - prompt_length/max:1024.0 - prompt_length/min:474.0 - prompt_length/clip_ratio:0.00390625 - num_turns/min:np.int32(2) - num_turns/max:np.int32(2) - num_turns/mean:np.float64(2.0) - timing_s/start_profile:0.000120602548122406 - timing_s/agent_loop/num_preempted/min:np.int64(-1) - timing_s/agent_loop/num_preempted/max:np.int64(-1) - timing_s/agent_loop/num_preempted/mean:np.float64(-1.0) - timing_s/agent_loop/generate_sequences/min:np.float64(11.329346572980285) - timing_s/agent_loop/generate_sequences/max:np.float64(94.06737651117146) - timing_s/agent_loop/generate_sequences/mean:np.float64(52.864936956724705) - timing_s/agent_loop/tool_calls/min:np.float64(0.0) - timing_s/agent_loop/tool_calls/max:np.float64(0.0) - timing_s/agent_loop/tool_calls/mean:np.float64(0.0) - timing_s/agent_loop/slowest/generate_sequences:np.float64(94.06737651117146) - timing_s/agent_loop/slowest/tool_calls:np.float64(0.0) - timing_s/agent_loop/slowest/prompt_length:902 - timing_s/agent_loop/slowest/response_length:754 - timing_s/agent_loop/slowest/num_preempted:np.int64(-1) - timing_s/gen:5498.060313463211 - timing_s/reward:0.00039741024374961853 - timing_s/old_log_prob:99.86962665244937 - timing_s/ref:106.55147680267692 - timing_s/adv:0.1368559468537569 - timing_s/update_actor:359.43351343274117 - timing_s/update_weights:45.83023336529732 - timing_s/step:6110.919870035723 - timing_s/stop_profile:0.00021479465067386627 - timing_per_token_ms/update_actor:0.2021286841506529 - timing_per_token_ms/gen:11.587695666070662 - timing_per_token_ms/adv:7.69614168460613e-05 - timing_per_token_ms/ref:0.05991959290258009 - perf/total_num_tokens:1778241 - perf/time_per_step:6110.919870035723 - perf/throughput:145.4969986367703 +test_gen_batch meta info: {'eos_token_id': 151645, 'pad_token_id': 151643, 'recompute_log_prob': False, 'do_sample': False, 'validate': True, 'global_steps': 10} +validation generation end +local_global_step_folder: /home/mshahidul/readctrl/code/RL_model/train_v2/global_step_10 +Warning: remove_previous_ckpt_in_save is deprecated, set max_actor_ckpt_to_keep=1 and max_critic_ckpt_to_keep=1 instead +Removed old checkpoint directory: /home/mshahidul/readctrl/code/RL_model/train_v2/global_step_5 +step:10 - global_seqlen/min:903543 - global_seqlen/max:916078 - global_seqlen/minmax_diff:12535 - global_seqlen/balanced_min:909804 - global_seqlen/balanced_max:909817 - global_seqlen/mean:909810.5 - actor/entropy:0.30126112699508667 - perf/mfu/actor_infer:0 - actor/pg_loss:np.float64(0.0005695469493124106) - actor/kl_loss:np.float64(0.019951117186186213) - actor/pg_clipfrac:np.float64(0.0032545081679321206) - actor/ppo_kl:np.float64(-8.904632615743442e-05) - actor/pg_clipfrac_lower:np.float64(6.7186755738172605e-06) - actor/kl_coef:np.float64(0.0010000000000000002) - actor/grad_norm:np.float64(1.1050503849983215) - perf/mfu/actor:np.float64(0.25686116762812944) - perf/max_memory_allocated_gb:np.float64(68.36006116867065) - perf/max_memory_reserved_gb:np.float64(77.42578125) - perf/cpu_memory_used_gb:np.float64(588.6107025146484) - actor/lr:np.float64(1e-06) - val-aux/multiclinsum/reward/mean@1:np.float64(-0.10784733978147476) - val-core/multiclinsum/acc/mean@1:np.float64(-0.10784734078105108) - val-aux/num_turns/min:np.int32(2) - val-aux/num_turns/max:np.int32(2) - val-aux/num_turns/mean:np.float64(2.0) - training/global_step:10 - training/epoch:1 - critic/score/mean:0.0028527751564979553 - critic/score/max:0.6773999929428101 - critic/score/min:-1.9347000122070312 - critic/rewards/mean:0.0028527751564979553 - critic/rewards/max:0.6773999929428101 - critic/rewards/min:-1.9347000122070312 - critic/advantages/mean:-0.0010558879002928734 - critic/advantages/max:1.1546971797943115 - critic/advantages/min:-1.1546992063522339 - critic/returns/mean:-0.0010558879002928734 - critic/returns/max:1.1546971797943115 - critic/returns/min:-1.1546992063522339 - response_length/mean:333.4752502441406 - response_length/max:949.0 - response_length/min:52.0 - response_length/clip_ratio:0.0 - response_length_non_aborted/mean:333.4752502441406 - response_length_non_aborted/max:949.0 - response_length_non_aborted/min:52.0 - response_length_non_aborted/clip_ratio:0.0 - response/aborted_ratio:0.0 - prompt_length/mean:851.173828125 - prompt_length/max:1024.0 - prompt_length/min:475.0 - prompt_length/clip_ratio:0.005859375 - num_turns/min:np.int32(2) - num_turns/max:np.int32(2) - num_turns/mean:np.float64(2.0) - timing_s/start_profile:6.205961108207703e-05 - timing_s/agent_loop/num_preempted/min:np.int64(-1) - timing_s/agent_loop/num_preempted/max:np.int64(-1) - timing_s/agent_loop/num_preempted/mean:np.float64(-1.0) - timing_s/agent_loop/generate_sequences/min:np.float64(9.929986648261547) - timing_s/agent_loop/generate_sequences/max:np.float64(104.17007439956069) - timing_s/agent_loop/generate_sequences/mean:np.float64(56.94846514850118) - timing_s/agent_loop/tool_calls/min:np.float64(0.0) - timing_s/agent_loop/tool_calls/max:np.float64(0.0) - timing_s/agent_loop/tool_calls/mean:np.float64(0.0) - timing_s/agent_loop/slowest/generate_sequences:np.float64(104.17007439956069) - timing_s/agent_loop/slowest/tool_calls:np.float64(0.0) - timing_s/agent_loop/slowest/prompt_length:985 - timing_s/agent_loop/slowest/response_length:751 - timing_s/agent_loop/slowest/num_preempted:np.int64(-1) - timing_s/gen:6064.999230513349 - timing_s/reward:0.00040877237915992737 - timing_s/old_log_prob:92.05172420665622 - timing_s/ref:101.7208273652941 - timing_s/adv:0.11700491048395634 - timing_s/update_actor:360.0715084206313 - timing_s/update_weights:61.29108513891697 - timing_s/step:6681.304787475616 - timing_s/testing:692.2788705602288 - timing_s/save_checkpoint:225.57176331430674 - timing_s/stop_profile:0.0004273168742656708 - timing_per_token_ms/update_actor:0.19788269558365795 - timing_per_token_ms/gen:11.840660091041997 - timing_per_token_ms/adv:6.430180267426917e-05 - timing_per_token_ms/ref:0.05590220566002156 - perf/total_num_tokens:1819621 - perf/time_per_step:6681.304787475616 - perf/throughput:136.17257840197288 +step:11 - global_seqlen/min:883866 - global_seqlen/max:898466 - global_seqlen/minmax_diff:14600 - global_seqlen/balanced_min:891165 - global_seqlen/balanced_max:891167 - global_seqlen/mean:891166.0 - actor/entropy:0.3025473952293396 - perf/mfu/actor_infer:0 - actor/pg_loss:np.float64(0.0046024472188340194) - actor/kl_loss:np.float64(0.02398725826060399) - actor/pg_clipfrac:np.float64(0.0029758745343618407) - actor/ppo_kl:np.float64(0.00010468888969228374) - actor/pg_clipfrac_lower:np.float64(0.0) - actor/kl_coef:np.float64(0.0010000000000000002) - actor/grad_norm:np.float64(0.7705514430999756) - perf/mfu/actor:np.float64(0.26531735002276124) - perf/max_memory_allocated_gb:np.float64(68.36006116867065) - perf/max_memory_reserved_gb:np.float64(77.42578125) - perf/cpu_memory_used_gb:np.float64(546.8144378662109) - actor/lr:np.float64(1e-06) - training/global_step:11 - training/epoch:1 - critic/score/mean:-0.032743390649557114 - critic/score/max:0.6773999929428101 - critic/score/min:-1.9347000122070312 - critic/rewards/mean:-0.032743390649557114 - critic/rewards/max:0.6773999929428101 - critic/rewards/min:-1.9347000122070312 - critic/advantages/mean:-0.0054254173301160336 - critic/advantages/max:1.1546963453292847 - critic/advantages/min:-1.154698371887207 - critic/returns/mean:-0.0054254173301160336 - critic/returns/max:1.1546963453292847 - critic/returns/min:-1.154698371887207 - response_length/mean:317.9466247558594 - response_length/max:941.0 - response_length/min:61.0 - response_length/clip_ratio:0.0 - response_length_non_aborted/mean:317.9466247558594 - response_length_non_aborted/max:941.0 - response_length_non_aborted/min:61.0 - response_length_non_aborted/clip_ratio:0.0 - response/aborted_ratio:0.0 - prompt_length/mean:842.42578125 - prompt_length/max:1024.0 - prompt_length/min:475.0 - prompt_length/clip_ratio:0.001953125 - num_turns/min:np.int32(2) - num_turns/max:np.int32(2) - num_turns/mean:np.float64(2.0) - timing_s/start_profile:0.0001893751323223114 - timing_s/agent_loop/num_preempted/min:np.int64(-1) - timing_s/agent_loop/num_preempted/max:np.int64(-1) - timing_s/agent_loop/num_preempted/mean:np.float64(-1.0) - timing_s/agent_loop/generate_sequences/min:np.float64(9.716094080358744) - timing_s/agent_loop/generate_sequences/max:np.float64(99.65476095303893) - timing_s/agent_loop/generate_sequences/mean:np.float64(52.03015679180438) - timing_s/agent_loop/tool_calls/min:np.float64(0.0) - timing_s/agent_loop/tool_calls/max:np.float64(0.0) - timing_s/agent_loop/tool_calls/mean:np.float64(0.0) - timing_s/agent_loop/slowest/generate_sequences:np.float64(99.65476095303893) - timing_s/agent_loop/slowest/tool_calls:np.float64(0.0) - timing_s/agent_loop/slowest/prompt_length:889 - timing_s/agent_loop/slowest/response_length:793 - timing_s/agent_loop/slowest/num_preempted:np.int64(-1) - timing_s/gen:6425.866334402934 - timing_s/reward:0.00018812716007232666 - timing_s/old_log_prob:91.17487784475088 - timing_s/ref:99.17542569339275 - timing_s/adv:0.1451300010085106 - timing_s/update_actor:333.07246146164834 - timing_s/update_weights:40.74480497278273 - timing_s/step:6991.19905612804 - timing_s/stop_profile:0.00012621283531188965 - timing_per_token_ms/update_actor:0.18687453373538057 - timing_per_token_ms/gen:13.157890464125131 - timing_per_token_ms/adv:8.142702987350874e-05 - timing_per_token_ms/ref:0.0556436318785685 - perf/total_num_tokens:1782332 - perf/time_per_step:6991.19905612804 - perf/throughput:127.46969337382271 +step:12 - global_seqlen/min:885218 - global_seqlen/max:887825 - global_seqlen/minmax_diff:2607 - global_seqlen/balanced_min:886521 - global_seqlen/balanced_max:886522 - global_seqlen/mean:886521.5 - actor/entropy:0.299950510263443 - perf/mfu/actor_infer:0 - actor/pg_loss:np.float64(-0.00479137469422616) - actor/kl_loss:np.float64(0.029990886008211724) - actor/pg_clipfrac:np.float64(0.002958147852041293) - actor/ppo_kl:np.float64(1.4379837086408997e-05) - actor/pg_clipfrac_lower:np.float64(3.522714602392322e-06) - actor/kl_coef:np.float64(0.0010000000000000002) - actor/grad_norm:np.float64(0.6316521167755127) - perf/mfu/actor:np.float64(0.21350456941817136) - perf/max_memory_allocated_gb:np.float64(68.36006116867065) - perf/max_memory_reserved_gb:np.float64(77.42578125) - perf/cpu_memory_used_gb:np.float64(601.3828163146973) - actor/lr:np.float64(1e-06) - training/global_step:12 - training/epoch:1 - critic/score/mean:-0.0019861154723912477 - critic/score/max:0.6773999929428101 - critic/score/min:-1.9347000122070312 - critic/rewards/mean:-0.0019861154723912477 - critic/rewards/max:0.6773999929428101 - critic/rewards/min:-1.9347000122070312 - critic/advantages/mean:0.0014671036042273045 - critic/advantages/max:1.154694676399231 - critic/advantages/min:-1.1546987295150757 - critic/returns/mean:0.0014671036042273045 - critic/returns/max:1.154694676399231 - critic/returns/min:-1.1546987295150757 - response_length/mean:310.0357971191406 - response_length/max:962.0 - response_length/min:51.0 - response_length/clip_ratio:0.0 - response_length_non_aborted/mean:310.0357971191406 - response_length_non_aborted/max:962.0 - response_length_non_aborted/min:51.0 - response_length_non_aborted/clip_ratio:0.0 - response/aborted_ratio:0.0 - prompt_length/mean:844.2890625 - prompt_length/max:1024.0 - prompt_length/min:500.0 - prompt_length/clip_ratio:0.001953125 - num_turns/min:np.int32(2) - num_turns/max:np.int32(2) - num_turns/mean:np.float64(2.0) - timing_s/start_profile:8.534453809261322e-05 - timing_s/agent_loop/num_preempted/min:np.int64(-1) - timing_s/agent_loop/num_preempted/max:np.int64(-1) - timing_s/agent_loop/num_preempted/mean:np.float64(-1.0) - timing_s/agent_loop/generate_sequences/min:np.float64(11.312721023336053) - timing_s/agent_loop/generate_sequences/max:np.float64(103.60064049623907) - timing_s/agent_loop/generate_sequences/mean:np.float64(52.82738708178173) - timing_s/agent_loop/tool_calls/min:np.float64(0.0) - timing_s/agent_loop/tool_calls/max:np.float64(0.0) - timing_s/agent_loop/tool_calls/mean:np.float64(0.0) - timing_s/agent_loop/slowest/generate_sequences:np.float64(103.60064049623907) - timing_s/agent_loop/slowest/tool_calls:np.float64(0.0) - timing_s/agent_loop/slowest/prompt_length:1013 - timing_s/agent_loop/slowest/response_length:785 - timing_s/agent_loop/slowest/num_preempted:np.int64(-1) - timing_s/gen:5590.149054495618 - timing_s/reward:0.0001854468137025833 - timing_s/old_log_prob:96.1725989766419 - timing_s/ref:98.21918299049139 - timing_s/adv:0.12194184400141239 - timing_s/update_actor:430.73480696976185 - timing_s/update_weights:67.66569714434445 - timing_s/step:6283.977762207389 - timing_s/stop_profile:7.301196455955505e-05 - timing_per_token_ms/update_actor:0.2429353416526062 - timing_per_token_ms/gen:11.738708470954544 - timing_per_token_ms/adv:6.877545778721237e-05 - timing_per_token_ms/ref:0.0553958268301961 - perf/total_num_tokens:1773043 - perf/time_per_step:6283.977762207389 - perf/throughput:141.0764858735893 +step:13 - global_seqlen/min:879269 - global_seqlen/max:895858 - global_seqlen/minmax_diff:16589 - global_seqlen/balanced_min:887561 - global_seqlen/balanced_max:887566 - global_seqlen/mean:887563.5 - actor/entropy:0.2929941415786743 - perf/mfu/actor_infer:0 - actor/pg_loss:np.float64(0.0065216225520998775) - actor/kl_loss:np.float64(0.03677630870758246) - actor/pg_clipfrac:np.float64(0.003221717942021011) - actor/ppo_kl:np.float64(-7.71382222618892e-05) - actor/pg_clipfrac_lower:np.float64(0.0) - actor/kl_coef:np.float64(0.0010000000000000002) - actor/grad_norm:np.float64(0.8784433901309967) - perf/mfu/actor:np.float64(0.20276139294359136) - perf/max_memory_allocated_gb:np.float64(68.36006116867065) - perf/max_memory_reserved_gb:np.float64(77.42578125) - perf/cpu_memory_used_gb:np.float64(597.0038108825684) - actor/lr:np.float64(1e-06) - training/global_step:13 - training/epoch:2 - critic/score/mean:0.02712526172399521 - critic/score/max:0.6773999929428101 - critic/score/min:-1.9347000122070312 - critic/rewards/mean:0.02712526172399521 - critic/rewards/max:0.6773999929428101 - critic/rewards/min:-1.9347000122070312 - critic/advantages/mean:0.0026945541612803936 - critic/advantages/max:1.154697299003601 - critic/advantages/min:-1.1546990871429443 - critic/returns/mean:0.0026945541612803936 - critic/returns/max:1.154697299003601 - critic/returns/min:-1.1546990871429443 - response_length/mean:304.095703125 - response_length/max:868.0 - response_length/min:46.0 - response_length/clip_ratio:0.0 - response_length_non_aborted/mean:304.095703125 - response_length_non_aborted/max:868.0 - response_length_non_aborted/min:46.0 - response_length_non_aborted/clip_ratio:0.0 - response/aborted_ratio:0.0 - prompt_length/mean:851.5859375 - prompt_length/max:1024.0 - prompt_length/min:505.0 - prompt_length/clip_ratio:0.001953125 - num_turns/min:np.int32(2) - num_turns/max:np.int32(2) - num_turns/mean:np.float64(2.0) - timing_s/start_profile:0.0004942677915096283 - timing_s/agent_loop/num_preempted/min:np.int64(-1) - timing_s/agent_loop/num_preempted/max:np.int64(-1) - timing_s/agent_loop/num_preempted/mean:np.float64(-1.0) - timing_s/agent_loop/generate_sequences/min:np.float64(8.30731506459415) - timing_s/agent_loop/generate_sequences/max:np.float64(94.17184654437006) - timing_s/agent_loop/generate_sequences/mean:np.float64(51.114441320853075) - timing_s/agent_loop/tool_calls/min:np.float64(0.0) - timing_s/agent_loop/tool_calls/max:np.float64(0.0) - timing_s/agent_loop/tool_calls/mean:np.float64(0.0) - timing_s/agent_loop/slowest/generate_sequences:np.float64(94.17184654437006) - timing_s/agent_loop/slowest/tool_calls:np.float64(0.0) - timing_s/agent_loop/slowest/prompt_length:928 - timing_s/agent_loop/slowest/response_length:868 - timing_s/agent_loop/slowest/num_preempted:np.int64(-1) - timing_s/gen:5612.400910867378 - timing_s/reward:0.00022694095969200134 - timing_s/old_log_prob:107.18799667805433 - timing_s/ref:114.15490566380322 - timing_s/adv:0.1520479954779148 - timing_s/update_actor:446.9840097371489 - timing_s/update_weights:67.63647554442286 - timing_s/step:6349.45863728784 - timing_s/stop_profile:0.00013229995965957642 - timing_per_token_ms/update_actor:0.25180396092062646 - timing_per_token_ms/gen:12.015647723607131 - timing_per_token_ms/adv:8.565471398830327e-05 - timing_per_token_ms/ref:0.06430802171551851 - perf/total_num_tokens:1775127 - perf/time_per_step:6349.45863728784 - perf/throughput:139.78569681322014 +step:14 - global_seqlen/min:852570 - global_seqlen/max:889370 - global_seqlen/minmax_diff:36800 - global_seqlen/balanced_min:870969 - global_seqlen/balanced_max:870971 - global_seqlen/mean:870970.0 - actor/entropy:0.2847031056880951 - perf/mfu/actor_infer:0 - actor/pg_loss:np.float64(0.005068646496511071) - actor/kl_loss:np.float64(0.04389468823016311) - actor/pg_clipfrac:np.float64(0.0034353809775590585) - actor/ppo_kl:np.float64(5.492370402275052e-05) - actor/pg_clipfrac_lower:np.float64(3.0601252850222713e-06) - actor/kl_coef:np.float64(0.0010000000000000002) - actor/grad_norm:np.float64(0.922976553440094) - perf/mfu/actor:np.float64(0.23610645984684916) - perf/max_memory_allocated_gb:np.float64(68.36006116867065) - perf/max_memory_reserved_gb:np.float64(77.42578125) - perf/cpu_memory_used_gb:np.float64(705.1283988952637) - actor/lr:np.float64(1e-06) - training/global_step:14 - training/epoch:2 - critic/score/mean:0.3278006613254547 - critic/score/max:1.67739999294281 - critic/score/min:-1.9347000122070312 - critic/rewards/mean:0.3278006613254547 - critic/rewards/max:1.67739999294281 - critic/rewards/min:-1.9347000122070312 - critic/advantages/mean:-0.007147991564124823 - critic/advantages/max:1.1546987295150757 - critic/advantages/min:-1.1546998023986816 - critic/returns/mean:-0.007147991564124823 - critic/returns/max:1.1546987295150757 - critic/returns/min:-1.1546998023986816 - response_length/mean:294.2630310058594 - response_length/max:845.0 - response_length/min:41.0 - response_length/clip_ratio:0.0 - response_length_non_aborted/mean:294.2630310058594 - response_length_non_aborted/max:845.0 - response_length_non_aborted/min:41.0 - response_length_non_aborted/clip_ratio:0.0 - response/aborted_ratio:0.0 - prompt_length/mean:839.8125 - prompt_length/max:1024.0 - prompt_length/min:499.0 - prompt_length/clip_ratio:0.001953125 - num_turns/min:np.int32(2) - num_turns/max:np.int32(2) - num_turns/mean:np.float64(2.0) - timing_s/start_profile:6.043165922164917e-05 - timing_s/agent_loop/num_preempted/min:np.int64(-1) - timing_s/agent_loop/num_preempted/max:np.int64(-1) - timing_s/agent_loop/num_preempted/mean:np.float64(-1.0) - timing_s/agent_loop/generate_sequences/min:np.float64(9.677272727712989) - timing_s/agent_loop/generate_sequences/max:np.float64(98.8829833921045) - timing_s/agent_loop/generate_sequences/mean:np.float64(51.963875385473024) - timing_s/agent_loop/tool_calls/min:np.float64(0.0) - timing_s/agent_loop/tool_calls/max:np.float64(0.0) - timing_s/agent_loop/tool_calls/mean:np.float64(0.0) - timing_s/agent_loop/slowest/generate_sequences:np.float64(98.8829833921045) - timing_s/agent_loop/slowest/tool_calls:np.float64(0.0) - timing_s/agent_loop/slowest/prompt_length:967 - timing_s/agent_loop/slowest/response_length:814 - timing_s/agent_loop/slowest/num_preempted:np.int64(-1) - timing_s/gen:5463.805643839762 - timing_s/reward:0.00018244795501232147 - timing_s/old_log_prob:86.59451204538345 - timing_s/ref:91.71580620110035 - timing_s/adv:0.12855872511863708 - timing_s/update_actor:363.57589877024293 - timing_s/update_weights:45.723994471132755 - timing_s/step:6052.4999258015305 - timing_s/stop_profile:0.00014985352754592896 - timing_per_token_ms/update_actor:0.2087189563189564 - timing_per_token_ms/gen:12.088386514331711 - timing_per_token_ms/adv:7.380203974800343e-05 - timing_per_token_ms/ref:0.052651530018887185 - perf/total_num_tokens:1741940 - perf/time_per_step:6052.4999258015305 - perf/throughput:143.90252138411347 diff --git a/code/RL_model/verl/verl_train/wandb/run-20260210_131724-1211jgw0/files/requirements.txt b/code/RL_model/verl/verl_train/wandb/run-20260210_131724-1211jgw0/files/requirements.txt new file mode 100644 index 0000000000000000000000000000000000000000..dd2de633cd2e89d34a77b40967a7fc9ed3177c34 --- /dev/null +++ b/code/RL_model/verl/verl_train/wandb/run-20260210_131724-1211jgw0/files/requirements.txt @@ -0,0 +1,283 @@ +verl==0.8.0.dev0 +psutil==7.1.3 +colorama==0.4.6 +annotated-doc==0.0.4 +sentry-sdk==2.51.0 +requests==2.32.5 +nvidia-cufile-cu12==1.13.1.3 +ml_dtypes==0.5.4 +xformers==0.0.32.post1 +sglang==0.5.2 +multidict==6.7.1 +typing_extensions==4.15.0 +nvidia-cusparselt-cu12==0.7.1 +openai-harmony==0.0.4 +transformers==4.56.1 +Werkzeug==3.1.5 +identify==2.6.16 +gepa==0.0.26 +pytest==9.0.2 +nvidia-cuda-runtime-cu12==12.8.90 +GitPython==3.1.46 +cupy-cuda12x==13.6.0 +tokenizers==0.22.2 +pybind11==3.0.1 +google-api-core==2.29.0 +partial-json-parser==0.2.1.1.post7 +aiohttp-cors==0.8.1 +sniffio==1.3.1 +tensordict==0.10.0 +smart_open==7.5.0 +cffi==2.0.0 +wcwidth==0.5.3 +asttokens==3.0.1 +opencensus==0.11.4 +rpds-py==0.30.0 +py-spy==0.4.1 +litellm==1.81.9 +gguf==0.17.1 +nvidia-nvjitlink-cu12==12.8.93 +httpx==0.28.1 +cuda-python==13.1.1 +annotated-types==0.7.0 +regex==2026.1.15 +vllm==0.11.0 +idna==3.11 +fsspec==2025.10.0 +parso==0.8.5 +pydantic-extra-types==2.11.0 +MarkupSafe==3.0.3 +cryptography==46.0.4 +openai==2.17.0 +filelock==3.20.3 +modelscope==1.34.0 +outlines==0.1.11 +dnspython==2.8.0 +scipy==1.17.0 +zipp==3.23.0 +PyYAML==6.0.3 +onnx==1.20.1 +torchdata==0.11.0 +cuda-pathfinder==1.3.3 +asyncer==0.0.8 +verl==0.8.0.dev0 +httptools==0.7.1 +opencv-python-headless==4.13.0.90 +importlib_metadata==8.7.1 +peft==0.18.1 +opentelemetry-sdk==1.39.1 +python-json-logger==4.0.0 +alembic==1.18.3 +cuda-bindings==13.1.1 +mdurl==0.1.2 +referencing==0.37.0 +xxhash==3.6.0 +interegular==0.3.3 +fastapi-cli==0.0.20 +uv==0.9.28 +tensorboard==2.20.0 +nvidia-cublas-cu12==12.8.4.1 +sentencepiece==0.2.1 +rich-toolkit==0.18.1 +numpy==2.2.0 +yarl==1.22.0 +opencv-fixer==0.2.5 +tqdm==4.67.2 +python-dotenv==1.2.1 +Mako==1.3.10 +timm==1.0.16 +aiohappyeyeballs==2.6.1 +decord==0.6.0 +jiter==0.12.0 +airportsdata==20250909 +markdown-it-py==4.0.0 +nvidia-cusolver-cu12==11.7.3.90 +pyarrow==23.0.0 +opentelemetry-proto==1.39.1 +anyio==4.12.1 +pycryptodomex==3.23.0 +prometheus_client==0.24.1 +aiohttp==3.13.3 +urllib3==2.6.3 +pexpect==4.9.0 +pydantic-settings==2.12.0 +distro==1.9.0 +av==16.1.0 +cloudpickle==3.1.2 +mpmath==1.3.0 +certifi==2026.1.4 +antlr4-python3-runtime==4.9.3 +torchvision==0.23.0 +accelerate==1.12.0 +watchfiles==1.1.1 +ruff==0.14.14 +wheel==0.46.3 +omegaconf==2.3.0 +nvidia-cufft-cu12==11.3.3.83 +multiprocess==0.70.18 +frozendict==2.4.7 +sympy==1.14.0 +setproctitle==1.3.7 +optuna==4.7.0 +setuptools==79.0.1 +py-cpuinfo==9.0.0 +ipython_pygments_lexers==1.1.1 +rich==14.3.2 +uvicorn==0.40.0 +outlines_core==0.2.11 +llvmlite==0.44.0 +nvidia-cuda-cupti-cu12==12.8.90 +attrs==25.4.0 +anthropic==0.77.0 +packaging==25.0 +fastrlock==0.8.3 +astor==0.8.1 +pluggy==1.6.0 +nvidia-cuda-nvrtc-cu12==12.8.93 +psutil==7.2.2 +virtualenv==20.36.1 +cbor2==5.8.0 +tenacity==9.1.4 +compressed-tensors==0.11.0 +SQLAlchemy==2.0.46 +nvidia-cusparse-cu12==12.5.8.93 +networkx==3.6.1 +httpcore==1.0.9 +onnxscript==0.3.1 +smmap==5.0.2 +opencv-python==4.13.0.90 +traitlets==5.14.3 +python-multipart==0.0.22 +pyvers==0.1.0 +huggingface-hub==0.36.0 +pillow==12.1.0 +jsonschema==4.26.0 +cfgv==3.5.0 +optree==0.18.0 +email-validator==2.3.0 +tabulate==0.9.0 +pre_commit==4.5.1 +msgpack==1.1.2 +depyf==0.19.0 +numba==0.61.2 +six==1.17.0 +aiosignal==1.4.0 +nvidia-nvtx-cu12==12.8.90 +propcache==0.4.1 +torch_memory_saver==0.0.8 +h11==0.16.0 +frozenlist==1.8.0 +websockets==16.0 +nvidia-cudnn-frontend==1.18.0 +build==1.4.0 +google-auth==2.48.0 +pycountry==24.6.1 +colorlog==6.10.1 +stack-data==0.6.3 +typing-inspection==0.4.2 +googleapis-common-protos==1.72.0 +pandas==3.0.0 +typer==0.21.1 +protobuf==6.33.5 +fastapi==0.128.0 +blake3==1.0.8 +opentelemetry-semantic-conventions==0.60b1 +opentelemetry-exporter-prometheus==0.60b1 +nvidia-cudnn-cu12==9.10.2.21 +Markdown==3.10.1 +liger_kernel==0.6.4 +json_repair==0.57.1 +nodeenv==1.10.0 +prompt_toolkit==3.0.52 +torchaudio==2.8.0 +codetiming==1.4.0 +platformdirs==4.5.1 +jsonschema-specifications==2025.9.1 +hydra-core==1.3.2 +tensorboard-data-server==0.7.2 +lm-format-enforcer==0.11.3 +pyasn1_modules==0.4.2 +tiktoken==0.12.0 +starlette==0.50.0 +pyproject_hooks==1.2.0 +flash_attn==2.8.1 +rsa==4.9.1 +ray==2.53.0 +nest-asyncio==1.6.0 +lark==1.2.2 +fastar==0.8.0 +orjson==3.11.6 +prometheus-fastapi-instrumentator==7.1.0 +opentelemetry-api==1.39.1 +mathruler==0.1.0 +pydantic_core==2.41.5 +fastapi-cloud-cli==0.11.0 +pynvml==13.0.1 +loguru==0.7.3 +torch==2.8.0 +msgspec==0.20.0 +nvidia-curand-cu12==10.3.9.90 +blobfile==3.0.0 +gitdb==4.0.12 +llguidance==0.7.30 +hf_transfer==0.1.9 +nvidia-nccl-cu12==2.27.3 +qwen-vl-utils==0.0.14 +ptyprocess==0.7.0 +ipdb==0.13.13 +opencensus-context==0.1.3 +jedi==0.19.2 +click==8.3.1 +datasets==4.5.0 +soxr==1.0.0 +sgl-kernel==0.3.9.post2 +colorful==0.5.8 +pyasn1==0.6.2 +charset-normalizer==3.4.4 +nvidia-ml-py==13.590.48 +hf-xet==1.2.0 +dill==0.4.0 +absl-py==2.4.0 +pydantic==2.12.5 +dspy==3.1.3 +wrapt==2.1.0 +flashinfer-python==0.3.1 +python-dateutil==2.9.0.post0 +torchao==0.9.0 +cachetools==7.0.0 +soundfile==0.13.1 +diskcache==5.6.3 +onnx-ir==0.1.15 +docstring_parser==0.17.0 +matplotlib-inline==0.2.1 +Pygments==2.19.2 +wandb==0.24.1 +pure_eval==0.2.3 +ninja==1.13.0 +proto-plus==1.27.0 +pyzmq==27.1.0 +iniconfig==2.3.0 +Jinja2==3.1.6 +megatron-core==0.13.1 +uvloop==0.22.1 +fastuuid==0.14.0 +pycparser==3.0 +pylatexenc==2.10 +decorator==5.2.1 +shellingham==1.5.4 +lxml==6.0.2 +safetensors==0.7.0 +xgrammar==0.1.25 +pybase64==1.4.3 +ipython==9.9.0 +greenlet==3.3.1 +mistral_common==1.9.0 +rignore==0.7.6 +einops==0.8.2 +distlib==0.4.0 +triton==3.4.0 +executing==2.2.1 +grpcio==1.76.0 +pip==25.3 +verl==0.8.0.dev0 +verl==0.8.0.dev0 diff --git a/code/RL_model/verl/verl_train/wandb/run-20260210_131724-1211jgw0/files/wandb-metadata.json b/code/RL_model/verl/verl_train/wandb/run-20260210_131724-1211jgw0/files/wandb-metadata.json new file mode 100644 index 0000000000000000000000000000000000000000..3180bf168767cfdddbc80ff9082e0e94772f6018 --- /dev/null +++ b/code/RL_model/verl/verl_train/wandb/run-20260210_131724-1211jgw0/files/wandb-metadata.json @@ -0,0 +1,93 @@ +{ + "os": "Linux-5.15.0-160-generic-x86_64-with-glibc2.35", + "python": "CPython 3.12.12", + "startedAt": "2026-02-10T18:17:24.391991Z", + "args": [ + "--node-ip-address=172.16.34.29", + "--node-manager-port=38587", + "--object-store-name=/tmp/ray/session_2026-02-10_13-12-43_168090_906875/sockets/plasma_store", + "--raylet-name=/tmp/ray/session_2026-02-10_13-12-43_168090_906875/sockets/raylet", + "--redis-address=None", + "--metrics-agent-port=52541", + "--logging-rotate-bytes=536870912", + "--logging-rotate-backup-count=5", + "--runtime-env-agent-port=50779", + "--gcs-address=172.16.34.29:65142", + "--session-name=session_2026-02-10_13-12-43_168090_906875", + "--temp-dir=/tmp/ray", + "--webui=127.0.0.1:8297", + "--cluster-id=cc26bcda266dacfdefb0040c5ef6a7ac63e7edbe872c7e2f5b8677ed", + "--startup-token=128", + "--worker-launch-time-ms=1770747175925", + "--node-id=6961e455de2b1822707f97248348e98fc4ab94cfcbf1056b1961af94", + "--runtime-env-hash=1096984665" + ], + "program": "/home/mshahidul/miniconda3/envs/verl2/lib/python3.12/site-packages/ray/_private/workers/default_worker.py", + "git": { + "remote": "https://github.com/verl-project/verl", + "commit": "d9939add7a2a01923a9088891f913a5d20c4e622" + }, + "email": "shahidulshakib034@gmail.com", + "root": "/data/home_beta/mshahidul/readctrl/code/RL_model/verl/verl_train", + "host": "gamma", + "executable": "/home/mshahidul/miniconda3/envs/verl2/bin/python3", + "cpu_count": 64, + "cpu_count_logical": 128, + "gpu": "NVIDIA A100 80GB PCIe", + "gpu_count": 6, + "disk": { + "/": { + "total": "3766429188096", + "used": "191449497600" + } + }, + "memory": { + "total": "1081814863872" + }, + "gpu_nvidia": [ + { + "name": "NVIDIA A100 80GB PCIe", + "memoryTotal": "85899345920", + "cudaCores": 6912, + "architecture": "Ampere", + "uuid": "GPU-4a3678c7-34a9-356f-f7b7-7f7e2f44b596" + }, + { + "name": "NVIDIA A100 80GB PCIe", + "memoryTotal": "85899345920", + "cudaCores": 6912, + "architecture": "Ampere", + "uuid": "GPU-df506764-0db5-91b4-8ec9-154a3bb8123f" + }, + { + "name": "NVIDIA A100 80GB PCIe", + "memoryTotal": "85899345920", + "cudaCores": 6912, + "architecture": "Ampere", + "uuid": "GPU-2c3dbd62-b384-2996-a0f6-b32dcfcc3538" + }, + { + "name": "NVIDIA A100 80GB PCIe", + "memoryTotal": "85899345920", + "cudaCores": 6912, + "architecture": "Ampere", + "uuid": "GPU-1ff3dabe-4b9a-ea62-5cc3-01f12f32d328" + }, + { + "name": "NVIDIA H100 PCIe", + "memoryTotal": "85520809984", + "cudaCores": 14592, + "architecture": "Hopper", + "uuid": "GPU-eefc4b8c-0e79-c1d6-a9ff-8325040572eb" + }, + { + "name": "NVIDIA H100 PCIe", + "memoryTotal": "85520809984", + "cudaCores": 14592, + "architecture": "Hopper", + "uuid": "GPU-d42b6057-13e8-1e88-6aa1-9307df72dece" + } + ], + "cudaVersion": "13.0", + "writerId": "zpcb1jsxvp2jkej1iccup1xb1osb9pmr" +} \ No newline at end of file diff --git a/code/RL_model/verl/verl_train/wandb/run-20260210_131724-1211jgw0/files/wandb-summary.json b/code/RL_model/verl/verl_train/wandb/run-20260210_131724-1211jgw0/files/wandb-summary.json new file mode 100644 index 0000000000000000000000000000000000000000..2207bcaffa57907addcedf4d0312b3c78f9baf0a --- /dev/null +++ b/code/RL_model/verl/verl_train/wandb/run-20260210_131724-1211jgw0/files/wandb-summary.json @@ -0,0 +1 @@ +{"response_length_non_aborted/clip_ratio":0,"val-aux/num_turns/max":2,"timing_s/agent_loop/generate_sequences/max":98.8829833921045,"timing_s/agent_loop/tool_calls/min":0,"response_length/max":845,"prompt_length/mean":839.8125,"timing_s/agent_loop/tool_calls/mean":0,"timing_per_token_ms/adv":7.380203974800343e-05,"timing_s/adv":0.12855872511863708,"timing_s/testing":692.2788705602288,"timing_s/agent_loop/num_preempted/max":-1,"timing_per_token_ms/gen":12.088386514331711,"perf/time_per_step":6052.4999258015305,"response_length_non_aborted/mean":294.2630310058594,"timing_s/agent_loop/slowest/generate_sequences":98.8829833921045,"critic/rewards/min":-1.9347000122070312,"timing_s/agent_loop/slowest/num_preempted":-1,"perf/max_memory_allocated_gb":68.36006116867065,"critic/advantages/mean":-0.007147991564124823,"actor/pg_loss":0.005068646496511071,"perf/max_memory_reserved_gb":77.42578125,"timing_s/step":6052.4999258015305,"actor/pg_clipfrac":0.0034353809775590585,"critic/score/max":1.67739999294281,"timing_s/gen":5463.805643839762,"critic/rewards/max":1.67739999294281,"timing_s/agent_loop/tool_calls/max":0,"timing_s/update_actor":363.57589877024293,"timing_s/ref":91.71580620110035,"timing_s/save_checkpoint":225.57176331430674,"response_length_non_aborted/min":41,"response_length/min":41,"training/epoch":2,"response/aborted_ratio":0,"perf/cpu_memory_used_gb":705.1283988952637,"perf/mfu/actor_infer":0,"timing_per_token_ms/update_actor":0.2087189563189564,"actor/pg_clipfrac_lower":3.0601252850222713e-06,"actor/kl_coef":0.0010000000000000002,"actor/kl_loss":0.04389468823016311,"critic/advantages/min":-1.1546998023986816,"_timestamp":1.770846062330116e+09,"global_seqlen/max":889370,"prompt_length/max":1024,"actor/lr":1e-06,"response_length/clip_ratio":0,"response_length/mean":294.2630310058594,"global_seqlen/minmax_diff":36800,"timing_s/update_weights":45.723994471132755,"response_length_non_aborted/max":845,"timing_per_token_ms/ref":0.052651530018887185,"global_seqlen/min":852570,"timing_s/agent_loop/num_preempted/min":-1,"critic/advantages/max":1.1546987295150757,"prompt_length/clip_ratio":0.001953125,"perf/total_num_tokens":1741940,"timing_s/agent_loop/num_preempted/mean":-1,"timing_s/start_profile":6.043165922164917e-05,"timing_s/agent_loop/generate_sequences/min":9.677272727712989,"val-core/multiclinsum/acc/mean@1":-0.10784734078105108,"global_seqlen/balanced_min":870969,"_runtime":102109.490431734,"_wandb":{"runtime":102109},"critic/returns/min":-1.1546998023986816,"global_seqlen/balanced_max":870971,"val-aux/multiclinsum/reward/mean@1":-0.10784733978147476,"global_seqlen/mean":870970,"timing_s/agent_loop/slowest/prompt_length":967,"prompt_length/min":499,"critic/returns/max":1.1546987295150757,"actor/entropy":0.2847031056880951,"val-aux/num_turns/min":2,"actor/ppo_kl":5.492370402275052e-05,"critic/score/min":-1.9347000122070312,"critic/returns/mean":-0.007147991564124823,"num_turns/mean":2,"timing_s/agent_loop/slowest/response_length":814,"training/global_step":14,"timing_s/agent_loop/slowest/tool_calls":0,"critic/rewards/mean":0.3278006613254547,"_step":14,"timing_s/stop_profile":0.00014985352754592896,"actor/grad_norm":0.922976553440094,"timing_s/agent_loop/generate_sequences/mean":51.963875385473024,"perf/mfu/actor":0.23610645984684916,"val-aux/num_turns/mean":2,"perf/throughput":143.90252138411347,"timing_s/reward":0.00018244795501232147,"timing_s/old_log_prob":86.59451204538345,"critic/score/mean":0.3278006613254547,"num_turns/min":2,"num_turns/max":2} \ No newline at end of file diff --git a/code/RL_model/verl/verl_train/wandb/run-20260210_131724-1211jgw0/logs/debug-core.log b/code/RL_model/verl/verl_train/wandb/run-20260210_131724-1211jgw0/logs/debug-core.log new file mode 100644 index 0000000000000000000000000000000000000000..e95b0f044eeaa4e8b7410eb66f4ae78d90e99fa3 --- /dev/null +++ b/code/RL_model/verl/verl_train/wandb/run-20260210_131724-1211jgw0/logs/debug-core.log @@ -0,0 +1,8 @@ +{"time":"2026-02-10T13:17:24.542273399-05:00","level":"INFO","msg":"main: starting server","port-filename":"/tmp/tmpgkuatyf_/port-915293.txt","pid":915293,"log-level":0,"disable-analytics":false,"shutdown-on-parent-exit":false,"enable-dcgm-profiling":false} +{"time":"2026-02-10T13:17:24.543356071-05:00","level":"INFO","msg":"server: will exit if parent process dies","ppid":915293} +{"time":"2026-02-10T13:17:24.543351086-05:00","level":"INFO","msg":"server: accepting connections","addr":{"Name":"/tmp/wandb-915293-923236-3193920924/socket","Net":"unix"}} +{"time":"2026-02-10T13:17:24.704517382-05:00","level":"INFO","msg":"connection: ManageConnectionData: new connection created","id":"1(@)"} +{"time":"2026-02-10T13:17:24.717931928-05:00","level":"INFO","msg":"handleInformInit: received","streamId":"1211jgw0","id":"1(@)"} +{"time":"2026-02-10T13:17:25.229585419-05:00","level":"INFO","msg":"handleInformInit: stream started","streamId":"1211jgw0","id":"1(@)"} +{"time":"2026-02-10T13:17:31.184318044-05:00","level":"INFO","msg":"connection: cancelling request","id":"1(@)","requestId":"i5977dn4dtm9"} +{"time":"2026-02-11T17:39:14.952382639-05:00","level":"INFO","msg":"connection: cancelling request","id":"1(@)","requestId":"i5977dn4dtm9"} diff --git a/code/RL_model/verl/verl_train/wandb/run-20260210_131724-1211jgw0/logs/debug-internal.log b/code/RL_model/verl/verl_train/wandb/run-20260210_131724-1211jgw0/logs/debug-internal.log new file mode 100644 index 0000000000000000000000000000000000000000..2c0e0b2df4bd8edf44b62a625e5811ab6b3ee54a --- /dev/null +++ b/code/RL_model/verl/verl_train/wandb/run-20260210_131724-1211jgw0/logs/debug-internal.log @@ -0,0 +1,8 @@ +{"time":"2026-02-10T13:17:24.719240184-05:00","level":"INFO","msg":"stream: starting","core version":"0.24.1"} +{"time":"2026-02-10T13:17:25.227007305-05:00","level":"INFO","msg":"stream: created new stream","id":"1211jgw0"} +{"time":"2026-02-10T13:17:25.227137581-05:00","level":"INFO","msg":"handler: started","stream_id":"1211jgw0"} +{"time":"2026-02-10T13:17:25.229562299-05:00","level":"INFO","msg":"stream: started","id":"1211jgw0"} +{"time":"2026-02-10T13:17:25.229595225-05:00","level":"INFO","msg":"writer: started","stream_id":"1211jgw0"} +{"time":"2026-02-10T13:17:25.229610074-05:00","level":"INFO","msg":"sender: started","stream_id":"1211jgw0"} +{"time":"2026-02-10T13:50:25.853109102-05:00","level":"INFO","msg":"api: retrying HTTP error","status":502,"url":"https://api.wandb.ai/files/shahidulshakib034-khulna-university-of-engineering-techn/readctrl-verl/1211jgw0/file_stream","body":"\n\n\nPlease try again in 30 seconds.\n
\n\n"} +{"time":"2026-02-10T20:12:10.494276292-05:00","level":"INFO","msg":"api: retrying error","error":"Post \"https://api.wandb.ai/files/shahidulshakib034-khulna-university-of-engineering-techn/readctrl-verl/1211jgw0/file_stream\": read tcp 172.16.34.29:44914->35.186.228.49:443: read: connection reset by peer"} diff --git a/code/RL_model/verl/verl_train/wandb/run-20260210_131724-1211jgw0/logs/debug.log b/code/RL_model/verl/verl_train/wandb/run-20260210_131724-1211jgw0/logs/debug.log new file mode 100644 index 0000000000000000000000000000000000000000..7d61d5e78ca4e16588c1dddd1cb65d60024ad764 --- /dev/null +++ b/code/RL_model/verl/verl_train/wandb/run-20260210_131724-1211jgw0/logs/debug.log @@ -0,0 +1,23 @@ +2026-02-10 13:17:24,414 INFO MainThread:915293 [wandb_setup.py:_flush():81] Current SDK version is 0.24.1 +2026-02-10 13:17:24,414 INFO MainThread:915293 [wandb_setup.py:_flush():81] Configure stats pid to 915293 +2026-02-10 13:17:24,415 INFO MainThread:915293 [wandb_setup.py:_flush():81] Loading settings from environment variables +2026-02-10 13:17:24,415 INFO MainThread:915293 [wandb_init.py:setup_run_log_directory():717] Logging user logs to /data/home_beta/mshahidul/readctrl/code/RL_model/verl/verl_train/wandb/run-20260210_131724-1211jgw0/logs/debug.log +2026-02-10 13:17:24,415 INFO MainThread:915293 [wandb_init.py:setup_run_log_directory():718] Logging internal logs to /data/home_beta/mshahidul/readctrl/code/RL_model/verl/verl_train/wandb/run-20260210_131724-1211jgw0/logs/debug-internal.log +2026-02-10 13:17:24,415 INFO MainThread:915293 [wandb_init.py:init():844] calling init triggers +2026-02-10 13:17:24,416 INFO MainThread:915293 [wandb_init.py:init():849] wandb.init called with sweep_config: {} +config: {'actor_rollout_ref': {'actor': {'optim': {'_target_': 'verl.workers.config.FSDPOptimizerConfig', 'optimizer': 'AdamW', 'optimizer_impl': 'torch.optim', 'lr': 1e-06, 'lr_warmup_steps_ratio': 0.0, 'total_training_steps': 90, 'weight_decay': 0.01, 'lr_warmup_steps': -1, 'betas': [0.9, 0.999], 'clip_grad': 1.0, 'min_lr_ratio': 0.0, 'num_cycles': 0.5, 'lr_scheduler_type': 'constant', 'warmup_style': None, 'override_optimizer_config': None}, 'fsdp_config': {'_target_': 'verl.workers.config.FSDPEngineConfig', 'wrap_policy': {'min_num_params': 0}, 'param_offload': True, 'optimizer_offload': True, 'offload_policy': False, 'reshard_after_forward': True, 'fsdp_size': -1, 'forward_prefetch': False, 'model_dtype': 'fp32', 'use_orig_params': False, 'seed': 42, 'full_determinism': False, 'ulysses_sequence_parallel_size': 1, 'entropy_from_logits_with_chunking': False, 'use_torch_compile': True, 'entropy_checkpointing': False, 'forward_only': False, 'strategy': 'fsdp', 'dtype': 'bfloat16'}, '_target_': 'verl.workers.config.FSDPActorConfig', 'rollout_n': 3, 'strategy': 'fsdp', 'ppo_mini_batch_size': 256, 'ppo_micro_batch_size': None, 'ppo_micro_batch_size_per_gpu': 16, 'use_dynamic_bsz': False, 'ppo_max_token_len_per_gpu': 16384, 'clip_ratio': 0.2, 'clip_ratio_low': 0.2, 'clip_ratio_high': 0.2, 'tau_pos': 1.0, 'tau_neg': 1.05, 'freeze_vision_tower': False, 'policy_loss': {'_target_': 'verl.workers.config.PolicyLossConfig', 'loss_mode': 'vanilla', 'clip_cov_ratio': 0.0002, 'clip_cov_lb': 1.0, 'clip_cov_ub': 5.0, 'kl_cov_ratio': 0.0002, 'ppo_kl_coef': 0.1}, 'clip_ratio_c': 3.0, 'loss_agg_mode': 'token-mean', 'loss_scale_factor': None, 'entropy_coeff': 0, 'calculate_entropy': False, 'use_kl_loss': True, 'use_prefix_grouper': False, 'use_torch_compile': True, 'kl_loss_coef': 0.001, 'kl_loss_type': 'low_var_kl', 'ppo_epochs': 1, 'shuffle': False, 'data_loader_seed': 42, 'checkpoint': {'_target_': 'verl.trainer.config.CheckpointConfig', 'save_contents': ['model', 'optimizer', 'extra'], 'load_contents': ['model', 'optimizer', 'extra'], 'async_save': False}, 'use_fused_kernels': False, 'profiler': {'_target_': 'verl.utils.profiler.ProfilerConfig', 'tool': None, 'enable': False, 'all_ranks': False, 'ranks': [], 'save_path': 'outputs/profile', 'tool_config': {'nsys': {'_target_': 'verl.utils.profiler.config.NsightToolConfig', 'discrete': False}, 'npu': {'_target_': 'verl.utils.profiler.config.NPUToolConfig', 'contents': [], 'level': 'level0', 'analysis': True, 'discrete': False}, 'torch': {'_target_': 'verl.utils.profiler.config.TorchProfilerToolConfig', 'contents': [], 'discrete': False}, 'torch_memory': {'_target_': 'verl.utils.profiler.config.TorchMemoryToolConfig', 'trace_alloc_max_entries': 100000, 'stack_depth': 32}}}, 'router_replay': {'_target_': 'verl.workers.config.RouterReplayConfig', 'mode': 'disabled', 'record_file': None, 'replay_file': None}, 'grad_clip': 1.0, 'ulysses_sequence_parallel_size': 1, 'entropy_from_logits_with_chunking': False, 'entropy_checkpointing': False, 'use_remove_padding': True, 'calculate_sum_pi_squared': False, 'sum_pi_squared_checkpointing': False}, 'ref': {'rollout_n': 3, 'strategy': 'fsdp', 'use_torch_compile': True, 'log_prob_micro_batch_size': None, 'log_prob_micro_batch_size_per_gpu': 32, 'log_prob_use_dynamic_bsz': False, 'log_prob_max_token_len_per_gpu': 16384, 'profiler': {'_target_': 'verl.utils.profiler.ProfilerConfig', 'tool': None, 'enable': False, 'all_ranks': False, 'ranks': [], 'save_path': 'outputs/profile', 'tool_config': {'nsys': {'_target_': 'verl.utils.profiler.config.NsightToolConfig', 'discrete': False}, 'npu': {'_target_': 'verl.utils.profiler.config.NPUToolConfig', 'contents': [], 'level': 'level0', 'analysis': True, 'discrete': False}, 'torch': {'_target_': 'verl.utils.profiler.config.TorchProfilerToolConfig', 'contents': [], 'discrete': False}, 'torch_memory': {'_target_': 'verl.utils.profiler.config.TorchMemoryToolConfig', 'trace_alloc_max_entries': 100000, 'stack_depth': 32}}}, 'router_replay': {'_target_': 'verl.workers.config.RouterReplayConfig', 'mode': 'disabled', 'record_file': None, 'replay_file': None}, 'fsdp_config': {'_target_': 'verl.workers.config.FSDPEngineConfig', 'wrap_policy': {'min_num_params': 0}, 'param_offload': True, 'optimizer_offload': False, 'offload_policy': False, 'reshard_after_forward': True, 'fsdp_size': -1, 'forward_prefetch': False, 'model_dtype': 'fp32', 'use_orig_params': False, 'seed': 42, 'full_determinism': False, 'ulysses_sequence_parallel_size': 1, 'entropy_from_logits_with_chunking': False, 'use_torch_compile': True, 'entropy_checkpointing': False, 'forward_only': True, 'strategy': 'fsdp', 'dtype': 'bfloat16'}, '_target_': 'verl.workers.config.FSDPActorConfig', 'ulysses_sequence_parallel_size': 1, 'entropy_from_logits_with_chunking': False, 'entropy_checkpointing': False}, 'rollout': {'_target_': 'verl.workers.config.RolloutConfig', 'name': 'vllm', 'mode': 'async', 'temperature': 1.0, 'top_k': -1, 'top_p': 1, 'prompt_length': 1024, 'response_length': 2048, 'dtype': 'bfloat16', 'gpu_memory_utilization': 0.4, 'ignore_eos': False, 'enforce_eager': True, 'cudagraph_capture_sizes': None, 'free_cache_engine': True, 'tensor_model_parallel_size': 1, 'data_parallel_size': 1, 'expert_parallel_size': 1, 'pipeline_model_parallel_size': 1, 'max_num_batched_tokens': 8192, 'max_model_len': 8192, 'max_num_seqs': 1024, 'enable_chunked_prefill': True, 'enable_prefix_caching': True, 'logprobs_mode': 'processed_logprobs', 'scheduling_policy': 'fcfs', 'load_format': 'dummy', 'log_prob_micro_batch_size': None, 'log_prob_micro_batch_size_per_gpu': 32, 'log_prob_use_dynamic_bsz': False, 'log_prob_max_token_len_per_gpu': 16384, 'disable_log_stats': True, 'do_sample': True, 'n': 3, 'over_sample_rate': 0, 'multi_stage_wake_up': False, 'engine_kwargs': {'vllm': {}, 'sglang': {}, 'trtllm': {}}, 'val_kwargs': {'_target_': 'verl.workers.config.SamplingConfig', 'top_k': -1, 'top_p': 1.0, 'temperature': 0, 'n': 1, 'do_sample': False}, 'multi_turn': {'_target_': 'verl.workers.config.MultiTurnConfig', 'enable': False, 'max_assistant_turns': None, 'tool_config_path': None, 'max_user_turns': None, 'max_parallel_calls': 1, 'max_tool_response_length': 256, 'tool_response_truncate_side': 'middle', 'interaction_config_path': None, 'use_inference_chat_template': False, 'tokenization_sanity_check_mode': 'strict', 'format': 'hermes', 'num_repeat_rollouts': None}, 'calculate_log_probs': False, 'agent': {'_target_': 'verl.workers.config.AgentLoopConfig', 'num_workers': 8, 'default_agent_loop': 'single_turn_agent', 'agent_loop_config_path': None, 'custom_async_server': {'_target_': 'verl.workers.config.CustomAsyncServerConfig', 'path': None, 'name': None}}, 'checkpoint_engine': {'_target_': 'verl.workers.config.CheckpointEngineConfig', 'backend': 'naive', 'update_weights_bucket_megabytes': 2048, 'engine_kwargs': {}}, 'trace': {'_target_': 'verl.workers.config.TraceConfig', 'backend': None, 'token2text': False, 'max_samples_per_step_per_worker': None}, 'skip_rollout': False, 'skip_dump_dir': '/tmp/rollout_dump', 'skip_tokenizer_init': True, 'enable_rollout_routing_replay': False, 'profiler': {'_target_': 'verl.utils.profiler.ProfilerConfig', 'tool': None, 'enable': False, 'all_ranks': False, 'ranks': [], 'save_path': 'outputs/profile', 'tool_config': {'nsys': {'_target_': 'verl.utils.profiler.config.NsightToolConfig', 'discrete': False}, 'npu': {'_target_': 'verl.utils.profiler.config.NPUToolConfig', 'contents': [], 'level': 'level0', 'analysis': True, 'discrete': False}, 'torch': {'_target_': 'verl.utils.profiler.config.TorchProfilerToolConfig', 'contents': [], 'discrete': False}, 'torch_memory': {'_target_': 'verl.utils.profiler.config.TorchMemoryToolConfig', 'trace_alloc_max_entries': 100000, 'stack_depth': 32}}}, 'prometheus': {'_target_': 'verl.workers.config.PrometheusConfig', 'enable': False, 'port': 9090, 'file': '/tmp/ray/session_latest/metrics/prometheus/prometheus.yml', 'served_model_name': 'Qwen/Qwen3-4B-Instruct-2507'}, 'quantization': None, 'quantization_config_file': None, 'mtp': {'_target_': 'verl.workers.config.MtpConfig', 'enable': False, 'enable_train': False, 'enable_rollout': False, 'detach_encoder': False, 'mtp_loss_scaling_factor': 0.1, 'speculative_algorithm': 'EAGLE', 'speculative_num_steps': 3, 'speculative_eagle_topk': 1, 'speculative_num_draft_tokens': 4, 'method': 'mtp', 'num_speculative_tokens': 1}, 'layered_summon': False}, 'model': {'_target_': 'verl.workers.config.HFModelConfig', 'path': 'Qwen/Qwen3-4B-Instruct-2507', 'hf_config_path': None, 'tokenizer_path': None, 'use_shm': False, 'trust_remote_code': False, 'custom_chat_template': None, 'external_lib': None, 'override_config': {}, 'enable_gradient_checkpointing': True, 'enable_activation_offload': False, 'use_remove_padding': True, 'lora_rank': 0, 'lora_alpha': 16, 'target_modules': 'all-linear', 'exclude_modules': None, 'lora_adapter_path': None, 'use_liger': False, 'use_fused_kernels': False, 'fused_kernel_options': {'impl_backend': 'torch'}, 'tiled_mlp': {'enabled': False, 'num_shards': 4}, 'mtp': {'_target_': 'verl.workers.config.MtpConfig', 'enable': False, 'enable_train': False, 'enable_rollout': False, 'detach_encoder': False, 'mtp_loss_scaling_factor': 0.1, 'speculative_algorithm': 'EAGLE', 'speculative_num_steps': 3, 'speculative_eagle_topk': 1, 'speculative_num_draft_tokens': 4, 'method': 'mtp', 'num_speculative_tokens': 1}}, 'hybrid_engine': True, 'nccl_timeout': 600}, 'data': {'tokenizer': None, 'use_shm': False, 'train_files': '/home/mshahidul/readctrl/code/RL_model/verl/verl_train/dataset/train.parquet', 'val_files': '/home/mshahidul/readctrl/code/RL_model/verl/verl_train/dataset/test.parquet', 'train_max_samples': -1, 'val_max_samples': -1, 'prompt_key': 'prompt', 'reward_fn_key': 'data_source', 'max_prompt_length': 1024, 'max_response_length': 2048, 'train_batch_size': 512, 'val_batch_size': None, 'tool_config_path': None, 'return_raw_input_ids': False, 'return_raw_chat': True, 'return_full_prompt': False, 'shuffle': True, 'seed': None, 'dataloader_num_workers': 8, 'image_patch_size': 14, 'validation_shuffle': False, 'filter_overlong_prompts': True, 'filter_overlong_prompts_workers': 1, 'truncation': 'error', 'image_key': 'images', 'video_key': 'videos', 'trust_remote_code': False, 'custom_cls': {'path': None, 'name': None}, 'return_multi_modal_inputs': True, 'sampler': {'class_path': None, 'class_name': None}, 'datagen': {'path': None, 'name': None}, 'apply_chat_template_kwargs': {}}, 'reward_manager': {'_target_': 'verl.trainer.config.config.RewardManagerConfig', 'source': 'register', 'name': 'naive', 'module': {'_target_': 'verl.trainer.config.config.ModuleConfig', 'path': None, 'name': 'custom_reward_manager'}}, 'critic': {'optim': {'_target_': 'verl.workers.config.FSDPOptimizerConfig', 'optimizer': 'AdamW', 'optimizer_impl': 'torch.optim', 'lr': 1e-05, 'lr_warmup_steps_ratio': 0.0, 'total_training_steps': 90, 'weight_decay': 0.01, 'lr_warmup_steps': -1, 'betas': [0.9, 0.999], 'clip_grad': 1.0, 'min_lr_ratio': 0.0, 'num_cycles': 0.5, 'lr_scheduler_type': 'constant', 'warmup_style': None, 'override_optimizer_config': None}, 'model': {'fsdp_config': {'_target_': 'verl.workers.config.FSDPEngineConfig', 'wrap_policy': {'min_num_params': 0}, 'param_offload': False, 'optimizer_offload': False, 'offload_policy': False, 'reshard_after_forward': True, 'fsdp_size': -1, 'forward_prefetch': False, 'model_dtype': 'fp32', 'use_orig_params': False, 'seed': 42, 'full_determinism': False, 'ulysses_sequence_parallel_size': 1, 'entropy_from_logits_with_chunking': False, 'use_torch_compile': True, 'entropy_checkpointing': False, 'forward_only': False, 'strategy': 'fsdp', 'dtype': 'bfloat16'}, 'path': '~/models/deepseek-llm-7b-chat', 'tokenizer_path': 'Qwen/Qwen3-4B-Instruct-2507', 'override_config': {}, 'external_lib': None, 'trust_remote_code': False, '_target_': 'verl.workers.config.FSDPCriticModelCfg', 'use_shm': False, 'enable_gradient_checkpointing': True, 'enable_activation_offload': False, 'use_remove_padding': False, 'lora_rank': 0, 'lora_alpha': 16, 'target_modules': 'all-linear', 'tiled_mlp': {'enabled': False, 'num_shards': 4}}, '_target_': 'verl.workers.config.FSDPCriticConfig', 'rollout_n': 3, 'strategy': 'fsdp', 'enable': None, 'ppo_mini_batch_size': 256, 'ppo_micro_batch_size': None, 'ppo_micro_batch_size_per_gpu': None, 'use_dynamic_bsz': False, 'ppo_max_token_len_per_gpu': 32768, 'forward_max_token_len_per_gpu': 32768, 'ppo_epochs': 1, 'shuffle': False, 'data_loader_seed': 42, 'cliprange_value': 0.5, 'loss_agg_mode': 'token-mean', 'checkpoint': {'_target_': 'verl.trainer.config.CheckpointConfig', 'save_contents': ['model', 'optimizer', 'extra'], 'load_contents': ['model', 'optimizer', 'extra'], 'async_save': False}, 'profiler': {'_target_': 'verl.utils.profiler.ProfilerConfig', 'tool': None, 'enable': False, 'all_ranks': False, 'ranks': [], 'save_path': 'outputs/profile', 'tool_config': {'nsys': {'_target_': 'verl.utils.profiler.config.NsightToolConfig', 'discrete': False}, 'npu': {'_target_': 'verl.utils.profiler.config.NPUToolConfig', 'contents': [], 'level': 'level0', 'analysis': True, 'discrete': False}, 'torch': {'_target_': 'verl.utils.profiler.config.TorchProfilerToolConfig', 'contents': [], 'discrete': False}, 'torch_memory': {'_target_': 'verl.utils.profiler.config.TorchMemoryToolConfig', 'trace_alloc_max_entries': 100000, 'stack_depth': 32}}}, 'forward_micro_batch_size': None, 'forward_micro_batch_size_per_gpu': None, 'ulysses_sequence_parallel_size': 1, 'grad_clip': 1.0}, 'reward_model': {'enable': False, 'enable_resource_pool': False, 'n_gpus_per_node': 8, 'nnodes': 0, 'strategy': 'fsdp', 'model': {'input_tokenizer': 'Qwen/Qwen3-4B-Instruct-2507', 'path': '~/models/FsfairX-LLaMA3-RM-v0.1', 'external_lib': None, 'trust_remote_code': False, 'override_config': {}, 'use_shm': False, 'use_remove_padding': False, 'use_fused_kernels': False, 'fsdp_config': {'_target_': 'verl.workers.config.FSDPEngineConfig', 'wrap_policy': {'min_num_params': 0}, 'param_offload': False, 'reshard_after_forward': True, 'fsdp_size': -1, 'forward_prefetch': False}}, 'micro_batch_size': None, 'micro_batch_size_per_gpu': None, 'max_length': None, 'use_dynamic_bsz': False, 'forward_max_token_len_per_gpu': 32768, 'reward_manager': 'naive', 'reward_loop_source': 'register', 'reward_loop_module_path': None, 'reward_loop_class_name': None, 'launch_reward_fn_async': False, 'sandbox_fusion': {'url': None, 'max_concurrent': 64, 'memory_limit_mb': 1024}, 'profiler': {'_target_': 'verl.utils.profiler.ProfilerConfig', 'tool': None, 'enable': False, 'all_ranks': False, 'ranks': [], 'save_path': 'outputs/profile', 'tool_config': {'nsys': {'_target_': 'verl.utils.profiler.config.NsightToolConfig', 'discrete': False}, 'npu': {'_target_': 'verl.utils.profiler.config.NPUToolConfig', 'contents': [], 'level': 'level0', 'analysis': True, 'discrete': False}, 'torch': {'_target_': 'verl.utils.profiler.config.TorchProfilerToolConfig', 'contents': [], 'discrete': False}, 'torch_memory': {'_target_': 'verl.utils.profiler.config.TorchMemoryToolConfig', 'trace_alloc_max_entries': 100000, 'stack_depth': 32}}}, 'ulysses_sequence_parallel_size': 1, 'use_reward_loop': True, 'num_workers': 1, 'rollout': {'_target_': 'verl.workers.config.RolloutConfig', 'name': '???', 'dtype': 'bfloat16', 'gpu_memory_utilization': 0.5, 'enforce_eager': True, 'cudagraph_capture_sizes': None, 'free_cache_engine': True, 'data_parallel_size': 1, 'expert_parallel_size': 1, 'tensor_model_parallel_size': 2, 'max_num_batched_tokens': 8192, 'max_model_len': None, 'max_num_seqs': 1024, 'load_format': 'auto', 'engine_kwargs': {}, 'limit_images': None, 'enable_chunked_prefill': True, 'enable_prefix_caching': True, 'disable_log_stats': True, 'skip_tokenizer_init': False, 'prompt_length': 2048, 'response_length': 2048}}, 'algorithm': {'rollout_correction': {'rollout_is': None, 'rollout_is_threshold': 2.0, 'rollout_rs': None, 'rollout_rs_threshold': None, 'bypass_mode': False, 'loss_type': 'ppo_clip', 'rollout_is_batch_normalize': False}, '_target_': 'verl.trainer.config.AlgoConfig', 'gamma': 1.0, 'lam': 1.0, 'adv_estimator': 'grpo', 'norm_adv_by_std_in_grpo': True, 'use_kl_in_reward': False, 'kl_penalty': 'kl', 'kl_ctrl': {'_target_': 'verl.trainer.config.KLControlConfig', 'type': 'fixed', 'kl_coef': 0.001, 'horizon': 10000, 'target_kl': 0.1}, 'use_pf_ppo': False, 'pf_ppo': {'reweight_method': 'pow', 'weight_pow': 2.0}}, 'custom_reward_function': {'path': '/home/mshahidul/readctrl/code/RL_model/verl/verl_train/reward_func/reward.py', 'name': 'compute_score'}, 'trainer': {'balance_batch': True, 'total_epochs': 15, 'total_training_steps': None, 'project_name': 'readctrl-verl', 'experiment_name': 'qwen3-4b-instruct-en', 'logger': ['console', 'wandb'], 'log_val_generations': 0, 'rollout_data_dir': None, 'validation_data_dir': None, 'nnodes': 1, 'n_gpus_per_node': 2, 'save_freq': 5, 'esi_redundant_time': 0, 'resume_mode': 'auto', 'resume_from_path': None, 'val_before_train': True, 'val_only': False, 'test_freq': 10, 'critic_warmup': 0, 'default_hdfs_dir': None, 'del_local_ckpt_after_load': False, 'default_local_dir': '/home/mshahidul/readctrl/code/RL_model/train_v2', 'max_actor_ckpt_to_keep': 1, 'max_critic_ckpt_to_keep': 1, 'ray_wait_register_center_timeout': 300, 'device': 'cuda', 'use_legacy_worker_impl': 'auto', 'remove_previous_ckpt_in_save': True}, 'global_profiler': {'_target_': 'verl.utils.profiler.ProfilerConfig', 'tool': None, 'steps': None, 'profile_continuous_steps': False, 'save_path': 'outputs/profile', 'global_tool_config': {'nsys': {'_target_': 'verl.utils.profiler.config.NsightToolConfig', 'discrete': False, 'controller_nsight_options': {'trace': 'cuda,nvtx,cublas,ucx', 'cuda-memory-usage': 'true', 'cuda-graph-trace': 'graph'}, 'worker_nsight_options': {'trace': 'cuda,nvtx,cublas,ucx', 'cuda-memory-usage': 'true', 'cuda-graph-trace': 'graph', 'capture-range': 'cudaProfilerApi', 'capture-range-end': None, 'kill': 'none'}}, 'torch_memory': {'trace_alloc_max_entries': 100000, 'stack_depth': 32, 'context': 'all', 'stacks': 'all', 'kw_args': {}}}}, 'transfer_queue': {'enable': False}, 'ray_kwargs': {'ray_init': {'num_cpus': None}, 'timeline_json_file': None}, '_wandb': {}} +2026-02-10 13:17:24,416 INFO MainThread:915293 [wandb_init.py:init():892] starting backend +2026-02-10 13:17:24,704 INFO MainThread:915293 [wandb_init.py:init():895] sending inform_init request +2026-02-10 13:17:24,713 INFO MainThread:915293 [wandb_init.py:init():903] backend started and connected +2026-02-10 13:17:24,724 INFO MainThread:915293 [wandb_init.py:init():973] updated telemetry +2026-02-10 13:17:24,746 INFO MainThread:915293 [wandb_init.py:init():997] communicating run to backend with 90.0 second timeout +2026-02-10 13:17:25,466 INFO MainThread:915293 [wandb_init.py:init():1042] starting run threads in backend +2026-02-10 13:17:26,155 INFO MainThread:915293 [wandb_run.py:_console_start():2529] atexit reg +2026-02-10 13:17:26,155 INFO MainThread:915293 [wandb_run.py:_redirect():2377] redirect: wrap_raw +2026-02-10 13:17:26,155 INFO MainThread:915293 [wandb_run.py:_redirect():2446] Wrapping output streams. +2026-02-10 13:17:26,156 INFO MainThread:915293 [wandb_run.py:_redirect():2469] Redirects installed. +2026-02-10 13:17:26,164 INFO MainThread:915293 [wandb_init.py:init():1082] run started, returning control to user process +2026-02-11 17:39:14,948 INFO MainThread:915293 [wandb_run.py:_finish():2295] finishing run shahidulshakib034-khulna-university-of-engineering-techn/readctrl-verl/1211jgw0 +2026-02-11 17:39:14,950 INFO MainThread:915293 [wandb_run.py:_atexit_cleanup():2494] got exitcode: 0 +2026-02-11 17:39:14,951 INFO MainThread:915293 [wandb_run.py:_restore():2476] restore +2026-02-11 17:39:14,951 INFO MainThread:915293 [wandb_run.py:_restore():2482] restore done diff --git a/code/RL_model/verl/verl_train/wandb/run-20260211_181504-2bnxrv8i/files/output.log b/code/RL_model/verl/verl_train/wandb/run-20260211_181504-2bnxrv8i/files/output.log new file mode 100644 index 0000000000000000000000000000000000000000..d6e17514f9e094c5b0f24b4c21d48b02131d1d93 --- /dev/null +++ b/code/RL_model/verl/verl_train/wandb/run-20260211_181504-2bnxrv8i/files/output.log @@ -0,0 +1,14 @@ +wandb: Detected [dspy, litellm, openai] in use. +wandb: Use W&B Weave for improved LLM call tracing. Install Weave with `pip install weave` then add `import weave` to the top of your script. +wandb: For more information, check out the docs at: https://weave-docs.wandb.ai/ +Checkpoint tracker file does not exist: /home/mshahidul/readctrl/code/RL_model/RL_model_subclaim_classifier/latest_checkpointed_iteration.txt +Training from scratch +test_gen_batch meta info: {'eos_token_id': 151645, 'pad_token_id': 151643, 'recompute_log_prob': False, 'do_sample': False, 'validate': True, 'global_steps': 0} +validation generation end +("Initial validation metrics: {'val-aux/multiclinsum/reward/mean@1': " + "np.float64(0.3417882988526358), 'val-core/multiclinsum/acc/mean@1': " + "np.float64(0.34178829897157226), 'val-aux/num_turns/min': np.int32(2), " + "'val-aux/num_turns/max': np.int32(2), 'val-aux/num_turns/mean': " + 'np.float64(2.0)}') +step:0 - val-aux/multiclinsum/reward/mean@1:np.float64(0.3417882988526358) - val-core/multiclinsum/acc/mean@1:np.float64(0.34178829897157226) - val-aux/num_turns/min:np.int32(2) - val-aux/num_turns/max:np.int32(2) - val-aux/num_turns/mean:np.float64(2.0) +Training Progress: 0%| | 0/45 [00:00, ?it/s] diff --git a/code/RL_model/verl/verl_train/wandb/run-20260211_181504-2bnxrv8i/files/requirements.txt b/code/RL_model/verl/verl_train/wandb/run-20260211_181504-2bnxrv8i/files/requirements.txt new file mode 100644 index 0000000000000000000000000000000000000000..3c860bd0e3457bae4d28e48b93f4be436aae99df --- /dev/null +++ b/code/RL_model/verl/verl_train/wandb/run-20260211_181504-2bnxrv8i/files/requirements.txt @@ -0,0 +1,291 @@ +verl==0.8.0.dev0 +psutil==7.1.3 +colorama==0.4.6 +annotated-doc==0.0.4 +sentry-sdk==2.51.0 +requests==2.32.5 +nvidia-cufile-cu12==1.13.1.3 +ml_dtypes==0.5.4 +xformers==0.0.32.post1 +sglang==0.5.2 +multidict==6.7.1 +typing_extensions==4.15.0 +nvidia-cusparselt-cu12==0.7.1 +openai-harmony==0.0.4 +transformers==4.56.1 +Werkzeug==3.1.5 +identify==2.6.16 +gepa==0.0.26 +pytest==9.0.2 +nvidia-cuda-runtime-cu12==12.8.90 +GitPython==3.1.46 +cupy-cuda12x==13.6.0 +tokenizers==0.22.2 +unsloth_zoo==2026.2.1 +pybind11==3.0.1 +google-api-core==2.29.0 +partial-json-parser==0.2.1.1.post7 +aiohttp-cors==0.8.1 +sniffio==1.3.1 +tensordict==0.10.0 +smart_open==7.5.0 +cffi==2.0.0 +wcwidth==0.5.3 +asttokens==3.0.1 +opencensus==0.11.4 +rpds-py==0.30.0 +py-spy==0.4.1 +litellm==1.81.9 +gguf==0.17.1 +nvidia-nvjitlink-cu12==12.8.93 +httpx==0.28.1 +cuda-python==13.1.1 +annotated-types==0.7.0 +regex==2026.1.15 +vllm==0.11.0 +idna==3.11 +parso==0.8.5 +pydantic-extra-types==2.11.0 +MarkupSafe==3.0.3 +cryptography==46.0.4 +openai==2.17.0 +filelock==3.20.3 +modelscope==1.34.0 +outlines==0.1.11 +dnspython==2.8.0 +scipy==1.17.0 +zipp==3.23.0 +PyYAML==6.0.3 +onnx==1.20.1 +torchdata==0.11.0 +unsloth==2026.2.1 +cuda-pathfinder==1.3.3 +asyncer==0.0.8 +verl==0.8.0.dev0 +httptools==0.7.1 +opencv-python-headless==4.13.0.90 +importlib_metadata==8.7.1 +peft==0.18.1 +opentelemetry-sdk==1.39.1 +python-json-logger==4.0.0 +alembic==1.18.3 +cuda-bindings==13.1.1 +mdurl==0.1.2 +fsspec==2025.9.0 +referencing==0.37.0 +xxhash==3.6.0 +interegular==0.3.3 +fastapi-cli==0.0.20 +uv==0.9.28 +tensorboard==2.20.0 +tyro==1.0.6 +nvidia-cublas-cu12==12.8.4.1 +sentencepiece==0.2.1 +rich-toolkit==0.18.1 +numpy==2.2.0 +yarl==1.22.0 +opencv-fixer==0.2.5 +tqdm==4.67.2 +python-dotenv==1.2.1 +Mako==1.3.10 +timm==1.0.16 +aiohappyeyeballs==2.6.1 +decord==0.6.0 +jiter==0.12.0 +airportsdata==20250909 +markdown-it-py==4.0.0 +nvidia-cusolver-cu12==11.7.3.90 +pyarrow==23.0.0 +opentelemetry-proto==1.39.1 +anyio==4.12.1 +pycryptodomex==3.23.0 +prometheus_client==0.24.1 +aiohttp==3.13.3 +urllib3==2.6.3 +pexpect==4.9.0 +pydantic-settings==2.12.0 +distro==1.9.0 +av==16.1.0 +cloudpickle==3.1.2 +mpmath==1.3.0 +certifi==2026.1.4 +antlr4-python3-runtime==4.9.3 +torchvision==0.23.0 +accelerate==1.12.0 +watchfiles==1.1.1 +ruff==0.14.14 +cut-cross-entropy==25.1.1 +wheel==0.46.3 +torchao==0.16.0 +omegaconf==2.3.0 +nvidia-cufft-cu12==11.3.3.83 +frozendict==2.4.7 +sympy==1.14.0 +setproctitle==1.3.7 +optuna==4.7.0 +setuptools==79.0.1 +py-cpuinfo==9.0.0 +ipython_pygments_lexers==1.1.1 +rich==14.3.2 +uvicorn==0.40.0 +outlines_core==0.2.11 +llvmlite==0.44.0 +nvidia-cuda-cupti-cu12==12.8.90 +attrs==25.4.0 +anthropic==0.77.0 +packaging==25.0 +fastrlock==0.8.3 +astor==0.8.1 +pluggy==1.6.0 +nvidia-cuda-nvrtc-cu12==12.8.93 +psutil==7.2.2 +virtualenv==20.36.1 +cbor2==5.8.0 +tenacity==9.1.4 +compressed-tensors==0.11.0 +SQLAlchemy==2.0.46 +nvidia-cusparse-cu12==12.5.8.93 +networkx==3.6.1 +httpcore==1.0.9 +onnxscript==0.3.1 +smmap==5.0.2 +opencv-python==4.13.0.90 +traitlets==5.14.3 +python-multipart==0.0.22 +pyvers==0.1.0 +huggingface-hub==0.36.0 +pillow==12.1.0 +jsonschema==4.26.0 +cfgv==3.5.0 +optree==0.18.0 +email-validator==2.3.0 +tabulate==0.9.0 +pre_commit==4.5.1 +msgpack==1.1.2 +depyf==0.19.0 +numba==0.61.2 +six==1.17.0 +aiosignal==1.4.0 +nvidia-nvtx-cu12==12.8.90 +propcache==0.4.1 +torch_memory_saver==0.0.8 +h11==0.16.0 +frozenlist==1.8.0 +websockets==16.0 +nvidia-cudnn-frontend==1.18.0 +build==1.4.0 +google-auth==2.48.0 +pycountry==24.6.1 +colorlog==6.10.1 +typeguard==4.4.4 +stack-data==0.6.3 +typing-inspection==0.4.2 +googleapis-common-protos==1.72.0 +pandas==3.0.0 +typer==0.21.1 +protobuf==6.33.5 +fastapi==0.128.0 +blake3==1.0.8 +opentelemetry-semantic-conventions==0.60b1 +opentelemetry-exporter-prometheus==0.60b1 +nvidia-cudnn-cu12==9.10.2.21 +Markdown==3.10.1 +liger_kernel==0.6.4 +json_repair==0.57.1 +nodeenv==1.10.0 +prompt_toolkit==3.0.52 +torchaudio==2.8.0 +datasets==4.3.0 +codetiming==1.4.0 +diffusers==0.36.0 +platformdirs==4.5.1 +jsonschema-specifications==2025.9.1 +hydra-core==1.3.2 +tensorboard-data-server==0.7.2 +lm-format-enforcer==0.11.3 +bitsandbytes==0.49.1 +pyasn1_modules==0.4.2 +tiktoken==0.12.0 +starlette==0.50.0 +pyproject_hooks==1.2.0 +flash_attn==2.8.1 +rsa==4.9.1 +ray==2.53.0 +nest-asyncio==1.6.0 +lark==1.2.2 +fastar==0.8.0 +orjson==3.11.6 +prometheus-fastapi-instrumentator==7.1.0 +opentelemetry-api==1.39.1 +mathruler==0.1.0 +pydantic_core==2.41.5 +fastapi-cloud-cli==0.11.0 +pynvml==13.0.1 +loguru==0.7.3 +torch==2.8.0 +msgspec==0.20.0 +nvidia-curand-cu12==10.3.9.90 +blobfile==3.0.0 +gitdb==4.0.12 +llguidance==0.7.30 +hf_transfer==0.1.9 +nvidia-nccl-cu12==2.27.3 +trl==0.24.0 +qwen-vl-utils==0.0.14 +ptyprocess==0.7.0 +ipdb==0.13.13 +opencensus-context==0.1.3 +jedi==0.19.2 +click==8.3.1 +multiprocess==0.70.16 +soxr==1.0.0 +sgl-kernel==0.3.9.post2 +colorful==0.5.8 +pyasn1==0.6.2 +charset-normalizer==3.4.4 +nvidia-ml-py==13.590.48 +hf-xet==1.2.0 +dill==0.4.0 +absl-py==2.4.0 +pydantic==2.12.5 +dspy==3.1.3 +wrapt==2.1.0 +flashinfer-python==0.3.1 +python-dateutil==2.9.0.post0 +cachetools==7.0.0 +soundfile==0.13.1 +diskcache==5.6.3 +onnx-ir==0.1.15 +docstring_parser==0.17.0 +matplotlib-inline==0.2.1 +Pygments==2.19.2 +wandb==0.24.1 +pure_eval==0.2.3 +ninja==1.13.0 +proto-plus==1.27.0 +pyzmq==27.1.0 +iniconfig==2.3.0 +Jinja2==3.1.6 +megatron-core==0.13.1 +uvloop==0.22.1 +fastuuid==0.14.0 +pycparser==3.0 +pylatexenc==2.10 +decorator==5.2.1 +shellingham==1.5.4 +lxml==6.0.2 +safetensors==0.7.0 +xgrammar==0.1.25 +pybase64==1.4.3 +ipython==9.9.0 +greenlet==3.3.1 +mistral_common==1.9.0 +rignore==0.7.6 +einops==0.8.2 +distlib==0.4.0 +triton==3.4.0 +executing==2.2.1 +grpcio==1.76.0 +pip==25.3 +verl==0.8.0.dev0 +verl==0.8.0.dev0 diff --git a/code/RL_model/verl/verl_train/wandb/run-20260213_213805-359jnobz/files/output.log b/code/RL_model/verl/verl_train/wandb/run-20260213_213805-359jnobz/files/output.log new file mode 100644 index 0000000000000000000000000000000000000000..2c0ff8a60d3205fde41da85bc5c1dd2d241c949e --- /dev/null +++ b/code/RL_model/verl/verl_train/wandb/run-20260213_213805-359jnobz/files/output.log @@ -0,0 +1,16 @@ +wandb: Detected [dspy, litellm, openai] in use. +wandb: Use W&B Weave for improved LLM call tracing. Install Weave with `pip install weave` then add `import weave` to the top of your script. +wandb: For more information, check out the docs at: https://weave-docs.wandb.ai/ +Found checkpoint: %s /home/mshahidul/readctrl/code/RL_model/models/RL_model_subclaim_classifier_v2/global_step_20 +Load from checkpoint folder: /home/mshahidul/readctrl/code/RL_model/models/RL_model_subclaim_classifier_v2/global_step_20 +Setting global step to 20 +Resuming from /home/mshahidul/readctrl/code/RL_model/models/RL_model_subclaim_classifier_v2/global_step_20 +test_gen_batch meta info: {'eos_token_id': 151645, 'pad_token_id': 151643, 'recompute_log_prob': False, 'do_sample': False, 'validate': True, 'global_steps': 20} +validation generation end +("Initial validation metrics: {'val-aux/multiclinsum/reward/mean@1': " + "np.float64(-0.03472305548762133), 'val-core/multiclinsum/acc/mean@1': " + "np.float64(-0.034723053892215534), 'val-aux/num_turns/min': np.int32(2), " + "'val-aux/num_turns/max': np.int32(2), 'val-aux/num_turns/mean': " + 'np.float64(2.0)}') +step:20 - val-aux/multiclinsum/reward/mean@1:np.float64(-0.03472305548762133) - val-core/multiclinsum/acc/mean@1:np.float64(-0.034723053892215534) - val-aux/num_turns/min:np.int32(2) - val-aux/num_turns/max:np.int32(2) - val-aux/num_turns/mean:np.float64(2.0) +Training Progress: 44%|████▍ | 20/45 [00:00, ?it/s] diff --git a/code/RL_model/verl/verl_train/wandb/run-20260213_213805-359jnobz/files/requirements.txt b/code/RL_model/verl/verl_train/wandb/run-20260213_213805-359jnobz/files/requirements.txt new file mode 100644 index 0000000000000000000000000000000000000000..3c860bd0e3457bae4d28e48b93f4be436aae99df --- /dev/null +++ b/code/RL_model/verl/verl_train/wandb/run-20260213_213805-359jnobz/files/requirements.txt @@ -0,0 +1,291 @@ +verl==0.8.0.dev0 +psutil==7.1.3 +colorama==0.4.6 +annotated-doc==0.0.4 +sentry-sdk==2.51.0 +requests==2.32.5 +nvidia-cufile-cu12==1.13.1.3 +ml_dtypes==0.5.4 +xformers==0.0.32.post1 +sglang==0.5.2 +multidict==6.7.1 +typing_extensions==4.15.0 +nvidia-cusparselt-cu12==0.7.1 +openai-harmony==0.0.4 +transformers==4.56.1 +Werkzeug==3.1.5 +identify==2.6.16 +gepa==0.0.26 +pytest==9.0.2 +nvidia-cuda-runtime-cu12==12.8.90 +GitPython==3.1.46 +cupy-cuda12x==13.6.0 +tokenizers==0.22.2 +unsloth_zoo==2026.2.1 +pybind11==3.0.1 +google-api-core==2.29.0 +partial-json-parser==0.2.1.1.post7 +aiohttp-cors==0.8.1 +sniffio==1.3.1 +tensordict==0.10.0 +smart_open==7.5.0 +cffi==2.0.0 +wcwidth==0.5.3 +asttokens==3.0.1 +opencensus==0.11.4 +rpds-py==0.30.0 +py-spy==0.4.1 +litellm==1.81.9 +gguf==0.17.1 +nvidia-nvjitlink-cu12==12.8.93 +httpx==0.28.1 +cuda-python==13.1.1 +annotated-types==0.7.0 +regex==2026.1.15 +vllm==0.11.0 +idna==3.11 +parso==0.8.5 +pydantic-extra-types==2.11.0 +MarkupSafe==3.0.3 +cryptography==46.0.4 +openai==2.17.0 +filelock==3.20.3 +modelscope==1.34.0 +outlines==0.1.11 +dnspython==2.8.0 +scipy==1.17.0 +zipp==3.23.0 +PyYAML==6.0.3 +onnx==1.20.1 +torchdata==0.11.0 +unsloth==2026.2.1 +cuda-pathfinder==1.3.3 +asyncer==0.0.8 +verl==0.8.0.dev0 +httptools==0.7.1 +opencv-python-headless==4.13.0.90 +importlib_metadata==8.7.1 +peft==0.18.1 +opentelemetry-sdk==1.39.1 +python-json-logger==4.0.0 +alembic==1.18.3 +cuda-bindings==13.1.1 +mdurl==0.1.2 +fsspec==2025.9.0 +referencing==0.37.0 +xxhash==3.6.0 +interegular==0.3.3 +fastapi-cli==0.0.20 +uv==0.9.28 +tensorboard==2.20.0 +tyro==1.0.6 +nvidia-cublas-cu12==12.8.4.1 +sentencepiece==0.2.1 +rich-toolkit==0.18.1 +numpy==2.2.0 +yarl==1.22.0 +opencv-fixer==0.2.5 +tqdm==4.67.2 +python-dotenv==1.2.1 +Mako==1.3.10 +timm==1.0.16 +aiohappyeyeballs==2.6.1 +decord==0.6.0 +jiter==0.12.0 +airportsdata==20250909 +markdown-it-py==4.0.0 +nvidia-cusolver-cu12==11.7.3.90 +pyarrow==23.0.0 +opentelemetry-proto==1.39.1 +anyio==4.12.1 +pycryptodomex==3.23.0 +prometheus_client==0.24.1 +aiohttp==3.13.3 +urllib3==2.6.3 +pexpect==4.9.0 +pydantic-settings==2.12.0 +distro==1.9.0 +av==16.1.0 +cloudpickle==3.1.2 +mpmath==1.3.0 +certifi==2026.1.4 +antlr4-python3-runtime==4.9.3 +torchvision==0.23.0 +accelerate==1.12.0 +watchfiles==1.1.1 +ruff==0.14.14 +cut-cross-entropy==25.1.1 +wheel==0.46.3 +torchao==0.16.0 +omegaconf==2.3.0 +nvidia-cufft-cu12==11.3.3.83 +frozendict==2.4.7 +sympy==1.14.0 +setproctitle==1.3.7 +optuna==4.7.0 +setuptools==79.0.1 +py-cpuinfo==9.0.0 +ipython_pygments_lexers==1.1.1 +rich==14.3.2 +uvicorn==0.40.0 +outlines_core==0.2.11 +llvmlite==0.44.0 +nvidia-cuda-cupti-cu12==12.8.90 +attrs==25.4.0 +anthropic==0.77.0 +packaging==25.0 +fastrlock==0.8.3 +astor==0.8.1 +pluggy==1.6.0 +nvidia-cuda-nvrtc-cu12==12.8.93 +psutil==7.2.2 +virtualenv==20.36.1 +cbor2==5.8.0 +tenacity==9.1.4 +compressed-tensors==0.11.0 +SQLAlchemy==2.0.46 +nvidia-cusparse-cu12==12.5.8.93 +networkx==3.6.1 +httpcore==1.0.9 +onnxscript==0.3.1 +smmap==5.0.2 +opencv-python==4.13.0.90 +traitlets==5.14.3 +python-multipart==0.0.22 +pyvers==0.1.0 +huggingface-hub==0.36.0 +pillow==12.1.0 +jsonschema==4.26.0 +cfgv==3.5.0 +optree==0.18.0 +email-validator==2.3.0 +tabulate==0.9.0 +pre_commit==4.5.1 +msgpack==1.1.2 +depyf==0.19.0 +numba==0.61.2 +six==1.17.0 +aiosignal==1.4.0 +nvidia-nvtx-cu12==12.8.90 +propcache==0.4.1 +torch_memory_saver==0.0.8 +h11==0.16.0 +frozenlist==1.8.0 +websockets==16.0 +nvidia-cudnn-frontend==1.18.0 +build==1.4.0 +google-auth==2.48.0 +pycountry==24.6.1 +colorlog==6.10.1 +typeguard==4.4.4 +stack-data==0.6.3 +typing-inspection==0.4.2 +googleapis-common-protos==1.72.0 +pandas==3.0.0 +typer==0.21.1 +protobuf==6.33.5 +fastapi==0.128.0 +blake3==1.0.8 +opentelemetry-semantic-conventions==0.60b1 +opentelemetry-exporter-prometheus==0.60b1 +nvidia-cudnn-cu12==9.10.2.21 +Markdown==3.10.1 +liger_kernel==0.6.4 +json_repair==0.57.1 +nodeenv==1.10.0 +prompt_toolkit==3.0.52 +torchaudio==2.8.0 +datasets==4.3.0 +codetiming==1.4.0 +diffusers==0.36.0 +platformdirs==4.5.1 +jsonschema-specifications==2025.9.1 +hydra-core==1.3.2 +tensorboard-data-server==0.7.2 +lm-format-enforcer==0.11.3 +bitsandbytes==0.49.1 +pyasn1_modules==0.4.2 +tiktoken==0.12.0 +starlette==0.50.0 +pyproject_hooks==1.2.0 +flash_attn==2.8.1 +rsa==4.9.1 +ray==2.53.0 +nest-asyncio==1.6.0 +lark==1.2.2 +fastar==0.8.0 +orjson==3.11.6 +prometheus-fastapi-instrumentator==7.1.0 +opentelemetry-api==1.39.1 +mathruler==0.1.0 +pydantic_core==2.41.5 +fastapi-cloud-cli==0.11.0 +pynvml==13.0.1 +loguru==0.7.3 +torch==2.8.0 +msgspec==0.20.0 +nvidia-curand-cu12==10.3.9.90 +blobfile==3.0.0 +gitdb==4.0.12 +llguidance==0.7.30 +hf_transfer==0.1.9 +nvidia-nccl-cu12==2.27.3 +trl==0.24.0 +qwen-vl-utils==0.0.14 +ptyprocess==0.7.0 +ipdb==0.13.13 +opencensus-context==0.1.3 +jedi==0.19.2 +click==8.3.1 +multiprocess==0.70.16 +soxr==1.0.0 +sgl-kernel==0.3.9.post2 +colorful==0.5.8 +pyasn1==0.6.2 +charset-normalizer==3.4.4 +nvidia-ml-py==13.590.48 +hf-xet==1.2.0 +dill==0.4.0 +absl-py==2.4.0 +pydantic==2.12.5 +dspy==3.1.3 +wrapt==2.1.0 +flashinfer-python==0.3.1 +python-dateutil==2.9.0.post0 +cachetools==7.0.0 +soundfile==0.13.1 +diskcache==5.6.3 +onnx-ir==0.1.15 +docstring_parser==0.17.0 +matplotlib-inline==0.2.1 +Pygments==2.19.2 +wandb==0.24.1 +pure_eval==0.2.3 +ninja==1.13.0 +proto-plus==1.27.0 +pyzmq==27.1.0 +iniconfig==2.3.0 +Jinja2==3.1.6 +megatron-core==0.13.1 +uvloop==0.22.1 +fastuuid==0.14.0 +pycparser==3.0 +pylatexenc==2.10 +decorator==5.2.1 +shellingham==1.5.4 +lxml==6.0.2 +safetensors==0.7.0 +xgrammar==0.1.25 +pybase64==1.4.3 +ipython==9.9.0 +greenlet==3.3.1 +mistral_common==1.9.0 +rignore==0.7.6 +einops==0.8.2 +distlib==0.4.0 +triton==3.4.0 +executing==2.2.1 +grpcio==1.76.0 +pip==25.3 +verl==0.8.0.dev0 +verl==0.8.0.dev0 diff --git a/code/RL_model/verl/verl_train/wandb/run-20260213_213805-359jnobz/files/wandb-metadata.json b/code/RL_model/verl/verl_train/wandb/run-20260213_213805-359jnobz/files/wandb-metadata.json new file mode 100644 index 0000000000000000000000000000000000000000..478b24c1947a48112a6166ef99730b170975de5a --- /dev/null +++ b/code/RL_model/verl/verl_train/wandb/run-20260213_213805-359jnobz/files/wandb-metadata.json @@ -0,0 +1,93 @@ +{ + "os": "Linux-5.15.0-160-generic-x86_64-with-glibc2.35", + "python": "CPython 3.12.12", + "startedAt": "2026-02-14T02:38:05.472899Z", + "args": [ + "--node-ip-address=172.16.34.29", + "--node-manager-port=40275", + "--object-store-name=/tmp/ray/session_2026-02-13_21-33-26_947160_1780255/sockets/plasma_store", + "--raylet-name=/tmp/ray/session_2026-02-13_21-33-26_947160_1780255/sockets/raylet", + "--redis-address=None", + "--metrics-agent-port=60289", + "--logging-rotate-bytes=536870912", + "--logging-rotate-backup-count=5", + "--runtime-env-agent-port=62751", + "--gcs-address=172.16.34.29:61399", + "--session-name=session_2026-02-13_21-33-26_947160_1780255", + "--temp-dir=/tmp/ray", + "--webui=127.0.0.1:8297", + "--cluster-id=b66ee6b50b9103e9f4efa1c173af562cd187ee160d75da1ec48622e3", + "--startup-token=128", + "--worker-launch-time-ms=1771036419075", + "--node-id=5e860392d67d9c5d07e1887ef6c7617a384dcd25422738daf1767179", + "--runtime-env-hash=1096984665" + ], + "program": "/home/mshahidul/miniconda3/envs/verl2/lib/python3.12/site-packages/ray/_private/workers/default_worker.py", + "git": { + "remote": "https://github.com/verl-project/verl", + "commit": "d9939add7a2a01923a9088891f913a5d20c4e622" + }, + "email": "shahidulshakib034@gmail.com", + "root": "/data/home_beta/mshahidul/readctrl/code/RL_model/verl/verl_train", + "host": "gamma", + "executable": "/home/mshahidul/miniconda3/envs/verl2/bin/python3", + "cpu_count": 64, + "cpu_count_logical": 128, + "gpu": "NVIDIA A100 80GB PCIe", + "gpu_count": 6, + "disk": { + "/": { + "total": "3766429188096", + "used": "214471798784" + } + }, + "memory": { + "total": "1081814863872" + }, + "gpu_nvidia": [ + { + "name": "NVIDIA A100 80GB PCIe", + "memoryTotal": "85899345920", + "cudaCores": 6912, + "architecture": "Ampere", + "uuid": "GPU-4a3678c7-34a9-356f-f7b7-7f7e2f44b596" + }, + { + "name": "NVIDIA A100 80GB PCIe", + "memoryTotal": "85899345920", + "cudaCores": 6912, + "architecture": "Ampere", + "uuid": "GPU-df506764-0db5-91b4-8ec9-154a3bb8123f" + }, + { + "name": "NVIDIA A100 80GB PCIe", + "memoryTotal": "85899345920", + "cudaCores": 6912, + "architecture": "Ampere", + "uuid": "GPU-2c3dbd62-b384-2996-a0f6-b32dcfcc3538" + }, + { + "name": "NVIDIA A100 80GB PCIe", + "memoryTotal": "85899345920", + "cudaCores": 6912, + "architecture": "Ampere", + "uuid": "GPU-1ff3dabe-4b9a-ea62-5cc3-01f12f32d328" + }, + { + "name": "NVIDIA H100 PCIe", + "memoryTotal": "85520809984", + "cudaCores": 14592, + "architecture": "Hopper", + "uuid": "GPU-eefc4b8c-0e79-c1d6-a9ff-8325040572eb" + }, + { + "name": "NVIDIA H100 PCIe", + "memoryTotal": "85520809984", + "cudaCores": 14592, + "architecture": "Hopper", + "uuid": "GPU-d42b6057-13e8-1e88-6aa1-9307df72dece" + } + ], + "cudaVersion": "13.0", + "writerId": "rhucmja0h81c4d6ugb07iipalj5bevi6" +} \ No newline at end of file diff --git a/code/RL_model/verl/verl_train/wandb/run-20260213_213805-359jnobz/logs/debug-core.log b/code/RL_model/verl/verl_train/wandb/run-20260213_213805-359jnobz/logs/debug-core.log new file mode 100644 index 0000000000000000000000000000000000000000..10292e42d05d2762b14ca3c559457fcd381b4464 --- /dev/null +++ b/code/RL_model/verl/verl_train/wandb/run-20260213_213805-359jnobz/logs/debug-core.log @@ -0,0 +1,7 @@ +{"time":"2026-02-13T21:38:05.592445138-05:00","level":"INFO","msg":"main: starting server","port-filename":"/tmp/tmpd_17hday/port-1788862.txt","pid":1788862,"log-level":0,"disable-analytics":false,"shutdown-on-parent-exit":false,"enable-dcgm-profiling":false} +{"time":"2026-02-13T21:38:05.59364514-05:00","level":"INFO","msg":"server: will exit if parent process dies","ppid":1788862} +{"time":"2026-02-13T21:38:05.593699137-05:00","level":"INFO","msg":"server: accepting connections","addr":{"Name":"/tmp/wandb-1788862-1796744-626139941/socket","Net":"unix"}} +{"time":"2026-02-13T21:38:05.758142543-05:00","level":"INFO","msg":"connection: ManageConnectionData: new connection created","id":"1(@)"} +{"time":"2026-02-13T21:38:05.770836522-05:00","level":"INFO","msg":"handleInformInit: received","streamId":"359jnobz","id":"1(@)"} +{"time":"2026-02-13T21:38:07.195907797-05:00","level":"INFO","msg":"handleInformInit: stream started","streamId":"359jnobz","id":"1(@)"} +{"time":"2026-02-13T21:38:13.303933851-05:00","level":"INFO","msg":"connection: cancelling request","id":"1(@)","requestId":"ulcbio3cyibf"} diff --git a/code/RL_model/verl/verl_train/wandb/run-20260213_213805-359jnobz/logs/debug-internal.log b/code/RL_model/verl/verl_train/wandb/run-20260213_213805-359jnobz/logs/debug-internal.log new file mode 100644 index 0000000000000000000000000000000000000000..6075a2efad1c626ebc55a4bd64a01d7877186b11 --- /dev/null +++ b/code/RL_model/verl/verl_train/wandb/run-20260213_213805-359jnobz/logs/debug-internal.log @@ -0,0 +1,6 @@ +{"time":"2026-02-13T21:38:05.772146767-05:00","level":"INFO","msg":"stream: starting","core version":"0.24.1"} +{"time":"2026-02-13T21:38:07.192132297-05:00","level":"INFO","msg":"stream: created new stream","id":"359jnobz"} +{"time":"2026-02-13T21:38:07.192289475-05:00","level":"INFO","msg":"handler: started","stream_id":"359jnobz"} +{"time":"2026-02-13T21:38:07.195880283-05:00","level":"INFO","msg":"stream: started","id":"359jnobz"} +{"time":"2026-02-13T21:38:07.195916279-05:00","level":"INFO","msg":"writer: started","stream_id":"359jnobz"} +{"time":"2026-02-13T21:38:07.195948278-05:00","level":"INFO","msg":"sender: started","stream_id":"359jnobz"} diff --git a/code/RL_model/verl/verl_train/wandb/run-20260213_213805-359jnobz/logs/debug.log b/code/RL_model/verl/verl_train/wandb/run-20260213_213805-359jnobz/logs/debug.log new file mode 100644 index 0000000000000000000000000000000000000000..c68564454f534429298e78dd359cad56b5e7a9c5 --- /dev/null +++ b/code/RL_model/verl/verl_train/wandb/run-20260213_213805-359jnobz/logs/debug.log @@ -0,0 +1,19 @@ +2026-02-13 21:38:05,495 INFO MainThread:1788862 [wandb_setup.py:_flush():81] Current SDK version is 0.24.1 +2026-02-13 21:38:05,495 INFO MainThread:1788862 [wandb_setup.py:_flush():81] Configure stats pid to 1788862 +2026-02-13 21:38:05,495 INFO MainThread:1788862 [wandb_setup.py:_flush():81] Loading settings from environment variables +2026-02-13 21:38:05,495 INFO MainThread:1788862 [wandb_init.py:setup_run_log_directory():717] Logging user logs to /data/home_beta/mshahidul/readctrl/code/RL_model/verl/verl_train/wandb/run-20260213_213805-359jnobz/logs/debug.log +2026-02-13 21:38:05,496 INFO MainThread:1788862 [wandb_init.py:setup_run_log_directory():718] Logging internal logs to /data/home_beta/mshahidul/readctrl/code/RL_model/verl/verl_train/wandb/run-20260213_213805-359jnobz/logs/debug-internal.log +2026-02-13 21:38:05,496 INFO MainThread:1788862 [wandb_init.py:init():844] calling init triggers +2026-02-13 21:38:05,497 INFO MainThread:1788862 [wandb_init.py:init():849] wandb.init called with sweep_config: {} +config: {'actor_rollout_ref': {'actor': {'optim': {'_target_': 'verl.workers.config.FSDPOptimizerConfig', 'optimizer': 'AdamW', 'optimizer_impl': 'torch.optim', 'lr': 1e-06, 'lr_warmup_steps_ratio': 0.0, 'total_training_steps': 45, 'weight_decay': 0.01, 'lr_warmup_steps': -1, 'betas': [0.9, 0.999], 'clip_grad': 1.0, 'min_lr_ratio': 0.0, 'num_cycles': 0.5, 'lr_scheduler_type': 'constant', 'warmup_style': None, 'override_optimizer_config': None}, 'fsdp_config': {'_target_': 'verl.workers.config.FSDPEngineConfig', 'wrap_policy': {'min_num_params': 0}, 'param_offload': True, 'optimizer_offload': True, 'offload_policy': False, 'reshard_after_forward': True, 'fsdp_size': -1, 'forward_prefetch': False, 'model_dtype': 'fp32', 'use_orig_params': False, 'seed': 42, 'full_determinism': False, 'ulysses_sequence_parallel_size': 1, 'entropy_from_logits_with_chunking': False, 'use_torch_compile': True, 'entropy_checkpointing': False, 'forward_only': False, 'strategy': 'fsdp', 'dtype': 'bfloat16'}, '_target_': 'verl.workers.config.FSDPActorConfig', 'rollout_n': 3, 'strategy': 'fsdp', 'ppo_mini_batch_size': 256, 'ppo_micro_batch_size': None, 'ppo_micro_batch_size_per_gpu': 16, 'use_dynamic_bsz': False, 'ppo_max_token_len_per_gpu': 16384, 'clip_ratio': 0.2, 'clip_ratio_low': 0.2, 'clip_ratio_high': 0.2, 'tau_pos': 1.0, 'tau_neg': 1.05, 'freeze_vision_tower': False, 'policy_loss': {'_target_': 'verl.workers.config.PolicyLossConfig', 'loss_mode': 'vanilla', 'clip_cov_ratio': 0.0002, 'clip_cov_lb': 1.0, 'clip_cov_ub': 5.0, 'kl_cov_ratio': 0.0002, 'ppo_kl_coef': 0.1}, 'clip_ratio_c': 3.0, 'loss_agg_mode': 'token-mean', 'loss_scale_factor': None, 'entropy_coeff': 0, 'calculate_entropy': False, 'use_kl_loss': True, 'use_prefix_grouper': False, 'use_torch_compile': True, 'kl_loss_coef': 0.001, 'kl_loss_type': 'low_var_kl', 'ppo_epochs': 1, 'shuffle': False, 'data_loader_seed': 42, 'checkpoint': {'_target_': 'verl.trainer.config.CheckpointConfig', 'save_contents': ['model', 'optimizer', 'extra'], 'load_contents': ['model', 'optimizer', 'extra'], 'async_save': False}, 'use_fused_kernels': False, 'profiler': {'_target_': 'verl.utils.profiler.ProfilerConfig', 'tool': None, 'enable': False, 'all_ranks': False, 'ranks': [], 'save_path': 'outputs/profile', 'tool_config': {'nsys': {'_target_': 'verl.utils.profiler.config.NsightToolConfig', 'discrete': False}, 'npu': {'_target_': 'verl.utils.profiler.config.NPUToolConfig', 'contents': [], 'level': 'level0', 'analysis': True, 'discrete': False}, 'torch': {'_target_': 'verl.utils.profiler.config.TorchProfilerToolConfig', 'contents': [], 'discrete': False}, 'torch_memory': {'_target_': 'verl.utils.profiler.config.TorchMemoryToolConfig', 'trace_alloc_max_entries': 100000, 'stack_depth': 32}}}, 'router_replay': {'_target_': 'verl.workers.config.RouterReplayConfig', 'mode': 'disabled', 'record_file': None, 'replay_file': None}, 'grad_clip': 1.0, 'ulysses_sequence_parallel_size': 1, 'entropy_from_logits_with_chunking': False, 'entropy_checkpointing': False, 'use_remove_padding': True, 'calculate_sum_pi_squared': False, 'sum_pi_squared_checkpointing': False}, 'ref': {'rollout_n': 3, 'strategy': 'fsdp', 'use_torch_compile': True, 'log_prob_micro_batch_size': None, 'log_prob_micro_batch_size_per_gpu': 32, 'log_prob_use_dynamic_bsz': False, 'log_prob_max_token_len_per_gpu': 16384, 'profiler': {'_target_': 'verl.utils.profiler.ProfilerConfig', 'tool': None, 'enable': False, 'all_ranks': False, 'ranks': [], 'save_path': 'outputs/profile', 'tool_config': {'nsys': {'_target_': 'verl.utils.profiler.config.NsightToolConfig', 'discrete': False}, 'npu': {'_target_': 'verl.utils.profiler.config.NPUToolConfig', 'contents': [], 'level': 'level0', 'analysis': True, 'discrete': False}, 'torch': {'_target_': 'verl.utils.profiler.config.TorchProfilerToolConfig', 'contents': [], 'discrete': False}, 'torch_memory': {'_target_': 'verl.utils.profiler.config.TorchMemoryToolConfig', 'trace_alloc_max_entries': 100000, 'stack_depth': 32}}}, 'router_replay': {'_target_': 'verl.workers.config.RouterReplayConfig', 'mode': 'disabled', 'record_file': None, 'replay_file': None}, 'fsdp_config': {'_target_': 'verl.workers.config.FSDPEngineConfig', 'wrap_policy': {'min_num_params': 0}, 'param_offload': True, 'optimizer_offload': False, 'offload_policy': False, 'reshard_after_forward': True, 'fsdp_size': -1, 'forward_prefetch': False, 'model_dtype': 'fp32', 'use_orig_params': False, 'seed': 42, 'full_determinism': False, 'ulysses_sequence_parallel_size': 1, 'entropy_from_logits_with_chunking': False, 'use_torch_compile': True, 'entropy_checkpointing': False, 'forward_only': True, 'strategy': 'fsdp', 'dtype': 'bfloat16'}, '_target_': 'verl.workers.config.FSDPActorConfig', 'ulysses_sequence_parallel_size': 1, 'entropy_from_logits_with_chunking': False, 'entropy_checkpointing': False}, 'rollout': {'_target_': 'verl.workers.config.RolloutConfig', 'name': 'vllm', 'mode': 'async', 'temperature': 1.0, 'top_k': -1, 'top_p': 1, 'prompt_length': 1024, 'response_length': 2048, 'dtype': 'bfloat16', 'gpu_memory_utilization': 0.4, 'ignore_eos': False, 'enforce_eager': True, 'cudagraph_capture_sizes': None, 'free_cache_engine': True, 'tensor_model_parallel_size': 1, 'data_parallel_size': 1, 'expert_parallel_size': 1, 'pipeline_model_parallel_size': 1, 'max_num_batched_tokens': 8192, 'max_model_len': 8192, 'max_num_seqs': 1024, 'enable_chunked_prefill': True, 'enable_prefix_caching': True, 'logprobs_mode': 'processed_logprobs', 'scheduling_policy': 'fcfs', 'load_format': 'dummy', 'log_prob_micro_batch_size': None, 'log_prob_micro_batch_size_per_gpu': 32, 'log_prob_use_dynamic_bsz': False, 'log_prob_max_token_len_per_gpu': 16384, 'disable_log_stats': True, 'do_sample': True, 'n': 3, 'over_sample_rate': 0, 'multi_stage_wake_up': False, 'engine_kwargs': {'vllm': {}, 'sglang': {}, 'trtllm': {}}, 'val_kwargs': {'_target_': 'verl.workers.config.SamplingConfig', 'top_k': -1, 'top_p': 1.0, 'temperature': 0, 'n': 1, 'do_sample': False}, 'multi_turn': {'_target_': 'verl.workers.config.MultiTurnConfig', 'enable': False, 'max_assistant_turns': None, 'tool_config_path': None, 'max_user_turns': None, 'max_parallel_calls': 1, 'max_tool_response_length': 256, 'tool_response_truncate_side': 'middle', 'interaction_config_path': None, 'use_inference_chat_template': False, 'tokenization_sanity_check_mode': 'strict', 'format': 'hermes', 'num_repeat_rollouts': None}, 'calculate_log_probs': False, 'agent': {'_target_': 'verl.workers.config.AgentLoopConfig', 'num_workers': 8, 'default_agent_loop': 'single_turn_agent', 'agent_loop_config_path': None, 'custom_async_server': {'_target_': 'verl.workers.config.CustomAsyncServerConfig', 'path': None, 'name': None}}, 'checkpoint_engine': {'_target_': 'verl.workers.config.CheckpointEngineConfig', 'backend': 'naive', 'update_weights_bucket_megabytes': 2048, 'engine_kwargs': {}}, 'trace': {'_target_': 'verl.workers.config.TraceConfig', 'backend': None, 'token2text': False, 'max_samples_per_step_per_worker': None}, 'skip_rollout': False, 'skip_dump_dir': '/tmp/rollout_dump', 'skip_tokenizer_init': True, 'enable_rollout_routing_replay': False, 'profiler': {'_target_': 'verl.utils.profiler.ProfilerConfig', 'tool': None, 'enable': False, 'all_ranks': False, 'ranks': [], 'save_path': 'outputs/profile', 'tool_config': {'nsys': {'_target_': 'verl.utils.profiler.config.NsightToolConfig', 'discrete': False}, 'npu': {'_target_': 'verl.utils.profiler.config.NPUToolConfig', 'contents': [], 'level': 'level0', 'analysis': True, 'discrete': False}, 'torch': {'_target_': 'verl.utils.profiler.config.TorchProfilerToolConfig', 'contents': [], 'discrete': False}, 'torch_memory': {'_target_': 'verl.utils.profiler.config.TorchMemoryToolConfig', 'trace_alloc_max_entries': 100000, 'stack_depth': 32}}}, 'prometheus': {'_target_': 'verl.workers.config.PrometheusConfig', 'enable': False, 'port': 9090, 'file': '/tmp/ray/session_latest/metrics/prometheus/prometheus.yml', 'served_model_name': 'Qwen/Qwen3-4B-Instruct-2507'}, 'quantization': None, 'quantization_config_file': None, 'mtp': {'_target_': 'verl.workers.config.MtpConfig', 'enable': False, 'enable_train': False, 'enable_rollout': False, 'detach_encoder': False, 'mtp_loss_scaling_factor': 0.1, 'speculative_algorithm': 'EAGLE', 'speculative_num_steps': 3, 'speculative_eagle_topk': 1, 'speculative_num_draft_tokens': 4, 'method': 'mtp', 'num_speculative_tokens': 1}, 'layered_summon': False}, 'model': {'_target_': 'verl.workers.config.HFModelConfig', 'path': 'Qwen/Qwen3-4B-Instruct-2507', 'hf_config_path': None, 'tokenizer_path': None, 'use_shm': False, 'trust_remote_code': False, 'custom_chat_template': None, 'external_lib': None, 'override_config': {}, 'enable_gradient_checkpointing': True, 'enable_activation_offload': False, 'use_remove_padding': True, 'lora_rank': 0, 'lora_alpha': 16, 'target_modules': 'all-linear', 'exclude_modules': None, 'lora_adapter_path': None, 'use_liger': False, 'use_fused_kernels': False, 'fused_kernel_options': {'impl_backend': 'torch'}, 'tiled_mlp': {'enabled': False, 'num_shards': 4}, 'mtp': {'_target_': 'verl.workers.config.MtpConfig', 'enable': False, 'enable_train': False, 'enable_rollout': False, 'detach_encoder': False, 'mtp_loss_scaling_factor': 0.1, 'speculative_algorithm': 'EAGLE', 'speculative_num_steps': 3, 'speculative_eagle_topk': 1, 'speculative_num_draft_tokens': 4, 'method': 'mtp', 'num_speculative_tokens': 1}}, 'hybrid_engine': True, 'nccl_timeout': 600}, 'data': {'tokenizer': None, 'use_shm': False, 'train_files': '/home/mshahidul/readctrl/code/RL_model/verl/verl_train/dataset/train.parquet', 'val_files': '/home/mshahidul/readctrl/code/RL_model/verl/verl_train/dataset/test.parquet', 'train_max_samples': -1, 'val_max_samples': -1, 'prompt_key': 'prompt', 'reward_fn_key': 'data_source', 'max_prompt_length': 1024, 'max_response_length': 2048, 'train_batch_size': 512, 'val_batch_size': None, 'tool_config_path': None, 'return_raw_input_ids': False, 'return_raw_chat': True, 'return_full_prompt': False, 'shuffle': True, 'seed': None, 'dataloader_num_workers': 8, 'image_patch_size': 14, 'validation_shuffle': False, 'filter_overlong_prompts': True, 'filter_overlong_prompts_workers': 1, 'truncation': 'error', 'image_key': 'images', 'video_key': 'videos', 'trust_remote_code': False, 'custom_cls': {'path': None, 'name': None}, 'return_multi_modal_inputs': True, 'sampler': {'class_path': None, 'class_name': None}, 'datagen': {'path': None, 'name': None}, 'apply_chat_template_kwargs': {}}, 'reward_manager': {'_target_': 'verl.trainer.config.config.RewardManagerConfig', 'source': 'register', 'name': 'naive', 'module': {'_target_': 'verl.trainer.config.config.ModuleConfig', 'path': None, 'name': 'custom_reward_manager'}}, 'critic': {'optim': {'_target_': 'verl.workers.config.FSDPOptimizerConfig', 'optimizer': 'AdamW', 'optimizer_impl': 'torch.optim', 'lr': 1e-05, 'lr_warmup_steps_ratio': 0.0, 'total_training_steps': 45, 'weight_decay': 0.01, 'lr_warmup_steps': -1, 'betas': [0.9, 0.999], 'clip_grad': 1.0, 'min_lr_ratio': 0.0, 'num_cycles': 0.5, 'lr_scheduler_type': 'constant', 'warmup_style': None, 'override_optimizer_config': None}, 'model': {'fsdp_config': {'_target_': 'verl.workers.config.FSDPEngineConfig', 'wrap_policy': {'min_num_params': 0}, 'param_offload': False, 'optimizer_offload': False, 'offload_policy': False, 'reshard_after_forward': True, 'fsdp_size': -1, 'forward_prefetch': False, 'model_dtype': 'fp32', 'use_orig_params': False, 'seed': 42, 'full_determinism': False, 'ulysses_sequence_parallel_size': 1, 'entropy_from_logits_with_chunking': False, 'use_torch_compile': True, 'entropy_checkpointing': False, 'forward_only': False, 'strategy': 'fsdp', 'dtype': 'bfloat16'}, 'path': '~/models/deepseek-llm-7b-chat', 'tokenizer_path': 'Qwen/Qwen3-4B-Instruct-2507', 'override_config': {}, 'external_lib': None, 'trust_remote_code': False, '_target_': 'verl.workers.config.FSDPCriticModelCfg', 'use_shm': False, 'enable_gradient_checkpointing': True, 'enable_activation_offload': False, 'use_remove_padding': False, 'lora_rank': 0, 'lora_alpha': 16, 'target_modules': 'all-linear', 'tiled_mlp': {'enabled': False, 'num_shards': 4}}, '_target_': 'verl.workers.config.FSDPCriticConfig', 'rollout_n': 3, 'strategy': 'fsdp', 'enable': None, 'ppo_mini_batch_size': 256, 'ppo_micro_batch_size': None, 'ppo_micro_batch_size_per_gpu': None, 'use_dynamic_bsz': False, 'ppo_max_token_len_per_gpu': 32768, 'forward_max_token_len_per_gpu': 32768, 'ppo_epochs': 1, 'shuffle': False, 'data_loader_seed': 42, 'cliprange_value': 0.5, 'loss_agg_mode': 'token-mean', 'checkpoint': {'_target_': 'verl.trainer.config.CheckpointConfig', 'save_contents': ['model', 'optimizer', 'extra'], 'load_contents': ['model', 'optimizer', 'extra'], 'async_save': False}, 'profiler': {'_target_': 'verl.utils.profiler.ProfilerConfig', 'tool': None, 'enable': False, 'all_ranks': False, 'ranks': [], 'save_path': 'outputs/profile', 'tool_config': {'nsys': {'_target_': 'verl.utils.profiler.config.NsightToolConfig', 'discrete': False}, 'npu': {'_target_': 'verl.utils.profiler.config.NPUToolConfig', 'contents': [], 'level': 'level0', 'analysis': True, 'discrete': False}, 'torch': {'_target_': 'verl.utils.profiler.config.TorchProfilerToolConfig', 'contents': [], 'discrete': False}, 'torch_memory': {'_target_': 'verl.utils.profiler.config.TorchMemoryToolConfig', 'trace_alloc_max_entries': 100000, 'stack_depth': 32}}}, 'forward_micro_batch_size': None, 'forward_micro_batch_size_per_gpu': None, 'ulysses_sequence_parallel_size': 1, 'grad_clip': 1.0}, 'reward_model': {'enable': False, 'enable_resource_pool': False, 'n_gpus_per_node': 8, 'nnodes': 0, 'strategy': 'fsdp', 'model': {'input_tokenizer': 'Qwen/Qwen3-4B-Instruct-2507', 'path': '~/models/FsfairX-LLaMA3-RM-v0.1', 'external_lib': None, 'trust_remote_code': False, 'override_config': {}, 'use_shm': False, 'use_remove_padding': False, 'use_fused_kernels': False, 'fsdp_config': {'_target_': 'verl.workers.config.FSDPEngineConfig', 'wrap_policy': {'min_num_params': 0}, 'param_offload': False, 'reshard_after_forward': True, 'fsdp_size': -1, 'forward_prefetch': False}}, 'micro_batch_size': None, 'micro_batch_size_per_gpu': None, 'max_length': None, 'use_dynamic_bsz': False, 'forward_max_token_len_per_gpu': 32768, 'reward_manager': 'naive', 'reward_loop_source': 'register', 'reward_loop_module_path': None, 'reward_loop_class_name': None, 'launch_reward_fn_async': False, 'sandbox_fusion': {'url': None, 'max_concurrent': 64, 'memory_limit_mb': 1024}, 'profiler': {'_target_': 'verl.utils.profiler.ProfilerConfig', 'tool': None, 'enable': False, 'all_ranks': False, 'ranks': [], 'save_path': 'outputs/profile', 'tool_config': {'nsys': {'_target_': 'verl.utils.profiler.config.NsightToolConfig', 'discrete': False}, 'npu': {'_target_': 'verl.utils.profiler.config.NPUToolConfig', 'contents': [], 'level': 'level0', 'analysis': True, 'discrete': False}, 'torch': {'_target_': 'verl.utils.profiler.config.TorchProfilerToolConfig', 'contents': [], 'discrete': False}, 'torch_memory': {'_target_': 'verl.utils.profiler.config.TorchMemoryToolConfig', 'trace_alloc_max_entries': 100000, 'stack_depth': 32}}}, 'ulysses_sequence_parallel_size': 1, 'use_reward_loop': True, 'num_workers': 1, 'rollout': {'_target_': 'verl.workers.config.RolloutConfig', 'name': '???', 'dtype': 'bfloat16', 'gpu_memory_utilization': 0.5, 'enforce_eager': True, 'cudagraph_capture_sizes': None, 'free_cache_engine': True, 'data_parallel_size': 1, 'expert_parallel_size': 1, 'tensor_model_parallel_size': 2, 'max_num_batched_tokens': 8192, 'max_model_len': None, 'max_num_seqs': 1024, 'load_format': 'auto', 'engine_kwargs': {}, 'limit_images': None, 'enable_chunked_prefill': True, 'enable_prefix_caching': True, 'disable_log_stats': True, 'skip_tokenizer_init': False, 'prompt_length': 2048, 'response_length': 2048}}, 'algorithm': {'rollout_correction': {'rollout_is': None, 'rollout_is_threshold': 2.0, 'rollout_rs': None, 'rollout_rs_threshold': None, 'bypass_mode': False, 'loss_type': 'ppo_clip', 'rollout_is_batch_normalize': False}, '_target_': 'verl.trainer.config.AlgoConfig', 'gamma': 1.0, 'lam': 1.0, 'adv_estimator': 'grpo', 'norm_adv_by_std_in_grpo': True, 'use_kl_in_reward': False, 'kl_penalty': 'kl', 'kl_ctrl': {'_target_': 'verl.trainer.config.KLControlConfig', 'type': 'fixed', 'kl_coef': 0.001, 'horizon': 10000, 'target_kl': 0.1}, 'use_pf_ppo': False, 'pf_ppo': {'reweight_method': 'pow', 'weight_pow': 2.0}}, 'custom_reward_function': {'path': '/home/mshahidul/readctrl/code/RL_model/verl/verl_train/reward_func/reward_func/reward_new.py', 'name': 'compute_score'}, 'trainer': {'balance_batch': True, 'total_epochs': 15, 'total_training_steps': None, 'project_name': 'readctrl-verl', 'experiment_name': 'qwen3-4b-instruct-en', 'logger': ['console', 'wandb'], 'log_val_generations': 0, 'rollout_data_dir': None, 'validation_data_dir': None, 'nnodes': 1, 'n_gpus_per_node': 2, 'save_freq': 5, 'esi_redundant_time': 0, 'resume_mode': 'auto', 'resume_from_path': None, 'val_before_train': True, 'val_only': False, 'test_freq': 10, 'critic_warmup': 0, 'default_hdfs_dir': None, 'del_local_ckpt_after_load': False, 'default_local_dir': '/home/mshahidul/readctrl/code/RL_model/models/RL_model_subclaim_classifier_v2', 'max_actor_ckpt_to_keep': 1, 'max_critic_ckpt_to_keep': 1, 'ray_wait_register_center_timeout': 300, 'device': 'cuda', 'use_legacy_worker_impl': 'auto', 'remove_previous_ckpt_in_save': True}, 'global_profiler': {'_target_': 'verl.utils.profiler.ProfilerConfig', 'tool': None, 'steps': None, 'profile_continuous_steps': False, 'save_path': 'outputs/profile', 'global_tool_config': {'nsys': {'_target_': 'verl.utils.profiler.config.NsightToolConfig', 'discrete': False, 'controller_nsight_options': {'trace': 'cuda,nvtx,cublas,ucx', 'cuda-memory-usage': 'true', 'cuda-graph-trace': 'graph'}, 'worker_nsight_options': {'trace': 'cuda,nvtx,cublas,ucx', 'cuda-memory-usage': 'true', 'cuda-graph-trace': 'graph', 'capture-range': 'cudaProfilerApi', 'capture-range-end': None, 'kill': 'none'}}, 'torch_memory': {'trace_alloc_max_entries': 100000, 'stack_depth': 32, 'context': 'all', 'stacks': 'all', 'kw_args': {}}}}, 'transfer_queue': {'enable': False}, 'ray_kwargs': {'ray_init': {'num_cpus': None}, 'timeline_json_file': None}, '_wandb': {}} +2026-02-13 21:38:05,497 INFO MainThread:1788862 [wandb_init.py:init():892] starting backend +2026-02-13 21:38:05,758 INFO MainThread:1788862 [wandb_init.py:init():895] sending inform_init request +2026-02-13 21:38:05,766 INFO MainThread:1788862 [wandb_init.py:init():903] backend started and connected +2026-02-13 21:38:05,776 INFO MainThread:1788862 [wandb_init.py:init():973] updated telemetry +2026-02-13 21:38:05,798 INFO MainThread:1788862 [wandb_init.py:init():997] communicating run to backend with 90.0 second timeout +2026-02-13 21:38:07,469 INFO MainThread:1788862 [wandb_init.py:init():1042] starting run threads in backend +2026-02-13 21:38:08,279 INFO MainThread:1788862 [wandb_run.py:_console_start():2529] atexit reg +2026-02-13 21:38:08,279 INFO MainThread:1788862 [wandb_run.py:_redirect():2377] redirect: wrap_raw +2026-02-13 21:38:08,279 INFO MainThread:1788862 [wandb_run.py:_redirect():2446] Wrapping output streams. +2026-02-13 21:38:08,279 INFO MainThread:1788862 [wandb_run.py:_redirect():2469] Redirects installed. +2026-02-13 21:38:08,287 INFO MainThread:1788862 [wandb_init.py:init():1082] run started, returning control to user process diff --git a/code/RL_model/verl/verl_train/wandb/run-20260213_215553-1w3n5xgv/files/output.log b/code/RL_model/verl/verl_train/wandb/run-20260213_215553-1w3n5xgv/files/output.log new file mode 100644 index 0000000000000000000000000000000000000000..d4ee80c9dac2719d4f17cd4a7d4051d937c4aa07 --- /dev/null +++ b/code/RL_model/verl/verl_train/wandb/run-20260213_215553-1w3n5xgv/files/output.log @@ -0,0 +1,18 @@ +wandb: Detected [dspy, litellm, openai] in use. +wandb: Use W&B Weave for improved LLM call tracing. Install Weave with `pip install weave` then add `import weave` to the top of your script. +wandb: For more information, check out the docs at: https://weave-docs.wandb.ai/ +Checkpoint tracker file does not exist: /home/mshahidul/readctrl/code/RL_model/models/RL_model_subclaim_classifier_v2/latest_checkpointed_iteration.txt +Training from scratch +test_gen_batch meta info: {'eos_token_id': 151645, 'pad_token_id': 151643, 'recompute_log_prob': False, 'do_sample': False, 'validate': True, 'global_steps': 0} +validation generation end +("Initial validation metrics: {'val-aux/multiclinsum/reward/mean@1': " + "np.float64(-0.08861527150262616), 'val-core/multiclinsum/acc/mean@1': " + "np.float64(-0.08861526946107783), 'val-aux/num_turns/min': np.int32(2), " + "'val-aux/num_turns/max': np.int32(2), 'val-aux/num_turns/mean': " + 'np.float64(2.0)}') +step:0 - val-aux/multiclinsum/reward/mean@1:np.float64(-0.08861527150262616) - val-core/multiclinsum/acc/mean@1:np.float64(-0.08861526946107783) - val-aux/num_turns/min:np.int32(2) - val-aux/num_turns/max:np.int32(2) - val-aux/num_turns/mean:np.float64(2.0) +Training Progress: 9%|▉ | 4/45 [2:16:56<29:17:49, 2572.44s/it] +step:1 - global_seqlen/min:898423 - global_seqlen/max:917834 - global_seqlen/minmax_diff:19411 - global_seqlen/balanced_min:908126 - global_seqlen/balanced_max:908131 - global_seqlen/mean:908128.5 - actor/entropy:0.3457244634628296 - perf/mfu/actor_infer:0 - actor/pg_loss:np.float64(0.004189887217459432) - actor/kl_loss:np.float64(0.0011186886513314676) - actor/pg_clipfrac:np.float64(0.001004521370987277) - actor/ppo_kl:np.float64(-1.7540993714722692e-05) - actor/pg_clipfrac_lower:np.float64(0.0) - actor/kl_coef:np.float64(0.0010000000000000002) - actor/grad_norm:np.float64(0.3402813673019409) - perf/mfu/actor:np.float64(0.2819438344554268) - perf/max_memory_allocated_gb:np.float64(66.60442161560059) - perf/max_memory_reserved_gb:np.float64(77.384765625) - perf/cpu_memory_used_gb:np.float64(468.8256301879883) - actor/lr:np.float64(1e-06) - training/global_step:1 - training/epoch:0 - critic/score/mean:-0.07906348258256912 - critic/score/max:0.17624999582767487 - critic/score/min:-1.0 - critic/rewards/mean:-0.07906348258256912 - critic/rewards/max:0.17624999582767487 - critic/rewards/min:-1.0 - critic/advantages/mean:-0.002085321582853794 - critic/advantages/max:1.1546986103057861 - critic/advantages/min:-1.1546986103057861 - critic/returns/mean:-0.002085321582853794 - critic/returns/max:1.1546986103057861 - critic/returns/min:-1.1546986103057861 - response_length/mean:340.1171875 - response_length/max:986.0 - response_length/min:59.0 - response_length/clip_ratio:0.0 - response_length_non_aborted/mean:340.1171875 - response_length_non_aborted/max:986.0 - response_length_non_aborted/min:59.0 - response_length_non_aborted/clip_ratio:0.0 - response/aborted_ratio:0.0 - prompt_length/mean:842.341796875 - prompt_length/max:1019.0 - prompt_length/min:500.0 - prompt_length/clip_ratio:0.0 - num_turns/min:np.int32(2) - num_turns/max:np.int32(2) - num_turns/mean:np.float64(2.0) - timing_s/start_profile:0.00042276084423065186 - timing_s/agent_loop/num_preempted/min:np.int64(-1) - timing_s/agent_loop/num_preempted/max:np.int64(-1) - timing_s/agent_loop/num_preempted/mean:np.float64(-1.0) - timing_s/agent_loop/generate_sequences/min:np.float64(9.257702158764005) - timing_s/agent_loop/generate_sequences/max:np.float64(100.75133720226586) - timing_s/agent_loop/generate_sequences/mean:np.float64(53.534057259706366) - timing_s/agent_loop/tool_calls/min:np.float64(0.0) - timing_s/agent_loop/tool_calls/max:np.float64(0.0) - timing_s/agent_loop/tool_calls/mean:np.float64(0.0) - timing_s/agent_loop/slowest/generate_sequences:np.float64(100.75133720226586) - timing_s/agent_loop/slowest/tool_calls:np.float64(0.0) - timing_s/agent_loop/slowest/prompt_length:964 - timing_s/agent_loop/slowest/response_length:789 - timing_s/agent_loop/slowest/num_preempted:np.int64(-1) - timing_s/gen:667.7847814075649 - timing_s/reward:0.0002728048712015152 - timing_s/old_log_prob:98.36985361762345 - timing_s/ref:146.30970324017107 - timing_s/adv:0.12113209627568722 - timing_s/update_actor:352.98185499385 - timing_s/update_weights:34.721503399312496 - timing_s/step:1301.2960510049015 - timing_s/stop_profile:0.00018197670578956604 - timing_per_token_ms/update_actor:0.19434576439008908 - timing_per_token_ms/ref:0.08055561698601633 - timing_per_token_ms/adv:6.669325776896509e-05 - timing_per_token_ms/gen:1.278252711242994 - perf/total_num_tokens:1816257 - perf/time_per_step:1301.2960510049015 - perf/throughput:697.8646398709308 +step:2 - global_seqlen/min:890125 - global_seqlen/max:916063 - global_seqlen/minmax_diff:25938 - global_seqlen/balanced_min:903094 - global_seqlen/balanced_max:903094 - global_seqlen/mean:903094.0 - actor/entropy:0.348809152841568 - perf/mfu/actor_infer:0 - actor/pg_loss:np.float64(0.0031031098668942514) - actor/kl_loss:np.float64(0.002522832196215555) - actor/pg_clipfrac:np.float64(0.0008689985443197656) - actor/ppo_kl:np.float64(1.2226807219425003e-05) - actor/pg_clipfrac_lower:np.float64(0.0) - actor/kl_coef:np.float64(0.0010000000000000002) - actor/grad_norm:np.float64(0.30022165179252625) - perf/mfu/actor:np.float64(0.29121296067506763) - perf/max_memory_allocated_gb:np.float64(67.76724529266357) - perf/max_memory_reserved_gb:np.float64(77.384765625) - perf/cpu_memory_used_gb:np.float64(510.3855285644531) - actor/lr:np.float64(1e-06) - training/global_step:2 - training/epoch:0 - critic/score/mean:-0.0677773579955101 - critic/score/max:0.17624999582767487 - critic/score/min:-1.0 - critic/rewards/mean:-0.0677773579955101 - critic/rewards/max:0.17624999582767487 - critic/rewards/min:-1.0 - critic/advantages/mean:-0.00021065887995064259 - critic/advantages/max:1.1546986103057861 - critic/advantages/min:-1.1546987295150757 - critic/returns/mean:-0.00021065887995064259 - critic/returns/max:1.1546986103057861 - critic/returns/min:-1.1546987295150757 - response_length/mean:327.4212341308594 - response_length/max:1054.0 - response_length/min:76.0 - response_length/clip_ratio:0.0 - response_length_non_aborted/mean:327.4212341308594 - response_length_non_aborted/max:1054.0 - response_length_non_aborted/min:76.0 - response_length_non_aborted/clip_ratio:0.0 - response/aborted_ratio:0.0 - prompt_length/mean:848.482421875 - prompt_length/max:1023.0 - prompt_length/min:505.0 - prompt_length/clip_ratio:0.0 - num_turns/min:np.int32(2) - num_turns/max:np.int32(2) - num_turns/mean:np.float64(2.0) - timing_s/start_profile:9.004026651382446e-05 - timing_s/agent_loop/num_preempted/min:np.int64(-1) - timing_s/agent_loop/num_preempted/max:np.int64(-1) - timing_s/agent_loop/num_preempted/mean:np.float64(-1.0) - timing_s/agent_loop/generate_sequences/min:np.float64(10.453220216557384) - timing_s/agent_loop/generate_sequences/max:np.float64(104.73029360361397) - timing_s/agent_loop/generate_sequences/mean:np.float64(54.800669857322646) - timing_s/agent_loop/tool_calls/min:np.float64(0.0) - timing_s/agent_loop/tool_calls/max:np.float64(0.0) - timing_s/agent_loop/tool_calls/mean:np.float64(0.0) - timing_s/agent_loop/slowest/generate_sequences:np.float64(104.73029360361397) - timing_s/agent_loop/slowest/tool_calls:np.float64(0.0) - timing_s/agent_loop/slowest/prompt_length:993 - timing_s/agent_loop/slowest/response_length:942 - timing_s/agent_loop/slowest/num_preempted:np.int64(-1) - timing_s/gen:498.47939578816295 - timing_s/reward:0.0003405865281820297 - timing_s/old_log_prob:88.77088643424213 - timing_s/ref:89.88370759785175 - timing_s/adv:0.13123660907149315 - timing_s/update_actor:305.75301293842494 - timing_s/update_weights:36.16422727890313 - timing_s/step:1020.1395932789892 - timing_s/stop_profile:0.00018119998276233673 - timing_per_token_ms/update_actor:0.16928083507277478 - timing_per_token_ms/ref:0.049764314455555986 - timing_per_token_ms/adv:7.265944025289346e-05 - timing_per_token_ms/gen:0.9911723275282162 - perf/total_num_tokens:1806188 - perf/time_per_step:1020.1395932789892 - perf/throughput:885.265120528481 +step:3 - global_seqlen/min:915510 - global_seqlen/max:916819 - global_seqlen/minmax_diff:1309 - global_seqlen/balanced_min:916164 - global_seqlen/balanced_max:916165 - global_seqlen/mean:916164.5 - actor/entropy:0.3486952781677246 - perf/mfu/actor_infer:0 - actor/pg_loss:np.float64(0.0024401814929054426) - actor/kl_loss:np.float64(0.003560353665913377) - actor/pg_clipfrac:np.float64(0.001022032242417481) - actor/ppo_kl:np.float64(-3.528642836651367e-05) - actor/pg_clipfrac_lower:np.float64(3.281873432570137e-06) - actor/kl_coef:np.float64(0.0010000000000000002) - actor/grad_norm:np.float64(0.5852286368608475) - perf/mfu/actor:np.float64(0.289663735289053) - perf/max_memory_allocated_gb:np.float64(67.98293256759644) - perf/max_memory_reserved_gb:np.float64(77.384765625) - perf/cpu_memory_used_gb:np.float64(510.8772830963135) - actor/lr:np.float64(1e-06) - training/global_step:3 - training/epoch:0 - critic/score/mean:-0.0731097087264061 - critic/score/max:0.17624999582767487 - critic/score/min:-1.0 - critic/rewards/mean:-0.0731097087264061 - critic/rewards/max:0.17624999582767487 - critic/rewards/min:-1.0 - critic/advantages/mean:-0.0005602409364655614 - critic/advantages/max:1.1546986103057861 - critic/advantages/min:-1.1546986103057861 - critic/returns/mean:-0.0005602409364655614 - critic/returns/max:1.1546986103057861 - critic/returns/min:-1.1546986103057861 - response_length/mean:340.8893127441406 - response_length/max:994.0 - response_length/min:77.0 - response_length/clip_ratio:0.0 - response_length_non_aborted/mean:340.8893127441406 - response_length_non_aborted/max:994.0 - response_length_non_aborted/min:77.0 - response_length_non_aborted/clip_ratio:0.0 - response/aborted_ratio:0.0 - prompt_length/mean:852.033203125 - prompt_length/max:1024.0 - prompt_length/min:499.0 - prompt_length/clip_ratio:0.0078125 - num_turns/min:np.int32(2) - num_turns/max:np.int32(2) - num_turns/mean:np.float64(2.0) - timing_s/start_profile:7.29970633983612e-05 - timing_s/agent_loop/num_preempted/min:np.int64(-1) - timing_s/agent_loop/num_preempted/max:np.int64(-1) - timing_s/agent_loop/num_preempted/mean:np.float64(-1.0) - timing_s/agent_loop/generate_sequences/min:np.float64(10.099095538258553) - timing_s/agent_loop/generate_sequences/max:np.float64(106.84864890947938) - timing_s/agent_loop/generate_sequences/mean:np.float64(56.32817641153937) - timing_s/agent_loop/tool_calls/min:np.float64(0.0) - timing_s/agent_loop/tool_calls/max:np.float64(0.0) - timing_s/agent_loop/tool_calls/mean:np.float64(0.0) - timing_s/agent_loop/slowest/generate_sequences:np.float64(106.84864890947938) - timing_s/agent_loop/slowest/tool_calls:np.float64(0.0) - timing_s/agent_loop/slowest/prompt_length:986 - timing_s/agent_loop/slowest/response_length:895 - timing_s/agent_loop/slowest/num_preempted:np.int64(-1) - timing_s/gen:546.8315224144608 - timing_s/reward:0.00024061650037765503 - timing_s/old_log_prob:95.67794878967106 - timing_s/ref:91.2667645458132 - timing_s/adv:0.1310591846704483 - timing_s/update_actor:312.726605694741 - timing_s/update_weights:34.786637088283896 - timing_s/step:1082.3957688752562 - timing_s/stop_profile:7.178820669651031e-05 - timing_per_token_ms/update_actor:0.1706716455913436 - timing_per_token_ms/ref:0.04980915793278019 - timing_per_token_ms/adv:7.152601125149921e-05 - timing_per_token_ms/gen:1.0443568683599134 - perf/total_num_tokens:1832329 - perf/time_per_step:1082.3957688752562 - perf/throughput:846.4228393575567 +step:4 - global_seqlen/min:891665 - global_seqlen/max:909469 - global_seqlen/minmax_diff:17804 - global_seqlen/balanced_min:900565 - global_seqlen/balanced_max:900569 - global_seqlen/mean:900567.0 - actor/entropy:0.3486519455909729 - perf/mfu/actor_infer:0 - actor/pg_loss:np.float64(0.0020197018044806787) - actor/kl_loss:np.float64(0.0034875107824821803) - actor/pg_clipfrac:np.float64(0.0008864099226381464) - actor/ppo_kl:np.float64(6.629080974107637e-05) - actor/pg_clipfrac_lower:np.float64(0.0) - actor/kl_coef:np.float64(0.0010000000000000002) - actor/grad_norm:np.float64(0.3287675082683563) - perf/mfu/actor:np.float64(0.2895947585778393) - perf/max_memory_allocated_gb:np.float64(67.98293256759644) - perf/max_memory_reserved_gb:np.float64(77.384765625) - perf/cpu_memory_used_gb:np.float64(512.2715435028076) - actor/lr:np.float64(1e-06) - training/global_step:4 - training/epoch:1 - critic/score/mean:-0.06673845648765564 - critic/score/max:0.17624999582767487 - critic/score/min:-2.0 - critic/rewards/mean:-0.06673845648765564 - critic/rewards/max:0.17624999582767487 - critic/rewards/min:-2.0 - critic/advantages/mean:-0.0020189464557915926 - critic/advantages/max:1.1546986103057861 - critic/advantages/min:-1.1546995639801025 - critic/returns/mean:-0.0020189464557915926 - critic/returns/max:1.1546986103057861 - critic/returns/min:-1.1546995639801025 - response_length/mean:328.0625 - response_length/max:965.0 - response_length/min:56.0 - response_length/clip_ratio:0.0 - response_length_non_aborted/mean:328.0625 - response_length_non_aborted/max:965.0 - response_length_non_aborted/min:56.0 - response_length_non_aborted/clip_ratio:0.0 - response/aborted_ratio:0.0 - prompt_length/mean:844.55078125 - prompt_length/max:1024.0 - prompt_length/min:504.0 - prompt_length/clip_ratio:0.00390625 - num_turns/min:np.int32(2) - num_turns/max:np.int32(2) - num_turns/mean:np.float64(2.0) - timing_s/start_profile:0.0003397967666387558 - timing_s/agent_loop/num_preempted/min:np.int64(-1) - timing_s/agent_loop/num_preempted/max:np.int64(-1) - timing_s/agent_loop/num_preempted/mean:np.float64(-1.0) - timing_s/agent_loop/generate_sequences/min:np.float64(9.421482956036925) - timing_s/agent_loop/generate_sequences/max:np.float64(104.48706599511206) - timing_s/agent_loop/generate_sequences/mean:np.float64(55.07142766390947) - timing_s/agent_loop/tool_calls/min:np.float64(0.0) - timing_s/agent_loop/tool_calls/max:np.float64(0.0) - timing_s/agent_loop/tool_calls/mean:np.float64(0.0) - timing_s/agent_loop/slowest/generate_sequences:np.float64(104.48706599511206) - timing_s/agent_loop/slowest/tool_calls:np.float64(0.0) - timing_s/agent_loop/slowest/prompt_length:971 - timing_s/agent_loop/slowest/response_length:901 - timing_s/agent_loop/slowest/num_preempted:np.int64(-1) - timing_s/gen:4280.501511642709 - timing_s/reward:0.00020960159599781036 - timing_s/old_log_prob:92.02643754333258 - timing_s/ref:89.52895365096629 - timing_s/adv:0.15796544775366783 - timing_s/update_actor:311.4067133292556 - timing_s/update_weights:36.13739615119994 - timing_s/step:4810.773923719302 - timing_s/stop_profile:0.00030566006898880005 - timing_per_token_ms/update_actor:0.17289480589964742 - timing_per_token_ms/ref:0.04970699217879752 - timing_per_token_ms/adv:8.770332898810851e-05 - timing_per_token_ms/gen:8.494676588482546 - perf/total_num_tokens:1801134 - perf/time_per_step:4810.773923719302 - perf/throughput:187.19794658397797 diff --git a/code/RL_model/verl/verl_train/wandb/run-20260213_215553-1w3n5xgv/files/requirements.txt b/code/RL_model/verl/verl_train/wandb/run-20260213_215553-1w3n5xgv/files/requirements.txt new file mode 100644 index 0000000000000000000000000000000000000000..3c860bd0e3457bae4d28e48b93f4be436aae99df --- /dev/null +++ b/code/RL_model/verl/verl_train/wandb/run-20260213_215553-1w3n5xgv/files/requirements.txt @@ -0,0 +1,291 @@ +verl==0.8.0.dev0 +psutil==7.1.3 +colorama==0.4.6 +annotated-doc==0.0.4 +sentry-sdk==2.51.0 +requests==2.32.5 +nvidia-cufile-cu12==1.13.1.3 +ml_dtypes==0.5.4 +xformers==0.0.32.post1 +sglang==0.5.2 +multidict==6.7.1 +typing_extensions==4.15.0 +nvidia-cusparselt-cu12==0.7.1 +openai-harmony==0.0.4 +transformers==4.56.1 +Werkzeug==3.1.5 +identify==2.6.16 +gepa==0.0.26 +pytest==9.0.2 +nvidia-cuda-runtime-cu12==12.8.90 +GitPython==3.1.46 +cupy-cuda12x==13.6.0 +tokenizers==0.22.2 +unsloth_zoo==2026.2.1 +pybind11==3.0.1 +google-api-core==2.29.0 +partial-json-parser==0.2.1.1.post7 +aiohttp-cors==0.8.1 +sniffio==1.3.1 +tensordict==0.10.0 +smart_open==7.5.0 +cffi==2.0.0 +wcwidth==0.5.3 +asttokens==3.0.1 +opencensus==0.11.4 +rpds-py==0.30.0 +py-spy==0.4.1 +litellm==1.81.9 +gguf==0.17.1 +nvidia-nvjitlink-cu12==12.8.93 +httpx==0.28.1 +cuda-python==13.1.1 +annotated-types==0.7.0 +regex==2026.1.15 +vllm==0.11.0 +idna==3.11 +parso==0.8.5 +pydantic-extra-types==2.11.0 +MarkupSafe==3.0.3 +cryptography==46.0.4 +openai==2.17.0 +filelock==3.20.3 +modelscope==1.34.0 +outlines==0.1.11 +dnspython==2.8.0 +scipy==1.17.0 +zipp==3.23.0 +PyYAML==6.0.3 +onnx==1.20.1 +torchdata==0.11.0 +unsloth==2026.2.1 +cuda-pathfinder==1.3.3 +asyncer==0.0.8 +verl==0.8.0.dev0 +httptools==0.7.1 +opencv-python-headless==4.13.0.90 +importlib_metadata==8.7.1 +peft==0.18.1 +opentelemetry-sdk==1.39.1 +python-json-logger==4.0.0 +alembic==1.18.3 +cuda-bindings==13.1.1 +mdurl==0.1.2 +fsspec==2025.9.0 +referencing==0.37.0 +xxhash==3.6.0 +interegular==0.3.3 +fastapi-cli==0.0.20 +uv==0.9.28 +tensorboard==2.20.0 +tyro==1.0.6 +nvidia-cublas-cu12==12.8.4.1 +sentencepiece==0.2.1 +rich-toolkit==0.18.1 +numpy==2.2.0 +yarl==1.22.0 +opencv-fixer==0.2.5 +tqdm==4.67.2 +python-dotenv==1.2.1 +Mako==1.3.10 +timm==1.0.16 +aiohappyeyeballs==2.6.1 +decord==0.6.0 +jiter==0.12.0 +airportsdata==20250909 +markdown-it-py==4.0.0 +nvidia-cusolver-cu12==11.7.3.90 +pyarrow==23.0.0 +opentelemetry-proto==1.39.1 +anyio==4.12.1 +pycryptodomex==3.23.0 +prometheus_client==0.24.1 +aiohttp==3.13.3 +urllib3==2.6.3 +pexpect==4.9.0 +pydantic-settings==2.12.0 +distro==1.9.0 +av==16.1.0 +cloudpickle==3.1.2 +mpmath==1.3.0 +certifi==2026.1.4 +antlr4-python3-runtime==4.9.3 +torchvision==0.23.0 +accelerate==1.12.0 +watchfiles==1.1.1 +ruff==0.14.14 +cut-cross-entropy==25.1.1 +wheel==0.46.3 +torchao==0.16.0 +omegaconf==2.3.0 +nvidia-cufft-cu12==11.3.3.83 +frozendict==2.4.7 +sympy==1.14.0 +setproctitle==1.3.7 +optuna==4.7.0 +setuptools==79.0.1 +py-cpuinfo==9.0.0 +ipython_pygments_lexers==1.1.1 +rich==14.3.2 +uvicorn==0.40.0 +outlines_core==0.2.11 +llvmlite==0.44.0 +nvidia-cuda-cupti-cu12==12.8.90 +attrs==25.4.0 +anthropic==0.77.0 +packaging==25.0 +fastrlock==0.8.3 +astor==0.8.1 +pluggy==1.6.0 +nvidia-cuda-nvrtc-cu12==12.8.93 +psutil==7.2.2 +virtualenv==20.36.1 +cbor2==5.8.0 +tenacity==9.1.4 +compressed-tensors==0.11.0 +SQLAlchemy==2.0.46 +nvidia-cusparse-cu12==12.5.8.93 +networkx==3.6.1 +httpcore==1.0.9 +onnxscript==0.3.1 +smmap==5.0.2 +opencv-python==4.13.0.90 +traitlets==5.14.3 +python-multipart==0.0.22 +pyvers==0.1.0 +huggingface-hub==0.36.0 +pillow==12.1.0 +jsonschema==4.26.0 +cfgv==3.5.0 +optree==0.18.0 +email-validator==2.3.0 +tabulate==0.9.0 +pre_commit==4.5.1 +msgpack==1.1.2 +depyf==0.19.0 +numba==0.61.2 +six==1.17.0 +aiosignal==1.4.0 +nvidia-nvtx-cu12==12.8.90 +propcache==0.4.1 +torch_memory_saver==0.0.8 +h11==0.16.0 +frozenlist==1.8.0 +websockets==16.0 +nvidia-cudnn-frontend==1.18.0 +build==1.4.0 +google-auth==2.48.0 +pycountry==24.6.1 +colorlog==6.10.1 +typeguard==4.4.4 +stack-data==0.6.3 +typing-inspection==0.4.2 +googleapis-common-protos==1.72.0 +pandas==3.0.0 +typer==0.21.1 +protobuf==6.33.5 +fastapi==0.128.0 +blake3==1.0.8 +opentelemetry-semantic-conventions==0.60b1 +opentelemetry-exporter-prometheus==0.60b1 +nvidia-cudnn-cu12==9.10.2.21 +Markdown==3.10.1 +liger_kernel==0.6.4 +json_repair==0.57.1 +nodeenv==1.10.0 +prompt_toolkit==3.0.52 +torchaudio==2.8.0 +datasets==4.3.0 +codetiming==1.4.0 +diffusers==0.36.0 +platformdirs==4.5.1 +jsonschema-specifications==2025.9.1 +hydra-core==1.3.2 +tensorboard-data-server==0.7.2 +lm-format-enforcer==0.11.3 +bitsandbytes==0.49.1 +pyasn1_modules==0.4.2 +tiktoken==0.12.0 +starlette==0.50.0 +pyproject_hooks==1.2.0 +flash_attn==2.8.1 +rsa==4.9.1 +ray==2.53.0 +nest-asyncio==1.6.0 +lark==1.2.2 +fastar==0.8.0 +orjson==3.11.6 +prometheus-fastapi-instrumentator==7.1.0 +opentelemetry-api==1.39.1 +mathruler==0.1.0 +pydantic_core==2.41.5 +fastapi-cloud-cli==0.11.0 +pynvml==13.0.1 +loguru==0.7.3 +torch==2.8.0 +msgspec==0.20.0 +nvidia-curand-cu12==10.3.9.90 +blobfile==3.0.0 +gitdb==4.0.12 +llguidance==0.7.30 +hf_transfer==0.1.9 +nvidia-nccl-cu12==2.27.3 +trl==0.24.0 +qwen-vl-utils==0.0.14 +ptyprocess==0.7.0 +ipdb==0.13.13 +opencensus-context==0.1.3 +jedi==0.19.2 +click==8.3.1 +multiprocess==0.70.16 +soxr==1.0.0 +sgl-kernel==0.3.9.post2 +colorful==0.5.8 +pyasn1==0.6.2 +charset-normalizer==3.4.4 +nvidia-ml-py==13.590.48 +hf-xet==1.2.0 +dill==0.4.0 +absl-py==2.4.0 +pydantic==2.12.5 +dspy==3.1.3 +wrapt==2.1.0 +flashinfer-python==0.3.1 +python-dateutil==2.9.0.post0 +cachetools==7.0.0 +soundfile==0.13.1 +diskcache==5.6.3 +onnx-ir==0.1.15 +docstring_parser==0.17.0 +matplotlib-inline==0.2.1 +Pygments==2.19.2 +wandb==0.24.1 +pure_eval==0.2.3 +ninja==1.13.0 +proto-plus==1.27.0 +pyzmq==27.1.0 +iniconfig==2.3.0 +Jinja2==3.1.6 +megatron-core==0.13.1 +uvloop==0.22.1 +fastuuid==0.14.0 +pycparser==3.0 +pylatexenc==2.10 +decorator==5.2.1 +shellingham==1.5.4 +lxml==6.0.2 +safetensors==0.7.0 +xgrammar==0.1.25 +pybase64==1.4.3 +ipython==9.9.0 +greenlet==3.3.1 +mistral_common==1.9.0 +rignore==0.7.6 +einops==0.8.2 +distlib==0.4.0 +triton==3.4.0 +executing==2.2.1 +grpcio==1.76.0 +pip==25.3 +verl==0.8.0.dev0 +verl==0.8.0.dev0 diff --git a/code/RL_model/verl/verl_train/wandb/run-20260213_215553-1w3n5xgv/files/wandb-metadata.json b/code/RL_model/verl/verl_train/wandb/run-20260213_215553-1w3n5xgv/files/wandb-metadata.json new file mode 100644 index 0000000000000000000000000000000000000000..03949032a84a5d4829cbf55af2c2a4c8d566ffa2 --- /dev/null +++ b/code/RL_model/verl/verl_train/wandb/run-20260213_215553-1w3n5xgv/files/wandb-metadata.json @@ -0,0 +1,93 @@ +{ + "os": "Linux-5.15.0-160-generic-x86_64-with-glibc2.35", + "python": "CPython 3.12.12", + "startedAt": "2026-02-14T02:55:53.529220Z", + "args": [ + "--node-ip-address=172.16.34.29", + "--node-manager-port=33067", + "--object-store-name=/tmp/ray/session_2026-02-13_21-51-10_490201_1819839/sockets/plasma_store", + "--raylet-name=/tmp/ray/session_2026-02-13_21-51-10_490201_1819839/sockets/raylet", + "--redis-address=None", + "--metrics-agent-port=52433", + "--logging-rotate-bytes=536870912", + "--logging-rotate-backup-count=5", + "--runtime-env-agent-port=58423", + "--gcs-address=172.16.34.29:62343", + "--session-name=session_2026-02-13_21-51-10_490201_1819839", + "--temp-dir=/tmp/ray", + "--webui=127.0.0.1:8297", + "--cluster-id=8297edc960cc20b6301e6203e32f4da166a645411fe0ebf59f1b9637", + "--startup-token=128", + "--worker-launch-time-ms=1771037488684", + "--node-id=1ceed7dd7eccc70d36768bfdf588864c3956096581eb695c423e66bf", + "--runtime-env-hash=1096984665" + ], + "program": "/home/mshahidul/miniconda3/envs/verl2/lib/python3.12/site-packages/ray/_private/workers/default_worker.py", + "git": { + "remote": "https://github.com/verl-project/verl", + "commit": "d9939add7a2a01923a9088891f913a5d20c4e622" + }, + "email": "shahidulshakib034@gmail.com", + "root": "/data/home_beta/mshahidul/readctrl/code/RL_model/verl/verl_train", + "host": "gamma", + "executable": "/home/mshahidul/miniconda3/envs/verl2/bin/python3", + "cpu_count": 64, + "cpu_count_logical": 128, + "gpu": "NVIDIA A100 80GB PCIe", + "gpu_count": 6, + "disk": { + "/": { + "total": "3766429188096", + "used": "214493904896" + } + }, + "memory": { + "total": "1081814863872" + }, + "gpu_nvidia": [ + { + "name": "NVIDIA A100 80GB PCIe", + "memoryTotal": "85899345920", + "cudaCores": 6912, + "architecture": "Ampere", + "uuid": "GPU-4a3678c7-34a9-356f-f7b7-7f7e2f44b596" + }, + { + "name": "NVIDIA A100 80GB PCIe", + "memoryTotal": "85899345920", + "cudaCores": 6912, + "architecture": "Ampere", + "uuid": "GPU-df506764-0db5-91b4-8ec9-154a3bb8123f" + }, + { + "name": "NVIDIA A100 80GB PCIe", + "memoryTotal": "85899345920", + "cudaCores": 6912, + "architecture": "Ampere", + "uuid": "GPU-2c3dbd62-b384-2996-a0f6-b32dcfcc3538" + }, + { + "name": "NVIDIA A100 80GB PCIe", + "memoryTotal": "85899345920", + "cudaCores": 6912, + "architecture": "Ampere", + "uuid": "GPU-1ff3dabe-4b9a-ea62-5cc3-01f12f32d328" + }, + { + "name": "NVIDIA H100 PCIe", + "memoryTotal": "85520809984", + "cudaCores": 14592, + "architecture": "Hopper", + "uuid": "GPU-eefc4b8c-0e79-c1d6-a9ff-8325040572eb" + }, + { + "name": "NVIDIA H100 PCIe", + "memoryTotal": "85520809984", + "cudaCores": 14592, + "architecture": "Hopper", + "uuid": "GPU-d42b6057-13e8-1e88-6aa1-9307df72dece" + } + ], + "cudaVersion": "13.0", + "writerId": "ne1bg5sjzbb5rd8lydjw7src49n0d1le" +} \ No newline at end of file diff --git a/code/RL_model/verl/verl_train/wandb/run-20260213_215553-1w3n5xgv/logs/debug-core.log b/code/RL_model/verl/verl_train/wandb/run-20260213_215553-1w3n5xgv/logs/debug-core.log new file mode 100644 index 0000000000000000000000000000000000000000..530d6b25a35553e9e2cad9372b4569a6944658fe --- /dev/null +++ b/code/RL_model/verl/verl_train/wandb/run-20260213_215553-1w3n5xgv/logs/debug-core.log @@ -0,0 +1,7 @@ +{"time":"2026-02-13T21:55:53.707716286-05:00","level":"INFO","msg":"main: starting server","port-filename":"/tmp/tmpz4fj8qix/port-1829046.txt","pid":1829046,"log-level":0,"disable-analytics":false,"shutdown-on-parent-exit":false,"enable-dcgm-profiling":false} +{"time":"2026-02-13T21:55:53.709174832-05:00","level":"INFO","msg":"server: will exit if parent process dies","ppid":1829046} +{"time":"2026-02-13T21:55:53.709061448-05:00","level":"INFO","msg":"server: accepting connections","addr":{"Name":"/tmp/wandb-1829046-1837054-2859281923/socket","Net":"unix"}} +{"time":"2026-02-13T21:55:53.864997659-05:00","level":"INFO","msg":"connection: ManageConnectionData: new connection created","id":"1(@)"} +{"time":"2026-02-13T21:55:53.881144455-05:00","level":"INFO","msg":"handleInformInit: received","streamId":"1w3n5xgv","id":"1(@)"} +{"time":"2026-02-13T21:55:54.896818121-05:00","level":"INFO","msg":"handleInformInit: stream started","streamId":"1w3n5xgv","id":"1(@)"} +{"time":"2026-02-13T21:56:01.063883519-05:00","level":"INFO","msg":"connection: cancelling request","id":"1(@)","requestId":"9cpwhu363uoy"} diff --git a/code/RL_model/verl/verl_train/wandb/run-20260213_215553-1w3n5xgv/logs/debug-internal.log b/code/RL_model/verl/verl_train/wandb/run-20260213_215553-1w3n5xgv/logs/debug-internal.log new file mode 100644 index 0000000000000000000000000000000000000000..21bb8fb8b6557a4215a334fe99961734b6ad5a21 --- /dev/null +++ b/code/RL_model/verl/verl_train/wandb/run-20260213_215553-1w3n5xgv/logs/debug-internal.log @@ -0,0 +1,6 @@ +{"time":"2026-02-13T21:55:53.88265126-05:00","level":"INFO","msg":"stream: starting","core version":"0.24.1"} +{"time":"2026-02-13T21:55:54.894422326-05:00","level":"INFO","msg":"stream: created new stream","id":"1w3n5xgv"} +{"time":"2026-02-13T21:55:54.894629734-05:00","level":"INFO","msg":"handler: started","stream_id":"1w3n5xgv"} +{"time":"2026-02-13T21:55:54.896791659-05:00","level":"INFO","msg":"stream: started","id":"1w3n5xgv"} +{"time":"2026-02-13T21:55:54.896858048-05:00","level":"INFO","msg":"writer: started","stream_id":"1w3n5xgv"} +{"time":"2026-02-13T21:55:54.896866774-05:00","level":"INFO","msg":"sender: started","stream_id":"1w3n5xgv"} diff --git a/code/RL_model/verl/verl_train/wandb/run-20260213_215553-1w3n5xgv/logs/debug.log b/code/RL_model/verl/verl_train/wandb/run-20260213_215553-1w3n5xgv/logs/debug.log new file mode 100644 index 0000000000000000000000000000000000000000..d32587641aaf2b69ff48db8cfc7434a701ab6f05 --- /dev/null +++ b/code/RL_model/verl/verl_train/wandb/run-20260213_215553-1w3n5xgv/logs/debug.log @@ -0,0 +1,19 @@ +2026-02-13 21:55:53,552 INFO MainThread:1829046 [wandb_setup.py:_flush():81] Current SDK version is 0.24.1 +2026-02-13 21:55:53,552 INFO MainThread:1829046 [wandb_setup.py:_flush():81] Configure stats pid to 1829046 +2026-02-13 21:55:53,553 INFO MainThread:1829046 [wandb_setup.py:_flush():81] Loading settings from environment variables +2026-02-13 21:55:53,553 INFO MainThread:1829046 [wandb_init.py:setup_run_log_directory():717] Logging user logs to /data/home_beta/mshahidul/readctrl/code/RL_model/verl/verl_train/wandb/run-20260213_215553-1w3n5xgv/logs/debug.log +2026-02-13 21:55:53,553 INFO MainThread:1829046 [wandb_init.py:setup_run_log_directory():718] Logging internal logs to /data/home_beta/mshahidul/readctrl/code/RL_model/verl/verl_train/wandb/run-20260213_215553-1w3n5xgv/logs/debug-internal.log +2026-02-13 21:55:53,553 INFO MainThread:1829046 [wandb_init.py:init():844] calling init triggers +2026-02-13 21:55:53,555 INFO MainThread:1829046 [wandb_init.py:init():849] wandb.init called with sweep_config: {} +config: {'actor_rollout_ref': {'actor': {'optim': {'_target_': 'verl.workers.config.FSDPOptimizerConfig', 'optimizer': 'AdamW', 'optimizer_impl': 'torch.optim', 'lr': 1e-06, 'lr_warmup_steps_ratio': 0.0, 'total_training_steps': 45, 'weight_decay': 0.01, 'lr_warmup_steps': -1, 'betas': [0.9, 0.999], 'clip_grad': 1.0, 'min_lr_ratio': 0.0, 'num_cycles': 0.5, 'lr_scheduler_type': 'constant', 'warmup_style': None, 'override_optimizer_config': None}, 'fsdp_config': {'_target_': 'verl.workers.config.FSDPEngineConfig', 'wrap_policy': {'min_num_params': 0}, 'param_offload': True, 'optimizer_offload': True, 'offload_policy': False, 'reshard_after_forward': True, 'fsdp_size': -1, 'forward_prefetch': False, 'model_dtype': 'fp32', 'use_orig_params': False, 'seed': 42, 'full_determinism': False, 'ulysses_sequence_parallel_size': 1, 'entropy_from_logits_with_chunking': False, 'use_torch_compile': True, 'entropy_checkpointing': False, 'forward_only': False, 'strategy': 'fsdp', 'dtype': 'bfloat16'}, '_target_': 'verl.workers.config.FSDPActorConfig', 'rollout_n': 3, 'strategy': 'fsdp', 'ppo_mini_batch_size': 256, 'ppo_micro_batch_size': None, 'ppo_micro_batch_size_per_gpu': 16, 'use_dynamic_bsz': False, 'ppo_max_token_len_per_gpu': 16384, 'clip_ratio': 0.2, 'clip_ratio_low': 0.2, 'clip_ratio_high': 0.2, 'tau_pos': 1.0, 'tau_neg': 1.05, 'freeze_vision_tower': False, 'policy_loss': {'_target_': 'verl.workers.config.PolicyLossConfig', 'loss_mode': 'vanilla', 'clip_cov_ratio': 0.0002, 'clip_cov_lb': 1.0, 'clip_cov_ub': 5.0, 'kl_cov_ratio': 0.0002, 'ppo_kl_coef': 0.1}, 'clip_ratio_c': 3.0, 'loss_agg_mode': 'token-mean', 'loss_scale_factor': None, 'entropy_coeff': 0, 'calculate_entropy': False, 'use_kl_loss': True, 'use_prefix_grouper': False, 'use_torch_compile': True, 'kl_loss_coef': 0.001, 'kl_loss_type': 'low_var_kl', 'ppo_epochs': 1, 'shuffle': False, 'data_loader_seed': 42, 'checkpoint': {'_target_': 'verl.trainer.config.CheckpointConfig', 'save_contents': ['model', 'optimizer', 'extra'], 'load_contents': ['model', 'optimizer', 'extra'], 'async_save': False}, 'use_fused_kernels': False, 'profiler': {'_target_': 'verl.utils.profiler.ProfilerConfig', 'tool': None, 'enable': False, 'all_ranks': False, 'ranks': [], 'save_path': 'outputs/profile', 'tool_config': {'nsys': {'_target_': 'verl.utils.profiler.config.NsightToolConfig', 'discrete': False}, 'npu': {'_target_': 'verl.utils.profiler.config.NPUToolConfig', 'contents': [], 'level': 'level0', 'analysis': True, 'discrete': False}, 'torch': {'_target_': 'verl.utils.profiler.config.TorchProfilerToolConfig', 'contents': [], 'discrete': False}, 'torch_memory': {'_target_': 'verl.utils.profiler.config.TorchMemoryToolConfig', 'trace_alloc_max_entries': 100000, 'stack_depth': 32}}}, 'router_replay': {'_target_': 'verl.workers.config.RouterReplayConfig', 'mode': 'disabled', 'record_file': None, 'replay_file': None}, 'grad_clip': 1.0, 'ulysses_sequence_parallel_size': 1, 'entropy_from_logits_with_chunking': False, 'entropy_checkpointing': False, 'use_remove_padding': True, 'calculate_sum_pi_squared': False, 'sum_pi_squared_checkpointing': False}, 'ref': {'rollout_n': 3, 'strategy': 'fsdp', 'use_torch_compile': True, 'log_prob_micro_batch_size': None, 'log_prob_micro_batch_size_per_gpu': 32, 'log_prob_use_dynamic_bsz': False, 'log_prob_max_token_len_per_gpu': 16384, 'profiler': {'_target_': 'verl.utils.profiler.ProfilerConfig', 'tool': None, 'enable': False, 'all_ranks': False, 'ranks': [], 'save_path': 'outputs/profile', 'tool_config': {'nsys': {'_target_': 'verl.utils.profiler.config.NsightToolConfig', 'discrete': False}, 'npu': {'_target_': 'verl.utils.profiler.config.NPUToolConfig', 'contents': [], 'level': 'level0', 'analysis': True, 'discrete': False}, 'torch': {'_target_': 'verl.utils.profiler.config.TorchProfilerToolConfig', 'contents': [], 'discrete': False}, 'torch_memory': {'_target_': 'verl.utils.profiler.config.TorchMemoryToolConfig', 'trace_alloc_max_entries': 100000, 'stack_depth': 32}}}, 'router_replay': {'_target_': 'verl.workers.config.RouterReplayConfig', 'mode': 'disabled', 'record_file': None, 'replay_file': None}, 'fsdp_config': {'_target_': 'verl.workers.config.FSDPEngineConfig', 'wrap_policy': {'min_num_params': 0}, 'param_offload': True, 'optimizer_offload': False, 'offload_policy': False, 'reshard_after_forward': True, 'fsdp_size': -1, 'forward_prefetch': False, 'model_dtype': 'fp32', 'use_orig_params': False, 'seed': 42, 'full_determinism': False, 'ulysses_sequence_parallel_size': 1, 'entropy_from_logits_with_chunking': False, 'use_torch_compile': True, 'entropy_checkpointing': False, 'forward_only': True, 'strategy': 'fsdp', 'dtype': 'bfloat16'}, '_target_': 'verl.workers.config.FSDPActorConfig', 'ulysses_sequence_parallel_size': 1, 'entropy_from_logits_with_chunking': False, 'entropy_checkpointing': False}, 'rollout': {'_target_': 'verl.workers.config.RolloutConfig', 'name': 'vllm', 'mode': 'async', 'temperature': 1.0, 'top_k': -1, 'top_p': 1, 'prompt_length': 1024, 'response_length': 2048, 'dtype': 'bfloat16', 'gpu_memory_utilization': 0.4, 'ignore_eos': False, 'enforce_eager': True, 'cudagraph_capture_sizes': None, 'free_cache_engine': True, 'tensor_model_parallel_size': 1, 'data_parallel_size': 1, 'expert_parallel_size': 1, 'pipeline_model_parallel_size': 1, 'max_num_batched_tokens': 8192, 'max_model_len': 8192, 'max_num_seqs': 1024, 'enable_chunked_prefill': True, 'enable_prefix_caching': True, 'logprobs_mode': 'processed_logprobs', 'scheduling_policy': 'fcfs', 'load_format': 'dummy', 'log_prob_micro_batch_size': None, 'log_prob_micro_batch_size_per_gpu': 32, 'log_prob_use_dynamic_bsz': False, 'log_prob_max_token_len_per_gpu': 16384, 'disable_log_stats': True, 'do_sample': True, 'n': 3, 'over_sample_rate': 0, 'multi_stage_wake_up': False, 'engine_kwargs': {'vllm': {}, 'sglang': {}, 'trtllm': {}}, 'val_kwargs': {'_target_': 'verl.workers.config.SamplingConfig', 'top_k': -1, 'top_p': 1.0, 'temperature': 0, 'n': 1, 'do_sample': False}, 'multi_turn': {'_target_': 'verl.workers.config.MultiTurnConfig', 'enable': False, 'max_assistant_turns': None, 'tool_config_path': None, 'max_user_turns': None, 'max_parallel_calls': 1, 'max_tool_response_length': 256, 'tool_response_truncate_side': 'middle', 'interaction_config_path': None, 'use_inference_chat_template': False, 'tokenization_sanity_check_mode': 'strict', 'format': 'hermes', 'num_repeat_rollouts': None}, 'calculate_log_probs': False, 'agent': {'_target_': 'verl.workers.config.AgentLoopConfig', 'num_workers': 8, 'default_agent_loop': 'single_turn_agent', 'agent_loop_config_path': None, 'custom_async_server': {'_target_': 'verl.workers.config.CustomAsyncServerConfig', 'path': None, 'name': None}}, 'checkpoint_engine': {'_target_': 'verl.workers.config.CheckpointEngineConfig', 'backend': 'naive', 'update_weights_bucket_megabytes': 2048, 'engine_kwargs': {}}, 'trace': {'_target_': 'verl.workers.config.TraceConfig', 'backend': None, 'token2text': False, 'max_samples_per_step_per_worker': None}, 'skip_rollout': False, 'skip_dump_dir': '/tmp/rollout_dump', 'skip_tokenizer_init': True, 'enable_rollout_routing_replay': False, 'profiler': {'_target_': 'verl.utils.profiler.ProfilerConfig', 'tool': None, 'enable': False, 'all_ranks': False, 'ranks': [], 'save_path': 'outputs/profile', 'tool_config': {'nsys': {'_target_': 'verl.utils.profiler.config.NsightToolConfig', 'discrete': False}, 'npu': {'_target_': 'verl.utils.profiler.config.NPUToolConfig', 'contents': [], 'level': 'level0', 'analysis': True, 'discrete': False}, 'torch': {'_target_': 'verl.utils.profiler.config.TorchProfilerToolConfig', 'contents': [], 'discrete': False}, 'torch_memory': {'_target_': 'verl.utils.profiler.config.TorchMemoryToolConfig', 'trace_alloc_max_entries': 100000, 'stack_depth': 32}}}, 'prometheus': {'_target_': 'verl.workers.config.PrometheusConfig', 'enable': False, 'port': 9090, 'file': '/tmp/ray/session_latest/metrics/prometheus/prometheus.yml', 'served_model_name': 'Qwen/Qwen3-4B-Instruct-2507'}, 'quantization': None, 'quantization_config_file': None, 'mtp': {'_target_': 'verl.workers.config.MtpConfig', 'enable': False, 'enable_train': False, 'enable_rollout': False, 'detach_encoder': False, 'mtp_loss_scaling_factor': 0.1, 'speculative_algorithm': 'EAGLE', 'speculative_num_steps': 3, 'speculative_eagle_topk': 1, 'speculative_num_draft_tokens': 4, 'method': 'mtp', 'num_speculative_tokens': 1}, 'layered_summon': False}, 'model': {'_target_': 'verl.workers.config.HFModelConfig', 'path': 'Qwen/Qwen3-4B-Instruct-2507', 'hf_config_path': None, 'tokenizer_path': None, 'use_shm': False, 'trust_remote_code': False, 'custom_chat_template': None, 'external_lib': None, 'override_config': {}, 'enable_gradient_checkpointing': True, 'enable_activation_offload': False, 'use_remove_padding': True, 'lora_rank': 0, 'lora_alpha': 16, 'target_modules': 'all-linear', 'exclude_modules': None, 'lora_adapter_path': None, 'use_liger': False, 'use_fused_kernels': False, 'fused_kernel_options': {'impl_backend': 'torch'}, 'tiled_mlp': {'enabled': False, 'num_shards': 4}, 'mtp': {'_target_': 'verl.workers.config.MtpConfig', 'enable': False, 'enable_train': False, 'enable_rollout': False, 'detach_encoder': False, 'mtp_loss_scaling_factor': 0.1, 'speculative_algorithm': 'EAGLE', 'speculative_num_steps': 3, 'speculative_eagle_topk': 1, 'speculative_num_draft_tokens': 4, 'method': 'mtp', 'num_speculative_tokens': 1}}, 'hybrid_engine': True, 'nccl_timeout': 600}, 'data': {'tokenizer': None, 'use_shm': False, 'train_files': '/home/mshahidul/readctrl/code/RL_model/verl/verl_train/dataset/train.parquet', 'val_files': '/home/mshahidul/readctrl/code/RL_model/verl/verl_train/dataset/test.parquet', 'train_max_samples': -1, 'val_max_samples': -1, 'prompt_key': 'prompt', 'reward_fn_key': 'data_source', 'max_prompt_length': 1024, 'max_response_length': 2048, 'train_batch_size': 512, 'val_batch_size': None, 'tool_config_path': None, 'return_raw_input_ids': False, 'return_raw_chat': True, 'return_full_prompt': False, 'shuffle': True, 'seed': None, 'dataloader_num_workers': 8, 'image_patch_size': 14, 'validation_shuffle': False, 'filter_overlong_prompts': True, 'filter_overlong_prompts_workers': 1, 'truncation': 'error', 'image_key': 'images', 'video_key': 'videos', 'trust_remote_code': False, 'custom_cls': {'path': None, 'name': None}, 'return_multi_modal_inputs': True, 'sampler': {'class_path': None, 'class_name': None}, 'datagen': {'path': None, 'name': None}, 'apply_chat_template_kwargs': {}}, 'reward_manager': {'_target_': 'verl.trainer.config.config.RewardManagerConfig', 'source': 'register', 'name': 'naive', 'module': {'_target_': 'verl.trainer.config.config.ModuleConfig', 'path': None, 'name': 'custom_reward_manager'}}, 'critic': {'optim': {'_target_': 'verl.workers.config.FSDPOptimizerConfig', 'optimizer': 'AdamW', 'optimizer_impl': 'torch.optim', 'lr': 1e-05, 'lr_warmup_steps_ratio': 0.0, 'total_training_steps': 45, 'weight_decay': 0.01, 'lr_warmup_steps': -1, 'betas': [0.9, 0.999], 'clip_grad': 1.0, 'min_lr_ratio': 0.0, 'num_cycles': 0.5, 'lr_scheduler_type': 'constant', 'warmup_style': None, 'override_optimizer_config': None}, 'model': {'fsdp_config': {'_target_': 'verl.workers.config.FSDPEngineConfig', 'wrap_policy': {'min_num_params': 0}, 'param_offload': False, 'optimizer_offload': False, 'offload_policy': False, 'reshard_after_forward': True, 'fsdp_size': -1, 'forward_prefetch': False, 'model_dtype': 'fp32', 'use_orig_params': False, 'seed': 42, 'full_determinism': False, 'ulysses_sequence_parallel_size': 1, 'entropy_from_logits_with_chunking': False, 'use_torch_compile': True, 'entropy_checkpointing': False, 'forward_only': False, 'strategy': 'fsdp', 'dtype': 'bfloat16'}, 'path': '~/models/deepseek-llm-7b-chat', 'tokenizer_path': 'Qwen/Qwen3-4B-Instruct-2507', 'override_config': {}, 'external_lib': None, 'trust_remote_code': False, '_target_': 'verl.workers.config.FSDPCriticModelCfg', 'use_shm': False, 'enable_gradient_checkpointing': True, 'enable_activation_offload': False, 'use_remove_padding': False, 'lora_rank': 0, 'lora_alpha': 16, 'target_modules': 'all-linear', 'tiled_mlp': {'enabled': False, 'num_shards': 4}}, '_target_': 'verl.workers.config.FSDPCriticConfig', 'rollout_n': 3, 'strategy': 'fsdp', 'enable': None, 'ppo_mini_batch_size': 256, 'ppo_micro_batch_size': None, 'ppo_micro_batch_size_per_gpu': None, 'use_dynamic_bsz': False, 'ppo_max_token_len_per_gpu': 32768, 'forward_max_token_len_per_gpu': 32768, 'ppo_epochs': 1, 'shuffle': False, 'data_loader_seed': 42, 'cliprange_value': 0.5, 'loss_agg_mode': 'token-mean', 'checkpoint': {'_target_': 'verl.trainer.config.CheckpointConfig', 'save_contents': ['model', 'optimizer', 'extra'], 'load_contents': ['model', 'optimizer', 'extra'], 'async_save': False}, 'profiler': {'_target_': 'verl.utils.profiler.ProfilerConfig', 'tool': None, 'enable': False, 'all_ranks': False, 'ranks': [], 'save_path': 'outputs/profile', 'tool_config': {'nsys': {'_target_': 'verl.utils.profiler.config.NsightToolConfig', 'discrete': False}, 'npu': {'_target_': 'verl.utils.profiler.config.NPUToolConfig', 'contents': [], 'level': 'level0', 'analysis': True, 'discrete': False}, 'torch': {'_target_': 'verl.utils.profiler.config.TorchProfilerToolConfig', 'contents': [], 'discrete': False}, 'torch_memory': {'_target_': 'verl.utils.profiler.config.TorchMemoryToolConfig', 'trace_alloc_max_entries': 100000, 'stack_depth': 32}}}, 'forward_micro_batch_size': None, 'forward_micro_batch_size_per_gpu': None, 'ulysses_sequence_parallel_size': 1, 'grad_clip': 1.0}, 'reward_model': {'enable': False, 'enable_resource_pool': False, 'n_gpus_per_node': 8, 'nnodes': 0, 'strategy': 'fsdp', 'model': {'input_tokenizer': 'Qwen/Qwen3-4B-Instruct-2507', 'path': '~/models/FsfairX-LLaMA3-RM-v0.1', 'external_lib': None, 'trust_remote_code': False, 'override_config': {}, 'use_shm': False, 'use_remove_padding': False, 'use_fused_kernels': False, 'fsdp_config': {'_target_': 'verl.workers.config.FSDPEngineConfig', 'wrap_policy': {'min_num_params': 0}, 'param_offload': False, 'reshard_after_forward': True, 'fsdp_size': -1, 'forward_prefetch': False}}, 'micro_batch_size': None, 'micro_batch_size_per_gpu': None, 'max_length': None, 'use_dynamic_bsz': False, 'forward_max_token_len_per_gpu': 32768, 'reward_manager': 'naive', 'reward_loop_source': 'register', 'reward_loop_module_path': None, 'reward_loop_class_name': None, 'launch_reward_fn_async': False, 'sandbox_fusion': {'url': None, 'max_concurrent': 64, 'memory_limit_mb': 1024}, 'profiler': {'_target_': 'verl.utils.profiler.ProfilerConfig', 'tool': None, 'enable': False, 'all_ranks': False, 'ranks': [], 'save_path': 'outputs/profile', 'tool_config': {'nsys': {'_target_': 'verl.utils.profiler.config.NsightToolConfig', 'discrete': False}, 'npu': {'_target_': 'verl.utils.profiler.config.NPUToolConfig', 'contents': [], 'level': 'level0', 'analysis': True, 'discrete': False}, 'torch': {'_target_': 'verl.utils.profiler.config.TorchProfilerToolConfig', 'contents': [], 'discrete': False}, 'torch_memory': {'_target_': 'verl.utils.profiler.config.TorchMemoryToolConfig', 'trace_alloc_max_entries': 100000, 'stack_depth': 32}}}, 'ulysses_sequence_parallel_size': 1, 'use_reward_loop': True, 'num_workers': 1, 'rollout': {'_target_': 'verl.workers.config.RolloutConfig', 'name': '???', 'dtype': 'bfloat16', 'gpu_memory_utilization': 0.5, 'enforce_eager': True, 'cudagraph_capture_sizes': None, 'free_cache_engine': True, 'data_parallel_size': 1, 'expert_parallel_size': 1, 'tensor_model_parallel_size': 2, 'max_num_batched_tokens': 8192, 'max_model_len': None, 'max_num_seqs': 1024, 'load_format': 'auto', 'engine_kwargs': {}, 'limit_images': None, 'enable_chunked_prefill': True, 'enable_prefix_caching': True, 'disable_log_stats': True, 'skip_tokenizer_init': False, 'prompt_length': 2048, 'response_length': 2048}}, 'algorithm': {'rollout_correction': {'rollout_is': None, 'rollout_is_threshold': 2.0, 'rollout_rs': None, 'rollout_rs_threshold': None, 'bypass_mode': False, 'loss_type': 'ppo_clip', 'rollout_is_batch_normalize': False}, '_target_': 'verl.trainer.config.AlgoConfig', 'gamma': 1.0, 'lam': 1.0, 'adv_estimator': 'grpo', 'norm_adv_by_std_in_grpo': True, 'use_kl_in_reward': False, 'kl_penalty': 'kl', 'kl_ctrl': {'_target_': 'verl.trainer.config.KLControlConfig', 'type': 'fixed', 'kl_coef': 0.001, 'horizon': 10000, 'target_kl': 0.1}, 'use_pf_ppo': False, 'pf_ppo': {'reweight_method': 'pow', 'weight_pow': 2.0}}, 'custom_reward_function': {'path': '/home/mshahidul/readctrl/code/RL_model/verl/verl_train/reward_func/reward_func/reward_new.py', 'name': 'compute_score'}, 'trainer': {'balance_batch': True, 'total_epochs': 15, 'total_training_steps': None, 'project_name': 'readctrl-verl', 'experiment_name': 'qwen3-4b-instruct-en', 'logger': ['console', 'wandb'], 'log_val_generations': 0, 'rollout_data_dir': None, 'validation_data_dir': None, 'nnodes': 1, 'n_gpus_per_node': 2, 'save_freq': 5, 'esi_redundant_time': 0, 'resume_mode': 'auto', 'resume_from_path': None, 'val_before_train': True, 'val_only': False, 'test_freq': 10, 'critic_warmup': 0, 'default_hdfs_dir': None, 'del_local_ckpt_after_load': False, 'default_local_dir': '/home/mshahidul/readctrl/code/RL_model/models/RL_model_subclaim_classifier_v2', 'max_actor_ckpt_to_keep': 1, 'max_critic_ckpt_to_keep': 1, 'ray_wait_register_center_timeout': 300, 'device': 'cuda', 'use_legacy_worker_impl': 'auto', 'remove_previous_ckpt_in_save': True}, 'global_profiler': {'_target_': 'verl.utils.profiler.ProfilerConfig', 'tool': None, 'steps': None, 'profile_continuous_steps': False, 'save_path': 'outputs/profile', 'global_tool_config': {'nsys': {'_target_': 'verl.utils.profiler.config.NsightToolConfig', 'discrete': False, 'controller_nsight_options': {'trace': 'cuda,nvtx,cublas,ucx', 'cuda-memory-usage': 'true', 'cuda-graph-trace': 'graph'}, 'worker_nsight_options': {'trace': 'cuda,nvtx,cublas,ucx', 'cuda-memory-usage': 'true', 'cuda-graph-trace': 'graph', 'capture-range': 'cudaProfilerApi', 'capture-range-end': None, 'kill': 'none'}}, 'torch_memory': {'trace_alloc_max_entries': 100000, 'stack_depth': 32, 'context': 'all', 'stacks': 'all', 'kw_args': {}}}}, 'transfer_queue': {'enable': False}, 'ray_kwargs': {'ray_init': {'num_cpus': None}, 'timeline_json_file': None}, '_wandb': {}} +2026-02-13 21:55:53,555 INFO MainThread:1829046 [wandb_init.py:init():892] starting backend +2026-02-13 21:55:53,865 INFO MainThread:1829046 [wandb_init.py:init():895] sending inform_init request +2026-02-13 21:55:53,874 INFO MainThread:1829046 [wandb_init.py:init():903] backend started and connected +2026-02-13 21:55:53,891 INFO MainThread:1829046 [wandb_init.py:init():973] updated telemetry +2026-02-13 21:55:53,913 INFO MainThread:1829046 [wandb_init.py:init():997] communicating run to backend with 90.0 second timeout +2026-02-13 21:55:55,188 INFO MainThread:1829046 [wandb_init.py:init():1042] starting run threads in backend +2026-02-13 21:55:56,036 INFO MainThread:1829046 [wandb_run.py:_console_start():2529] atexit reg +2026-02-13 21:55:56,037 INFO MainThread:1829046 [wandb_run.py:_redirect():2377] redirect: wrap_raw +2026-02-13 21:55:56,037 INFO MainThread:1829046 [wandb_run.py:_redirect():2446] Wrapping output streams. +2026-02-13 21:55:56,037 INFO MainThread:1829046 [wandb_run.py:_redirect():2469] Redirects installed. +2026-02-13 21:55:56,054 INFO MainThread:1829046 [wandb_init.py:init():1082] run started, returning control to user process diff --git a/code/RL_model/verl/verl_train/wandb/run-20260215_041259-udcrfv6m/files/config.yaml b/code/RL_model/verl/verl_train/wandb/run-20260215_041259-udcrfv6m/files/config.yaml new file mode 100644 index 0000000000000000000000000000000000000000..5d1042136eedc22dd62c47aec0d2de87a51559c4 --- /dev/null +++ b/code/RL_model/verl/verl_train/wandb/run-20260215_041259-udcrfv6m/files/config.yaml @@ -0,0 +1,819 @@ +_wandb: + value: + cli_version: 0.24.1 + e: + 22e2fvlcbnolcv1k78ls5xixd1flwp1h: + args: + - --node-ip-address=172.16.34.29 + - --node-manager-port=41571 + - --object-store-name=/tmp/ray/session_2026-02-15_04-08-18_573543_3523343/sockets/plasma_store + - --raylet-name=/tmp/ray/session_2026-02-15_04-08-18_573543_3523343/sockets/raylet + - --redis-address=None + - --metrics-agent-port=48237 + - --logging-rotate-bytes=536870912 + - --logging-rotate-backup-count=5 + - --runtime-env-agent-port=59869 + - --gcs-address=172.16.34.29:57209 + - --session-name=session_2026-02-15_04-08-18_573543_3523343 + - --temp-dir=/tmp/ray + - --webui=127.0.0.1:8297 + - --cluster-id=ce2a78ac568c8641864ba956ce4b4c0b3c6b210b0d53b0adf6a9237c + - --startup-token=128 + - --worker-launch-time-ms=1771146511418 + - --node-id=51f4628367337e08a514b1cb6019d20f6c804c0fbe4296a236360afa + - --runtime-env-hash=1096984665 + cpu_count: 64 + cpu_count_logical: 128 + cudaVersion: "13.0" + disk: + /: + total: "3766429188096" + used: "214873206784" + email: shahidulshakib034@gmail.com + executable: /home/mshahidul/miniconda3/envs/verl2/bin/python3 + git: + commit: d9939add7a2a01923a9088891f913a5d20c4e622 + remote: https://github.com/verl-project/verl + gpu: NVIDIA A100 80GB PCIe + gpu_count: 6 + gpu_nvidia: + - architecture: Ampere + cudaCores: 6912 + memoryTotal: "85899345920" + name: NVIDIA A100 80GB PCIe + uuid: GPU-4a3678c7-34a9-356f-f7b7-7f7e2f44b596 + - architecture: Ampere + cudaCores: 6912 + memoryTotal: "85899345920" + name: NVIDIA A100 80GB PCIe + uuid: GPU-df506764-0db5-91b4-8ec9-154a3bb8123f + - architecture: Ampere + cudaCores: 6912 + memoryTotal: "85899345920" + name: NVIDIA A100 80GB PCIe + uuid: GPU-2c3dbd62-b384-2996-a0f6-b32dcfcc3538 + - architecture: Ampere + cudaCores: 6912 + memoryTotal: "85899345920" + name: NVIDIA A100 80GB PCIe + uuid: GPU-1ff3dabe-4b9a-ea62-5cc3-01f12f32d328 + - architecture: Hopper + cudaCores: 14592 + memoryTotal: "85520809984" + name: NVIDIA H100 PCIe + uuid: GPU-eefc4b8c-0e79-c1d6-a9ff-8325040572eb + - architecture: Hopper + cudaCores: 14592 + memoryTotal: "85520809984" + name: NVIDIA H100 PCIe + uuid: GPU-d42b6057-13e8-1e88-6aa1-9307df72dece + host: gamma + memory: + total: "1081814863872" + os: Linux-5.15.0-160-generic-x86_64-with-glibc2.35 + program: /home/mshahidul/miniconda3/envs/verl2/lib/python3.12/site-packages/ray/_private/workers/default_worker.py + python: CPython 3.12.12 + root: /data/home_beta/mshahidul/readctrl/code/RL_model/verl/verl_train + startedAt: "2026-02-15T09:12:59.214126Z" + writerId: 22e2fvlcbnolcv1k78ls5xixd1flwp1h + m: [] + python_version: 3.12.12 + t: + "1": + - 1 + - 11 + - 30 + - 35 + - 41 + - 49 + - 50 + - 51 + - 71 + - 95 + - 98 + - 105 + - 107 + "2": + - 1 + - 11 + - 30 + - 35 + - 41 + - 49 + - 50 + - 51 + - 71 + - 95 + - 98 + - 105 + - 107 + "3": + - 2 + - 13 + - 16 + - 61 + "4": 3.12.12 + "5": 0.24.1 + "6": 4.56.1 + "12": 0.24.1 + "13": linux-x86_64 +actor_rollout_ref: + value: + actor: + _target_: verl.workers.config.FSDPActorConfig + calculate_entropy: false + calculate_sum_pi_squared: false + checkpoint: + _target_: verl.trainer.config.CheckpointConfig + async_save: false + load_contents: + - model + - optimizer + - extra + save_contents: + - model + - optimizer + - extra + clip_ratio: 0.2 + clip_ratio_c: 3 + clip_ratio_high: 0.2 + clip_ratio_low: 0.2 + data_loader_seed: 42 + entropy_checkpointing: false + entropy_coeff: 0 + entropy_from_logits_with_chunking: false + freeze_vision_tower: false + fsdp_config: + _target_: verl.workers.config.FSDPEngineConfig + dtype: bfloat16 + entropy_checkpointing: false + entropy_from_logits_with_chunking: false + forward_only: false + forward_prefetch: false + fsdp_size: -1 + full_determinism: false + model_dtype: fp32 + offload_policy: false + optimizer_offload: true + param_offload: true + reshard_after_forward: true + seed: 42 + strategy: fsdp + ulysses_sequence_parallel_size: 1 + use_orig_params: false + use_torch_compile: true + wrap_policy: + min_num_params: 0 + grad_clip: 1 + kl_loss_coef: 0.001 + kl_loss_type: low_var_kl + loss_agg_mode: token-mean + loss_scale_factor: null + optim: + _target_: verl.workers.config.FSDPOptimizerConfig + betas: + - 0.9 + - 0.999 + clip_grad: 1 + lr: 1e-06 + lr_scheduler_type: constant + lr_warmup_steps: -1 + lr_warmup_steps_ratio: 0 + min_lr_ratio: 0 + num_cycles: 0.5 + optimizer: AdamW + optimizer_impl: torch.optim + override_optimizer_config: null + total_training_steps: 45 + warmup_style: null + weight_decay: 0.01 + policy_loss: + _target_: verl.workers.config.PolicyLossConfig + clip_cov_lb: 1 + clip_cov_ratio: 0.0002 + clip_cov_ub: 5 + kl_cov_ratio: 0.0002 + loss_mode: vanilla + ppo_kl_coef: 0.1 + ppo_epochs: 1 + ppo_max_token_len_per_gpu: 16384 + ppo_micro_batch_size: null + ppo_micro_batch_size_per_gpu: 16 + ppo_mini_batch_size: 256 + profiler: + _target_: verl.utils.profiler.ProfilerConfig + all_ranks: false + enable: false + ranks: [] + save_path: outputs/profile + tool: null + tool_config: + npu: + _target_: verl.utils.profiler.config.NPUToolConfig + analysis: true + contents: [] + discrete: false + level: level0 + nsys: + _target_: verl.utils.profiler.config.NsightToolConfig + discrete: false + torch: + _target_: verl.utils.profiler.config.TorchProfilerToolConfig + contents: [] + discrete: false + torch_memory: + _target_: verl.utils.profiler.config.TorchMemoryToolConfig + stack_depth: 32 + trace_alloc_max_entries: 100000 + rollout_n: 3 + router_replay: + _target_: verl.workers.config.RouterReplayConfig + mode: disabled + record_file: null + replay_file: null + shuffle: false + strategy: fsdp + sum_pi_squared_checkpointing: false + tau_neg: 1.05 + tau_pos: 1 + ulysses_sequence_parallel_size: 1 + use_dynamic_bsz: false + use_fused_kernels: false + use_kl_loss: true + use_prefix_grouper: false + use_remove_padding: true + use_torch_compile: true + hybrid_engine: true + model: + _target_: verl.workers.config.HFModelConfig + custom_chat_template: null + enable_activation_offload: false + enable_gradient_checkpointing: true + exclude_modules: null + external_lib: null + fused_kernel_options: + impl_backend: torch + hf_config_path: null + lora_adapter_path: null + lora_alpha: 16 + lora_rank: 0 + mtp: + _target_: verl.workers.config.MtpConfig + detach_encoder: false + enable: false + enable_rollout: false + enable_train: false + method: mtp + mtp_loss_scaling_factor: 0.1 + num_speculative_tokens: 1 + speculative_algorithm: EAGLE + speculative_eagle_topk: 1 + speculative_num_draft_tokens: 4 + speculative_num_steps: 3 + path: Qwen/Qwen3-4B-Instruct-2507 + target_modules: all-linear + tiled_mlp: + enabled: false + num_shards: 4 + tokenizer_path: null + trust_remote_code: false + use_fused_kernels: false + use_liger: false + use_remove_padding: true + use_shm: false + nccl_timeout: 600 + ref: + _target_: verl.workers.config.FSDPActorConfig + entropy_checkpointing: false + entropy_from_logits_with_chunking: false + fsdp_config: + _target_: verl.workers.config.FSDPEngineConfig + dtype: bfloat16 + entropy_checkpointing: false + entropy_from_logits_with_chunking: false + forward_only: true + forward_prefetch: false + fsdp_size: -1 + full_determinism: false + model_dtype: fp32 + offload_policy: false + optimizer_offload: false + param_offload: true + reshard_after_forward: true + seed: 42 + strategy: fsdp + ulysses_sequence_parallel_size: 1 + use_orig_params: false + use_torch_compile: true + wrap_policy: + min_num_params: 0 + log_prob_max_token_len_per_gpu: 16384 + log_prob_micro_batch_size: null + log_prob_micro_batch_size_per_gpu: 32 + log_prob_use_dynamic_bsz: false + profiler: + _target_: verl.utils.profiler.ProfilerConfig + all_ranks: false + enable: false + ranks: [] + save_path: outputs/profile + tool: null + tool_config: + npu: + _target_: verl.utils.profiler.config.NPUToolConfig + analysis: true + contents: [] + discrete: false + level: level0 + nsys: + _target_: verl.utils.profiler.config.NsightToolConfig + discrete: false + torch: + _target_: verl.utils.profiler.config.TorchProfilerToolConfig + contents: [] + discrete: false + torch_memory: + _target_: verl.utils.profiler.config.TorchMemoryToolConfig + stack_depth: 32 + trace_alloc_max_entries: 100000 + rollout_n: 3 + router_replay: + _target_: verl.workers.config.RouterReplayConfig + mode: disabled + record_file: null + replay_file: null + strategy: fsdp + ulysses_sequence_parallel_size: 1 + use_torch_compile: true + rollout: + _target_: verl.workers.config.RolloutConfig + agent: + _target_: verl.workers.config.AgentLoopConfig + agent_loop_config_path: null + custom_async_server: + _target_: verl.workers.config.CustomAsyncServerConfig + name: null + path: null + default_agent_loop: single_turn_agent + num_workers: 8 + calculate_log_probs: false + checkpoint_engine: + _target_: verl.workers.config.CheckpointEngineConfig + backend: naive + update_weights_bucket_megabytes: 2048 + cudagraph_capture_sizes: null + data_parallel_size: 1 + disable_log_stats: true + do_sample: true + dtype: bfloat16 + enable_chunked_prefill: true + enable_prefix_caching: true + enable_rollout_routing_replay: false + enforce_eager: true + expert_parallel_size: 1 + free_cache_engine: true + gpu_memory_utilization: 0.4 + ignore_eos: false + layered_summon: false + load_format: dummy + log_prob_max_token_len_per_gpu: 16384 + log_prob_micro_batch_size: null + log_prob_micro_batch_size_per_gpu: 32 + log_prob_use_dynamic_bsz: false + logprobs_mode: processed_logprobs + max_model_len: 8192 + max_num_batched_tokens: 8192 + max_num_seqs: 1024 + mode: async + mtp: + _target_: verl.workers.config.MtpConfig + detach_encoder: false + enable: false + enable_rollout: false + enable_train: false + method: mtp + mtp_loss_scaling_factor: 0.1 + num_speculative_tokens: 1 + speculative_algorithm: EAGLE + speculative_eagle_topk: 1 + speculative_num_draft_tokens: 4 + speculative_num_steps: 3 + multi_stage_wake_up: false + multi_turn: + _target_: verl.workers.config.MultiTurnConfig + enable: false + format: hermes + interaction_config_path: null + max_assistant_turns: null + max_parallel_calls: 1 + max_tool_response_length: 256 + max_user_turns: null + num_repeat_rollouts: null + tokenization_sanity_check_mode: strict + tool_config_path: null + tool_response_truncate_side: middle + use_inference_chat_template: false + "n": 3 + name: vllm + over_sample_rate: 0 + pipeline_model_parallel_size: 1 + profiler: + _target_: verl.utils.profiler.ProfilerConfig + all_ranks: false + enable: false + ranks: [] + save_path: outputs/profile + tool: null + tool_config: + npu: + _target_: verl.utils.profiler.config.NPUToolConfig + analysis: true + contents: [] + discrete: false + level: level0 + nsys: + _target_: verl.utils.profiler.config.NsightToolConfig + discrete: false + torch: + _target_: verl.utils.profiler.config.TorchProfilerToolConfig + contents: [] + discrete: false + torch_memory: + _target_: verl.utils.profiler.config.TorchMemoryToolConfig + stack_depth: 32 + trace_alloc_max_entries: 100000 + prometheus: + _target_: verl.workers.config.PrometheusConfig + enable: false + file: /tmp/ray/session_latest/metrics/prometheus/prometheus.yml + port: 9090 + served_model_name: Qwen/Qwen3-4B-Instruct-2507 + prompt_length: 1024 + quantization: null + quantization_config_file: null + response_length: 2048 + scheduling_policy: fcfs + skip_dump_dir: /tmp/rollout_dump + skip_rollout: false + skip_tokenizer_init: true + temperature: 1 + tensor_model_parallel_size: 1 + top_k: -1 + top_p: 1 + trace: + _target_: verl.workers.config.TraceConfig + backend: null + max_samples_per_step_per_worker: null + token2text: false + val_kwargs: + _target_: verl.workers.config.SamplingConfig + do_sample: false + "n": 1 + temperature: 0 + top_k: -1 + top_p: 1 +algorithm: + value: + _target_: verl.trainer.config.AlgoConfig + adv_estimator: grpo + gamma: 1 + kl_ctrl: + _target_: verl.trainer.config.KLControlConfig + horizon: 10000 + kl_coef: 0.001 + target_kl: 0.1 + type: fixed + kl_penalty: kl + lam: 1 + norm_adv_by_std_in_grpo: true + pf_ppo: + reweight_method: pow + weight_pow: 2 + rollout_correction: + bypass_mode: false + loss_type: ppo_clip + rollout_is: null + rollout_is_batch_normalize: false + rollout_is_threshold: 2 + rollout_rs: null + rollout_rs_threshold: null + use_kl_in_reward: false + use_pf_ppo: false +critic: + value: + _target_: verl.workers.config.FSDPCriticConfig + checkpoint: + _target_: verl.trainer.config.CheckpointConfig + async_save: false + load_contents: + - model + - optimizer + - extra + save_contents: + - model + - optimizer + - extra + cliprange_value: 0.5 + data_loader_seed: 42 + enable: null + forward_max_token_len_per_gpu: 32768 + forward_micro_batch_size: null + forward_micro_batch_size_per_gpu: null + grad_clip: 1 + loss_agg_mode: token-mean + model: + _target_: verl.workers.config.FSDPCriticModelCfg + enable_activation_offload: false + enable_gradient_checkpointing: true + external_lib: null + fsdp_config: + _target_: verl.workers.config.FSDPEngineConfig + dtype: bfloat16 + entropy_checkpointing: false + entropy_from_logits_with_chunking: false + forward_only: false + forward_prefetch: false + fsdp_size: -1 + full_determinism: false + model_dtype: fp32 + offload_policy: false + optimizer_offload: false + param_offload: false + reshard_after_forward: true + seed: 42 + strategy: fsdp + ulysses_sequence_parallel_size: 1 + use_orig_params: false + use_torch_compile: true + wrap_policy: + min_num_params: 0 + lora_alpha: 16 + lora_rank: 0 + path: ~/models/deepseek-llm-7b-chat + target_modules: all-linear + tiled_mlp: + enabled: false + num_shards: 4 + tokenizer_path: Qwen/Qwen3-4B-Instruct-2507 + trust_remote_code: false + use_remove_padding: false + use_shm: false + optim: + _target_: verl.workers.config.FSDPOptimizerConfig + betas: + - 0.9 + - 0.999 + clip_grad: 1 + lr: 1e-05 + lr_scheduler_type: constant + lr_warmup_steps: -1 + lr_warmup_steps_ratio: 0 + min_lr_ratio: 0 + num_cycles: 0.5 + optimizer: AdamW + optimizer_impl: torch.optim + override_optimizer_config: null + total_training_steps: 45 + warmup_style: null + weight_decay: 0.01 + ppo_epochs: 1 + ppo_max_token_len_per_gpu: 32768 + ppo_micro_batch_size: null + ppo_micro_batch_size_per_gpu: null + ppo_mini_batch_size: 256 + profiler: + _target_: verl.utils.profiler.ProfilerConfig + all_ranks: false + enable: false + ranks: [] + save_path: outputs/profile + tool: null + tool_config: + npu: + _target_: verl.utils.profiler.config.NPUToolConfig + analysis: true + contents: [] + discrete: false + level: level0 + nsys: + _target_: verl.utils.profiler.config.NsightToolConfig + discrete: false + torch: + _target_: verl.utils.profiler.config.TorchProfilerToolConfig + contents: [] + discrete: false + torch_memory: + _target_: verl.utils.profiler.config.TorchMemoryToolConfig + stack_depth: 32 + trace_alloc_max_entries: 100000 + rollout_n: 3 + shuffle: false + strategy: fsdp + ulysses_sequence_parallel_size: 1 + use_dynamic_bsz: false +custom_reward_function: + value: + name: compute_score + path: /home/mshahidul/readctrl/code/RL_model/verl/verl_train/reward_func/reward_func/reward_new_v2.py +data: + value: + custom_cls: + name: null + path: null + datagen: + name: null + path: null + dataloader_num_workers: 8 + filter_overlong_prompts: true + filter_overlong_prompts_workers: 1 + image_key: images + image_patch_size: 14 + max_prompt_length: 1024 + max_response_length: 2048 + prompt_key: prompt + return_full_prompt: false + return_multi_modal_inputs: true + return_raw_chat: true + return_raw_input_ids: false + reward_fn_key: data_source + sampler: + class_name: null + class_path: null + seed: null + shuffle: true + tokenizer: null + tool_config_path: null + train_batch_size: 512 + train_files: /home/mshahidul/readctrl/code/RL_model/verl/verl_train/dataset/train.parquet + train_max_samples: -1 + truncation: error + trust_remote_code: false + use_shm: false + val_batch_size: null + val_files: /home/mshahidul/readctrl/code/RL_model/verl/verl_train/dataset/test.parquet + val_max_samples: -1 + validation_shuffle: false + video_key: videos +global_profiler: + value: + _target_: verl.utils.profiler.ProfilerConfig + global_tool_config: + nsys: + _target_: verl.utils.profiler.config.NsightToolConfig + controller_nsight_options: + cuda-graph-trace: graph + cuda-memory-usage: "true" + trace: cuda,nvtx,cublas,ucx + discrete: false + worker_nsight_options: + capture-range: cudaProfilerApi + capture-range-end: null + cuda-graph-trace: graph + cuda-memory-usage: "true" + kill: none + trace: cuda,nvtx,cublas,ucx + torch_memory: + context: all + stack_depth: 32 + stacks: all + trace_alloc_max_entries: 100000 + profile_continuous_steps: false + save_path: outputs/profile + steps: null + tool: null +ray_kwargs: + value: + ray_init: + num_cpus: null + timeline_json_file: null +reward_manager: + value: + _target_: verl.trainer.config.config.RewardManagerConfig + module: + _target_: verl.trainer.config.config.ModuleConfig + name: custom_reward_manager + path: null + name: naive + source: register +reward_model: + value: + enable: false + enable_resource_pool: false + forward_max_token_len_per_gpu: 32768 + launch_reward_fn_async: false + max_length: null + micro_batch_size: null + micro_batch_size_per_gpu: null + model: + external_lib: null + fsdp_config: + _target_: verl.workers.config.FSDPEngineConfig + forward_prefetch: false + fsdp_size: -1 + param_offload: false + reshard_after_forward: true + wrap_policy: + min_num_params: 0 + input_tokenizer: Qwen/Qwen3-4B-Instruct-2507 + path: ~/models/FsfairX-LLaMA3-RM-v0.1 + trust_remote_code: false + use_fused_kernels: false + use_remove_padding: false + use_shm: false + n_gpus_per_node: 8 + nnodes: 0 + num_workers: 1 + profiler: + _target_: verl.utils.profiler.ProfilerConfig + all_ranks: false + enable: false + ranks: [] + save_path: outputs/profile + tool: null + tool_config: + npu: + _target_: verl.utils.profiler.config.NPUToolConfig + analysis: true + contents: [] + discrete: false + level: level0 + nsys: + _target_: verl.utils.profiler.config.NsightToolConfig + discrete: false + torch: + _target_: verl.utils.profiler.config.TorchProfilerToolConfig + contents: [] + discrete: false + torch_memory: + _target_: verl.utils.profiler.config.TorchMemoryToolConfig + stack_depth: 32 + trace_alloc_max_entries: 100000 + reward_loop_class_name: null + reward_loop_module_path: null + reward_loop_source: register + reward_manager: naive + rollout: + _target_: verl.workers.config.RolloutConfig + cudagraph_capture_sizes: null + data_parallel_size: 1 + disable_log_stats: true + dtype: bfloat16 + enable_chunked_prefill: true + enable_prefix_caching: true + enforce_eager: true + expert_parallel_size: 1 + free_cache_engine: true + gpu_memory_utilization: 0.5 + limit_images: null + load_format: auto + max_model_len: null + max_num_batched_tokens: 8192 + max_num_seqs: 1024 + name: ??? + prompt_length: 2048 + response_length: 2048 + skip_tokenizer_init: false + tensor_model_parallel_size: 2 + sandbox_fusion: + max_concurrent: 64 + memory_limit_mb: 1024 + url: null + strategy: fsdp + ulysses_sequence_parallel_size: 1 + use_dynamic_bsz: false + use_reward_loop: true +trainer: + value: + balance_batch: true + critic_warmup: 0 + default_hdfs_dir: null + default_local_dir: /home/mshahidul/readctrl/code/RL_model/models/RL_model_subclaim_classifier_v2 + del_local_ckpt_after_load: false + device: cuda + esi_redundant_time: 0 + experiment_name: qwen3-4b-instruct-en + log_val_generations: 0 + logger: + - console + - wandb + max_actor_ckpt_to_keep: 1 + max_critic_ckpt_to_keep: 1 + n_gpus_per_node: 2 + nnodes: 1 + project_name: readctrl-verl + ray_wait_register_center_timeout: 300 + remove_previous_ckpt_in_save: true + resume_from_path: null + resume_mode: auto + rollout_data_dir: null + save_freq: 5 + test_freq: 10 + total_epochs: 15 + total_training_steps: null + use_legacy_worker_impl: auto + val_before_train: true + val_only: false + validation_data_dir: null +transfer_queue: + value: + enable: false diff --git a/code/RL_model/verl/verl_train/wandb/run-20260215_041259-udcrfv6m/files/output.log b/code/RL_model/verl/verl_train/wandb/run-20260215_041259-udcrfv6m/files/output.log new file mode 100644 index 0000000000000000000000000000000000000000..0b0b88a3954fcebb52b7b51034a5879c8ee018cb --- /dev/null +++ b/code/RL_model/verl/verl_train/wandb/run-20260215_041259-udcrfv6m/files/output.log @@ -0,0 +1,14 @@ +wandb: Detected [dspy, litellm, openai] in use. +wandb: Use W&B Weave for improved LLM call tracing. Install Weave with `pip install weave` then add `import weave` to the top of your script. +wandb: For more information, check out the docs at: https://weave-docs.wandb.ai/ +Checkpoint tracker file does not exist: /home/mshahidul/readctrl/code/RL_model/models/RL_model_subclaim_classifier_v2/latest_checkpointed_iteration.txt +Training from scratch +test_gen_batch meta info: {'eos_token_id': 151645, 'pad_token_id': 151643, 'recompute_log_prob': False, 'do_sample': False, 'validate': True, 'global_steps': 0} +validation generation end +("Initial validation metrics: {'val-aux/multiclinsum/reward/mean@1': " + "np.float64(3.428422510445475), 'val-core/multiclinsum/acc/mean@1': " + "np.float64(3.428422560297041), 'val-aux/num_turns/min': np.int32(2), " + "'val-aux/num_turns/max': np.int32(2), 'val-aux/num_turns/mean': " + 'np.float64(2.0)}') +step:0 - val-aux/multiclinsum/reward/mean@1:np.float64(3.428422510445475) - val-core/multiclinsum/acc/mean@1:np.float64(3.428422560297041) - val-aux/num_turns/min:np.int32(2) - val-aux/num_turns/max:np.int32(2) - val-aux/num_turns/mean:np.float64(2.0) +Training Progress: 0%| | 0/45 [00:00, ?it/s] diff --git a/code/RL_model/verl/verl_train/wandb/run-20260215_041259-udcrfv6m/files/requirements.txt b/code/RL_model/verl/verl_train/wandb/run-20260215_041259-udcrfv6m/files/requirements.txt new file mode 100644 index 0000000000000000000000000000000000000000..3c860bd0e3457bae4d28e48b93f4be436aae99df --- /dev/null +++ b/code/RL_model/verl/verl_train/wandb/run-20260215_041259-udcrfv6m/files/requirements.txt @@ -0,0 +1,291 @@ +verl==0.8.0.dev0 +psutil==7.1.3 +colorama==0.4.6 +annotated-doc==0.0.4 +sentry-sdk==2.51.0 +requests==2.32.5 +nvidia-cufile-cu12==1.13.1.3 +ml_dtypes==0.5.4 +xformers==0.0.32.post1 +sglang==0.5.2 +multidict==6.7.1 +typing_extensions==4.15.0 +nvidia-cusparselt-cu12==0.7.1 +openai-harmony==0.0.4 +transformers==4.56.1 +Werkzeug==3.1.5 +identify==2.6.16 +gepa==0.0.26 +pytest==9.0.2 +nvidia-cuda-runtime-cu12==12.8.90 +GitPython==3.1.46 +cupy-cuda12x==13.6.0 +tokenizers==0.22.2 +unsloth_zoo==2026.2.1 +pybind11==3.0.1 +google-api-core==2.29.0 +partial-json-parser==0.2.1.1.post7 +aiohttp-cors==0.8.1 +sniffio==1.3.1 +tensordict==0.10.0 +smart_open==7.5.0 +cffi==2.0.0 +wcwidth==0.5.3 +asttokens==3.0.1 +opencensus==0.11.4 +rpds-py==0.30.0 +py-spy==0.4.1 +litellm==1.81.9 +gguf==0.17.1 +nvidia-nvjitlink-cu12==12.8.93 +httpx==0.28.1 +cuda-python==13.1.1 +annotated-types==0.7.0 +regex==2026.1.15 +vllm==0.11.0 +idna==3.11 +parso==0.8.5 +pydantic-extra-types==2.11.0 +MarkupSafe==3.0.3 +cryptography==46.0.4 +openai==2.17.0 +filelock==3.20.3 +modelscope==1.34.0 +outlines==0.1.11 +dnspython==2.8.0 +scipy==1.17.0 +zipp==3.23.0 +PyYAML==6.0.3 +onnx==1.20.1 +torchdata==0.11.0 +unsloth==2026.2.1 +cuda-pathfinder==1.3.3 +asyncer==0.0.8 +verl==0.8.0.dev0 +httptools==0.7.1 +opencv-python-headless==4.13.0.90 +importlib_metadata==8.7.1 +peft==0.18.1 +opentelemetry-sdk==1.39.1 +python-json-logger==4.0.0 +alembic==1.18.3 +cuda-bindings==13.1.1 +mdurl==0.1.2 +fsspec==2025.9.0 +referencing==0.37.0 +xxhash==3.6.0 +interegular==0.3.3 +fastapi-cli==0.0.20 +uv==0.9.28 +tensorboard==2.20.0 +tyro==1.0.6 +nvidia-cublas-cu12==12.8.4.1 +sentencepiece==0.2.1 +rich-toolkit==0.18.1 +numpy==2.2.0 +yarl==1.22.0 +opencv-fixer==0.2.5 +tqdm==4.67.2 +python-dotenv==1.2.1 +Mako==1.3.10 +timm==1.0.16 +aiohappyeyeballs==2.6.1 +decord==0.6.0 +jiter==0.12.0 +airportsdata==20250909 +markdown-it-py==4.0.0 +nvidia-cusolver-cu12==11.7.3.90 +pyarrow==23.0.0 +opentelemetry-proto==1.39.1 +anyio==4.12.1 +pycryptodomex==3.23.0 +prometheus_client==0.24.1 +aiohttp==3.13.3 +urllib3==2.6.3 +pexpect==4.9.0 +pydantic-settings==2.12.0 +distro==1.9.0 +av==16.1.0 +cloudpickle==3.1.2 +mpmath==1.3.0 +certifi==2026.1.4 +antlr4-python3-runtime==4.9.3 +torchvision==0.23.0 +accelerate==1.12.0 +watchfiles==1.1.1 +ruff==0.14.14 +cut-cross-entropy==25.1.1 +wheel==0.46.3 +torchao==0.16.0 +omegaconf==2.3.0 +nvidia-cufft-cu12==11.3.3.83 +frozendict==2.4.7 +sympy==1.14.0 +setproctitle==1.3.7 +optuna==4.7.0 +setuptools==79.0.1 +py-cpuinfo==9.0.0 +ipython_pygments_lexers==1.1.1 +rich==14.3.2 +uvicorn==0.40.0 +outlines_core==0.2.11 +llvmlite==0.44.0 +nvidia-cuda-cupti-cu12==12.8.90 +attrs==25.4.0 +anthropic==0.77.0 +packaging==25.0 +fastrlock==0.8.3 +astor==0.8.1 +pluggy==1.6.0 +nvidia-cuda-nvrtc-cu12==12.8.93 +psutil==7.2.2 +virtualenv==20.36.1 +cbor2==5.8.0 +tenacity==9.1.4 +compressed-tensors==0.11.0 +SQLAlchemy==2.0.46 +nvidia-cusparse-cu12==12.5.8.93 +networkx==3.6.1 +httpcore==1.0.9 +onnxscript==0.3.1 +smmap==5.0.2 +opencv-python==4.13.0.90 +traitlets==5.14.3 +python-multipart==0.0.22 +pyvers==0.1.0 +huggingface-hub==0.36.0 +pillow==12.1.0 +jsonschema==4.26.0 +cfgv==3.5.0 +optree==0.18.0 +email-validator==2.3.0 +tabulate==0.9.0 +pre_commit==4.5.1 +msgpack==1.1.2 +depyf==0.19.0 +numba==0.61.2 +six==1.17.0 +aiosignal==1.4.0 +nvidia-nvtx-cu12==12.8.90 +propcache==0.4.1 +torch_memory_saver==0.0.8 +h11==0.16.0 +frozenlist==1.8.0 +websockets==16.0 +nvidia-cudnn-frontend==1.18.0 +build==1.4.0 +google-auth==2.48.0 +pycountry==24.6.1 +colorlog==6.10.1 +typeguard==4.4.4 +stack-data==0.6.3 +typing-inspection==0.4.2 +googleapis-common-protos==1.72.0 +pandas==3.0.0 +typer==0.21.1 +protobuf==6.33.5 +fastapi==0.128.0 +blake3==1.0.8 +opentelemetry-semantic-conventions==0.60b1 +opentelemetry-exporter-prometheus==0.60b1 +nvidia-cudnn-cu12==9.10.2.21 +Markdown==3.10.1 +liger_kernel==0.6.4 +json_repair==0.57.1 +nodeenv==1.10.0 +prompt_toolkit==3.0.52 +torchaudio==2.8.0 +datasets==4.3.0 +codetiming==1.4.0 +diffusers==0.36.0 +platformdirs==4.5.1 +jsonschema-specifications==2025.9.1 +hydra-core==1.3.2 +tensorboard-data-server==0.7.2 +lm-format-enforcer==0.11.3 +bitsandbytes==0.49.1 +pyasn1_modules==0.4.2 +tiktoken==0.12.0 +starlette==0.50.0 +pyproject_hooks==1.2.0 +flash_attn==2.8.1 +rsa==4.9.1 +ray==2.53.0 +nest-asyncio==1.6.0 +lark==1.2.2 +fastar==0.8.0 +orjson==3.11.6 +prometheus-fastapi-instrumentator==7.1.0 +opentelemetry-api==1.39.1 +mathruler==0.1.0 +pydantic_core==2.41.5 +fastapi-cloud-cli==0.11.0 +pynvml==13.0.1 +loguru==0.7.3 +torch==2.8.0 +msgspec==0.20.0 +nvidia-curand-cu12==10.3.9.90 +blobfile==3.0.0 +gitdb==4.0.12 +llguidance==0.7.30 +hf_transfer==0.1.9 +nvidia-nccl-cu12==2.27.3 +trl==0.24.0 +qwen-vl-utils==0.0.14 +ptyprocess==0.7.0 +ipdb==0.13.13 +opencensus-context==0.1.3 +jedi==0.19.2 +click==8.3.1 +multiprocess==0.70.16 +soxr==1.0.0 +sgl-kernel==0.3.9.post2 +colorful==0.5.8 +pyasn1==0.6.2 +charset-normalizer==3.4.4 +nvidia-ml-py==13.590.48 +hf-xet==1.2.0 +dill==0.4.0 +absl-py==2.4.0 +pydantic==2.12.5 +dspy==3.1.3 +wrapt==2.1.0 +flashinfer-python==0.3.1 +python-dateutil==2.9.0.post0 +cachetools==7.0.0 +soundfile==0.13.1 +diskcache==5.6.3 +onnx-ir==0.1.15 +docstring_parser==0.17.0 +matplotlib-inline==0.2.1 +Pygments==2.19.2 +wandb==0.24.1 +pure_eval==0.2.3 +ninja==1.13.0 +proto-plus==1.27.0 +pyzmq==27.1.0 +iniconfig==2.3.0 +Jinja2==3.1.6 +megatron-core==0.13.1 +uvloop==0.22.1 +fastuuid==0.14.0 +pycparser==3.0 +pylatexenc==2.10 +decorator==5.2.1 +shellingham==1.5.4 +lxml==6.0.2 +safetensors==0.7.0 +xgrammar==0.1.25 +pybase64==1.4.3 +ipython==9.9.0 +greenlet==3.3.1 +mistral_common==1.9.0 +rignore==0.7.6 +einops==0.8.2 +distlib==0.4.0 +triton==3.4.0 +executing==2.2.1 +grpcio==1.76.0 +pip==25.3 +verl==0.8.0.dev0 +verl==0.8.0.dev0 diff --git a/code/RL_model/verl/verl_train/wandb/run-20260215_041259-udcrfv6m/files/wandb-metadata.json b/code/RL_model/verl/verl_train/wandb/run-20260215_041259-udcrfv6m/files/wandb-metadata.json new file mode 100644 index 0000000000000000000000000000000000000000..c8231a4f53969f993608fb37e67743c72f317105 --- /dev/null +++ b/code/RL_model/verl/verl_train/wandb/run-20260215_041259-udcrfv6m/files/wandb-metadata.json @@ -0,0 +1,93 @@ +{ + "os": "Linux-5.15.0-160-generic-x86_64-with-glibc2.35", + "python": "CPython 3.12.12", + "startedAt": "2026-02-15T09:12:59.214126Z", + "args": [ + "--node-ip-address=172.16.34.29", + "--node-manager-port=41571", + "--object-store-name=/tmp/ray/session_2026-02-15_04-08-18_573543_3523343/sockets/plasma_store", + "--raylet-name=/tmp/ray/session_2026-02-15_04-08-18_573543_3523343/sockets/raylet", + "--redis-address=None", + "--metrics-agent-port=48237", + "--logging-rotate-bytes=536870912", + "--logging-rotate-backup-count=5", + "--runtime-env-agent-port=59869", + "--gcs-address=172.16.34.29:57209", + "--session-name=session_2026-02-15_04-08-18_573543_3523343", + "--temp-dir=/tmp/ray", + "--webui=127.0.0.1:8297", + "--cluster-id=ce2a78ac568c8641864ba956ce4b4c0b3c6b210b0d53b0adf6a9237c", + "--startup-token=128", + "--worker-launch-time-ms=1771146511418", + "--node-id=51f4628367337e08a514b1cb6019d20f6c804c0fbe4296a236360afa", + "--runtime-env-hash=1096984665" + ], + "program": "/home/mshahidul/miniconda3/envs/verl2/lib/python3.12/site-packages/ray/_private/workers/default_worker.py", + "git": { + "remote": "https://github.com/verl-project/verl", + "commit": "d9939add7a2a01923a9088891f913a5d20c4e622" + }, + "email": "shahidulshakib034@gmail.com", + "root": "/data/home_beta/mshahidul/readctrl/code/RL_model/verl/verl_train", + "host": "gamma", + "executable": "/home/mshahidul/miniconda3/envs/verl2/bin/python3", + "cpu_count": 64, + "cpu_count_logical": 128, + "gpu": "NVIDIA A100 80GB PCIe", + "gpu_count": 6, + "disk": { + "/": { + "total": "3766429188096", + "used": "214873206784" + } + }, + "memory": { + "total": "1081814863872" + }, + "gpu_nvidia": [ + { + "name": "NVIDIA A100 80GB PCIe", + "memoryTotal": "85899345920", + "cudaCores": 6912, + "architecture": "Ampere", + "uuid": "GPU-4a3678c7-34a9-356f-f7b7-7f7e2f44b596" + }, + { + "name": "NVIDIA A100 80GB PCIe", + "memoryTotal": "85899345920", + "cudaCores": 6912, + "architecture": "Ampere", + "uuid": "GPU-df506764-0db5-91b4-8ec9-154a3bb8123f" + }, + { + "name": "NVIDIA A100 80GB PCIe", + "memoryTotal": "85899345920", + "cudaCores": 6912, + "architecture": "Ampere", + "uuid": "GPU-2c3dbd62-b384-2996-a0f6-b32dcfcc3538" + }, + { + "name": "NVIDIA A100 80GB PCIe", + "memoryTotal": "85899345920", + "cudaCores": 6912, + "architecture": "Ampere", + "uuid": "GPU-1ff3dabe-4b9a-ea62-5cc3-01f12f32d328" + }, + { + "name": "NVIDIA H100 PCIe", + "memoryTotal": "85520809984", + "cudaCores": 14592, + "architecture": "Hopper", + "uuid": "GPU-eefc4b8c-0e79-c1d6-a9ff-8325040572eb" + }, + { + "name": "NVIDIA H100 PCIe", + "memoryTotal": "85520809984", + "cudaCores": 14592, + "architecture": "Hopper", + "uuid": "GPU-d42b6057-13e8-1e88-6aa1-9307df72dece" + } + ], + "cudaVersion": "13.0", + "writerId": "22e2fvlcbnolcv1k78ls5xixd1flwp1h" +} \ No newline at end of file diff --git a/code/RL_model/verl/verl_train/wandb/run-20260215_041259-udcrfv6m/files/wandb-summary.json b/code/RL_model/verl/verl_train/wandb/run-20260215_041259-udcrfv6m/files/wandb-summary.json new file mode 100644 index 0000000000000000000000000000000000000000..30d9e393fe9631b63dfe0a1ed4bbfba360b1d7c4 --- /dev/null +++ b/code/RL_model/verl/verl_train/wandb/run-20260215_041259-udcrfv6m/files/wandb-summary.json @@ -0,0 +1 @@ +{"val-aux/multiclinsum/reward/mean@1":3.428422510445475,"_step":0,"val-aux/num_turns/min":2,"_runtime":5078.563672917,"val-aux/num_turns/max":2,"_timestamp":1.7711469060723004e+09,"val-core/multiclinsum/acc/mean@1":3.428422560297041,"_wandb":{"runtime":5078},"val-aux/num_turns/mean":2} \ No newline at end of file diff --git a/code/RL_model/verl/verl_train/wandb/run-20260215_041259-udcrfv6m/logs/debug-core.log b/code/RL_model/verl/verl_train/wandb/run-20260215_041259-udcrfv6m/logs/debug-core.log new file mode 100644 index 0000000000000000000000000000000000000000..a7d6a3b88f979d3628aa772bae7565505d811fba --- /dev/null +++ b/code/RL_model/verl/verl_train/wandb/run-20260215_041259-udcrfv6m/logs/debug-core.log @@ -0,0 +1,11 @@ +{"time":"2026-02-15T04:12:59.338130457-05:00","level":"INFO","msg":"main: starting server","port-filename":"/tmp/tmp309lh4gf/port-3531859.txt","pid":3531859,"log-level":0,"disable-analytics":false,"shutdown-on-parent-exit":false,"enable-dcgm-profiling":false} +{"time":"2026-02-15T04:12:59.338990323-05:00","level":"INFO","msg":"server: will exit if parent process dies","ppid":3531859} +{"time":"2026-02-15T04:12:59.338957793-05:00","level":"INFO","msg":"server: accepting connections","addr":{"Name":"/tmp/wandb-3531859-3540660-1250128463/socket","Net":"unix"}} +{"time":"2026-02-15T04:12:59.513756388-05:00","level":"INFO","msg":"connection: ManageConnectionData: new connection created","id":"1(@)"} +{"time":"2026-02-15T04:12:59.526499185-05:00","level":"INFO","msg":"handleInformInit: received","streamId":"udcrfv6m","id":"1(@)"} +{"time":"2026-02-15T04:13:00.473645766-05:00","level":"INFO","msg":"handleInformInit: stream started","streamId":"udcrfv6m","id":"1(@)"} +{"time":"2026-02-15T04:13:06.83979438-05:00","level":"INFO","msg":"connection: cancelling request","id":"1(@)","requestId":"hh0l2wvjdvqw"} +{"time":"2026-02-15T05:37:39.555119974-05:00","level":"INFO","msg":"connection: cancelling request","id":"1(@)","requestId":"hh0l2wvjdvqw"} +{"time":"2026-02-15T05:37:40.196401258-05:00","level":"INFO","msg":"connection: cancelling request","id":"1(@)","requestId":"hh0l2wvjdvqw"} +{"time":"2026-02-15T05:37:40.199648123-05:00","level":"INFO","msg":"handleInformFinish: finish message received","streamId":"udcrfv6m","id":"1(@)"} +{"time":"2026-02-15T05:37:40.204065011-05:00","level":"INFO","msg":"handleInformFinish: stream closed","streamId":"udcrfv6m","id":"1(@)"} diff --git a/code/RL_model/verl/verl_train/wandb/run-20260215_041259-udcrfv6m/logs/debug-internal.log b/code/RL_model/verl/verl_train/wandb/run-20260215_041259-udcrfv6m/logs/debug-internal.log new file mode 100644 index 0000000000000000000000000000000000000000..d85087a26ed250275422dfe0e3ff1553995e6ad6 --- /dev/null +++ b/code/RL_model/verl/verl_train/wandb/run-20260215_041259-udcrfv6m/logs/debug-internal.log @@ -0,0 +1,12 @@ +{"time":"2026-02-15T04:12:59.527492701-05:00","level":"INFO","msg":"stream: starting","core version":"0.24.1"} +{"time":"2026-02-15T04:13:00.471569564-05:00","level":"INFO","msg":"stream: created new stream","id":"udcrfv6m"} +{"time":"2026-02-15T04:13:00.471681245-05:00","level":"INFO","msg":"handler: started","stream_id":"udcrfv6m"} +{"time":"2026-02-15T04:13:00.473630928-05:00","level":"INFO","msg":"stream: started","id":"udcrfv6m"} +{"time":"2026-02-15T04:13:00.473737398-05:00","level":"INFO","msg":"writer: started","stream_id":"udcrfv6m"} +{"time":"2026-02-15T04:13:00.473729123-05:00","level":"INFO","msg":"sender: started","stream_id":"udcrfv6m"} +{"time":"2026-02-15T05:37:40.070121053-05:00","level":"INFO","msg":"fileTransfer: Close: file transfer manager closed"} +{"time":"2026-02-15T05:37:40.192804138-05:00","level":"INFO","msg":"handler: operation stats","stats":{}} +{"time":"2026-02-15T05:37:40.199668455-05:00","level":"INFO","msg":"stream: closing","id":"udcrfv6m"} +{"time":"2026-02-15T05:37:40.199684204-05:00","level":"INFO","msg":"handler: closed","stream_id":"udcrfv6m"} +{"time":"2026-02-15T05:37:40.202653026-05:00","level":"INFO","msg":"sender: closed","stream_id":"udcrfv6m"} +{"time":"2026-02-15T05:37:40.202682659-05:00","level":"INFO","msg":"stream: closed","id":"udcrfv6m"} diff --git a/code/RL_model/verl/verl_train/wandb/run-20260215_041259-udcrfv6m/logs/debug.log b/code/RL_model/verl/verl_train/wandb/run-20260215_041259-udcrfv6m/logs/debug.log new file mode 100644 index 0000000000000000000000000000000000000000..93cd729d522367d34b09dd4f4e626ffe89394a4d --- /dev/null +++ b/code/RL_model/verl/verl_train/wandb/run-20260215_041259-udcrfv6m/logs/debug.log @@ -0,0 +1,24 @@ +2026-02-15 04:12:59,236 INFO MainThread:3531859 [wandb_setup.py:_flush():81] Current SDK version is 0.24.1 +2026-02-15 04:12:59,236 INFO MainThread:3531859 [wandb_setup.py:_flush():81] Configure stats pid to 3531859 +2026-02-15 04:12:59,236 INFO MainThread:3531859 [wandb_setup.py:_flush():81] Loading settings from environment variables +2026-02-15 04:12:59,236 INFO MainThread:3531859 [wandb_init.py:setup_run_log_directory():717] Logging user logs to /data/home_beta/mshahidul/readctrl/code/RL_model/verl/verl_train/wandb/run-20260215_041259-udcrfv6m/logs/debug.log +2026-02-15 04:12:59,236 INFO MainThread:3531859 [wandb_init.py:setup_run_log_directory():718] Logging internal logs to /data/home_beta/mshahidul/readctrl/code/RL_model/verl/verl_train/wandb/run-20260215_041259-udcrfv6m/logs/debug-internal.log +2026-02-15 04:12:59,237 INFO MainThread:3531859 [wandb_init.py:init():844] calling init triggers +2026-02-15 04:12:59,237 INFO MainThread:3531859 [wandb_init.py:init():849] wandb.init called with sweep_config: {} +config: {'actor_rollout_ref': {'actor': {'optim': {'_target_': 'verl.workers.config.FSDPOptimizerConfig', 'optimizer': 'AdamW', 'optimizer_impl': 'torch.optim', 'lr': 1e-06, 'lr_warmup_steps_ratio': 0.0, 'total_training_steps': 45, 'weight_decay': 0.01, 'lr_warmup_steps': -1, 'betas': [0.9, 0.999], 'clip_grad': 1.0, 'min_lr_ratio': 0.0, 'num_cycles': 0.5, 'lr_scheduler_type': 'constant', 'warmup_style': None, 'override_optimizer_config': None}, 'fsdp_config': {'_target_': 'verl.workers.config.FSDPEngineConfig', 'wrap_policy': {'min_num_params': 0}, 'param_offload': True, 'optimizer_offload': True, 'offload_policy': False, 'reshard_after_forward': True, 'fsdp_size': -1, 'forward_prefetch': False, 'model_dtype': 'fp32', 'use_orig_params': False, 'seed': 42, 'full_determinism': False, 'ulysses_sequence_parallel_size': 1, 'entropy_from_logits_with_chunking': False, 'use_torch_compile': True, 'entropy_checkpointing': False, 'forward_only': False, 'strategy': 'fsdp', 'dtype': 'bfloat16'}, '_target_': 'verl.workers.config.FSDPActorConfig', 'rollout_n': 3, 'strategy': 'fsdp', 'ppo_mini_batch_size': 256, 'ppo_micro_batch_size': None, 'ppo_micro_batch_size_per_gpu': 16, 'use_dynamic_bsz': False, 'ppo_max_token_len_per_gpu': 16384, 'clip_ratio': 0.2, 'clip_ratio_low': 0.2, 'clip_ratio_high': 0.2, 'tau_pos': 1.0, 'tau_neg': 1.05, 'freeze_vision_tower': False, 'policy_loss': {'_target_': 'verl.workers.config.PolicyLossConfig', 'loss_mode': 'vanilla', 'clip_cov_ratio': 0.0002, 'clip_cov_lb': 1.0, 'clip_cov_ub': 5.0, 'kl_cov_ratio': 0.0002, 'ppo_kl_coef': 0.1}, 'clip_ratio_c': 3.0, 'loss_agg_mode': 'token-mean', 'loss_scale_factor': None, 'entropy_coeff': 0, 'calculate_entropy': False, 'use_kl_loss': True, 'use_prefix_grouper': False, 'use_torch_compile': True, 'kl_loss_coef': 0.001, 'kl_loss_type': 'low_var_kl', 'ppo_epochs': 1, 'shuffle': False, 'data_loader_seed': 42, 'checkpoint': {'_target_': 'verl.trainer.config.CheckpointConfig', 'save_contents': ['model', 'optimizer', 'extra'], 'load_contents': ['model', 'optimizer', 'extra'], 'async_save': False}, 'use_fused_kernels': False, 'profiler': {'_target_': 'verl.utils.profiler.ProfilerConfig', 'tool': None, 'enable': False, 'all_ranks': False, 'ranks': [], 'save_path': 'outputs/profile', 'tool_config': {'nsys': {'_target_': 'verl.utils.profiler.config.NsightToolConfig', 'discrete': False}, 'npu': {'_target_': 'verl.utils.profiler.config.NPUToolConfig', 'contents': [], 'level': 'level0', 'analysis': True, 'discrete': False}, 'torch': {'_target_': 'verl.utils.profiler.config.TorchProfilerToolConfig', 'contents': [], 'discrete': False}, 'torch_memory': {'_target_': 'verl.utils.profiler.config.TorchMemoryToolConfig', 'trace_alloc_max_entries': 100000, 'stack_depth': 32}}}, 'router_replay': {'_target_': 'verl.workers.config.RouterReplayConfig', 'mode': 'disabled', 'record_file': None, 'replay_file': None}, 'grad_clip': 1.0, 'ulysses_sequence_parallel_size': 1, 'entropy_from_logits_with_chunking': False, 'entropy_checkpointing': False, 'use_remove_padding': True, 'calculate_sum_pi_squared': False, 'sum_pi_squared_checkpointing': False}, 'ref': {'rollout_n': 3, 'strategy': 'fsdp', 'use_torch_compile': True, 'log_prob_micro_batch_size': None, 'log_prob_micro_batch_size_per_gpu': 32, 'log_prob_use_dynamic_bsz': False, 'log_prob_max_token_len_per_gpu': 16384, 'profiler': {'_target_': 'verl.utils.profiler.ProfilerConfig', 'tool': None, 'enable': False, 'all_ranks': False, 'ranks': [], 'save_path': 'outputs/profile', 'tool_config': {'nsys': {'_target_': 'verl.utils.profiler.config.NsightToolConfig', 'discrete': False}, 'npu': {'_target_': 'verl.utils.profiler.config.NPUToolConfig', 'contents': [], 'level': 'level0', 'analysis': True, 'discrete': False}, 'torch': {'_target_': 'verl.utils.profiler.config.TorchProfilerToolConfig', 'contents': [], 'discrete': False}, 'torch_memory': {'_target_': 'verl.utils.profiler.config.TorchMemoryToolConfig', 'trace_alloc_max_entries': 100000, 'stack_depth': 32}}}, 'router_replay': {'_target_': 'verl.workers.config.RouterReplayConfig', 'mode': 'disabled', 'record_file': None, 'replay_file': None}, 'fsdp_config': {'_target_': 'verl.workers.config.FSDPEngineConfig', 'wrap_policy': {'min_num_params': 0}, 'param_offload': True, 'optimizer_offload': False, 'offload_policy': False, 'reshard_after_forward': True, 'fsdp_size': -1, 'forward_prefetch': False, 'model_dtype': 'fp32', 'use_orig_params': False, 'seed': 42, 'full_determinism': False, 'ulysses_sequence_parallel_size': 1, 'entropy_from_logits_with_chunking': False, 'use_torch_compile': True, 'entropy_checkpointing': False, 'forward_only': True, 'strategy': 'fsdp', 'dtype': 'bfloat16'}, '_target_': 'verl.workers.config.FSDPActorConfig', 'ulysses_sequence_parallel_size': 1, 'entropy_from_logits_with_chunking': False, 'entropy_checkpointing': False}, 'rollout': {'_target_': 'verl.workers.config.RolloutConfig', 'name': 'vllm', 'mode': 'async', 'temperature': 1.0, 'top_k': -1, 'top_p': 1, 'prompt_length': 1024, 'response_length': 2048, 'dtype': 'bfloat16', 'gpu_memory_utilization': 0.4, 'ignore_eos': False, 'enforce_eager': True, 'cudagraph_capture_sizes': None, 'free_cache_engine': True, 'tensor_model_parallel_size': 1, 'data_parallel_size': 1, 'expert_parallel_size': 1, 'pipeline_model_parallel_size': 1, 'max_num_batched_tokens': 8192, 'max_model_len': 8192, 'max_num_seqs': 1024, 'enable_chunked_prefill': True, 'enable_prefix_caching': True, 'logprobs_mode': 'processed_logprobs', 'scheduling_policy': 'fcfs', 'load_format': 'dummy', 'log_prob_micro_batch_size': None, 'log_prob_micro_batch_size_per_gpu': 32, 'log_prob_use_dynamic_bsz': False, 'log_prob_max_token_len_per_gpu': 16384, 'disable_log_stats': True, 'do_sample': True, 'n': 3, 'over_sample_rate': 0, 'multi_stage_wake_up': False, 'engine_kwargs': {'vllm': {}, 'sglang': {}, 'trtllm': {}}, 'val_kwargs': {'_target_': 'verl.workers.config.SamplingConfig', 'top_k': -1, 'top_p': 1.0, 'temperature': 0, 'n': 1, 'do_sample': False}, 'multi_turn': {'_target_': 'verl.workers.config.MultiTurnConfig', 'enable': False, 'max_assistant_turns': None, 'tool_config_path': None, 'max_user_turns': None, 'max_parallel_calls': 1, 'max_tool_response_length': 256, 'tool_response_truncate_side': 'middle', 'interaction_config_path': None, 'use_inference_chat_template': False, 'tokenization_sanity_check_mode': 'strict', 'format': 'hermes', 'num_repeat_rollouts': None}, 'calculate_log_probs': False, 'agent': {'_target_': 'verl.workers.config.AgentLoopConfig', 'num_workers': 8, 'default_agent_loop': 'single_turn_agent', 'agent_loop_config_path': None, 'custom_async_server': {'_target_': 'verl.workers.config.CustomAsyncServerConfig', 'path': None, 'name': None}}, 'checkpoint_engine': {'_target_': 'verl.workers.config.CheckpointEngineConfig', 'backend': 'naive', 'update_weights_bucket_megabytes': 2048, 'engine_kwargs': {}}, 'trace': {'_target_': 'verl.workers.config.TraceConfig', 'backend': None, 'token2text': False, 'max_samples_per_step_per_worker': None}, 'skip_rollout': False, 'skip_dump_dir': '/tmp/rollout_dump', 'skip_tokenizer_init': True, 'enable_rollout_routing_replay': False, 'profiler': {'_target_': 'verl.utils.profiler.ProfilerConfig', 'tool': None, 'enable': False, 'all_ranks': False, 'ranks': [], 'save_path': 'outputs/profile', 'tool_config': {'nsys': {'_target_': 'verl.utils.profiler.config.NsightToolConfig', 'discrete': False}, 'npu': {'_target_': 'verl.utils.profiler.config.NPUToolConfig', 'contents': [], 'level': 'level0', 'analysis': True, 'discrete': False}, 'torch': {'_target_': 'verl.utils.profiler.config.TorchProfilerToolConfig', 'contents': [], 'discrete': False}, 'torch_memory': {'_target_': 'verl.utils.profiler.config.TorchMemoryToolConfig', 'trace_alloc_max_entries': 100000, 'stack_depth': 32}}}, 'prometheus': {'_target_': 'verl.workers.config.PrometheusConfig', 'enable': False, 'port': 9090, 'file': '/tmp/ray/session_latest/metrics/prometheus/prometheus.yml', 'served_model_name': 'Qwen/Qwen3-4B-Instruct-2507'}, 'quantization': None, 'quantization_config_file': None, 'mtp': {'_target_': 'verl.workers.config.MtpConfig', 'enable': False, 'enable_train': False, 'enable_rollout': False, 'detach_encoder': False, 'mtp_loss_scaling_factor': 0.1, 'speculative_algorithm': 'EAGLE', 'speculative_num_steps': 3, 'speculative_eagle_topk': 1, 'speculative_num_draft_tokens': 4, 'method': 'mtp', 'num_speculative_tokens': 1}, 'layered_summon': False}, 'model': {'_target_': 'verl.workers.config.HFModelConfig', 'path': 'Qwen/Qwen3-4B-Instruct-2507', 'hf_config_path': None, 'tokenizer_path': None, 'use_shm': False, 'trust_remote_code': False, 'custom_chat_template': None, 'external_lib': None, 'override_config': {}, 'enable_gradient_checkpointing': True, 'enable_activation_offload': False, 'use_remove_padding': True, 'lora_rank': 0, 'lora_alpha': 16, 'target_modules': 'all-linear', 'exclude_modules': None, 'lora_adapter_path': None, 'use_liger': False, 'use_fused_kernels': False, 'fused_kernel_options': {'impl_backend': 'torch'}, 'tiled_mlp': {'enabled': False, 'num_shards': 4}, 'mtp': {'_target_': 'verl.workers.config.MtpConfig', 'enable': False, 'enable_train': False, 'enable_rollout': False, 'detach_encoder': False, 'mtp_loss_scaling_factor': 0.1, 'speculative_algorithm': 'EAGLE', 'speculative_num_steps': 3, 'speculative_eagle_topk': 1, 'speculative_num_draft_tokens': 4, 'method': 'mtp', 'num_speculative_tokens': 1}}, 'hybrid_engine': True, 'nccl_timeout': 600}, 'data': {'tokenizer': None, 'use_shm': False, 'train_files': '/home/mshahidul/readctrl/code/RL_model/verl/verl_train/dataset/train.parquet', 'val_files': '/home/mshahidul/readctrl/code/RL_model/verl/verl_train/dataset/test.parquet', 'train_max_samples': -1, 'val_max_samples': -1, 'prompt_key': 'prompt', 'reward_fn_key': 'data_source', 'max_prompt_length': 1024, 'max_response_length': 2048, 'train_batch_size': 512, 'val_batch_size': None, 'tool_config_path': None, 'return_raw_input_ids': False, 'return_raw_chat': True, 'return_full_prompt': False, 'shuffle': True, 'seed': None, 'dataloader_num_workers': 8, 'image_patch_size': 14, 'validation_shuffle': False, 'filter_overlong_prompts': True, 'filter_overlong_prompts_workers': 1, 'truncation': 'error', 'image_key': 'images', 'video_key': 'videos', 'trust_remote_code': False, 'custom_cls': {'path': None, 'name': None}, 'return_multi_modal_inputs': True, 'sampler': {'class_path': None, 'class_name': None}, 'datagen': {'path': None, 'name': None}, 'apply_chat_template_kwargs': {}}, 'reward_manager': {'_target_': 'verl.trainer.config.config.RewardManagerConfig', 'source': 'register', 'name': 'naive', 'module': {'_target_': 'verl.trainer.config.config.ModuleConfig', 'path': None, 'name': 'custom_reward_manager'}}, 'critic': {'optim': {'_target_': 'verl.workers.config.FSDPOptimizerConfig', 'optimizer': 'AdamW', 'optimizer_impl': 'torch.optim', 'lr': 1e-05, 'lr_warmup_steps_ratio': 0.0, 'total_training_steps': 45, 'weight_decay': 0.01, 'lr_warmup_steps': -1, 'betas': [0.9, 0.999], 'clip_grad': 1.0, 'min_lr_ratio': 0.0, 'num_cycles': 0.5, 'lr_scheduler_type': 'constant', 'warmup_style': None, 'override_optimizer_config': None}, 'model': {'fsdp_config': {'_target_': 'verl.workers.config.FSDPEngineConfig', 'wrap_policy': {'min_num_params': 0}, 'param_offload': False, 'optimizer_offload': False, 'offload_policy': False, 'reshard_after_forward': True, 'fsdp_size': -1, 'forward_prefetch': False, 'model_dtype': 'fp32', 'use_orig_params': False, 'seed': 42, 'full_determinism': False, 'ulysses_sequence_parallel_size': 1, 'entropy_from_logits_with_chunking': False, 'use_torch_compile': True, 'entropy_checkpointing': False, 'forward_only': False, 'strategy': 'fsdp', 'dtype': 'bfloat16'}, 'path': '~/models/deepseek-llm-7b-chat', 'tokenizer_path': 'Qwen/Qwen3-4B-Instruct-2507', 'override_config': {}, 'external_lib': None, 'trust_remote_code': False, '_target_': 'verl.workers.config.FSDPCriticModelCfg', 'use_shm': False, 'enable_gradient_checkpointing': True, 'enable_activation_offload': False, 'use_remove_padding': False, 'lora_rank': 0, 'lora_alpha': 16, 'target_modules': 'all-linear', 'tiled_mlp': {'enabled': False, 'num_shards': 4}}, '_target_': 'verl.workers.config.FSDPCriticConfig', 'rollout_n': 3, 'strategy': 'fsdp', 'enable': None, 'ppo_mini_batch_size': 256, 'ppo_micro_batch_size': None, 'ppo_micro_batch_size_per_gpu': None, 'use_dynamic_bsz': False, 'ppo_max_token_len_per_gpu': 32768, 'forward_max_token_len_per_gpu': 32768, 'ppo_epochs': 1, 'shuffle': False, 'data_loader_seed': 42, 'cliprange_value': 0.5, 'loss_agg_mode': 'token-mean', 'checkpoint': {'_target_': 'verl.trainer.config.CheckpointConfig', 'save_contents': ['model', 'optimizer', 'extra'], 'load_contents': ['model', 'optimizer', 'extra'], 'async_save': False}, 'profiler': {'_target_': 'verl.utils.profiler.ProfilerConfig', 'tool': None, 'enable': False, 'all_ranks': False, 'ranks': [], 'save_path': 'outputs/profile', 'tool_config': {'nsys': {'_target_': 'verl.utils.profiler.config.NsightToolConfig', 'discrete': False}, 'npu': {'_target_': 'verl.utils.profiler.config.NPUToolConfig', 'contents': [], 'level': 'level0', 'analysis': True, 'discrete': False}, 'torch': {'_target_': 'verl.utils.profiler.config.TorchProfilerToolConfig', 'contents': [], 'discrete': False}, 'torch_memory': {'_target_': 'verl.utils.profiler.config.TorchMemoryToolConfig', 'trace_alloc_max_entries': 100000, 'stack_depth': 32}}}, 'forward_micro_batch_size': None, 'forward_micro_batch_size_per_gpu': None, 'ulysses_sequence_parallel_size': 1, 'grad_clip': 1.0}, 'reward_model': {'enable': False, 'enable_resource_pool': False, 'n_gpus_per_node': 8, 'nnodes': 0, 'strategy': 'fsdp', 'model': {'input_tokenizer': 'Qwen/Qwen3-4B-Instruct-2507', 'path': '~/models/FsfairX-LLaMA3-RM-v0.1', 'external_lib': None, 'trust_remote_code': False, 'override_config': {}, 'use_shm': False, 'use_remove_padding': False, 'use_fused_kernels': False, 'fsdp_config': {'_target_': 'verl.workers.config.FSDPEngineConfig', 'wrap_policy': {'min_num_params': 0}, 'param_offload': False, 'reshard_after_forward': True, 'fsdp_size': -1, 'forward_prefetch': False}}, 'micro_batch_size': None, 'micro_batch_size_per_gpu': None, 'max_length': None, 'use_dynamic_bsz': False, 'forward_max_token_len_per_gpu': 32768, 'reward_manager': 'naive', 'reward_loop_source': 'register', 'reward_loop_module_path': None, 'reward_loop_class_name': None, 'launch_reward_fn_async': False, 'sandbox_fusion': {'url': None, 'max_concurrent': 64, 'memory_limit_mb': 1024}, 'profiler': {'_target_': 'verl.utils.profiler.ProfilerConfig', 'tool': None, 'enable': False, 'all_ranks': False, 'ranks': [], 'save_path': 'outputs/profile', 'tool_config': {'nsys': {'_target_': 'verl.utils.profiler.config.NsightToolConfig', 'discrete': False}, 'npu': {'_target_': 'verl.utils.profiler.config.NPUToolConfig', 'contents': [], 'level': 'level0', 'analysis': True, 'discrete': False}, 'torch': {'_target_': 'verl.utils.profiler.config.TorchProfilerToolConfig', 'contents': [], 'discrete': False}, 'torch_memory': {'_target_': 'verl.utils.profiler.config.TorchMemoryToolConfig', 'trace_alloc_max_entries': 100000, 'stack_depth': 32}}}, 'ulysses_sequence_parallel_size': 1, 'use_reward_loop': True, 'num_workers': 1, 'rollout': {'_target_': 'verl.workers.config.RolloutConfig', 'name': '???', 'dtype': 'bfloat16', 'gpu_memory_utilization': 0.5, 'enforce_eager': True, 'cudagraph_capture_sizes': None, 'free_cache_engine': True, 'data_parallel_size': 1, 'expert_parallel_size': 1, 'tensor_model_parallel_size': 2, 'max_num_batched_tokens': 8192, 'max_model_len': None, 'max_num_seqs': 1024, 'load_format': 'auto', 'engine_kwargs': {}, 'limit_images': None, 'enable_chunked_prefill': True, 'enable_prefix_caching': True, 'disable_log_stats': True, 'skip_tokenizer_init': False, 'prompt_length': 2048, 'response_length': 2048}}, 'algorithm': {'rollout_correction': {'rollout_is': None, 'rollout_is_threshold': 2.0, 'rollout_rs': None, 'rollout_rs_threshold': None, 'bypass_mode': False, 'loss_type': 'ppo_clip', 'rollout_is_batch_normalize': False}, '_target_': 'verl.trainer.config.AlgoConfig', 'gamma': 1.0, 'lam': 1.0, 'adv_estimator': 'grpo', 'norm_adv_by_std_in_grpo': True, 'use_kl_in_reward': False, 'kl_penalty': 'kl', 'kl_ctrl': {'_target_': 'verl.trainer.config.KLControlConfig', 'type': 'fixed', 'kl_coef': 0.001, 'horizon': 10000, 'target_kl': 0.1}, 'use_pf_ppo': False, 'pf_ppo': {'reweight_method': 'pow', 'weight_pow': 2.0}}, 'custom_reward_function': {'path': '/home/mshahidul/readctrl/code/RL_model/verl/verl_train/reward_func/reward_func/reward_new_v2.py', 'name': 'compute_score'}, 'trainer': {'balance_batch': True, 'total_epochs': 15, 'total_training_steps': None, 'project_name': 'readctrl-verl', 'experiment_name': 'qwen3-4b-instruct-en', 'logger': ['console', 'wandb'], 'log_val_generations': 0, 'rollout_data_dir': None, 'validation_data_dir': None, 'nnodes': 1, 'n_gpus_per_node': 2, 'save_freq': 5, 'esi_redundant_time': 0, 'resume_mode': 'auto', 'resume_from_path': None, 'val_before_train': True, 'val_only': False, 'test_freq': 10, 'critic_warmup': 0, 'default_hdfs_dir': None, 'del_local_ckpt_after_load': False, 'default_local_dir': '/home/mshahidul/readctrl/code/RL_model/models/RL_model_subclaim_classifier_v2', 'max_actor_ckpt_to_keep': 1, 'max_critic_ckpt_to_keep': 1, 'ray_wait_register_center_timeout': 300, 'device': 'cuda', 'use_legacy_worker_impl': 'auto', 'remove_previous_ckpt_in_save': True}, 'global_profiler': {'_target_': 'verl.utils.profiler.ProfilerConfig', 'tool': None, 'steps': None, 'profile_continuous_steps': False, 'save_path': 'outputs/profile', 'global_tool_config': {'nsys': {'_target_': 'verl.utils.profiler.config.NsightToolConfig', 'discrete': False, 'controller_nsight_options': {'trace': 'cuda,nvtx,cublas,ucx', 'cuda-memory-usage': 'true', 'cuda-graph-trace': 'graph'}, 'worker_nsight_options': {'trace': 'cuda,nvtx,cublas,ucx', 'cuda-memory-usage': 'true', 'cuda-graph-trace': 'graph', 'capture-range': 'cudaProfilerApi', 'capture-range-end': None, 'kill': 'none'}}, 'torch_memory': {'trace_alloc_max_entries': 100000, 'stack_depth': 32, 'context': 'all', 'stacks': 'all', 'kw_args': {}}}}, 'transfer_queue': {'enable': False}, 'ray_kwargs': {'ray_init': {'num_cpus': None}, 'timeline_json_file': None}, '_wandb': {}} +2026-02-15 04:12:59,238 INFO MainThread:3531859 [wandb_init.py:init():892] starting backend +2026-02-15 04:12:59,515 INFO MainThread:3531859 [wandb_init.py:init():895] sending inform_init request +2026-02-15 04:12:59,524 INFO MainThread:3531859 [wandb_init.py:init():903] backend started and connected +2026-02-15 04:12:59,536 INFO MainThread:3531859 [wandb_init.py:init():973] updated telemetry +2026-02-15 04:12:59,723 INFO MainThread:3531859 [wandb_init.py:init():997] communicating run to backend with 90.0 second timeout +2026-02-15 04:13:00,989 INFO MainThread:3531859 [wandb_init.py:init():1042] starting run threads in backend +2026-02-15 04:13:01,813 INFO MainThread:3531859 [wandb_run.py:_console_start():2529] atexit reg +2026-02-15 04:13:01,814 INFO MainThread:3531859 [wandb_run.py:_redirect():2377] redirect: wrap_raw +2026-02-15 04:13:01,814 INFO MainThread:3531859 [wandb_run.py:_redirect():2446] Wrapping output streams. +2026-02-15 04:13:01,814 INFO MainThread:3531859 [wandb_run.py:_redirect():2469] Redirects installed. +2026-02-15 04:13:01,826 INFO MainThread:3531859 [wandb_init.py:init():1082] run started, returning control to user process +2026-02-15 05:37:39,550 INFO MainThread:3531859 [wandb_run.py:_finish():2295] finishing run shahidulshakib034-khulna-university-of-engineering-techn/readctrl-verl/udcrfv6m +2026-02-15 05:37:39,552 INFO MainThread:3531859 [wandb_run.py:_atexit_cleanup():2494] got exitcode: 0 +2026-02-15 05:37:39,553 INFO MainThread:3531859 [wandb_run.py:_restore():2476] restore +2026-02-15 05:37:39,554 INFO MainThread:3531859 [wandb_run.py:_restore():2482] restore done +2026-02-15 05:37:40,196 INFO MainThread:3531859 [wandb_run.py:_footer_sync_info():3871] logging synced files