{ "train_dataset": "/workspace/fms4navigation/datasets/Faithfulness-Critic-Dataset/train_dataset.jsonl", "val_dataset": "/workspace/fms4navigation/datasets/Faithfulness-Critic-Dataset/val_dataset_512.jsonl", "dataset_dir": "/workspace/fms4navigation/datasets/Faithfulness-Critic-Dataset", "max_length": 4096, "seed": 0, "eval_max_new_tokens": 96, "eval_steps": 100, "balance_by_inconsistent_count": false, "balance_alpha": 1.0, "base_model": "Qwen/Qwen3-VL-4B-Instruct", "use_lora": true, "lora_rank": 256, "lora_alpha": null, "lora_dropout": 0.05, "lora_target_modules": "q_proj,k_proj,v_proj,o_proj", "flash_attn": false, "gradient_checkpointing": false, "output_dir": "/workspace/fms4navigation/results/PRM-v2-r256", "epochs": 1.0, "per_device_batch_size": 4, "grad_accum_steps": 1, "lr": 0.0001, "warmup_ratio": 0.03, "weight_decay": 0.0, "logging_steps": 20, "save_steps": 300, "save_total_limit": 3, "max_steps": -1, "report_to": "wandb" }