| { |
| "mode": "train", |
| "lineage": { |
| "created_at_utc": "2026-04-11T22:36:28Z", |
| "run_id": "shaer_grpo_20260411_223409", |
| "run_dir": "/root/workspace/Shaer/grpo/outputs/train/shaer_grpo_20260411_223409", |
| "chain_id": "shaer_grpo_20260411_192107", |
| "root_run_id": "shaer_grpo_20260411_192107", |
| "parent_run_id": "shaer_grpo_20260411_192107", |
| "parent_run_dir": "/root/workspace/Shaer/grpo/outputs/train/shaer_grpo_20260411_192107", |
| "run_sequence_index": 1 |
| }, |
| "dataset": { |
| "dataset_id": "Shaer-AI/ashaar-enhanced-desc-baseform-final-sft-lte20-min500-splits-grpo-meter-count-v1", |
| "dataset_source_id": "Shaer-AI/ashaar-with-enhanced-descriptions-baseform-final-sft-lte20-min500-splits", |
| "train_split": "train", |
| "eval_split": "eval", |
| "test_split": "test", |
| "train_size": 24897, |
| "eval_size": 104, |
| "test_size": 204, |
| "hard_diagnostic_size": 3126, |
| "phase1_max_bayts": "20", |
| "allowed_meters": [], |
| "train_manifest_path": "/root/workspace/Shaer/grpo/outputs/curated_meter_count_drop_trio/cap_3000/selected_manifest.csv", |
| "hard_diagnostic_manifest_path": "/root/workspace/Shaer/grpo/outputs/curated_meter_count_drop_trio/hard_diagnostic_cap_256/selected_manifest.csv" |
| }, |
| "rewards": { |
| "active_rewards": [ |
| "meter", |
| "count_adherence", |
| "arabic_clean", |
| "repeat_penalty", |
| "total_composite" |
| ], |
| "weights": { |
| "meter": 0.0, |
| "count_adherence": 0.0, |
| "arabic_clean": 0.0, |
| "repeat_penalty": 0.0, |
| "total_composite": 1.0 |
| } |
| }, |
| "model": { |
| "base_model_id": "Navid-AI/Yehia-7B-preview", |
| "sft_adapter_repo": "Shaer-AI/Shaer-adapters", |
| "sft_adapter_mode": "fresh_sft/train", |
| "load_in_4bit_requested": true, |
| "gradient_checkpointing": true |
| }, |
| "effective_generation": { |
| "use_vllm": true, |
| "vllm_mode": "colocate", |
| "vllm_gpu_memory_utilization": 0.5, |
| "max_prompt_length": 1024, |
| "max_completion_length": 512, |
| "temperature": 0.9, |
| "top_p": 1.0, |
| "num_generations": 8, |
| "num_generations_eval": 1 |
| }, |
| "effective_trainer": { |
| "learning_rate": 1e-05, |
| "per_device_train_batch_size": 1, |
| "per_device_eval_batch_size_configured": 8, |
| "per_device_eval_batch_size": 8, |
| "gradient_accumulation_steps": 8, |
| "max_steps": 3300, |
| "logging_steps": 1, |
| "eval_steps": 50, |
| "save_steps": 50, |
| "save_total_limit": 4, |
| "beta": 0.0, |
| "scale_rewards": "group", |
| "loss_type": "dapo", |
| "mask_truncated_completions": true, |
| "bf16": true, |
| "load_best_model_at_end": true, |
| "metric_for_best_model": "eval_reward_total_mean", |
| "greater_is_better": true, |
| "hub_strategy": "HubStrategy.CHECKPOINT", |
| "push_to_hub": true, |
| "hub_model_id": "Shaer-AI/Shaer-adapters-grpo", |
| "generations_repo_id": "Shaer-AI/Shaer-adapters-grpo-generations" |
| }, |
| "sanity_mode_note": "When mode='sanity', these effective values override the base trainer/generation blocks from config_snapshot.json." |
| } |