Upload training_meta.json with huggingface_hub
Browse files- training_meta.json +50 -50
training_meta.json
CHANGED
|
@@ -1,64 +1,64 @@
|
|
| 1 |
{
|
| 2 |
"best_val_loss": 1.6575258653610945,
|
| 3 |
"stream_offsets": {
|
| 4 |
-
"nvidia/Nemotron-Cascade-SFT-Stage-1/general/train":
|
| 5 |
-
"nvidia/Nemotron-Cascade-SFT-Stage-1/math/train":
|
| 6 |
-
"nvidia/Nemotron-Cascade-SFT-Stage-1/code/train":
|
| 7 |
-
"nvidia/Nemotron-Cascade-SFT-Stage-1/science/train":
|
| 8 |
-
"nvidia/Nemotron-Cascade-SFT-Stage-2/math/train":
|
| 9 |
-
"nvidia/Nemotron-Cascade-SFT-Stage-2/code/train":
|
| 10 |
-
"nvidia/Nemotron-Cascade-SFT-Stage-2/science/train":
|
| 11 |
-
"nvidia/Nemotron-Cascade-SFT-Stage-2/general/train":
|
| 12 |
-
"nvidia/Nemotron-Cascade-SFT-Stage-2/tool_calling/train":
|
| 13 |
-
"nvidia/Nemotron-Cascade-SFT-Stage-2/instruction-following/train":
|
| 14 |
-
"nvidia/Nemotron-Cascade-SFT-Stage-2/swe_repair/train":
|
| 15 |
-
"nvidia/Nemotron-Cascade-SFT-Stage-2/swe_localization/train":
|
| 16 |
-
"nvidia/Nemotron-Cascade-SFT-Stage-2/swe_testgen/train":
|
| 17 |
-
"nvidia/Nemotron-Cascade-2-SFT-Data/math/train":
|
| 18 |
-
"nvidia/Nemotron-Cascade-2-SFT-Data/science/train":
|
| 19 |
-
"nvidia/Nemotron-Cascade-2-SFT-Data/chat/train":
|
| 20 |
-
"nvidia/Nemotron-Cascade-2-SFT-Data/instruction_following/train":
|
| 21 |
"nvidia/Nemotron-Cascade-2-SFT-Data/safety/train": 3570,
|
| 22 |
-
"nvidia/Nemotron-Cascade-2-SFT-Data/conversational_agent/train":
|
| 23 |
-
"nvidia/Nemotron-Cascade-2-SFT-Data/swe/train":
|
| 24 |
-
"nvidia/Nemotron-Post-Training-Dataset-v1/default/chat":
|
| 25 |
-
"nvidia/Nemotron-Post-Training-Dataset-v1/default/code":
|
| 26 |
-
"nvidia/Nemotron-Post-Training-Dataset-v1/default/math":
|
| 27 |
-
"nvidia/Nemotron-Post-Training-Dataset-v1/default/stem":
|
| 28 |
-
"nvidia/Nemotron-Post-Training-Dataset-v1/default/tool_calling":
|
| 29 |
-
"nvidia/AceReason-1.1-SFT/default/train":
|
| 30 |
"nvidia/OpenMathInstruct-2/default/train": 58705,
|
| 31 |
"nvidia/OpenMathReasoning/default/cot": 0,
|
| 32 |
"nvidia/OpenMathReasoning/default/tir": 0,
|
| 33 |
"nvidia/OpenMathReasoning/default/genselect": 0,
|
| 34 |
-
"nvidia/Nemotron-Math-v2/default/high_part00":
|
| 35 |
-
"nvidia/Nemotron-Math-v2/default/high_part01":
|
| 36 |
-
"nvidia/Nemotron-Math-v2/default/high_part02":
|
| 37 |
-
"nvidia/Nemotron-Math-v2/default/medium":
|
| 38 |
-
"nvidia/Nemotron-Math-v2/default/low":
|
| 39 |
-
"nvidia/Nemotron-SFT-Math-v3/default/train":
|
| 40 |
-
"nvidia/Nemotron-Research-GooseReason-0.7M/default/math":
|
| 41 |
-
"nvidia/Nemotron-Research-GooseReason-0.7M/default/code":
|
| 42 |
-
"nvidia/Nemotron-Research-GooseReason-0.7M/default/stem":
|
| 43 |
-
"nvidia/OpenCodeGeneticInstruct/mixtral-8x22b-instruct/train":
|
| 44 |
-
"nvidia/OpenCodeGeneticInstruct/qwen2.5-32b-instruct/train":
|
| 45 |
-
"nvidia/OpenCodeReasoning/split_0/split_0":
|
| 46 |
-
"nvidia/OpenCodeReasoning/split_1/split_1":
|
| 47 |
"nvidia/Nemotron-Math-HumanReasoning/default/train": 0,
|
| 48 |
-
"nvidia/Nemotron-SFT-Agentic-v2/default/interactive_agent":
|
| 49 |
"nvidia/Nemotron-SFT-Agentic-v2/default/tool_calling": 0,
|
| 50 |
-
"nvidia/Nemotron-Instruction-Following-Chat-v1/default/chat_if":
|
| 51 |
"nvidia/Nemotron-Instruction-Following-Chat-v1/default/structured_outputs": 4969,
|
| 52 |
-
"nvidia/Nemotron-Competitive-Programming-v1/default/competitive_coding_python_part00":
|
| 53 |
-
"nvidia/Nemotron-Competitive-Programming-v1/default/competitive_coding_python_part01":
|
| 54 |
-
"nvidia/Nemotron-Competitive-Programming-v1/default/competitive_coding_cpp_part00":
|
| 55 |
-
"nvidia/Nemotron-Competitive-Programming-v1/default/competitive_coding_cpp_part01":
|
| 56 |
"nvidia/Nemotron-RL-Super-Training-Blends/default/rlvr1": 3933,
|
| 57 |
-
"ibivibiv/math_instruct/default/train":
|
| 58 |
-
"javi22/simple-instruct-dataset/default/train":
|
| 59 |
"agentlans/chatgpt/default/train": 0,
|
| 60 |
"fineinstructions-pretraining/nemotron_fineinstructions_1T_exp_chat/default/train": 0,
|
| 61 |
-
"agentlans/chatgpt/default/train::hf://datasets/agentlans/chatgpt/HydraLM-airoboros-gpt4-1.4_alpaca.jsonl.zst":
|
| 62 |
"agentlans/chatgpt/default/train::hf://datasets/agentlans/chatgpt/Mxode-Magpie-Pro-10K-GPT4o-mini.jsonl.zst": 0,
|
| 63 |
"agentlans/chatgpt/default/train::hf://datasets/agentlans/chatgpt/PKU-Alignment-Align-Anything-Instruction-100K.jsonl.zst": 0,
|
| 64 |
"agentlans/chatgpt/default/train::hf://datasets/agentlans/chatgpt/PawanKrd-gpt-4o-200k.jsonl.zst": 0,
|
|
@@ -116,9 +116,9 @@
|
|
| 116 |
"agentlans/chatgpt/default/train::hf://datasets/agentlans/chatgpt/microsoft-orca-agentinstruct-1m-v1.jsonl.zst": 0,
|
| 117 |
"agentlans/chatgpt/default/train::hf://datasets/agentlans/chatgpt/microsoft-orca-math-word-problems-200k.jsonl.zst": 0
|
| 118 |
},
|
| 119 |
-
"avg_train_loss": 2.
|
| 120 |
-
"sft_step":
|
| 121 |
-
"timestamp": "2026-04-22 14:
|
| 122 |
"global_batch": 128,
|
| 123 |
"seq_len": 8192,
|
| 124 |
"lr_start": 5e-05,
|
|
|
|
| 1 |
{
|
| 2 |
"best_val_loss": 1.6575258653610945,
|
| 3 |
"stream_offsets": {
|
| 4 |
+
"nvidia/Nemotron-Cascade-SFT-Stage-1/general/train": 29711,
|
| 5 |
+
"nvidia/Nemotron-Cascade-SFT-Stage-1/math/train": 29711,
|
| 6 |
+
"nvidia/Nemotron-Cascade-SFT-Stage-1/code/train": 29711,
|
| 7 |
+
"nvidia/Nemotron-Cascade-SFT-Stage-1/science/train": 29711,
|
| 8 |
+
"nvidia/Nemotron-Cascade-SFT-Stage-2/math/train": 29711,
|
| 9 |
+
"nvidia/Nemotron-Cascade-SFT-Stage-2/code/train": 29711,
|
| 10 |
+
"nvidia/Nemotron-Cascade-SFT-Stage-2/science/train": 29711,
|
| 11 |
+
"nvidia/Nemotron-Cascade-SFT-Stage-2/general/train": 29711,
|
| 12 |
+
"nvidia/Nemotron-Cascade-SFT-Stage-2/tool_calling/train": 29711,
|
| 13 |
+
"nvidia/Nemotron-Cascade-SFT-Stage-2/instruction-following/train": 29711,
|
| 14 |
+
"nvidia/Nemotron-Cascade-SFT-Stage-2/swe_repair/train": 29711,
|
| 15 |
+
"nvidia/Nemotron-Cascade-SFT-Stage-2/swe_localization/train": 29711,
|
| 16 |
+
"nvidia/Nemotron-Cascade-SFT-Stage-2/swe_testgen/train": 29710,
|
| 17 |
+
"nvidia/Nemotron-Cascade-2-SFT-Data/math/train": 29710,
|
| 18 |
+
"nvidia/Nemotron-Cascade-2-SFT-Data/science/train": 29710,
|
| 19 |
+
"nvidia/Nemotron-Cascade-2-SFT-Data/chat/train": 29710,
|
| 20 |
+
"nvidia/Nemotron-Cascade-2-SFT-Data/instruction_following/train": 29710,
|
| 21 |
"nvidia/Nemotron-Cascade-2-SFT-Data/safety/train": 3570,
|
| 22 |
+
"nvidia/Nemotron-Cascade-2-SFT-Data/conversational_agent/train": 29710,
|
| 23 |
+
"nvidia/Nemotron-Cascade-2-SFT-Data/swe/train": 29710,
|
| 24 |
+
"nvidia/Nemotron-Post-Training-Dataset-v1/default/chat": 29710,
|
| 25 |
+
"nvidia/Nemotron-Post-Training-Dataset-v1/default/code": 29710,
|
| 26 |
+
"nvidia/Nemotron-Post-Training-Dataset-v1/default/math": 29710,
|
| 27 |
+
"nvidia/Nemotron-Post-Training-Dataset-v1/default/stem": 29710,
|
| 28 |
+
"nvidia/Nemotron-Post-Training-Dataset-v1/default/tool_calling": 29710,
|
| 29 |
+
"nvidia/AceReason-1.1-SFT/default/train": 29710,
|
| 30 |
"nvidia/OpenMathInstruct-2/default/train": 58705,
|
| 31 |
"nvidia/OpenMathReasoning/default/cot": 0,
|
| 32 |
"nvidia/OpenMathReasoning/default/tir": 0,
|
| 33 |
"nvidia/OpenMathReasoning/default/genselect": 0,
|
| 34 |
+
"nvidia/Nemotron-Math-v2/default/high_part00": 29710,
|
| 35 |
+
"nvidia/Nemotron-Math-v2/default/high_part01": 29710,
|
| 36 |
+
"nvidia/Nemotron-Math-v2/default/high_part02": 29710,
|
| 37 |
+
"nvidia/Nemotron-Math-v2/default/medium": 29710,
|
| 38 |
+
"nvidia/Nemotron-Math-v2/default/low": 29710,
|
| 39 |
+
"nvidia/Nemotron-SFT-Math-v3/default/train": 29710,
|
| 40 |
+
"nvidia/Nemotron-Research-GooseReason-0.7M/default/math": 29710,
|
| 41 |
+
"nvidia/Nemotron-Research-GooseReason-0.7M/default/code": 29710,
|
| 42 |
+
"nvidia/Nemotron-Research-GooseReason-0.7M/default/stem": 29710,
|
| 43 |
+
"nvidia/OpenCodeGeneticInstruct/mixtral-8x22b-instruct/train": 29710,
|
| 44 |
+
"nvidia/OpenCodeGeneticInstruct/qwen2.5-32b-instruct/train": 29710,
|
| 45 |
+
"nvidia/OpenCodeReasoning/split_0/split_0": 29710,
|
| 46 |
+
"nvidia/OpenCodeReasoning/split_1/split_1": 29710,
|
| 47 |
"nvidia/Nemotron-Math-HumanReasoning/default/train": 0,
|
| 48 |
+
"nvidia/Nemotron-SFT-Agentic-v2/default/interactive_agent": 29710,
|
| 49 |
"nvidia/Nemotron-SFT-Agentic-v2/default/tool_calling": 0,
|
| 50 |
+
"nvidia/Nemotron-Instruction-Following-Chat-v1/default/chat_if": 29710,
|
| 51 |
"nvidia/Nemotron-Instruction-Following-Chat-v1/default/structured_outputs": 4969,
|
| 52 |
+
"nvidia/Nemotron-Competitive-Programming-v1/default/competitive_coding_python_part00": 29710,
|
| 53 |
+
"nvidia/Nemotron-Competitive-Programming-v1/default/competitive_coding_python_part01": 29710,
|
| 54 |
+
"nvidia/Nemotron-Competitive-Programming-v1/default/competitive_coding_cpp_part00": 29710,
|
| 55 |
+
"nvidia/Nemotron-Competitive-Programming-v1/default/competitive_coding_cpp_part01": 29710,
|
| 56 |
"nvidia/Nemotron-RL-Super-Training-Blends/default/rlvr1": 3933,
|
| 57 |
+
"ibivibiv/math_instruct/default/train": 7007801,
|
| 58 |
+
"javi22/simple-instruct-dataset/default/train": 7007800,
|
| 59 |
"agentlans/chatgpt/default/train": 0,
|
| 60 |
"fineinstructions-pretraining/nemotron_fineinstructions_1T_exp_chat/default/train": 0,
|
| 61 |
+
"agentlans/chatgpt/default/train::hf://datasets/agentlans/chatgpt/HydraLM-airoboros-gpt4-1.4_alpaca.jsonl.zst": 29710,
|
| 62 |
"agentlans/chatgpt/default/train::hf://datasets/agentlans/chatgpt/Mxode-Magpie-Pro-10K-GPT4o-mini.jsonl.zst": 0,
|
| 63 |
"agentlans/chatgpt/default/train::hf://datasets/agentlans/chatgpt/PKU-Alignment-Align-Anything-Instruction-100K.jsonl.zst": 0,
|
| 64 |
"agentlans/chatgpt/default/train::hf://datasets/agentlans/chatgpt/PawanKrd-gpt-4o-200k.jsonl.zst": 0,
|
|
|
|
| 116 |
"agentlans/chatgpt/default/train::hf://datasets/agentlans/chatgpt/microsoft-orca-agentinstruct-1m-v1.jsonl.zst": 0,
|
| 117 |
"agentlans/chatgpt/default/train::hf://datasets/agentlans/chatgpt/microsoft-orca-math-word-problems-200k.jsonl.zst": 0
|
| 118 |
},
|
| 119 |
+
"avg_train_loss": 2.22396332859993,
|
| 120 |
+
"sft_step": 29000,
|
| 121 |
+
"timestamp": "2026-04-22 14:42:21",
|
| 122 |
"global_batch": 128,
|
| 123 |
"seq_len": 8192,
|
| 124 |
"lr_start": 5e-05,
|