Arko007 committed on
Commit
6892cff
·
verified ·
1 Parent(s): 13c26bb

Upload training_meta.json with huggingface_hub

Browse files
Files changed (1) hide show
  1. training_meta.json +50 -50
training_meta.json CHANGED
@@ -1,64 +1,64 @@
1
  {
2
  "best_val_loss": 1.6575258653610945,
3
  "stream_offsets": {
4
- "nvidia/Nemotron-Cascade-SFT-Stage-1/general/train": 27416,
5
- "nvidia/Nemotron-Cascade-SFT-Stage-1/math/train": 27416,
6
- "nvidia/Nemotron-Cascade-SFT-Stage-1/code/train": 27416,
7
- "nvidia/Nemotron-Cascade-SFT-Stage-1/science/train": 27416,
8
- "nvidia/Nemotron-Cascade-SFT-Stage-2/math/train": 27416,
9
- "nvidia/Nemotron-Cascade-SFT-Stage-2/code/train": 27415,
10
- "nvidia/Nemotron-Cascade-SFT-Stage-2/science/train": 27415,
11
- "nvidia/Nemotron-Cascade-SFT-Stage-2/general/train": 27415,
12
- "nvidia/Nemotron-Cascade-SFT-Stage-2/tool_calling/train": 27415,
13
- "nvidia/Nemotron-Cascade-SFT-Stage-2/instruction-following/train": 27415,
14
- "nvidia/Nemotron-Cascade-SFT-Stage-2/swe_repair/train": 27415,
15
- "nvidia/Nemotron-Cascade-SFT-Stage-2/swe_localization/train": 27415,
16
- "nvidia/Nemotron-Cascade-SFT-Stage-2/swe_testgen/train": 27415,
17
- "nvidia/Nemotron-Cascade-2-SFT-Data/math/train": 27415,
18
- "nvidia/Nemotron-Cascade-2-SFT-Data/science/train": 27415,
19
- "nvidia/Nemotron-Cascade-2-SFT-Data/chat/train": 27415,
20
- "nvidia/Nemotron-Cascade-2-SFT-Data/instruction_following/train": 27415,
21
  "nvidia/Nemotron-Cascade-2-SFT-Data/safety/train": 3570,
22
- "nvidia/Nemotron-Cascade-2-SFT-Data/conversational_agent/train": 27415,
23
- "nvidia/Nemotron-Cascade-2-SFT-Data/swe/train": 27415,
24
- "nvidia/Nemotron-Post-Training-Dataset-v1/default/chat": 27415,
25
- "nvidia/Nemotron-Post-Training-Dataset-v1/default/code": 27415,
26
- "nvidia/Nemotron-Post-Training-Dataset-v1/default/math": 27415,
27
- "nvidia/Nemotron-Post-Training-Dataset-v1/default/stem": 27415,
28
- "nvidia/Nemotron-Post-Training-Dataset-v1/default/tool_calling": 27415,
29
- "nvidia/AceReason-1.1-SFT/default/train": 27415,
30
  "nvidia/OpenMathInstruct-2/default/train": 58705,
31
  "nvidia/OpenMathReasoning/default/cot": 0,
32
  "nvidia/OpenMathReasoning/default/tir": 0,
33
  "nvidia/OpenMathReasoning/default/genselect": 0,
34
- "nvidia/Nemotron-Math-v2/default/high_part00": 27415,
35
- "nvidia/Nemotron-Math-v2/default/high_part01": 27415,
36
- "nvidia/Nemotron-Math-v2/default/high_part02": 27415,
37
- "nvidia/Nemotron-Math-v2/default/medium": 27415,
38
- "nvidia/Nemotron-Math-v2/default/low": 27415,
39
- "nvidia/Nemotron-SFT-Math-v3/default/train": 27415,
40
- "nvidia/Nemotron-Research-GooseReason-0.7M/default/math": 27415,
41
- "nvidia/Nemotron-Research-GooseReason-0.7M/default/code": 27415,
42
- "nvidia/Nemotron-Research-GooseReason-0.7M/default/stem": 27415,
43
- "nvidia/OpenCodeGeneticInstruct/mixtral-8x22b-instruct/train": 27415,
44
- "nvidia/OpenCodeGeneticInstruct/qwen2.5-32b-instruct/train": 27415,
45
- "nvidia/OpenCodeReasoning/split_0/split_0": 27415,
46
- "nvidia/OpenCodeReasoning/split_1/split_1": 27415,
47
  "nvidia/Nemotron-Math-HumanReasoning/default/train": 0,
48
- "nvidia/Nemotron-SFT-Agentic-v2/default/interactive_agent": 27415,
49
  "nvidia/Nemotron-SFT-Agentic-v2/default/tool_calling": 0,
50
- "nvidia/Nemotron-Instruction-Following-Chat-v1/default/chat_if": 27415,
51
  "nvidia/Nemotron-Instruction-Following-Chat-v1/default/structured_outputs": 4969,
52
- "nvidia/Nemotron-Competitive-Programming-v1/default/competitive_coding_python_part00": 27415,
53
- "nvidia/Nemotron-Competitive-Programming-v1/default/competitive_coding_python_part01": 27415,
54
- "nvidia/Nemotron-Competitive-Programming-v1/default/competitive_coding_cpp_part00": 27415,
55
- "nvidia/Nemotron-Competitive-Programming-v1/default/competitive_coding_cpp_part01": 27415,
56
  "nvidia/Nemotron-RL-Super-Training-Blends/default/rlvr1": 3933,
57
- "ibivibiv/math_instruct/default/train": 7005506,
58
- "javi22/simple-instruct-dataset/default/train": 7005505,
59
  "agentlans/chatgpt/default/train": 0,
60
  "fineinstructions-pretraining/nemotron_fineinstructions_1T_exp_chat/default/train": 0,
61
- "agentlans/chatgpt/default/train::hf://datasets/agentlans/chatgpt/HydraLM-airoboros-gpt4-1.4_alpaca.jsonl.zst": 27415,
62
  "agentlans/chatgpt/default/train::hf://datasets/agentlans/chatgpt/Mxode-Magpie-Pro-10K-GPT4o-mini.jsonl.zst": 0,
63
  "agentlans/chatgpt/default/train::hf://datasets/agentlans/chatgpt/PKU-Alignment-Align-Anything-Instruction-100K.jsonl.zst": 0,
64
  "agentlans/chatgpt/default/train::hf://datasets/agentlans/chatgpt/PawanKrd-gpt-4o-200k.jsonl.zst": 0,
@@ -116,9 +116,9 @@
116
  "agentlans/chatgpt/default/train::hf://datasets/agentlans/chatgpt/microsoft-orca-agentinstruct-1m-v1.jsonl.zst": 0,
117
  "agentlans/chatgpt/default/train::hf://datasets/agentlans/chatgpt/microsoft-orca-math-word-problems-200k.jsonl.zst": 0
118
  },
119
- "avg_train_loss": 2.226559741258621,
120
- "sft_step": 28500,
121
- "timestamp": "2026-04-22 14:09:16",
122
  "global_batch": 128,
123
  "seq_len": 8192,
124
  "lr_start": 5e-05,
 
1
  {
2
  "best_val_loss": 1.6575258653610945,
3
  "stream_offsets": {
4
+ "nvidia/Nemotron-Cascade-SFT-Stage-1/general/train": 29711,
5
+ "nvidia/Nemotron-Cascade-SFT-Stage-1/math/train": 29711,
6
+ "nvidia/Nemotron-Cascade-SFT-Stage-1/code/train": 29711,
7
+ "nvidia/Nemotron-Cascade-SFT-Stage-1/science/train": 29711,
8
+ "nvidia/Nemotron-Cascade-SFT-Stage-2/math/train": 29711,
9
+ "nvidia/Nemotron-Cascade-SFT-Stage-2/code/train": 29711,
10
+ "nvidia/Nemotron-Cascade-SFT-Stage-2/science/train": 29711,
11
+ "nvidia/Nemotron-Cascade-SFT-Stage-2/general/train": 29711,
12
+ "nvidia/Nemotron-Cascade-SFT-Stage-2/tool_calling/train": 29711,
13
+ "nvidia/Nemotron-Cascade-SFT-Stage-2/instruction-following/train": 29711,
14
+ "nvidia/Nemotron-Cascade-SFT-Stage-2/swe_repair/train": 29711,
15
+ "nvidia/Nemotron-Cascade-SFT-Stage-2/swe_localization/train": 29711,
16
+ "nvidia/Nemotron-Cascade-SFT-Stage-2/swe_testgen/train": 29710,
17
+ "nvidia/Nemotron-Cascade-2-SFT-Data/math/train": 29710,
18
+ "nvidia/Nemotron-Cascade-2-SFT-Data/science/train": 29710,
19
+ "nvidia/Nemotron-Cascade-2-SFT-Data/chat/train": 29710,
20
+ "nvidia/Nemotron-Cascade-2-SFT-Data/instruction_following/train": 29710,
21
  "nvidia/Nemotron-Cascade-2-SFT-Data/safety/train": 3570,
22
+ "nvidia/Nemotron-Cascade-2-SFT-Data/conversational_agent/train": 29710,
23
+ "nvidia/Nemotron-Cascade-2-SFT-Data/swe/train": 29710,
24
+ "nvidia/Nemotron-Post-Training-Dataset-v1/default/chat": 29710,
25
+ "nvidia/Nemotron-Post-Training-Dataset-v1/default/code": 29710,
26
+ "nvidia/Nemotron-Post-Training-Dataset-v1/default/math": 29710,
27
+ "nvidia/Nemotron-Post-Training-Dataset-v1/default/stem": 29710,
28
+ "nvidia/Nemotron-Post-Training-Dataset-v1/default/tool_calling": 29710,
29
+ "nvidia/AceReason-1.1-SFT/default/train": 29710,
30
  "nvidia/OpenMathInstruct-2/default/train": 58705,
31
  "nvidia/OpenMathReasoning/default/cot": 0,
32
  "nvidia/OpenMathReasoning/default/tir": 0,
33
  "nvidia/OpenMathReasoning/default/genselect": 0,
34
+ "nvidia/Nemotron-Math-v2/default/high_part00": 29710,
35
+ "nvidia/Nemotron-Math-v2/default/high_part01": 29710,
36
+ "nvidia/Nemotron-Math-v2/default/high_part02": 29710,
37
+ "nvidia/Nemotron-Math-v2/default/medium": 29710,
38
+ "nvidia/Nemotron-Math-v2/default/low": 29710,
39
+ "nvidia/Nemotron-SFT-Math-v3/default/train": 29710,
40
+ "nvidia/Nemotron-Research-GooseReason-0.7M/default/math": 29710,
41
+ "nvidia/Nemotron-Research-GooseReason-0.7M/default/code": 29710,
42
+ "nvidia/Nemotron-Research-GooseReason-0.7M/default/stem": 29710,
43
+ "nvidia/OpenCodeGeneticInstruct/mixtral-8x22b-instruct/train": 29710,
44
+ "nvidia/OpenCodeGeneticInstruct/qwen2.5-32b-instruct/train": 29710,
45
+ "nvidia/OpenCodeReasoning/split_0/split_0": 29710,
46
+ "nvidia/OpenCodeReasoning/split_1/split_1": 29710,
47
  "nvidia/Nemotron-Math-HumanReasoning/default/train": 0,
48
+ "nvidia/Nemotron-SFT-Agentic-v2/default/interactive_agent": 29710,
49
  "nvidia/Nemotron-SFT-Agentic-v2/default/tool_calling": 0,
50
+ "nvidia/Nemotron-Instruction-Following-Chat-v1/default/chat_if": 29710,
51
  "nvidia/Nemotron-Instruction-Following-Chat-v1/default/structured_outputs": 4969,
52
+ "nvidia/Nemotron-Competitive-Programming-v1/default/competitive_coding_python_part00": 29710,
53
+ "nvidia/Nemotron-Competitive-Programming-v1/default/competitive_coding_python_part01": 29710,
54
+ "nvidia/Nemotron-Competitive-Programming-v1/default/competitive_coding_cpp_part00": 29710,
55
+ "nvidia/Nemotron-Competitive-Programming-v1/default/competitive_coding_cpp_part01": 29710,
56
  "nvidia/Nemotron-RL-Super-Training-Blends/default/rlvr1": 3933,
57
+ "ibivibiv/math_instruct/default/train": 7007801,
58
+ "javi22/simple-instruct-dataset/default/train": 7007800,
59
  "agentlans/chatgpt/default/train": 0,
60
  "fineinstructions-pretraining/nemotron_fineinstructions_1T_exp_chat/default/train": 0,
61
+ "agentlans/chatgpt/default/train::hf://datasets/agentlans/chatgpt/HydraLM-airoboros-gpt4-1.4_alpaca.jsonl.zst": 29710,
62
  "agentlans/chatgpt/default/train::hf://datasets/agentlans/chatgpt/Mxode-Magpie-Pro-10K-GPT4o-mini.jsonl.zst": 0,
63
  "agentlans/chatgpt/default/train::hf://datasets/agentlans/chatgpt/PKU-Alignment-Align-Anything-Instruction-100K.jsonl.zst": 0,
64
  "agentlans/chatgpt/default/train::hf://datasets/agentlans/chatgpt/PawanKrd-gpt-4o-200k.jsonl.zst": 0,
 
116
  "agentlans/chatgpt/default/train::hf://datasets/agentlans/chatgpt/microsoft-orca-agentinstruct-1m-v1.jsonl.zst": 0,
117
  "agentlans/chatgpt/default/train::hf://datasets/agentlans/chatgpt/microsoft-orca-math-word-problems-200k.jsonl.zst": 0
118
  },
119
+ "avg_train_loss": 2.22396332859993,
120
+ "sft_step": 29000,
121
+ "timestamp": "2026-04-22 14:42:21",
122
  "global_batch": 128,
123
  "seq_len": 8192,
124
  "lr_start": 5e-05,