Arko007 commited on
Commit
226dc5a
·
verified ·
1 Parent(s): 4652a08

Upload meta/step_0024500.json with huggingface_hub

Browse files
Files changed (1) hide show
  1. meta/step_0024500.json +127 -0
meta/step_0024500.json ADDED
@@ -0,0 +1,127 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "best_val_loss": 1.6575258653610945,
3
+ "stream_offsets": {
4
+ "nvidia/Nemotron-Cascade-SFT-Stage-1/general/train": 9062,
5
+ "nvidia/Nemotron-Cascade-SFT-Stage-1/math/train": 9062,
6
+ "nvidia/Nemotron-Cascade-SFT-Stage-1/code/train": 9062,
7
+ "nvidia/Nemotron-Cascade-SFT-Stage-1/science/train": 9062,
8
+ "nvidia/Nemotron-Cascade-SFT-Stage-2/math/train": 9062,
9
+ "nvidia/Nemotron-Cascade-SFT-Stage-2/code/train": 9062,
10
+ "nvidia/Nemotron-Cascade-SFT-Stage-2/science/train": 9062,
11
+ "nvidia/Nemotron-Cascade-SFT-Stage-2/general/train": 9062,
12
+ "nvidia/Nemotron-Cascade-SFT-Stage-2/tool_calling/train": 9062,
13
+ "nvidia/Nemotron-Cascade-SFT-Stage-2/instruction-following/train": 9062,
14
+ "nvidia/Nemotron-Cascade-SFT-Stage-2/swe_repair/train": 9062,
15
+ "nvidia/Nemotron-Cascade-SFT-Stage-2/swe_localization/train": 9061,
16
+ "nvidia/Nemotron-Cascade-SFT-Stage-2/swe_testgen/train": 9061,
17
+ "nvidia/Nemotron-Cascade-2-SFT-Data/math/train": 9061,
18
+ "nvidia/Nemotron-Cascade-2-SFT-Data/science/train": 9061,
19
+ "nvidia/Nemotron-Cascade-2-SFT-Data/chat/train": 9061,
20
+ "nvidia/Nemotron-Cascade-2-SFT-Data/instruction_following/train": 9061,
21
+ "nvidia/Nemotron-Cascade-2-SFT-Data/safety/train": 3570,
22
+ "nvidia/Nemotron-Cascade-2-SFT-Data/conversational_agent/train": 9061,
23
+ "nvidia/Nemotron-Cascade-2-SFT-Data/swe/train": 9061,
24
+ "nvidia/Nemotron-Post-Training-Dataset-v1/default/chat": 9061,
25
+ "nvidia/Nemotron-Post-Training-Dataset-v1/default/code": 9061,
26
+ "nvidia/Nemotron-Post-Training-Dataset-v1/default/math": 9061,
27
+ "nvidia/Nemotron-Post-Training-Dataset-v1/default/stem": 9061,
28
+ "nvidia/Nemotron-Post-Training-Dataset-v1/default/tool_calling": 9061,
29
+ "nvidia/AceReason-1.1-SFT/default/train": 9061,
30
+ "nvidia/OpenMathInstruct-2/default/train": 58705,
31
+ "nvidia/OpenMathReasoning/default/cot": 0,
32
+ "nvidia/OpenMathReasoning/default/tir": 0,
33
+ "nvidia/OpenMathReasoning/default/genselect": 0,
34
+ "nvidia/Nemotron-Math-v2/default/high_part00": 9061,
35
+ "nvidia/Nemotron-Math-v2/default/high_part01": 9061,
36
+ "nvidia/Nemotron-Math-v2/default/high_part02": 9061,
37
+ "nvidia/Nemotron-Math-v2/default/medium": 9061,
38
+ "nvidia/Nemotron-Math-v2/default/low": 9061,
39
+ "nvidia/Nemotron-SFT-Math-v3/default/train": 9061,
40
+ "nvidia/Nemotron-Research-GooseReason-0.7M/default/math": 9061,
41
+ "nvidia/Nemotron-Research-GooseReason-0.7M/default/code": 9061,
42
+ "nvidia/Nemotron-Research-GooseReason-0.7M/default/stem": 9061,
43
+ "nvidia/OpenCodeGeneticInstruct/mixtral-8x22b-instruct/train": 9061,
44
+ "nvidia/OpenCodeGeneticInstruct/qwen2.5-32b-instruct/train": 9061,
45
+ "nvidia/OpenCodeReasoning/split_0/split_0": 9061,
46
+ "nvidia/OpenCodeReasoning/split_1/split_1": 9061,
47
+ "nvidia/Nemotron-Math-HumanReasoning/default/train": 0,
48
+ "nvidia/Nemotron-SFT-Agentic-v2/default/interactive_agent": 9061,
49
+ "nvidia/Nemotron-SFT-Agentic-v2/default/tool_calling": 0,
50
+ "nvidia/Nemotron-Instruction-Following-Chat-v1/default/chat_if": 9061,
51
+ "nvidia/Nemotron-Instruction-Following-Chat-v1/default/structured_outputs": 4969,
52
+ "nvidia/Nemotron-Competitive-Programming-v1/default/competitive_coding_python_part00": 9061,
53
+ "nvidia/Nemotron-Competitive-Programming-v1/default/competitive_coding_python_part01": 9061,
54
+ "nvidia/Nemotron-Competitive-Programming-v1/default/competitive_coding_cpp_part00": 9061,
55
+ "nvidia/Nemotron-Competitive-Programming-v1/default/competitive_coding_cpp_part01": 9061,
56
+ "nvidia/Nemotron-RL-Super-Training-Blends/default/rlvr1": 3933,
57
+ "ibivibiv/math_instruct/default/train": 6987152,
58
+ "javi22/simple-instruct-dataset/default/train": 6987151,
59
+ "agentlans/chatgpt/default/train": 0,
60
+ "fineinstructions-pretraining/nemotron_fineinstructions_1T_exp_chat/default/train": 0,
61
+ "agentlans/chatgpt/default/train::hf://datasets/agentlans/chatgpt/HydraLM-airoboros-gpt4-1.4_alpaca.jsonl.zst": 9061,
62
+ "agentlans/chatgpt/default/train::hf://datasets/agentlans/chatgpt/Mxode-Magpie-Pro-10K-GPT4o-mini.jsonl.zst": 0,
63
+ "agentlans/chatgpt/default/train::hf://datasets/agentlans/chatgpt/PKU-Alignment-Align-Anything-Instruction-100K.jsonl.zst": 0,
64
+ "agentlans/chatgpt/default/train::hf://datasets/agentlans/chatgpt/PawanKrd-gpt-4o-200k.jsonl.zst": 0,
65
+ "agentlans/chatgpt/default/train::hf://datasets/agentlans/chatgpt/PawanKrd-math-gpt-4o-200k.jsonl.zst": 0,
66
+ "agentlans/chatgpt/default/train::hf://datasets/agentlans/chatgpt/WildChat_single_turn_shuf.jsonl.zst": 0,
67
+ "agentlans/chatgpt/default/train::hf://datasets/agentlans/chatgpt/causal-lm-instruction_gpt4.jsonl.zst": 0,
68
+ "agentlans/chatgpt/default/train::hf://datasets/agentlans/chatgpt/chatgpt/0001.jsonl.zst": 0,
69
+ "agentlans/chatgpt/default/train::hf://datasets/agentlans/chatgpt/chatgpt/0002.jsonl.zst": 0,
70
+ "agentlans/chatgpt/default/train::hf://datasets/agentlans/chatgpt/chatgpt/0003.jsonl.zst": 0,
71
+ "agentlans/chatgpt/default/train::hf://datasets/agentlans/chatgpt/chatgpt/0004.jsonl.zst": 0,
72
+ "agentlans/chatgpt/default/train::hf://datasets/agentlans/chatgpt/chatgpt/0005.jsonl.zst": 0,
73
+ "agentlans/chatgpt/default/train::hf://datasets/agentlans/chatgpt/chatgpt/0006.jsonl.zst": 0,
74
+ "agentlans/chatgpt/default/train::hf://datasets/agentlans/chatgpt/chatgpt/0007.jsonl.zst": 0,
75
+ "agentlans/chatgpt/default/train::hf://datasets/agentlans/chatgpt/chatgpt/0008.jsonl.zst": 0,
76
+ "agentlans/chatgpt/default/train::hf://datasets/agentlans/chatgpt/chatgpt/0009.jsonl.zst": 0,
77
+ "agentlans/chatgpt/default/train::hf://datasets/agentlans/chatgpt/chatgpt/0010.jsonl.zst": 0,
78
+ "agentlans/chatgpt/default/train::hf://datasets/agentlans/chatgpt/chatgpt/0011.jsonl.zst": 0,
79
+ "agentlans/chatgpt/default/train::hf://datasets/agentlans/chatgpt/chatgpt/0012.jsonl.zst": 0,
80
+ "agentlans/chatgpt/default/train::hf://datasets/agentlans/chatgpt/chatgpt/0013.jsonl.zst": 0,
81
+ "agentlans/chatgpt/default/train::hf://datasets/agentlans/chatgpt/chatgpt/0014.jsonl.zst": 0,
82
+ "agentlans/chatgpt/default/train::hf://datasets/agentlans/chatgpt/chatgpt/0015.jsonl.zst": 0,
83
+ "agentlans/chatgpt/default/train::hf://datasets/agentlans/chatgpt/chatgpt/0016.jsonl.zst": 0,
84
+ "agentlans/chatgpt/default/train::hf://datasets/agentlans/chatgpt/chatgpt/0017.jsonl.zst": 0,
85
+ "agentlans/chatgpt/default/train::hf://datasets/agentlans/chatgpt/chatgpt/0018.jsonl.zst": 0,
86
+ "agentlans/chatgpt/default/train::hf://datasets/agentlans/chatgpt/chatgpt/0019.jsonl.zst": 0,
87
+ "agentlans/chatgpt/default/train::hf://datasets/agentlans/chatgpt/chatgpt/0020.jsonl.zst": 0,
88
+ "agentlans/chatgpt/default/train::hf://datasets/agentlans/chatgpt/chatgpt/0021.jsonl.zst": 0,
89
+ "agentlans/chatgpt/default/train::hf://datasets/agentlans/chatgpt/chatgpt/0022.jsonl.zst": 0,
90
+ "agentlans/chatgpt/default/train::hf://datasets/agentlans/chatgpt/chatgpt/0023.jsonl.zst": 0,
91
+ "agentlans/chatgpt/default/train::hf://datasets/agentlans/chatgpt/chatgpt/0024.jsonl.zst": 0,
92
+ "agentlans/chatgpt/default/train::hf://datasets/agentlans/chatgpt/chatgpt/0025.jsonl.zst": 0,
93
+ "agentlans/chatgpt/default/train::hf://datasets/agentlans/chatgpt/chatgpt/0026.jsonl.zst": 0,
94
+ "agentlans/chatgpt/default/train::hf://datasets/agentlans/chatgpt/chatgpt/0027.jsonl.zst": 0,
95
+ "agentlans/chatgpt/default/train::hf://datasets/agentlans/chatgpt/chatgpt/0028.jsonl.zst": 0,
96
+ "agentlans/chatgpt/default/train::hf://datasets/agentlans/chatgpt/chatgpt/0029.jsonl.zst": 0,
97
+ "agentlans/chatgpt/default/train::hf://datasets/agentlans/chatgpt/chatgpt/0030.jsonl.zst": 0,
98
+ "agentlans/chatgpt/default/train::hf://datasets/agentlans/chatgpt/chatgpt/0031.jsonl.zst": 0,
99
+ "agentlans/chatgpt/default/train::hf://datasets/agentlans/chatgpt/chatgpt/0032.jsonl.zst": 0,
100
+ "agentlans/chatgpt/default/train::hf://datasets/agentlans/chatgpt/chatgpt/0033.jsonl.zst": 0,
101
+ "agentlans/chatgpt/default/train::hf://datasets/agentlans/chatgpt/filtered/filtered.jsonl.zst": 0,
102
+ "agentlans/chatgpt/default/train::hf://datasets/agentlans/chatgpt/filtered/k100.jsonl.zst": 0,
103
+ "agentlans/chatgpt/default/train::hf://datasets/agentlans/chatgpt/filtered/k1000.jsonl.zst": 0,
104
+ "agentlans/chatgpt/default/train::hf://datasets/agentlans/chatgpt/filtered/k10000.jsonl.zst": 0,
105
+ "agentlans/chatgpt/default/train::hf://datasets/agentlans/chatgpt/filtered/k100000.jsonl.zst": 0,
106
+ "agentlans/chatgpt/default/train::hf://datasets/agentlans/chatgpt/filtered/k200.jsonl.zst": 0,
107
+ "agentlans/chatgpt/default/train::hf://datasets/agentlans/chatgpt/filtered/k2000.jsonl.zst": 0,
108
+ "agentlans/chatgpt/default/train::hf://datasets/agentlans/chatgpt/filtered/k20000.jsonl.zst": 0,
109
+ "agentlans/chatgpt/default/train::hf://datasets/agentlans/chatgpt/filtered/k200000.jsonl.zst": 0,
110
+ "agentlans/chatgpt/default/train::hf://datasets/agentlans/chatgpt/filtered/k500.jsonl.zst": 0,
111
+ "agentlans/chatgpt/default/train::hf://datasets/agentlans/chatgpt/filtered/k5000.jsonl.zst": 0,
112
+ "agentlans/chatgpt/default/train::hf://datasets/agentlans/chatgpt/filtered/k50000.jsonl.zst": 0,
113
+ "agentlans/chatgpt/default/train::hf://datasets/agentlans/chatgpt/hugfaceguy0001-ChatGPTGroundTruth.jsonl.zst": 0,
114
+ "agentlans/chatgpt/default/train::hf://datasets/agentlans/chatgpt/jondurbin-airoboros-gpt4-m2.0.jsonl.zst": 0,
115
+ "agentlans/chatgpt/default/train::hf://datasets/agentlans/chatgpt/lightblue-tagengo-gpt4.jsonl.zst": 0,
116
+ "agentlans/chatgpt/default/train::hf://datasets/agentlans/chatgpt/microsoft-orca-agentinstruct-1m-v1.jsonl.zst": 0,
117
+ "agentlans/chatgpt/default/train::hf://datasets/agentlans/chatgpt/microsoft-orca-math-word-problems-200k.jsonl.zst": 0
118
+ },
119
+ "avg_train_loss": 2.248078753948212,
120
+ "sft_step": 24500,
121
+ "timestamp": "2026-04-22 09:45:59",
122
+ "global_batch": 128,
123
+ "seq_len": 8192,
124
+ "lr_start": 5e-05,
125
+ "lr_min": 1e-06,
126
+ "vocab_size": 32777
127
+ }