ilessio-aiflowlab commited on
Commit
a097f2c
·
verified ·
1 Parent(s): 59d2be7

Upload benchmarks/bench_11_student_variants.json with huggingface_hub

Browse files
benchmarks/bench_11_student_variants.json ADDED
@@ -0,0 +1,200 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "benchmark": "student_variants",
3
+ "timestamp": "2026-03-19T12:35:24.930881+00:00",
4
+ "device": "cuda",
5
+ "gpu": "NVIDIA L4",
6
+ "variants": {
7
+ "nano_baseline": {
8
+ "variant": "nano_baseline",
9
+ "config": {
10
+ "variant": "nano",
11
+ "language_model": "Qwen/Qwen2.5-0.5B",
12
+ "lora_rank": 32,
13
+ "action_head_type": "diffusion"
14
+ },
15
+ "total_params_m": 967.9,
16
+ "trainable_params_m": 495.6,
17
+ "frozen_params_m": 472.3,
18
+ "build_time_s": 5.9,
19
+ "inference": {
20
+ "fp32_p50_ms": 125.64,
21
+ "fp32_fps": 7.9,
22
+ "fp16_p50_ms": 90.41,
23
+ "fp16_fps": 11.0,
24
+ "fp16_speedup": 1.39,
25
+ "gpu_mem_gb": 4.32
26
+ },
27
+ "training": {
28
+ "n_steps": 30,
29
+ "loss_start": 3.2132,
30
+ "loss_end": 1.0615,
31
+ "loss_reduction_pct": 67.0,
32
+ "step_time_ms": 610.5,
33
+ "steps_per_sec": 1.64,
34
+ "gpu_mem_gb": 9.01,
35
+ "loss_curve": [
36
+ 3.2132,
37
+ 41.0657,
38
+ 85.2859,
39
+ 39.3644,
40
+ 11.2475,
41
+ 2.2388,
42
+ 1.3854,
43
+ 1.0555,
44
+ 1.5505,
45
+ 1.2637,
46
+ 2.3099,
47
+ 1.9009,
48
+ 1.5492,
49
+ 2.2762,
50
+ 1.3436,
51
+ 1.0509,
52
+ 0.7195,
53
+ 1.1853,
54
+ 1.4664,
55
+ 0.9882,
56
+ 0.7097,
57
+ 1.8235,
58
+ 1.5141,
59
+ 2.4983,
60
+ 1.6145,
61
+ 0.87,
62
+ 1.5064,
63
+ 1.5116,
64
+ 1.2326,
65
+ 1.0615
66
+ ]
67
+ }
68
+ },
69
+ "nano_lora64": {
70
+ "variant": "nano_lora64",
71
+ "config": {
72
+ "variant": "nano",
73
+ "language_model": "Qwen/Qwen2.5-0.5B",
74
+ "lora_rank": 64,
75
+ "action_head_type": "diffusion"
76
+ },
77
+ "total_params_m": 972.3,
78
+ "trainable_params_m": 500.0,
79
+ "frozen_params_m": 472.3,
80
+ "build_time_s": 2.9,
81
+ "inference": {
82
+ "fp32_p50_ms": 126.67,
83
+ "fp32_fps": 7.9,
84
+ "fp16_p50_ms": 92.33,
85
+ "fp16_fps": 10.8,
86
+ "fp16_speedup": 1.37,
87
+ "gpu_mem_gb": 7.66
88
+ },
89
+ "training": {
90
+ "n_steps": 30,
91
+ "loss_start": 5.4019,
92
+ "loss_end": 1.2458,
93
+ "loss_reduction_pct": 76.9,
94
+ "step_time_ms": 618.1,
95
+ "steps_per_sec": 1.62,
96
+ "gpu_mem_gb": 9.1,
97
+ "loss_curve": [
98
+ 5.4019,
99
+ 22.3632,
100
+ 51.01,
101
+ 42.1202,
102
+ 46.8094,
103
+ 3.5741,
104
+ 7.4527,
105
+ 4.3158,
106
+ 1.9751,
107
+ 1.9907,
108
+ 1.8124,
109
+ 2.6258,
110
+ 2.0602,
111
+ 3.2828,
112
+ 1.5963,
113
+ 1.2459,
114
+ 1.539,
115
+ 2.3277,
116
+ 1.3788,
117
+ 1.9966,
118
+ 1.8496,
119
+ 1.064,
120
+ 1.7389,
121
+ 0.6589,
122
+ 1.3724,
123
+ 1.2252,
124
+ 1.164,
125
+ 0.8581,
126
+ 1.2236,
127
+ 1.2458
128
+ ]
129
+ }
130
+ },
131
+ "nano_flow": {
132
+ "variant": "nano_flow",
133
+ "config": {
134
+ "variant": "nano",
135
+ "language_model": "Qwen/Qwen2.5-0.5B",
136
+ "lora_rank": 32,
137
+ "action_head_type": "flow"
138
+ },
139
+ "total_params_m": 967.9,
140
+ "trainable_params_m": 495.6,
141
+ "frozen_params_m": 472.3,
142
+ "build_time_s": 2.9,
143
+ "inference": {
144
+ "fp32_p50_ms": 121.72,
145
+ "fp32_fps": 8.2,
146
+ "fp16_p50_ms": 79.13,
147
+ "fp16_fps": 12.6,
148
+ "fp16_speedup": 1.54,
149
+ "gpu_mem_gb": 7.73
150
+ },
151
+ "training": {
152
+ "n_steps": 30,
153
+ "loss_start": 7.475,
154
+ "loss_end": 1.0583,
155
+ "loss_reduction_pct": 85.8,
156
+ "step_time_ms": 630.9,
157
+ "steps_per_sec": 1.58,
158
+ "gpu_mem_gb": 9.02,
159
+ "loss_curve": [
160
+ 7.475,
161
+ 52.8619,
162
+ 27.0023,
163
+ 48.6988,
164
+ 4.5813,
165
+ 4.4473,
166
+ 2.7973,
167
+ 2.4244,
168
+ 2.0722,
169
+ 2.7877,
170
+ 2.2137,
171
+ 1.4398,
172
+ 1.8208,
173
+ 2.5302,
174
+ 2.0963,
175
+ 1.8007,
176
+ 2.9426,
177
+ 1.9881,
178
+ 1.8891,
179
+ 2.3121,
180
+ 2.3319,
181
+ 2.6846,
182
+ 3.4347,
183
+ 2.3639,
184
+ 3.0736,
185
+ 3.5383,
186
+ 3.0924,
187
+ 2.5027,
188
+ 2.4456,
189
+ 1.0583
190
+ ]
191
+ }
192
+ },
193
+ "small_baseline": {
194
+ "error": "CUDA out of memory. Tried to allocate 54.00 MiB. GPU 0 has a total capacity of 22.03 GiB of which 5.06 MiB is free. Including non-PyTorch memory, this process has 22.02 GiB memory in use. Of the allocated memory 21.17 GiB is allocated by PyTorch, and 632.04 MiB is reserved by PyTorch but unallocated. If reserved but unallocated memory is large try setting PYTORCH_ALLOC_CONF=expandable_segments:True to avoid fragmentation. See documentation for Memory Management (https://pytorch.org/docs/stable/notes/cuda.html#environment-variables)"
195
+ },
196
+ "small_flow": {
197
+ "error": "CUDA out of memory. Tried to allocate 54.00 MiB. GPU 0 has a total capacity of 22.03 GiB of which 37.06 MiB is free. Including non-PyTorch memory, this process has 21.99 GiB memory in use. Of the allocated memory 21.11 GiB is allocated by PyTorch, and 653.15 MiB is reserved by PyTorch but unallocated. If reserved but unallocated memory is large try setting PYTORCH_ALLOC_CONF=expandable_segments:True to avoid fragmentation. See documentation for Memory Management (https://pytorch.org/docs/stable/notes/cuda.html#environment-variables)"
198
+ }
199
+ }
200
+ }