File size: 7,699 Bytes
ebe598e
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
{
  "experiments": {
    "test_exp_001": {
      "id": "test_exp_001",
      "name": "Test Experiment",
      "description": "Debug test",
      "created_at": "2025-07-20T14:01:48.871089",
      "status": "running",
      "metrics": [
        {
          "timestamp": "2025-07-20T14:01:48.871096",
          "step": 25,
          "metrics": {
            "loss": 1.165,
            "accuracy": 0.75,
            "learning_rate": 3.5e-06
          }
        }
      ],
      "parameters": {},
      "artifacts": [],
      "logs": []
    },
    "exp_20250720_130853": {
      "id": "exp_20250720_130853",
      "name": "petite-elle-l-aime-3",
      "description": "SmolLM3 fine-tuning experiment",
      "created_at": "2025-07-20T11:20:01.780908",
      "status": "running",
      "metrics": [
        {
          "timestamp": "2025-07-20T11:20:01.780908",
          "step": 25,
          "metrics": {
            "loss": 1.1659,
            "grad_norm": 10.3125,
            "learning_rate": 7e-08,
            "num_tokens": 1642080.0,
            "mean_token_accuracy": 0.75923578992486,
            "epoch": 0.004851130919895701
          }
        },
        {
          "timestamp": "2025-07-20T11:26:39.042155",
          "step": 50,
          "metrics": {
            "loss": 1.165,
            "grad_norm": 10.75,
            "learning_rate": 1.4291666666666667e-07,
            "num_tokens": 3324682.0,
            "mean_token_accuracy": 0.7577659255266189,
            "epoch": 0.009702261839791402
          }
        },
        {
          "timestamp": "2025-07-20T11:33:16.203045",
          "step": 75,
          "metrics": {
            "loss": 1.1639,
            "grad_norm": 10.6875,
            "learning_rate": 2.1583333333333334e-07,
            "num_tokens": 4987941.0,
            "mean_token_accuracy": 0.7581205774843692,
            "epoch": 0.014553392759687101
          }
        },
        {
          "timestamp": "2025-07-20T11:39:53.453917",
          "step": 100,
          "metrics": {
            "loss": 1.1528,
            "grad_norm": 10.75,
            "learning_rate": 2.8875e-07,
            "num_tokens": 6630190.0,
            "mean_token_accuracy": 0.7614579878747463,
            "epoch": 0.019404523679582803
          }
        }
      ],
      "parameters": {
        "model_name": "HuggingFaceTB/SmolLM3-3B",
        "max_seq_length": 12288,
        "use_flash_attention": true,
        "use_gradient_checkpointing": false,
        "batch_size": 8,
        "gradient_accumulation_steps": 16,
        "learning_rate": 3.5e-06,
        "weight_decay": 0.01,
        "warmup_steps": 1200,
        "max_iters": 18000,
        "eval_interval": 1000,
        "log_interval": 25,
        "save_interval": 2000,
        "optimizer": "adamw_torch",
        "beta1": 0.9,
        "beta2": 0.999,
        "eps": 1e-08,
        "scheduler": "cosine",
        "min_lr": 3.5e-07,
        "fp16": false,
        "bf16": true,
        "ddp_backend": "nccl",
        "ddp_find_unused_parameters": false,
        "save_steps": 2000,
        "eval_steps": 1000,
        "logging_steps": 25,
        "save_total_limit": 5,
        "eval_strategy": "steps",
        "metric_for_best_model": "eval_loss",
        "greater_is_better": false,
        "load_best_model_at_end": true,
        "data_dir": null,
        "train_file": null,
        "validation_file": null,
        "test_file": null,
        "use_chat_template": true,
        "chat_template_kwargs": {
          "add_generation_prompt": true,
          "no_think_system_message": true
        },
        "enable_tracking": true,
        "trackio_url": "https://tonic-test-trackio-test.hf.space",
        "trackio_token": null,
        "log_artifacts": true,
        "log_metrics": true,
        "log_config": true,
        "experiment_name": "petite-elle-l-aime-3",
        "dataset_name": "legmlai/openhermes-fr",
        "dataset_split": "train",
        "input_field": "prompt",
        "target_field": "accepted_completion",
        "filter_bad_entries": true,
        "bad_entry_field": "bad_entry",
        "packing": false,
        "max_prompt_length": 12288,
        "max_completion_length": 8192,
        "truncation": true,
        "dataloader_num_workers": 10,
        "dataloader_pin_memory": true,
        "dataloader_prefetch_factor": 3,
        "max_grad_norm": 1.0,
        "group_by_length": true
      },
      "artifacts": [],
      "logs": []
    },
    "exp_20250720_134319": {
      "id": "exp_20250720_134319",
      "name": "petite-elle-l-aime-3-1",
      "description": "SmolLM3 fine-tuning experiment",
      "created_at": "2025-07-20T11:54:31.993219",
      "status": "running",
      "metrics": [
        {
          "timestamp": "2025-07-20T11:54:31.993219",
          "step": 25,
          "metrics": {
            "loss": 1.166,
            "grad_norm": 10.375,
            "learning_rate": 7e-08,
            "num_tokens": 1642080.0,
            "mean_token_accuracy": 0.7590958896279335,
            "epoch": 0.004851130919895701
          }
        },
        {
          "timestamp": "2025-07-20T11:54:33.589487",
          "step": 25,
          "metrics": {
            "gpu_0_memory_allocated": 17.202261447906494,
            "gpu_0_memory_reserved": 75.474609375,
            "gpu_0_utilization": 0,
            "cpu_percent": 2.7,
            "memory_percent": 10.1
          }
        }
      ],
      "parameters": {
        "model_name": "HuggingFaceTB/SmolLM3-3B",
        "max_seq_length": 12288,
        "use_flash_attention": true,
        "use_gradient_checkpointing": false,
        "batch_size": 8,
        "gradient_accumulation_steps": 16,
        "learning_rate": 3.5e-06,
        "weight_decay": 0.01,
        "warmup_steps": 1200,
        "max_iters": 18000,
        "eval_interval": 1000,
        "log_interval": 25,
        "save_interval": 2000,
        "optimizer": "adamw_torch",
        "beta1": 0.9,
        "beta2": 0.999,
        "eps": 1e-08,
        "scheduler": "cosine",
        "min_lr": 3.5e-07,
        "fp16": false,
        "bf16": true,
        "ddp_backend": "nccl",
        "ddp_find_unused_parameters": false,
        "save_steps": 2000,
        "eval_steps": 1000,
        "logging_steps": 25,
        "save_total_limit": 5,
        "eval_strategy": "steps",
        "metric_for_best_model": "eval_loss",
        "greater_is_better": false,
        "load_best_model_at_end": true,
        "data_dir": null,
        "train_file": null,
        "validation_file": null,
        "test_file": null,
        "use_chat_template": true,
        "chat_template_kwargs": {
          "add_generation_prompt": true,
          "no_think_system_message": true
        },
        "enable_tracking": true,
        "trackio_url": "https://tonic-test-trackio-test.hf.space",
        "trackio_token": null,
        "log_artifacts": true,
        "log_metrics": true,
        "log_config": true,
        "experiment_name": "petite-elle-l-aime-3-1",
        "dataset_name": "legmlai/openhermes-fr",
        "dataset_split": "train",
        "input_field": "prompt",
        "target_field": "accepted_completion",
        "filter_bad_entries": true,
        "bad_entry_field": "bad_entry",
        "packing": false,
        "max_prompt_length": 12288,
        "max_completion_length": 8192,
        "truncation": true,
        "dataloader_num_workers": 10,
        "dataloader_pin_memory": true,
        "dataloader_prefetch_factor": 3,
        "max_grad_norm": 1.0,
        "group_by_length": true
      },
      "artifacts": [],
      "logs": []
    }
  },
  "current_experiment": "exp_20250720_134319",
  "last_updated": "2025-07-20T14:05:18.615415"
}