zaydzuhri committed on
Commit
05685b7
·
verified ·
1 Parent(s): 530e859

Add files using upload-large-folder tool

Browse files
Files changed (50) hide show
  1. download_checkpoint.py +35 -0
  2. fla/models/abc/__pycache__/modeling_abc.cpython-312.pyc +0 -0
  3. fla/models/bitnet/__pycache__/modeling_bitnet.cpython-312.pyc +0 -0
  4. fla/models/bitnet/modeling_bitnet.py +441 -0
  5. fla/models/delta_net/__pycache__/configuration_delta_net.cpython-312.pyc +0 -0
  6. fla/models/delta_net/__pycache__/modeling_delta_net.cpython-312.pyc +0 -0
  7. fla/models/gated_deltanet/__pycache__/modeling_gated_deltanet.cpython-312.pyc +0 -0
  8. fla/models/gated_deltaproduct/__pycache__/modeling_gated_deltaproduct.cpython-312.pyc +0 -0
  9. fla/models/gla/__pycache__/__init__.cpython-312.pyc +0 -0
  10. fla/models/gla/__pycache__/configuration_gla.cpython-312.pyc +0 -0
  11. fla/models/hgrn/__init__.py +13 -0
  12. fla/models/hgrn/__pycache__/__init__.cpython-312.pyc +0 -0
  13. fla/models/hgrn/__pycache__/configuration_hgrn.cpython-312.pyc +0 -0
  14. fla/models/hgrn/modeling_hgrn.py +420 -0
  15. fla/models/hgrn2/__pycache__/__init__.cpython-312.pyc +0 -0
  16. fla/models/hgrn2/__pycache__/configuration_hgrn2.cpython-312.pyc +0 -0
  17. fla/models/linear_attn/__pycache__/configuration_linear_attn.cpython-312.pyc +0 -0
  18. fla/models/linear_attn/__pycache__/modeling_linear_attn.cpython-312.pyc +0 -0
  19. fla/models/mamba/__pycache__/configuration_mamba.cpython-312.pyc +0 -0
  20. fla/models/mamba2/__pycache__/modeling_mamba2.cpython-312.pyc +0 -0
  21. fla/models/nsa/__pycache__/configuration_nsa.cpython-312.pyc +0 -0
  22. fla/models/nsa/__pycache__/modeling_nsa.cpython-312.pyc +0 -0
  23. fla/models/retnet/__pycache__/configuration_retnet.cpython-312.pyc +0 -0
  24. fla/models/retnet/__pycache__/modeling_retnet.cpython-312.pyc +0 -0
  25. fla/models/rwkv6/__pycache__/configuration_rwkv6.cpython-312.pyc +0 -0
  26. fla/models/rwkv6/__pycache__/modeling_rwkv6.cpython-312.pyc +0 -0
  27. fla/models/rwkv7/__pycache__/modeling_rwkv7.cpython-312.pyc +0 -0
  28. fla/models/samba/__pycache__/__init__.cpython-312.pyc +0 -0
  29. fla/models/transformer/__pycache__/__init__.cpython-312.pyc +0 -0
  30. fla/models/transformer_dsmtp/__pycache__/modeling_transformer.cpython-312.pyc +0 -0
  31. fla/models/transformer_mtp/__pycache__/modeling_transformer.cpython-312.pyc +0 -0
  32. fla/models/transformer_top/__pycache__/modeling_transformer.cpython-312.pyc +0 -0
  33. fla/modules/__pycache__/convolution.cpython-312.pyc +0 -0
  34. fla/modules/__pycache__/feature_map.cpython-312.pyc +0 -0
  35. fla/modules/__pycache__/fused_bitlinear.cpython-312.pyc +0 -0
  36. fla/modules/__pycache__/fused_kl_div.cpython-312.pyc +0 -0
  37. fla/modules/__pycache__/fused_linear_cross_entropy.cpython-312.pyc +0 -0
  38. fla/modules/__pycache__/fused_norm_gate.cpython-312.pyc +0 -0
  39. fla/modules/__pycache__/l2norm.cpython-312.pyc +0 -0
  40. fla/modules/__pycache__/layernorm_gated.cpython-312.pyc +0 -0
  41. fla/modules/__pycache__/rotary.cpython-312.pyc +0 -0
  42. fla/modules/__pycache__/seq_to_dsmtp.cpython-312.pyc +0 -0
  43. fla/modules/__pycache__/seq_to_top.cpython-312.pyc +0 -0
  44. tb/20260114-0839/wandb/debug-internal.log +132 -0
  45. tb/20260114-0839/wandb/run-20260114_083920--top.code.7B.batch8.seqlen4096.context4096.warmup400.update2.steps40000.lr2e-5.cosine-202601140834/files/config.yaml +156 -0
  46. tb/20260114-0839/wandb/run-20260114_083920--top.code.7B.batch8.seqlen4096.context4096.warmup400.update2.steps40000.lr2e-5.cosine-202601140834/files/output.log +0 -0
  47. tb/20260114-0839/wandb/run-20260114_083920--top.code.7B.batch8.seqlen4096.context4096.warmup400.update2.steps40000.lr2e-5.cosine-202601140834/files/requirements.txt +164 -0
  48. tb/20260114-0839/wandb/run-20260114_083920--top.code.7B.batch8.seqlen4096.context4096.warmup400.update2.steps40000.lr2e-5.cosine-202601140834/files/wandb-summary.json +1 -0
  49. tb/20260114-0839/wandb/run-20260114_083920--top.code.7B.batch8.seqlen4096.context4096.warmup400.update2.steps40000.lr2e-5.cosine-202601140834/logs/debug-internal.log +132 -0
  50. tb/20260114-0839/wandb/run-20260114_083920--top.code.7B.batch8.seqlen4096.context4096.warmup400.update2.steps40000.lr2e-5.cosine-202601140834/logs/debug.log +26 -0
download_checkpoint.py ADDED
@@ -0,0 +1,35 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import argparse
2
+ import os
3
+ from huggingface_hub import HfApi, HfFolder, snapshot_download
4
+
5
def main(args):
    """Download a Hub repository snapshot into ``<folder>/checkpoint``.

    Args:
        args: Parsed command-line namespace. Reads ``args.repo_id`` (the
            repository to fetch) and ``args.experiment_checkpoint_folder``
            (the local experiment directory; a ``checkpoint`` sub-directory is
            created inside it and used as the download target).
    """
    # No `HfApi` client is instantiated here: nothing in this function uses
    # one, `snapshot_download` is called as a module-level helper.
    token = HfFolder.get_token()
    experiment_checkpoint_folder = os.path.join(args.experiment_checkpoint_folder, "checkpoint")
    # `exist_ok=True` lets the script be re-run against an existing folder.
    os.makedirs(experiment_checkpoint_folder, exist_ok=True)

    snapshot_download(
        repo_id=args.repo_id,
        token=token,
        local_dir=experiment_checkpoint_folder,
    )
19
+
20
if __name__ == "__main__":
    # Command-line entry point: both options are required strings, so they are
    # declared from a small (flag, help) table and handed straight to `main`.
    cli = argparse.ArgumentParser(description="Download a checkpoint from Hugging Face Hub.")
    required_options = (
        ("--repo_id", "The repository ID on Hugging Face Hub."),
        ("--experiment_checkpoint_folder", "The local directory to save the downloaded checkpoint."),
    )
    for flag, help_text in required_options:
        cli.add_argument(flag, type=str, required=True, help=help_text)
    main(cli.parse_args())
fla/models/abc/__pycache__/modeling_abc.cpython-312.pyc ADDED
Binary file (18.4 kB). View file
 
fla/models/bitnet/__pycache__/modeling_bitnet.cpython-312.pyc ADDED
Binary file (18.6 kB). View file
 
fla/models/bitnet/modeling_bitnet.py ADDED
@@ -0,0 +1,441 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # -*- coding: utf-8 -*-
2
+
3
+ from __future__ import annotations
4
+
5
+ import math
6
+ import warnings
7
+ from typing import TYPE_CHECKING, Any, List, Optional, Tuple, Union
8
+
9
+ import torch
10
+ import torch.nn as nn
11
+ import torch.utils.checkpoint
12
+ from transformers.generation import GenerationMixin
13
+ from transformers.modeling_outputs import BaseModelOutputWithPast, CausalLMOutputWithPast
14
+ from transformers.modeling_utils import PreTrainedModel
15
+ from transformers.utils import logging
16
+ from transformers.utils.deprecation import deprecate_kwarg
17
+
18
+ from fla.layers.bitattn import BitAttention
19
+ from fla.models.bitnet.configuration_bitnet import BitNetConfig
20
+ from fla.models.utils import Cache
21
+ from fla.modules import FusedCrossEntropyLoss, FusedLinearCrossEntropyLoss, RMSNorm
22
+ from fla.modules.activations import swiglu
23
+ from fla.modules.fused_bitlinear import FusedBitLinear
24
+
25
+ if TYPE_CHECKING:
26
+ from transformers.processing_utils import Unpack
27
+
28
+ logger = logging.get_logger(__name__)
29
+
30
+
31
class BitNetMLP(nn.Module):
    """Gated feed-forward block: ``down_proj(swiglu(gate_proj(x), up_proj(x)))``.

    All three projections are bias-free ``nn.Linear`` layers.
    """

    def __init__(
        self,
        hidden_size: int,
        hidden_ratio: Optional[int] = None,
        intermediate_size: Optional[int] = None,
        hidden_act: str = 'swish',
        fuse_swiglu: bool = True
    ) -> BitNetMLP:
        """Build the projections.

        Args:
            hidden_size: Input and output feature width.
            hidden_ratio: Expansion ratio; falls back to 4 when ``None``.
            intermediate_size: Inner width. When ``None`` it is derived as
                ``int(hidden_size * hidden_ratio * 2 / 3)`` rounded up to a
                multiple of 256.
            hidden_act: Only ``'swish'`` is accepted.
            fuse_swiglu: Stored on the module; not consulted in this class.

        Raises:
            ValueError: If ``hidden_act`` is not ``'swish'``.
        """
        super().__init__()

        self.hidden_size = hidden_size
        # The total parameter count ends up around `hidden_ratio * hidden_size^2`.
        hidden_ratio = 4 if hidden_ratio is None else hidden_ratio
        if intermediate_size is None:
            target = int(hidden_size * hidden_ratio * 2 / 3)
            # Round up to the next multiple of 256.
            full_blocks, leftover = divmod(target, 256)
            intermediate_size = 256 * (full_blocks + (1 if leftover else 0))
        self.hidden_ratio = hidden_ratio
        self.intermediate_size = intermediate_size
        self.hidden_act = hidden_act
        self.fuse_swiglu = fuse_swiglu

        if hidden_act != 'swish':
            raise ValueError(f'Unsupported hidden_act: {hidden_act}')

        inner, outer = self.intermediate_size, self.hidden_size
        self.gate_proj = nn.Linear(outer, inner, bias=False)
        self.up_proj = nn.Linear(outer, inner, bias=False)
        self.down_proj = nn.Linear(inner, outer, bias=False)

    def forward(
        self,
        x: torch.Tensor,
        **kwargs: Unpack[Any]
    ) -> torch.Tensor:
        """Apply the gated MLP to ``x``; extra keyword arguments are ignored."""
        gated = swiglu(self.gate_proj(x), self.up_proj(x))
        return self.down_proj(gated)
70
+
71
+
72
class BitNetBlock(nn.Module):
    """One pre-norm decoder layer: ``BitAttention`` followed by ``BitNetMLP``."""

    def __init__(self, config: BitNetConfig, layer_idx: int):
        """Create the two norms, the attention module and the MLP from ``config``."""
        super().__init__()

        self.config = config
        self.layer_idx = layer_idx

        # `config.fuse_norm` selects the project's RMSNorm over torch's nn.RMSNorm.
        norm_cls = RMSNorm if config.fuse_norm else nn.RMSNorm

        self.attn_norm = norm_cls(config.hidden_size, eps=config.norm_eps)
        self.attn = BitAttention(
            hidden_size=config.hidden_size,
            num_heads=config.num_heads,
            num_kv_heads=config.num_kv_heads,
            window_size=config.window_size,
            rope_theta=config.rope_theta,
            max_position_embeddings=config.max_position_embeddings,
            layer_idx=layer_idx
        )
        self.mlp_norm = norm_cls(config.hidden_size, eps=config.norm_eps)
        self.mlp = BitNetMLP(
            hidden_size=config.hidden_size,
            hidden_ratio=config.hidden_ratio,
            intermediate_size=config.intermediate_size,
            hidden_act=config.hidden_act,
            fuse_swiglu=config.fuse_swiglu
        )

    def forward(
        self,
        hidden_states: torch.Tensor,
        attention_mask: Optional[torch.Tensor] = None,
        past_key_values: Optional[Tuple[torch.Tensor]] = None,
        output_attentions: Optional[bool] = False,
        use_cache: Optional[bool] = False,
        **kwargs: Unpack[Any]
    ) -> Tuple[torch.FloatTensor, Optional[Tuple[torch.FloatTensor, torch.FloatTensor]]]:
        """Run attention and MLP with residual connections.

        Returns:
            A tuple starting with the new hidden states, followed by the
            attention output when ``output_attentions`` is truthy and then by
            the cache when ``use_cache`` is truthy.
        """
        skip = hidden_states
        attn_out, attentions, past_key_values = self.attn(
            hidden_states=self.attn_norm(hidden_states),
            attention_mask=attention_mask,
            past_key_values=past_key_values,
            use_cache=use_cache,
            output_attentions=output_attentions,
            **kwargs
        )
        if not self.config.fuse_norm:
            # Plain path: add the residual first, then normalise the sum.
            skip = skip + attn_out
            mlp_in = self.mlp_norm(skip)
        else:
            # Fused path: the norm receives the residual and returns both the
            # normalised tensor and the updated residual.
            # NOTE(review): the positional `True` presumably enables the
            # pre-norm/residual return of the fused RMSNorm -- confirm.
            mlp_in, skip = self.mlp_norm(attn_out, skip, True)
        hidden_states = skip + self.mlp(mlp_in, **kwargs)

        return (
            (hidden_states,)
            + ((attentions,) if output_attentions else ())
            + ((past_key_values,) if use_cache else ())
        )
137
+
138
+
139
class BitNetPreTrainedModel(PreTrainedModel):
    """Base class wiring ``BitNetConfig`` into ``PreTrainedModel`` and defining weight init."""

    config_class = BitNetConfig
    base_model_prefix = 'model'
    supports_gradient_checkpointing = True
    _no_split_modules = ['BitNetBlock']
    _supports_cache_class = True

    def __init__(self, *inputs, **kwargs):
        super().__init__(*inputs, **kwargs)

    def _init_weights(
        self,
        module: nn.Module,
        rescale_prenorm_residual: bool = False,
        num_residuals_per_layer: int = 2,
    ):
        """Initialise ``module`` in place.

        Linear/conv/bit-linear and embedding weights are drawn from a normal
        distribution with std ``config.initializer_range`` (biases zeroed);
        any other module exposing ``reset_parameters`` is reset through it.

        Args:
            module: The sub-module to initialise.
            rescale_prenorm_residual: When true, the ``o_proj`` (or, failing
                that, ``down_proj``) weight of ``module`` is re-initialised and
                scaled down by ``sqrt(num_residuals_per_layer * num_hidden_layers)``.
            num_residuals_per_layer: Residual additions per layer used in the
                scale above.
        """
        std = self.config.initializer_range
        if isinstance(module, (nn.Linear, nn.Conv1d, FusedBitLinear)):
            # Plain normal init (the TF reference uses a truncated normal instead),
            # cf https://github.com/pytorch/pytorch/pull/5617
            nn.init.normal_(module.weight, mean=0.0, std=std)
            if module.bias is not None:
                nn.init.zeros_(module.bias)
        elif isinstance(module, nn.Embedding):
            nn.init.normal_(module.weight, mean=0.0, std=std)
        elif hasattr(module, 'reset_parameters'):
            module.reset_parameters()

        if not rescale_prenorm_residual:
            return

        # GPT-2 style scheme: scale the weights feeding the residual stream by
        # 1/sqrt(N), N being the number of residual additions in the model.
        #   -- GPT-2 :: https://openai.com/blog/better-language-models/
        # Reference (Megatron-LM): https://github.com/NVIDIA/Megatron-LM/blob/main/megatron/model/gpt_model.py
        residual_weight = None
        for proj_name in ('o_proj', 'down_proj'):
            if hasattr(module, proj_name):
                residual_weight = getattr(module, proj_name).weight
                break
        if residual_weight is None:
            return

        # Re-draw before scaling: this hook may run more than once, and scaling
        # the existing values each time would keep shrinking them.
        nn.init.kaiming_uniform_(residual_weight, a=math.sqrt(5))
        with torch.no_grad():
            residual_weight /= math.sqrt(num_residuals_per_layer * self.config.num_hidden_layers)
187
+
188
+
189
class BitNetModel(BitNetPreTrainedModel):
    """Backbone: token embeddings, a stack of ``BitNetBlock`` layers and a final norm."""

    def __init__(
        self,
        config: BitNetConfig
    ) -> None:
        """Build embeddings, ``config.num_hidden_layers`` blocks and the output norm."""
        super().__init__(config)
        self.padding_idx = config.pad_token_id
        self.vocab_size = config.vocab_size

        self.embeddings = nn.Embedding(config.vocab_size, config.hidden_size, self.padding_idx)
        self.layers = nn.ModuleList([BitNetBlock(config, layer_idx) for layer_idx in range(config.num_hidden_layers)])
        self.norm = (RMSNorm if config.fuse_norm else nn.RMSNorm)(config.hidden_size, eps=config.norm_eps)

        self.gradient_checkpointing = False

        self.post_init()

    def get_input_embeddings(self):
        """Return the token embedding module."""
        return self.embeddings

    def set_input_embeddings(self, value):
        """Replace the token embedding module."""
        self.embeddings = value

    def forward(
        self,
        input_ids: Optional[torch.LongTensor] = None,
        attention_mask: Optional[torch.Tensor] = None,
        past_key_values: Optional[List[torch.FloatTensor]] = None,
        inputs_embeds: Optional[torch.FloatTensor] = None,
        use_cache: Optional[bool] = None,
        output_attentions: Optional[bool] = None,
        output_hidden_states: Optional[bool] = None,
        return_dict: Optional[bool] = None,
        **kwargs: Unpack[Any]
    ) -> Union[Tuple, BaseModelOutputWithPast]:
        """Embed the inputs and run them through every layer.

        Exactly one of ``input_ids`` / ``inputs_embeds`` must be given.
        Unset flags fall back to the config; ``use_cache`` defaults to
        ``False`` while training.

        Returns:
            A ``BaseModelOutputWithPast`` or, when ``return_dict`` is false,
            a tuple of the non-``None`` values among (last hidden state,
            cache, all hidden states, all attentions).

        Raises:
            ValueError: If both or neither of ``input_ids`` and
                ``inputs_embeds`` are provided.
        """
        if output_attentions:
            warnings.warn(
                "`BitNetModel` does not support output attention weights now, so `output_attentions` is set to `False`."
            )
            output_attentions = False
        output_attentions = output_attentions if output_attentions is not None else self.config.output_attentions
        output_hidden_states = output_hidden_states if output_hidden_states is not None else self.config.output_hidden_states
        use_cache = use_cache if use_cache is not None else (self.config.use_cache if not self.training else False)
        return_dict = return_dict if return_dict is not None else self.config.use_return_dict

        # retrieve input_ids and inputs_embeds
        if input_ids is not None and inputs_embeds is not None:
            raise ValueError("You cannot specify both input_ids and inputs_embeds at the same time")
        elif input_ids is None and inputs_embeds is None:
            raise ValueError("You have to specify either input_ids or inputs_embeds")

        # Anything that is not already a `Cache` (including `None`) is converted.
        if use_cache and not isinstance(past_key_values, Cache):
            past_key_values = Cache.from_legacy_cache(past_key_values)

        if inputs_embeds is None:
            inputs_embeds = self.embeddings(input_ids)

        hidden_states = inputs_embeds

        if self.gradient_checkpointing and self.training:
            if use_cache:
                logger.warning_once(
                    "`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`..."
                )
                use_cache = False

        all_hidden_states = () if output_hidden_states else None
        all_attns = () if output_attentions else None
        next_cache = None

        for layer in self.layers:
            # Record the input of each layer; the final output is appended after the loop.
            if output_hidden_states:
                all_hidden_states += (hidden_states,)

            if self.gradient_checkpointing and self.training:
                layer_outputs = self._gradient_checkpointing_func(
                    layer.__call__,
                    hidden_states,
                    attention_mask,
                    past_key_values,
                    output_attentions,
                    use_cache,
                    **kwargs
                )
            else:
                layer_outputs = layer(
                    hidden_states,
                    attention_mask=attention_mask,
                    past_key_values=past_key_values,
                    output_attentions=output_attentions,
                    use_cache=use_cache,
                    **kwargs
                )

            hidden_states = layer_outputs[0]

            # The cache sits after the attentions when those are returned.
            if use_cache:
                next_cache = layer_outputs[2 if output_attentions else 1]

            if output_attentions:
                all_attns += (layer_outputs[1],)

        hidden_states = self.norm(hidden_states)

        # add hidden states from the last decoder layer
        if output_hidden_states:
            all_hidden_states += (hidden_states,)

        if not return_dict:
            return tuple(v for v in [hidden_states, next_cache, all_hidden_states, all_attns] if v is not None)

        return BaseModelOutputWithPast(
            last_hidden_state=hidden_states,
            past_key_values=next_cache,
            hidden_states=all_hidden_states,
            attentions=all_attns
        )
308
+
309
+
310
class BitNetForCausalLM(BitNetPreTrainedModel, GenerationMixin):
    """``BitNetModel`` backbone with a bias-free linear language-modelling head."""

    _tied_weights_keys = ["lm_head.weight"]

    def __init__(self, config):
        """Create the backbone and the ``hidden_size -> vocab_size`` head."""
        super().__init__(config)
        self.model = BitNetModel(config)
        self.vocab_size = config.vocab_size
        self.lm_head = nn.Linear(config.hidden_size, config.vocab_size, bias=False)
        # Optional loss override; when left as `None`, `forward` picks a loss itself.
        self.criterion = None

        # Initialize weights and apply final processing
        self.post_init()

    def get_input_embeddings(self):
        return self.model.embeddings

    def set_input_embeddings(self, value):
        self.model.embeddings = value

    def get_output_embeddings(self):
        return self.lm_head

    def set_output_embeddings(self, new_embeddings):
        self.lm_head = new_embeddings

    def set_decoder(self, decoder):
        self.model = decoder

    def get_decoder(self):
        return self.model

    @deprecate_kwarg("num_logits_to_keep", version="4.50", new_name="logits_to_keep")
    def prepare_inputs_for_generation(
        self,
        input_ids: torch.LongTensor = None,
        past_key_values: Optional[Union[Cache, List[torch.FloatTensor]]] = None,
        attention_mask: Optional[torch.Tensor] = None,
        inputs_embeds: Optional[torch.Tensor] = None,
        use_cache: bool = True,
        logits_to_keep: Optional[int] = None,
        **kwargs
    ):
        """Assemble the keyword arguments for one generation step.

        Returns:
            A dict with either ``inputs_embeds`` (first step only) or
            ``input_ids``, plus ``past_key_values``, ``use_cache``,
            ``attention_mask`` and, when given, ``logits_to_keep``.
        """
        # A missing cache counts as empty, so `len()` is never called on `None`.
        has_cached_state = past_key_values is not None and len(past_key_values) > 0

        # only last token for `inputs_ids` if the `past_key_values` is not empty.
        if has_cached_state:
            input_ids = input_ids[:, -1:]
        # if `inputs_embeds` are passed, we only want to use them in the 1st generation step
        if inputs_embeds is not None and not has_cached_state:
            model_inputs = {'inputs_embeds': inputs_embeds}
        else:
            # The `contiguous()` here is necessary to have a static stride during decoding. torchdynamo otherwise
            # recompiles graphs as the stride of the inputs is a guard.
            # Ref: https://github.com/huggingface/transformers/pull/29114
            # TODO: use `next_tokens` directly instead.
            model_inputs = {'input_ids': input_ids.contiguous()}

        if logits_to_keep is not None:
            model_inputs['logits_to_keep'] = logits_to_keep

        model_inputs.update({
            'past_key_values': past_key_values,
            'use_cache': use_cache,
            'attention_mask': attention_mask,
        })
        return model_inputs

    def forward(
        self,
        input_ids: torch.LongTensor = None,
        attention_mask: Optional[torch.Tensor] = None,
        past_key_values: Optional[Union[Cache, List[torch.FloatTensor]]] = None,
        inputs_embeds: Optional[torch.FloatTensor] = None,
        labels: Optional[torch.LongTensor] = None,
        use_cache: Optional[bool] = None,
        output_attentions: Optional[bool] = None,
        output_hidden_states: Optional[bool] = None,
        return_dict: Optional[bool] = None,
        logits_to_keep: Optional[int] = 0,
        **kwargs: Unpack[Any]
    ) -> Union[Tuple, CausalLMOutputWithPast]:
        """Run the backbone, then compute logits and/or the language-modelling loss.

        ``labels`` are shifted left by one position inside this method; the
        last position is filled with the criterion's ``ignore_index``. When
        ``config.fuse_cross_entropy`` is set and the module is training, the
        head and the loss are fused and ``logits`` stays ``None`` if labels
        are given.

        Returns:
            A ``CausalLMOutputWithPast`` or, when ``return_dict`` is false, a
            tuple ``(loss?, logits, *backbone_outputs[1:])``.
        """
        output_attentions = output_attentions if output_attentions is not None else self.config.output_attentions
        output_hidden_states = (
            output_hidden_states if output_hidden_states is not None else self.config.output_hidden_states
        )
        return_dict = return_dict if return_dict is not None else self.config.use_return_dict

        outputs = self.model(
            input_ids=input_ids,
            attention_mask=attention_mask,
            past_key_values=past_key_values,
            inputs_embeds=inputs_embeds,
            use_cache=use_cache,
            output_attentions=output_attentions,
            output_hidden_states=output_hidden_states,
            return_dict=return_dict,
            **kwargs
        )

        hidden_states = outputs[0]
        fuse_linear_and_cross_entropy = self.config.fuse_cross_entropy and self.training

        loss, logits = None, None
        if not fuse_linear_and_cross_entropy or labels is None:
            # With the default `logits_to_keep=0` the slice `[:, -0:]` keeps every position.
            logits = self.lm_head(hidden_states if logits_to_keep is None else hidden_states[:, -logits_to_keep:])
        if labels is not None:
            if getattr(self, 'criterion', None) is None:
                if fuse_linear_and_cross_entropy:
                    criterion = FusedLinearCrossEntropyLoss()
                elif self.config.fuse_cross_entropy:
                    criterion = FusedCrossEntropyLoss(inplace_backward=True)
                else:
                    criterion = nn.CrossEntropyLoss()
            else:
                criterion = self.criterion
            labels = labels.to(hidden_states.device)
            # Next-token targets: drop the first label and pad the end with `ignore_index`.
            labels = torch.cat((labels[..., 1:], torch.full_like(labels[:, :1], criterion.ignore_index)), 1)
            if fuse_linear_and_cross_entropy:
                loss = criterion(hidden_states, labels, self.lm_head.weight, self.lm_head.bias)
            else:
                loss = criterion(logits.view(labels.numel(), -1), labels.view(-1))

        if not return_dict:
            output = (logits,) + outputs[1:]
            return (loss,) + output if loss is not None else output

        return CausalLMOutputWithPast(
            loss=loss,
            logits=logits,
            past_key_values=outputs.past_key_values,
            hidden_states=outputs.hidden_states,
            attentions=outputs.attentions,
        )
fla/models/delta_net/__pycache__/configuration_delta_net.cpython-312.pyc ADDED
Binary file (3.58 kB). View file
 
fla/models/delta_net/__pycache__/modeling_delta_net.cpython-312.pyc ADDED
Binary file (18.5 kB). View file
 
fla/models/gated_deltanet/__pycache__/modeling_gated_deltanet.cpython-312.pyc ADDED
Binary file (18.5 kB). View file
 
fla/models/gated_deltaproduct/__pycache__/modeling_gated_deltaproduct.cpython-312.pyc ADDED
Binary file (20.7 kB). View file
 
fla/models/gla/__pycache__/__init__.cpython-312.pyc ADDED
Binary file (653 Bytes). View file
 
fla/models/gla/__pycache__/configuration_gla.cpython-312.pyc ADDED
Binary file (3.72 kB). View file
 
fla/models/hgrn/__init__.py ADDED
@@ -0,0 +1,13 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # -*- coding: utf-8 -*-
2
+
3
+ from transformers import AutoConfig, AutoModel, AutoModelForCausalLM
4
+
5
+ from fla.models.hgrn.configuration_hgrn import HGRNConfig
6
+ from fla.models.hgrn.modeling_hgrn import HGRNForCausalLM, HGRNModel
7
+
8
# Make the HGRN classes discoverable through the transformers Auto* factories.
# `exist_ok=True` keeps the registration idempotent, so executing this module
# a second time (reload, duplicate import path) does not raise.
AutoConfig.register(HGRNConfig.model_type, HGRNConfig, exist_ok=True)
AutoModel.register(HGRNConfig, HGRNModel, exist_ok=True)
AutoModelForCausalLM.register(HGRNConfig, HGRNForCausalLM, exist_ok=True)


__all__ = ['HGRNConfig', 'HGRNForCausalLM', 'HGRNModel']
fla/models/hgrn/__pycache__/__init__.cpython-312.pyc ADDED
Binary file (661 Bytes). View file
 
fla/models/hgrn/__pycache__/configuration_hgrn.cpython-312.pyc ADDED
Binary file (3.27 kB). View file
 
fla/models/hgrn/modeling_hgrn.py ADDED
@@ -0,0 +1,420 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # -*- coding: utf-8 -*-
2
+
3
+ from __future__ import annotations
4
+
5
+ import math
6
+ import warnings
7
+ from typing import TYPE_CHECKING, Dict, List, Optional, Tuple, Union
8
+
9
+ import torch
10
+ import torch.nn as nn
11
+ import torch.utils.checkpoint
12
+ from transformers.generation import GenerationMixin
13
+ from transformers.modeling_outputs import BaseModelOutputWithPast, CausalLMOutputWithPast
14
+ from transformers.modeling_utils import PreTrainedModel
15
+ from transformers.utils import logging
16
+ from transformers.utils.deprecation import deprecate_kwarg
17
+
18
+ from fla.layers.attn import Attention
19
+ from fla.layers.hgrn import HGRNAttention
20
+ from fla.models.hgrn.configuration_hgrn import HGRNConfig
21
+ from fla.models.utils import Cache
22
+ from fla.modules import FusedCrossEntropyLoss, FusedLinearCrossEntropyLoss
23
+ from fla.modules import GatedMLP as HGRNMLP
24
+ from fla.modules import RMSNorm
25
+
26
+ if TYPE_CHECKING:
27
+ from transformers.processing_utils import Unpack
28
+
29
+ logger = logging.get_logger(__name__)
30
+
31
+
32
class HGRNBlock(nn.Module):
    """One pre-norm layer: a token mixer (``Attention`` or ``HGRNAttention``) plus a gated MLP."""

    def __init__(self, config: HGRNConfig, layer_idx: int):
        """Build the layer; ``config.attn['layers']`` lists the indices that use ``Attention``."""
        super().__init__()

        self.config = config
        self.layer_idx = layer_idx

        norm_cls = RMSNorm if config.fuse_norm else nn.RMSNorm
        attn_cfg = config.attn

        self.attn_norm = norm_cls(config.hidden_size, eps=config.norm_eps)
        if attn_cfg is not None and layer_idx in attn_cfg['layers']:
            self.attn = Attention(
                hidden_size=config.hidden_size,
                num_heads=attn_cfg['num_heads'],
                num_kv_heads=attn_cfg['num_kv_heads'],
                qkv_bias=attn_cfg['qkv_bias'],
                window_size=attn_cfg['window_size'],
                rope_theta=attn_cfg['rope_theta'],
                max_position_embeddings=config.max_position_embeddings,
                layer_idx=layer_idx
            )
        else:
            self.attn = HGRNAttention(
                mode=config.attn_mode,
                hidden_size=config.hidden_size,
                expand_ratio=config.expand_ratio,
                use_short_conv=config.use_short_conv,
                conv_size=config.conv_size,
                elementwise_affine=config.elementwise_affine,
                norm_eps=config.norm_eps,
                layer_idx=layer_idx
            )
        self.mlp_norm = norm_cls(config.hidden_size, eps=config.norm_eps)
        self.mlp = HGRNMLP(
            hidden_size=config.hidden_size,
            hidden_ratio=config.hidden_ratio,
            intermediate_size=config.intermediate_size,
            hidden_act=config.hidden_act,
            fuse_swiglu=config.fuse_swiglu
        )

    def forward(
        self,
        hidden_states: torch.Tensor,
        attention_mask: Optional[torch.Tensor] = None,
        past_key_values: Optional[Union[Cache, List[torch.FloatTensor]]] = None,
        use_cache: Optional[bool] = False,
        output_attentions: Optional[bool] = False,
        lower_bound: Optional[torch.Tensor] = False,
        **kwargs: Unpack[Dict]
    ) -> Tuple[torch.FloatTensor, Optional[Tuple[torch.FloatTensor, torch.FloatTensor]]]:
        """Apply the mixer and MLP with residual connections.

        ``lower_bound`` is forwarded to the mixer unchanged, whichever mixer
        class this layer uses.

        Returns:
            Always the 3-tuple ``(hidden_states, attentions, past_key_values)``.
        """
        skip = hidden_states
        mixed, attentions, past_key_values = self.attn(
            hidden_states=self.attn_norm(hidden_states),
            attention_mask=attention_mask,
            past_key_values=past_key_values,
            use_cache=use_cache,
            output_attentions=output_attentions,
            lower_bound=lower_bound,
            **kwargs
        )
        if not self.config.fuse_norm:
            # Plain path: residual add, then normalise the sum.
            skip = skip + mixed
            mlp_in = self.mlp_norm(skip)
        else:
            # Fused path: the norm takes the residual and returns the updated one.
            # NOTE(review): the positional `True` presumably selects the
            # pre-norm/residual return of the fused RMSNorm -- confirm.
            mlp_in, skip = self.mlp_norm(mixed, skip, True)
        hidden_states = skip + self.mlp(mlp_in, **kwargs)

        return (hidden_states, attentions, past_key_values)
104
+
105
+
106
class HGRNPreTrainedModel(PreTrainedModel):
    """Base class wiring ``HGRNConfig`` into ``PreTrainedModel`` and defining weight init."""

    config_class = HGRNConfig
    base_model_prefix = 'model'
    supports_gradient_checkpointing = True
    _no_split_modules = ['HGRNBlock']
    _supports_cache_class = True

    def __init__(self, *inputs, **kwargs):
        super().__init__(*inputs, **kwargs)

    def _init_weights(
        self,
        module: nn.Module,
        prenorm_residual_strategy: Optional[str] = 'rescale',
        num_residuals_per_layer: int = 2,
    ):
        """Initialise ``module`` in place.

        Linear/conv and embedding weights are drawn from a normal distribution
        with std ``config.initializer_range`` (biases zeroed); other modules
        exposing ``reset_parameters`` are reset through it.

        Args:
            module: The sub-module to initialise.
            prenorm_residual_strategy: ``None`` to skip the extra step,
                ``'rescale'`` to re-draw and scale down the ``o_proj`` /
                ``down_proj`` weight, or ``'zero'`` to zero it.
            num_residuals_per_layer: Residual additions per layer, used in the
                ``'rescale'`` factor.

        Raises:
            ValueError: If the strategy is neither of the above and
                ``module`` has an ``o_proj`` or ``down_proj``.
        """
        std = self.config.initializer_range
        if isinstance(module, (nn.Linear, nn.Conv1d)):
            # Plain normal init (the TF reference uses a truncated normal instead),
            # cf https://github.com/pytorch/pytorch/pull/5617
            nn.init.normal_(module.weight, mean=0.0, std=std)
            if module.bias is not None:
                nn.init.zeros_(module.bias)
        elif isinstance(module, nn.Embedding):
            nn.init.normal_(module.weight, mean=0.0, std=std)
        elif hasattr(module, 'reset_parameters'):
            module.reset_parameters()

        if prenorm_residual_strategy is None:
            return

        # GPT-2 style scheme: scale the weights feeding the residual stream by
        # 1/sqrt(N), N being the number of residual additions in the model.
        #   -- GPT-2 :: https://openai.com/blog/better-language-models/
        # Reference (Megatron-LM): https://github.com/NVIDIA/Megatron-LM/blob/main/megatron/model/gpt_model.py
        residual_weight = None
        for proj_name in ('o_proj', 'down_proj'):
            if hasattr(module, proj_name):
                residual_weight = getattr(module, proj_name).weight
                break
        if residual_weight is None:
            return

        if prenorm_residual_strategy == 'zero':
            nn.init.zeros_(residual_weight)
        elif prenorm_residual_strategy == 'rescale':
            # Re-draw before scaling: this hook may run more than once, and
            # scaling the existing values each time would keep shrinking them.
            nn.init.kaiming_uniform_(residual_weight, a=math.sqrt(5))
            with torch.no_grad():
                residual_weight /= math.sqrt(num_residuals_per_layer * self.config.num_hidden_layers)
        else:
            raise ValueError(f"Invalid prenorm_residual_strategy: {prenorm_residual_strategy}")
159
+
160
+
161
class HGRNModel(HGRNPreTrainedModel):
    """HGRN backbone without a language-modeling head.

    Embeds the input tokens, runs them through ``config.num_hidden_layers``
    ``HGRNBlock`` layers and applies a final RMS normalization.
    """

    def __init__(self, config: HGRNConfig):
        super().__init__(config)
        self.padding_idx = config.pad_token_id
        self.vocab_size = config.vocab_size

        # NOTE: submodules/parameters are registered in the same order as before
        # so that parameter ordering and initialization order are unaffected.
        self.embeddings = nn.Embedding(config.vocab_size, config.hidden_size, self.padding_idx)
        if config.use_lower_bound:
            # One learnable row per layer; turned into per-layer lower bounds in `forward`.
            self.lower_bounds = nn.Parameter(torch.zeros(config.num_hidden_layers, config.hidden_size))
        blocks = [HGRNBlock(config, layer_idx) for layer_idx in range(config.num_hidden_layers)]
        self.layers = nn.ModuleList(blocks)
        norm_cls = RMSNorm if config.fuse_norm else nn.RMSNorm
        self.norm = norm_cls(config.hidden_size, eps=config.norm_eps)

        self.gradient_checkpointing = False

        self.post_init()

    def get_input_embeddings(self):
        """Return the token embedding module."""
        return self.embeddings

    def set_input_embeddings(self, value):
        """Replace the token embedding module with ``value``."""
        self.embeddings = value

    def forward(
        self,
        input_ids: Optional[torch.LongTensor] = None,
        attention_mask: Optional[torch.Tensor] = None,  # noqa
        inputs_embeds: Optional[torch.FloatTensor] = None,
        past_key_values: Optional[Union[Cache, List[torch.FloatTensor]]] = None,
        use_cache: Optional[bool] = None,
        output_attentions: Optional[bool] = None,
        output_hidden_states: Optional[bool] = None,
        return_dict: Optional[bool] = None,
        **kwargs: Unpack[Dict]
    ) -> Union[Tuple, BaseModelOutputWithPast]:
        """Run the layer stack over ``input_ids`` or ``inputs_embeds``.

        Exactly one of ``input_ids`` / ``inputs_embeds`` must be given.
        Flags left as ``None`` fall back to the values stored on ``self.config``
        (``use_cache`` additionally defaults to ``False`` while training).
        Extra ``kwargs`` are forwarded to every layer unchanged.

        Returns:
            A ``BaseModelOutputWithPast`` when ``return_dict`` is truthy, otherwise a
            tuple of the non-``None`` entries among
            ``(hidden_states, past_key_values, all_hidden_states, all_attns)``.

        Raises:
            ValueError: if both or neither of ``input_ids`` and ``inputs_embeds`` are given.
        """
        # An explicit request for attentions is refused with a warning.
        if output_attentions:
            warnings.warn("`HGRNModel` does not `output_attentions` now, setting it to `False`.")
            output_attentions = False
        if output_attentions is None:
            output_attentions = self.config.output_attentions
        if output_hidden_states is None:
            output_hidden_states = self.config.output_hidden_states
        if use_cache is None:
            use_cache = False if self.training else self.config.use_cache
        if return_dict is None:
            return_dict = self.config.use_return_dict

        # retrieve input_ids and inputs_embeds
        if input_ids is not None and inputs_embeds is not None:
            raise ValueError("You cannot specify both input_ids and inputs_embeds at the same time")
        if input_ids is None and inputs_embeds is None:
            raise ValueError("You have to specify either input_ids or inputs_embeds")

        if inputs_embeds is None:
            inputs_embeds = self.embeddings(input_ids)
        hidden_states = inputs_embeds

        # Anything that is not already a `Cache` (including `None`) is converted.
        if use_cache and not isinstance(past_key_values, Cache):
            past_key_values = Cache.from_legacy_cache(past_key_values)

        if self.gradient_checkpointing and self.training and use_cache:
            logger.warning_once("`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`...")
            use_cache = False

        all_hidden_states = () if output_hidden_states else None
        all_attns = () if output_attentions else None

        # Softmax over the layer axis, then a cumulative sum shifted by the first
        # row, so that row 0 of the resulting bounds is all zeros.
        lower_bounds = None
        if self.config.use_lower_bound:
            layer_shares = self.lower_bounds.softmax(0)
            lower_bounds = layer_shares.cumsum(0) - layer_shares[0]

        checkpointed = self.gradient_checkpointing and self.training
        for i, layer in enumerate(self.layers):
            if output_hidden_states:
                all_hidden_states = all_hidden_states + (hidden_states,)

            lower_bound = None if lower_bounds is None else lower_bounds[i]
            if checkpointed:
                # The checkpointing helper receives the layer arguments positionally.
                hidden_states, attentions, past_key_values = self._gradient_checkpointing_func(
                    layer.__call__,
                    hidden_states,
                    attention_mask,
                    past_key_values,
                    use_cache,
                    output_attentions,
                    lower_bound,
                    **kwargs
                )
            else:
                hidden_states, attentions, past_key_values = layer(
                    hidden_states,
                    attention_mask=attention_mask,
                    past_key_values=past_key_values,
                    use_cache=use_cache,
                    output_attentions=output_attentions,
                    lower_bound=lower_bound,
                    **kwargs
                )

            if output_attentions:
                all_attns = all_attns + (attentions,)

        hidden_states = self.norm(hidden_states)

        # add hidden states from the last decoder layer
        if output_hidden_states:
            all_hidden_states = all_hidden_states + (hidden_states,)

        if return_dict:
            return BaseModelOutputWithPast(
                last_hidden_state=hidden_states,
                past_key_values=past_key_values,
                hidden_states=all_hidden_states,
                attentions=all_attns
            )
        candidates = (hidden_states, past_key_values, all_hidden_states, all_attns)
        return tuple(item for item in candidates if item is not None)
271
+
272
+
273
class HGRNForCausalLM(HGRNPreTrainedModel, GenerationMixin):
    """HGRN backbone topped with a bias-free linear language-modeling head."""

    _tied_weights_keys = ["lm_head.weight"]

    def __init__(self, config):
        super().__init__(config)
        self.model = HGRNModel(config)
        self.vocab_size = config.vocab_size
        self.lm_head = nn.Linear(config.hidden_size, config.vocab_size, bias=False)
        # Optional loss override; when left as `None`, `forward` picks a loss itself.
        self.criterion = None

        # Initialize weights and apply final processing
        self.post_init()

    def get_input_embeddings(self):
        """Return the backbone's token embedding module."""
        return self.model.embeddings

    def set_input_embeddings(self, value):
        """Replace the backbone's token embedding module with ``value``."""
        self.model.embeddings = value

    def get_output_embeddings(self):
        """Return the language-modeling head."""
        return self.lm_head

    def set_output_embeddings(self, new_embeddings):
        """Replace the language-modeling head with ``new_embeddings``."""
        self.lm_head = new_embeddings

    def set_decoder(self, decoder):
        """Replace the backbone model with ``decoder``."""
        self.model = decoder

    def get_decoder(self):
        """Return the backbone model."""
        return self.model

    def generate(self, *args, **kwargs):
        """Delegate to the parent ``generate`` with a clearer error for unsupported strategies.

        Raises:
            AttributeError: re-raised with an explanatory message (chained to the
                original error) when the original message mentions
                ``past_key_values``; any other ``AttributeError`` propagates unchanged.
        """
        try:
            return super().generate(*args, **kwargs)
        except AttributeError as exception:
            if 'past_key_values' not in str(exception):
                raise
            # Chain explicitly so the original failure stays visible as the cause.
            raise AttributeError(
                f"You tried to call `generate` with a decoding strategy that manipulates `past_key_values`, "
                f"which is not supported for {self.__class__.__name__}. "
                f"Try another generation strategy instead. "
                f"For the available generation strategies, check this doc: "
                f"https://huggingface.co/docs/transformers/en/generation_strategies#decoding-strategies"
            ) from exception

    @deprecate_kwarg("num_logits_to_keep", version="4.50", new_name="logits_to_keep")
    def prepare_inputs_for_generation(
        self,
        input_ids: torch.LongTensor = None,
        past_key_values: Optional[Union[Cache, List[torch.FloatTensor]]] = None,
        attention_mask: Optional[torch.Tensor] = None,
        inputs_embeds: Optional[torch.Tensor] = None,
        use_cache: bool = True,
        logits_to_keep: Optional[int] = None,
        **kwargs: Unpack[Dict]
    ):
        """Assemble the keyword arguments for one generation step.

        With a non-empty cache only the last token of ``input_ids`` is kept.
        ``inputs_embeds`` are used only while the cache is missing or empty
        (i.e. on the first step); otherwise ``input_ids`` are passed on.

        Returns:
            dict with either ``'inputs_embeds'`` or ``'input_ids'``, plus
            ``'past_key_values'``, ``'use_cache'``, ``'attention_mask'`` and,
            when given, ``'logits_to_keep'``.
        """
        # A missing cache (`None`) is treated the same as an empty one; calling
        # `len()` on `None` here previously raised a TypeError when
        # `inputs_embeds` was supplied without `past_key_values`.
        has_cached_state = past_key_values is not None and len(past_key_values) > 0

        # only last token for `inputs_ids` if the `past_key_values` is not empty.
        if has_cached_state:
            input_ids = input_ids[:, -1:]
        # if `inputs_embeds` are passed, we only want to use them in the 1st generation step
        if inputs_embeds is not None and not has_cached_state:
            model_inputs = {'inputs_embeds': inputs_embeds}
        else:
            # The `contiguous()` here is necessary to have a static stride during decoding. torchdynamo otherwise
            # recompiles graphs as the stride of the inputs is a guard.
            # Ref: https://github.com/huggingface/transformers/pull/29114
            # TODO: use `next_tokens` directly instead.
            model_inputs = {'input_ids': input_ids.contiguous()}

        if logits_to_keep is not None:
            model_inputs['logits_to_keep'] = logits_to_keep

        model_inputs.update({
            'past_key_values': past_key_values,
            'use_cache': use_cache,
            'attention_mask': attention_mask,
        })
        return model_inputs

    @deprecate_kwarg("num_logits_to_keep", version="4.50", new_name="logits_to_keep")
    def forward(
        self,
        input_ids: torch.LongTensor = None,
        attention_mask: Optional[torch.Tensor] = None,
        inputs_embeds: Optional[torch.Tensor] = None,
        past_key_values: Optional[Union[Cache, List[torch.FloatTensor]]] = None,
        labels: Optional[torch.LongTensor] = None,
        use_cache: Optional[bool] = None,
        output_attentions: Optional[bool] = None,
        output_hidden_states: Optional[bool] = None,
        return_dict: Optional[bool] = None,
        logits_to_keep: Optional[int] = 0,
        **kwargs: Unpack[Dict]
    ) -> Union[Tuple, CausalLMOutputWithPast]:
        """Run the backbone, then compute logits and/or the language-modeling loss.

        Logits are skipped (left as ``None``) only when ``labels`` are given and the
        fused linear + cross-entropy path is active, i.e. ``config.fuse_cross_entropy``
        is set and the module is in training mode. Otherwise the head is applied to
        the last ``logits_to_keep`` positions (``0`` or ``None`` keeps every position).

        Returns:
            ``CausalLMOutputWithPast`` when ``return_dict`` is truthy, otherwise a
            tuple ``(loss?, logits, *backbone_outputs[1:])`` where ``loss`` is only
            present when it was computed.
        """
        output_attentions = output_attentions if output_attentions is not None else self.config.output_attentions
        output_hidden_states = (
            output_hidden_states if output_hidden_states is not None else self.config.output_hidden_states
        )
        return_dict = return_dict if return_dict is not None else self.config.use_return_dict

        outputs = self.model(
            input_ids=input_ids,
            attention_mask=attention_mask,
            inputs_embeds=inputs_embeds,
            past_key_values=past_key_values,
            use_cache=use_cache,
            output_attentions=output_attentions,
            output_hidden_states=output_hidden_states,
            return_dict=return_dict,
            **kwargs
        )

        hidden_states = outputs[0]
        fuse_linear_and_cross_entropy = self.config.fuse_cross_entropy and self.training

        loss, logits = None, None
        if not fuse_linear_and_cross_entropy or labels is None:
            # `-0:` slices the whole sequence, so the default of 0 keeps all positions.
            logits = self.lm_head(hidden_states if logits_to_keep is None else hidden_states[:, -logits_to_keep:])
        if labels is not None:
            if getattr(self, 'criterion', None) is None:
                if fuse_linear_and_cross_entropy:
                    criterion = FusedLinearCrossEntropyLoss()
                elif self.config.fuse_cross_entropy:
                    criterion = FusedCrossEntropyLoss(inplace_backward=True)
                else:
                    criterion = nn.CrossEntropyLoss()
            else:
                criterion = self.criterion
            labels = labels.to(hidden_states.device)
            # Shift labels left by one and pad the final position with `ignore_index`,
            # so position t is scored against token t + 1.
            labels = torch.cat((labels[..., 1:], torch.full_like(labels[:, :1], criterion.ignore_index)), 1)
            if fuse_linear_and_cross_entropy:
                # The fused loss applies the head itself from the hidden states.
                loss = criterion(hidden_states, labels, self.lm_head.weight, self.lm_head.bias)
            else:
                loss = criterion(logits.view(labels.numel(), -1), labels.view(-1))

        if not return_dict:
            output = (logits,) + outputs[1:]
            return (loss,) + output if loss is not None else output

        return CausalLMOutputWithPast(
            loss=loss,
            logits=logits,
            past_key_values=outputs.past_key_values,
            hidden_states=outputs.hidden_states,
            attentions=outputs.attentions,
        )
fla/models/hgrn2/__pycache__/__init__.cpython-312.pyc ADDED
Binary file (670 Bytes). View file
 
fla/models/hgrn2/__pycache__/configuration_hgrn2.cpython-312.pyc ADDED
Binary file (3.54 kB). View file
 
fla/models/linear_attn/__pycache__/configuration_linear_attn.cpython-312.pyc ADDED
Binary file (3.64 kB). View file
 
fla/models/linear_attn/__pycache__/modeling_linear_attn.cpython-312.pyc ADDED
Binary file (18.5 kB). View file
 
fla/models/mamba/__pycache__/configuration_mamba.cpython-312.pyc ADDED
Binary file (7.06 kB). View file
 
fla/models/mamba2/__pycache__/modeling_mamba2.cpython-312.pyc ADDED
Binary file (52.4 kB). View file
 
fla/models/nsa/__pycache__/configuration_nsa.cpython-312.pyc ADDED
Binary file (2.64 kB). View file
 
fla/models/nsa/__pycache__/modeling_nsa.cpython-312.pyc ADDED
Binary file (17.6 kB). View file
 
fla/models/retnet/__pycache__/configuration_retnet.cpython-312.pyc ADDED
Binary file (3.72 kB). View file
 
fla/models/retnet/__pycache__/modeling_retnet.cpython-312.pyc ADDED
Binary file (18.4 kB). View file
 
fla/models/rwkv6/__pycache__/configuration_rwkv6.cpython-312.pyc ADDED
Binary file (3.32 kB). View file
 
fla/models/rwkv6/__pycache__/modeling_rwkv6.cpython-312.pyc ADDED
Binary file (21.2 kB). View file
 
fla/models/rwkv7/__pycache__/modeling_rwkv7.cpython-312.pyc ADDED
Binary file (22.3 kB). View file
 
fla/models/samba/__pycache__/__init__.cpython-312.pyc ADDED
Binary file (713 Bytes). View file
 
fla/models/transformer/__pycache__/__init__.cpython-312.pyc ADDED
Binary file (724 Bytes). View file
 
fla/models/transformer_dsmtp/__pycache__/modeling_transformer.cpython-312.pyc ADDED
Binary file (22.1 kB). View file
 
fla/models/transformer_mtp/__pycache__/modeling_transformer.cpython-312.pyc ADDED
Binary file (24.9 kB). View file
 
fla/models/transformer_top/__pycache__/modeling_transformer.cpython-312.pyc ADDED
Binary file (19.1 kB). View file
 
fla/modules/__pycache__/convolution.cpython-312.pyc ADDED
Binary file (21 kB). View file
 
fla/modules/__pycache__/feature_map.cpython-312.pyc ADDED
Binary file (17.6 kB). View file
 
fla/modules/__pycache__/fused_bitlinear.cpython-312.pyc ADDED
Binary file (23.6 kB). View file
 
fla/modules/__pycache__/fused_kl_div.cpython-312.pyc ADDED
Binary file (11.7 kB). View file
 
fla/modules/__pycache__/fused_linear_cross_entropy.cpython-312.pyc ADDED
Binary file (20.6 kB). View file
 
fla/modules/__pycache__/fused_norm_gate.cpython-312.pyc ADDED
Binary file (35.3 kB). View file
 
fla/modules/__pycache__/l2norm.cpython-312.pyc ADDED
Binary file (6.96 kB). View file
 
fla/modules/__pycache__/layernorm_gated.cpython-312.pyc ADDED
Binary file (23.5 kB). View file
 
fla/modules/__pycache__/rotary.cpython-312.pyc ADDED
Binary file (23.2 kB). View file
 
fla/modules/__pycache__/seq_to_dsmtp.cpython-312.pyc ADDED
Binary file (1.35 kB). View file
 
fla/modules/__pycache__/seq_to_top.cpython-312.pyc ADDED
Binary file (4.09 kB). View file
 
tb/20260114-0839/wandb/debug-internal.log ADDED
@@ -0,0 +1,132 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {"time":"2026-01-14T08:39:20.983194417Z","level":"INFO","msg":"stream: starting","core version":"0.23.1"}
2
+ {"time":"2026-01-14T08:39:23.832796813Z","level":"INFO","msg":"stream: created new stream","id":"-top.code.7B.batch8.seqlen4096.context4096.warmup400.update2.steps40000.lr2e-5.cosine-202601140834"}
3
+ {"time":"2026-01-14T08:39:23.833049183Z","level":"INFO","msg":"stream: started","id":"-top.code.7B.batch8.seqlen4096.context4096.warmup400.update2.steps40000.lr2e-5.cosine-202601140834"}
4
+ {"time":"2026-01-14T08:39:23.834752679Z","level":"INFO","msg":"handler: started","stream_id":"-top.code.7B.batch8.seqlen4096.context4096.warmup400.update2.steps40000.lr2e-5.cosine-202601140834"}
5
+ {"time":"2026-01-14T08:39:23.834811381Z","level":"INFO","msg":"writer: started","stream_id":"-top.code.7B.batch8.seqlen4096.context4096.warmup400.update2.steps40000.lr2e-5.cosine-202601140834"}
6
+ {"time":"2026-01-14T08:39:23.834906425Z","level":"INFO","msg":"sender: started","stream_id":"-top.code.7B.batch8.seqlen4096.context4096.warmup400.update2.steps40000.lr2e-5.cosine-202601140834"}
7
+ {"time":"2026-01-14T08:49:42.927432861Z","level":"INFO","msg":"api: retrying HTTP error","status":502,"url":"https://api.wandb.ai/files/zaydzuhri/fla/-top.code.7B.batch8.seqlen4096.context4096.warmup400.update2.steps40000.lr2e-5.cosine-202601140834/file_stream","body":"\n<html><head>\n<meta http-equiv=\"content-type\" content=\"text/html;charset=utf-8\">\n<title>502 Server Error</title>\n</head>\n<body text=#000000 bgcolor=#ffffff>\n<h1>Error: Server Error</h1>\n<h2>The server encountered a temporary error and could not complete your request.<p>Please try again in 30 seconds.</h2>\n<h2></h2>\n</body></html>\n"}
8
+ {"time":"2026-01-14T10:55:25.630741135Z","level":"INFO","msg":"api: retrying error","error":"Post \"https://api.wandb.ai/files/zaydzuhri/fla/-top.code.7B.batch8.seqlen4096.context4096.warmup400.update2.steps40000.lr2e-5.cosine-202601140834/file_stream\": dial tcp: lookup api.wandb.ai on 10.43.0.10:53: read udp 10.42.227.13:54532->10.43.0.10:53: i/o timeout"}
9
+ {"time":"2026-01-14T12:15:32.359274661Z","level":"INFO","msg":"api: retrying error","error":"Post \"https://api.wandb.ai/files/zaydzuhri/fla/-top.code.7B.batch8.seqlen4096.context4096.warmup400.update2.steps40000.lr2e-5.cosine-202601140834/file_stream\": unexpected EOF"}
10
+ {"time":"2026-01-14T13:08:41.787033162Z","level":"INFO","msg":"api: retrying HTTP error","status":502,"url":"https://api.wandb.ai/files/zaydzuhri/fla/-top.code.7B.batch8.seqlen4096.context4096.warmup400.update2.steps40000.lr2e-5.cosine-202601140834/file_stream","body":"\n<html><head>\n<meta http-equiv=\"content-type\" content=\"text/html;charset=utf-8\">\n<title>502 Server Error</title>\n</head>\n<body text=#000000 bgcolor=#ffffff>\n<h1>Error: Server Error</h1>\n<h2>The server encountered a temporary error and could not complete your request.<p>Please try again in 30 seconds.</h2>\n<h2></h2>\n</body></html>\n"}
11
+ {"time":"2026-01-14T14:59:44.911105606Z","level":"INFO","msg":"api: retrying error","error":"Post \"https://api.wandb.ai/files/zaydzuhri/fla/-top.code.7B.batch8.seqlen4096.context4096.warmup400.update2.steps40000.lr2e-5.cosine-202601140834/file_stream\": unexpected EOF"}
12
+ {"time":"2026-01-14T15:37:12.523994545Z","level":"INFO","msg":"api: retrying HTTP error","status":502,"url":"https://api.wandb.ai/files/zaydzuhri/fla/-top.code.7B.batch8.seqlen4096.context4096.warmup400.update2.steps40000.lr2e-5.cosine-202601140834/file_stream","body":"\n<html><head>\n<meta http-equiv=\"content-type\" content=\"text/html;charset=utf-8\">\n<title>502 Server Error</title>\n</head>\n<body text=#000000 bgcolor=#ffffff>\n<h1>Error: Server Error</h1>\n<h2>The server encountered a temporary error and could not complete your request.<p>Please try again in 30 seconds.</h2>\n<h2></h2>\n</body></html>\n"}
13
+ {"time":"2026-01-14T16:47:29.020950166Z","level":"INFO","msg":"api: retrying HTTP error","status":502,"url":"https://api.wandb.ai/files/zaydzuhri/fla/-top.code.7B.batch8.seqlen4096.context4096.warmup400.update2.steps40000.lr2e-5.cosine-202601140834/file_stream","body":"\n<html><head>\n<meta http-equiv=\"content-type\" content=\"text/html;charset=utf-8\">\n<title>502 Server Error</title>\n</head>\n<body text=#000000 bgcolor=#ffffff>\n<h1>Error: Server Error</h1>\n<h2>The server encountered a temporary error and could not complete your request.<p>Please try again in 30 seconds.</h2>\n<h2></h2>\n</body></html>\n"}
14
+ {"time":"2026-01-14T16:47:39.426278777Z","level":"INFO","msg":"api: retrying HTTP error","status":502,"url":"https://api.wandb.ai/files/zaydzuhri/fla/-top.code.7B.batch8.seqlen4096.context4096.warmup400.update2.steps40000.lr2e-5.cosine-202601140834/file_stream","body":"\n<html><head>\n<meta http-equiv=\"content-type\" content=\"text/html;charset=utf-8\">\n<title>502 Server Error</title>\n</head>\n<body text=#000000 bgcolor=#ffffff>\n<h1>Error: Server Error</h1>\n<h2>The server encountered a temporary error and could not complete your request.<p>Please try again in 30 seconds.</h2>\n<h2></h2>\n</body></html>\n"}
15
+ {"time":"2026-01-14T17:03:26.755419877Z","level":"INFO","msg":"api: retrying HTTP error","status":502,"url":"https://api.wandb.ai/files/zaydzuhri/fla/-top.code.7B.batch8.seqlen4096.context4096.warmup400.update2.steps40000.lr2e-5.cosine-202601140834/file_stream","body":"\n<html><head>\n<meta http-equiv=\"content-type\" content=\"text/html;charset=utf-8\">\n<title>502 Server Error</title>\n</head>\n<body text=#000000 bgcolor=#ffffff>\n<h1>Error: Server Error</h1>\n<h2>The server encountered a temporary error and could not complete your request.<p>Please try again in 30 seconds.</h2>\n<h2></h2>\n</body></html>\n"}
16
+ {"time":"2026-01-14T17:22:40.06571008Z","level":"INFO","msg":"api: retrying error","error":"Post \"https://api.wandb.ai/files/zaydzuhri/fla/-top.code.7B.batch8.seqlen4096.context4096.warmup400.update2.steps40000.lr2e-5.cosine-202601140834/file_stream\": dial tcp: lookup api.wandb.ai on 10.43.0.10:53: read udp 10.42.227.13:52915->10.43.0.10:53: i/o timeout"}
17
+ {"time":"2026-01-14T17:23:30.097683124Z","level":"INFO","msg":"api: retrying error","error":"Post \"https://api.wandb.ai/files/zaydzuhri/fla/-top.code.7B.batch8.seqlen4096.context4096.warmup400.update2.steps40000.lr2e-5.cosine-202601140834/file_stream\": dial tcp: lookup api.wandb.ai on 10.43.0.10:53: read udp 10.42.227.13:53512->10.43.0.10:53: i/o timeout"}
18
+ {"time":"2026-01-14T17:29:28.638957324Z","level":"INFO","msg":"api: retrying HTTP error","status":502,"url":"https://api.wandb.ai/files/zaydzuhri/fla/-top.code.7B.batch8.seqlen4096.context4096.warmup400.update2.steps40000.lr2e-5.cosine-202601140834/file_stream","body":"\n<html><head>\n<meta http-equiv=\"content-type\" content=\"text/html;charset=utf-8\">\n<title>502 Server Error</title>\n</head>\n<body text=#000000 bgcolor=#ffffff>\n<h1>Error: Server Error</h1>\n<h2>The server encountered a temporary error and could not complete your request.<p>Please try again in 30 seconds.</h2>\n<h2></h2>\n</body></html>\n"}
19
+ {"time":"2026-01-14T18:28:19.032143046Z","level":"INFO","msg":"api: retrying error","error":"Post \"https://api.wandb.ai/files/zaydzuhri/fla/-top.code.7B.batch8.seqlen4096.context4096.warmup400.update2.steps40000.lr2e-5.cosine-202601140834/file_stream\": dial tcp: lookup api.wandb.ai on 10.43.0.10:53: read udp 10.42.227.13:35700->10.43.0.10:53: i/o timeout"}
20
+ {"time":"2026-01-14T18:30:35.607887985Z","level":"INFO","msg":"api: retrying error","error":"Post \"https://api.wandb.ai/files/zaydzuhri/fla/-top.code.7B.batch8.seqlen4096.context4096.warmup400.update2.steps40000.lr2e-5.cosine-202601140834/file_stream\": dial tcp: lookup api.wandb.ai on 10.43.0.10:53: read udp 10.42.227.13:53327->10.43.0.10:53: i/o timeout"}
21
+ {"time":"2026-01-14T18:36:50.614833057Z","level":"INFO","msg":"api: retrying error","error":"Post \"https://api.wandb.ai/files/zaydzuhri/fla/-top.code.7B.batch8.seqlen4096.context4096.warmup400.update2.steps40000.lr2e-5.cosine-202601140834/file_stream\": dial tcp: lookup api.wandb.ai on 10.43.0.10:53: read udp 10.42.227.13:52149->10.43.0.10:53: i/o timeout"}
22
+ {"time":"2026-01-14T18:37:03.615318388Z","level":"INFO","msg":"api: retrying error","error":"Post \"https://api.wandb.ai/files/zaydzuhri/fla/-top.code.7B.batch8.seqlen4096.context4096.warmup400.update2.steps40000.lr2e-5.cosine-202601140834/file_stream\": dial tcp: lookup api.wandb.ai on 10.43.0.10:53: read udp 10.42.227.13:45725->10.43.0.10:53: i/o timeout"}
23
+ {"time":"2026-01-14T19:09:34.794976855Z","level":"INFO","msg":"api: retrying error","error":"Post \"https://api.wandb.ai/files/zaydzuhri/fla/-top.code.7B.batch8.seqlen4096.context4096.warmup400.update2.steps40000.lr2e-5.cosine-202601140834/file_stream\": dial tcp: lookup api.wandb.ai on 10.43.0.10:53: read udp 10.42.227.13:40175->10.43.0.10:53: i/o timeout"}
24
+ {"time":"2026-01-14T19:10:12.299343829Z","level":"INFO","msg":"api: retrying error","error":"Post \"https://api.wandb.ai/files/zaydzuhri/fla/-top.code.7B.batch8.seqlen4096.context4096.warmup400.update2.steps40000.lr2e-5.cosine-202601140834/file_stream\": dial tcp: lookup api.wandb.ai on 10.43.0.10:53: read udp 10.42.227.13:49428->10.43.0.10:53: i/o timeout"}
25
+ {"time":"2026-01-14T19:23:00.077361468Z","level":"INFO","msg":"api: retrying error","error":"Post \"https://api.wandb.ai/files/zaydzuhri/fla/-top.code.7B.batch8.seqlen4096.context4096.warmup400.update2.steps40000.lr2e-5.cosine-202601140834/file_stream\": dial tcp: lookup api.wandb.ai on 10.43.0.10:53: read udp 10.42.227.13:46379->10.43.0.10:53: i/o timeout"}
26
+ {"time":"2026-01-14T19:23:42.567949074Z","level":"INFO","msg":"api: retrying error","error":"Post \"https://api.wandb.ai/files/zaydzuhri/fla/-top.code.7B.batch8.seqlen4096.context4096.warmup400.update2.steps40000.lr2e-5.cosine-202601140834/file_stream\": dial tcp: lookup api.wandb.ai on 10.43.0.10:53: read udp 10.42.227.13:60354->10.43.0.10:53: i/o timeout"}
27
+ {"time":"2026-01-14T19:50:12.533936926Z","level":"INFO","msg":"api: retrying HTTP error","status":502,"url":"https://api.wandb.ai/files/zaydzuhri/fla/-top.code.7B.batch8.seqlen4096.context4096.warmup400.update2.steps40000.lr2e-5.cosine-202601140834/file_stream","body":"\n<html><head>\n<meta http-equiv=\"content-type\" content=\"text/html;charset=utf-8\">\n<title>502 Server Error</title>\n</head>\n<body text=#000000 bgcolor=#ffffff>\n<h1>Error: Server Error</h1>\n<h2>The server encountered a temporary error and could not complete your request.<p>Please try again in 30 seconds.</h2>\n<h2></h2>\n</body></html>\n"}
28
+ {"time":"2026-01-14T20:54:42.468102299Z","level":"INFO","msg":"api: retrying HTTP error","status":502,"url":"https://api.wandb.ai/files/zaydzuhri/fla/-top.code.7B.batch8.seqlen4096.context4096.warmup400.update2.steps40000.lr2e-5.cosine-202601140834/file_stream","body":"\n<html><head>\n<meta http-equiv=\"content-type\" content=\"text/html;charset=utf-8\">\n<title>502 Server Error</title>\n</head>\n<body text=#000000 bgcolor=#ffffff>\n<h1>Error: Server Error</h1>\n<h2>The server encountered a temporary error and could not complete your request.<p>Please try again in 30 seconds.</h2>\n<h2></h2>\n</body></html>\n"}
29
+ {"time":"2026-01-14T20:54:46.291617296Z","level":"INFO","msg":"api: retrying HTTP error","status":502,"url":"https://api.wandb.ai/files/zaydzuhri/fla/-top.code.7B.batch8.seqlen4096.context4096.warmup400.update2.steps40000.lr2e-5.cosine-202601140834/file_stream","body":"\n<html><head>\n<meta http-equiv=\"content-type\" content=\"text/html;charset=utf-8\">\n<title>502 Server Error</title>\n</head>\n<body text=#000000 bgcolor=#ffffff>\n<h1>Error: Server Error</h1>\n<h2>The server encountered a temporary error and could not complete your request.<p>Please try again in 30 seconds.</h2>\n<h2></h2>\n</body></html>\n"}
30
+ {"time":"2026-01-14T21:26:50.300321781Z","level":"INFO","msg":"api: retrying HTTP error","status":502,"url":"https://api.wandb.ai/files/zaydzuhri/fla/-top.code.7B.batch8.seqlen4096.context4096.warmup400.update2.steps40000.lr2e-5.cosine-202601140834/file_stream","body":"\n<html><head>\n<meta http-equiv=\"content-type\" content=\"text/html;charset=utf-8\">\n<title>502 Server Error</title>\n</head>\n<body text=#000000 bgcolor=#ffffff>\n<h1>Error: Server Error</h1>\n<h2>The server encountered a temporary error and could not complete your request.<p>Please try again in 30 seconds.</h2>\n<h2></h2>\n</body></html>\n"}
31
+ {"time":"2026-01-14T21:28:30.989741658Z","level":"INFO","msg":"api: retrying HTTP error","status":502,"url":"https://api.wandb.ai/files/zaydzuhri/fla/-top.code.7B.batch8.seqlen4096.context4096.warmup400.update2.steps40000.lr2e-5.cosine-202601140834/file_stream","body":"\n<html><head>\n<meta http-equiv=\"content-type\" content=\"text/html;charset=utf-8\">\n<title>502 Server Error</title>\n</head>\n<body text=#000000 bgcolor=#ffffff>\n<h1>Error: Server Error</h1>\n<h2>The server encountered a temporary error and could not complete your request.<p>Please try again in 30 seconds.</h2>\n<h2></h2>\n</body></html>\n"}
32
+ {"time":"2026-01-14T21:29:11.344863371Z","level":"INFO","msg":"api: retrying HTTP error","status":502,"url":"https://api.wandb.ai/files/zaydzuhri/fla/-top.code.7B.batch8.seqlen4096.context4096.warmup400.update2.steps40000.lr2e-5.cosine-202601140834/file_stream","body":"\n<html><head>\n<meta http-equiv=\"content-type\" content=\"text/html;charset=utf-8\">\n<title>502 Server Error</title>\n</head>\n<body text=#000000 bgcolor=#ffffff>\n<h1>Error: Server Error</h1>\n<h2>The server encountered a temporary error and could not complete your request.<p>Please try again in 30 seconds.</h2>\n<h2></h2>\n</body></html>\n"}
33
+ {"time":"2026-01-14T21:42:33.145399798Z","level":"INFO","msg":"api: retrying HTTP error","status":502,"url":"https://api.wandb.ai/files/zaydzuhri/fla/-top.code.7B.batch8.seqlen4096.context4096.warmup400.update2.steps40000.lr2e-5.cosine-202601140834/file_stream","body":"\n<html><head>\n<meta http-equiv=\"content-type\" content=\"text/html;charset=utf-8\">\n<title>502 Server Error</title>\n</head>\n<body text=#000000 bgcolor=#ffffff>\n<h1>Error: Server Error</h1>\n<h2>The server encountered a temporary error and could not complete your request.<p>Please try again in 30 seconds.</h2>\n<h2></h2>\n</body></html>\n"}
34
+ {"time":"2026-01-14T21:44:11.121211897Z","level":"INFO","msg":"api: retrying HTTP error","status":502,"url":"https://api.wandb.ai/files/zaydzuhri/fla/-top.code.7B.batch8.seqlen4096.context4096.warmup400.update2.steps40000.lr2e-5.cosine-202601140834/file_stream","body":"\n<html><head>\n<meta http-equiv=\"content-type\" content=\"text/html;charset=utf-8\">\n<title>502 Server Error</title>\n</head>\n<body text=#000000 bgcolor=#ffffff>\n<h1>Error: Server Error</h1>\n<h2>The server encountered a temporary error and could not complete your request.<p>Please try again in 30 seconds.</h2>\n<h2></h2>\n</body></html>\n"}
35
+ {"time":"2026-01-14T21:45:55.018487294Z","level":"INFO","msg":"api: retrying HTTP error","status":502,"url":"https://api.wandb.ai/files/zaydzuhri/fla/-top.code.7B.batch8.seqlen4096.context4096.warmup400.update2.steps40000.lr2e-5.cosine-202601140834/file_stream","body":"\n<html><head>\n<meta http-equiv=\"content-type\" content=\"text/html;charset=utf-8\">\n<title>502 Server Error</title>\n</head>\n<body text=#000000 bgcolor=#ffffff>\n<h1>Error: Server Error</h1>\n<h2>The server encountered a temporary error and could not complete your request.<p>Please try again in 30 seconds.</h2>\n<h2></h2>\n</body></html>\n"}
36
+ {"time":"2026-01-14T22:03:26.109590923Z","level":"INFO","msg":"api: retrying HTTP error","status":502,"url":"https://api.wandb.ai/files/zaydzuhri/fla/-top.code.7B.batch8.seqlen4096.context4096.warmup400.update2.steps40000.lr2e-5.cosine-202601140834/file_stream","body":"\n<html><head>\n<meta http-equiv=\"content-type\" content=\"text/html;charset=utf-8\">\n<title>502 Server Error</title>\n</head>\n<body text=#000000 bgcolor=#ffffff>\n<h1>Error: Server Error</h1>\n<h2>The server encountered a temporary error and could not complete your request.<p>Please try again in 30 seconds.</h2>\n<h2></h2>\n</body></html>\n"}
37
+ {"time":"2026-01-14T22:53:41.136507439Z","level":"INFO","msg":"api: retrying HTTP error","status":502,"url":"https://api.wandb.ai/files/zaydzuhri/fla/-top.code.7B.batch8.seqlen4096.context4096.warmup400.update2.steps40000.lr2e-5.cosine-202601140834/file_stream","body":"\n<html><head>\n<meta http-equiv=\"content-type\" content=\"text/html;charset=utf-8\">\n<title>502 Server Error</title>\n</head>\n<body text=#000000 bgcolor=#ffffff>\n<h1>Error: Server Error</h1>\n<h2>The server encountered a temporary error and could not complete your request.<p>Please try again in 30 seconds.</h2>\n<h2></h2>\n</body></html>\n"}
38
+ {"time":"2026-01-14T23:18:04.214239942Z","level":"INFO","msg":"api: retrying HTTP error","status":502,"url":"https://api.wandb.ai/files/zaydzuhri/fla/-top.code.7B.batch8.seqlen4096.context4096.warmup400.update2.steps40000.lr2e-5.cosine-202601140834/file_stream","body":"\n<html><head>\n<meta http-equiv=\"content-type\" content=\"text/html;charset=utf-8\">\n<title>502 Server Error</title>\n</head>\n<body text=#000000 bgcolor=#ffffff>\n<h1>Error: Server Error</h1>\n<h2>The server encountered a temporary error and could not complete your request.<p>Please try again in 30 seconds.</h2>\n<h2></h2>\n</body></html>\n"}
39
+ {"time":"2026-01-14T23:34:17.461111568Z","level":"INFO","msg":"api: retrying HTTP error","status":502,"url":"https://api.wandb.ai/files/zaydzuhri/fla/-top.code.7B.batch8.seqlen4096.context4096.warmup400.update2.steps40000.lr2e-5.cosine-202601140834/file_stream","body":"\n<html><head>\n<meta http-equiv=\"content-type\" content=\"text/html;charset=utf-8\">\n<title>502 Server Error</title>\n</head>\n<body text=#000000 bgcolor=#ffffff>\n<h1>Error: Server Error</h1>\n<h2>The server encountered a temporary error and could not complete your request.<p>Please try again in 30 seconds.</h2>\n<h2></h2>\n</body></html>\n"}
40
+ {"time":"2026-01-15T00:05:40.866317923Z","level":"INFO","msg":"api: retrying error","error":"Post \"https://api.wandb.ai/files/zaydzuhri/fla/-top.code.7B.batch8.seqlen4096.context4096.warmup400.update2.steps40000.lr2e-5.cosine-202601140834/file_stream\": dial tcp: lookup api.wandb.ai on 10.43.0.10:53: read udp 10.42.227.13:40646->10.43.0.10:53: i/o timeout"}
41
+ {"time":"2026-01-15T00:09:26.482014139Z","level":"INFO","msg":"api: retrying HTTP error","status":502,"url":"https://api.wandb.ai/files/zaydzuhri/fla/-top.code.7B.batch8.seqlen4096.context4096.warmup400.update2.steps40000.lr2e-5.cosine-202601140834/file_stream","body":"\n<html><head>\n<meta http-equiv=\"content-type\" content=\"text/html;charset=utf-8\">\n<title>502 Server Error</title>\n</head>\n<body text=#000000 bgcolor=#ffffff>\n<h1>Error: Server Error</h1>\n<h2>The server encountered a temporary error and could not complete your request.<p>Please try again in 30 seconds.</h2>\n<h2></h2>\n</body></html>\n"}
42
+ {"time":"2026-01-15T00:14:06.753203823Z","level":"INFO","msg":"api: retrying error","error":"Post \"https://api.wandb.ai/files/zaydzuhri/fla/-top.code.7B.batch8.seqlen4096.context4096.warmup400.update2.steps40000.lr2e-5.cosine-202601140834/file_stream\": dial tcp: lookup api.wandb.ai on 10.43.0.10:53: read udp 10.42.227.13:50085->10.43.0.10:53: i/o timeout"}
43
+ {"time":"2026-01-15T00:20:58.119418478Z","level":"INFO","msg":"api: retrying HTTP error","status":502,"url":"https://api.wandb.ai/files/zaydzuhri/fla/-top.code.7B.batch8.seqlen4096.context4096.warmup400.update2.steps40000.lr2e-5.cosine-202601140834/file_stream","body":"\n<html><head>\n<meta http-equiv=\"content-type\" content=\"text/html;charset=utf-8\">\n<title>502 Server Error</title>\n</head>\n<body text=#000000 bgcolor=#ffffff>\n<h1>Error: Server Error</h1>\n<h2>The server encountered a temporary error and could not complete your request.<p>Please try again in 30 seconds.</h2>\n<h2></h2>\n</body></html>\n"}
44
+ {"time":"2026-01-15T00:35:55.616848461Z","level":"INFO","msg":"api: retrying error","error":"Post \"https://api.wandb.ai/files/zaydzuhri/fla/-top.code.7B.batch8.seqlen4096.context4096.warmup400.update2.steps40000.lr2e-5.cosine-202601140834/file_stream\": dial tcp: lookup api.wandb.ai on 10.43.0.10:53: read udp 10.42.227.13:51127->10.43.0.10:53: i/o timeout"}
45
+ {"time":"2026-01-15T00:53:12.727126278Z","level":"INFO","msg":"api: retrying error","error":"Post \"https://api.wandb.ai/files/zaydzuhri/fla/-top.code.7B.batch8.seqlen4096.context4096.warmup400.update2.steps40000.lr2e-5.cosine-202601140834/file_stream\": unexpected EOF"}
46
+ {"time":"2026-01-15T00:57:04.800943133Z","level":"INFO","msg":"api: retrying error","error":"Post \"https://api.wandb.ai/files/zaydzuhri/fla/-top.code.7B.batch8.seqlen4096.context4096.warmup400.update2.steps40000.lr2e-5.cosine-202601140834/file_stream\": dial tcp: lookup api.wandb.ai on 10.43.0.10:53: read udp 10.42.227.13:57532->10.43.0.10:53: i/o timeout"}
47
+ {"time":"2026-01-15T02:26:47.673452244Z","level":"INFO","msg":"api: retrying HTTP error","status":502,"url":"https://api.wandb.ai/files/zaydzuhri/fla/-top.code.7B.batch8.seqlen4096.context4096.warmup400.update2.steps40000.lr2e-5.cosine-202601140834/file_stream","body":"\n<html><head>\n<meta http-equiv=\"content-type\" content=\"text/html;charset=utf-8\">\n<title>502 Server Error</title>\n</head>\n<body text=#000000 bgcolor=#ffffff>\n<h1>Error: Server Error</h1>\n<h2>The server encountered a temporary error and could not complete your request.<p>Please try again in 30 seconds.</h2>\n<h2></h2>\n</body></html>\n"}
48
+ {"time":"2026-01-15T02:29:05.611180187Z","level":"INFO","msg":"api: retrying error","error":"Post \"https://api.wandb.ai/files/zaydzuhri/fla/-top.code.7B.batch8.seqlen4096.context4096.warmup400.update2.steps40000.lr2e-5.cosine-202601140834/file_stream\": dial tcp: lookup api.wandb.ai on 10.43.0.10:53: read udp 10.42.227.13:51078->10.43.0.10:53: i/o timeout"}
49
+ {"time":"2026-01-15T03:27:56.134074839Z","level":"INFO","msg":"api: retrying HTTP error","status":502,"url":"https://api.wandb.ai/files/zaydzuhri/fla/-top.code.7B.batch8.seqlen4096.context4096.warmup400.update2.steps40000.lr2e-5.cosine-202601140834/file_stream","body":"\n<html><head>\n<meta http-equiv=\"content-type\" content=\"text/html;charset=utf-8\">\n<title>502 Server Error</title>\n</head>\n<body text=#000000 bgcolor=#ffffff>\n<h1>Error: Server Error</h1>\n<h2>The server encountered a temporary error and could not complete your request.<p>Please try again in 30 seconds.</h2>\n<h2></h2>\n</body></html>\n"}
50
+ {"time":"2026-01-15T03:55:11.994175322Z","level":"INFO","msg":"api: retrying HTTP error","status":502,"url":"https://api.wandb.ai/files/zaydzuhri/fla/-top.code.7B.batch8.seqlen4096.context4096.warmup400.update2.steps40000.lr2e-5.cosine-202601140834/file_stream","body":"\n<html><head>\n<meta http-equiv=\"content-type\" content=\"text/html;charset=utf-8\">\n<title>502 Server Error</title>\n</head>\n<body text=#000000 bgcolor=#ffffff>\n<h1>Error: Server Error</h1>\n<h2>The server encountered a temporary error and could not complete your request.<p>Please try again in 30 seconds.</h2>\n<h2></h2>\n</body></html>\n"}
51
+ {"time":"2026-01-15T04:24:35.605673891Z","level":"INFO","msg":"api: retrying error","error":"Post \"https://api.wandb.ai/files/zaydzuhri/fla/-top.code.7B.batch8.seqlen4096.context4096.warmup400.update2.steps40000.lr2e-5.cosine-202601140834/file_stream\": dial tcp: lookup api.wandb.ai on 10.43.0.10:53: read udp 10.42.227.13:50208->10.43.0.10:53: i/o timeout"}
52
+ {"time":"2026-01-15T04:37:20.602369981Z","level":"INFO","msg":"api: retrying error","error":"Post \"https://api.wandb.ai/files/zaydzuhri/fla/-top.code.7B.batch8.seqlen4096.context4096.warmup400.update2.steps40000.lr2e-5.cosine-202601140834/file_stream\": dial tcp: lookup api.wandb.ai on 10.43.0.10:53: read udp 10.42.227.13:43934->10.43.0.10:53: i/o timeout"}
53
+ {"time":"2026-01-15T04:41:45.608069565Z","level":"INFO","msg":"api: retrying error","error":"Post \"https://api.wandb.ai/files/zaydzuhri/fla/-top.code.7B.batch8.seqlen4096.context4096.warmup400.update2.steps40000.lr2e-5.cosine-202601140834/file_stream\": dial tcp: lookup api.wandb.ai on 10.43.0.10:53: read udp 10.42.227.13:45762->10.43.0.10:53: i/o timeout"}
54
+ {"time":"2026-01-15T05:00:55.619281375Z","level":"INFO","msg":"api: retrying error","error":"Post \"https://api.wandb.ai/files/zaydzuhri/fla/-top.code.7B.batch8.seqlen4096.context4096.warmup400.update2.steps40000.lr2e-5.cosine-202601140834/file_stream\": dial tcp: lookup api.wandb.ai on 10.43.0.10:53: read udp 10.42.227.13:43775->10.43.0.10:53: i/o timeout"}
55
+ {"time":"2026-01-15T05:28:30.612971338Z","level":"INFO","msg":"api: retrying error","error":"Post \"https://api.wandb.ai/files/zaydzuhri/fla/-top.code.7B.batch8.seqlen4096.context4096.warmup400.update2.steps40000.lr2e-5.cosine-202601140834/file_stream\": dial tcp: lookup api.wandb.ai on 10.43.0.10:53: read udp 10.42.227.13:46075->10.43.0.10:53: i/o timeout"}
56
+ {"time":"2026-01-15T05:29:33.531886958Z","level":"INFO","msg":"api: retrying error","error":"Post \"https://api.wandb.ai/files/zaydzuhri/fla/-top.code.7B.batch8.seqlen4096.context4096.warmup400.update2.steps40000.lr2e-5.cosine-202601140834/file_stream\": dial tcp: lookup api.wandb.ai on 10.43.0.10:53: read udp 10.42.227.13:42432->10.43.0.10:53: i/o timeout"}
57
+ {"time":"2026-01-15T05:35:40.072772995Z","level":"INFO","msg":"api: retrying error","error":"Post \"https://api.wandb.ai/files/zaydzuhri/fla/-top.code.7B.batch8.seqlen4096.context4096.warmup400.update2.steps40000.lr2e-5.cosine-202601140834/file_stream\": dial tcp: lookup api.wandb.ai on 10.43.0.10:53: read udp 10.42.227.13:59878->10.43.0.10:53: i/o timeout"}
58
+ {"time":"2026-01-15T05:36:43.896495268Z","level":"INFO","msg":"api: retrying error","error":"Post \"https://api.wandb.ai/files/zaydzuhri/fla/-top.code.7B.batch8.seqlen4096.context4096.warmup400.update2.steps40000.lr2e-5.cosine-202601140834/file_stream\": unexpected EOF"}
59
+ {"time":"2026-01-15T05:38:38.262916762Z","level":"INFO","msg":"api: retrying error","error":"Post \"https://api.wandb.ai/files/zaydzuhri/fla/-top.code.7B.batch8.seqlen4096.context4096.warmup400.update2.steps40000.lr2e-5.cosine-202601140834/file_stream\": dial tcp: lookup api.wandb.ai on 10.43.0.10:53: read udp 10.42.227.13:57324->10.43.0.10:53: i/o timeout"}
60
+ {"time":"2026-01-15T07:15:30.362963603Z","level":"INFO","msg":"api: retrying error","error":"Post \"https://api.wandb.ai/files/zaydzuhri/fla/-top.code.7B.batch8.seqlen4096.context4096.warmup400.update2.steps40000.lr2e-5.cosine-202601140834/file_stream\": dial tcp: lookup api.wandb.ai on 10.43.0.10:53: read udp 10.42.227.13:38981->10.43.0.10:53: i/o timeout"}
61
+ {"time":"2026-01-15T08:02:42.136897075Z","level":"INFO","msg":"api: retrying HTTP error","status":502,"url":"https://api.wandb.ai/files/zaydzuhri/fla/-top.code.7B.batch8.seqlen4096.context4096.warmup400.update2.steps40000.lr2e-5.cosine-202601140834/file_stream","body":"\n<html><head>\n<meta http-equiv=\"content-type\" content=\"text/html;charset=utf-8\">\n<title>502 Server Error</title>\n</head>\n<body text=#000000 bgcolor=#ffffff>\n<h1>Error: Server Error</h1>\n<h2>The server encountered a temporary error and could not complete your request.<p>Please try again in 30 seconds.</h2>\n<h2></h2>\n</body></html>\n"}
62
+ {"time":"2026-01-15T08:14:04.800047375Z","level":"INFO","msg":"api: retrying error","error":"Post \"https://api.wandb.ai/files/zaydzuhri/fla/-top.code.7B.batch8.seqlen4096.context4096.warmup400.update2.steps40000.lr2e-5.cosine-202601140834/file_stream\": dial tcp: lookup api.wandb.ai on 10.43.0.10:53: read udp 10.42.227.13:43963->10.43.0.10:53: i/o timeout"}
63
+ {"time":"2026-01-15T10:17:27.761701465Z","level":"INFO","msg":"api: retrying HTTP error","status":502,"url":"https://api.wandb.ai/files/zaydzuhri/fla/-top.code.7B.batch8.seqlen4096.context4096.warmup400.update2.steps40000.lr2e-5.cosine-202601140834/file_stream","body":"\n<html><head>\n<meta http-equiv=\"content-type\" content=\"text/html;charset=utf-8\">\n<title>502 Server Error</title>\n</head>\n<body text=#000000 bgcolor=#ffffff>\n<h1>Error: Server Error</h1>\n<h2>The server encountered a temporary error and could not complete your request.<p>Please try again in 30 seconds.</h2>\n<h2></h2>\n</body></html>\n"}
64
+ {"time":"2026-01-15T13:37:53.828365872Z","level":"INFO","msg":"api: retrying error","error":"Post \"https://api.wandb.ai/files/zaydzuhri/fla/-top.code.7B.batch8.seqlen4096.context4096.warmup400.update2.steps40000.lr2e-5.cosine-202601140834/file_stream\": dial tcp: lookup api.wandb.ai on 10.43.0.10:53: read udp 10.42.227.13:33487->10.43.0.10:53: i/o timeout"}
65
+ {"time":"2026-01-15T13:51:54.922768702Z","level":"INFO","msg":"api: retrying error","error":"Post \"https://api.wandb.ai/files/zaydzuhri/fla/-top.code.7B.batch8.seqlen4096.context4096.warmup400.update2.steps40000.lr2e-5.cosine-202601140834/file_stream\": dial tcp: lookup api.wandb.ai on 10.43.0.10:53: read udp 10.42.227.13:57005->10.43.0.10:53: i/o timeout"}
66
+ {"time":"2026-01-15T15:08:27.572248208Z","level":"INFO","msg":"api: retrying HTTP error","status":502,"url":"https://api.wandb.ai/files/zaydzuhri/fla/-top.code.7B.batch8.seqlen4096.context4096.warmup400.update2.steps40000.lr2e-5.cosine-202601140834/file_stream","body":"\n<html><head>\n<meta http-equiv=\"content-type\" content=\"text/html;charset=utf-8\">\n<title>502 Server Error</title>\n</head>\n<body text=#000000 bgcolor=#ffffff>\n<h1>Error: Server Error</h1>\n<h2>The server encountered a temporary error and could not complete your request.<p>Please try again in 30 seconds.</h2>\n<h2></h2>\n</body></html>\n"}
67
+ {"time":"2026-01-15T15:15:56.876304792Z","level":"INFO","msg":"api: retrying HTTP error","status":502,"url":"https://api.wandb.ai/files/zaydzuhri/fla/-top.code.7B.batch8.seqlen4096.context4096.warmup400.update2.steps40000.lr2e-5.cosine-202601140834/file_stream","body":"\n<html><head>\n<meta http-equiv=\"content-type\" content=\"text/html;charset=utf-8\">\n<title>502 Server Error</title>\n</head>\n<body text=#000000 bgcolor=#ffffff>\n<h1>Error: Server Error</h1>\n<h2>The server encountered a temporary error and could not complete your request.<p>Please try again in 30 seconds.</h2>\n<h2></h2>\n</body></html>\n"}
68
+ {"time":"2026-01-15T15:16:14.129525568Z","level":"INFO","msg":"api: retrying HTTP error","status":502,"url":"https://api.wandb.ai/files/zaydzuhri/fla/-top.code.7B.batch8.seqlen4096.context4096.warmup400.update2.steps40000.lr2e-5.cosine-202601140834/file_stream","body":"\n<html><head>\n<meta http-equiv=\"content-type\" content=\"text/html;charset=utf-8\">\n<title>502 Server Error</title>\n</head>\n<body text=#000000 bgcolor=#ffffff>\n<h1>Error: Server Error</h1>\n<h2>The server encountered a temporary error and could not complete your request.<p>Please try again in 30 seconds.</h2>\n<h2></h2>\n</body></html>\n"}
69
+ {"time":"2026-01-15T16:11:26.701614401Z","level":"INFO","msg":"api: retrying HTTP error","status":502,"url":"https://api.wandb.ai/files/zaydzuhri/fla/-top.code.7B.batch8.seqlen4096.context4096.warmup400.update2.steps40000.lr2e-5.cosine-202601140834/file_stream","body":"\n<html><head>\n<meta http-equiv=\"content-type\" content=\"text/html;charset=utf-8\">\n<title>502 Server Error</title>\n</head>\n<body text=#000000 bgcolor=#ffffff>\n<h1>Error: Server Error</h1>\n<h2>The server encountered a temporary error and could not complete your request.<p>Please try again in 30 seconds.</h2>\n<h2></h2>\n</body></html>\n"}
70
+ {"time":"2026-01-15T16:48:20.611977635Z","level":"INFO","msg":"api: retrying error","error":"Post \"https://api.wandb.ai/files/zaydzuhri/fla/-top.code.7B.batch8.seqlen4096.context4096.warmup400.update2.steps40000.lr2e-5.cosine-202601140834/file_stream\": dial tcp: lookup api.wandb.ai on 10.43.0.10:53: read udp 10.42.227.13:51630->10.43.0.10:53: i/o timeout"}
71
+ {"time":"2026-01-15T16:58:27.768733378Z","level":"INFO","msg":"api: retrying HTTP error","status":502,"url":"https://api.wandb.ai/files/zaydzuhri/fla/-top.code.7B.batch8.seqlen4096.context4096.warmup400.update2.steps40000.lr2e-5.cosine-202601140834/file_stream","body":"\n<html><head>\n<meta http-equiv=\"content-type\" content=\"text/html;charset=utf-8\">\n<title>502 Server Error</title>\n</head>\n<body text=#000000 bgcolor=#ffffff>\n<h1>Error: Server Error</h1>\n<h2>The server encountered a temporary error and could not complete your request.<p>Please try again in 30 seconds.</h2>\n<h2></h2>\n</body></html>\n"}
72
+ {"time":"2026-01-15T17:01:27.738651643Z","level":"INFO","msg":"api: retrying HTTP error","status":502,"url":"https://api.wandb.ai/files/zaydzuhri/fla/-top.code.7B.batch8.seqlen4096.context4096.warmup400.update2.steps40000.lr2e-5.cosine-202601140834/file_stream","body":"\n<html><head>\n<meta http-equiv=\"content-type\" content=\"text/html;charset=utf-8\">\n<title>502 Server Error</title>\n</head>\n<body text=#000000 bgcolor=#ffffff>\n<h1>Error: Server Error</h1>\n<h2>The server encountered a temporary error and could not complete your request.<p>Please try again in 30 seconds.</h2>\n<h2></h2>\n</body></html>\n"}
73
+ {"time":"2026-01-15T17:04:07.656661744Z","level":"INFO","msg":"api: retrying HTTP error","status":502,"url":"https://api.wandb.ai/files/zaydzuhri/fla/-top.code.7B.batch8.seqlen4096.context4096.warmup400.update2.steps40000.lr2e-5.cosine-202601140834/file_stream","body":"\n<html><head>\n<meta http-equiv=\"content-type\" content=\"text/html;charset=utf-8\">\n<title>502 Server Error</title>\n</head>\n<body text=#000000 bgcolor=#ffffff>\n<h1>Error: Server Error</h1>\n<h2>The server encountered a temporary error and could not complete your request.<p>Please try again in 30 seconds.</h2>\n<h2></h2>\n</body></html>\n"}
74
+ {"time":"2026-01-15T17:43:56.168098895Z","level":"INFO","msg":"api: retrying HTTP error","status":502,"url":"https://api.wandb.ai/files/zaydzuhri/fla/-top.code.7B.batch8.seqlen4096.context4096.warmup400.update2.steps40000.lr2e-5.cosine-202601140834/file_stream","body":"\n<html><head>\n<meta http-equiv=\"content-type\" content=\"text/html;charset=utf-8\">\n<title>502 Server Error</title>\n</head>\n<body text=#000000 bgcolor=#ffffff>\n<h1>Error: Server Error</h1>\n<h2>The server encountered a temporary error and could not complete your request.<p>Please try again in 30 seconds.</h2>\n<h2></h2>\n</body></html>\n"}
75
+ {"time":"2026-01-15T19:48:55.070431388Z","level":"INFO","msg":"api: retrying error","error":"Post \"https://api.wandb.ai/files/zaydzuhri/fla/-top.code.7B.batch8.seqlen4096.context4096.warmup400.update2.steps40000.lr2e-5.cosine-202601140834/file_stream\": dial tcp: lookup api.wandb.ai on 10.43.0.10:53: read udp 10.42.227.13:51270->10.43.0.10:53: i/o timeout"}
76
+ {"time":"2026-01-15T20:09:34.79778952Z","level":"INFO","msg":"api: retrying error","error":"Post \"https://api.wandb.ai/files/zaydzuhri/fla/-top.code.7B.batch8.seqlen4096.context4096.warmup400.update2.steps40000.lr2e-5.cosine-202601140834/file_stream\": dial tcp: lookup api.wandb.ai on 10.43.0.10:53: read udp 10.42.227.13:54431->10.43.0.10:53: i/o timeout"}
77
+ {"time":"2026-01-15T21:01:41.803987004Z","level":"INFO","msg":"api: retrying error","error":"Post \"https://api.wandb.ai/files/zaydzuhri/fla/-top.code.7B.batch8.seqlen4096.context4096.warmup400.update2.steps40000.lr2e-5.cosine-202601140834/file_stream\": dial tcp: lookup api.wandb.ai on 10.43.0.10:53: read udp 10.42.227.13:47517->10.43.0.10:53: i/o timeout"}
78
+ {"time":"2026-01-15T21:05:05.631884156Z","level":"INFO","msg":"api: retrying error","error":"Post \"https://api.wandb.ai/files/zaydzuhri/fla/-top.code.7B.batch8.seqlen4096.context4096.warmup400.update2.steps40000.lr2e-5.cosine-202601140834/file_stream\": dial tcp: lookup api.wandb.ai on 10.43.0.10:53: read udp 10.42.227.13:52918->10.43.0.10:53: i/o timeout"}
79
+ {"time":"2026-01-15T21:22:05.626770979Z","level":"INFO","msg":"api: retrying error","error":"Post \"https://api.wandb.ai/files/zaydzuhri/fla/-top.code.7B.batch8.seqlen4096.context4096.warmup400.update2.steps40000.lr2e-5.cosine-202601140834/file_stream\": dial tcp: lookup api.wandb.ai on 10.43.0.10:53: read udp 10.42.227.13:49610->10.43.0.10:53: i/o timeout"}
80
+ {"time":"2026-01-15T21:52:50.642808493Z","level":"INFO","msg":"api: retrying error","error":"Post \"https://api.wandb.ai/files/zaydzuhri/fla/-top.code.7B.batch8.seqlen4096.context4096.warmup400.update2.steps40000.lr2e-5.cosine-202601140834/file_stream\": dial tcp: lookup api.wandb.ai on 10.43.0.10:53: read udp 10.42.227.13:47673->10.43.0.10:53: i/o timeout"}
81
+ {"time":"2026-01-15T21:58:20.618768345Z","level":"INFO","msg":"api: retrying error","error":"Post \"https://api.wandb.ai/files/zaydzuhri/fla/-top.code.7B.batch8.seqlen4096.context4096.warmup400.update2.steps40000.lr2e-5.cosine-202601140834/file_stream\": dial tcp: lookup api.wandb.ai on 10.43.0.10:53: read udp 10.42.227.13:36948->10.43.0.10:53: i/o timeout"}
82
+ {"time":"2026-01-15T22:27:52.156100864Z","level":"INFO","msg":"api: retrying error","error":"Post \"https://api.wandb.ai/files/zaydzuhri/fla/-top.code.7B.batch8.seqlen4096.context4096.warmup400.update2.steps40000.lr2e-5.cosine-202601140834/file_stream\": dial tcp: lookup api.wandb.ai on 10.43.0.10:53: read udp 10.42.227.13:56570->10.43.0.10:53: i/o timeout"}
83
+ {"time":"2026-01-15T22:30:37.146827256Z","level":"INFO","msg":"api: retrying error","error":"Post \"https://api.wandb.ai/files/zaydzuhri/fla/-top.code.7B.batch8.seqlen4096.context4096.warmup400.update2.steps40000.lr2e-5.cosine-202601140834/file_stream\": dial tcp: lookup api.wandb.ai on 10.43.0.10:53: read udp 10.42.227.13:38625->10.43.0.10:53: i/o timeout"}
84
+ {"time":"2026-01-15T23:30:42.173288276Z","level":"INFO","msg":"api: retrying error","error":"Post \"https://api.wandb.ai/files/zaydzuhri/fla/-top.code.7B.batch8.seqlen4096.context4096.warmup400.update2.steps40000.lr2e-5.cosine-202601140834/file_stream\": dial tcp: lookup api.wandb.ai on 10.43.0.10:53: read udp 10.42.227.13:48544->10.43.0.10:53: i/o timeout"}
85
+ {"time":"2026-01-15T23:32:27.158278584Z","level":"INFO","msg":"api: retrying error","error":"Post \"https://api.wandb.ai/files/zaydzuhri/fla/-top.code.7B.batch8.seqlen4096.context4096.warmup400.update2.steps40000.lr2e-5.cosine-202601140834/file_stream\": dial tcp: lookup api.wandb.ai on 10.43.0.10:53: read udp 10.42.227.13:57974->10.43.0.10:53: i/o timeout"}
86
+ {"time":"2026-01-16T00:34:57.666020998Z","level":"INFO","msg":"api: retrying HTTP error","status":502,"url":"https://api.wandb.ai/files/zaydzuhri/fla/-top.code.7B.batch8.seqlen4096.context4096.warmup400.update2.steps40000.lr2e-5.cosine-202601140834/file_stream","body":"\n<html><head>\n<meta http-equiv=\"content-type\" content=\"text/html;charset=utf-8\">\n<title>502 Server Error</title>\n</head>\n<body text=#000000 bgcolor=#ffffff>\n<h1>Error: Server Error</h1>\n<h2>The server encountered a temporary error and could not complete your request.<p>Please try again in 30 seconds.</h2>\n<h2></h2>\n</body></html>\n"}
87
+ {"time":"2026-01-16T07:28:21.197815253Z","level":"INFO","msg":"api: retrying error","error":"Post \"https://api.wandb.ai/files/zaydzuhri/fla/-top.code.7B.batch8.seqlen4096.context4096.warmup400.update2.steps40000.lr2e-5.cosine-202601140834/file_stream\": dial tcp: lookup api.wandb.ai on 10.43.0.10:53: read udp 10.42.227.13:60604->10.43.0.10:53: i/o timeout"}
88
+ {"time":"2026-01-16T08:45:20.999483918Z","level":"INFO","msg":"api: retrying error","error":"Post \"https://api.wandb.ai/files/zaydzuhri/fla/-top.code.7B.batch8.seqlen4096.context4096.warmup400.update2.steps40000.lr2e-5.cosine-202601140834/file_stream\": dial tcp: lookup api.wandb.ai on 10.43.0.10:53: read udp 10.42.227.13:57880->10.43.0.10:53: i/o timeout"}
89
+ {"time":"2026-01-16T08:47:26.004410083Z","level":"INFO","msg":"api: retrying error","error":"Post \"https://api.wandb.ai/files/zaydzuhri/fla/-top.code.7B.batch8.seqlen4096.context4096.warmup400.update2.steps40000.lr2e-5.cosine-202601140834/file_stream\": dial tcp: lookup api.wandb.ai on 10.43.0.10:53: read udp 10.42.227.13:38091->10.43.0.10:53: i/o timeout"}
90
+ {"time":"2026-01-16T09:51:50.1867647Z","level":"INFO","msg":"api: retrying error","error":"Post \"https://api.wandb.ai/files/zaydzuhri/fla/-top.code.7B.batch8.seqlen4096.context4096.warmup400.update2.steps40000.lr2e-5.cosine-202601140834/file_stream\": dial tcp: lookup api.wandb.ai on 10.43.0.10:53: read udp 10.42.227.13:41800->10.43.0.10:53: i/o timeout"}
91
+ {"time":"2026-01-16T09:56:22.256493752Z","level":"INFO","msg":"api: retrying error","error":"Post \"https://api.wandb.ai/files/zaydzuhri/fla/-top.code.7B.batch8.seqlen4096.context4096.warmup400.update2.steps40000.lr2e-5.cosine-202601140834/file_stream\": unexpected EOF"}
92
+ {"time":"2026-01-16T09:57:07.582527561Z","level":"INFO","msg":"api: retrying error","error":"Post \"https://api.wandb.ai/files/zaydzuhri/fla/-top.code.7B.batch8.seqlen4096.context4096.warmup400.update2.steps40000.lr2e-5.cosine-202601140834/file_stream\": dial tcp: lookup api.wandb.ai on 10.43.0.10:53: read udp 10.42.227.13:42185->10.43.0.10:53: i/o timeout"}
93
+ {"time":"2026-01-16T09:59:01.852668736Z","level":"INFO","msg":"api: retrying error","error":"Post \"https://api.wandb.ai/files/zaydzuhri/fla/-top.code.7B.batch8.seqlen4096.context4096.warmup400.update2.steps40000.lr2e-5.cosine-202601140834/file_stream\": dial tcp: lookup api.wandb.ai on 10.43.0.10:53: read udp 10.42.227.13:58029->10.43.0.10:53: i/o timeout"}
94
+ {"time":"2026-01-16T10:00:49.795503168Z","level":"INFO","msg":"api: retrying error","error":"Post \"https://api.wandb.ai/files/zaydzuhri/fla/-top.code.7B.batch8.seqlen4096.context4096.warmup400.update2.steps40000.lr2e-5.cosine-202601140834/file_stream\": dial tcp: lookup api.wandb.ai on 10.43.0.10:53: read udp 10.42.227.13:41189->10.43.0.10:53: i/o timeout"}
95
+ {"time":"2026-01-16T10:01:53.920461569Z","level":"INFO","msg":"api: retrying error","error":"Post \"https://api.wandb.ai/files/zaydzuhri/fla/-top.code.7B.batch8.seqlen4096.context4096.warmup400.update2.steps40000.lr2e-5.cosine-202601140834/file_stream\": dial tcp: lookup api.wandb.ai on 10.43.0.10:53: read udp 10.42.227.13:52395->10.43.0.10:53: i/o timeout"}
96
+ {"time":"2026-01-16T10:03:19.040721217Z","level":"INFO","msg":"api: retrying error","error":"Post \"https://api.wandb.ai/files/zaydzuhri/fla/-top.code.7B.batch8.seqlen4096.context4096.warmup400.update2.steps40000.lr2e-5.cosine-202601140834/file_stream\": unexpected EOF"}
97
+ {"time":"2026-01-16T10:05:55.076170566Z","level":"INFO","msg":"api: retrying error","error":"Post \"https://api.wandb.ai/files/zaydzuhri/fla/-top.code.7B.batch8.seqlen4096.context4096.warmup400.update2.steps40000.lr2e-5.cosine-202601140834/file_stream\": dial tcp: lookup api.wandb.ai on 10.43.0.10:53: read udp 10.42.227.13:56833->10.43.0.10:53: i/o timeout"}
98
+ {"time":"2026-01-16T10:06:27.942002595Z","level":"INFO","msg":"api: retrying error","error":"Post \"https://api.wandb.ai/files/zaydzuhri/fla/-top.code.7B.batch8.seqlen4096.context4096.warmup400.update2.steps40000.lr2e-5.cosine-202601140834/file_stream\": dial tcp: lookup api.wandb.ai on 10.43.0.10:53: read udp 10.42.227.13:51238->10.43.0.10:53: i/o timeout"}
99
+ {"time":"2026-01-16T10:07:30.42021806Z","level":"INFO","msg":"api: retrying error","error":"Post \"https://api.wandb.ai/files/zaydzuhri/fla/-top.code.7B.batch8.seqlen4096.context4096.warmup400.update2.steps40000.lr2e-5.cosine-202601140834/file_stream\": unexpected EOF"}
100
+ {"time":"2026-01-16T10:10:43.226139011Z","level":"INFO","msg":"api: retrying error","error":"Post \"https://api.wandb.ai/files/zaydzuhri/fla/-top.code.7B.batch8.seqlen4096.context4096.warmup400.update2.steps40000.lr2e-5.cosine-202601140834/file_stream\": context deadline exceeded"}
101
+ {"time":"2026-01-16T10:16:22.590616038Z","level":"INFO","msg":"api: retrying error","error":"Post \"https://api.wandb.ai/files/zaydzuhri/fla/-top.code.7B.batch8.seqlen4096.context4096.warmup400.update2.steps40000.lr2e-5.cosine-202601140834/file_stream\": dial tcp: lookup api.wandb.ai on 10.43.0.10:53: read udp 10.42.227.13:40117->10.43.0.10:53: i/o timeout"}
102
+ {"time":"2026-01-16T10:16:50.507907411Z","level":"INFO","msg":"api: retrying error","error":"Post \"https://api.wandb.ai/files/zaydzuhri/fla/-top.code.7B.batch8.seqlen4096.context4096.warmup400.update2.steps40000.lr2e-5.cosine-202601140834/file_stream\": dial tcp: lookup api.wandb.ai on 10.43.0.10:53: read udp 10.42.227.13:47171->10.43.0.10:53: i/o timeout"}
103
+ {"time":"2026-01-16T10:24:55.070449016Z","level":"INFO","msg":"api: retrying error","error":"Post \"https://api.wandb.ai/files/zaydzuhri/fla/-top.code.7B.batch8.seqlen4096.context4096.warmup400.update2.steps40000.lr2e-5.cosine-202601140834/file_stream\": dial tcp: lookup api.wandb.ai on 10.43.0.10:53: read udp 10.42.227.13:34432->10.43.0.10:53: i/o timeout"}
104
+ {"time":"2026-01-16T10:25:18.367780544Z","level":"INFO","msg":"api: retrying error","error":"Post \"https://api.wandb.ai/files/zaydzuhri/fla/-top.code.7B.batch8.seqlen4096.context4096.warmup400.update2.steps40000.lr2e-5.cosine-202601140834/file_stream\": dial tcp: lookup api.wandb.ai on 10.43.0.10:53: read udp 10.42.227.13:36852->10.43.0.10:53: i/o timeout"}
105
+ {"time":"2026-01-16T10:26:55.818200633Z","level":"INFO","msg":"api: retrying error","error":"Post \"https://api.wandb.ai/files/zaydzuhri/fla/-top.code.7B.batch8.seqlen4096.context4096.warmup400.update2.steps40000.lr2e-5.cosine-202601140834/file_stream\": unexpected EOF"}
106
+ {"time":"2026-01-16T10:28:34.800025282Z","level":"INFO","msg":"api: retrying error","error":"Post \"https://api.wandb.ai/files/zaydzuhri/fla/-top.code.7B.batch8.seqlen4096.context4096.warmup400.update2.steps40000.lr2e-5.cosine-202601140834/file_stream\": dial tcp: lookup api.wandb.ai on 10.43.0.10:53: read udp 10.42.227.13:59732->10.43.0.10:53: i/o timeout"}
107
+ {"time":"2026-01-16T10:34:43.919151425Z","level":"INFO","msg":"api: retrying error","error":"Post \"https://api.wandb.ai/files/zaydzuhri/fla/-top.code.7B.batch8.seqlen4096.context4096.warmup400.update2.steps40000.lr2e-5.cosine-202601140834/file_stream\": dial tcp: lookup api.wandb.ai on 10.43.0.10:53: read udp 10.42.227.13:60755->10.43.0.10:53: i/o timeout"}
108
+ {"time":"2026-01-16T10:38:34.603516707Z","level":"INFO","msg":"api: retrying error","error":"Post \"https://api.wandb.ai/files/zaydzuhri/fla/-top.code.7B.batch8.seqlen4096.context4096.warmup400.update2.steps40000.lr2e-5.cosine-202601140834/file_stream\": dial tcp: lookup api.wandb.ai on 10.43.0.10:53: read udp 10.42.227.13:58996->10.43.0.10:53: i/o timeout"}
109
+ {"time":"2026-01-16T10:44:20.341045071Z","level":"INFO","msg":"api: retrying error","error":"Post \"https://api.wandb.ai/files/zaydzuhri/fla/-top.code.7B.batch8.seqlen4096.context4096.warmup400.update2.steps40000.lr2e-5.cosine-202601140834/file_stream\": dial tcp: lookup api.wandb.ai on 10.43.0.10:53: read udp 10.42.227.13:37287->10.43.0.10:53: i/o timeout"}
110
+ {"time":"2026-01-16T10:45:53.655456166Z","level":"INFO","msg":"api: retrying error","error":"Post \"https://api.wandb.ai/files/zaydzuhri/fla/-top.code.7B.batch8.seqlen4096.context4096.warmup400.update2.steps40000.lr2e-5.cosine-202601140834/file_stream\": unexpected EOF"}
111
+ {"time":"2026-01-16T12:53:20.616362694Z","level":"INFO","msg":"api: retrying error","error":"Post \"https://api.wandb.ai/files/zaydzuhri/fla/-top.code.7B.batch8.seqlen4096.context4096.warmup400.update2.steps40000.lr2e-5.cosine-202601140834/file_stream\": dial tcp: lookup api.wandb.ai on 10.43.0.10:53: read udp 10.42.227.13:53959->10.43.0.10:53: i/o timeout"}
112
+ {"time":"2026-01-16T12:54:00.113794175Z","level":"INFO","msg":"api: retrying error","error":"Post \"https://api.wandb.ai/files/zaydzuhri/fla/-top.code.7B.batch8.seqlen4096.context4096.warmup400.update2.steps40000.lr2e-5.cosine-202601140834/file_stream\": dial tcp: lookup api.wandb.ai on 10.43.0.10:53: read udp 10.42.227.13:55534->10.43.0.10:53: i/o timeout"}
113
+ {"time":"2026-01-16T18:45:41.149118727Z","level":"INFO","msg":"api: retrying HTTP error","status":502,"url":"https://api.wandb.ai/files/zaydzuhri/fla/-top.code.7B.batch8.seqlen4096.context4096.warmup400.update2.steps40000.lr2e-5.cosine-202601140834/file_stream","body":"\n<html><head>\n<meta http-equiv=\"content-type\" content=\"text/html;charset=utf-8\">\n<title>502 Server Error</title>\n</head>\n<body text=#000000 bgcolor=#ffffff>\n<h1>Error: Server Error</h1>\n<h2>The server encountered a temporary error and could not complete your request.<p>Please try again in 30 seconds.</h2>\n<h2></h2>\n</body></html>\n"}
114
+ {"time":"2026-01-16T19:46:30.080975633Z","level":"INFO","msg":"api: retrying error","error":"Post \"https://api.wandb.ai/files/zaydzuhri/fla/-top.code.7B.batch8.seqlen4096.context4096.warmup400.update2.steps40000.lr2e-5.cosine-202601140834/file_stream\": dial tcp: lookup api.wandb.ai on 10.43.0.10:53: read udp 10.42.227.13:46152->10.43.0.10:53: i/o timeout"}
115
+ {"time":"2026-01-16T19:47:12.362852863Z","level":"INFO","msg":"api: retrying error","error":"Post \"https://api.wandb.ai/files/zaydzuhri/fla/-top.code.7B.batch8.seqlen4096.context4096.warmup400.update2.steps40000.lr2e-5.cosine-202601140834/file_stream\": dial tcp: lookup api.wandb.ai on 10.43.0.10:53: read udp 10.42.227.13:53133->10.43.0.10:53: i/o timeout"}
116
+ {"time":"2026-01-16T19:58:19.801124967Z","level":"INFO","msg":"api: retrying error","error":"Post \"https://api.wandb.ai/files/zaydzuhri/fla/-top.code.7B.batch8.seqlen4096.context4096.warmup400.update2.steps40000.lr2e-5.cosine-202601140834/file_stream\": dial tcp: lookup api.wandb.ai on 10.43.0.10:53: read udp 10.42.227.13:44611->10.43.0.10:53: i/o timeout"}
117
+ {"time":"2026-01-16T19:58:52.310948627Z","level":"INFO","msg":"api: retrying error","error":"Post \"https://api.wandb.ai/files/zaydzuhri/fla/-top.code.7B.batch8.seqlen4096.context4096.warmup400.update2.steps40000.lr2e-5.cosine-202601140834/file_stream\": dial tcp: lookup api.wandb.ai on 10.43.0.10:53: read udp 10.42.227.13:57138->10.43.0.10:53: i/o timeout"}
118
+ {"time":"2026-01-16T21:04:34.800662455Z","level":"INFO","msg":"api: retrying error","error":"Post \"https://api.wandb.ai/files/zaydzuhri/fla/-top.code.7B.batch8.seqlen4096.context4096.warmup400.update2.steps40000.lr2e-5.cosine-202601140834/file_stream\": dial tcp: lookup api.wandb.ai on 10.43.0.10:53: read udp 10.42.227.13:48045->10.43.0.10:53: i/o timeout"}
119
+ {"time":"2026-01-16T21:05:48.683738194Z","level":"INFO","msg":"api: retrying error","error":"Post \"https://api.wandb.ai/files/zaydzuhri/fla/-top.code.7B.batch8.seqlen4096.context4096.warmup400.update2.steps40000.lr2e-5.cosine-202601140834/file_stream\": dial tcp: lookup api.wandb.ai on 10.43.0.10:53: read udp 10.42.227.13:36932->10.43.0.10:53: i/o timeout"}
120
+ {"time":"2026-01-16T22:36:50.618772429Z","level":"INFO","msg":"api: retrying error","error":"Post \"https://api.wandb.ai/files/zaydzuhri/fla/-top.code.7B.batch8.seqlen4096.context4096.warmup400.update2.steps40000.lr2e-5.cosine-202601140834/file_stream\": dial tcp: lookup api.wandb.ai on 10.43.0.10:53: read udp 10.42.227.13:45006->10.43.0.10:53: i/o timeout"}
121
+ {"time":"2026-01-17T00:52:30.079930754Z","level":"INFO","msg":"api: retrying error","error":"Post \"https://api.wandb.ai/files/zaydzuhri/fla/-top.code.7B.batch8.seqlen4096.context4096.warmup400.update2.steps40000.lr2e-5.cosine-202601140834/file_stream\": dial tcp: lookup api.wandb.ai on 10.43.0.10:53: read udp 10.42.227.13:48132->10.43.0.10:53: i/o timeout"}
122
+ {"time":"2026-01-17T04:59:49.798891968Z","level":"INFO","msg":"api: retrying error","error":"Post \"https://api.wandb.ai/files/zaydzuhri/fla/-top.code.7B.batch8.seqlen4096.context4096.warmup400.update2.steps40000.lr2e-5.cosine-202601140834/file_stream\": dial tcp: lookup api.wandb.ai on 10.43.0.10:53: read udp 10.42.227.13:54551->10.43.0.10:53: i/o timeout"}
123
+ {"time":"2026-01-17T05:56:44.39370991Z","level":"INFO","msg":"api: retrying error","error":"Post \"https://api.wandb.ai/files/zaydzuhri/fla/-top.code.7B.batch8.seqlen4096.context4096.warmup400.update2.steps40000.lr2e-5.cosine-202601140834/file_stream\": dial tcp: lookup api.wandb.ai on 10.43.0.10:53: read udp 10.42.227.13:47123->10.43.0.10:53: i/o timeout"}
124
+ {"time":"2026-01-17T06:00:10.342698713Z","level":"INFO","msg":"api: retrying error","error":"Post \"https://api.wandb.ai/files/zaydzuhri/fla/-top.code.7B.batch8.seqlen4096.context4096.warmup400.update2.steps40000.lr2e-5.cosine-202601140834/file_stream\": dial tcp: lookup api.wandb.ai on 10.43.0.10:53: read udp 10.42.227.13:52714->10.43.0.10:53: i/o timeout"}
125
+ {"time":"2026-01-17T06:01:00.401835183Z","level":"INFO","msg":"api: retrying error","error":"Post \"https://api.wandb.ai/files/zaydzuhri/fla/-top.code.7B.batch8.seqlen4096.context4096.warmup400.update2.steps40000.lr2e-5.cosine-202601140834/file_stream\": dial tcp: lookup api.wandb.ai on 10.43.0.10:53: read udp 10.42.227.13:33545->10.43.0.10:53: i/o timeout"}
126
+ {"time":"2026-01-17T06:24:30.07071737Z","level":"INFO","msg":"api: retrying error","error":"Post \"https://api.wandb.ai/files/zaydzuhri/fla/-top.code.7B.batch8.seqlen4096.context4096.warmup400.update2.steps40000.lr2e-5.cosine-202601140834/file_stream\": dial tcp: lookup api.wandb.ai on 10.43.0.10:53: read udp 10.42.227.13:55594->10.43.0.10:53: i/o timeout"}
127
+ {"time":"2026-01-17T07:05:20.996870718Z","level":"INFO","msg":"fileTransfer: Close: file transfer manager closed"}
128
+ {"time":"2026-01-17T07:05:23.850214274Z","level":"INFO","msg":"handler: operation stats","stats":{}}
129
+ {"time":"2026-01-17T07:05:23.858304758Z","level":"INFO","msg":"stream: closing","id":"-top.code.7B.batch8.seqlen4096.context4096.warmup400.update2.steps40000.lr2e-5.cosine-202601140834"}
130
+ {"time":"2026-01-17T07:05:23.858429125Z","level":"INFO","msg":"handler: closed","stream_id":"-top.code.7B.batch8.seqlen4096.context4096.warmup400.update2.steps40000.lr2e-5.cosine-202601140834"}
131
+ {"time":"2026-01-17T07:05:23.858688602Z","level":"INFO","msg":"sender: closed","stream_id":"-top.code.7B.batch8.seqlen4096.context4096.warmup400.update2.steps40000.lr2e-5.cosine-202601140834"}
132
+ {"time":"2026-01-17T07:05:23.858707241Z","level":"INFO","msg":"stream: closed","id":"-top.code.7B.batch8.seqlen4096.context4096.warmup400.update2.steps40000.lr2e-5.cosine-202601140834"}
tb/20260114-0839/wandb/run-20260114_083920--top.code.7B.batch8.seqlen4096.context4096.warmup400.update2.steps40000.lr2e-5.cosine-202601140834/files/config.yaml ADDED
@@ -0,0 +1,156 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ _wandb:
2
+ value:
3
+ cli_version: 0.23.1
4
+ e:
5
+ lh52admfzo146xswph7xp2vbkx1xx0ty:
6
+ args:
7
+ - --job.config_file
8
+ - flame/models/fla.toml
9
+ - --job.dump_folder
10
+ - exp/top.code.7B.batch8.seqlen4096.context4096.warmup400.update2.steps40000.lr2e-5.cosine
11
+ - --model.config
12
+ - configs/top_transformer_7B.json
13
+ - --model.tokenizer_path
14
+ - fla-hub/transformer-1.3B-100B
15
+ - --optimizer.name
16
+ - AdamW
17
+ - --optimizer.eps
18
+ - "1e-15"
19
+ - --optimizer.lr
20
+ - "2e-5"
21
+ - --lr_scheduler.warmup_steps
22
+ - "400"
23
+ - --lr_scheduler.lr_min
24
+ - "0.1"
25
+ - --lr_scheduler.decay_type
26
+ - cosine
27
+ - --training.batch_size
28
+ - "8"
29
+ - --training.seq_len
30
+ - "4096"
31
+ - --training.context_len
32
+ - "4096"
33
+ - --training.gradient_accumulation_steps
34
+ - "2"
35
+ - --training.steps
36
+ - "40000"
37
+ - --training.max_norm
38
+ - "1.0"
39
+ - --training.skip_nan_inf
40
+ - --training.dataset
41
+ - /root/.cache/zaydzuhri___stack-edu-python/default
42
+ - --training.dataset_split
43
+ - train
44
+ - --training.num_workers
45
+ - "32"
46
+ - --training.prefetch_factor
47
+ - "2"
48
+ - --training.seed
49
+ - "79"
50
+ - --training.compile
51
+ - --checkpoint.interval
52
+ - "8000"
53
+ - --checkpoint.load_step
54
+ - "-1"
55
+ - --metrics.log_freq
56
+ - "5"
57
+ - --checkpoint.hf_upload_enabled
58
+ - --checkpoint.hf_repo_base_name
59
+ - zaydzuhri/top-code-7B-4096-batch8x2-steps40000-2
60
+ - --comm.init_timeout_seconds
61
+ - "6000"
62
+ - --comm.train_timeout_seconds
63
+ - "6000"
64
+ cpu_count: 64
65
+ cpu_count_logical: 128
66
+ cudaVersion: "12.8"
67
+ disk:
68
+ /:
69
+ total: "3246163542016"
70
+ used: "305102368768"
71
+ email: zaydzuhri@gmail.com
72
+ executable: /root/miniconda3/envs/flame-env/bin/python3.12
73
+ git:
74
+ commit: 5bcd6b6423606e07b92dd2644ecc24d908d2c7a4
75
+ remote: https://github.com/zaydzuhri/flame.git
76
+ gpu: NVIDIA H200
77
+ gpu_count: 8
78
+ gpu_nvidia:
79
+ - architecture: Hopper
80
+ cudaCores: 16896
81
+ memoryTotal: "150754820096"
82
+ name: NVIDIA H200
83
+ uuid: GPU-b8257f0c-859b-3229-f91f-c647f9a67f18
84
+ - architecture: Hopper
85
+ cudaCores: 16896
86
+ memoryTotal: "150754820096"
87
+ name: NVIDIA H200
88
+ uuid: GPU-deddf621-c42a-5400-0269-9aa6e05cbd3e
89
+ - architecture: Hopper
90
+ cudaCores: 16896
91
+ memoryTotal: "150754820096"
92
+ name: NVIDIA H200
93
+ uuid: GPU-edd21823-aed6-c910-cade-0faef1878943
94
+ - architecture: Hopper
95
+ cudaCores: 16896
96
+ memoryTotal: "150754820096"
97
+ name: NVIDIA H200
98
+ uuid: GPU-f7d62644-de4b-d4c9-ef7c-f00dc7301c28
99
+ - architecture: Hopper
100
+ cudaCores: 16896
101
+ memoryTotal: "150754820096"
102
+ name: NVIDIA H200
103
+ uuid: GPU-86b86993-ceaf-a6c0-4c37-bbc8dfd50e20
104
+ - architecture: Hopper
105
+ cudaCores: 16896
106
+ memoryTotal: "150754820096"
107
+ name: NVIDIA H200
108
+ uuid: GPU-2e0d4ca1-eb76-54a9-a96c-8eb0714816cf
109
+ - architecture: Hopper
110
+ cudaCores: 16896
111
+ memoryTotal: "150754820096"
112
+ name: NVIDIA H200
113
+ uuid: GPU-f535239b-dbde-7ddc-5020-1c9846d88033
114
+ - architecture: Hopper
115
+ cudaCores: 16896
116
+ memoryTotal: "150754820096"
117
+ name: NVIDIA H200
118
+ uuid: GPU-feabe082-0b23-82b0-789d-da848e0617a4
119
+ host: rentals-6z3zwezo0sfapf3y-697b4fc787-jbkxv
120
+ memory:
121
+ total: "2870219702272"
122
+ os: Linux-6.8.0-85-generic-x86_64-with-glibc2.39
123
+ program: -m flame.train
124
+ python: CPython 3.12.12
125
+ root: exp/top.code.7B.batch8.seqlen4096.context4096.warmup400.update2.steps40000.lr2e-5.cosine/tb/20260114-0839
126
+ startedAt: "2026-01-14T08:39:20.478463Z"
127
+ writerId: lh52admfzo146xswph7xp2vbkx1xx0ty
128
+ m: []
129
+ python_version: 3.12.12
130
+ t:
131
+ "1":
132
+ - 1
133
+ - 5
134
+ - 11
135
+ - 49
136
+ - 51
137
+ - 53
138
+ - 71
139
+ "2":
140
+ - 1
141
+ - 5
142
+ - 11
143
+ - 49
144
+ - 51
145
+ - 53
146
+ - 71
147
+ "3":
148
+ - 2
149
+ - 13
150
+ - 14
151
+ - 61
152
+ "4": 3.12.12
153
+ "5": 0.23.1
154
+ "6": 4.51.3
155
+ "12": 0.23.1
156
+ "13": linux-x86_64
tb/20260114-0839/wandb/run-20260114_083920--top.code.7B.batch8.seqlen4096.context4096.warmup400.update2.steps40000.lr2e-5.cosine-202601140834/files/output.log ADDED
The diff for this file is too large to render. See raw diff
 
tb/20260114-0839/wandb/run-20260114_083920--top.code.7B.batch8.seqlen4096.context4096.warmup400.update2.steps40000.lr2e-5.cosine-202601140834/files/requirements.txt ADDED
@@ -0,0 +1,164 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ flame==0.1.0
2
+ fsspec==2025.10.0
3
+ aiohappyeyeballs==2.6.1
4
+ ipykernel==7.1.0
5
+ smmap==5.0.2
6
+ pybind11==3.0.1
7
+ tabulate==0.9.0
8
+ parso==0.8.5
9
+ numpy==2.4.1
10
+ yarl==1.22.0
11
+ asttokens==3.0.1
12
+ pandas==2.3.3
13
+ xxhash==3.6.0
14
+ pathvalidate==3.3.1
15
+ regex==2025.11.3
16
+ click==8.3.1
17
+ idna==3.11
18
+ pydantic==2.12.5
19
+ pexpect==4.9.0
20
+ typepy==1.3.4
21
+ cmake==4.2.1
22
+ wcwidth==0.2.14
23
+ ipython==9.9.0
24
+ triton==3.2.0
25
+ hf-xet==1.2.0
26
+ joblib==1.5.3
27
+ tqdm==4.67.1
28
+ nvidia-nvtx-cu12==12.4.127
29
+ setuptools==80.9.0
30
+ lxml==6.0.2
31
+ nvidia-cufft-cu12==11.2.1.3
32
+ evaluate==0.4.6
33
+ Markdown==3.10
34
+ aiohttp==3.13.3
35
+ pillow==12.1.0
36
+ chardet==5.2.0
37
+ multiprocess==0.70.18
38
+ tensorboard==2.20.0
39
+ nvidia-nvjitlink-cu12==12.4.127
40
+ flame==0.1.0
41
+ matplotlib-inline==0.2.1
42
+ tensorboard-data-server==0.7.2
43
+ nvidia-cusparse-cu12==12.3.1.170
44
+ urllib3==2.6.3
45
+ lm_eval==0.4.9.1
46
+ pure_eval==0.2.3
47
+ DataProperty==1.1.0
48
+ nvidia-cudnn-cu12==9.1.0.70
49
+ accelerate==1.12.0
50
+ Jinja2==3.1.6
51
+ datasets==4.4.2
52
+ scikit-learn==1.8.0
53
+ nvidia-nccl-cu12==2.21.5
54
+ typing_extensions==4.15.0
55
+ pyzmq==27.1.0
56
+ mpmath==1.3.0
57
+ annotated-types==0.7.0
58
+ propcache==0.4.1
59
+ wandb==0.23.1
60
+ requests==2.32.5
61
+ more-itertools==10.8.0
62
+ nvidia-cuda-runtime-cu12==12.4.127
63
+ httpx==0.28.1
64
+ huggingface-hub==0.36.0
65
+ MarkupSafe==3.0.3
66
+ nvidia-cusolver-cu12==11.6.1.9
67
+ gitdb==4.0.12
68
+ torchdata==0.11.0
69
+ sympy==1.13.1
70
+ safetensors==0.7.0
71
+ httpcore==1.0.9
72
+ portalocker==3.2.0
73
+ attrs==25.4.0
74
+ typing-inspection==0.4.2
75
+ ptyprocess==0.7.0
76
+ nvidia-cublas-cu12==12.4.5.8
77
+ numexpr==2.14.1
78
+ executing==2.2.1
79
+ networkx==3.6.1
80
+ threadpoolctl==3.6.0
81
+ nvidia-cusparselt-cu12==0.6.2
82
+ filelock==3.20.3
83
+ einops==0.8.1
84
+ zstandard==0.25.0
85
+ comm==0.2.3
86
+ six==1.17.0
87
+ packaging==25.0
88
+ sacrebleu==2.6.0
89
+ tqdm-multiprocess==0.0.11
90
+ colorama==0.4.6
91
+ nvidia-cuda-cupti-cu12==12.4.127
92
+ jupyter_client==8.8.0
93
+ GitPython==3.1.46
94
+ psutil==7.2.1
95
+ tornado==6.5.4
96
+ nltk==3.9.2
97
+ jupyter_core==5.9.1
98
+ sqlitedict==2.1.0
99
+ tzdata==2025.3
100
+ pytz==2025.2
101
+ Pygments==2.19.2
102
+ python-dotenv==1.2.1
103
+ scipy==1.17.0
104
+ tiktoken==0.12.0
105
+ PyYAML==6.0.3
106
+ dill==0.4.0
107
+ nvidia-cuda-nvrtc-cu12==12.4.127
108
+ prompt_toolkit==3.0.52
109
+ jedi==0.19.2
110
+ sentry-sdk==2.49.0
111
+ frozenlist==1.8.0
112
+ tokenizers==0.21.4
113
+ grpcio==1.76.0
114
+ ninja==1.13.0
115
+ mbstrdecoder==1.1.4
116
+ flash-attn==2.7.3
117
+ aiosignal==1.4.0
118
+ tabledata==1.3.4
119
+ h11==0.16.0
120
+ absl-py==2.3.1
121
+ torch==2.6.0
122
+ nest_asyncio==1.6.0
123
+ certifi==2026.1.4
124
+ pip==25.3
125
+ platformdirs==4.5.1
126
+ wheel==0.45.1
127
+ peft==0.17.0
128
+ debugpy==1.8.19
129
+ ipython_pygments_lexers==1.1.1
130
+ rouge_score==0.1.2
131
+ protobuf==6.33.4
132
+ multidict==6.7.0
133
+ tcolorpy==0.1.7
134
+ nvidia-curand-cu12==10.3.5.147
135
+ pydantic_core==2.41.5
136
+ pytablewriter==1.2.1
137
+ charset-normalizer==3.4.4
138
+ transformers==4.51.3
139
+ word2number==1.1
140
+ anyio==4.12.1
141
+ jsonlines==4.0.0
142
+ stack_data==0.6.3
143
+ decorator==5.2.1
144
+ python-dateutil==2.9.0.post0
145
+ pyarrow==22.0.0
146
+ traitlets==5.14.3
147
+ Cython==3.2.4
148
+ Werkzeug==3.1.5
149
+ tomli==2.0.1
150
+ more-itertools==10.3.0
151
+ inflect==7.3.1
152
+ zipp==3.19.2
153
+ jaraco.functools==4.0.1
154
+ autocommand==2.2.2
155
+ jaraco.collections==5.1.0
156
+ platformdirs==4.2.2
157
+ backports.tarfile==1.2.0
158
+ importlib_metadata==8.0.0
159
+ jaraco.text==3.12.1
160
+ typing_extensions==4.12.2
161
+ jaraco.context==5.3.0
162
+ typeguard==4.3.0
163
+ packaging==24.2
164
+ wheel==0.45.1
tb/20260114-0839/wandb/run-20260114_083920--top.code.7B.batch8.seqlen4096.context4096.warmup400.update2.steps40000.lr2e-5.cosine-202601140834/files/wandb-summary.json ADDED
@@ -0,0 +1 @@
 
 
1
+ {"tflops":508.00860648951715,"mfu(%)":51.36588538822215,"_step":40000,"memory/num_ooms":0,"memory/max_active(GiB)":120.17096424102783,"throughput(tps)":10659.175384655096,"memory/max_reserved(GiB)":122.056640625,"time_metrics/data_loading(%)":0.36455180712626784,"_wandb":{"runtime":253550},"loss_metrics/global_avg_top_loss":1.901174783706665,"memory/num_alloc_retries":0,"memory/max_reserved(%)":87.58692881450952,"memory/max_active(%)":86.23378160052383,"loss_metrics/global_max_loss":2.8611221313476562,"loss_metrics/global_avg_loss":2.641481399536133,"_runtime":253550.723934285,"time_metrics/end_to_end(s)":6.148318011010997,"loss_metrics/global_avg_ntp_loss":0.7403066754341125,"_timestamp":1.768632343637205e+09,"time_metrics/data_loading(s)":0.011206902208505199,"optimizer/grad_norm":0.35768887400627136,"optimizer/skipped_step":0,"optimizer/lr":2.0000000000000003e-06}
tb/20260114-0839/wandb/run-20260114_083920--top.code.7B.batch8.seqlen4096.context4096.warmup400.update2.steps40000.lr2e-5.cosine-202601140834/logs/debug-internal.log ADDED
@@ -0,0 +1,132 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {"time":"2026-01-14T08:39:20.983194417Z","level":"INFO","msg":"stream: starting","core version":"0.23.1"}
2
+ {"time":"2026-01-14T08:39:23.832796813Z","level":"INFO","msg":"stream: created new stream","id":"-top.code.7B.batch8.seqlen4096.context4096.warmup400.update2.steps40000.lr2e-5.cosine-202601140834"}
3
+ {"time":"2026-01-14T08:39:23.833049183Z","level":"INFO","msg":"stream: started","id":"-top.code.7B.batch8.seqlen4096.context4096.warmup400.update2.steps40000.lr2e-5.cosine-202601140834"}
4
+ {"time":"2026-01-14T08:39:23.834752679Z","level":"INFO","msg":"handler: started","stream_id":"-top.code.7B.batch8.seqlen4096.context4096.warmup400.update2.steps40000.lr2e-5.cosine-202601140834"}
5
+ {"time":"2026-01-14T08:39:23.834811381Z","level":"INFO","msg":"writer: started","stream_id":"-top.code.7B.batch8.seqlen4096.context4096.warmup400.update2.steps40000.lr2e-5.cosine-202601140834"}
6
+ {"time":"2026-01-14T08:39:23.834906425Z","level":"INFO","msg":"sender: started","stream_id":"-top.code.7B.batch8.seqlen4096.context4096.warmup400.update2.steps40000.lr2e-5.cosine-202601140834"}
7
+ {"time":"2026-01-14T08:49:42.927432861Z","level":"INFO","msg":"api: retrying HTTP error","status":502,"url":"https://api.wandb.ai/files/zaydzuhri/fla/-top.code.7B.batch8.seqlen4096.context4096.warmup400.update2.steps40000.lr2e-5.cosine-202601140834/file_stream","body":"\n<html><head>\n<meta http-equiv=\"content-type\" content=\"text/html;charset=utf-8\">\n<title>502 Server Error</title>\n</head>\n<body text=#000000 bgcolor=#ffffff>\n<h1>Error: Server Error</h1>\n<h2>The server encountered a temporary error and could not complete your request.<p>Please try again in 30 seconds.</h2>\n<h2></h2>\n</body></html>\n"}
8
+ {"time":"2026-01-14T10:55:25.630741135Z","level":"INFO","msg":"api: retrying error","error":"Post \"https://api.wandb.ai/files/zaydzuhri/fla/-top.code.7B.batch8.seqlen4096.context4096.warmup400.update2.steps40000.lr2e-5.cosine-202601140834/file_stream\": dial tcp: lookup api.wandb.ai on 10.43.0.10:53: read udp 10.42.227.13:54532->10.43.0.10:53: i/o timeout"}
9
+ {"time":"2026-01-14T12:15:32.359274661Z","level":"INFO","msg":"api: retrying error","error":"Post \"https://api.wandb.ai/files/zaydzuhri/fla/-top.code.7B.batch8.seqlen4096.context4096.warmup400.update2.steps40000.lr2e-5.cosine-202601140834/file_stream\": unexpected EOF"}
10
+ {"time":"2026-01-14T13:08:41.787033162Z","level":"INFO","msg":"api: retrying HTTP error","status":502,"url":"https://api.wandb.ai/files/zaydzuhri/fla/-top.code.7B.batch8.seqlen4096.context4096.warmup400.update2.steps40000.lr2e-5.cosine-202601140834/file_stream","body":"\n<html><head>\n<meta http-equiv=\"content-type\" content=\"text/html;charset=utf-8\">\n<title>502 Server Error</title>\n</head>\n<body text=#000000 bgcolor=#ffffff>\n<h1>Error: Server Error</h1>\n<h2>The server encountered a temporary error and could not complete your request.<p>Please try again in 30 seconds.</h2>\n<h2></h2>\n</body></html>\n"}
11
+ {"time":"2026-01-14T14:59:44.911105606Z","level":"INFO","msg":"api: retrying error","error":"Post \"https://api.wandb.ai/files/zaydzuhri/fla/-top.code.7B.batch8.seqlen4096.context4096.warmup400.update2.steps40000.lr2e-5.cosine-202601140834/file_stream\": unexpected EOF"}
12
+ {"time":"2026-01-14T15:37:12.523994545Z","level":"INFO","msg":"api: retrying HTTP error","status":502,"url":"https://api.wandb.ai/files/zaydzuhri/fla/-top.code.7B.batch8.seqlen4096.context4096.warmup400.update2.steps40000.lr2e-5.cosine-202601140834/file_stream","body":"\n<html><head>\n<meta http-equiv=\"content-type\" content=\"text/html;charset=utf-8\">\n<title>502 Server Error</title>\n</head>\n<body text=#000000 bgcolor=#ffffff>\n<h1>Error: Server Error</h1>\n<h2>The server encountered a temporary error and could not complete your request.<p>Please try again in 30 seconds.</h2>\n<h2></h2>\n</body></html>\n"}
13
+ {"time":"2026-01-14T16:47:29.020950166Z","level":"INFO","msg":"api: retrying HTTP error","status":502,"url":"https://api.wandb.ai/files/zaydzuhri/fla/-top.code.7B.batch8.seqlen4096.context4096.warmup400.update2.steps40000.lr2e-5.cosine-202601140834/file_stream","body":"\n<html><head>\n<meta http-equiv=\"content-type\" content=\"text/html;charset=utf-8\">\n<title>502 Server Error</title>\n</head>\n<body text=#000000 bgcolor=#ffffff>\n<h1>Error: Server Error</h1>\n<h2>The server encountered a temporary error and could not complete your request.<p>Please try again in 30 seconds.</h2>\n<h2></h2>\n</body></html>\n"}
14
+ {"time":"2026-01-14T16:47:39.426278777Z","level":"INFO","msg":"api: retrying HTTP error","status":502,"url":"https://api.wandb.ai/files/zaydzuhri/fla/-top.code.7B.batch8.seqlen4096.context4096.warmup400.update2.steps40000.lr2e-5.cosine-202601140834/file_stream","body":"\n<html><head>\n<meta http-equiv=\"content-type\" content=\"text/html;charset=utf-8\">\n<title>502 Server Error</title>\n</head>\n<body text=#000000 bgcolor=#ffffff>\n<h1>Error: Server Error</h1>\n<h2>The server encountered a temporary error and could not complete your request.<p>Please try again in 30 seconds.</h2>\n<h2></h2>\n</body></html>\n"}
15
+ {"time":"2026-01-14T17:03:26.755419877Z","level":"INFO","msg":"api: retrying HTTP error","status":502,"url":"https://api.wandb.ai/files/zaydzuhri/fla/-top.code.7B.batch8.seqlen4096.context4096.warmup400.update2.steps40000.lr2e-5.cosine-202601140834/file_stream","body":"\n<html><head>\n<meta http-equiv=\"content-type\" content=\"text/html;charset=utf-8\">\n<title>502 Server Error</title>\n</head>\n<body text=#000000 bgcolor=#ffffff>\n<h1>Error: Server Error</h1>\n<h2>The server encountered a temporary error and could not complete your request.<p>Please try again in 30 seconds.</h2>\n<h2></h2>\n</body></html>\n"}
16
+ {"time":"2026-01-14T17:22:40.06571008Z","level":"INFO","msg":"api: retrying error","error":"Post \"https://api.wandb.ai/files/zaydzuhri/fla/-top.code.7B.batch8.seqlen4096.context4096.warmup400.update2.steps40000.lr2e-5.cosine-202601140834/file_stream\": dial tcp: lookup api.wandb.ai on 10.43.0.10:53: read udp 10.42.227.13:52915->10.43.0.10:53: i/o timeout"}
17
+ {"time":"2026-01-14T17:23:30.097683124Z","level":"INFO","msg":"api: retrying error","error":"Post \"https://api.wandb.ai/files/zaydzuhri/fla/-top.code.7B.batch8.seqlen4096.context4096.warmup400.update2.steps40000.lr2e-5.cosine-202601140834/file_stream\": dial tcp: lookup api.wandb.ai on 10.43.0.10:53: read udp 10.42.227.13:53512->10.43.0.10:53: i/o timeout"}
18
+ {"time":"2026-01-14T17:29:28.638957324Z","level":"INFO","msg":"api: retrying HTTP error","status":502,"url":"https://api.wandb.ai/files/zaydzuhri/fla/-top.code.7B.batch8.seqlen4096.context4096.warmup400.update2.steps40000.lr2e-5.cosine-202601140834/file_stream","body":"\n<html><head>\n<meta http-equiv=\"content-type\" content=\"text/html;charset=utf-8\">\n<title>502 Server Error</title>\n</head>\n<body text=#000000 bgcolor=#ffffff>\n<h1>Error: Server Error</h1>\n<h2>The server encountered a temporary error and could not complete your request.<p>Please try again in 30 seconds.</h2>\n<h2></h2>\n</body></html>\n"}
19
+ {"time":"2026-01-14T18:28:19.032143046Z","level":"INFO","msg":"api: retrying error","error":"Post \"https://api.wandb.ai/files/zaydzuhri/fla/-top.code.7B.batch8.seqlen4096.context4096.warmup400.update2.steps40000.lr2e-5.cosine-202601140834/file_stream\": dial tcp: lookup api.wandb.ai on 10.43.0.10:53: read udp 10.42.227.13:35700->10.43.0.10:53: i/o timeout"}
20
+ {"time":"2026-01-14T18:30:35.607887985Z","level":"INFO","msg":"api: retrying error","error":"Post \"https://api.wandb.ai/files/zaydzuhri/fla/-top.code.7B.batch8.seqlen4096.context4096.warmup400.update2.steps40000.lr2e-5.cosine-202601140834/file_stream\": dial tcp: lookup api.wandb.ai on 10.43.0.10:53: read udp 10.42.227.13:53327->10.43.0.10:53: i/o timeout"}
21
+ {"time":"2026-01-14T18:36:50.614833057Z","level":"INFO","msg":"api: retrying error","error":"Post \"https://api.wandb.ai/files/zaydzuhri/fla/-top.code.7B.batch8.seqlen4096.context4096.warmup400.update2.steps40000.lr2e-5.cosine-202601140834/file_stream\": dial tcp: lookup api.wandb.ai on 10.43.0.10:53: read udp 10.42.227.13:52149->10.43.0.10:53: i/o timeout"}
22
+ {"time":"2026-01-14T18:37:03.615318388Z","level":"INFO","msg":"api: retrying error","error":"Post \"https://api.wandb.ai/files/zaydzuhri/fla/-top.code.7B.batch8.seqlen4096.context4096.warmup400.update2.steps40000.lr2e-5.cosine-202601140834/file_stream\": dial tcp: lookup api.wandb.ai on 10.43.0.10:53: read udp 10.42.227.13:45725->10.43.0.10:53: i/o timeout"}
23
+ {"time":"2026-01-14T19:09:34.794976855Z","level":"INFO","msg":"api: retrying error","error":"Post \"https://api.wandb.ai/files/zaydzuhri/fla/-top.code.7B.batch8.seqlen4096.context4096.warmup400.update2.steps40000.lr2e-5.cosine-202601140834/file_stream\": dial tcp: lookup api.wandb.ai on 10.43.0.10:53: read udp 10.42.227.13:40175->10.43.0.10:53: i/o timeout"}
24
+ {"time":"2026-01-14T19:10:12.299343829Z","level":"INFO","msg":"api: retrying error","error":"Post \"https://api.wandb.ai/files/zaydzuhri/fla/-top.code.7B.batch8.seqlen4096.context4096.warmup400.update2.steps40000.lr2e-5.cosine-202601140834/file_stream\": dial tcp: lookup api.wandb.ai on 10.43.0.10:53: read udp 10.42.227.13:49428->10.43.0.10:53: i/o timeout"}
25
+ {"time":"2026-01-14T19:23:00.077361468Z","level":"INFO","msg":"api: retrying error","error":"Post \"https://api.wandb.ai/files/zaydzuhri/fla/-top.code.7B.batch8.seqlen4096.context4096.warmup400.update2.steps40000.lr2e-5.cosine-202601140834/file_stream\": dial tcp: lookup api.wandb.ai on 10.43.0.10:53: read udp 10.42.227.13:46379->10.43.0.10:53: i/o timeout"}
26
+ {"time":"2026-01-14T19:23:42.567949074Z","level":"INFO","msg":"api: retrying error","error":"Post \"https://api.wandb.ai/files/zaydzuhri/fla/-top.code.7B.batch8.seqlen4096.context4096.warmup400.update2.steps40000.lr2e-5.cosine-202601140834/file_stream\": dial tcp: lookup api.wandb.ai on 10.43.0.10:53: read udp 10.42.227.13:60354->10.43.0.10:53: i/o timeout"}
27
+ {"time":"2026-01-14T19:50:12.533936926Z","level":"INFO","msg":"api: retrying HTTP error","status":502,"url":"https://api.wandb.ai/files/zaydzuhri/fla/-top.code.7B.batch8.seqlen4096.context4096.warmup400.update2.steps40000.lr2e-5.cosine-202601140834/file_stream","body":"\n<html><head>\n<meta http-equiv=\"content-type\" content=\"text/html;charset=utf-8\">\n<title>502 Server Error</title>\n</head>\n<body text=#000000 bgcolor=#ffffff>\n<h1>Error: Server Error</h1>\n<h2>The server encountered a temporary error and could not complete your request.<p>Please try again in 30 seconds.</h2>\n<h2></h2>\n</body></html>\n"}
28
+ {"time":"2026-01-14T20:54:42.468102299Z","level":"INFO","msg":"api: retrying HTTP error","status":502,"url":"https://api.wandb.ai/files/zaydzuhri/fla/-top.code.7B.batch8.seqlen4096.context4096.warmup400.update2.steps40000.lr2e-5.cosine-202601140834/file_stream","body":"\n<html><head>\n<meta http-equiv=\"content-type\" content=\"text/html;charset=utf-8\">\n<title>502 Server Error</title>\n</head>\n<body text=#000000 bgcolor=#ffffff>\n<h1>Error: Server Error</h1>\n<h2>The server encountered a temporary error and could not complete your request.<p>Please try again in 30 seconds.</h2>\n<h2></h2>\n</body></html>\n"}
29
+ {"time":"2026-01-14T20:54:46.291617296Z","level":"INFO","msg":"api: retrying HTTP error","status":502,"url":"https://api.wandb.ai/files/zaydzuhri/fla/-top.code.7B.batch8.seqlen4096.context4096.warmup400.update2.steps40000.lr2e-5.cosine-202601140834/file_stream","body":"\n<html><head>\n<meta http-equiv=\"content-type\" content=\"text/html;charset=utf-8\">\n<title>502 Server Error</title>\n</head>\n<body text=#000000 bgcolor=#ffffff>\n<h1>Error: Server Error</h1>\n<h2>The server encountered a temporary error and could not complete your request.<p>Please try again in 30 seconds.</h2>\n<h2></h2>\n</body></html>\n"}
30
+ {"time":"2026-01-14T21:26:50.300321781Z","level":"INFO","msg":"api: retrying HTTP error","status":502,"url":"https://api.wandb.ai/files/zaydzuhri/fla/-top.code.7B.batch8.seqlen4096.context4096.warmup400.update2.steps40000.lr2e-5.cosine-202601140834/file_stream","body":"\n<html><head>\n<meta http-equiv=\"content-type\" content=\"text/html;charset=utf-8\">\n<title>502 Server Error</title>\n</head>\n<body text=#000000 bgcolor=#ffffff>\n<h1>Error: Server Error</h1>\n<h2>The server encountered a temporary error and could not complete your request.<p>Please try again in 30 seconds.</h2>\n<h2></h2>\n</body></html>\n"}
31
+ {"time":"2026-01-14T21:28:30.989741658Z","level":"INFO","msg":"api: retrying HTTP error","status":502,"url":"https://api.wandb.ai/files/zaydzuhri/fla/-top.code.7B.batch8.seqlen4096.context4096.warmup400.update2.steps40000.lr2e-5.cosine-202601140834/file_stream","body":"\n<html><head>\n<meta http-equiv=\"content-type\" content=\"text/html;charset=utf-8\">\n<title>502 Server Error</title>\n</head>\n<body text=#000000 bgcolor=#ffffff>\n<h1>Error: Server Error</h1>\n<h2>The server encountered a temporary error and could not complete your request.<p>Please try again in 30 seconds.</h2>\n<h2></h2>\n</body></html>\n"}
32
+ {"time":"2026-01-14T21:29:11.344863371Z","level":"INFO","msg":"api: retrying HTTP error","status":502,"url":"https://api.wandb.ai/files/zaydzuhri/fla/-top.code.7B.batch8.seqlen4096.context4096.warmup400.update2.steps40000.lr2e-5.cosine-202601140834/file_stream","body":"\n<html><head>\n<meta http-equiv=\"content-type\" content=\"text/html;charset=utf-8\">\n<title>502 Server Error</title>\n</head>\n<body text=#000000 bgcolor=#ffffff>\n<h1>Error: Server Error</h1>\n<h2>The server encountered a temporary error and could not complete your request.<p>Please try again in 30 seconds.</h2>\n<h2></h2>\n</body></html>\n"}
33
+ {"time":"2026-01-14T21:42:33.145399798Z","level":"INFO","msg":"api: retrying HTTP error","status":502,"url":"https://api.wandb.ai/files/zaydzuhri/fla/-top.code.7B.batch8.seqlen4096.context4096.warmup400.update2.steps40000.lr2e-5.cosine-202601140834/file_stream","body":"\n<html><head>\n<meta http-equiv=\"content-type\" content=\"text/html;charset=utf-8\">\n<title>502 Server Error</title>\n</head>\n<body text=#000000 bgcolor=#ffffff>\n<h1>Error: Server Error</h1>\n<h2>The server encountered a temporary error and could not complete your request.<p>Please try again in 30 seconds.</h2>\n<h2></h2>\n</body></html>\n"}
34
+ {"time":"2026-01-14T21:44:11.121211897Z","level":"INFO","msg":"api: retrying HTTP error","status":502,"url":"https://api.wandb.ai/files/zaydzuhri/fla/-top.code.7B.batch8.seqlen4096.context4096.warmup400.update2.steps40000.lr2e-5.cosine-202601140834/file_stream","body":"\n<html><head>\n<meta http-equiv=\"content-type\" content=\"text/html;charset=utf-8\">\n<title>502 Server Error</title>\n</head>\n<body text=#000000 bgcolor=#ffffff>\n<h1>Error: Server Error</h1>\n<h2>The server encountered a temporary error and could not complete your request.<p>Please try again in 30 seconds.</h2>\n<h2></h2>\n</body></html>\n"}
35
+ {"time":"2026-01-14T21:45:55.018487294Z","level":"INFO","msg":"api: retrying HTTP error","status":502,"url":"https://api.wandb.ai/files/zaydzuhri/fla/-top.code.7B.batch8.seqlen4096.context4096.warmup400.update2.steps40000.lr2e-5.cosine-202601140834/file_stream","body":"\n<html><head>\n<meta http-equiv=\"content-type\" content=\"text/html;charset=utf-8\">\n<title>502 Server Error</title>\n</head>\n<body text=#000000 bgcolor=#ffffff>\n<h1>Error: Server Error</h1>\n<h2>The server encountered a temporary error and could not complete your request.<p>Please try again in 30 seconds.</h2>\n<h2></h2>\n</body></html>\n"}
36
+ {"time":"2026-01-14T22:03:26.109590923Z","level":"INFO","msg":"api: retrying HTTP error","status":502,"url":"https://api.wandb.ai/files/zaydzuhri/fla/-top.code.7B.batch8.seqlen4096.context4096.warmup400.update2.steps40000.lr2e-5.cosine-202601140834/file_stream","body":"\n<html><head>\n<meta http-equiv=\"content-type\" content=\"text/html;charset=utf-8\">\n<title>502 Server Error</title>\n</head>\n<body text=#000000 bgcolor=#ffffff>\n<h1>Error: Server Error</h1>\n<h2>The server encountered a temporary error and could not complete your request.<p>Please try again in 30 seconds.</h2>\n<h2></h2>\n</body></html>\n"}
37
+ {"time":"2026-01-14T22:53:41.136507439Z","level":"INFO","msg":"api: retrying HTTP error","status":502,"url":"https://api.wandb.ai/files/zaydzuhri/fla/-top.code.7B.batch8.seqlen4096.context4096.warmup400.update2.steps40000.lr2e-5.cosine-202601140834/file_stream","body":"\n<html><head>\n<meta http-equiv=\"content-type\" content=\"text/html;charset=utf-8\">\n<title>502 Server Error</title>\n</head>\n<body text=#000000 bgcolor=#ffffff>\n<h1>Error: Server Error</h1>\n<h2>The server encountered a temporary error and could not complete your request.<p>Please try again in 30 seconds.</h2>\n<h2></h2>\n</body></html>\n"}
38
+ {"time":"2026-01-14T23:18:04.214239942Z","level":"INFO","msg":"api: retrying HTTP error","status":502,"url":"https://api.wandb.ai/files/zaydzuhri/fla/-top.code.7B.batch8.seqlen4096.context4096.warmup400.update2.steps40000.lr2e-5.cosine-202601140834/file_stream","body":"\n<html><head>\n<meta http-equiv=\"content-type\" content=\"text/html;charset=utf-8\">\n<title>502 Server Error</title>\n</head>\n<body text=#000000 bgcolor=#ffffff>\n<h1>Error: Server Error</h1>\n<h2>The server encountered a temporary error and could not complete your request.<p>Please try again in 30 seconds.</h2>\n<h2></h2>\n</body></html>\n"}
39
+ {"time":"2026-01-14T23:34:17.461111568Z","level":"INFO","msg":"api: retrying HTTP error","status":502,"url":"https://api.wandb.ai/files/zaydzuhri/fla/-top.code.7B.batch8.seqlen4096.context4096.warmup400.update2.steps40000.lr2e-5.cosine-202601140834/file_stream","body":"\n<html><head>\n<meta http-equiv=\"content-type\" content=\"text/html;charset=utf-8\">\n<title>502 Server Error</title>\n</head>\n<body text=#000000 bgcolor=#ffffff>\n<h1>Error: Server Error</h1>\n<h2>The server encountered a temporary error and could not complete your request.<p>Please try again in 30 seconds.</h2>\n<h2></h2>\n</body></html>\n"}
40
+ {"time":"2026-01-15T00:05:40.866317923Z","level":"INFO","msg":"api: retrying error","error":"Post \"https://api.wandb.ai/files/zaydzuhri/fla/-top.code.7B.batch8.seqlen4096.context4096.warmup400.update2.steps40000.lr2e-5.cosine-202601140834/file_stream\": dial tcp: lookup api.wandb.ai on 10.43.0.10:53: read udp 10.42.227.13:40646->10.43.0.10:53: i/o timeout"}
41
+ {"time":"2026-01-15T00:09:26.482014139Z","level":"INFO","msg":"api: retrying HTTP error","status":502,"url":"https://api.wandb.ai/files/zaydzuhri/fla/-top.code.7B.batch8.seqlen4096.context4096.warmup400.update2.steps40000.lr2e-5.cosine-202601140834/file_stream","body":"\n<html><head>\n<meta http-equiv=\"content-type\" content=\"text/html;charset=utf-8\">\n<title>502 Server Error</title>\n</head>\n<body text=#000000 bgcolor=#ffffff>\n<h1>Error: Server Error</h1>\n<h2>The server encountered a temporary error and could not complete your request.<p>Please try again in 30 seconds.</h2>\n<h2></h2>\n</body></html>\n"}
42
+ {"time":"2026-01-15T00:14:06.753203823Z","level":"INFO","msg":"api: retrying error","error":"Post \"https://api.wandb.ai/files/zaydzuhri/fla/-top.code.7B.batch8.seqlen4096.context4096.warmup400.update2.steps40000.lr2e-5.cosine-202601140834/file_stream\": dial tcp: lookup api.wandb.ai on 10.43.0.10:53: read udp 10.42.227.13:50085->10.43.0.10:53: i/o timeout"}
43
+ {"time":"2026-01-15T00:20:58.119418478Z","level":"INFO","msg":"api: retrying HTTP error","status":502,"url":"https://api.wandb.ai/files/zaydzuhri/fla/-top.code.7B.batch8.seqlen4096.context4096.warmup400.update2.steps40000.lr2e-5.cosine-202601140834/file_stream","body":"\n<html><head>\n<meta http-equiv=\"content-type\" content=\"text/html;charset=utf-8\">\n<title>502 Server Error</title>\n</head>\n<body text=#000000 bgcolor=#ffffff>\n<h1>Error: Server Error</h1>\n<h2>The server encountered a temporary error and could not complete your request.<p>Please try again in 30 seconds.</h2>\n<h2></h2>\n</body></html>\n"}
44
+ {"time":"2026-01-15T00:35:55.616848461Z","level":"INFO","msg":"api: retrying error","error":"Post \"https://api.wandb.ai/files/zaydzuhri/fla/-top.code.7B.batch8.seqlen4096.context4096.warmup400.update2.steps40000.lr2e-5.cosine-202601140834/file_stream\": dial tcp: lookup api.wandb.ai on 10.43.0.10:53: read udp 10.42.227.13:51127->10.43.0.10:53: i/o timeout"}
45
+ {"time":"2026-01-15T00:53:12.727126278Z","level":"INFO","msg":"api: retrying error","error":"Post \"https://api.wandb.ai/files/zaydzuhri/fla/-top.code.7B.batch8.seqlen4096.context4096.warmup400.update2.steps40000.lr2e-5.cosine-202601140834/file_stream\": unexpected EOF"}
46
+ {"time":"2026-01-15T00:57:04.800943133Z","level":"INFO","msg":"api: retrying error","error":"Post \"https://api.wandb.ai/files/zaydzuhri/fla/-top.code.7B.batch8.seqlen4096.context4096.warmup400.update2.steps40000.lr2e-5.cosine-202601140834/file_stream\": dial tcp: lookup api.wandb.ai on 10.43.0.10:53: read udp 10.42.227.13:57532->10.43.0.10:53: i/o timeout"}
47
+ {"time":"2026-01-15T02:26:47.673452244Z","level":"INFO","msg":"api: retrying HTTP error","status":502,"url":"https://api.wandb.ai/files/zaydzuhri/fla/-top.code.7B.batch8.seqlen4096.context4096.warmup400.update2.steps40000.lr2e-5.cosine-202601140834/file_stream","body":"\n<html><head>\n<meta http-equiv=\"content-type\" content=\"text/html;charset=utf-8\">\n<title>502 Server Error</title>\n</head>\n<body text=#000000 bgcolor=#ffffff>\n<h1>Error: Server Error</h1>\n<h2>The server encountered a temporary error and could not complete your request.<p>Please try again in 30 seconds.</h2>\n<h2></h2>\n</body></html>\n"}
48
+ {"time":"2026-01-15T02:29:05.611180187Z","level":"INFO","msg":"api: retrying error","error":"Post \"https://api.wandb.ai/files/zaydzuhri/fla/-top.code.7B.batch8.seqlen4096.context4096.warmup400.update2.steps40000.lr2e-5.cosine-202601140834/file_stream\": dial tcp: lookup api.wandb.ai on 10.43.0.10:53: read udp 10.42.227.13:51078->10.43.0.10:53: i/o timeout"}
49
+ {"time":"2026-01-15T03:27:56.134074839Z","level":"INFO","msg":"api: retrying HTTP error","status":502,"url":"https://api.wandb.ai/files/zaydzuhri/fla/-top.code.7B.batch8.seqlen4096.context4096.warmup400.update2.steps40000.lr2e-5.cosine-202601140834/file_stream","body":"\n<html><head>\n<meta http-equiv=\"content-type\" content=\"text/html;charset=utf-8\">\n<title>502 Server Error</title>\n</head>\n<body text=#000000 bgcolor=#ffffff>\n<h1>Error: Server Error</h1>\n<h2>The server encountered a temporary error and could not complete your request.<p>Please try again in 30 seconds.</h2>\n<h2></h2>\n</body></html>\n"}
50
+ {"time":"2026-01-15T03:55:11.994175322Z","level":"INFO","msg":"api: retrying HTTP error","status":502,"url":"https://api.wandb.ai/files/zaydzuhri/fla/-top.code.7B.batch8.seqlen4096.context4096.warmup400.update2.steps40000.lr2e-5.cosine-202601140834/file_stream","body":"\n<html><head>\n<meta http-equiv=\"content-type\" content=\"text/html;charset=utf-8\">\n<title>502 Server Error</title>\n</head>\n<body text=#000000 bgcolor=#ffffff>\n<h1>Error: Server Error</h1>\n<h2>The server encountered a temporary error and could not complete your request.<p>Please try again in 30 seconds.</h2>\n<h2></h2>\n</body></html>\n"}
51
+ {"time":"2026-01-15T04:24:35.605673891Z","level":"INFO","msg":"api: retrying error","error":"Post \"https://api.wandb.ai/files/zaydzuhri/fla/-top.code.7B.batch8.seqlen4096.context4096.warmup400.update2.steps40000.lr2e-5.cosine-202601140834/file_stream\": dial tcp: lookup api.wandb.ai on 10.43.0.10:53: read udp 10.42.227.13:50208->10.43.0.10:53: i/o timeout"}
52
+ {"time":"2026-01-15T04:37:20.602369981Z","level":"INFO","msg":"api: retrying error","error":"Post \"https://api.wandb.ai/files/zaydzuhri/fla/-top.code.7B.batch8.seqlen4096.context4096.warmup400.update2.steps40000.lr2e-5.cosine-202601140834/file_stream\": dial tcp: lookup api.wandb.ai on 10.43.0.10:53: read udp 10.42.227.13:43934->10.43.0.10:53: i/o timeout"}
53
+ {"time":"2026-01-15T04:41:45.608069565Z","level":"INFO","msg":"api: retrying error","error":"Post \"https://api.wandb.ai/files/zaydzuhri/fla/-top.code.7B.batch8.seqlen4096.context4096.warmup400.update2.steps40000.lr2e-5.cosine-202601140834/file_stream\": dial tcp: lookup api.wandb.ai on 10.43.0.10:53: read udp 10.42.227.13:45762->10.43.0.10:53: i/o timeout"}
54
+ {"time":"2026-01-15T05:00:55.619281375Z","level":"INFO","msg":"api: retrying error","error":"Post \"https://api.wandb.ai/files/zaydzuhri/fla/-top.code.7B.batch8.seqlen4096.context4096.warmup400.update2.steps40000.lr2e-5.cosine-202601140834/file_stream\": dial tcp: lookup api.wandb.ai on 10.43.0.10:53: read udp 10.42.227.13:43775->10.43.0.10:53: i/o timeout"}
55
+ {"time":"2026-01-15T05:28:30.612971338Z","level":"INFO","msg":"api: retrying error","error":"Post \"https://api.wandb.ai/files/zaydzuhri/fla/-top.code.7B.batch8.seqlen4096.context4096.warmup400.update2.steps40000.lr2e-5.cosine-202601140834/file_stream\": dial tcp: lookup api.wandb.ai on 10.43.0.10:53: read udp 10.42.227.13:46075->10.43.0.10:53: i/o timeout"}
56
+ {"time":"2026-01-15T05:29:33.531886958Z","level":"INFO","msg":"api: retrying error","error":"Post \"https://api.wandb.ai/files/zaydzuhri/fla/-top.code.7B.batch8.seqlen4096.context4096.warmup400.update2.steps40000.lr2e-5.cosine-202601140834/file_stream\": dial tcp: lookup api.wandb.ai on 10.43.0.10:53: read udp 10.42.227.13:42432->10.43.0.10:53: i/o timeout"}
57
+ {"time":"2026-01-15T05:35:40.072772995Z","level":"INFO","msg":"api: retrying error","error":"Post \"https://api.wandb.ai/files/zaydzuhri/fla/-top.code.7B.batch8.seqlen4096.context4096.warmup400.update2.steps40000.lr2e-5.cosine-202601140834/file_stream\": dial tcp: lookup api.wandb.ai on 10.43.0.10:53: read udp 10.42.227.13:59878->10.43.0.10:53: i/o timeout"}
58
+ {"time":"2026-01-15T05:36:43.896495268Z","level":"INFO","msg":"api: retrying error","error":"Post \"https://api.wandb.ai/files/zaydzuhri/fla/-top.code.7B.batch8.seqlen4096.context4096.warmup400.update2.steps40000.lr2e-5.cosine-202601140834/file_stream\": unexpected EOF"}
59
+ {"time":"2026-01-15T05:38:38.262916762Z","level":"INFO","msg":"api: retrying error","error":"Post \"https://api.wandb.ai/files/zaydzuhri/fla/-top.code.7B.batch8.seqlen4096.context4096.warmup400.update2.steps40000.lr2e-5.cosine-202601140834/file_stream\": dial tcp: lookup api.wandb.ai on 10.43.0.10:53: read udp 10.42.227.13:57324->10.43.0.10:53: i/o timeout"}
60
+ {"time":"2026-01-15T07:15:30.362963603Z","level":"INFO","msg":"api: retrying error","error":"Post \"https://api.wandb.ai/files/zaydzuhri/fla/-top.code.7B.batch8.seqlen4096.context4096.warmup400.update2.steps40000.lr2e-5.cosine-202601140834/file_stream\": dial tcp: lookup api.wandb.ai on 10.43.0.10:53: read udp 10.42.227.13:38981->10.43.0.10:53: i/o timeout"}
61
+ {"time":"2026-01-15T08:02:42.136897075Z","level":"INFO","msg":"api: retrying HTTP error","status":502,"url":"https://api.wandb.ai/files/zaydzuhri/fla/-top.code.7B.batch8.seqlen4096.context4096.warmup400.update2.steps40000.lr2e-5.cosine-202601140834/file_stream","body":"\n<html><head>\n<meta http-equiv=\"content-type\" content=\"text/html;charset=utf-8\">\n<title>502 Server Error</title>\n</head>\n<body text=#000000 bgcolor=#ffffff>\n<h1>Error: Server Error</h1>\n<h2>The server encountered a temporary error and could not complete your request.<p>Please try again in 30 seconds.</h2>\n<h2></h2>\n</body></html>\n"}
62
+ {"time":"2026-01-15T08:14:04.800047375Z","level":"INFO","msg":"api: retrying error","error":"Post \"https://api.wandb.ai/files/zaydzuhri/fla/-top.code.7B.batch8.seqlen4096.context4096.warmup400.update2.steps40000.lr2e-5.cosine-202601140834/file_stream\": dial tcp: lookup api.wandb.ai on 10.43.0.10:53: read udp 10.42.227.13:43963->10.43.0.10:53: i/o timeout"}
63
+ {"time":"2026-01-15T10:17:27.761701465Z","level":"INFO","msg":"api: retrying HTTP error","status":502,"url":"https://api.wandb.ai/files/zaydzuhri/fla/-top.code.7B.batch8.seqlen4096.context4096.warmup400.update2.steps40000.lr2e-5.cosine-202601140834/file_stream","body":"\n<html><head>\n<meta http-equiv=\"content-type\" content=\"text/html;charset=utf-8\">\n<title>502 Server Error</title>\n</head>\n<body text=#000000 bgcolor=#ffffff>\n<h1>Error: Server Error</h1>\n<h2>The server encountered a temporary error and could not complete your request.<p>Please try again in 30 seconds.</h2>\n<h2></h2>\n</body></html>\n"}
64
+ {"time":"2026-01-15T13:37:53.828365872Z","level":"INFO","msg":"api: retrying error","error":"Post \"https://api.wandb.ai/files/zaydzuhri/fla/-top.code.7B.batch8.seqlen4096.context4096.warmup400.update2.steps40000.lr2e-5.cosine-202601140834/file_stream\": dial tcp: lookup api.wandb.ai on 10.43.0.10:53: read udp 10.42.227.13:33487->10.43.0.10:53: i/o timeout"}
65
+ {"time":"2026-01-15T13:51:54.922768702Z","level":"INFO","msg":"api: retrying error","error":"Post \"https://api.wandb.ai/files/zaydzuhri/fla/-top.code.7B.batch8.seqlen4096.context4096.warmup400.update2.steps40000.lr2e-5.cosine-202601140834/file_stream\": dial tcp: lookup api.wandb.ai on 10.43.0.10:53: read udp 10.42.227.13:57005->10.43.0.10:53: i/o timeout"}
66
+ {"time":"2026-01-15T15:08:27.572248208Z","level":"INFO","msg":"api: retrying HTTP error","status":502,"url":"https://api.wandb.ai/files/zaydzuhri/fla/-top.code.7B.batch8.seqlen4096.context4096.warmup400.update2.steps40000.lr2e-5.cosine-202601140834/file_stream","body":"\n<html><head>\n<meta http-equiv=\"content-type\" content=\"text/html;charset=utf-8\">\n<title>502 Server Error</title>\n</head>\n<body text=#000000 bgcolor=#ffffff>\n<h1>Error: Server Error</h1>\n<h2>The server encountered a temporary error and could not complete your request.<p>Please try again in 30 seconds.</h2>\n<h2></h2>\n</body></html>\n"}
67
+ {"time":"2026-01-15T15:15:56.876304792Z","level":"INFO","msg":"api: retrying HTTP error","status":502,"url":"https://api.wandb.ai/files/zaydzuhri/fla/-top.code.7B.batch8.seqlen4096.context4096.warmup400.update2.steps40000.lr2e-5.cosine-202601140834/file_stream","body":"\n<html><head>\n<meta http-equiv=\"content-type\" content=\"text/html;charset=utf-8\">\n<title>502 Server Error</title>\n</head>\n<body text=#000000 bgcolor=#ffffff>\n<h1>Error: Server Error</h1>\n<h2>The server encountered a temporary error and could not complete your request.<p>Please try again in 30 seconds.</h2>\n<h2></h2>\n</body></html>\n"}
68
+ {"time":"2026-01-15T15:16:14.129525568Z","level":"INFO","msg":"api: retrying HTTP error","status":502,"url":"https://api.wandb.ai/files/zaydzuhri/fla/-top.code.7B.batch8.seqlen4096.context4096.warmup400.update2.steps40000.lr2e-5.cosine-202601140834/file_stream","body":"\n<html><head>\n<meta http-equiv=\"content-type\" content=\"text/html;charset=utf-8\">\n<title>502 Server Error</title>\n</head>\n<body text=#000000 bgcolor=#ffffff>\n<h1>Error: Server Error</h1>\n<h2>The server encountered a temporary error and could not complete your request.<p>Please try again in 30 seconds.</h2>\n<h2></h2>\n</body></html>\n"}
69
+ {"time":"2026-01-15T16:11:26.701614401Z","level":"INFO","msg":"api: retrying HTTP error","status":502,"url":"https://api.wandb.ai/files/zaydzuhri/fla/-top.code.7B.batch8.seqlen4096.context4096.warmup400.update2.steps40000.lr2e-5.cosine-202601140834/file_stream","body":"\n<html><head>\n<meta http-equiv=\"content-type\" content=\"text/html;charset=utf-8\">\n<title>502 Server Error</title>\n</head>\n<body text=#000000 bgcolor=#ffffff>\n<h1>Error: Server Error</h1>\n<h2>The server encountered a temporary error and could not complete your request.<p>Please try again in 30 seconds.</h2>\n<h2></h2>\n</body></html>\n"}
70
+ {"time":"2026-01-15T16:48:20.611977635Z","level":"INFO","msg":"api: retrying error","error":"Post \"https://api.wandb.ai/files/zaydzuhri/fla/-top.code.7B.batch8.seqlen4096.context4096.warmup400.update2.steps40000.lr2e-5.cosine-202601140834/file_stream\": dial tcp: lookup api.wandb.ai on 10.43.0.10:53: read udp 10.42.227.13:51630->10.43.0.10:53: i/o timeout"}
71
+ {"time":"2026-01-15T16:58:27.768733378Z","level":"INFO","msg":"api: retrying HTTP error","status":502,"url":"https://api.wandb.ai/files/zaydzuhri/fla/-top.code.7B.batch8.seqlen4096.context4096.warmup400.update2.steps40000.lr2e-5.cosine-202601140834/file_stream","body":"\n<html><head>\n<meta http-equiv=\"content-type\" content=\"text/html;charset=utf-8\">\n<title>502 Server Error</title>\n</head>\n<body text=#000000 bgcolor=#ffffff>\n<h1>Error: Server Error</h1>\n<h2>The server encountered a temporary error and could not complete your request.<p>Please try again in 30 seconds.</h2>\n<h2></h2>\n</body></html>\n"}
72
+ {"time":"2026-01-15T17:01:27.738651643Z","level":"INFO","msg":"api: retrying HTTP error","status":502,"url":"https://api.wandb.ai/files/zaydzuhri/fla/-top.code.7B.batch8.seqlen4096.context4096.warmup400.update2.steps40000.lr2e-5.cosine-202601140834/file_stream","body":"\n<html><head>\n<meta http-equiv=\"content-type\" content=\"text/html;charset=utf-8\">\n<title>502 Server Error</title>\n</head>\n<body text=#000000 bgcolor=#ffffff>\n<h1>Error: Server Error</h1>\n<h2>The server encountered a temporary error and could not complete your request.<p>Please try again in 30 seconds.</h2>\n<h2></h2>\n</body></html>\n"}
73
+ {"time":"2026-01-15T17:04:07.656661744Z","level":"INFO","msg":"api: retrying HTTP error","status":502,"url":"https://api.wandb.ai/files/zaydzuhri/fla/-top.code.7B.batch8.seqlen4096.context4096.warmup400.update2.steps40000.lr2e-5.cosine-202601140834/file_stream","body":"\n<html><head>\n<meta http-equiv=\"content-type\" content=\"text/html;charset=utf-8\">\n<title>502 Server Error</title>\n</head>\n<body text=#000000 bgcolor=#ffffff>\n<h1>Error: Server Error</h1>\n<h2>The server encountered a temporary error and could not complete your request.<p>Please try again in 30 seconds.</h2>\n<h2></h2>\n</body></html>\n"}
74
+ {"time":"2026-01-15T17:43:56.168098895Z","level":"INFO","msg":"api: retrying HTTP error","status":502,"url":"https://api.wandb.ai/files/zaydzuhri/fla/-top.code.7B.batch8.seqlen4096.context4096.warmup400.update2.steps40000.lr2e-5.cosine-202601140834/file_stream","body":"\n<html><head>\n<meta http-equiv=\"content-type\" content=\"text/html;charset=utf-8\">\n<title>502 Server Error</title>\n</head>\n<body text=#000000 bgcolor=#ffffff>\n<h1>Error: Server Error</h1>\n<h2>The server encountered a temporary error and could not complete your request.<p>Please try again in 30 seconds.</h2>\n<h2></h2>\n</body></html>\n"}
75
+ {"time":"2026-01-15T19:48:55.070431388Z","level":"INFO","msg":"api: retrying error","error":"Post \"https://api.wandb.ai/files/zaydzuhri/fla/-top.code.7B.batch8.seqlen4096.context4096.warmup400.update2.steps40000.lr2e-5.cosine-202601140834/file_stream\": dial tcp: lookup api.wandb.ai on 10.43.0.10:53: read udp 10.42.227.13:51270->10.43.0.10:53: i/o timeout"}
76
+ {"time":"2026-01-15T20:09:34.79778952Z","level":"INFO","msg":"api: retrying error","error":"Post \"https://api.wandb.ai/files/zaydzuhri/fla/-top.code.7B.batch8.seqlen4096.context4096.warmup400.update2.steps40000.lr2e-5.cosine-202601140834/file_stream\": dial tcp: lookup api.wandb.ai on 10.43.0.10:53: read udp 10.42.227.13:54431->10.43.0.10:53: i/o timeout"}
77
+ {"time":"2026-01-15T21:01:41.803987004Z","level":"INFO","msg":"api: retrying error","error":"Post \"https://api.wandb.ai/files/zaydzuhri/fla/-top.code.7B.batch8.seqlen4096.context4096.warmup400.update2.steps40000.lr2e-5.cosine-202601140834/file_stream\": dial tcp: lookup api.wandb.ai on 10.43.0.10:53: read udp 10.42.227.13:47517->10.43.0.10:53: i/o timeout"}
78
+ {"time":"2026-01-15T21:05:05.631884156Z","level":"INFO","msg":"api: retrying error","error":"Post \"https://api.wandb.ai/files/zaydzuhri/fla/-top.code.7B.batch8.seqlen4096.context4096.warmup400.update2.steps40000.lr2e-5.cosine-202601140834/file_stream\": dial tcp: lookup api.wandb.ai on 10.43.0.10:53: read udp 10.42.227.13:52918->10.43.0.10:53: i/o timeout"}
79
+ {"time":"2026-01-15T21:22:05.626770979Z","level":"INFO","msg":"api: retrying error","error":"Post \"https://api.wandb.ai/files/zaydzuhri/fla/-top.code.7B.batch8.seqlen4096.context4096.warmup400.update2.steps40000.lr2e-5.cosine-202601140834/file_stream\": dial tcp: lookup api.wandb.ai on 10.43.0.10:53: read udp 10.42.227.13:49610->10.43.0.10:53: i/o timeout"}
80
+ {"time":"2026-01-15T21:52:50.642808493Z","level":"INFO","msg":"api: retrying error","error":"Post \"https://api.wandb.ai/files/zaydzuhri/fla/-top.code.7B.batch8.seqlen4096.context4096.warmup400.update2.steps40000.lr2e-5.cosine-202601140834/file_stream\": dial tcp: lookup api.wandb.ai on 10.43.0.10:53: read udp 10.42.227.13:47673->10.43.0.10:53: i/o timeout"}
81
+ {"time":"2026-01-15T21:58:20.618768345Z","level":"INFO","msg":"api: retrying error","error":"Post \"https://api.wandb.ai/files/zaydzuhri/fla/-top.code.7B.batch8.seqlen4096.context4096.warmup400.update2.steps40000.lr2e-5.cosine-202601140834/file_stream\": dial tcp: lookup api.wandb.ai on 10.43.0.10:53: read udp 10.42.227.13:36948->10.43.0.10:53: i/o timeout"}
82
+ {"time":"2026-01-15T22:27:52.156100864Z","level":"INFO","msg":"api: retrying error","error":"Post \"https://api.wandb.ai/files/zaydzuhri/fla/-top.code.7B.batch8.seqlen4096.context4096.warmup400.update2.steps40000.lr2e-5.cosine-202601140834/file_stream\": dial tcp: lookup api.wandb.ai on 10.43.0.10:53: read udp 10.42.227.13:56570->10.43.0.10:53: i/o timeout"}
83
+ {"time":"2026-01-15T22:30:37.146827256Z","level":"INFO","msg":"api: retrying error","error":"Post \"https://api.wandb.ai/files/zaydzuhri/fla/-top.code.7B.batch8.seqlen4096.context4096.warmup400.update2.steps40000.lr2e-5.cosine-202601140834/file_stream\": dial tcp: lookup api.wandb.ai on 10.43.0.10:53: read udp 10.42.227.13:38625->10.43.0.10:53: i/o timeout"}
84
+ {"time":"2026-01-15T23:30:42.173288276Z","level":"INFO","msg":"api: retrying error","error":"Post \"https://api.wandb.ai/files/zaydzuhri/fla/-top.code.7B.batch8.seqlen4096.context4096.warmup400.update2.steps40000.lr2e-5.cosine-202601140834/file_stream\": dial tcp: lookup api.wandb.ai on 10.43.0.10:53: read udp 10.42.227.13:48544->10.43.0.10:53: i/o timeout"}
85
+ {"time":"2026-01-15T23:32:27.158278584Z","level":"INFO","msg":"api: retrying error","error":"Post \"https://api.wandb.ai/files/zaydzuhri/fla/-top.code.7B.batch8.seqlen4096.context4096.warmup400.update2.steps40000.lr2e-5.cosine-202601140834/file_stream\": dial tcp: lookup api.wandb.ai on 10.43.0.10:53: read udp 10.42.227.13:57974->10.43.0.10:53: i/o timeout"}
86
+ {"time":"2026-01-16T00:34:57.666020998Z","level":"INFO","msg":"api: retrying HTTP error","status":502,"url":"https://api.wandb.ai/files/zaydzuhri/fla/-top.code.7B.batch8.seqlen4096.context4096.warmup400.update2.steps40000.lr2e-5.cosine-202601140834/file_stream","body":"\n<html><head>\n<meta http-equiv=\"content-type\" content=\"text/html;charset=utf-8\">\n<title>502 Server Error</title>\n</head>\n<body text=#000000 bgcolor=#ffffff>\n<h1>Error: Server Error</h1>\n<h2>The server encountered a temporary error and could not complete your request.<p>Please try again in 30 seconds.</h2>\n<h2></h2>\n</body></html>\n"}
87
+ {"time":"2026-01-16T07:28:21.197815253Z","level":"INFO","msg":"api: retrying error","error":"Post \"https://api.wandb.ai/files/zaydzuhri/fla/-top.code.7B.batch8.seqlen4096.context4096.warmup400.update2.steps40000.lr2e-5.cosine-202601140834/file_stream\": dial tcp: lookup api.wandb.ai on 10.43.0.10:53: read udp 10.42.227.13:60604->10.43.0.10:53: i/o timeout"}
88
+ {"time":"2026-01-16T08:45:20.999483918Z","level":"INFO","msg":"api: retrying error","error":"Post \"https://api.wandb.ai/files/zaydzuhri/fla/-top.code.7B.batch8.seqlen4096.context4096.warmup400.update2.steps40000.lr2e-5.cosine-202601140834/file_stream\": dial tcp: lookup api.wandb.ai on 10.43.0.10:53: read udp 10.42.227.13:57880->10.43.0.10:53: i/o timeout"}
89
+ {"time":"2026-01-16T08:47:26.004410083Z","level":"INFO","msg":"api: retrying error","error":"Post \"https://api.wandb.ai/files/zaydzuhri/fla/-top.code.7B.batch8.seqlen4096.context4096.warmup400.update2.steps40000.lr2e-5.cosine-202601140834/file_stream\": dial tcp: lookup api.wandb.ai on 10.43.0.10:53: read udp 10.42.227.13:38091->10.43.0.10:53: i/o timeout"}
90
+ {"time":"2026-01-16T09:51:50.1867647Z","level":"INFO","msg":"api: retrying error","error":"Post \"https://api.wandb.ai/files/zaydzuhri/fla/-top.code.7B.batch8.seqlen4096.context4096.warmup400.update2.steps40000.lr2e-5.cosine-202601140834/file_stream\": dial tcp: lookup api.wandb.ai on 10.43.0.10:53: read udp 10.42.227.13:41800->10.43.0.10:53: i/o timeout"}
91
+ {"time":"2026-01-16T09:56:22.256493752Z","level":"INFO","msg":"api: retrying error","error":"Post \"https://api.wandb.ai/files/zaydzuhri/fla/-top.code.7B.batch8.seqlen4096.context4096.warmup400.update2.steps40000.lr2e-5.cosine-202601140834/file_stream\": unexpected EOF"}
92
+ {"time":"2026-01-16T09:57:07.582527561Z","level":"INFO","msg":"api: retrying error","error":"Post \"https://api.wandb.ai/files/zaydzuhri/fla/-top.code.7B.batch8.seqlen4096.context4096.warmup400.update2.steps40000.lr2e-5.cosine-202601140834/file_stream\": dial tcp: lookup api.wandb.ai on 10.43.0.10:53: read udp 10.42.227.13:42185->10.43.0.10:53: i/o timeout"}
93
+ {"time":"2026-01-16T09:59:01.852668736Z","level":"INFO","msg":"api: retrying error","error":"Post \"https://api.wandb.ai/files/zaydzuhri/fla/-top.code.7B.batch8.seqlen4096.context4096.warmup400.update2.steps40000.lr2e-5.cosine-202601140834/file_stream\": dial tcp: lookup api.wandb.ai on 10.43.0.10:53: read udp 10.42.227.13:58029->10.43.0.10:53: i/o timeout"}
94
+ {"time":"2026-01-16T10:00:49.795503168Z","level":"INFO","msg":"api: retrying error","error":"Post \"https://api.wandb.ai/files/zaydzuhri/fla/-top.code.7B.batch8.seqlen4096.context4096.warmup400.update2.steps40000.lr2e-5.cosine-202601140834/file_stream\": dial tcp: lookup api.wandb.ai on 10.43.0.10:53: read udp 10.42.227.13:41189->10.43.0.10:53: i/o timeout"}
95
+ {"time":"2026-01-16T10:01:53.920461569Z","level":"INFO","msg":"api: retrying error","error":"Post \"https://api.wandb.ai/files/zaydzuhri/fla/-top.code.7B.batch8.seqlen4096.context4096.warmup400.update2.steps40000.lr2e-5.cosine-202601140834/file_stream\": dial tcp: lookup api.wandb.ai on 10.43.0.10:53: read udp 10.42.227.13:52395->10.43.0.10:53: i/o timeout"}
96
+ {"time":"2026-01-16T10:03:19.040721217Z","level":"INFO","msg":"api: retrying error","error":"Post \"https://api.wandb.ai/files/zaydzuhri/fla/-top.code.7B.batch8.seqlen4096.context4096.warmup400.update2.steps40000.lr2e-5.cosine-202601140834/file_stream\": unexpected EOF"}
97
+ {"time":"2026-01-16T10:05:55.076170566Z","level":"INFO","msg":"api: retrying error","error":"Post \"https://api.wandb.ai/files/zaydzuhri/fla/-top.code.7B.batch8.seqlen4096.context4096.warmup400.update2.steps40000.lr2e-5.cosine-202601140834/file_stream\": dial tcp: lookup api.wandb.ai on 10.43.0.10:53: read udp 10.42.227.13:56833->10.43.0.10:53: i/o timeout"}
98
+ {"time":"2026-01-16T10:06:27.942002595Z","level":"INFO","msg":"api: retrying error","error":"Post \"https://api.wandb.ai/files/zaydzuhri/fla/-top.code.7B.batch8.seqlen4096.context4096.warmup400.update2.steps40000.lr2e-5.cosine-202601140834/file_stream\": dial tcp: lookup api.wandb.ai on 10.43.0.10:53: read udp 10.42.227.13:51238->10.43.0.10:53: i/o timeout"}
99
+ {"time":"2026-01-16T10:07:30.42021806Z","level":"INFO","msg":"api: retrying error","error":"Post \"https://api.wandb.ai/files/zaydzuhri/fla/-top.code.7B.batch8.seqlen4096.context4096.warmup400.update2.steps40000.lr2e-5.cosine-202601140834/file_stream\": unexpected EOF"}
100
+ {"time":"2026-01-16T10:10:43.226139011Z","level":"INFO","msg":"api: retrying error","error":"Post \"https://api.wandb.ai/files/zaydzuhri/fla/-top.code.7B.batch8.seqlen4096.context4096.warmup400.update2.steps40000.lr2e-5.cosine-202601140834/file_stream\": context deadline exceeded"}
101
+ {"time":"2026-01-16T10:16:22.590616038Z","level":"INFO","msg":"api: retrying error","error":"Post \"https://api.wandb.ai/files/zaydzuhri/fla/-top.code.7B.batch8.seqlen4096.context4096.warmup400.update2.steps40000.lr2e-5.cosine-202601140834/file_stream\": dial tcp: lookup api.wandb.ai on 10.43.0.10:53: read udp 10.42.227.13:40117->10.43.0.10:53: i/o timeout"}
102
+ {"time":"2026-01-16T10:16:50.507907411Z","level":"INFO","msg":"api: retrying error","error":"Post \"https://api.wandb.ai/files/zaydzuhri/fla/-top.code.7B.batch8.seqlen4096.context4096.warmup400.update2.steps40000.lr2e-5.cosine-202601140834/file_stream\": dial tcp: lookup api.wandb.ai on 10.43.0.10:53: read udp 10.42.227.13:47171->10.43.0.10:53: i/o timeout"}
103
+ {"time":"2026-01-16T10:24:55.070449016Z","level":"INFO","msg":"api: retrying error","error":"Post \"https://api.wandb.ai/files/zaydzuhri/fla/-top.code.7B.batch8.seqlen4096.context4096.warmup400.update2.steps40000.lr2e-5.cosine-202601140834/file_stream\": dial tcp: lookup api.wandb.ai on 10.43.0.10:53: read udp 10.42.227.13:34432->10.43.0.10:53: i/o timeout"}
104
+ {"time":"2026-01-16T10:25:18.367780544Z","level":"INFO","msg":"api: retrying error","error":"Post \"https://api.wandb.ai/files/zaydzuhri/fla/-top.code.7B.batch8.seqlen4096.context4096.warmup400.update2.steps40000.lr2e-5.cosine-202601140834/file_stream\": dial tcp: lookup api.wandb.ai on 10.43.0.10:53: read udp 10.42.227.13:36852->10.43.0.10:53: i/o timeout"}
105
+ {"time":"2026-01-16T10:26:55.818200633Z","level":"INFO","msg":"api: retrying error","error":"Post \"https://api.wandb.ai/files/zaydzuhri/fla/-top.code.7B.batch8.seqlen4096.context4096.warmup400.update2.steps40000.lr2e-5.cosine-202601140834/file_stream\": unexpected EOF"}
106
+ {"time":"2026-01-16T10:28:34.800025282Z","level":"INFO","msg":"api: retrying error","error":"Post \"https://api.wandb.ai/files/zaydzuhri/fla/-top.code.7B.batch8.seqlen4096.context4096.warmup400.update2.steps40000.lr2e-5.cosine-202601140834/file_stream\": dial tcp: lookup api.wandb.ai on 10.43.0.10:53: read udp 10.42.227.13:59732->10.43.0.10:53: i/o timeout"}
107
+ {"time":"2026-01-16T10:34:43.919151425Z","level":"INFO","msg":"api: retrying error","error":"Post \"https://api.wandb.ai/files/zaydzuhri/fla/-top.code.7B.batch8.seqlen4096.context4096.warmup400.update2.steps40000.lr2e-5.cosine-202601140834/file_stream\": dial tcp: lookup api.wandb.ai on 10.43.0.10:53: read udp 10.42.227.13:60755->10.43.0.10:53: i/o timeout"}
108
+ {"time":"2026-01-16T10:38:34.603516707Z","level":"INFO","msg":"api: retrying error","error":"Post \"https://api.wandb.ai/files/zaydzuhri/fla/-top.code.7B.batch8.seqlen4096.context4096.warmup400.update2.steps40000.lr2e-5.cosine-202601140834/file_stream\": dial tcp: lookup api.wandb.ai on 10.43.0.10:53: read udp 10.42.227.13:58996->10.43.0.10:53: i/o timeout"}
109
+ {"time":"2026-01-16T10:44:20.341045071Z","level":"INFO","msg":"api: retrying error","error":"Post \"https://api.wandb.ai/files/zaydzuhri/fla/-top.code.7B.batch8.seqlen4096.context4096.warmup400.update2.steps40000.lr2e-5.cosine-202601140834/file_stream\": dial tcp: lookup api.wandb.ai on 10.43.0.10:53: read udp 10.42.227.13:37287->10.43.0.10:53: i/o timeout"}
110
+ {"time":"2026-01-16T10:45:53.655456166Z","level":"INFO","msg":"api: retrying error","error":"Post \"https://api.wandb.ai/files/zaydzuhri/fla/-top.code.7B.batch8.seqlen4096.context4096.warmup400.update2.steps40000.lr2e-5.cosine-202601140834/file_stream\": unexpected EOF"}
111
+ {"time":"2026-01-16T12:53:20.616362694Z","level":"INFO","msg":"api: retrying error","error":"Post \"https://api.wandb.ai/files/zaydzuhri/fla/-top.code.7B.batch8.seqlen4096.context4096.warmup400.update2.steps40000.lr2e-5.cosine-202601140834/file_stream\": dial tcp: lookup api.wandb.ai on 10.43.0.10:53: read udp 10.42.227.13:53959->10.43.0.10:53: i/o timeout"}
112
+ {"time":"2026-01-16T12:54:00.113794175Z","level":"INFO","msg":"api: retrying error","error":"Post \"https://api.wandb.ai/files/zaydzuhri/fla/-top.code.7B.batch8.seqlen4096.context4096.warmup400.update2.steps40000.lr2e-5.cosine-202601140834/file_stream\": dial tcp: lookup api.wandb.ai on 10.43.0.10:53: read udp 10.42.227.13:55534->10.43.0.10:53: i/o timeout"}
113
+ {"time":"2026-01-16T18:45:41.149118727Z","level":"INFO","msg":"api: retrying HTTP error","status":502,"url":"https://api.wandb.ai/files/zaydzuhri/fla/-top.code.7B.batch8.seqlen4096.context4096.warmup400.update2.steps40000.lr2e-5.cosine-202601140834/file_stream","body":"\n<html><head>\n<meta http-equiv=\"content-type\" content=\"text/html;charset=utf-8\">\n<title>502 Server Error</title>\n</head>\n<body text=#000000 bgcolor=#ffffff>\n<h1>Error: Server Error</h1>\n<h2>The server encountered a temporary error and could not complete your request.<p>Please try again in 30 seconds.</h2>\n<h2></h2>\n</body></html>\n"}
114
+ {"time":"2026-01-16T19:46:30.080975633Z","level":"INFO","msg":"api: retrying error","error":"Post \"https://api.wandb.ai/files/zaydzuhri/fla/-top.code.7B.batch8.seqlen4096.context4096.warmup400.update2.steps40000.lr2e-5.cosine-202601140834/file_stream\": dial tcp: lookup api.wandb.ai on 10.43.0.10:53: read udp 10.42.227.13:46152->10.43.0.10:53: i/o timeout"}
115
+ {"time":"2026-01-16T19:47:12.362852863Z","level":"INFO","msg":"api: retrying error","error":"Post \"https://api.wandb.ai/files/zaydzuhri/fla/-top.code.7B.batch8.seqlen4096.context4096.warmup400.update2.steps40000.lr2e-5.cosine-202601140834/file_stream\": dial tcp: lookup api.wandb.ai on 10.43.0.10:53: read udp 10.42.227.13:53133->10.43.0.10:53: i/o timeout"}
116
+ {"time":"2026-01-16T19:58:19.801124967Z","level":"INFO","msg":"api: retrying error","error":"Post \"https://api.wandb.ai/files/zaydzuhri/fla/-top.code.7B.batch8.seqlen4096.context4096.warmup400.update2.steps40000.lr2e-5.cosine-202601140834/file_stream\": dial tcp: lookup api.wandb.ai on 10.43.0.10:53: read udp 10.42.227.13:44611->10.43.0.10:53: i/o timeout"}
117
+ {"time":"2026-01-16T19:58:52.310948627Z","level":"INFO","msg":"api: retrying error","error":"Post \"https://api.wandb.ai/files/zaydzuhri/fla/-top.code.7B.batch8.seqlen4096.context4096.warmup400.update2.steps40000.lr2e-5.cosine-202601140834/file_stream\": dial tcp: lookup api.wandb.ai on 10.43.0.10:53: read udp 10.42.227.13:57138->10.43.0.10:53: i/o timeout"}
118
+ {"time":"2026-01-16T21:04:34.800662455Z","level":"INFO","msg":"api: retrying error","error":"Post \"https://api.wandb.ai/files/zaydzuhri/fla/-top.code.7B.batch8.seqlen4096.context4096.warmup400.update2.steps40000.lr2e-5.cosine-202601140834/file_stream\": dial tcp: lookup api.wandb.ai on 10.43.0.10:53: read udp 10.42.227.13:48045->10.43.0.10:53: i/o timeout"}
119
+ {"time":"2026-01-16T21:05:48.683738194Z","level":"INFO","msg":"api: retrying error","error":"Post \"https://api.wandb.ai/files/zaydzuhri/fla/-top.code.7B.batch8.seqlen4096.context4096.warmup400.update2.steps40000.lr2e-5.cosine-202601140834/file_stream\": dial tcp: lookup api.wandb.ai on 10.43.0.10:53: read udp 10.42.227.13:36932->10.43.0.10:53: i/o timeout"}
120
+ {"time":"2026-01-16T22:36:50.618772429Z","level":"INFO","msg":"api: retrying error","error":"Post \"https://api.wandb.ai/files/zaydzuhri/fla/-top.code.7B.batch8.seqlen4096.context4096.warmup400.update2.steps40000.lr2e-5.cosine-202601140834/file_stream\": dial tcp: lookup api.wandb.ai on 10.43.0.10:53: read udp 10.42.227.13:45006->10.43.0.10:53: i/o timeout"}
121
+ {"time":"2026-01-17T00:52:30.079930754Z","level":"INFO","msg":"api: retrying error","error":"Post \"https://api.wandb.ai/files/zaydzuhri/fla/-top.code.7B.batch8.seqlen4096.context4096.warmup400.update2.steps40000.lr2e-5.cosine-202601140834/file_stream\": dial tcp: lookup api.wandb.ai on 10.43.0.10:53: read udp 10.42.227.13:48132->10.43.0.10:53: i/o timeout"}
122
+ {"time":"2026-01-17T04:59:49.798891968Z","level":"INFO","msg":"api: retrying error","error":"Post \"https://api.wandb.ai/files/zaydzuhri/fla/-top.code.7B.batch8.seqlen4096.context4096.warmup400.update2.steps40000.lr2e-5.cosine-202601140834/file_stream\": dial tcp: lookup api.wandb.ai on 10.43.0.10:53: read udp 10.42.227.13:54551->10.43.0.10:53: i/o timeout"}
123
+ {"time":"2026-01-17T05:56:44.39370991Z","level":"INFO","msg":"api: retrying error","error":"Post \"https://api.wandb.ai/files/zaydzuhri/fla/-top.code.7B.batch8.seqlen4096.context4096.warmup400.update2.steps40000.lr2e-5.cosine-202601140834/file_stream\": dial tcp: lookup api.wandb.ai on 10.43.0.10:53: read udp 10.42.227.13:47123->10.43.0.10:53: i/o timeout"}
124
+ {"time":"2026-01-17T06:00:10.342698713Z","level":"INFO","msg":"api: retrying error","error":"Post \"https://api.wandb.ai/files/zaydzuhri/fla/-top.code.7B.batch8.seqlen4096.context4096.warmup400.update2.steps40000.lr2e-5.cosine-202601140834/file_stream\": dial tcp: lookup api.wandb.ai on 10.43.0.10:53: read udp 10.42.227.13:52714->10.43.0.10:53: i/o timeout"}
125
+ {"time":"2026-01-17T06:01:00.401835183Z","level":"INFO","msg":"api: retrying error","error":"Post \"https://api.wandb.ai/files/zaydzuhri/fla/-top.code.7B.batch8.seqlen4096.context4096.warmup400.update2.steps40000.lr2e-5.cosine-202601140834/file_stream\": dial tcp: lookup api.wandb.ai on 10.43.0.10:53: read udp 10.42.227.13:33545->10.43.0.10:53: i/o timeout"}
126
+ {"time":"2026-01-17T06:24:30.07071737Z","level":"INFO","msg":"api: retrying error","error":"Post \"https://api.wandb.ai/files/zaydzuhri/fla/-top.code.7B.batch8.seqlen4096.context4096.warmup400.update2.steps40000.lr2e-5.cosine-202601140834/file_stream\": dial tcp: lookup api.wandb.ai on 10.43.0.10:53: read udp 10.42.227.13:55594->10.43.0.10:53: i/o timeout"}
127
+ {"time":"2026-01-17T07:05:20.996870718Z","level":"INFO","msg":"fileTransfer: Close: file transfer manager closed"}
128
+ {"time":"2026-01-17T07:05:23.850214274Z","level":"INFO","msg":"handler: operation stats","stats":{}}
129
+ {"time":"2026-01-17T07:05:23.858304758Z","level":"INFO","msg":"stream: closing","id":"-top.code.7B.batch8.seqlen4096.context4096.warmup400.update2.steps40000.lr2e-5.cosine-202601140834"}
130
+ {"time":"2026-01-17T07:05:23.858429125Z","level":"INFO","msg":"handler: closed","stream_id":"-top.code.7B.batch8.seqlen4096.context4096.warmup400.update2.steps40000.lr2e-5.cosine-202601140834"}
131
+ {"time":"2026-01-17T07:05:23.858688602Z","level":"INFO","msg":"sender: closed","stream_id":"-top.code.7B.batch8.seqlen4096.context4096.warmup400.update2.steps40000.lr2e-5.cosine-202601140834"}
132
+ {"time":"2026-01-17T07:05:23.858707241Z","level":"INFO","msg":"stream: closed","id":"-top.code.7B.batch8.seqlen4096.context4096.warmup400.update2.steps40000.lr2e-5.cosine-202601140834"}
tb/20260114-0839/wandb/run-20260114_083920--top.code.7B.batch8.seqlen4096.context4096.warmup400.update2.steps40000.lr2e-5.cosine-202601140834/logs/debug.log ADDED
@@ -0,0 +1,26 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ 2026-01-14 08:39:20,481 INFO MainThread:405480 [wandb_setup.py:_flush():80] Current SDK version is 0.23.1
2
+ 2026-01-14 08:39:20,481 INFO MainThread:405480 [wandb_setup.py:_flush():80] Configure stats pid to 405480
3
+ 2026-01-14 08:39:20,481 INFO MainThread:405480 [wandb_setup.py:_flush():80] Loading settings from /root/.config/wandb/settings
4
+ 2026-01-14 08:39:20,481 INFO MainThread:405480 [wandb_setup.py:_flush():80] Loading settings from /root/flame/wandb/settings
5
+ 2026-01-14 08:39:20,481 INFO MainThread:405480 [wandb_setup.py:_flush():80] Loading settings from environment variables
6
+ 2026-01-14 08:39:20,481 INFO MainThread:405480 [wandb_init.py:setup_run_log_directory():714] Logging user logs to exp/top.code.7B.batch8.seqlen4096.context4096.warmup400.update2.steps40000.lr2e-5.cosine/tb/20260114-0839/wandb/run-20260114_083920--top.code.7B.batch8.seqlen4096.context4096.warmup400.update2.steps40000.lr2e-5.cosine-202601140834/logs/debug.log
7
+ 2026-01-14 08:39:20,482 INFO MainThread:405480 [wandb_init.py:setup_run_log_directory():715] Logging internal logs to exp/top.code.7B.batch8.seqlen4096.context4096.warmup400.update2.steps40000.lr2e-5.cosine/tb/20260114-0839/wandb/run-20260114_083920--top.code.7B.batch8.seqlen4096.context4096.warmup400.update2.steps40000.lr2e-5.cosine-202601140834/logs/debug-internal.log
8
+ 2026-01-14 08:39:20,482 INFO MainThread:405480 [wandb_init.py:init():841] calling init triggers
9
+ 2026-01-14 08:39:20,482 INFO MainThread:405480 [wandb_init.py:init():846] wandb.init called with sweep_config: {}
10
+ config: {'_wandb': {}}
11
+ 2026-01-14 08:39:20,482 INFO MainThread:405480 [wandb_init.py:init():889] starting backend
12
+ 2026-01-14 08:39:20,920 INFO MainThread:405480 [wandb_init.py:init():892] sending inform_init request
13
+ 2026-01-14 08:39:20,974 INFO MainThread:405480 [wandb_init.py:init():900] backend started and connected
14
+ 2026-01-14 08:39:20,976 INFO MainThread:405480 [wandb_init.py:init():970] updated telemetry
15
+ 2026-01-14 08:39:20,985 INFO MainThread:405480 [wandb_init.py:init():994] communicating run to backend with 90.0 second timeout
16
+ 2026-01-14 08:39:24,781 INFO MainThread:405480 [wandb_init.py:init():1041] starting run threads in backend
17
+ 2026-01-14 08:39:24,917 INFO MainThread:405480 [wandb_run.py:_console_start():2521] atexit reg
18
+ 2026-01-14 08:39:24,918 INFO MainThread:405480 [wandb_run.py:_redirect():2369] redirect: wrap_raw
19
+ 2026-01-14 08:39:24,918 INFO MainThread:405480 [wandb_run.py:_redirect():2438] Wrapping output streams.
20
+ 2026-01-14 08:39:24,918 INFO MainThread:405480 [wandb_run.py:_redirect():2461] Redirects installed.
21
+ 2026-01-14 08:39:24,922 INFO MainThread:405480 [wandb_init.py:init():1081] run started, returning control to user process
22
+ 2026-01-17 07:05:15,503 INFO MainThread:405480 [wandb_run.py:_finish():2287] finishing run zaydzuhri/fla/-top.code.7B.batch8.seqlen4096.context4096.warmup400.update2.steps40000.lr2e-5.cosine-202601140834
23
+ 2026-01-17 07:05:15,504 INFO MainThread:405480 [wandb_run.py:_atexit_cleanup():2486] got exitcode: 0
24
+ 2026-01-17 07:05:15,504 INFO MainThread:405480 [wandb_run.py:_restore():2468] restore
25
+ 2026-01-17 07:05:15,504 INFO MainThread:405480 [wandb_run.py:_restore():2474] restore done
26
+ 2026-01-17 07:05:23,857 INFO MainThread:405480 [wandb_run.py:_footer_sync_info():3862] logging synced files