zaydzuhri commited on
Commit
b3f00e2
·
verified ·
1 Parent(s): f2d47ff

Add files using upload-large-folder tool

Browse files
Files changed (50) hide show
  1. fla/models/abc/__pycache__/configuration_abc.cpython-312.pyc +0 -0
  2. fla/models/abc/__pycache__/modeling_abc.cpython-312.pyc +0 -0
  3. fla/models/abc/configuration_abc.py +91 -0
  4. fla/models/bitnet/__pycache__/__init__.cpython-312.pyc +0 -0
  5. fla/models/forgetting_transformer/__pycache__/__init__.cpython-312.pyc +0 -0
  6. fla/models/forgetting_transformer/__pycache__/configuration_forgetting_transformer.cpython-312.pyc +0 -0
  7. fla/models/forgetting_transformer/__pycache__/modeling_forgetting_transformer.cpython-312.pyc +0 -0
  8. fla/models/gated_deltanet/__pycache__/__init__.cpython-312.pyc +0 -0
  9. fla/models/gated_deltanet/__pycache__/modeling_gated_deltanet.cpython-312.pyc +0 -0
  10. fla/models/gla/__init__.py +13 -0
  11. fla/models/gla/__pycache__/configuration_gla.cpython-312.pyc +0 -0
  12. fla/models/gsa/__pycache__/configuration_gsa.cpython-312.pyc +0 -0
  13. fla/models/hgrn/__init__.py +13 -0
  14. fla/models/hgrn/__pycache__/__init__.cpython-312.pyc +0 -0
  15. fla/models/hgrn/__pycache__/modeling_hgrn.cpython-312.pyc +0 -0
  16. fla/models/hgrn/configuration_hgrn.py +81 -0
  17. fla/models/hgrn2/__pycache__/configuration_hgrn2.cpython-312.pyc +0 -0
  18. fla/models/linear_attn/__pycache__/modeling_linear_attn.cpython-312.pyc +0 -0
  19. fla/models/mamba/__pycache__/configuration_mamba.cpython-312.pyc +0 -0
  20. fla/models/retnet/__pycache__/__init__.cpython-312.pyc +0 -0
  21. fla/models/rwkv6/__pycache__/configuration_rwkv6.cpython-312.pyc +0 -0
  22. fla/models/rwkv6/__pycache__/modeling_rwkv6.cpython-312.pyc +0 -0
  23. fla/models/rwkv7/__pycache__/__init__.cpython-312.pyc +0 -0
  24. fla/models/rwkv7/__pycache__/configuration_rwkv7.cpython-312.pyc +0 -0
  25. fla/models/samba/__init__.py +13 -0
  26. fla/models/transformer/__pycache__/__init__.cpython-312.pyc +0 -0
  27. fla/models/transformer_dsmtp/__pycache__/__init__.cpython-312.pyc +0 -0
  28. fla/models/transformer_top/__pycache__/modeling_transformer.cpython-312.pyc +0 -0
  29. fla/modules/__pycache__/__init__.cpython-312.pyc +0 -0
  30. fla/modules/__pycache__/convolution.cpython-312.pyc +0 -0
  31. fla/modules/__pycache__/feature_map.cpython-312.pyc +0 -0
  32. fla/modules/__pycache__/fused_bitlinear.cpython-312.pyc +0 -0
  33. fla/modules/__pycache__/fused_cross_entropy.cpython-312.pyc +0 -0
  34. fla/modules/__pycache__/fused_linear_listnet_loss.cpython-312.pyc +0 -0
  35. fla/modules/__pycache__/fused_norm_gate.cpython-312.pyc +0 -0
  36. fla/modules/__pycache__/l2norm.cpython-312.pyc +0 -0
  37. fla/modules/__pycache__/layernorm_gated.cpython-312.pyc +0 -0
  38. fla/modules/__pycache__/mlp.cpython-312.pyc +0 -0
  39. fla/modules/__pycache__/parallel.cpython-312.pyc +0 -0
  40. fla/modules/__pycache__/rotary.cpython-312.pyc +0 -0
  41. fla/modules/__pycache__/seq_to_dsmtp.cpython-312.pyc +0 -0
  42. generation_config.json +7 -0
  43. tb/20251231-0837/wandb/debug-internal.log +21 -0
  44. tb/20251231-0837/wandb/debug.log +26 -0
  45. tb/20251231-0837/wandb/run-20251231_083738--dsmtp.code.1B.batch16.seqlen4096.context4096.warmup400.update1.steps40000.lr5e-5.cosine-202512310834/files/output.log +0 -0
  46. tb/20251231-0837/wandb/run-20251231_083738--dsmtp.code.1B.batch16.seqlen4096.context4096.warmup400.update1.steps40000.lr5e-5.cosine-202512310834/files/requirements.txt +169 -0
  47. tb/20251231-0837/wandb/run-20251231_083738--dsmtp.code.1B.batch16.seqlen4096.context4096.warmup400.update1.steps40000.lr5e-5.cosine-202512310834/files/wandb-summary.json +1 -0
  48. tb/20251231-0837/wandb/run-20251231_083738--dsmtp.code.1B.batch16.seqlen4096.context4096.warmup400.update1.steps40000.lr5e-5.cosine-202512310834/logs/debug-core.log +16 -0
  49. tb/20251231-0837/wandb/run-20251231_083738--dsmtp.code.1B.batch16.seqlen4096.context4096.warmup400.update1.steps40000.lr5e-5.cosine-202512310834/logs/debug-internal.log +21 -0
  50. tb/20251231-0837/wandb/run-20251231_083738--dsmtp.code.1B.batch16.seqlen4096.context4096.warmup400.update1.steps40000.lr5e-5.cosine-202512310834/logs/debug.log +26 -0
fla/models/abc/__pycache__/configuration_abc.cpython-312.pyc ADDED
Binary file (3.63 kB). View file
 
fla/models/abc/__pycache__/modeling_abc.cpython-312.pyc ADDED
Binary file (18.4 kB). View file
 
fla/models/abc/configuration_abc.py ADDED
@@ -0,0 +1,91 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
# -*- coding: utf-8 -*-

from typing import Dict, Optional

from transformers.configuration_utils import PretrainedConfig


class ABCConfig(PretrainedConfig):
    """Configuration for ABC (Attention with Bounded-memory Control) models.

    Holds the model hyperparameters and forwards the generic token/embedding
    options to :class:`~transformers.PretrainedConfig`.

    Args:
        hidden_size: Dimensionality of the hidden representations.
        gate_low_rank_dim: Rank of the low-rank gate projection.
        clamp_min: Lower bound applied when clamping (presumably the slot
            logits — confirm against the modeling code).
        clamp_max: Upper bound for the same clamp.
        hidden_ratio: MLP expansion ratio used when ``intermediate_size``
            is not given explicitly.
        intermediate_size: Explicit MLP inner dimension; overrides
            ``hidden_ratio`` when set.
        num_hidden_layers: Number of stacked layers.
        num_heads: Number of attention heads.
        num_slots: Number of memory slots per head.
        use_short_conv: Whether to apply a short depthwise convolution.
        conv_size: Kernel size of that convolution.
        exapnd_k: Key expansion ratio. NOTE(review): the name is a
            misspelling of ``expand_k`` kept for backward compatibility with
            existing callers and serialized configs; the stored attribute is
            the correctly spelled ``expand_k``.
        exapnd_v: Value expansion ratio; same misspelling caveat, stored as
            ``expand_v``.
        hidden_act: Activation function of the MLP.
        max_position_embeddings: Maximum supported sequence length.
        elementwise_affine: Whether norm layers learn affine parameters.
        norm_eps: Epsilon used by normalization layers.
        use_rope: Whether to use rotary position embeddings.
        attn: Optional dict configuring hybrid softmax-attention layers;
            must contain ``layers`` and ``num_heads`` (validated below).
        use_cache: Whether to return past key/values during generation.
        pad_token_id: Padding token id (optional).
        bos_token_id: Beginning-of-sequence token id.
        eos_token_id: End-of-sequence token id.
        tie_word_embeddings: Whether to tie input/output embeddings.
        initializer_range: Std of the weight initializer.
        fuse_norm: Use the fused norm kernel.
        fuse_swiglu: Use the fused SwiGLU kernel.
        fuse_cross_entropy: Use the fused cross-entropy kernel.
        vocab_size: Vocabulary size.

    Raises:
        ValueError: If ``attn`` is provided but is not a dict or lacks the
            required ``layers``/``num_heads`` keys.
    """

    model_type = 'abc'
    keys_to_ignore_at_inference = ['past_key_values']

    def __init__(
        self,
        hidden_size: int = 2048,
        gate_low_rank_dim: int = 16,
        clamp_min: float = -32,
        clamp_max: float = 32,
        hidden_ratio: Optional[int] = 4,
        intermediate_size: Optional[int] = None,
        num_hidden_layers: int = 24,
        num_heads: int = 4,
        num_slots: Optional[int] = 64,
        use_short_conv: bool = False,
        conv_size: int = 4,
        # Misspelled on purpose: part of the public interface (see docstring).
        exapnd_k: float = 0.5,
        exapnd_v: float = 1,
        hidden_act: str = "swish",
        max_position_embeddings: int = 2048,
        elementwise_affine: Optional[bool] = True,
        norm_eps: float = 1e-6,
        use_rope: bool = True,
        attn: Optional[Dict] = None,
        use_cache: bool = True,
        pad_token_id: Optional[int] = None,
        bos_token_id: int = 1,
        eos_token_id: int = 2,
        tie_word_embeddings: bool = False,
        initializer_range: float = 0.006,
        fuse_norm: bool = True,
        fuse_swiglu: bool = True,
        fuse_cross_entropy: bool = True,
        vocab_size: int = 32000,
        **kwargs
    ):
        self.hidden_size = hidden_size
        self.gate_low_rank_dim = gate_low_rank_dim
        self.clamp_min = clamp_min
        self.clamp_max = clamp_max
        self.hidden_ratio = hidden_ratio
        self.intermediate_size = intermediate_size
        self.num_hidden_layers = num_hidden_layers
        self.num_heads = num_heads
        self.num_slots = num_slots
        self.use_short_conv = use_short_conv
        self.conv_size = conv_size
        # Stored under the correctly spelled attribute names.
        self.expand_k = exapnd_k
        self.expand_v = exapnd_v
        self.hidden_act = hidden_act
        self.max_position_embeddings = max_position_embeddings
        self.elementwise_affine = elementwise_affine
        self.norm_eps = norm_eps
        self.use_rope = use_rope
        self.attn = attn
        self.use_cache = use_cache
        self.initializer_range = initializer_range

        self.fuse_norm = fuse_norm
        self.fuse_swiglu = fuse_swiglu
        self.fuse_cross_entropy = fuse_cross_entropy
        self.vocab_size = vocab_size

        if attn is not None:
            # Validate against the builtin type, not the typing alias.
            if not isinstance(attn, dict):
                raise ValueError("attn must be a dictionary")
            if 'layers' not in attn:
                raise ValueError("Layer indices must be provided to initialize hybrid attention layers")
            if 'num_heads' not in attn:
                raise ValueError("Number of heads must be provided to initialize hybrid attention layers")
            # Fill in optional attention settings with their defaults.
            attn['num_kv_heads'] = attn.get('num_kv_heads', attn['num_heads'])
            attn['qkv_bias'] = attn.get('qkv_bias', False)
            attn['window_size'] = attn.get('window_size', None)
            attn['rope_theta'] = attn.get('rope_theta', 10000.)

        super().__init__(
            pad_token_id=pad_token_id,
            bos_token_id=bos_token_id,
            eos_token_id=eos_token_id,
            tie_word_embeddings=tie_word_embeddings,
            **kwargs,
        )
fla/models/bitnet/__pycache__/__init__.cpython-312.pyc ADDED
Binary file (710 Bytes). View file
 
fla/models/forgetting_transformer/__pycache__/__init__.cpython-312.pyc ADDED
Binary file (845 Bytes). View file
 
fla/models/forgetting_transformer/__pycache__/configuration_forgetting_transformer.cpython-312.pyc ADDED
Binary file (2.52 kB). View file
 
fla/models/forgetting_transformer/__pycache__/modeling_forgetting_transformer.cpython-312.pyc ADDED
Binary file (17.3 kB). View file
 
fla/models/gated_deltanet/__pycache__/__init__.cpython-312.pyc ADDED
Binary file (774 Bytes). View file
 
fla/models/gated_deltanet/__pycache__/modeling_gated_deltanet.cpython-312.pyc ADDED
Binary file (18.5 kB). View file
 
fla/models/gla/__init__.py ADDED
@@ -0,0 +1,13 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
# -*- coding: utf-8 -*-

from transformers import AutoConfig, AutoModel, AutoModelForCausalLM

from fla.models.gla.configuration_gla import GLAConfig
from fla.models.gla.modeling_gla import GLAForCausalLM, GLAModel

# Register GLA with the Hugging Face Auto* factories so the 'gla'
# model_type resolves via AutoModel.from_pretrained and friends.
# exist_ok=True keeps repeated registration (e.g. module reloads) from
# raising, matching the samba registration elsewhere in this package.
AutoConfig.register(GLAConfig.model_type, GLAConfig, exist_ok=True)
AutoModel.register(GLAConfig, GLAModel, exist_ok=True)
AutoModelForCausalLM.register(GLAConfig, GLAForCausalLM, exist_ok=True)


__all__ = ['GLAConfig', 'GLAForCausalLM', 'GLAModel']
fla/models/gla/__pycache__/configuration_gla.cpython-312.pyc ADDED
Binary file (3.76 kB). View file
 
fla/models/gsa/__pycache__/configuration_gsa.cpython-312.pyc ADDED
Binary file (3.87 kB). View file
 
fla/models/hgrn/__init__.py ADDED
@@ -0,0 +1,13 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
# -*- coding: utf-8 -*-

from transformers import AutoConfig, AutoModel, AutoModelForCausalLM

from fla.models.hgrn.configuration_hgrn import HGRNConfig
from fla.models.hgrn.modeling_hgrn import HGRNForCausalLM, HGRNModel

# Register HGRN with the Hugging Face Auto* factories so the 'hgrn'
# model_type resolves via AutoModel.from_pretrained and friends.
# exist_ok=True keeps repeated registration (e.g. module reloads) from
# raising, matching the samba registration elsewhere in this package.
AutoConfig.register(HGRNConfig.model_type, HGRNConfig, exist_ok=True)
AutoModel.register(HGRNConfig, HGRNModel, exist_ok=True)
AutoModelForCausalLM.register(HGRNConfig, HGRNForCausalLM, exist_ok=True)


__all__ = ['HGRNConfig', 'HGRNForCausalLM', 'HGRNModel']
fla/models/hgrn/__pycache__/__init__.cpython-312.pyc ADDED
Binary file (693 Bytes). View file
 
fla/models/hgrn/__pycache__/modeling_hgrn.cpython-312.pyc ADDED
Binary file (18.9 kB). View file
 
fla/models/hgrn/configuration_hgrn.py ADDED
@@ -0,0 +1,81 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
# -*- coding: utf-8 -*-

from typing import Dict, Optional

from transformers.configuration_utils import PretrainedConfig


class HGRNConfig(PretrainedConfig):
    """Configuration for HGRN (Hierarchically Gated Recurrent Network) models.

    Holds the model hyperparameters and forwards the generic token/embedding
    options to :class:`~transformers.PretrainedConfig`.

    Args:
        attn_mode: Kernel used for the recurrence (e.g. "fused_recurrent").
        hidden_size: Dimensionality of the hidden representations.
        num_hidden_layers: Number of stacked layers.
        expand_ratio: Expansion ratio of the recurrent state.
        use_short_conv: Whether to apply a short depthwise convolution.
        conv_size: Kernel size of that convolution.
        use_lower_bound: Whether to learn per-layer forget-gate lower bounds.
        max_position_embeddings: Maximum supported sequence length.
        hidden_ratio: MLP expansion ratio used when ``intermediate_size``
            is not given explicitly.
        intermediate_size: Explicit MLP inner dimension; overrides
            ``hidden_ratio`` when set.
        hidden_act: Activation function of the MLP.
        elementwise_affine: Whether norm layers learn affine parameters.
        norm_eps: Epsilon used by normalization layers.
        attn: Optional dict configuring hybrid softmax-attention layers;
            must contain ``layers`` and ``num_heads`` (validated below).
        use_cache: Whether to return past key/values during generation.
        pad_token_id: Padding token id (optional).
        bos_token_id: Beginning-of-sequence token id.
        eos_token_id: End-of-sequence token id.
        tie_word_embeddings: Whether to tie input/output embeddings.
        initializer_range: Std of the weight initializer.
        fuse_norm: Use the fused norm kernel.
        fuse_swiglu: Use the fused SwiGLU kernel.
        fuse_cross_entropy: Use the fused cross-entropy kernel.
        vocab_size: Vocabulary size.

    Raises:
        ValueError: If ``attn`` is provided but is not a dict or lacks the
            required ``layers``/``num_heads`` keys.
    """

    model_type = 'hgrn'
    keys_to_ignore_at_inference = ['past_key_values']

    def __init__(
        self,
        attn_mode: str = "fused_recurrent",
        hidden_size: int = 2048,
        num_hidden_layers: int = 24,
        expand_ratio: Optional[int] = 1,
        use_short_conv: bool = False,
        conv_size: int = 4,
        use_lower_bound: bool = True,
        max_position_embeddings: int = 2048,
        hidden_ratio: Optional[int] = 4,
        intermediate_size: Optional[int] = None,
        hidden_act: str = "swish",
        elementwise_affine: Optional[bool] = True,
        norm_eps: float = 1e-6,
        attn: Optional[Dict] = None,
        use_cache: bool = True,
        pad_token_id: Optional[int] = None,
        bos_token_id: int = 1,
        eos_token_id: int = 2,
        tie_word_embeddings: bool = False,
        initializer_range: float = 0.006,
        fuse_norm: bool = True,
        fuse_swiglu: bool = True,
        fuse_cross_entropy: bool = True,
        vocab_size: int = 32000,
        **kwargs
    ):
        self.attn_mode = attn_mode
        self.hidden_size = hidden_size
        self.num_hidden_layers = num_hidden_layers
        self.expand_ratio = expand_ratio
        self.use_short_conv = use_short_conv
        self.conv_size = conv_size
        self.use_lower_bound = use_lower_bound
        self.max_position_embeddings = max_position_embeddings
        self.hidden_ratio = hidden_ratio
        self.intermediate_size = intermediate_size
        self.elementwise_affine = elementwise_affine
        self.attn = attn
        self.norm_eps = norm_eps
        self.hidden_act = hidden_act
        self.use_cache = use_cache
        self.initializer_range = initializer_range

        self.fuse_norm = fuse_norm
        self.fuse_swiglu = fuse_swiglu
        self.fuse_cross_entropy = fuse_cross_entropy
        self.vocab_size = vocab_size

        if attn is not None:
            # Validate against the builtin type, not the typing alias.
            if not isinstance(attn, dict):
                raise ValueError("attn must be a dictionary")
            if 'layers' not in attn:
                raise ValueError("Layer indices must be provided to initialize hybrid attention layers")
            if 'num_heads' not in attn:
                raise ValueError("Number of heads must be provided to initialize hybrid attention layers")
            # Fill in optional attention settings with their defaults.
            attn['num_kv_heads'] = attn.get('num_kv_heads', attn['num_heads'])
            attn['qkv_bias'] = attn.get('qkv_bias', False)
            attn['window_size'] = attn.get('window_size', None)
            attn['rope_theta'] = attn.get('rope_theta', 10000.)

        super().__init__(
            pad_token_id=pad_token_id,
            bos_token_id=bos_token_id,
            eos_token_id=eos_token_id,
            tie_word_embeddings=tie_word_embeddings,
            **kwargs,
        )
fla/models/hgrn2/__pycache__/configuration_hgrn2.cpython-312.pyc ADDED
Binary file (3.58 kB). View file
 
fla/models/linear_attn/__pycache__/modeling_linear_attn.cpython-312.pyc ADDED
Binary file (18.5 kB). View file
 
fla/models/mamba/__pycache__/configuration_mamba.cpython-312.pyc ADDED
Binary file (7.09 kB). View file
 
fla/models/retnet/__pycache__/__init__.cpython-312.pyc ADDED
Binary file (710 Bytes). View file
 
fla/models/rwkv6/__pycache__/configuration_rwkv6.cpython-312.pyc ADDED
Binary file (3.35 kB). View file
 
fla/models/rwkv6/__pycache__/modeling_rwkv6.cpython-312.pyc ADDED
Binary file (21.2 kB). View file
 
fla/models/rwkv7/__pycache__/__init__.cpython-312.pyc ADDED
Binary file (715 Bytes). View file
 
fla/models/rwkv7/__pycache__/configuration_rwkv7.cpython-312.pyc ADDED
Binary file (4.27 kB). View file
 
fla/models/samba/__init__.py ADDED
@@ -0,0 +1,13 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
# -*- coding: utf-8 -*-

from transformers import AutoConfig, AutoModel, AutoModelForCausalLM

from fla.models.samba.configuration_samba import SambaConfig
from fla.models.samba.modeling_samba import SambaBlock, SambaForCausalLM, SambaModel

# Hook Samba into the Hugging Face Auto* factories so that the 'samba'
# model_type resolves through AutoModel.from_pretrained and friends.
# exist_ok=True (previously passed positionally) tolerates re-registration.
AutoConfig.register(SambaConfig.model_type, SambaConfig, exist_ok=True)
AutoModel.register(SambaConfig, SambaModel, exist_ok=True)
AutoModelForCausalLM.register(SambaConfig, SambaForCausalLM, exist_ok=True)


__all__ = ['SambaConfig', 'SambaForCausalLM', 'SambaModel', 'SambaBlock']
fla/models/transformer/__pycache__/__init__.cpython-312.pyc ADDED
Binary file (756 Bytes). View file
 
fla/models/transformer_dsmtp/__pycache__/__init__.cpython-312.pyc ADDED
Binary file (789 Bytes). View file
 
fla/models/transformer_top/__pycache__/modeling_transformer.cpython-312.pyc ADDED
Binary file (19.2 kB). View file
 
fla/modules/__pycache__/__init__.cpython-312.pyc ADDED
Binary file (1.37 kB). View file
 
fla/modules/__pycache__/convolution.cpython-312.pyc ADDED
Binary file (21.1 kB). View file
 
fla/modules/__pycache__/feature_map.cpython-312.pyc ADDED
Binary file (17.7 kB). View file
 
fla/modules/__pycache__/fused_bitlinear.cpython-312.pyc ADDED
Binary file (23.7 kB). View file
 
fla/modules/__pycache__/fused_cross_entropy.cpython-312.pyc ADDED
Binary file (16 kB). View file
 
fla/modules/__pycache__/fused_linear_listnet_loss.cpython-312.pyc ADDED
Binary file (17.8 kB). View file
 
fla/modules/__pycache__/fused_norm_gate.cpython-312.pyc ADDED
Binary file (35.4 kB). View file
 
fla/modules/__pycache__/l2norm.cpython-312.pyc ADDED
Binary file (6.99 kB). View file
 
fla/modules/__pycache__/layernorm_gated.cpython-312.pyc ADDED
Binary file (23.5 kB). View file
 
fla/modules/__pycache__/mlp.cpython-312.pyc ADDED
Binary file (6.26 kB). View file
 
fla/modules/__pycache__/parallel.cpython-312.pyc ADDED
Binary file (2.04 kB). View file
 
fla/modules/__pycache__/rotary.cpython-312.pyc ADDED
Binary file (23.2 kB). View file
 
fla/modules/__pycache__/seq_to_dsmtp.cpython-312.pyc ADDED
Binary file (1.38 kB). View file
 
generation_config.json ADDED
@@ -0,0 +1,7 @@
 
 
 
 
 
 
 
 
1
+ {
2
+ "_from_model_config": true,
3
+ "bos_token_id": 1,
4
+ "eos_token_id": 2,
5
+ "pad_token_id": 2,
6
+ "transformers_version": "4.51.3"
7
+ }
tb/20251231-0837/wandb/debug-internal.log ADDED
@@ -0,0 +1,21 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {"time":"2025-12-31T08:37:38.665057765Z","level":"INFO","msg":"stream: starting","core version":"0.23.1"}
2
+ {"time":"2025-12-31T08:37:38.988512957Z","level":"INFO","msg":"stream: created new stream","id":"-dsmtp.code.1B.batch16.seqlen4096.context4096.warmup400.update1.steps40000.lr5e-5.cosine-202512310834"}
3
+ {"time":"2025-12-31T08:37:38.988613654Z","level":"INFO","msg":"handler: started","stream_id":"-dsmtp.code.1B.batch16.seqlen4096.context4096.warmup400.update1.steps40000.lr5e-5.cosine-202512310834"}
4
+ {"time":"2025-12-31T08:37:38.988714345Z","level":"INFO","msg":"stream: started","id":"-dsmtp.code.1B.batch16.seqlen4096.context4096.warmup400.update1.steps40000.lr5e-5.cosine-202512310834"}
5
+ {"time":"2025-12-31T08:37:38.988738297Z","level":"INFO","msg":"writer: started","stream_id":"-dsmtp.code.1B.batch16.seqlen4096.context4096.warmup400.update1.steps40000.lr5e-5.cosine-202512310834"}
6
+ {"time":"2025-12-31T08:37:38.988753655Z","level":"INFO","msg":"sender: started","stream_id":"-dsmtp.code.1B.batch16.seqlen4096.context4096.warmup400.update1.steps40000.lr5e-5.cosine-202512310834"}
7
+ {"time":"2025-12-31T11:49:59.759567501Z","level":"INFO","msg":"api: retrying error","error":"Post \"https://api.wandb.ai/files/zaydzuhri/fla/-dsmtp.code.1B.batch16.seqlen4096.context4096.warmup400.update1.steps40000.lr5e-5.cosine-202512310834/file_stream\": dial tcp 35.186.228.49:443: connect: connection refused"}
8
+ {"time":"2025-12-31T13:32:20.799679052Z","level":"INFO","msg":"flowcontrol: backed up, offloading to disk","recordNumber":26941}
9
+ {"time":"2025-12-31T13:32:20.80069231Z","level":"INFO","msg":"flowcontrol: unblocked","totalOffloaded":2}
10
+ {"time":"2025-12-31T21:52:16.560064623Z","level":"INFO","msg":"api: retrying error","error":"Post \"https://api.wandb.ai/files/zaydzuhri/fla/-dsmtp.code.1B.batch16.seqlen4096.context4096.warmup400.update1.steps40000.lr5e-5.cosine-202512310834/file_stream\": dial tcp 35.186.228.49:443: connect: connection refused"}
11
+ {"time":"2025-12-31T23:40:07.918310102Z","level":"INFO","msg":"api: retrying error","error":"Post \"https://api.wandb.ai/files/zaydzuhri/fla/-dsmtp.code.1B.batch16.seqlen4096.context4096.warmup400.update1.steps40000.lr5e-5.cosine-202512310834/file_stream\": dial tcp: lookup api.wandb.ai on 10.43.0.10:53: read udp 10.42.35.131:44189->10.43.0.10:53: i/o timeout"}
12
+ {"time":"2025-12-31T23:47:01.610252645Z","level":"INFO","msg":"api: retrying error","error":"Post \"https://api.wandb.ai/files/zaydzuhri/fla/-dsmtp.code.1B.batch16.seqlen4096.context4096.warmup400.update1.steps40000.lr5e-5.cosine-202512310834/file_stream\": dial tcp 35.186.228.49:443: connect: connection refused"}
13
+ {"time":"2026-01-01T08:38:59.64892198Z","level":"INFO","msg":"api: retrying error","error":"Post \"https://api.wandb.ai/files/zaydzuhri/fla/-dsmtp.code.1B.batch16.seqlen4096.context4096.warmup400.update1.steps40000.lr5e-5.cosine-202512310834/file_stream\": dial tcp 35.186.228.49:443: connect: connection refused"}
14
+ {"time":"2026-01-01T09:06:12.449377603Z","level":"INFO","msg":"flowcontrol: backed up, offloading to disk","recordNumber":129853}
15
+ {"time":"2026-01-01T09:06:12.451188339Z","level":"INFO","msg":"flowcontrol: unblocked","totalOffloaded":1}
16
+ {"time":"2026-01-01T09:07:50.503866067Z","level":"INFO","msg":"fileTransfer: Close: file transfer manager closed"}
17
+ {"time":"2026-01-01T09:07:50.688587533Z","level":"INFO","msg":"handler: operation stats","stats":{}}
18
+ {"time":"2026-01-01T09:07:50.69317498Z","level":"INFO","msg":"stream: closing","id":"-dsmtp.code.1B.batch16.seqlen4096.context4096.warmup400.update1.steps40000.lr5e-5.cosine-202512310834"}
19
+ {"time":"2026-01-01T09:07:50.693203953Z","level":"INFO","msg":"handler: closed","stream_id":"-dsmtp.code.1B.batch16.seqlen4096.context4096.warmup400.update1.steps40000.lr5e-5.cosine-202512310834"}
20
+ {"time":"2026-01-01T09:07:50.693272578Z","level":"INFO","msg":"sender: closed","stream_id":"-dsmtp.code.1B.batch16.seqlen4096.context4096.warmup400.update1.steps40000.lr5e-5.cosine-202512310834"}
21
+ {"time":"2026-01-01T09:07:50.693279066Z","level":"INFO","msg":"stream: closed","id":"-dsmtp.code.1B.batch16.seqlen4096.context4096.warmup400.update1.steps40000.lr5e-5.cosine-202512310834"}
tb/20251231-0837/wandb/debug.log ADDED
@@ -0,0 +1,26 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ 2025-12-31 08:37:38,391 INFO MainThread:3901702 [wandb_setup.py:_flush():80] Current SDK version is 0.23.1
2
+ 2025-12-31 08:37:38,392 INFO MainThread:3901702 [wandb_setup.py:_flush():80] Configure stats pid to 3901702
3
+ 2025-12-31 08:37:38,392 INFO MainThread:3901702 [wandb_setup.py:_flush():80] Loading settings from /root/.config/wandb/settings
4
+ 2025-12-31 08:37:38,392 INFO MainThread:3901702 [wandb_setup.py:_flush():80] Loading settings from /root/flame/wandb/settings
5
+ 2025-12-31 08:37:38,392 INFO MainThread:3901702 [wandb_setup.py:_flush():80] Loading settings from environment variables
6
+ 2025-12-31 08:37:38,392 INFO MainThread:3901702 [wandb_init.py:setup_run_log_directory():714] Logging user logs to exp/dsmtp.code.1B.batch16.seqlen4096.context4096.warmup400.update1.steps40000.lr5e-5.cosine/tb/20251231-0837/wandb/run-20251231_083738--dsmtp.code.1B.batch16.seqlen4096.context4096.warmup400.update1.steps40000.lr5e-5.cosine-202512310834/logs/debug.log
7
+ 2025-12-31 08:37:38,392 INFO MainThread:3901702 [wandb_init.py:setup_run_log_directory():715] Logging internal logs to exp/dsmtp.code.1B.batch16.seqlen4096.context4096.warmup400.update1.steps40000.lr5e-5.cosine/tb/20251231-0837/wandb/run-20251231_083738--dsmtp.code.1B.batch16.seqlen4096.context4096.warmup400.update1.steps40000.lr5e-5.cosine-202512310834/logs/debug-internal.log
8
+ 2025-12-31 08:37:38,392 INFO MainThread:3901702 [wandb_init.py:init():841] calling init triggers
9
+ 2025-12-31 08:37:38,392 INFO MainThread:3901702 [wandb_init.py:init():846] wandb.init called with sweep_config: {}
10
+ config: {'_wandb': {}}
11
+ 2025-12-31 08:37:38,392 INFO MainThread:3901702 [wandb_init.py:init():889] starting backend
12
+ 2025-12-31 08:37:38,658 INFO MainThread:3901702 [wandb_init.py:init():892] sending inform_init request
13
+ 2025-12-31 08:37:38,663 INFO MainThread:3901702 [wandb_init.py:init():900] backend started and connected
14
+ 2025-12-31 08:37:38,666 INFO MainThread:3901702 [wandb_init.py:init():970] updated telemetry
15
+ 2025-12-31 08:37:38,673 INFO MainThread:3901702 [wandb_init.py:init():994] communicating run to backend with 90.0 second timeout
16
+ 2025-12-31 08:37:39,397 INFO MainThread:3901702 [wandb_init.py:init():1041] starting run threads in backend
17
+ 2025-12-31 08:37:39,496 INFO MainThread:3901702 [wandb_run.py:_console_start():2521] atexit reg
18
+ 2025-12-31 08:37:39,496 INFO MainThread:3901702 [wandb_run.py:_redirect():2369] redirect: wrap_raw
19
+ 2025-12-31 08:37:39,496 INFO MainThread:3901702 [wandb_run.py:_redirect():2438] Wrapping output streams.
20
+ 2025-12-31 08:37:39,496 INFO MainThread:3901702 [wandb_run.py:_redirect():2461] Redirects installed.
21
+ 2025-12-31 08:37:39,499 INFO MainThread:3901702 [wandb_init.py:init():1081] run started, returning control to user process
22
+ 2026-01-01 09:07:50,099 INFO MainThread:3901702 [wandb_run.py:_finish():2287] finishing run zaydzuhri/fla/-dsmtp.code.1B.batch16.seqlen4096.context4096.warmup400.update1.steps40000.lr5e-5.cosine-202512310834
23
+ 2026-01-01 09:07:50,099 INFO MainThread:3901702 [wandb_run.py:_atexit_cleanup():2486] got exitcode: 0
24
+ 2026-01-01 09:07:50,100 INFO MainThread:3901702 [wandb_run.py:_restore():2468] restore
25
+ 2026-01-01 09:07:50,100 INFO MainThread:3901702 [wandb_run.py:_restore():2474] restore done
26
+ 2026-01-01 09:07:50,692 INFO MainThread:3901702 [wandb_run.py:_footer_sync_info():3862] logging synced files
tb/20251231-0837/wandb/run-20251231_083738--dsmtp.code.1B.batch16.seqlen4096.context4096.warmup400.update1.steps40000.lr5e-5.cosine-202512310834/files/output.log ADDED
The diff for this file is too large to render. See raw diff
 
tb/20251231-0837/wandb/run-20251231_083738--dsmtp.code.1B.batch16.seqlen4096.context4096.warmup400.update1.steps40000.lr5e-5.cosine-202512310834/files/requirements.txt ADDED
@@ -0,0 +1,169 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ flame==0.1.0
2
+ fsspec==2025.10.0
3
+ aiohappyeyeballs==2.6.1
4
+ ipykernel==7.1.0
5
+ smmap==5.0.2
6
+ pybind11==3.0.1
7
+ tabulate==0.9.0
8
+ parso==0.8.5
9
+ yarl==1.22.0
10
+ asttokens==3.0.1
11
+ pandas==2.3.3
12
+ xxhash==3.6.0
13
+ pathvalidate==3.3.1
14
+ Werkzeug==3.1.4
15
+ regex==2025.11.3
16
+ inquirerpy==0.3.4
17
+ click==8.3.1
18
+ idna==3.11
19
+ pydantic==2.12.5
20
+ pexpect==4.9.0
21
+ typepy==1.3.4
22
+ certifi==2025.11.12
23
+ wcwidth==0.2.14
24
+ triton==3.2.0
25
+ hf-xet==1.2.0
26
+ joblib==1.5.3
27
+ tqdm==4.67.1
28
+ nvidia-nvtx-cu12==12.4.127
29
+ setuptools==80.9.0
30
+ lxml==6.0.2
31
+ nvidia-cufft-cu12==11.2.1.3
32
+ evaluate==0.4.6
33
+ Markdown==3.10
34
+ chardet==5.2.0
35
+ multiprocess==0.70.18
36
+ tensorboard==2.20.0
37
+ nvidia-nvjitlink-cu12==12.4.127
38
+ flame==0.1.0
39
+ matplotlib-inline==0.2.1
40
+ Cython==3.2.3
41
+ tensorboard-data-server==0.7.2
42
+ nvidia-cusparse-cu12==12.3.1.170
43
+ lm_eval==0.4.9.1
44
+ pure_eval==0.2.3
45
+ protobuf==6.33.2
46
+ DataProperty==1.1.0
47
+ nvidia-cudnn-cu12==9.1.0.70
48
+ accelerate==1.12.0
49
+ psutil==7.1.3
50
+ Jinja2==3.1.6
51
+ scikit-learn==1.8.0
52
+ nvidia-nccl-cu12==2.21.5
53
+ typing_extensions==4.15.0
54
+ pyzmq==27.1.0
55
+ mpmath==1.3.0
56
+ annotated-types==0.7.0
57
+ propcache==0.4.1
58
+ wandb==0.23.1
59
+ requests==2.32.5
60
+ ipython==9.8.0
61
+ more-itertools==10.8.0
62
+ nvidia-cuda-runtime-cu12==12.4.127
63
+ sacrebleu==2.5.1
64
+ httpx==0.28.1
65
+ huggingface-hub==0.36.0
66
+ MarkupSafe==3.0.3
67
+ nvidia-cusolver-cu12==11.6.1.9
68
+ gitdb==4.0.12
69
+ torchdata==0.11.0
70
+ sentry-sdk==2.48.0
71
+ sympy==1.13.1
72
+ safetensors==0.7.0
73
+ httpcore==1.0.9
74
+ portalocker==3.2.0
75
+ attrs==25.4.0
76
+ typing-inspection==0.4.2
77
+ ptyprocess==0.7.0
78
+ nvidia-cublas-cu12==12.4.5.8
79
+ numexpr==2.14.1
80
+ executing==2.2.1
81
+ networkx==3.6.1
82
+ threadpoolctl==3.6.0
83
+ nvidia-cusparselt-cu12==0.6.2
84
+ einops==0.8.1
85
+ zstandard==0.25.0
86
+ comm==0.2.3
87
+ six==1.17.0
88
+ packaging==25.0
89
+ tqdm-multiprocess==0.0.11
90
+ numpy==2.3.5
91
+ colorama==0.4.6
92
+ nvidia-cuda-cupti-cu12==12.4.127
93
+ jupyter_client==8.7.0
94
+ scipy==1.16.3
95
+ tornado==6.5.4
96
+ nltk==3.9.2
97
+ antlr4-python3-runtime==4.11.0
98
+ jupyter_core==5.9.1
99
+ sqlitedict==2.1.0
100
+ tzdata==2025.3
101
+ pytz==2025.2
102
+ Pygments==2.19.2
103
+ python-dotenv==1.2.1
104
+ cmake==4.2.0
105
+ tiktoken==0.12.0
106
+ PyYAML==6.0.3
107
+ datasets==4.4.1
108
+ pillow==12.0.0
109
+ math-verify==0.8.0
110
+ dill==0.4.0
111
+ nvidia-cuda-nvrtc-cu12==12.4.127
112
+ anyio==4.12.0
113
+ prompt_toolkit==3.0.52
114
+ filelock==3.20.1
115
+ jedi==0.19.2
116
+ frozenlist==1.8.0
117
+ tokenizers==0.21.4
118
+ grpcio==1.76.0
119
+ ninja==1.13.0
120
+ mbstrdecoder==1.1.4
121
+ flash-attn==2.7.3
122
+ aiosignal==1.4.0
123
+ tabledata==1.3.4
124
+ h11==0.16.0
125
+ absl-py==2.3.1
126
+ latex2sympy2_extended==1.10.2
127
+ torch==2.6.0
128
+ nest_asyncio==1.6.0
129
+ pip==25.3
130
+ aiohttp==3.13.2
131
+ pfzy==0.3.4
132
+ platformdirs==4.5.1
133
+ wheel==0.45.1
134
+ peft==0.17.0
135
+ debugpy==1.8.19
136
+ ipython_pygments_lexers==1.1.1
137
+ rouge_score==0.1.2
138
+ multidict==6.7.0
139
+ tcolorpy==0.1.7
140
+ nvidia-curand-cu12==10.3.5.147
141
+ pydantic_core==2.41.5
142
+ pytablewriter==1.2.1
143
+ charset-normalizer==3.4.4
144
+ transformers==4.51.3
145
+ word2number==1.1
146
+ jsonlines==4.0.0
147
+ stack_data==0.6.3
148
+ urllib3==2.6.2
149
+ decorator==5.2.1
150
+ python-dateutil==2.9.0.post0
151
+ pyarrow==22.0.0
152
+ traitlets==5.14.3
153
+ GitPython==3.1.45
154
+ tomli==2.0.1
155
+ more-itertools==10.3.0
156
+ inflect==7.3.1
157
+ zipp==3.19.2
158
+ jaraco.functools==4.0.1
159
+ autocommand==2.2.2
160
+ jaraco.collections==5.1.0
161
+ platformdirs==4.2.2
162
+ backports.tarfile==1.2.0
163
+ importlib_metadata==8.0.0
164
+ jaraco.text==3.12.1
165
+ typing_extensions==4.12.2
166
+ jaraco.context==5.3.0
167
+ typeguard==4.3.0
168
+ packaging==24.2
169
+ wheel==0.45.1
tb/20251231-0837/wandb/run-20251231_083738--dsmtp.code.1B.batch16.seqlen4096.context4096.warmup400.update1.steps40000.lr5e-5.cosine-202512310834/files/wandb-summary.json ADDED
@@ -0,0 +1 @@
 
 
1
+ {"loss_metrics/global_avg_loss":3.5242035388946533,"throughput(tps)":29851.614107024114,"time_metrics/end_to_end(s)":2.1953921742737292,"memory/max_active(GiB)":119.60354804992676,"_timestamp":1.767258330571418e+09,"memory/num_ooms":0,"time_metrics/data_loading(%)":0.05893591801712437,"time_metrics/data_loading(s)":0.0012938745319843292,"loss_metrics/global_avg_mtp_loss":2.688770055770874,"memory/max_active(%)":85.82660800240015,"loss_metrics/global_max_loss":3.867431163787842,"tflops":420.1594759014195,"memory/num_alloc_retries":0,"_step":40000,"memory/max_reserved(%)":88.07186436520833,"optimizer/grad_norm":0.7287646532058716,"_runtime":88210.702133884,"loss_metrics/global_avg_ntp_loss":0.8354334831237793,"_wandb":{"runtime":88210},"optimizer/lr":5e-06,"mfu(%)":42.483263488515625,"optimizer/skipped_step":0,"memory/max_reserved(GiB)":122.732421875}
tb/20251231-0837/wandb/run-20251231_083738--dsmtp.code.1B.batch16.seqlen4096.context4096.warmup400.update1.steps40000.lr5e-5.cosine-202512310834/logs/debug-core.log ADDED
@@ -0,0 +1,16 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {"time":"2025-12-31T08:37:38.477715818Z","level":"INFO","msg":"main: starting server","port-filename":"/tmp/tmp9ir1595i/port-3901702.txt","pid":3901702,"log-level":0,"disable-analytics":false,"shutdown-on-parent-exit":false,"enable-dcgm-profiling":false}
2
+ {"time":"2025-12-31T08:37:38.479587659Z","level":"INFO","msg":"server: will exit if parent process dies","ppid":3901702}
3
+ {"time":"2025-12-31T08:37:38.479566978Z","level":"INFO","msg":"server: accepting connections","addr":{"Name":"/tmp/wandb-3901702-3903170-2736290377/socket","Net":"unix"}}
4
+ {"time":"2025-12-31T08:37:38.65778368Z","level":"INFO","msg":"connection: ManageConnectionData: new connection created","id":"1(@)"}
5
+ {"time":"2025-12-31T08:37:38.664866914Z","level":"INFO","msg":"handleInformInit: received","streamId":"-dsmtp.code.1B.batch16.seqlen4096.context4096.warmup400.update1.steps40000.lr5e-5.cosine-202512310834","id":"1(@)"}
6
+ {"time":"2025-12-31T08:37:38.988721434Z","level":"INFO","msg":"handleInformInit: stream started","streamId":"-dsmtp.code.1B.batch16.seqlen4096.context4096.warmup400.update1.steps40000.lr5e-5.cosine-202512310834","id":"1(@)"}
7
+ {"time":"2026-01-01T09:07:50.693137528Z","level":"INFO","msg":"handleInformFinish: finish message received","streamId":"-dsmtp.code.1B.batch16.seqlen4096.context4096.warmup400.update1.steps40000.lr5e-5.cosine-202512310834","id":"1(@)"}
8
+ {"time":"2026-01-01T09:07:50.693292254Z","level":"INFO","msg":"handleInformFinish: stream closed","streamId":"-dsmtp.code.1B.batch16.seqlen4096.context4096.warmup400.update1.steps40000.lr5e-5.cosine-202512310834","id":"1(@)"}
9
+ {"time":"2026-01-01T09:08:12.419440178Z","level":"INFO","msg":"handleInformTeardown: server teardown initiated","id":"1(@)"}
10
+ {"time":"2026-01-01T09:08:12.419495994Z","level":"INFO","msg":"handleInformTeardown: server shutdown complete","id":"1(@)"}
11
+ {"time":"2026-01-01T09:08:12.41950792Z","level":"INFO","msg":"connection: closing","id":"1(@)"}
12
+ {"time":"2026-01-01T09:08:12.419553284Z","level":"INFO","msg":"connection: closed successfully","id":"1(@)"}
13
+ {"time":"2026-01-01T09:08:12.419557333Z","level":"INFO","msg":"connection: ManageConnectionData: connection closed","id":"1(@)"}
14
+ {"time":"2026-01-01T09:08:12.419563866Z","level":"INFO","msg":"server is shutting down"}
15
+ {"time":"2026-01-01T09:08:12.419751426Z","level":"INFO","msg":"server: listener closed","addr":{"Name":"/tmp/wandb-3901702-3903170-2736290377/socket","Net":"unix"}}
16
+ {"time":"2026-01-01T09:08:12.419802874Z","level":"INFO","msg":"server is closed"}
tb/20251231-0837/wandb/run-20251231_083738--dsmtp.code.1B.batch16.seqlen4096.context4096.warmup400.update1.steps40000.lr5e-5.cosine-202512310834/logs/debug-internal.log ADDED
@@ -0,0 +1,21 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {"time":"2025-12-31T08:37:38.665057765Z","level":"INFO","msg":"stream: starting","core version":"0.23.1"}
2
+ {"time":"2025-12-31T08:37:38.988512957Z","level":"INFO","msg":"stream: created new stream","id":"-dsmtp.code.1B.batch16.seqlen4096.context4096.warmup400.update1.steps40000.lr5e-5.cosine-202512310834"}
3
+ {"time":"2025-12-31T08:37:38.988613654Z","level":"INFO","msg":"handler: started","stream_id":"-dsmtp.code.1B.batch16.seqlen4096.context4096.warmup400.update1.steps40000.lr5e-5.cosine-202512310834"}
4
+ {"time":"2025-12-31T08:37:38.988714345Z","level":"INFO","msg":"stream: started","id":"-dsmtp.code.1B.batch16.seqlen4096.context4096.warmup400.update1.steps40000.lr5e-5.cosine-202512310834"}
5
+ {"time":"2025-12-31T08:37:38.988738297Z","level":"INFO","msg":"writer: started","stream_id":"-dsmtp.code.1B.batch16.seqlen4096.context4096.warmup400.update1.steps40000.lr5e-5.cosine-202512310834"}
6
+ {"time":"2025-12-31T08:37:38.988753655Z","level":"INFO","msg":"sender: started","stream_id":"-dsmtp.code.1B.batch16.seqlen4096.context4096.warmup400.update1.steps40000.lr5e-5.cosine-202512310834"}
7
+ {"time":"2025-12-31T11:49:59.759567501Z","level":"INFO","msg":"api: retrying error","error":"Post \"https://api.wandb.ai/files/zaydzuhri/fla/-dsmtp.code.1B.batch16.seqlen4096.context4096.warmup400.update1.steps40000.lr5e-5.cosine-202512310834/file_stream\": dial tcp 35.186.228.49:443: connect: connection refused"}
8
+ {"time":"2025-12-31T13:32:20.799679052Z","level":"INFO","msg":"flowcontrol: backed up, offloading to disk","recordNumber":26941}
9
+ {"time":"2025-12-31T13:32:20.80069231Z","level":"INFO","msg":"flowcontrol: unblocked","totalOffloaded":2}
10
+ {"time":"2025-12-31T21:52:16.560064623Z","level":"INFO","msg":"api: retrying error","error":"Post \"https://api.wandb.ai/files/zaydzuhri/fla/-dsmtp.code.1B.batch16.seqlen4096.context4096.warmup400.update1.steps40000.lr5e-5.cosine-202512310834/file_stream\": dial tcp 35.186.228.49:443: connect: connection refused"}
11
+ {"time":"2025-12-31T23:40:07.918310102Z","level":"INFO","msg":"api: retrying error","error":"Post \"https://api.wandb.ai/files/zaydzuhri/fla/-dsmtp.code.1B.batch16.seqlen4096.context4096.warmup400.update1.steps40000.lr5e-5.cosine-202512310834/file_stream\": dial tcp: lookup api.wandb.ai on 10.43.0.10:53: read udp 10.42.35.131:44189->10.43.0.10:53: i/o timeout"}
12
+ {"time":"2025-12-31T23:47:01.610252645Z","level":"INFO","msg":"api: retrying error","error":"Post \"https://api.wandb.ai/files/zaydzuhri/fla/-dsmtp.code.1B.batch16.seqlen4096.context4096.warmup400.update1.steps40000.lr5e-5.cosine-202512310834/file_stream\": dial tcp 35.186.228.49:443: connect: connection refused"}
13
+ {"time":"2026-01-01T08:38:59.64892198Z","level":"INFO","msg":"api: retrying error","error":"Post \"https://api.wandb.ai/files/zaydzuhri/fla/-dsmtp.code.1B.batch16.seqlen4096.context4096.warmup400.update1.steps40000.lr5e-5.cosine-202512310834/file_stream\": dial tcp 35.186.228.49:443: connect: connection refused"}
14
+ {"time":"2026-01-01T09:06:12.449377603Z","level":"INFO","msg":"flowcontrol: backed up, offloading to disk","recordNumber":129853}
15
+ {"time":"2026-01-01T09:06:12.451188339Z","level":"INFO","msg":"flowcontrol: unblocked","totalOffloaded":1}
16
+ {"time":"2026-01-01T09:07:50.503866067Z","level":"INFO","msg":"fileTransfer: Close: file transfer manager closed"}
17
+ {"time":"2026-01-01T09:07:50.688587533Z","level":"INFO","msg":"handler: operation stats","stats":{}}
18
+ {"time":"2026-01-01T09:07:50.69317498Z","level":"INFO","msg":"stream: closing","id":"-dsmtp.code.1B.batch16.seqlen4096.context4096.warmup400.update1.steps40000.lr5e-5.cosine-202512310834"}
19
+ {"time":"2026-01-01T09:07:50.693203953Z","level":"INFO","msg":"handler: closed","stream_id":"-dsmtp.code.1B.batch16.seqlen4096.context4096.warmup400.update1.steps40000.lr5e-5.cosine-202512310834"}
20
+ {"time":"2026-01-01T09:07:50.693272578Z","level":"INFO","msg":"sender: closed","stream_id":"-dsmtp.code.1B.batch16.seqlen4096.context4096.warmup400.update1.steps40000.lr5e-5.cosine-202512310834"}
21
+ {"time":"2026-01-01T09:07:50.693279066Z","level":"INFO","msg":"stream: closed","id":"-dsmtp.code.1B.batch16.seqlen4096.context4096.warmup400.update1.steps40000.lr5e-5.cosine-202512310834"}
tb/20251231-0837/wandb/run-20251231_083738--dsmtp.code.1B.batch16.seqlen4096.context4096.warmup400.update1.steps40000.lr5e-5.cosine-202512310834/logs/debug.log ADDED
@@ -0,0 +1,26 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ 2025-12-31 08:37:38,391 INFO MainThread:3901702 [wandb_setup.py:_flush():80] Current SDK version is 0.23.1
2
+ 2025-12-31 08:37:38,392 INFO MainThread:3901702 [wandb_setup.py:_flush():80] Configure stats pid to 3901702
3
+ 2025-12-31 08:37:38,392 INFO MainThread:3901702 [wandb_setup.py:_flush():80] Loading settings from /root/.config/wandb/settings
4
+ 2025-12-31 08:37:38,392 INFO MainThread:3901702 [wandb_setup.py:_flush():80] Loading settings from /root/flame/wandb/settings
5
+ 2025-12-31 08:37:38,392 INFO MainThread:3901702 [wandb_setup.py:_flush():80] Loading settings from environment variables
6
+ 2025-12-31 08:37:38,392 INFO MainThread:3901702 [wandb_init.py:setup_run_log_directory():714] Logging user logs to exp/dsmtp.code.1B.batch16.seqlen4096.context4096.warmup400.update1.steps40000.lr5e-5.cosine/tb/20251231-0837/wandb/run-20251231_083738--dsmtp.code.1B.batch16.seqlen4096.context4096.warmup400.update1.steps40000.lr5e-5.cosine-202512310834/logs/debug.log
7
+ 2025-12-31 08:37:38,392 INFO MainThread:3901702 [wandb_init.py:setup_run_log_directory():715] Logging internal logs to exp/dsmtp.code.1B.batch16.seqlen4096.context4096.warmup400.update1.steps40000.lr5e-5.cosine/tb/20251231-0837/wandb/run-20251231_083738--dsmtp.code.1B.batch16.seqlen4096.context4096.warmup400.update1.steps40000.lr5e-5.cosine-202512310834/logs/debug-internal.log
8
+ 2025-12-31 08:37:38,392 INFO MainThread:3901702 [wandb_init.py:init():841] calling init triggers
9
+ 2025-12-31 08:37:38,392 INFO MainThread:3901702 [wandb_init.py:init():846] wandb.init called with sweep_config: {}
10
+ config: {'_wandb': {}}
11
+ 2025-12-31 08:37:38,392 INFO MainThread:3901702 [wandb_init.py:init():889] starting backend
12
+ 2025-12-31 08:37:38,658 INFO MainThread:3901702 [wandb_init.py:init():892] sending inform_init request
13
+ 2025-12-31 08:37:38,663 INFO MainThread:3901702 [wandb_init.py:init():900] backend started and connected
14
+ 2025-12-31 08:37:38,666 INFO MainThread:3901702 [wandb_init.py:init():970] updated telemetry
15
+ 2025-12-31 08:37:38,673 INFO MainThread:3901702 [wandb_init.py:init():994] communicating run to backend with 90.0 second timeout
16
+ 2025-12-31 08:37:39,397 INFO MainThread:3901702 [wandb_init.py:init():1041] starting run threads in backend
17
+ 2025-12-31 08:37:39,496 INFO MainThread:3901702 [wandb_run.py:_console_start():2521] atexit reg
18
+ 2025-12-31 08:37:39,496 INFO MainThread:3901702 [wandb_run.py:_redirect():2369] redirect: wrap_raw
19
+ 2025-12-31 08:37:39,496 INFO MainThread:3901702 [wandb_run.py:_redirect():2438] Wrapping output streams.
20
+ 2025-12-31 08:37:39,496 INFO MainThread:3901702 [wandb_run.py:_redirect():2461] Redirects installed.
21
+ 2025-12-31 08:37:39,499 INFO MainThread:3901702 [wandb_init.py:init():1081] run started, returning control to user process
22
+ 2026-01-01 09:07:50,099 INFO MainThread:3901702 [wandb_run.py:_finish():2287] finishing run zaydzuhri/fla/-dsmtp.code.1B.batch16.seqlen4096.context4096.warmup400.update1.steps40000.lr5e-5.cosine-202512310834
23
+ 2026-01-01 09:07:50,099 INFO MainThread:3901702 [wandb_run.py:_atexit_cleanup():2486] got exitcode: 0
24
+ 2026-01-01 09:07:50,100 INFO MainThread:3901702 [wandb_run.py:_restore():2468] restore
25
+ 2026-01-01 09:07:50,100 INFO MainThread:3901702 [wandb_run.py:_restore():2474] restore done
26
+ 2026-01-01 09:07:50,692 INFO MainThread:3901702 [wandb_run.py:_footer_sync_info():3862] logging synced files