kiatkock committed on
Commit
634be04
·
verified ·
1 Parent(s): abecedb

tickers_75_7_Channels_with_temporal_tape model training @ 2025-10-22 00:59:52

Browse files
checkpoint-750/config.json ADDED
@@ -0,0 +1,76 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "_name_or_path": "time_moe_50m",
3
+ "apply_aux_loss": true,
4
+ "architectures": [
5
+ "TimeMoeForPrediction"
6
+ ],
7
+ "attention_dropout": 0.0,
8
+ "auto_map": {
9
+ "AutoConfig": "Maple728/TimeMoE-50M--configuration_time_moe.TimeMoeConfig",
10
+ "AutoModelForCausalLM": "Maple728/TimeMoE-50M--modeling_time_moe.TimeMoeForPrediction"
11
+ },
12
+ "channel_configs": [
13
+ [
14
+ 63,
15
+ 1,
16
+ 1
17
+ ],
18
+ [
19
+ 6,
20
+ 1,
21
+ 4
22
+ ],
23
+ [
24
+ 6,
25
+ 1,
26
+ 5
27
+ ],
28
+ [
29
+ 10,
30
+ 1,
31
+ 1
32
+ ],
33
+ [
34
+ 5,
35
+ 1,
36
+ 1
37
+ ],
38
+ [
39
+ 5,
40
+ 1,
41
+ 1
42
+ ],
43
+ [
44
+ 5,
45
+ 1,
46
+ 2
47
+ ]
48
+ ],
49
+ "embedding_hidden_size": 128,
50
+ "hidden_act": "silu",
51
+ "hidden_size": 384,
52
+ "horizon_lengths": [
53
+ 1,
54
+ 8,
55
+ 32,
56
+ 64
57
+ ],
58
+ "initializer_range": 0.02,
59
+ "input_size": 42,
60
+ "intermediate_size": 1536,
61
+ "max_position_embeddings": 4096,
62
+ "model_type": "time_moe",
63
+ "num_attention_heads": 12,
64
+ "num_experts": 8,
65
+ "num_experts_per_tok": 2,
66
+ "num_hidden_layers": 12,
67
+ "num_key_value_heads": 12,
68
+ "rms_norm_eps": 1e-06,
69
+ "rope_theta": 10000,
70
+ "router_aux_loss_factor": 0.02,
71
+ "tie_word_embeddings": false,
72
+ "torch_dtype": "float32",
73
+ "transformers_version": "4.40.1",
74
+ "use_cache": true,
75
+ "use_dense": false
76
+ }
checkpoint-750/generation_config.json ADDED
@@ -0,0 +1,4 @@
 
 
 
 
 
1
+ {
2
+ "_from_model_config": true,
3
+ "transformers_version": "4.40.1"
4
+ }
checkpoint-750/model.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:1b8a9d93f7fe6d70ed5ed4f6a8cad2934f1493bad03eac64cc9928b29b5c3895
3
+ size 523322016
checkpoint-750/trainer_state.json ADDED
@@ -0,0 +1,157 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "best_metric": 0.3500325083732605,
3
+ "best_model_checkpoint": "/home/yinkiat/logs/time_moe_tickers_75_7_Channels_with_temporal_tape/checkpoint-750",
4
+ "epoch": 0.9876543209876543,
5
+ "eval_steps": 250,
6
+ "global_step": 750,
7
+ "is_hyper_param_search": false,
8
+ "is_local_process_zero": true,
9
+ "is_world_process_zero": true,
10
+ "log_history": [
11
+ {
12
+ "epoch": 0.0013168724279835392,
13
+ "grad_norm": 0.6566795706748962,
14
+ "learning_rate": 9.999994646151714e-05,
15
+ "loss": 0.5211,
16
+ "step": 1
17
+ },
18
+ {
19
+ "epoch": 0.06584362139917696,
20
+ "grad_norm": 0.15640847384929657,
21
+ "learning_rate": 9.986627313458798e-05,
22
+ "loss": 0.407,
23
+ "step": 50
24
+ },
25
+ {
26
+ "epoch": 0.13168724279835392,
27
+ "grad_norm": 0.10551510006189346,
28
+ "learning_rate": 9.946652316831454e-05,
29
+ "loss": 0.3845,
30
+ "step": 100
31
+ },
32
+ {
33
+ "epoch": 0.19753086419753085,
34
+ "grad_norm": 0.2655992805957794,
35
+ "learning_rate": 9.880502668597476e-05,
36
+ "loss": 0.3718,
37
+ "step": 150
38
+ },
39
+ {
40
+ "epoch": 0.26337448559670784,
41
+ "grad_norm": 0.13201837241649628,
42
+ "learning_rate": 9.788886047565378e-05,
43
+ "loss": 0.3616,
44
+ "step": 200
45
+ },
46
+ {
47
+ "epoch": 0.3292181069958848,
48
+ "grad_norm": 0.1095639169216156,
49
+ "learning_rate": 9.67278258201918e-05,
50
+ "loss": 0.3564,
51
+ "step": 250
52
+ },
53
+ {
54
+ "epoch": 0.3292181069958848,
55
+ "eval_loss": 0.35878849029541016,
56
+ "eval_runtime": 193.7553,
57
+ "eval_samples_per_second": 365.022,
58
+ "eval_steps_per_second": 11.411,
59
+ "step": 250
60
+ },
61
+ {
62
+ "epoch": 0.3950617283950617,
63
+ "grad_norm": 0.21216067671775818,
64
+ "learning_rate": 9.533434364159762e-05,
65
+ "loss": 0.3487,
66
+ "step": 300
67
+ },
68
+ {
69
+ "epoch": 0.4609053497942387,
70
+ "grad_norm": 0.13290716707706451,
71
+ "learning_rate": 9.372332162017127e-05,
72
+ "loss": 0.3452,
73
+ "step": 350
74
+ },
75
+ {
76
+ "epoch": 0.5267489711934157,
77
+ "grad_norm": 0.25531283020973206,
78
+ "learning_rate": 9.191199470991561e-05,
79
+ "loss": 0.3411,
80
+ "step": 400
81
+ },
82
+ {
83
+ "epoch": 0.5925925925925926,
84
+ "grad_norm": 0.2834431827068329,
85
+ "learning_rate": 8.991974075642621e-05,
86
+ "loss": 0.3426,
87
+ "step": 450
88
+ },
89
+ {
90
+ "epoch": 0.6584362139917695,
91
+ "grad_norm": 0.31403204798698425,
92
+ "learning_rate": 8.776787318980746e-05,
93
+ "loss": 0.3417,
94
+ "step": 500
95
+ },
96
+ {
97
+ "epoch": 0.6584362139917695,
98
+ "eval_loss": 0.35160061717033386,
99
+ "eval_runtime": 194.3311,
100
+ "eval_samples_per_second": 363.941,
101
+ "eval_steps_per_second": 11.377,
102
+ "step": 500
103
+ },
104
+ {
105
+ "epoch": 0.7242798353909465,
106
+ "grad_norm": 0.2428826540708542,
107
+ "learning_rate": 8.547941301041661e-05,
108
+ "loss": 0.3404,
109
+ "step": 550
110
+ },
111
+ {
112
+ "epoch": 0.7901234567901234,
113
+ "grad_norm": 0.06616313010454178,
114
+ "learning_rate": 8.307884250676648e-05,
115
+ "loss": 0.3439,
116
+ "step": 600
117
+ },
118
+ {
119
+ "epoch": 0.8559670781893004,
120
+ "grad_norm": 0.3364473581314087,
121
+ "learning_rate": 8.059184334034937e-05,
122
+ "loss": 0.342,
123
+ "step": 650
124
+ },
125
+ {
126
+ "epoch": 0.9218106995884774,
127
+ "grad_norm": 0.21412700414657593,
128
+ "learning_rate": 7.804502179938985e-05,
129
+ "loss": 0.3396,
130
+ "step": 700
131
+ },
132
+ {
133
+ "epoch": 0.9876543209876543,
134
+ "grad_norm": 0.2532865107059479,
135
+ "learning_rate": 7.546562416080285e-05,
136
+ "loss": 0.3392,
137
+ "step": 750
138
+ },
139
+ {
140
+ "epoch": 0.9876543209876543,
141
+ "eval_loss": 0.3500325083732605,
142
+ "eval_runtime": 196.9283,
143
+ "eval_samples_per_second": 359.141,
144
+ "eval_steps_per_second": 11.227,
145
+ "step": 750
146
+ }
147
+ ],
148
+ "logging_steps": 50,
149
+ "max_steps": 1518,
150
+ "num_input_tokens_seen": 0,
151
+ "num_train_epochs": 2,
152
+ "save_steps": 250,
153
+ "total_flos": 4746958110720000.0,
154
+ "train_batch_size": 16,
155
+ "trial_name": null,
156
+ "trial_params": null
157
+ }
checkpoint-750/training_args.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:3548dcc79e6ba8c12ab5ce14e8d5d0a8edd35cabeee7b91e3a58bdaef9c8b298
3
+ size 5585
model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:3633c26b8e782677bdf040812ca4d90cec5bb1193143f36ee5fad386d22081a1
3
- size 523327312
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:1b8a9d93f7fe6d70ed5ed4f6a8cad2934f1493bad03eac64cc9928b29b5c3895
3
+ size 523322016
tb_logs/events.out.tfevents.1761062232.luyao1.774292.0 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:5f0beaaa2566bfc3eb6563e12232bd60854e3ef8d98eb16414f48d6903250a74
3
+ size 13977
training_args.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:5edc06d66733730a64bef13b56324503876e24bbf51a066bf9182c51e90acc77
3
  size 5585
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:3548dcc79e6ba8c12ab5ce14e8d5d0a8edd35cabeee7b91e3a58bdaef9c8b298
3
  size 5585