amirali1985 committed on
Commit
c2e3483
·
verified ·
1 Parent(s): f74fc41

Register qwen2.5-0.5b/base

Browse files
Files changed (1) hide show
  1. model_catalog.json +28 -13
model_catalog.json CHANGED
@@ -12,11 +12,11 @@
12
  "proxy_dataset": "openwebtext/subset_20M_seed0.jsonl",
13
  "base_model": "Qwen/Qwen2.5-0.5B",
14
  "epochs": 1,
15
- "lr": 5e-05,
16
  "batch_size": 16,
17
  "seed": 0,
18
  "n_params": 494032768,
19
- "timestamp": "2026-04-24T22:05:21.738206+00:00",
20
  "config_path": "evals/math/qwen2.5-0.5b/math_1pct_seed0/config.json"
21
  },
22
  {
@@ -32,11 +32,11 @@
32
  "proxy_dataset": "openwebtext/subset_20M_seed0.jsonl",
33
  "base_model": "Qwen/Qwen2.5-0.5B",
34
  "epochs": 1,
35
- "lr": 5e-05,
36
  "batch_size": 16,
37
  "seed": 1,
38
  "n_params": 494032768,
39
- "timestamp": "2026-04-24T22:21:25.249625+00:00",
40
  "config_path": "evals/math/qwen2.5-0.5b/math_1pct_seed1/config.json"
41
  },
42
  {
@@ -52,11 +52,11 @@
52
  "proxy_dataset": "openwebtext/subset_20M_seed0.jsonl",
53
  "base_model": "Qwen/Qwen2.5-0.5B",
54
  "epochs": 1,
55
- "lr": 5e-05,
56
  "batch_size": 16,
57
  "seed": 2,
58
  "n_params": 494032768,
59
- "timestamp": "2026-04-24T22:05:18.139847+00:00",
60
  "config_path": "evals/math/qwen2.5-0.5b/math_1pct_seed2/config.json"
61
  },
62
  {
@@ -72,11 +72,11 @@
72
  "proxy_dataset": "openwebtext/subset_20M_seed0.jsonl",
73
  "base_model": "Qwen/Qwen2.5-0.5B",
74
  "epochs": 1,
75
- "lr": 5e-05,
76
  "batch_size": 16,
77
  "seed": 3,
78
  "n_params": 494032768,
79
- "timestamp": "2026-04-24T22:21:25.236053+00:00",
80
  "config_path": "evals/math/qwen2.5-0.5b/math_1pct_seed3/config.json"
81
  },
82
  {
@@ -92,11 +92,11 @@
92
  "proxy_dataset": "openwebtext/subset_20M_seed0.jsonl",
93
  "base_model": "Qwen/Qwen2.5-0.5B",
94
  "epochs": 1,
95
- "lr": 5e-05,
96
  "batch_size": 16,
97
  "seed": 4,
98
  "n_params": 494032768,
99
- "timestamp": "2026-04-24T22:30:32.396496+00:00",
100
  "config_path": "evals/math/qwen2.5-0.5b/math_1pct_seed4/config.json"
101
  },
102
  {
@@ -112,11 +112,11 @@
112
  "proxy_dataset": "openwebtext/subset_20M_seed0.jsonl",
113
  "base_model": "Qwen/Qwen2.5-0.5B",
114
  "epochs": 1,
115
- "lr": 5e-05,
116
  "batch_size": 16,
117
  "seed": 5,
118
  "n_params": 494032768,
119
- "timestamp": "2026-04-24T22:30:32.451791+00:00",
120
  "config_path": "evals/math/qwen2.5-0.5b/math_1pct_seed5/config.json"
121
  },
122
  {
@@ -126,6 +126,21 @@
126
  "mode": "base",
127
  "base_model": "Qwen/Qwen2.5-0.5B",
128
  "n_params": 494032768,
129
- "timestamp": "2026-04-25T17:44:49.551356+00:00"
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
130
  }
131
  ]
 
12
  "proxy_dataset": "openwebtext/subset_20M_seed0.jsonl",
13
  "base_model": "Qwen/Qwen2.5-0.5B",
14
  "epochs": 1,
15
+ "lr": 0.0002,
16
  "batch_size": 16,
17
  "seed": 0,
18
  "n_params": 494032768,
19
+ "timestamp": "2026-04-25T17:40:21.084187+00:00",
20
  "config_path": "evals/math/qwen2.5-0.5b/math_1pct_seed0/config.json"
21
  },
22
  {
 
32
  "proxy_dataset": "openwebtext/subset_20M_seed0.jsonl",
33
  "base_model": "Qwen/Qwen2.5-0.5B",
34
  "epochs": 1,
35
+ "lr": 0.0002,
36
  "batch_size": 16,
37
  "seed": 1,
38
  "n_params": 494032768,
39
+ "timestamp": "2026-04-25T17:40:21.062095+00:00",
40
  "config_path": "evals/math/qwen2.5-0.5b/math_1pct_seed1/config.json"
41
  },
42
  {
 
52
  "proxy_dataset": "openwebtext/subset_20M_seed0.jsonl",
53
  "base_model": "Qwen/Qwen2.5-0.5B",
54
  "epochs": 1,
55
+ "lr": 0.0002,
56
  "batch_size": 16,
57
  "seed": 2,
58
  "n_params": 494032768,
59
+ "timestamp": "2026-04-25T17:40:21.054540+00:00",
60
  "config_path": "evals/math/qwen2.5-0.5b/math_1pct_seed2/config.json"
61
  },
62
  {
 
72
  "proxy_dataset": "openwebtext/subset_20M_seed0.jsonl",
73
  "base_model": "Qwen/Qwen2.5-0.5B",
74
  "epochs": 1,
75
+ "lr": 0.0002,
76
  "batch_size": 16,
77
  "seed": 3,
78
  "n_params": 494032768,
79
+ "timestamp": "2026-04-25T17:40:21.065292+00:00",
80
  "config_path": "evals/math/qwen2.5-0.5b/math_1pct_seed3/config.json"
81
  },
82
  {
 
92
  "proxy_dataset": "openwebtext/subset_20M_seed0.jsonl",
93
  "base_model": "Qwen/Qwen2.5-0.5B",
94
  "epochs": 1,
95
+ "lr": 0.0002,
96
  "batch_size": 16,
97
  "seed": 4,
98
  "n_params": 494032768,
99
+ "timestamp": "2026-04-25T17:40:21.046221+00:00",
100
  "config_path": "evals/math/qwen2.5-0.5b/math_1pct_seed4/config.json"
101
  },
102
  {
 
112
  "proxy_dataset": "openwebtext/subset_20M_seed0.jsonl",
113
  "base_model": "Qwen/Qwen2.5-0.5B",
114
  "epochs": 1,
115
+ "lr": 0.0002,
116
  "batch_size": 16,
117
  "seed": 5,
118
  "n_params": 494032768,
119
+ "timestamp": "2026-04-25T17:40:21.071683+00:00",
120
  "config_path": "evals/math/qwen2.5-0.5b/math_1pct_seed5/config.json"
121
  },
122
  {
 
126
  "mode": "base",
127
  "base_model": "Qwen/Qwen2.5-0.5B",
128
  "n_params": 494032768,
129
+ "timestamp": "2026-04-25T17:57:31.076340+00:00"
130
+ },
131
+ {
132
+ "name": "qwen2.5-0.5b/owt20M",
133
+ "status": "VALID",
134
+ "status_note": "",
135
+ "mode": "clean",
136
+ "train_data_manifest": "openwebtext/subset_20M_seed0.jsonl",
137
+ "proxy_dataset": "openwebtext/subset_20M_seed0.jsonl",
138
+ "base_model": "Qwen/Qwen2.5-0.5B",
139
+ "epochs": 1,
140
+ "lr": 0.0002,
141
+ "batch_size": 16,
142
+ "n_params": 494032768,
143
+ "timestamp": "2026-04-25T17:40:15.553926+00:00",
144
+ "config_path": "evals/qwen2.5-0.5b/owt20M/config.json"
145
  }
146
  ]