russellbal committed on
Commit
2fdf0ce
·
verified ·
1 Parent(s): 92e9813

Upload ANDREA-12M-TRAIN.json with huggingface_hub

Browse files
Files changed (1) hide show
  1. ANDREA-12M-TRAIN.json +45 -0
ANDREA-12M-TRAIN.json ADDED
@@ -0,0 +1,45 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "name": "megachat-v8-embd384-chatbot",
3
+ "dataset": "megachat-v8",
4
+ "curriculum": "firehose",
5
+ "config": {
6
+ "n_embd": 384,
7
+ "n_head": 12,
8
+ "n_layer": 6,
9
+ "block_size": 1024,
10
+ "vocab_type": "harris",
11
+ "segments": 2048,
12
+ "lr": 0.0004,
13
+ "steps": 60000,
14
+ "checkpoint_every": 100,
15
+ "sample_every": 100,
16
+ "sample_tokens": 420,
17
+ "lr_schedule": "cosine",
18
+ "batch_size": 8,
19
+ "exclude_sources": [
20
+ "real-tool-calls", "synthetic-bash", "tool-calls",
21
+ "hermes3-code", "hermes3-math",
22
+ "chat", "smoltalk", "oasst", "dolly",
23
+ "unfirehose-chat", "synthetic-chat",
24
+ "irc", "unweapon",
25
+ "repo-docs", "repo-docstrings", "repo-commits",
26
+ "gutenberg"
27
+ ],
28
+ "source_floors": {
29
+ "hermes3-general": 0.8,
30
+ "hermes3-creative": 0.7,
31
+ "hermes3-roleplay": 0.7,
32
+ "dictionary": 0.7,
33
+ "gutenberg": 0.7
34
+ },
35
+ "bandit_focus_count": 2,
36
+ "bandit_dice_sides": 3
37
+ },
38
+ "_notes": {
39
+ "goal": "ANDREA phase 2 — focused curriculum, 5 arms, 2-eye bandit",
40
+ "params": "12.8M",
41
+ "arms": "hermes3-general, hermes3-creative, hermes3-roleplay, dictionary, gutenberg",
42
+ "bandit": "2 focus eyes, 1d3: 0=2random, 1=1random+1bandit, 2=2bandit",
43
+ "estimated_time": "remaining ~18K steps"
44
+ }
45
+ }