AlgoDriveAI committed on
Commit
3ebda29
·
verified ·
1 Parent(s): abe8d47

Upload 4 files

Browse files
Files changed (4) hide show
  1. config.json +18 -0
  2. tokenizer.json +0 -0
  3. tokenizer_config.json +21 -0
  4. training_config.json +75 -0
config.json ADDED
@@ -0,0 +1,18 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "vocab_size": 32064,
3
+ "d_model": 1280,
4
+ "n_layers": 30,
5
+ "n_heads": 10,
6
+ "q_lora_rank": 640,
7
+ "kv_lora_rank": 320,
8
+ "qk_nope_head_dim": 64,
9
+ "qk_rope_head_dim": 64,
10
+ "v_head_dim": 128,
11
+ "ff_hidden_mult": 3.5,
12
+ "qk_norm": true,
13
+ "max_seq_len": 2048,
14
+ "attn_dropout": 0.05,
15
+ "resid_dropout": 0.05,
16
+ "emb_dropout": 0.05,
17
+ "label_smoothing": 0.05
18
+ }
tokenizer.json ADDED
The diff for this file is too large to render. See raw diff
 
tokenizer_config.json ADDED
@@ -0,0 +1,21 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "add_prefix_space": null,
3
+ "backend": "tokenizers",
4
+ "bos_token": "<s>",
5
+ "clean_up_tokenization_spaces": false,
6
+ "eos_token": "</s>",
7
+ "extra_special_tokens": [
8
+ "<|endoftext|>"
9
+ ],
10
+ "is_local": false,
11
+ "legacy": false,
12
+ "local_files_only": false,
13
+ "model_max_length": 1000000000,
14
+ "pad_token": "<|endoftext|>",
15
+ "resume_download": true,
16
+ "sp_model_kwargs": {},
17
+ "spaces_between_special_tokens": false,
18
+ "tokenizer_class": "TokenizersBackend",
19
+ "unk_token": "<unk>",
20
+ "use_default_system_prompt": false
21
+ }
training_config.json ADDED
@@ -0,0 +1,75 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "seed": 42,
3
+ "outdir": "/workspace/checkpoints_v2_30L_1280_700M/",
4
+ "use_wandb": false,
5
+ "resume_from_checkpoint": false,
6
+ "force_next_epoch": false,
7
+ "manual_batch_override": true,
8
+ "manual_batch_size": 8,
9
+ "manual_accum_steps": 16,
10
+ "manual_gradient_checkpointing": false,
11
+ "moving_avg_beta": 0.98,
12
+ "save_every_epochs": 1,
13
+ "eval_every_epochs": 1,
14
+ "vocab_name": "mistralai/Mistral-7B-v0.1",
15
+ "doc_eos_token": "<|endoftext|>",
16
+ "vocab_pad_multiple": 64,
17
+ "tokenizer_download_timeout": 30,
18
+ "d_model": 1280,
19
+ "n_layers": 30,
20
+ "n_heads": 10,
21
+ "q_lora_rank": 640,
22
+ "kv_lora_rank": 320,
23
+ "qk_nope_head_dim": 64,
24
+ "qk_rope_head_dim": 64,
25
+ "v_head_dim": 128,
26
+ "ff_mult": 3.5,
27
+ "qk_norm": true,
28
+ "use_compile": true,
29
+ "attn_dropout": 0.05,
30
+ "resid_dropout": 0.05,
31
+ "emb_dropout": 0.05,
32
+ "label_smoothing": 0.05,
33
+ "dataset_name": "AlgoDriveAI/Cosmopedia_Math_v2_Alpha",
34
+ "context_len": 2048,
35
+ "batch_size": 8,
36
+ "accum_steps": 16,
37
+ "num_epochs": 2,
38
+ "max_lr": 0.00015,
39
+ "min_lr": 1.5e-05,
40
+ "warmup_ratio": 0.05,
41
+ "decay_ratio": 0.15,
42
+ "weight_decay": 0.1,
43
+ "beta1": 0.9,
44
+ "beta2": 0.95,
45
+ "clip_norm": 1.0,
46
+ "decay_embeddings": false,
47
+ "num_workers": 16,
48
+ "prefetch_factor": 4,
49
+ "pin_memory": true,
50
+ "use_dataset_cache": false,
51
+ "dataset_cache_dir": "/workspace/tokenized_cache_v2b",
52
+ "use_double_eos": false,
53
+ "use_memmap_token_cache": true,
54
+ "memmap_cache_dir": "/workspace/memmap_cache_v2b",
55
+ "memmap_write_workers": 8,
56
+ "use_gradient_checkpointing": false,
57
+ "run_dataloader_diagnostic": true,
58
+ "diagnostic_batches": 50,
59
+ "min_batch_size": 1,
60
+ "max_batch_size": 64,
61
+ "min_effective_batch": 128,
62
+ "max_accum_steps": 64,
63
+ "enable_dynamic_oom_recovery": true,
64
+ "max_oom_retries": 5,
65
+ "oom_batch_size_reduction_factor": 0.7,
66
+ "fallback_context_lens": [
67
+ 2048,
68
+ 1024,
69
+ 512
70
+ ],
71
+ "memory_warning_threshold": 0.95,
72
+ "reshuffle_each_epoch": true,
73
+ "shuffle_before_packing": true,
74
+ "num_packing_offsets": 1
75
+ }