AiAF committed on
Commit
869ffa4
·
verified ·
1 Parent(s): 6bc8226

Upload config.yml with huggingface_hub

Browse files
Files changed (1) hide show
  1. config.yml +64 -0
config.yml ADDED
@@ -0,0 +1,64 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
---
# Axolotl pretraining config for mistralai/Mistral-7B-v0.1.
# Empty values (e.g. `fp16:`) deliberately fall back to Axolotl defaults.
base_model: mistralai/Mistral-7B-v0.1
# optionally might have model_type or tokenizer_type
model_type: MistralForCausalLM
tokenizer_type: LlamaTokenizer
# Automatically upload checkpoint and final model to HF
hub_model_id: AiAF/UFOs-Pretraining-V1

load_in_8bit: false
load_in_4bit: false
strict: false

datasets:
  - path: AiAF/pretraining.jsonl
    type: completion

dataset_prepared_path:
val_set_size: 0.05
output_dir: ./outputs/out

sequence_len: 8192
sample_packing: true
pad_to_sequence_len: true
eval_sample_packing: false

max_steps: 100000

# Weights & Biases logging; wandb_log_model is a string env value, not a boolean.
wandb_project: "UFO_LLM_Pretraining"
wandb_entity:
wandb_watch: "all"
wandb_name: "UFO_LLM_Pretraining-V1.0"
wandb_log_model: "false"

gradient_accumulation_steps: 4
micro_batch_size: 2
num_epochs: 4
optimizer: adamw_bnb_8bit
lr_scheduler: cosine
learning_rate: 0.000005

train_on_inputs: false
group_by_length: false
bf16: auto
fp16:
tf32: false

gradient_checkpointing: true
early_stopping_patience:
resume_from_checkpoint:
local_rank:
logging_steps: 1
xformers_attention:
flash_attention: true

warmup_steps: 10
evals_per_epoch: 4
eval_table_size:
eval_max_new_tokens: 128
saves_per_epoch: 1
debug:
deepspeed:
weight_decay: 0.0
fsdp:
fsdp_config:
special_tokens: