tgetsov committed on
Commit
703a676
·
verified ·
1 Parent(s): bda703f

Upload adapter_config.json with huggingface_hub

Browse files
Files changed (1) hide show
  1. adapter_config.json +58 -0
adapter_config.json ADDED
@@ -0,0 +1,58 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "adapter_path": "train/adapters",
3
+ "batch_size": 1,
4
+ "clear_cache_threshold": 0,
5
+ "config": "train/lora_config.yaml",
6
+ "data": "train/data",
7
+ "fine_tune_type": "lora",
8
+ "grad_accumulation_steps": 16,
9
+ "grad_checkpoint": true,
10
+ "iters": 350,
11
+ "learning_rate": 0.0001,
12
+ "lora_parameters": {
13
+ "rank": 32,
14
+ "scale": 20.0,
15
+ "dropout": 0.0,
16
+ "keys": [
17
+ "self_attn.q_proj",
18
+ "self_attn.k_proj",
19
+ "self_attn.v_proj",
20
+ "self_attn.o_proj",
21
+ "mlp.gate_proj",
22
+ "mlp.up_proj",
23
+ "mlp.down_proj"
24
+ ]
25
+ },
26
+ "lr_schedule": {
27
+ "name": "cosine_decay",
28
+ "warmup": 20,
29
+ "arguments": [
30
+ 0.0001,
31
+ 350,
32
+ 1e-06
33
+ ]
34
+ },
35
+ "mask_prompt": true,
36
+ "max_seq_length": 8192,
37
+ "model": "mlx-community/Qwen2.5-14B-Instruct-4bit",
38
+ "num_layers": 16,
39
+ "optimizer": "adamw",
40
+ "optimizer_config": {
41
+ "adam": {},
42
+ "adamw": {},
43
+ "muon": {},
44
+ "sgd": {},
45
+ "adafactor": {}
46
+ },
47
+ "project_name": null,
48
+ "report_to": null,
49
+ "resume_adapter_file": null,
50
+ "save_every": 50,
51
+ "seed": 42,
52
+ "steps_per_eval": 50,
53
+ "steps_per_report": 10,
54
+ "test": false,
55
+ "test_batches": 500,
56
+ "train": true,
57
+ "val_batches": 25
58
+ }