Lollo9898 committed on
Commit
2dea6af
·
verified ·
1 Parent(s): d7c7251

Add lora_tuned_stage2

Browse files
lora_tuned_stage2/checkpoint-15000/adapter_config.json ADDED
@@ -0,0 +1,128 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "alpha_pattern": {},
3
+ "auto_mapping": null,
4
+ "base_model_name_or_path": "/leonardo_scratch/large/userexternal/dbucciar/hf-cache/hf-cache/image_first_after15k_after_lvis_idefics",
5
+ "bias": "none",
6
+ "fan_in_fan_out": false,
7
+ "inference_mode": false,
8
+ "init_lora_weights": "gaussian",
9
+ "layer_replication": null,
10
+ "layers_pattern": null,
11
+ "layers_to_transform": null,
12
+ "loftq_config": {},
13
+ "lora_alpha": 16,
14
+ "lora_dropout": 0.1,
15
+ "megatron_config": null,
16
+ "megatron_core": "megatron.core",
17
+ "modules_to_save": null,
18
+ "peft_type": "LORA",
19
+ "r": 64,
20
+ "rank_pattern": {},
21
+ "revision": null,
22
+ "target_modules": [
23
+ "text_model.layers.15.self_attn.k_proj",
24
+ "text_model.layers.19.self_attn.v_proj",
25
+ "text_model.layers.10.self_attn.k_proj",
26
+ "text_model.layers.7.self_attn.k_proj",
27
+ "text_model.layers.16.self_attn.q_proj",
28
+ "text_model.layers.16.self_attn.v_proj",
29
+ "30.self_attn.v_proj",
30
+ "text_model.layers.25.self_attn.q_proj",
31
+ "27.self_attn.k_proj",
32
+ "text_model.layers.26.self_attn.q_proj",
33
+ "text_model.layers.11.self_attn.v_proj",
34
+ "text_model.layers.20.self_attn.k_proj",
35
+ "text_model.layers.21.self_attn.v_proj",
36
+ "31.self_attn.v_proj",
37
+ "text_model.layers.0.self_attn.k_proj",
38
+ "gate_proj",
39
+ "text_model.layers.16.self_attn.k_proj",
40
+ "27.self_attn.v_proj",
41
+ "up_proj",
42
+ "text_model.layers.25.self_attn.v_proj",
43
+ "text_model.layers.23.self_attn.k_proj",
44
+ "text_model.layers.13.self_attn.v_proj",
45
+ "down_proj",
46
+ "text_model.layers.5.self_attn.k_proj",
47
+ "text_model.layers.24.self_attn.k_proj",
48
+ "text_model.layers.0.self_attn.v_proj",
49
+ "text_model.layers.14.self_attn.q_proj",
50
+ "text_model.layers.1.self_attn.k_proj",
51
+ "text_model.layers.15.self_attn.v_proj",
52
+ "text_model.layers.14.self_attn.k_proj",
53
+ "text_model.layers.17.self_attn.k_proj",
54
+ "text_model.layers.7.self_attn.q_proj",
55
+ "text_model.layers.8.self_attn.q_proj",
56
+ "text_model.layers.11.self_attn.k_proj",
57
+ "text_model.layers.3.self_attn.q_proj",
58
+ "text_model.layers.18.self_attn.v_proj",
59
+ "text_model.layers.19.self_attn.k_proj",
60
+ "28.self_attn.v_proj",
61
+ "text_model.layers.15.self_attn.q_proj",
62
+ "text_model.layers.9.self_attn.k_proj",
63
+ "text_model.layers.13.self_attn.k_proj",
64
+ "text_model.layers.20.self_attn.v_proj",
65
+ "text_model.layers.2.self_attn.q_proj",
66
+ "28.self_attn.k_proj",
67
+ "text_model.layers.1.self_attn.v_proj",
68
+ "29.self_attn.k_proj",
69
+ "text_model.layers.4.self_attn.k_proj",
70
+ "text_model.layers.9.self_attn.v_proj",
71
+ "text_model.layers.12.self_attn.v_proj",
72
+ "text_model.layers.17.self_attn.v_proj",
73
+ "text_model.layers.10.self_attn.q_proj",
74
+ "text_model.layers.22.self_attn.k_proj",
75
+ "text_model.layers.20.self_attn.q_proj",
76
+ "lm_head",
77
+ "text_model.layers.2.self_attn.v_proj",
78
+ "text_model.layers.1.self_attn.q_proj",
79
+ "text_model.layers.18.self_attn.q_proj",
80
+ "31.self_attn.q_proj",
81
+ "text_model.layers.6.self_attn.q_proj",
82
+ "text_model.layers.21.self_attn.k_proj",
83
+ "text_model.layers.4.self_attn.v_proj",
84
+ "text_model.layers.25.self_attn.k_proj",
85
+ "text_model.layers.19.self_attn.q_proj",
86
+ "text_model.layers.26.self_attn.k_proj",
87
+ "text_model.layers.8.self_attn.k_proj",
88
+ "30.self_attn.k_proj",
89
+ "text_model.layers.5.self_attn.v_proj",
90
+ "text_model.layers.8.self_attn.v_proj",
91
+ "27.self_attn.q_proj",
92
+ "text_model.layers.22.self_attn.v_proj",
93
+ "text_model.layers.0.self_attn.q_proj",
94
+ "text_model.layers.5.self_attn.q_proj",
95
+ "text_model.layers.3.self_attn.v_proj",
96
+ "text_model.layers.12.self_attn.q_proj",
97
+ "28.self_attn.q_proj",
98
+ "text_model.layers.10.self_attn.v_proj",
99
+ "text_model.layers.13.self_attn.q_proj",
100
+ "text_model.layers.6.self_attn.k_proj",
101
+ "31.self_attn.k_proj",
102
+ "text_model.layers.26.self_attn.v_proj",
103
+ "text_model.layers.7.self_attn.v_proj",
104
+ "text_model.layers.2.self_attn.k_proj",
105
+ "text_model.layers.11.self_attn.q_proj",
106
+ "text_model.layers.4.self_attn.q_proj",
107
+ "29.self_attn.v_proj",
108
+ "text_model.layers.6.self_attn.v_proj",
109
+ "30.self_attn.q_proj",
110
+ "text_model.layers.18.self_attn.k_proj",
111
+ "text_model.layers.23.self_attn.q_proj",
112
+ "text_model.layers.14.self_attn.v_proj",
113
+ "text_model.layers.23.self_attn.v_proj",
114
+ "text_model.layers.17.self_attn.q_proj",
115
+ "text_model.layers.22.self_attn.q_proj",
116
+ "29.self_attn.q_proj",
117
+ "o_proj",
118
+ "text_model.layers.21.self_attn.q_proj",
119
+ "text_model.layers.3.self_attn.k_proj",
120
+ "text_model.layers.9.self_attn.q_proj",
121
+ "text_model.layers.24.self_attn.v_proj",
122
+ "text_model.layers.12.self_attn.k_proj",
123
+ "text_model.layers.24.self_attn.q_proj"
124
+ ],
125
+ "task_type": null,
126
+ "use_dora": false,
127
+ "use_rslora": false
128
+ }
lora_tuned_stage2/checkpoint-15000/adapter_model.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:b7728e2b6c308713c11c7329f6e349b2ef0acf5b00c3240924a4a6da40b855cd
3
+ size 2806433816
lora_tuned_stage2/checkpoint-15000/generation_config.json ADDED
@@ -0,0 +1,12 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "_from_model_config": true,
3
+ "bos_token_id": 128000,
4
+ "eos_token_id": [
5
+ 128001,
6
+ 128008,
7
+ 128009,
8
+ 128258
9
+ ],
10
+ "pad_token_id": 128002,
11
+ "transformers_version": "4.45.0.dev0"
12
+ }
lora_tuned_stage2/checkpoint-15000/optimizer.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:05fbd830c8460e35b06447efb5612286675b39ed7ee287f180a624fd728ffb6b
3
+ size 358532508
lora_tuned_stage2/checkpoint-15000/rng_state_0.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:9a2280c4383b1248c16c14176cd46d34d73a0622be747d55a6c0513e3a602257
3
+ size 14960
lora_tuned_stage2/checkpoint-15000/rng_state_1.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:0ce150bcd7185431ddc00a7d0e4fae3ce9876bb1fbcdacabac55cb1846ef6ea5
3
+ size 14960
lora_tuned_stage2/checkpoint-15000/rng_state_2.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:1272852f12f33fd5ef0c2b9941732f49b8cbf938da3ea6369c52dc9c117148b5
3
+ size 14960
lora_tuned_stage2/checkpoint-15000/rng_state_3.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:32a479173eac6fc419d1753d9a150fe9441bdc89e5fa687467cec12818bfbee4
3
+ size 14960
lora_tuned_stage2/checkpoint-15000/rng_state_4.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:9c1acd83f2ae1643b2ae86dee0266b9705b2988ede4f2fc770a10cd595c98ad4
3
+ size 14960
lora_tuned_stage2/checkpoint-15000/rng_state_5.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:6406a687bc2bf0f1ddd4651fad2ca5a34e59249e3ac2c73e869a0bf93d98e490
3
+ size 14960
lora_tuned_stage2/checkpoint-15000/rng_state_6.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:01de3fbea9accef82ed56957e195bf93c4028e5a1f28965a90ad69daf8be5784
3
+ size 14960
lora_tuned_stage2/checkpoint-15000/rng_state_7.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:044f815af22c4f30dd5b273290103876b269d4b80108b2a962b9aa096039f361
3
+ size 14960
lora_tuned_stage2/checkpoint-15000/scheduler.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:b122911694009e78ed7d779c4ad2f11d25db15e7f2ef6b691ef70915ee429ee3
3
+ size 1064
lora_tuned_stage2/checkpoint-15000/trainer_state.json ADDED
The diff for this file is too large to render. See raw diff
 
lora_tuned_stage2/checkpoint-15000/training_args.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:66db180e50c8e137b1bba960fbb542970431ee7e8a18d4bc45074381235c9e9e
3
+ size 6072