Xonotic committed on
Commit
a2ed862
·
verified ·
1 Parent(s): 2d731d8

Subiendo modelo GPT-2 fine-tuned con LoRA

Browse files
adapter_config.json CHANGED
@@ -29,9 +29,9 @@
29
  "rank_pattern": {},
30
  "revision": null,
31
  "target_modules": [
32
- "c_proj",
33
  "c_fc",
34
- "c_attn"
35
  ],
36
  "target_parameters": null,
37
  "task_type": "CAUSAL_LM",
 
29
  "rank_pattern": {},
30
  "revision": null,
31
  "target_modules": [
32
+ "c_attn",
33
  "c_fc",
34
+ "c_proj"
35
  ],
36
  "target_parameters": null,
37
  "task_type": "CAUSAL_LM",
adapter_model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:55232f8282cb9bd78ce25259d56c143a26458573ce9f80ca07d6ae0ffc790618
3
  size 4730632
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:1171030a6e6953eafbda85a62d5b12996f5ffc4822ace50a8cd23d8695c8e744
3
  size 4730632
checkpoint-50/adapter_config.json CHANGED
@@ -29,9 +29,9 @@
29
  "rank_pattern": {},
30
  "revision": null,
31
  "target_modules": [
32
- "c_proj",
33
  "c_fc",
34
- "c_attn"
35
  ],
36
  "target_parameters": null,
37
  "task_type": "CAUSAL_LM",
 
29
  "rank_pattern": {},
30
  "revision": null,
31
  "target_modules": [
32
+ "c_attn",
33
  "c_fc",
34
+ "c_proj"
35
  ],
36
  "target_parameters": null,
37
  "task_type": "CAUSAL_LM",
checkpoint-50/adapter_model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:6b8b1b2997277f932b7d8d7f8089c2494b2f173b8b9190bdc499d2b3d8c86988
3
  size 4730632
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:4e11ed97a557c62215dd3af808efbbac546251b812b5a520368b3752082dd7d4
3
  size 4730632
checkpoint-50/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:ad8d39626a094c9582a9c023435d1d8c9d42c48032e6ebf2fc01ddff47e2db4f
3
  size 9515787
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:e15baab776ec9c437c2558f5c634c5aa9f8dfe0ed8a7cb86909bfaeedeccd872
3
  size 9515787
checkpoint-50/trainer_state.json CHANGED
@@ -11,72 +11,72 @@
11
  "log_history": [
12
  {
13
  "epoch": 0.19230769230769232,
14
- "grad_norm": 0.5379420518875122,
15
  "learning_rate": 0.0001,
16
- "loss": 5.0246,
17
  "step": 5
18
  },
19
  {
20
  "epoch": 0.38461538461538464,
21
- "grad_norm": 0.811471164226532,
22
  "learning_rate": 0.00019714285714285716,
23
- "loss": 4.8901,
24
  "step": 10
25
  },
26
  {
27
  "epoch": 0.5769230769230769,
28
- "grad_norm": 0.8395925164222717,
29
  "learning_rate": 0.00018285714285714286,
30
- "loss": 4.7398,
31
  "step": 15
32
  },
33
  {
34
  "epoch": 0.7692307692307693,
35
- "grad_norm": 0.8671197891235352,
36
  "learning_rate": 0.00016857142857142857,
37
- "loss": 4.462,
38
  "step": 20
39
  },
40
  {
41
  "epoch": 0.9615384615384616,
42
- "grad_norm": 1.1186156272888184,
43
  "learning_rate": 0.0001542857142857143,
44
- "loss": 4.0565,
45
  "step": 25
46
  },
47
  {
48
  "epoch": 1.1538461538461537,
49
- "grad_norm": 1.1562882661819458,
50
  "learning_rate": 0.00014,
51
- "loss": 3.8807,
52
  "step": 30
53
  },
54
  {
55
  "epoch": 1.3461538461538463,
56
- "grad_norm": 1.0266945362091064,
57
  "learning_rate": 0.00012571428571428572,
58
- "loss": 3.7139,
59
  "step": 35
60
  },
61
  {
62
  "epoch": 1.5384615384615383,
63
- "grad_norm": 1.23048996925354,
64
  "learning_rate": 0.00011142857142857144,
65
- "loss": 3.3839,
66
  "step": 40
67
  },
68
  {
69
  "epoch": 1.7307692307692308,
70
- "grad_norm": 1.2847602367401123,
71
  "learning_rate": 9.714285714285715e-05,
72
- "loss": 3.3867,
73
  "step": 45
74
  },
75
  {
76
  "epoch": 1.9230769230769231,
77
- "grad_norm": 1.256270408630371,
78
  "learning_rate": 8.285714285714287e-05,
79
- "loss": 3.1795,
80
  "step": 50
81
  }
82
  ],
 
11
  "log_history": [
12
  {
13
  "epoch": 0.19230769230769232,
14
+ "grad_norm": 0.5508620738983154,
15
  "learning_rate": 0.0001,
16
+ "loss": 5.0244,
17
  "step": 5
18
  },
19
  {
20
  "epoch": 0.38461538461538464,
21
+ "grad_norm": 0.8022432923316956,
22
  "learning_rate": 0.00019714285714285716,
23
+ "loss": 4.888,
24
  "step": 10
25
  },
26
  {
27
  "epoch": 0.5769230769230769,
28
+ "grad_norm": 0.8547099828720093,
29
  "learning_rate": 0.00018285714285714286,
30
+ "loss": 4.735,
31
  "step": 15
32
  },
33
  {
34
  "epoch": 0.7692307692307693,
35
+ "grad_norm": 0.8760125041007996,
36
  "learning_rate": 0.00016857142857142857,
37
+ "loss": 4.4548,
38
  "step": 20
39
  },
40
  {
41
  "epoch": 0.9615384615384616,
42
+ "grad_norm": 1.1308597326278687,
43
  "learning_rate": 0.0001542857142857143,
44
+ "loss": 4.0466,
45
  "step": 25
46
  },
47
  {
48
  "epoch": 1.1538461538461537,
49
+ "grad_norm": 1.1865196228027344,
50
  "learning_rate": 0.00014,
51
+ "loss": 3.8666,
52
  "step": 30
53
  },
54
  {
55
  "epoch": 1.3461538461538463,
56
+ "grad_norm": 1.0487556457519531,
57
  "learning_rate": 0.00012571428571428572,
58
+ "loss": 3.6951,
59
  "step": 35
60
  },
61
  {
62
  "epoch": 1.5384615384615383,
63
+ "grad_norm": 1.2422493696212769,
64
  "learning_rate": 0.00011142857142857144,
65
+ "loss": 3.3633,
66
  "step": 40
67
  },
68
  {
69
  "epoch": 1.7307692307692308,
70
+ "grad_norm": 1.2872875928878784,
71
  "learning_rate": 9.714285714285715e-05,
72
+ "loss": 3.367,
73
  "step": 45
74
  },
75
  {
76
  "epoch": 1.9230769230769231,
77
+ "grad_norm": 1.2250062227249146,
78
  "learning_rate": 8.285714285714287e-05,
79
+ "loss": 3.1619,
80
  "step": 50
81
  }
82
  ],
checkpoint-78/adapter_config.json CHANGED
@@ -29,9 +29,9 @@
29
  "rank_pattern": {},
30
  "revision": null,
31
  "target_modules": [
32
- "c_proj",
33
  "c_fc",
34
- "c_attn"
35
  ],
36
  "target_parameters": null,
37
  "task_type": "CAUSAL_LM",
 
29
  "rank_pattern": {},
30
  "revision": null,
31
  "target_modules": [
32
+ "c_attn",
33
  "c_fc",
34
+ "c_proj"
35
  ],
36
  "target_parameters": null,
37
  "task_type": "CAUSAL_LM",
checkpoint-78/adapter_model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:55232f8282cb9bd78ce25259d56c143a26458573ce9f80ca07d6ae0ffc790618
3
  size 4730632
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:1171030a6e6953eafbda85a62d5b12996f5ffc4822ace50a8cd23d8695c8e744
3
  size 4730632
checkpoint-78/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:5daceb474505ddfd977fd87658a266e662d19d2a2a7fb78fbba28c64daffb4bf
3
  size 9515787
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:7fe5ed8a12018017e9aa3816457aac6ea8f0c8f54b2ccb555a190f96470de51e
3
  size 9515787
checkpoint-78/trainer_state.json CHANGED
@@ -11,107 +11,107 @@
11
  "log_history": [
12
  {
13
  "epoch": 0.19230769230769232,
14
- "grad_norm": 0.5379420518875122,
15
  "learning_rate": 0.0001,
16
- "loss": 5.0246,
17
  "step": 5
18
  },
19
  {
20
  "epoch": 0.38461538461538464,
21
- "grad_norm": 0.811471164226532,
22
  "learning_rate": 0.00019714285714285716,
23
- "loss": 4.8901,
24
  "step": 10
25
  },
26
  {
27
  "epoch": 0.5769230769230769,
28
- "grad_norm": 0.8395925164222717,
29
  "learning_rate": 0.00018285714285714286,
30
- "loss": 4.7398,
31
  "step": 15
32
  },
33
  {
34
  "epoch": 0.7692307692307693,
35
- "grad_norm": 0.8671197891235352,
36
  "learning_rate": 0.00016857142857142857,
37
- "loss": 4.462,
38
  "step": 20
39
  },
40
  {
41
  "epoch": 0.9615384615384616,
42
- "grad_norm": 1.1186156272888184,
43
  "learning_rate": 0.0001542857142857143,
44
- "loss": 4.0565,
45
  "step": 25
46
  },
47
  {
48
  "epoch": 1.1538461538461537,
49
- "grad_norm": 1.1562882661819458,
50
  "learning_rate": 0.00014,
51
- "loss": 3.8807,
52
  "step": 30
53
  },
54
  {
55
  "epoch": 1.3461538461538463,
56
- "grad_norm": 1.0266945362091064,
57
  "learning_rate": 0.00012571428571428572,
58
- "loss": 3.7139,
59
  "step": 35
60
  },
61
  {
62
  "epoch": 1.5384615384615383,
63
- "grad_norm": 1.23048996925354,
64
  "learning_rate": 0.00011142857142857144,
65
- "loss": 3.3839,
66
  "step": 40
67
  },
68
  {
69
  "epoch": 1.7307692307692308,
70
- "grad_norm": 1.2847602367401123,
71
  "learning_rate": 9.714285714285715e-05,
72
- "loss": 3.3867,
73
  "step": 45
74
  },
75
  {
76
  "epoch": 1.9230769230769231,
77
- "grad_norm": 1.256270408630371,
78
  "learning_rate": 8.285714285714287e-05,
79
- "loss": 3.1795,
80
  "step": 50
81
  },
82
  {
83
  "epoch": 2.1153846153846154,
84
- "grad_norm": 1.6300894021987915,
85
  "learning_rate": 6.857142857142858e-05,
86
- "loss": 3.0392,
87
  "step": 55
88
  },
89
  {
90
  "epoch": 2.3076923076923075,
91
- "grad_norm": 1.2983814477920532,
92
  "learning_rate": 5.428571428571428e-05,
93
- "loss": 2.9103,
94
  "step": 60
95
  },
96
  {
97
  "epoch": 2.5,
98
- "grad_norm": 1.5019299983978271,
99
  "learning_rate": 4e-05,
100
- "loss": 2.8314,
101
  "step": 65
102
  },
103
  {
104
  "epoch": 2.6923076923076925,
105
- "grad_norm": 1.4436630010604858,
106
  "learning_rate": 2.5714285714285714e-05,
107
- "loss": 2.753,
108
  "step": 70
109
  },
110
  {
111
  "epoch": 2.8846153846153846,
112
- "grad_norm": 1.3541405200958252,
113
  "learning_rate": 1.1428571428571429e-05,
114
- "loss": 2.8238,
115
  "step": 75
116
  }
117
  ],
 
11
  "log_history": [
12
  {
13
  "epoch": 0.19230769230769232,
14
+ "grad_norm": 0.5508620738983154,
15
  "learning_rate": 0.0001,
16
+ "loss": 5.0244,
17
  "step": 5
18
  },
19
  {
20
  "epoch": 0.38461538461538464,
21
+ "grad_norm": 0.8022432923316956,
22
  "learning_rate": 0.00019714285714285716,
23
+ "loss": 4.888,
24
  "step": 10
25
  },
26
  {
27
  "epoch": 0.5769230769230769,
28
+ "grad_norm": 0.8547099828720093,
29
  "learning_rate": 0.00018285714285714286,
30
+ "loss": 4.735,
31
  "step": 15
32
  },
33
  {
34
  "epoch": 0.7692307692307693,
35
+ "grad_norm": 0.8760125041007996,
36
  "learning_rate": 0.00016857142857142857,
37
+ "loss": 4.4548,
38
  "step": 20
39
  },
40
  {
41
  "epoch": 0.9615384615384616,
42
+ "grad_norm": 1.1308597326278687,
43
  "learning_rate": 0.0001542857142857143,
44
+ "loss": 4.0466,
45
  "step": 25
46
  },
47
  {
48
  "epoch": 1.1538461538461537,
49
+ "grad_norm": 1.1865196228027344,
50
  "learning_rate": 0.00014,
51
+ "loss": 3.8666,
52
  "step": 30
53
  },
54
  {
55
  "epoch": 1.3461538461538463,
56
+ "grad_norm": 1.0487556457519531,
57
  "learning_rate": 0.00012571428571428572,
58
+ "loss": 3.6951,
59
  "step": 35
60
  },
61
  {
62
  "epoch": 1.5384615384615383,
63
+ "grad_norm": 1.2422493696212769,
64
  "learning_rate": 0.00011142857142857144,
65
+ "loss": 3.3633,
66
  "step": 40
67
  },
68
  {
69
  "epoch": 1.7307692307692308,
70
+ "grad_norm": 1.2872875928878784,
71
  "learning_rate": 9.714285714285715e-05,
72
+ "loss": 3.367,
73
  "step": 45
74
  },
75
  {
76
  "epoch": 1.9230769230769231,
77
+ "grad_norm": 1.2250062227249146,
78
  "learning_rate": 8.285714285714287e-05,
79
+ "loss": 3.1619,
80
  "step": 50
81
  },
82
  {
83
  "epoch": 2.1153846153846154,
84
+ "grad_norm": 1.7271333932876587,
85
  "learning_rate": 6.857142857142858e-05,
86
+ "loss": 3.0247,
87
  "step": 55
88
  },
89
  {
90
  "epoch": 2.3076923076923075,
91
+ "grad_norm": 1.3085808753967285,
92
  "learning_rate": 5.428571428571428e-05,
93
+ "loss": 2.8975,
94
  "step": 60
95
  },
96
  {
97
  "epoch": 2.5,
98
+ "grad_norm": 1.4766792058944702,
99
  "learning_rate": 4e-05,
100
+ "loss": 2.8193,
101
  "step": 65
102
  },
103
  {
104
  "epoch": 2.6923076923076925,
105
+ "grad_norm": 1.506183385848999,
106
  "learning_rate": 2.5714285714285714e-05,
107
+ "loss": 2.7399,
108
  "step": 70
109
  },
110
  {
111
  "epoch": 2.8846153846153846,
112
+ "grad_norm": 1.3497138023376465,
113
  "learning_rate": 1.1428571428571429e-05,
114
+ "loss": 2.811,
115
  "step": 75
116
  }
117
  ],