Xonotic committed on
Commit
151d520
·
verified ·
1 Parent(s): dff99dd

Subiendo modelo GPT-2 fine-tuned con LoRA

Browse files
adapter_config.json CHANGED
@@ -29,9 +29,9 @@
29
  "rank_pattern": {},
30
  "revision": null,
31
  "target_modules": [
 
32
  "c_fc",
33
- "c_attn",
34
- "c_proj"
35
  ],
36
  "target_parameters": null,
37
  "task_type": "CAUSAL_LM",
 
29
  "rank_pattern": {},
30
  "revision": null,
31
  "target_modules": [
32
+ "c_proj",
33
  "c_fc",
34
+ "c_attn"
 
35
  ],
36
  "target_parameters": null,
37
  "task_type": "CAUSAL_LM",
adapter_model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:f698211d64a505f3d2c7fdf113ae678afa3b0ef2ce539da4bdeaffc49fd15315
3
  size 4730632
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:92071e11dec43ebc7c3346353da080e1f3ec572efd227871db573cf2a6e2d0e4
3
  size 4730632
checkpoint-50/adapter_config.json CHANGED
@@ -29,9 +29,9 @@
29
  "rank_pattern": {},
30
  "revision": null,
31
  "target_modules": [
 
32
  "c_fc",
33
- "c_attn",
34
- "c_proj"
35
  ],
36
  "target_parameters": null,
37
  "task_type": "CAUSAL_LM",
 
29
  "rank_pattern": {},
30
  "revision": null,
31
  "target_modules": [
32
+ "c_proj",
33
  "c_fc",
34
+ "c_attn"
 
35
  ],
36
  "target_parameters": null,
37
  "task_type": "CAUSAL_LM",
checkpoint-50/adapter_model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:7366bd22197f6a97e1d060cce0d169717d86edcfddffd48ddf2df93824f9d8f8
3
  size 4730632
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:6b8242f52ee03ec9a3b7aeb349ebf63e1c5b4b494b12cecc92e5bf9897894fd9
3
  size 4730632
checkpoint-50/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:5c84da8fa8ac3f9c89b90f167e8044eb63bcb9b27d96969d4812f526169fd43f
3
  size 9515787
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:ab377c7a9743dd4db31a472b450c6881fefd274c035f44881348066c478dc7bf
3
  size 9515787
checkpoint-50/trainer_state.json CHANGED
@@ -11,72 +11,72 @@
11
  "log_history": [
12
  {
13
  "epoch": 0.19230769230769232,
14
- "grad_norm": 0.548379123210907,
15
  "learning_rate": 0.0001,
16
- "loss": 5.0244,
17
  "step": 5
18
  },
19
  {
20
  "epoch": 0.38461538461538464,
21
- "grad_norm": 0.8064137697219849,
22
  "learning_rate": 0.00019714285714285716,
23
- "loss": 4.889,
24
  "step": 10
25
  },
26
  {
27
  "epoch": 0.5769230769230769,
28
- "grad_norm": 0.8431358933448792,
29
  "learning_rate": 0.00018285714285714286,
30
- "loss": 4.7358,
31
  "step": 15
32
  },
33
  {
34
  "epoch": 0.7692307692307693,
35
- "grad_norm": 0.8824865221977234,
36
  "learning_rate": 0.00016857142857142857,
37
- "loss": 4.4548,
38
  "step": 20
39
  },
40
  {
41
  "epoch": 0.9615384615384616,
42
- "grad_norm": 1.1104286909103394,
43
  "learning_rate": 0.0001542857142857143,
44
- "loss": 4.042,
45
  "step": 25
46
  },
47
  {
48
  "epoch": 1.1538461538461537,
49
- "grad_norm": 1.1461974382400513,
50
  "learning_rate": 0.00014,
51
- "loss": 3.8627,
52
  "step": 30
53
  },
54
  {
55
  "epoch": 1.3461538461538463,
56
- "grad_norm": 1.0308071374893188,
57
  "learning_rate": 0.00012571428571428572,
58
- "loss": 3.6893,
59
  "step": 35
60
  },
61
  {
62
  "epoch": 1.5384615384615383,
63
- "grad_norm": 1.2822504043579102,
64
  "learning_rate": 0.00011142857142857144,
65
- "loss": 3.352,
66
  "step": 40
67
  },
68
  {
69
  "epoch": 1.7307692307692308,
70
- "grad_norm": 1.2842049598693848,
71
  "learning_rate": 9.714285714285715e-05,
72
- "loss": 3.3562,
73
  "step": 45
74
  },
75
  {
76
  "epoch": 1.9230769230769231,
77
- "grad_norm": 1.255327820777893,
78
  "learning_rate": 8.285714285714287e-05,
79
- "loss": 3.1523,
80
  "step": 50
81
  }
82
  ],
 
11
  "log_history": [
12
  {
13
  "epoch": 0.19230769230769232,
14
+ "grad_norm": 0.5569552183151245,
15
  "learning_rate": 0.0001,
16
+ "loss": 4.6128,
17
  "step": 5
18
  },
19
  {
20
  "epoch": 0.38461538461538464,
21
+ "grad_norm": 0.7635518312454224,
22
  "learning_rate": 0.00019714285714285716,
23
+ "loss": 4.4768,
24
  "step": 10
25
  },
26
  {
27
  "epoch": 0.5769230769230769,
28
+ "grad_norm": 1.002182126045227,
29
  "learning_rate": 0.00018285714285714286,
30
+ "loss": 4.3935,
31
  "step": 15
32
  },
33
  {
34
  "epoch": 0.7692307692307693,
35
+ "grad_norm": 0.7686426043510437,
36
  "learning_rate": 0.00016857142857142857,
37
+ "loss": 4.1717,
38
  "step": 20
39
  },
40
  {
41
  "epoch": 0.9615384615384616,
42
+ "grad_norm": 0.9222117066383362,
43
  "learning_rate": 0.0001542857142857143,
44
+ "loss": 3.7154,
45
  "step": 25
46
  },
47
  {
48
  "epoch": 1.1538461538461537,
49
+ "grad_norm": 1.0059330463409424,
50
  "learning_rate": 0.00014,
51
+ "loss": 3.6302,
52
  "step": 30
53
  },
54
  {
55
  "epoch": 1.3461538461538463,
56
+ "grad_norm": 1.1044957637786865,
57
  "learning_rate": 0.00012571428571428572,
58
+ "loss": 3.617,
59
  "step": 35
60
  },
61
  {
62
  "epoch": 1.5384615384615383,
63
+ "grad_norm": 1.1102561950683594,
64
  "learning_rate": 0.00011142857142857144,
65
+ "loss": 3.3122,
66
  "step": 40
67
  },
68
  {
69
  "epoch": 1.7307692307692308,
70
+ "grad_norm": 1.1488291025161743,
71
  "learning_rate": 9.714285714285715e-05,
72
+ "loss": 3.473,
73
  "step": 45
74
  },
75
  {
76
  "epoch": 1.9230769230769231,
77
+ "grad_norm": 1.1239038705825806,
78
  "learning_rate": 8.285714285714287e-05,
79
+ "loss": 3.2339,
80
  "step": 50
81
  }
82
  ],
checkpoint-78/adapter_config.json CHANGED
@@ -29,9 +29,9 @@
29
  "rank_pattern": {},
30
  "revision": null,
31
  "target_modules": [
 
32
  "c_fc",
33
- "c_attn",
34
- "c_proj"
35
  ],
36
  "target_parameters": null,
37
  "task_type": "CAUSAL_LM",
 
29
  "rank_pattern": {},
30
  "revision": null,
31
  "target_modules": [
32
+ "c_proj",
33
  "c_fc",
34
+ "c_attn"
 
35
  ],
36
  "target_parameters": null,
37
  "task_type": "CAUSAL_LM",
checkpoint-78/adapter_model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:f698211d64a505f3d2c7fdf113ae678afa3b0ef2ce539da4bdeaffc49fd15315
3
  size 4730632
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:92071e11dec43ebc7c3346353da080e1f3ec572efd227871db573cf2a6e2d0e4
3
  size 4730632
checkpoint-78/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:fa0c16a3e57c4220b33a0edb5f35209a95983db805a12e518e05e0a8c6ebb163
3
  size 9515787
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:c811691cf22f7cab3466f4a547f8a6b0b42277ac53d7d8e945c486560bc2a9de
3
  size 9515787
checkpoint-78/trainer_state.json CHANGED
@@ -11,107 +11,107 @@
11
  "log_history": [
12
  {
13
  "epoch": 0.19230769230769232,
14
- "grad_norm": 0.548379123210907,
15
  "learning_rate": 0.0001,
16
- "loss": 5.0244,
17
  "step": 5
18
  },
19
  {
20
  "epoch": 0.38461538461538464,
21
- "grad_norm": 0.8064137697219849,
22
  "learning_rate": 0.00019714285714285716,
23
- "loss": 4.889,
24
  "step": 10
25
  },
26
  {
27
  "epoch": 0.5769230769230769,
28
- "grad_norm": 0.8431358933448792,
29
  "learning_rate": 0.00018285714285714286,
30
- "loss": 4.7358,
31
  "step": 15
32
  },
33
  {
34
  "epoch": 0.7692307692307693,
35
- "grad_norm": 0.8824865221977234,
36
  "learning_rate": 0.00016857142857142857,
37
- "loss": 4.4548,
38
  "step": 20
39
  },
40
  {
41
  "epoch": 0.9615384615384616,
42
- "grad_norm": 1.1104286909103394,
43
  "learning_rate": 0.0001542857142857143,
44
- "loss": 4.042,
45
  "step": 25
46
  },
47
  {
48
  "epoch": 1.1538461538461537,
49
- "grad_norm": 1.1461974382400513,
50
  "learning_rate": 0.00014,
51
- "loss": 3.8627,
52
  "step": 30
53
  },
54
  {
55
  "epoch": 1.3461538461538463,
56
- "grad_norm": 1.0308071374893188,
57
  "learning_rate": 0.00012571428571428572,
58
- "loss": 3.6893,
59
  "step": 35
60
  },
61
  {
62
  "epoch": 1.5384615384615383,
63
- "grad_norm": 1.2822504043579102,
64
  "learning_rate": 0.00011142857142857144,
65
- "loss": 3.352,
66
  "step": 40
67
  },
68
  {
69
  "epoch": 1.7307692307692308,
70
- "grad_norm": 1.2842049598693848,
71
  "learning_rate": 9.714285714285715e-05,
72
- "loss": 3.3562,
73
  "step": 45
74
  },
75
  {
76
  "epoch": 1.9230769230769231,
77
- "grad_norm": 1.255327820777893,
78
  "learning_rate": 8.285714285714287e-05,
79
- "loss": 3.1523,
80
  "step": 50
81
  },
82
  {
83
  "epoch": 2.1153846153846154,
84
- "grad_norm": 1.5920555591583252,
85
  "learning_rate": 6.857142857142858e-05,
86
- "loss": 3.0097,
87
  "step": 55
88
  },
89
  {
90
  "epoch": 2.3076923076923075,
91
- "grad_norm": 1.3065139055252075,
92
  "learning_rate": 5.428571428571428e-05,
93
- "loss": 2.8838,
94
  "step": 60
95
  },
96
  {
97
  "epoch": 2.5,
98
- "grad_norm": 1.4456508159637451,
99
  "learning_rate": 4e-05,
100
- "loss": 2.8021,
101
  "step": 65
102
  },
103
  {
104
  "epoch": 2.6923076923076925,
105
- "grad_norm": 1.435351014137268,
106
  "learning_rate": 2.5714285714285714e-05,
107
- "loss": 2.724,
108
  "step": 70
109
  },
110
  {
111
  "epoch": 2.8846153846153846,
112
- "grad_norm": 1.3584095239639282,
113
  "learning_rate": 1.1428571428571429e-05,
114
- "loss": 2.7916,
115
  "step": 75
116
  }
117
  ],
 
11
  "log_history": [
12
  {
13
  "epoch": 0.19230769230769232,
14
+ "grad_norm": 0.5569552183151245,
15
  "learning_rate": 0.0001,
16
+ "loss": 4.6128,
17
  "step": 5
18
  },
19
  {
20
  "epoch": 0.38461538461538464,
21
+ "grad_norm": 0.7635518312454224,
22
  "learning_rate": 0.00019714285714285716,
23
+ "loss": 4.4768,
24
  "step": 10
25
  },
26
  {
27
  "epoch": 0.5769230769230769,
28
+ "grad_norm": 1.002182126045227,
29
  "learning_rate": 0.00018285714285714286,
30
+ "loss": 4.3935,
31
  "step": 15
32
  },
33
  {
34
  "epoch": 0.7692307692307693,
35
+ "grad_norm": 0.7686426043510437,
36
  "learning_rate": 0.00016857142857142857,
37
+ "loss": 4.1717,
38
  "step": 20
39
  },
40
  {
41
  "epoch": 0.9615384615384616,
42
+ "grad_norm": 0.9222117066383362,
43
  "learning_rate": 0.0001542857142857143,
44
+ "loss": 3.7154,
45
  "step": 25
46
  },
47
  {
48
  "epoch": 1.1538461538461537,
49
+ "grad_norm": 1.0059330463409424,
50
  "learning_rate": 0.00014,
51
+ "loss": 3.6302,
52
  "step": 30
53
  },
54
  {
55
  "epoch": 1.3461538461538463,
56
+ "grad_norm": 1.1044957637786865,
57
  "learning_rate": 0.00012571428571428572,
58
+ "loss": 3.617,
59
  "step": 35
60
  },
61
  {
62
  "epoch": 1.5384615384615383,
63
+ "grad_norm": 1.1102561950683594,
64
  "learning_rate": 0.00011142857142857144,
65
+ "loss": 3.3122,
66
  "step": 40
67
  },
68
  {
69
  "epoch": 1.7307692307692308,
70
+ "grad_norm": 1.1488291025161743,
71
  "learning_rate": 9.714285714285715e-05,
72
+ "loss": 3.473,
73
  "step": 45
74
  },
75
  {
76
  "epoch": 1.9230769230769231,
77
+ "grad_norm": 1.1239038705825806,
78
  "learning_rate": 8.285714285714287e-05,
79
+ "loss": 3.2339,
80
  "step": 50
81
  },
82
  {
83
  "epoch": 2.1153846153846154,
84
+ "grad_norm": 1.8551712036132812,
85
  "learning_rate": 6.857142857142858e-05,
86
+ "loss": 3.2091,
87
  "step": 55
88
  },
89
  {
90
  "epoch": 2.3076923076923075,
91
+ "grad_norm": 1.2532204389572144,
92
  "learning_rate": 5.428571428571428e-05,
93
+ "loss": 3.1673,
94
  "step": 60
95
  },
96
  {
97
  "epoch": 2.5,
98
+ "grad_norm": 1.34445059299469,
99
  "learning_rate": 4e-05,
100
+ "loss": 2.9908,
101
  "step": 65
102
  },
103
  {
104
  "epoch": 2.6923076923076925,
105
+ "grad_norm": 1.377186894416809,
106
  "learning_rate": 2.5714285714285714e-05,
107
+ "loss": 2.9406,
108
  "step": 70
109
  },
110
  {
111
  "epoch": 2.8846153846153846,
112
+ "grad_norm": 1.3273824453353882,
113
  "learning_rate": 1.1428571428571429e-05,
114
+ "loss": 3.1591,
115
  "step": 75
116
  }
117
  ],