Xonotic committed on
Commit
dff99dd
·
verified ·
1 Parent(s): a2ed862

Subiendo modelo GPT-2 fine-tuned con LoRA

Browse files
adapter_config.json CHANGED
@@ -29,8 +29,8 @@
29
  "rank_pattern": {},
30
  "revision": null,
31
  "target_modules": [
32
- "c_attn",
33
  "c_fc",
 
34
  "c_proj"
35
  ],
36
  "target_parameters": null,
 
29
  "rank_pattern": {},
30
  "revision": null,
31
  "target_modules": [
 
32
  "c_fc",
33
+ "c_attn",
34
  "c_proj"
35
  ],
36
  "target_parameters": null,
adapter_model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:1171030a6e6953eafbda85a62d5b12996f5ffc4822ace50a8cd23d8695c8e744
3
  size 4730632
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:f698211d64a505f3d2c7fdf113ae678afa3b0ef2ce539da4bdeaffc49fd15315
3
  size 4730632
checkpoint-50/adapter_config.json CHANGED
@@ -29,8 +29,8 @@
29
  "rank_pattern": {},
30
  "revision": null,
31
  "target_modules": [
32
- "c_attn",
33
  "c_fc",
 
34
  "c_proj"
35
  ],
36
  "target_parameters": null,
 
29
  "rank_pattern": {},
30
  "revision": null,
31
  "target_modules": [
 
32
  "c_fc",
33
+ "c_attn",
34
  "c_proj"
35
  ],
36
  "target_parameters": null,
checkpoint-50/adapter_model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:4e11ed97a557c62215dd3af808efbbac546251b812b5a520368b3752082dd7d4
3
  size 4730632
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:7366bd22197f6a97e1d060cce0d169717d86edcfddffd48ddf2df93824f9d8f8
3
  size 4730632
checkpoint-50/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:e15baab776ec9c437c2558f5c634c5aa9f8dfe0ed8a7cb86909bfaeedeccd872
3
  size 9515787
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:5c84da8fa8ac3f9c89b90f167e8044eb63bcb9b27d96969d4812f526169fd43f
3
  size 9515787
checkpoint-50/trainer_state.json CHANGED
@@ -11,72 +11,72 @@
11
  "log_history": [
12
  {
13
  "epoch": 0.19230769230769232,
14
- "grad_norm": 0.5508620738983154,
15
  "learning_rate": 0.0001,
16
  "loss": 5.0244,
17
  "step": 5
18
  },
19
  {
20
  "epoch": 0.38461538461538464,
21
- "grad_norm": 0.8022432923316956,
22
  "learning_rate": 0.00019714285714285716,
23
- "loss": 4.888,
24
  "step": 10
25
  },
26
  {
27
  "epoch": 0.5769230769230769,
28
- "grad_norm": 0.8547099828720093,
29
  "learning_rate": 0.00018285714285714286,
30
- "loss": 4.735,
31
  "step": 15
32
  },
33
  {
34
  "epoch": 0.7692307692307693,
35
- "grad_norm": 0.8760125041007996,
36
  "learning_rate": 0.00016857142857142857,
37
  "loss": 4.4548,
38
  "step": 20
39
  },
40
  {
41
  "epoch": 0.9615384615384616,
42
- "grad_norm": 1.1308597326278687,
43
  "learning_rate": 0.0001542857142857143,
44
- "loss": 4.0466,
45
  "step": 25
46
  },
47
  {
48
  "epoch": 1.1538461538461537,
49
- "grad_norm": 1.1865196228027344,
50
  "learning_rate": 0.00014,
51
- "loss": 3.8666,
52
  "step": 30
53
  },
54
  {
55
  "epoch": 1.3461538461538463,
56
- "grad_norm": 1.0487556457519531,
57
  "learning_rate": 0.00012571428571428572,
58
- "loss": 3.6951,
59
  "step": 35
60
  },
61
  {
62
  "epoch": 1.5384615384615383,
63
- "grad_norm": 1.2422493696212769,
64
  "learning_rate": 0.00011142857142857144,
65
- "loss": 3.3633,
66
  "step": 40
67
  },
68
  {
69
  "epoch": 1.7307692307692308,
70
- "grad_norm": 1.2872875928878784,
71
  "learning_rate": 9.714285714285715e-05,
72
- "loss": 3.367,
73
  "step": 45
74
  },
75
  {
76
  "epoch": 1.9230769230769231,
77
- "grad_norm": 1.2250062227249146,
78
  "learning_rate": 8.285714285714287e-05,
79
- "loss": 3.1619,
80
  "step": 50
81
  }
82
  ],
 
11
  "log_history": [
12
  {
13
  "epoch": 0.19230769230769232,
14
+ "grad_norm": 0.548379123210907,
15
  "learning_rate": 0.0001,
16
  "loss": 5.0244,
17
  "step": 5
18
  },
19
  {
20
  "epoch": 0.38461538461538464,
21
+ "grad_norm": 0.8064137697219849,
22
  "learning_rate": 0.00019714285714285716,
23
+ "loss": 4.889,
24
  "step": 10
25
  },
26
  {
27
  "epoch": 0.5769230769230769,
28
+ "grad_norm": 0.8431358933448792,
29
  "learning_rate": 0.00018285714285714286,
30
+ "loss": 4.7358,
31
  "step": 15
32
  },
33
  {
34
  "epoch": 0.7692307692307693,
35
+ "grad_norm": 0.8824865221977234,
36
  "learning_rate": 0.00016857142857142857,
37
  "loss": 4.4548,
38
  "step": 20
39
  },
40
  {
41
  "epoch": 0.9615384615384616,
42
+ "grad_norm": 1.1104286909103394,
43
  "learning_rate": 0.0001542857142857143,
44
+ "loss": 4.042,
45
  "step": 25
46
  },
47
  {
48
  "epoch": 1.1538461538461537,
49
+ "grad_norm": 1.1461974382400513,
50
  "learning_rate": 0.00014,
51
+ "loss": 3.8627,
52
  "step": 30
53
  },
54
  {
55
  "epoch": 1.3461538461538463,
56
+ "grad_norm": 1.0308071374893188,
57
  "learning_rate": 0.00012571428571428572,
58
+ "loss": 3.6893,
59
  "step": 35
60
  },
61
  {
62
  "epoch": 1.5384615384615383,
63
+ "grad_norm": 1.2822504043579102,
64
  "learning_rate": 0.00011142857142857144,
65
+ "loss": 3.352,
66
  "step": 40
67
  },
68
  {
69
  "epoch": 1.7307692307692308,
70
+ "grad_norm": 1.2842049598693848,
71
  "learning_rate": 9.714285714285715e-05,
72
+ "loss": 3.3562,
73
  "step": 45
74
  },
75
  {
76
  "epoch": 1.9230769230769231,
77
+ "grad_norm": 1.255327820777893,
78
  "learning_rate": 8.285714285714287e-05,
79
+ "loss": 3.1523,
80
  "step": 50
81
  }
82
  ],
checkpoint-78/adapter_config.json CHANGED
@@ -29,8 +29,8 @@
29
  "rank_pattern": {},
30
  "revision": null,
31
  "target_modules": [
32
- "c_attn",
33
  "c_fc",
 
34
  "c_proj"
35
  ],
36
  "target_parameters": null,
 
29
  "rank_pattern": {},
30
  "revision": null,
31
  "target_modules": [
 
32
  "c_fc",
33
+ "c_attn",
34
  "c_proj"
35
  ],
36
  "target_parameters": null,
checkpoint-78/adapter_model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:1171030a6e6953eafbda85a62d5b12996f5ffc4822ace50a8cd23d8695c8e744
3
  size 4730632
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:f698211d64a505f3d2c7fdf113ae678afa3b0ef2ce539da4bdeaffc49fd15315
3
  size 4730632
checkpoint-78/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:7fe5ed8a12018017e9aa3816457aac6ea8f0c8f54b2ccb555a190f96470de51e
3
  size 9515787
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:fa0c16a3e57c4220b33a0edb5f35209a95983db805a12e518e05e0a8c6ebb163
3
  size 9515787
checkpoint-78/trainer_state.json CHANGED
@@ -11,107 +11,107 @@
11
  "log_history": [
12
  {
13
  "epoch": 0.19230769230769232,
14
- "grad_norm": 0.5508620738983154,
15
  "learning_rate": 0.0001,
16
  "loss": 5.0244,
17
  "step": 5
18
  },
19
  {
20
  "epoch": 0.38461538461538464,
21
- "grad_norm": 0.8022432923316956,
22
  "learning_rate": 0.00019714285714285716,
23
- "loss": 4.888,
24
  "step": 10
25
  },
26
  {
27
  "epoch": 0.5769230769230769,
28
- "grad_norm": 0.8547099828720093,
29
  "learning_rate": 0.00018285714285714286,
30
- "loss": 4.735,
31
  "step": 15
32
  },
33
  {
34
  "epoch": 0.7692307692307693,
35
- "grad_norm": 0.8760125041007996,
36
  "learning_rate": 0.00016857142857142857,
37
  "loss": 4.4548,
38
  "step": 20
39
  },
40
  {
41
  "epoch": 0.9615384615384616,
42
- "grad_norm": 1.1308597326278687,
43
  "learning_rate": 0.0001542857142857143,
44
- "loss": 4.0466,
45
  "step": 25
46
  },
47
  {
48
  "epoch": 1.1538461538461537,
49
- "grad_norm": 1.1865196228027344,
50
  "learning_rate": 0.00014,
51
- "loss": 3.8666,
52
  "step": 30
53
  },
54
  {
55
  "epoch": 1.3461538461538463,
56
- "grad_norm": 1.0487556457519531,
57
  "learning_rate": 0.00012571428571428572,
58
- "loss": 3.6951,
59
  "step": 35
60
  },
61
  {
62
  "epoch": 1.5384615384615383,
63
- "grad_norm": 1.2422493696212769,
64
  "learning_rate": 0.00011142857142857144,
65
- "loss": 3.3633,
66
  "step": 40
67
  },
68
  {
69
  "epoch": 1.7307692307692308,
70
- "grad_norm": 1.2872875928878784,
71
  "learning_rate": 9.714285714285715e-05,
72
- "loss": 3.367,
73
  "step": 45
74
  },
75
  {
76
  "epoch": 1.9230769230769231,
77
- "grad_norm": 1.2250062227249146,
78
  "learning_rate": 8.285714285714287e-05,
79
- "loss": 3.1619,
80
  "step": 50
81
  },
82
  {
83
  "epoch": 2.1153846153846154,
84
- "grad_norm": 1.7271333932876587,
85
  "learning_rate": 6.857142857142858e-05,
86
- "loss": 3.0247,
87
  "step": 55
88
  },
89
  {
90
  "epoch": 2.3076923076923075,
91
- "grad_norm": 1.3085808753967285,
92
  "learning_rate": 5.428571428571428e-05,
93
- "loss": 2.8975,
94
  "step": 60
95
  },
96
  {
97
  "epoch": 2.5,
98
- "grad_norm": 1.4766792058944702,
99
  "learning_rate": 4e-05,
100
- "loss": 2.8193,
101
  "step": 65
102
  },
103
  {
104
  "epoch": 2.6923076923076925,
105
- "grad_norm": 1.506183385848999,
106
  "learning_rate": 2.5714285714285714e-05,
107
- "loss": 2.7399,
108
  "step": 70
109
  },
110
  {
111
  "epoch": 2.8846153846153846,
112
- "grad_norm": 1.3497138023376465,
113
  "learning_rate": 1.1428571428571429e-05,
114
- "loss": 2.811,
115
  "step": 75
116
  }
117
  ],
 
11
  "log_history": [
12
  {
13
  "epoch": 0.19230769230769232,
14
+ "grad_norm": 0.548379123210907,
15
  "learning_rate": 0.0001,
16
  "loss": 5.0244,
17
  "step": 5
18
  },
19
  {
20
  "epoch": 0.38461538461538464,
21
+ "grad_norm": 0.8064137697219849,
22
  "learning_rate": 0.00019714285714285716,
23
+ "loss": 4.889,
24
  "step": 10
25
  },
26
  {
27
  "epoch": 0.5769230769230769,
28
+ "grad_norm": 0.8431358933448792,
29
  "learning_rate": 0.00018285714285714286,
30
+ "loss": 4.7358,
31
  "step": 15
32
  },
33
  {
34
  "epoch": 0.7692307692307693,
35
+ "grad_norm": 0.8824865221977234,
36
  "learning_rate": 0.00016857142857142857,
37
  "loss": 4.4548,
38
  "step": 20
39
  },
40
  {
41
  "epoch": 0.9615384615384616,
42
+ "grad_norm": 1.1104286909103394,
43
  "learning_rate": 0.0001542857142857143,
44
+ "loss": 4.042,
45
  "step": 25
46
  },
47
  {
48
  "epoch": 1.1538461538461537,
49
+ "grad_norm": 1.1461974382400513,
50
  "learning_rate": 0.00014,
51
+ "loss": 3.8627,
52
  "step": 30
53
  },
54
  {
55
  "epoch": 1.3461538461538463,
56
+ "grad_norm": 1.0308071374893188,
57
  "learning_rate": 0.00012571428571428572,
58
+ "loss": 3.6893,
59
  "step": 35
60
  },
61
  {
62
  "epoch": 1.5384615384615383,
63
+ "grad_norm": 1.2822504043579102,
64
  "learning_rate": 0.00011142857142857144,
65
+ "loss": 3.352,
66
  "step": 40
67
  },
68
  {
69
  "epoch": 1.7307692307692308,
70
+ "grad_norm": 1.2842049598693848,
71
  "learning_rate": 9.714285714285715e-05,
72
+ "loss": 3.3562,
73
  "step": 45
74
  },
75
  {
76
  "epoch": 1.9230769230769231,
77
+ "grad_norm": 1.255327820777893,
78
  "learning_rate": 8.285714285714287e-05,
79
+ "loss": 3.1523,
80
  "step": 50
81
  },
82
  {
83
  "epoch": 2.1153846153846154,
84
+ "grad_norm": 1.5920555591583252,
85
  "learning_rate": 6.857142857142858e-05,
86
+ "loss": 3.0097,
87
  "step": 55
88
  },
89
  {
90
  "epoch": 2.3076923076923075,
91
+ "grad_norm": 1.3065139055252075,
92
  "learning_rate": 5.428571428571428e-05,
93
+ "loss": 2.8838,
94
  "step": 60
95
  },
96
  {
97
  "epoch": 2.5,
98
+ "grad_norm": 1.4456508159637451,
99
  "learning_rate": 4e-05,
100
+ "loss": 2.8021,
101
  "step": 65
102
  },
103
  {
104
  "epoch": 2.6923076923076925,
105
+ "grad_norm": 1.435351014137268,
106
  "learning_rate": 2.5714285714285714e-05,
107
+ "loss": 2.724,
108
  "step": 70
109
  },
110
  {
111
  "epoch": 2.8846153846153846,
112
+ "grad_norm": 1.3584095239639282,
113
  "learning_rate": 1.1428571428571429e-05,
114
+ "loss": 2.7916,
115
  "step": 75
116
  }
117
  ],