tuannm2914 committed on
Commit
56bf3ff
·
1 Parent(s): de1f542

Model save

Browse files
README.md CHANGED
@@ -41,13 +41,22 @@ The following hyperparameters were used during training:
41
  - total_train_batch_size: 512
42
  - optimizer: Adam with betas=(0.9,0.999) and epsilon=1e-08
43
  - lr_scheduler_type: cosine
44
- - num_epochs: 1
45
 
46
  ### Training results
47
 
48
  | Training Loss | Epoch | Step | Validation Loss |
49
  |:-------------:|:-----:|:----:|:---------------:|
50
  | No log | 0 | 0 | 1.6782 |
 
 
 
 
 
 
 
 
 
51
 
52
 
53
  ### Framework versions
 
41
  - total_train_batch_size: 512
42
  - optimizer: Adam with betas=(0.9,0.999) and epsilon=1e-08
43
  - lr_scheduler_type: cosine
44
+ - num_epochs: 10
45
 
46
  ### Training results
47
 
48
  | Training Loss | Epoch | Step | Validation Loss |
49
  |:-------------:|:-----:|:----:|:---------------:|
50
  | No log | 0 | 0 | 1.6782 |
51
+ | No log | 0 | 0 | 1.6782 |
52
+ | No log | 0 | 0 | 1.6782 |
53
+ | No log | 0 | 0 | 1.6782 |
54
+ | No log | 0 | 0 | 1.6782 |
55
+ | No log | 0 | 0 | 1.6782 |
56
+ | No log | 0 | 0 | 1.6782 |
57
+ | No log | 0 | 0 | 1.6782 |
58
+ | No log | 0 | 0 | 1.6782 |
59
+ | No log | 0 | 0 | 1.6782 |
60
 
61
 
62
  ### Framework versions
adapter_config.json CHANGED
@@ -16,10 +16,10 @@
16
  "rank_pattern": {},
17
  "revision": null,
18
  "target_modules": [
19
- "k_proj",
20
- "q_proj",
21
  "v_proj",
22
- "o_proj"
 
23
  ],
24
  "task_type": "CAUSAL_LM"
25
  }
 
16
  "rank_pattern": {},
17
  "revision": null,
18
  "target_modules": [
19
+ "o_proj",
 
20
  "v_proj",
21
+ "q_proj",
22
+ "k_proj"
23
  ],
24
  "task_type": "CAUSAL_LM"
25
  }
all_results.json CHANGED
@@ -1,13 +1,13 @@
1
  {
2
  "epoch": 0,
3
- "eval_loss": 1.6781893968582153,
4
- "eval_runtime": 1.9026,
5
  "eval_samples": 100,
6
- "eval_samples_per_second": 52.559,
7
- "eval_steps_per_second": 6.833,
8
  "train_loss": 0,
9
- "train_runtime": 68.9963,
10
  "train_samples": 1100,
11
- "train_samples_per_second": 15.943,
12
  "train_steps_per_second": 0.029
13
  }
 
1
  {
2
  "epoch": 0,
3
+ "eval_loss": 1.678189754486084,
4
+ "eval_runtime": 1.9339,
5
  "eval_samples": 100,
6
+ "eval_samples_per_second": 51.708,
7
+ "eval_steps_per_second": 6.722,
8
  "train_loss": 0,
9
+ "train_runtime": 694.9556,
10
  "train_samples": 1100,
11
+ "train_samples_per_second": 15.828,
12
  "train_steps_per_second": 0.029
13
  }
eval_results.json CHANGED
@@ -1,8 +1,8 @@
1
  {
2
  "epoch": 0,
3
- "eval_loss": 1.6781893968582153,
4
- "eval_runtime": 1.9026,
5
  "eval_samples": 100,
6
- "eval_samples_per_second": 52.559,
7
- "eval_steps_per_second": 6.833
8
  }
 
1
  {
2
  "epoch": 0,
3
+ "eval_loss": 1.678189754486084,
4
+ "eval_runtime": 1.9339,
5
  "eval_samples": 100,
6
+ "eval_samples_per_second": 51.708,
7
+ "eval_steps_per_second": 6.722
8
  }
runs/Nov30_08-25-49_hpc-hblab/events.out.tfevents.1701307635.hpc-hblab.1530306.0 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:3d3edd6799c2d074451da4b31529c7d6288a58e4d8c4792b1003b72992fc3828
3
+ size 7126
runs/Nov30_08-25-49_hpc-hblab/events.out.tfevents.1701308332.hpc-hblab.1530306.1 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:dc4649ffbb0726894b3f0bb394181e1cfd8c9accb197ca434258f4b19ffaa97a
3
+ size 344
train_results.json CHANGED
@@ -1,8 +1,8 @@
1
  {
2
  "epoch": 0,
3
  "train_loss": 0,
4
- "train_runtime": 68.9963,
5
  "train_samples": 1100,
6
- "train_samples_per_second": 15.943,
7
  "train_steps_per_second": 0.029
8
  }
 
1
  {
2
  "epoch": 0,
3
  "train_loss": 0,
4
+ "train_runtime": 694.9556,
5
  "train_samples": 1100,
6
+ "train_samples_per_second": 15.828,
7
  "train_steps_per_second": 0.029
8
  }
trainer_state.json CHANGED
@@ -11,26 +11,98 @@
11
  {
12
  "epoch": 0,
13
  "eval_loss": 1.6781895160675049,
14
- "eval_runtime": 1.9005,
15
- "eval_samples_per_second": 52.619,
16
- "eval_steps_per_second": 6.84,
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
17
  "step": 0
18
  },
19
  {
20
  "epoch": 0,
21
  "step": 0,
22
- "total_flos": 4138152939749376.0,
23
  "train_loss": 0,
24
- "train_runtime": 68.9963,
25
- "train_samples_per_second": 15.943,
26
  "train_steps_per_second": 0.029
27
  }
28
  ],
29
  "logging_steps": 5,
30
- "max_steps": 2,
31
- "num_train_epochs": 1,
32
  "save_steps": 500,
33
- "total_flos": 4138152939749376.0,
34
  "trial_name": null,
35
  "trial_params": null
36
  }
 
11
  {
12
  "epoch": 0,
13
  "eval_loss": 1.6781895160675049,
14
+ "eval_runtime": 1.8873,
15
+ "eval_samples_per_second": 52.986,
16
+ "eval_steps_per_second": 6.888,
17
+ "step": 0
18
+ },
19
+ {
20
+ "epoch": 0,
21
+ "eval_loss": 1.678189992904663,
22
+ "eval_runtime": 1.9001,
23
+ "eval_samples_per_second": 52.628,
24
+ "eval_steps_per_second": 6.842,
25
+ "step": 0
26
+ },
27
+ {
28
+ "epoch": 0,
29
+ "eval_loss": 1.6781901121139526,
30
+ "eval_runtime": 1.8997,
31
+ "eval_samples_per_second": 52.64,
32
+ "eval_steps_per_second": 6.843,
33
+ "step": 0
34
+ },
35
+ {
36
+ "epoch": 0,
37
+ "eval_loss": 1.678189754486084,
38
+ "eval_runtime": 1.937,
39
+ "eval_samples_per_second": 51.627,
40
+ "eval_steps_per_second": 6.712,
41
+ "step": 0
42
+ },
43
+ {
44
+ "epoch": 0,
45
+ "eval_loss": 1.678189754486084,
46
+ "eval_runtime": 1.9394,
47
+ "eval_samples_per_second": 51.563,
48
+ "eval_steps_per_second": 6.703,
49
+ "step": 0
50
+ },
51
+ {
52
+ "epoch": 0,
53
+ "eval_loss": 1.678189754486084,
54
+ "eval_runtime": 1.9383,
55
+ "eval_samples_per_second": 51.591,
56
+ "eval_steps_per_second": 6.707,
57
+ "step": 0
58
+ },
59
+ {
60
+ "epoch": 0,
61
+ "eval_loss": 1.6781895160675049,
62
+ "eval_runtime": 1.9384,
63
+ "eval_samples_per_second": 51.59,
64
+ "eval_steps_per_second": 6.707,
65
+ "step": 0
66
+ },
67
+ {
68
+ "epoch": 0,
69
+ "eval_loss": 1.678189754486084,
70
+ "eval_runtime": 1.9013,
71
+ "eval_samples_per_second": 52.596,
72
+ "eval_steps_per_second": 6.837,
73
+ "step": 0
74
+ },
75
+ {
76
+ "epoch": 0,
77
+ "eval_loss": 1.6781896352767944,
78
+ "eval_runtime": 1.927,
79
+ "eval_samples_per_second": 51.894,
80
+ "eval_steps_per_second": 6.746,
81
+ "step": 0
82
+ },
83
+ {
84
+ "epoch": 0,
85
+ "eval_loss": 1.6781895160675049,
86
+ "eval_runtime": 1.9351,
87
+ "eval_samples_per_second": 51.676,
88
+ "eval_steps_per_second": 6.718,
89
  "step": 0
90
  },
91
  {
92
  "epoch": 0,
93
  "step": 0,
94
+ "total_flos": 4.138152886062285e+16,
95
  "train_loss": 0,
96
+ "train_runtime": 694.9556,
97
+ "train_samples_per_second": 15.828,
98
  "train_steps_per_second": 0.029
99
  }
100
  ],
101
  "logging_steps": 5,
102
+ "max_steps": 20,
103
+ "num_train_epochs": 10,
104
  "save_steps": 500,
105
+ "total_flos": 4.138152886062285e+16,
106
  "trial_name": null,
107
  "trial_params": null
108
  }
training_args.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:6b794ac5ebe87d869f2f6a88f8400a90ee48517c4e6afe65a5e8e32fb1f3e1c4
3
  size 4664
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:f99b2db42321b2cfcfc7f8a19f5b08804ff1ac59a116eefd1922f0ee9d129957
3
  size 4664