tuannm2914 committed on
Commit
a4a995f
·
1 Parent(s): ec310e7

Model save

Browse files
README.md CHANGED
@@ -41,22 +41,13 @@ The following hyperparameters were used during training:
41
  - total_train_batch_size: 512
42
  - optimizer: Adam with betas=(0.9,0.999) and epsilon=1e-08
43
  - lr_scheduler_type: cosine
44
- - num_epochs: 10
45
 
46
  ### Training results
47
 
48
  | Training Loss | Epoch | Step | Validation Loss |
49
  |:-------------:|:-----:|:----:|:---------------:|
50
  | No log | 0 | 0 | 1.6782 |
51
- | No log | 0 | 0 | 1.6782 |
52
- | No log | 0 | 0 | 1.6782 |
53
- | No log | 0 | 0 | 1.6782 |
54
- | No log | 0 | 0 | 1.6782 |
55
- | No log | 0 | 0 | 1.6782 |
56
- | No log | 0 | 0 | 1.6782 |
57
- | No log | 0 | 0 | 1.6782 |
58
- | No log | 0 | 0 | 1.6782 |
59
- | No log | 0 | 0 | 1.6782 |
60
 
61
 
62
  ### Framework versions
 
41
  - total_train_batch_size: 512
42
  - optimizer: Adam with betas=(0.9,0.999) and epsilon=1e-08
43
  - lr_scheduler_type: cosine
44
+ - num_epochs: 1
45
 
46
  ### Training results
47
 
48
  | Training Loss | Epoch | Step | Validation Loss |
49
  |:-------------:|:-----:|:----:|:---------------:|
50
  | No log | 0 | 0 | 1.6782 |
 
 
 
 
 
 
 
 
 
51
 
52
 
53
  ### Framework versions
adapter_config.json CHANGED
@@ -16,10 +16,10 @@
16
  "rank_pattern": {},
17
  "revision": null,
18
  "target_modules": [
19
- "o_proj",
20
  "v_proj",
21
  "q_proj",
22
- "k_proj"
23
  ],
24
  "task_type": "CAUSAL_LM"
25
  }
 
16
  "rank_pattern": {},
17
  "revision": null,
18
  "target_modules": [
19
+ "k_proj",
20
  "v_proj",
21
  "q_proj",
22
+ "o_proj"
23
  ],
24
  "task_type": "CAUSAL_LM"
25
  }
all_results.json CHANGED
@@ -1,13 +1,13 @@
1
  {
2
  "epoch": 0,
3
- "eval_loss": 1.678189754486084,
4
- "eval_runtime": 1.9339,
5
  "eval_samples": 100,
6
- "eval_samples_per_second": 51.708,
7
- "eval_steps_per_second": 6.722,
8
- "train_loss": 0,
9
- "train_runtime": 694.9556,
10
  "train_samples": 1100,
11
- "train_samples_per_second": 15.828,
12
  "train_steps_per_second": 0.029
13
  }
 
1
  {
2
  "epoch": 0,
3
+ "eval_loss": 1.6781893968582153,
4
+ "eval_runtime": 1.9231,
5
  "eval_samples": 100,
6
+ "eval_samples_per_second": 51.999,
7
+ "eval_steps_per_second": 6.76,
8
+ "train_loss": 0.17418603599071503,
9
+ "train_runtime": 69.1029,
10
  "train_samples": 1100,
11
+ "train_samples_per_second": 15.918,
12
  "train_steps_per_second": 0.029
13
  }
eval_results.json CHANGED
@@ -1,8 +1,8 @@
1
  {
2
  "epoch": 0,
3
- "eval_loss": 1.678189754486084,
4
- "eval_runtime": 1.9339,
5
  "eval_samples": 100,
6
- "eval_samples_per_second": 51.708,
7
- "eval_steps_per_second": 6.722
8
  }
 
1
  {
2
  "epoch": 0,
3
+ "eval_loss": 1.6781893968582153,
4
+ "eval_runtime": 1.9231,
5
  "eval_samples": 100,
6
+ "eval_samples_per_second": 51.999,
7
+ "eval_steps_per_second": 6.76
8
  }
runs/Nov30_08-42-46_hpc-hblab/events.out.tfevents.1701308652.hpc-hblab.1532691.0 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:3e7d2bc1ae2ac33754a1193332b3d9fd0b4d8b16bb01a67d39f97e42f69ba91f
3
+ size 4821
runs/Nov30_08-42-46_hpc-hblab/events.out.tfevents.1701308723.hpc-hblab.1532691.1 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:57d004450b9e13e68116a01389ff0c3b0f2ac559642f7ac300770aa72406fbd9
3
+ size 344
train_results.json CHANGED
@@ -1,8 +1,8 @@
1
  {
2
  "epoch": 0,
3
- "train_loss": 0,
4
- "train_runtime": 694.9556,
5
  "train_samples": 1100,
6
- "train_samples_per_second": 15.828,
7
  "train_steps_per_second": 0.029
8
  }
 
1
  {
2
  "epoch": 0,
3
+ "train_loss": 0.17418603599071503,
4
+ "train_runtime": 69.1029,
5
  "train_samples": 1100,
6
+ "train_samples_per_second": 15.918,
7
  "train_steps_per_second": 0.029
8
  }
trainer_state.json CHANGED
@@ -11,98 +11,26 @@
11
  {
12
  "epoch": 0,
13
  "eval_loss": 1.6781895160675049,
14
- "eval_runtime": 1.8873,
15
- "eval_samples_per_second": 52.986,
16
- "eval_steps_per_second": 6.888,
17
- "step": 0
18
- },
19
- {
20
- "epoch": 0,
21
- "eval_loss": 1.678189992904663,
22
- "eval_runtime": 1.9001,
23
- "eval_samples_per_second": 52.628,
24
- "eval_steps_per_second": 6.842,
25
- "step": 0
26
- },
27
- {
28
- "epoch": 0,
29
- "eval_loss": 1.6781901121139526,
30
- "eval_runtime": 1.8997,
31
- "eval_samples_per_second": 52.64,
32
- "eval_steps_per_second": 6.843,
33
- "step": 0
34
- },
35
- {
36
- "epoch": 0,
37
- "eval_loss": 1.678189754486084,
38
- "eval_runtime": 1.937,
39
- "eval_samples_per_second": 51.627,
40
- "eval_steps_per_second": 6.712,
41
- "step": 0
42
- },
43
- {
44
- "epoch": 0,
45
- "eval_loss": 1.678189754486084,
46
- "eval_runtime": 1.9394,
47
- "eval_samples_per_second": 51.563,
48
- "eval_steps_per_second": 6.703,
49
- "step": 0
50
- },
51
- {
52
- "epoch": 0,
53
- "eval_loss": 1.678189754486084,
54
- "eval_runtime": 1.9383,
55
- "eval_samples_per_second": 51.591,
56
- "eval_steps_per_second": 6.707,
57
- "step": 0
58
- },
59
- {
60
- "epoch": 0,
61
- "eval_loss": 1.6781895160675049,
62
- "eval_runtime": 1.9384,
63
- "eval_samples_per_second": 51.59,
64
- "eval_steps_per_second": 6.707,
65
- "step": 0
66
- },
67
- {
68
- "epoch": 0,
69
- "eval_loss": 1.678189754486084,
70
- "eval_runtime": 1.9013,
71
- "eval_samples_per_second": 52.596,
72
- "eval_steps_per_second": 6.837,
73
- "step": 0
74
- },
75
- {
76
- "epoch": 0,
77
- "eval_loss": 1.6781896352767944,
78
- "eval_runtime": 1.927,
79
- "eval_samples_per_second": 51.894,
80
- "eval_steps_per_second": 6.746,
81
- "step": 0
82
- },
83
- {
84
- "epoch": 0,
85
- "eval_loss": 1.6781895160675049,
86
- "eval_runtime": 1.9351,
87
- "eval_samples_per_second": 51.676,
88
- "eval_steps_per_second": 6.718,
89
  "step": 0
90
  },
91
  {
92
  "epoch": 0,
93
  "step": 0,
94
- "total_flos": 4.138152886062285e+16,
95
- "train_loss": 0,
96
- "train_runtime": 694.9556,
97
- "train_samples_per_second": 15.828,
98
  "train_steps_per_second": 0.029
99
  }
100
  ],
101
  "logging_steps": 5,
102
- "max_steps": 20,
103
- "num_train_epochs": 10,
104
  "save_steps": 500,
105
- "total_flos": 4.138152886062285e+16,
106
  "trial_name": null,
107
  "trial_params": null
108
  }
 
11
  {
12
  "epoch": 0,
13
  "eval_loss": 1.6781895160675049,
14
+ "eval_runtime": 1.9183,
15
+ "eval_samples_per_second": 52.129,
16
+ "eval_steps_per_second": 6.777,
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
17
  "step": 0
18
  },
19
  {
20
  "epoch": 0,
21
  "step": 0,
22
+ "total_flos": 4138152939749376.0,
23
+ "train_loss": 0.17418603599071503,
24
+ "train_runtime": 69.1029,
25
+ "train_samples_per_second": 15.918,
26
  "train_steps_per_second": 0.029
27
  }
28
  ],
29
  "logging_steps": 5,
30
+ "max_steps": 2,
31
+ "num_train_epochs": 1,
32
  "save_steps": 500,
33
+ "total_flos": 4138152939749376.0,
34
  "trial_name": null,
35
  "trial_params": null
36
  }
training_args.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:f99b2db42321b2cfcfc7f8a19f5b08804ff1ac59a116eefd1922f0ee9d129957
3
  size 4664
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:738bd7ca07742f4e6bc4d88571499f5e22fabacca9290dceee3a207594692b11
3
  size 4664