tuannm2914 committed on
Commit
39a30e9
·
1 Parent(s): 9141556

Model save

Browse files
README.md CHANGED
@@ -13,7 +13,7 @@ should probably proofread and complete it, then remove this comment. -->
13
 
14
  This model was trained from scratch on an unknown dataset.
15
  It achieves the following results on the evaluation set:
16
- - Loss: 1.4187
17
 
18
  ## Model description
19
 
@@ -47,7 +47,7 @@ The following hyperparameters were used during training:
47
 
48
  | Training Loss | Epoch | Step | Validation Loss |
49
  |:-------------:|:-----:|:----:|:---------------:|
50
- | No log | 0 | 0 | 1.4187 |
51
 
52
 
53
  ### Framework versions
 
13
 
14
  This model was trained from scratch on an unknown dataset.
15
  It achieves the following results on the evaluation set:
16
+ - Loss: 1.4175
17
 
18
  ## Model description
19
 
 
47
 
48
  | Training Loss | Epoch | Step | Validation Loss |
49
  |:-------------:|:-----:|:----:|:---------------:|
50
+ | No log | 0 | 0 | 1.4175 |
51
 
52
 
53
  ### Framework versions
adapter_config.json CHANGED
@@ -17,8 +17,8 @@
17
  "revision": null,
18
  "target_modules": [
19
  "k_proj",
20
- "v_proj",
21
  "q_proj",
 
22
  "o_proj"
23
  ],
24
  "task_type": "CAUSAL_LM"
 
17
  "revision": null,
18
  "target_modules": [
19
  "k_proj",
 
20
  "q_proj",
21
+ "v_proj",
22
  "o_proj"
23
  ],
24
  "task_type": "CAUSAL_LM"
all_results.json CHANGED
@@ -1,13 +1,13 @@
1
  {
2
  "epoch": 0,
3
- "eval_loss": 1.4187188148498535,
4
- "eval_runtime": 58.7586,
5
  "eval_samples": 1200,
6
- "eval_samples_per_second": 20.423,
7
- "eval_steps_per_second": 2.553,
8
- "train_loss": 0.3546818494796753,
9
- "train_runtime": 239.2844,
10
  "train_samples": 1200,
11
- "train_samples_per_second": 5.015,
12
- "train_steps_per_second": 0.008
13
  }
 
1
  {
2
  "epoch": 0,
3
+ "eval_loss": 1.4174754619598389,
4
+ "eval_runtime": 56.8025,
5
  "eval_samples": 1200,
6
+ "eval_samples_per_second": 21.126,
7
+ "eval_steps_per_second": 2.641,
8
+ "train_loss": 0.354236364364624,
9
+ "train_runtime": 231.2263,
10
  "train_samples": 1200,
11
+ "train_samples_per_second": 5.19,
12
+ "train_steps_per_second": 0.009
13
  }
eval_results.json CHANGED
@@ -1,8 +1,8 @@
1
  {
2
  "epoch": 0,
3
- "eval_loss": 1.4187188148498535,
4
- "eval_runtime": 58.7586,
5
  "eval_samples": 1200,
6
- "eval_samples_per_second": 20.423,
7
- "eval_steps_per_second": 2.553
8
  }
 
1
  {
2
  "epoch": 0,
3
+ "eval_loss": 1.4174754619598389,
4
+ "eval_runtime": 56.8025,
5
  "eval_samples": 1200,
6
+ "eval_samples_per_second": 21.126,
7
+ "eval_steps_per_second": 2.641
8
  }
runs/Nov30_17-23-01_hpc-hblab/events.out.tfevents.1701339877.hpc-hblab.1613988.0 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:e5adc632bb0c8e440e42f6bba3fe263fea0cf13d4b9e8482d2f5c1fd72b66dc2
3
+ size 4806
runs/Nov30_17-23-01_hpc-hblab/events.out.tfevents.1701340165.hpc-hblab.1613988.1 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:fd8372353c5c65ffc0a1c6c0f6fe5046f95ac8a40761e1195c458537683d62ee
3
+ size 344
train_results.json CHANGED
@@ -1,8 +1,8 @@
1
  {
2
  "epoch": 0,
3
- "train_loss": 0.3546818494796753,
4
- "train_runtime": 239.2844,
5
  "train_samples": 1200,
6
- "train_samples_per_second": 5.015,
7
- "train_steps_per_second": 0.008
8
  }
 
1
  {
2
  "epoch": 0,
3
+ "train_loss": 0.354236364364624,
4
+ "train_runtime": 231.2263,
5
  "train_samples": 1200,
6
+ "train_samples_per_second": 5.19,
7
+ "train_steps_per_second": 0.009
8
  }
trainer_state.json CHANGED
@@ -10,27 +10,27 @@
10
  "log_history": [
11
  {
12
  "epoch": 0,
13
- "eval_loss": 1.418718695640564,
14
- "eval_runtime": 59.2437,
15
- "eval_samples_per_second": 20.255,
16
- "eval_steps_per_second": 2.532,
17
  "step": 0
18
  },
19
  {
20
  "epoch": 0,
21
  "step": 0,
22
- "total_flos": 1.1269869630652416e+16,
23
- "train_loss": 0.3546818494796753,
24
- "train_runtime": 239.2844,
25
- "train_samples_per_second": 5.015,
26
- "train_steps_per_second": 0.008
27
  }
28
  ],
29
  "logging_steps": 5,
30
  "max_steps": 2,
31
  "num_train_epochs": 1,
32
  "save_steps": 500,
33
- "total_flos": 1.1269869630652416e+16,
34
  "trial_name": null,
35
  "trial_params": null
36
  }
 
10
  "log_history": [
11
  {
12
  "epoch": 0,
13
+ "eval_loss": 1.417475938796997,
14
+ "eval_runtime": 56.9732,
15
+ "eval_samples_per_second": 21.063,
16
+ "eval_steps_per_second": 2.633,
17
  "step": 0
18
  },
19
  {
20
  "epoch": 0,
21
  "step": 0,
22
+ "total_flos": 1.1005732363173888e+16,
23
+ "train_loss": 0.354236364364624,
24
+ "train_runtime": 231.2263,
25
+ "train_samples_per_second": 5.19,
26
+ "train_steps_per_second": 0.009
27
  }
28
  ],
29
  "logging_steps": 5,
30
  "max_steps": 2,
31
  "num_train_epochs": 1,
32
  "save_steps": 500,
33
+ "total_flos": 1.1005732363173888e+16,
34
  "trial_name": null,
35
  "trial_params": null
36
  }
training_args.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:c0d11ab835e1773ef1f902c0fcbcfa5a15ea8e274293fd54cc24ad7877072741
3
  size 4600
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:06965b9c22cec7d773224921cfda7673a0daccb60f0d8ddca7d6598d7bf5c2ca
3
  size 4600