chanchan7 committed on
Commit
055f66f
·
verified ·
1 Parent(s): 0c10a94

Model save

Browse files
README.md CHANGED
@@ -19,7 +19,7 @@ should probably proofread and complete it, then remove this comment. -->
19
 
20
  This model is a fine-tuned version of [meta-llama/Llama-2-7b-chat-hf](https://huggingface.co/meta-llama/Llama-2-7b-chat-hf) on an unknown dataset.
21
  It achieves the following results on the evaluation set:
22
- - Loss: 1.9190
23
 
24
  ## Model description
25
 
@@ -52,7 +52,7 @@ The following hyperparameters were used during training:
52
 
53
  | Training Loss | Epoch | Step | Validation Loss |
54
  |:-------------:|:-----:|:----:|:---------------:|
55
- | 2.8727 | 1.0 | 4 | 1.9190 |
56
 
57
 
58
  ### Framework versions
 
19
 
20
  This model is a fine-tuned version of [meta-llama/Llama-2-7b-chat-hf](https://huggingface.co/meta-llama/Llama-2-7b-chat-hf) on an unknown dataset.
21
  It achieves the following results on the evaluation set:
22
+ - Loss: 1.9179
23
 
24
  ## Model description
25
 
 
52
 
53
  | Training Loss | Epoch | Step | Validation Loss |
54
  |:-------------:|:-----:|:----:|:---------------:|
55
+ | 2.8727 | 1.0 | 4 | 1.9179 |
56
 
57
 
58
  ### Framework versions
adapter_config.json CHANGED
@@ -19,13 +19,13 @@
19
  "rank_pattern": {},
20
  "revision": null,
21
  "target_modules": [
22
- "o_proj",
23
- "gate_proj",
24
  "k_proj",
25
- "down_proj",
26
  "q_proj",
 
27
  "v_proj",
28
- "up_proj"
 
29
  ],
30
  "task_type": "CAUSAL_LM"
31
  }
 
19
  "rank_pattern": {},
20
  "revision": null,
21
  "target_modules": [
 
 
22
  "k_proj",
23
+ "up_proj",
24
  "q_proj",
25
+ "gate_proj",
26
  "v_proj",
27
+ "o_proj",
28
+ "down_proj"
29
  ],
30
  "task_type": "CAUSAL_LM"
31
  }
adapter_model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:9c7cf8e73622f67f47852fe1653c34f642065a261c76b73bbb244557d14c848d
3
  size 159967880
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:d90e045b737c4a6531b4e11d358c56be4e34ff1f843c5b731db280a87c770a5e
3
  size 159967880
all_results.json CHANGED
@@ -1,8 +1,8 @@
1
  {
2
  "epoch": 1.0,
3
- "train_loss": 2.294321656227112,
4
- "train_runtime": 13.2658,
5
  "train_samples": 10,
6
- "train_samples_per_second": 0.754,
7
- "train_steps_per_second": 0.302
8
  }
 
1
  {
2
  "epoch": 1.0,
3
+ "train_loss": 2.294134736061096,
4
+ "train_runtime": 13.2039,
5
  "train_samples": 10,
6
+ "train_samples_per_second": 0.757,
7
+ "train_steps_per_second": 0.303
8
  }
config.json CHANGED
@@ -20,7 +20,7 @@
20
  "quantization_config": {
21
  "_load_in_4bit": true,
22
  "_load_in_8bit": false,
23
- "bnb_4bit_compute_dtype": "bfloat16",
24
  "bnb_4bit_quant_type": "nf4",
25
  "bnb_4bit_use_double_quant": false,
26
  "llm_int8_enable_fp32_cpu_offload": false,
@@ -35,7 +35,6 @@
35
  "rope_scaling": null,
36
  "rope_theta": 10000.0,
37
  "tie_word_embeddings": false,
38
- "torch_dtype": "float16",
39
  "transformers_version": "4.38.2",
40
  "use_cache": true,
41
  "vocab_size": 32000
 
20
  "quantization_config": {
21
  "_load_in_4bit": true,
22
  "_load_in_8bit": false,
23
+ "bnb_4bit_compute_dtype": "float16",
24
  "bnb_4bit_quant_type": "nf4",
25
  "bnb_4bit_use_double_quant": false,
26
  "llm_int8_enable_fp32_cpu_offload": false,
 
35
  "rope_scaling": null,
36
  "rope_theta": 10000.0,
37
  "tie_word_embeddings": false,
 
38
  "transformers_version": "4.38.2",
39
  "use_cache": true,
40
  "vocab_size": 32000
runs/Apr22_06-00-01_SYS-4029GP-TRT/events.out.tfevents.1713736812.SYS-4029GP-TRT.1197881.0 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:8a7f3bc98e3cf2a0d13548bec631965604a1ae8784e49241b2abd9811f5a9e9f
3
+ size 5930
train_results.json CHANGED
@@ -1,8 +1,8 @@
1
  {
2
  "epoch": 1.0,
3
- "train_loss": 2.294321656227112,
4
- "train_runtime": 13.2658,
5
  "train_samples": 10,
6
- "train_samples_per_second": 0.754,
7
- "train_steps_per_second": 0.302
8
  }
 
1
  {
2
  "epoch": 1.0,
3
+ "train_loss": 2.294134736061096,
4
+ "train_runtime": 13.2039,
5
  "train_samples": 10,
6
+ "train_samples_per_second": 0.757,
7
+ "train_steps_per_second": 0.303
8
  }
trainer_state.json CHANGED
@@ -10,16 +10,16 @@
10
  "log_history": [
11
  {
12
  "epoch": 0.25,
13
- "grad_norm": 60887.75,
14
  "learning_rate": 0.0002,
15
  "loss": 2.8727,
16
  "step": 1
17
  },
18
  {
19
  "epoch": 1.0,
20
- "eval_loss": 1.9190164804458618,
21
- "eval_runtime": 1.274,
22
- "eval_samples_per_second": 7.849,
23
  "eval_steps_per_second": 0.785,
24
  "step": 4
25
  },
@@ -27,10 +27,10 @@
27
  "epoch": 1.0,
28
  "step": 4,
29
  "total_flos": 112033940987904.0,
30
- "train_loss": 2.294321656227112,
31
- "train_runtime": 13.2658,
32
- "train_samples_per_second": 0.754,
33
- "train_steps_per_second": 0.302
34
  }
35
  ],
36
  "logging_steps": 5,
 
10
  "log_history": [
11
  {
12
  "epoch": 0.25,
13
+ "grad_norm": 60655.65234375,
14
  "learning_rate": 0.0002,
15
  "loss": 2.8727,
16
  "step": 1
17
  },
18
  {
19
  "epoch": 1.0,
20
+ "eval_loss": 1.9178766012191772,
21
+ "eval_runtime": 1.2746,
22
+ "eval_samples_per_second": 7.846,
23
  "eval_steps_per_second": 0.785,
24
  "step": 4
25
  },
 
27
  "epoch": 1.0,
28
  "step": 4,
29
  "total_flos": 112033940987904.0,
30
+ "train_loss": 2.294134736061096,
31
+ "train_runtime": 13.2039,
32
+ "train_samples_per_second": 0.757,
33
+ "train_steps_per_second": 0.303
34
  }
35
  ],
36
  "logging_steps": 5,
training_args.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:45ee5f56891ea0f726b72278063ca89172602a534d3ef1a13c3034b6193fb170
3
  size 4984
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:b8a365241bc54220fb3585734b5fd7590bc33327f68c094a81b83301188da5fa
3
  size 4984