danield12 committed on
Commit
652c7cf
·
verified ·
1 Parent(s): 3d2a086

Upload folder using huggingface_hub

Browse files
README.md ADDED
@@ -0,0 +1,23 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ ---
2
+ library_name: peft
3
+ ---
4
+ ## Training procedure
5
+
6
+
7
+ The following `bitsandbytes` quantization config was used during training:
8
+ - quant_method: bitsandbytes
9
+ - _load_in_8bit: False
10
+ - _load_in_4bit: True
11
+ - llm_int8_threshold: 6.0
12
+ - llm_int8_skip_modules: None
13
+ - llm_int8_enable_fp32_cpu_offload: False
14
+ - llm_int8_has_fp16_weight: False
15
+ - bnb_4bit_quant_type: nf4
16
+ - bnb_4bit_use_double_quant: True
17
+ - bnb_4bit_compute_dtype: bfloat16
18
+ - load_in_4bit: True
19
+ - load_in_8bit: False
20
+ ### Framework versions
21
+
22
+
23
+ - PEFT 0.4.0
adapter_config.json ADDED
@@ -0,0 +1,26 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "auto_mapping": null,
3
+ "base_model_name_or_path": "meta-llama/Llama-2-7b-hf",
4
+ "bias": "none",
5
+ "fan_in_fan_out": false,
6
+ "inference_mode": true,
7
+ "init_lora_weights": true,
8
+ "layers_pattern": null,
9
+ "layers_to_transform": null,
10
+ "lora_alpha": 64,
11
+ "lora_dropout": 0.1,
12
+ "modules_to_save": null,
13
+ "peft_type": "LORA",
14
+ "r": 16,
15
+ "revision": null,
16
+ "target_modules": [
17
+ "gate_proj",
18
+ "k_proj",
19
+ "o_proj",
20
+ "q_proj",
21
+ "v_proj",
22
+ "up_proj",
23
+ "down_proj"
24
+ ],
25
+ "task_type": "SEQ_CLS"
26
+ }
adapter_model.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:e9d5aaae4f76e991c745d0625feb8399589f5fed6ffa655ff1888c189f450b1b
3
+ size 160102926
all_results.json ADDED
@@ -0,0 +1,8 @@
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "epoch": 3.9,
3
+ "total_flos": 5.046529217789952e+16,
4
+ "train_loss": 0.7722815573215485,
5
+ "train_runtime": 16370.7549,
6
+ "train_samples_per_second": 0.157,
7
+ "train_steps_per_second": 0.002
8
+ }
runs/Apr06_04-02-20_fa1115e4f8e3/events.out.tfevents.1712376142.fa1115e4f8e3.35.0 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:a266fe737f01b10b55426464e3b4f50c7456f292322940576b1b5ebe6dbf93d7
3
+ size 9283
train_results.json ADDED
@@ -0,0 +1,8 @@
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "epoch": 3.9,
3
+ "total_flos": 5.046529217789952e+16,
4
+ "train_loss": 0.7722815573215485,
5
+ "train_runtime": 16370.7549,
6
+ "train_samples_per_second": 0.157,
7
+ "train_steps_per_second": 0.002
8
+ }
trainer_state.json ADDED
@@ -0,0 +1,130 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "best_metric": null,
3
+ "best_model_checkpoint": null,
4
+ "epoch": 3.902439024390244,
5
+ "eval_steps": 8,
6
+ "global_step": 40,
7
+ "is_hyper_param_search": false,
8
+ "is_local_process_zero": true,
9
+ "is_world_process_zero": true,
10
+ "log_history": [
11
+ {
12
+ "epoch": 0.78,
13
+ "grad_norm": 38.259986877441406,
14
+ "learning_rate": 0.00042105263157894734,
15
+ "loss": 1.6875,
16
+ "step": 8
17
+ },
18
+ {
19
+ "epoch": 0.78,
20
+ "eval_accuracy": 0.4375,
21
+ "eval_f1_score": 0.41715758468335784,
22
+ "eval_gmean": 0.4278381754658704,
23
+ "eval_loss": 1.3098154067993164,
24
+ "eval_precision": 0.41918181818181816,
25
+ "eval_recall": 0.4375,
26
+ "eval_runtime": 163.4181,
27
+ "eval_samples_per_second": 0.49,
28
+ "eval_steps_per_second": 0.061,
29
+ "step": 8
30
+ },
31
+ {
32
+ "epoch": 1.56,
33
+ "grad_norm": 10.52756404876709,
34
+ "learning_rate": 0.00031578947368421053,
35
+ "loss": 0.8316,
36
+ "step": 16
37
+ },
38
+ {
39
+ "epoch": 1.56,
40
+ "eval_accuracy": 0.6,
41
+ "eval_f1_score": 0.583752417794971,
42
+ "eval_gmean": 0.6101891731812923,
43
+ "eval_loss": 0.8816300630569458,
44
+ "eval_precision": 0.6348214285714286,
45
+ "eval_recall": 0.6,
46
+ "eval_runtime": 163.1878,
47
+ "eval_samples_per_second": 0.49,
48
+ "eval_steps_per_second": 0.061,
49
+ "step": 16
50
+ },
51
+ {
52
+ "epoch": 2.34,
53
+ "grad_norm": 21.414737701416016,
54
+ "learning_rate": 0.00021052631578947367,
55
+ "loss": 0.6194,
56
+ "step": 24
57
+ },
58
+ {
59
+ "epoch": 2.34,
60
+ "eval_accuracy": 0.4875,
61
+ "eval_f1_score": 0.48581599123767794,
62
+ "eval_gmean": 0.4905917000576075,
63
+ "eval_loss": 1.0450302362442017,
64
+ "eval_precision": 0.4916666666666666,
65
+ "eval_recall": 0.4875,
66
+ "eval_runtime": 163.2758,
67
+ "eval_samples_per_second": 0.49,
68
+ "eval_steps_per_second": 0.061,
69
+ "step": 24
70
+ },
71
+ {
72
+ "epoch": 3.12,
73
+ "grad_norm": 3.486269474029541,
74
+ "learning_rate": 0.00010526315789473683,
75
+ "loss": 0.4459,
76
+ "step": 32
77
+ },
78
+ {
79
+ "epoch": 3.12,
80
+ "eval_accuracy": 0.575,
81
+ "eval_f1_score": 0.574198617221873,
82
+ "eval_gmean": 0.5726770118485195,
83
+ "eval_loss": 0.9226075410842896,
84
+ "eval_precision": 0.5741161616161616,
85
+ "eval_recall": 0.575,
86
+ "eval_runtime": 163.1542,
87
+ "eval_samples_per_second": 0.49,
88
+ "eval_steps_per_second": 0.061,
89
+ "step": 32
90
+ },
91
+ {
92
+ "epoch": 3.9,
93
+ "grad_norm": 5.4782609939575195,
94
+ "learning_rate": 0.0,
95
+ "loss": 0.277,
96
+ "step": 40
97
+ },
98
+ {
99
+ "epoch": 3.9,
100
+ "eval_accuracy": 0.575,
101
+ "eval_f1_score": 0.5709523809523809,
102
+ "eval_gmean": 0.5701550266251846,
103
+ "eval_loss": 0.9736940264701843,
104
+ "eval_precision": 0.5734375,
105
+ "eval_recall": 0.575,
106
+ "eval_runtime": 163.1891,
107
+ "eval_samples_per_second": 0.49,
108
+ "eval_steps_per_second": 0.061,
109
+ "step": 40
110
+ },
111
+ {
112
+ "epoch": 3.9,
113
+ "step": 40,
114
+ "total_flos": 5.046529217789952e+16,
115
+ "train_loss": 0.7722815573215485,
116
+ "train_runtime": 16370.7549,
117
+ "train_samples_per_second": 0.157,
118
+ "train_steps_per_second": 0.002
119
+ }
120
+ ],
121
+ "logging_steps": 8,
122
+ "max_steps": 40,
123
+ "num_input_tokens_seen": 0,
124
+ "num_train_epochs": 4,
125
+ "save_steps": 500,
126
+ "total_flos": 5.046529217789952e+16,
127
+ "train_batch_size": 16,
128
+ "trial_name": null,
129
+ "trial_params": null
130
+ }