Raiff1982 committed on
Commit
bfb331a
·
verified ·
1 Parent(s): 3c448b0

Upload 8 files

Browse files
adapter/adapter_config.json ADDED
@@ -0,0 +1,40 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "alora_invocation_tokens": null,
3
+ "alpha_pattern": {},
4
+ "arrow_config": null,
5
+ "auto_mapping": null,
6
+ "base_model_name_or_path": "gpt2",
7
+ "bias": "none",
8
+ "corda_config": null,
9
+ "ensure_weight_tying": false,
10
+ "eva_config": null,
11
+ "exclude_modules": null,
12
+ "fan_in_fan_out": true,
13
+ "inference_mode": true,
14
+ "init_lora_weights": true,
15
+ "layer_replication": null,
16
+ "layers_pattern": null,
17
+ "layers_to_transform": null,
18
+ "loftq_config": {},
19
+ "lora_alpha": 8,
20
+ "lora_bias": false,
21
+ "lora_dropout": 0.05,
22
+ "megatron_config": null,
23
+ "megatron_core": "megatron.core",
24
+ "modules_to_save": null,
25
+ "peft_type": "LORA",
26
+ "peft_version": "0.18.0",
27
+ "qalora_group_size": 16,
28
+ "r": 8,
29
+ "rank_pattern": {},
30
+ "revision": null,
31
+ "target_modules": [
32
+ "c_attn"
33
+ ],
34
+ "target_parameters": null,
35
+ "task_type": "CAUSAL_LM",
36
+ "trainable_token_indices": null,
37
+ "use_dora": false,
38
+ "use_qalora": false,
39
+ "use_rslora": false
40
+ }
adapter/adapter_model.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:e3c8cdf55c40e1ce4aabb0860bb20e446193984a5ce465c50a9e870a268df6e8
3
+ size 1182680
adapter/merges.txt ADDED
The diff for this file is too large to render. See raw diff
 
adapter/special_tokens_map.json ADDED
@@ -0,0 +1,6 @@
 
 
 
 
 
 
 
1
+ {
2
+ "bos_token": "<|endoftext|>",
3
+ "eos_token": "<|endoftext|>",
4
+ "pad_token": "<|endoftext|>",
5
+ "unk_token": "<|endoftext|>"
6
+ }
adapter/tokenizer.json ADDED
The diff for this file is too large to render. See raw diff
 
adapter/tokenizer_config.json ADDED
@@ -0,0 +1,21 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "add_prefix_space": false,
3
+ "added_tokens_decoder": {
4
+ "50256": {
5
+ "content": "<|endoftext|>",
6
+ "lstrip": false,
7
+ "normalized": true,
8
+ "rstrip": false,
9
+ "single_word": false,
10
+ "special": true
11
+ }
12
+ },
13
+ "bos_token": "<|endoftext|>",
14
+ "clean_up_tokenization_spaces": false,
15
+ "eos_token": "<|endoftext|>",
16
+ "extra_special_tokens": {},
17
+ "model_max_length": 1024,
18
+ "pad_token": "<|endoftext|>",
19
+ "tokenizer_class": "GPT2Tokenizer",
20
+ "unk_token": "<|endoftext|>"
21
+ }
adapter/trainer_state.json ADDED
@@ -0,0 +1,104 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "best_global_step": null,
3
+ "best_metric": null,
4
+ "best_model_checkpoint": null,
5
+ "epoch": 1.0,
6
+ "eval_steps": 500,
7
+ "global_step": 50,
8
+ "is_hyper_param_search": false,
9
+ "is_local_process_zero": true,
10
+ "is_world_process_zero": true,
11
+ "log_history": [
12
+ {
13
+ "epoch": 0.1,
14
+ "grad_norm": 0.21553142368793488,
15
+ "learning_rate": 1.6000000000000003e-05,
16
+ "loss": 4.6997,
17
+ "step": 5
18
+ },
19
+ {
20
+ "epoch": 0.2,
21
+ "grad_norm": 0.19729050993919373,
22
+ "learning_rate": 3.6e-05,
23
+ "loss": 4.6594,
24
+ "step": 10
25
+ },
26
+ {
27
+ "epoch": 0.3,
28
+ "grad_norm": 0.2265276163816452,
29
+ "learning_rate": 5.6000000000000006e-05,
30
+ "loss": 4.6575,
31
+ "step": 15
32
+ },
33
+ {
34
+ "epoch": 0.4,
35
+ "grad_norm": 0.24024614691734314,
36
+ "learning_rate": 7.6e-05,
37
+ "loss": 4.6701,
38
+ "step": 20
39
+ },
40
+ {
41
+ "epoch": 0.5,
42
+ "grad_norm": 0.25752702355384827,
43
+ "learning_rate": 9.6e-05,
44
+ "loss": 4.637,
45
+ "step": 25
46
+ },
47
+ {
48
+ "epoch": 0.6,
49
+ "grad_norm": 0.3138992488384247,
50
+ "learning_rate": 0.000116,
51
+ "loss": 4.6469,
52
+ "step": 30
53
+ },
54
+ {
55
+ "epoch": 0.7,
56
+ "grad_norm": 0.3859875202178955,
57
+ "learning_rate": 0.00013600000000000003,
58
+ "loss": 4.5877,
59
+ "step": 35
60
+ },
61
+ {
62
+ "epoch": 0.8,
63
+ "grad_norm": 0.5465034246444702,
64
+ "learning_rate": 0.00015600000000000002,
65
+ "loss": 4.5079,
66
+ "step": 40
67
+ },
68
+ {
69
+ "epoch": 0.9,
70
+ "grad_norm": 0.5064318776130676,
71
+ "learning_rate": 0.00017600000000000002,
72
+ "loss": 4.473,
73
+ "step": 45
74
+ },
75
+ {
76
+ "epoch": 1.0,
77
+ "grad_norm": 0.6789440512657166,
78
+ "learning_rate": 0.000196,
79
+ "loss": 4.3371,
80
+ "step": 50
81
+ }
82
+ ],
83
+ "logging_steps": 5,
84
+ "max_steps": 50,
85
+ "num_input_tokens_seen": 0,
86
+ "num_train_epochs": 1,
87
+ "save_steps": 200,
88
+ "stateful_callbacks": {
89
+ "TrainerControl": {
90
+ "args": {
91
+ "should_epoch_stop": false,
92
+ "should_evaluate": false,
93
+ "should_log": false,
94
+ "should_save": true,
95
+ "should_training_stop": true
96
+ },
97
+ "attributes": {}
98
+ }
99
+ },
100
+ "total_flos": 8095363301376.0,
101
+ "train_batch_size": 1,
102
+ "trial_name": null,
103
+ "trial_params": null
104
+ }
adapter/vocab.json ADDED
The diff for this file is too large to render. See raw diff