kallacharanteja committed on
Commit
b0f402b
·
verified ·
1 Parent(s): 253c35a

Training in progress, step 500

Browse files
.gitattributes CHANGED
@@ -33,3 +33,4 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
33
  *.zip filter=lfs diff=lfs merge=lfs -text
34
  *.zst filter=lfs diff=lfs merge=lfs -text
35
  *tfevents* filter=lfs diff=lfs merge=lfs -text
 
 
33
  *.zip filter=lfs diff=lfs merge=lfs -text
34
  *.zst filter=lfs diff=lfs merge=lfs -text
35
  *tfevents* filter=lfs diff=lfs merge=lfs -text
36
+ tokenizer.json filter=lfs diff=lfs merge=lfs -text
adapter_config.json ADDED
@@ -0,0 +1,41 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "alora_invocation_tokens": null,
3
+ "alpha_pattern": {},
4
+ "arrow_config": null,
5
+ "auto_mapping": null,
6
+ "base_model_name_or_path": "google/mt5-small",
7
+ "bias": "none",
8
+ "corda_config": null,
9
+ "ensure_weight_tying": false,
10
+ "eva_config": null,
11
+ "exclude_modules": null,
12
+ "fan_in_fan_out": false,
13
+ "inference_mode": true,
14
+ "init_lora_weights": true,
15
+ "layer_replication": null,
16
+ "layers_pattern": null,
17
+ "layers_to_transform": null,
18
+ "loftq_config": {},
19
+ "lora_alpha": 32,
20
+ "lora_bias": false,
21
+ "lora_dropout": 0.1,
22
+ "megatron_config": null,
23
+ "megatron_core": "megatron.core",
24
+ "modules_to_save": null,
25
+ "peft_type": "LORA",
26
+ "peft_version": "0.18.1",
27
+ "qalora_group_size": 16,
28
+ "r": 16,
29
+ "rank_pattern": {},
30
+ "revision": null,
31
+ "target_modules": [
32
+ "v",
33
+ "q"
34
+ ],
35
+ "target_parameters": null,
36
+ "task_type": "SEQ_2_SEQ_LM",
37
+ "trainable_token_indices": null,
38
+ "use_dora": false,
39
+ "use_qalora": false,
40
+ "use_rslora": false
41
+ }
adapter_model.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:4f6ac3e830ae50bf86c7cb924e5805992764c0d8a6e2da5f74c47af2c38935d9
3
+ size 1027183888
tokenizer.json ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:ea2161d79be15aa5b9ca5327f44791ed567cff8661f69863d29eefb5abc14716
3
+ size 16033349
tokenizer_config.json ADDED
@@ -0,0 +1,15 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "backend": "tokenizers",
3
+ "eos_token": "</s>",
4
+ "extra_ids": 0,
5
+ "extra_special_tokens": [
6
+ "<2te>",
7
+ "<2hi>"
8
+ ],
9
+ "is_local": false,
10
+ "model_max_length": 1000000000000000019884624838656,
11
+ "pad_token": "<pad>",
12
+ "tokenizer_class": "T5Tokenizer",
13
+ "unk_id": 2,
14
+ "unk_token": "<unk>"
15
+ }
trainer_state.json ADDED
@@ -0,0 +1,112 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "best_global_step": 500,
3
+ "best_metric": 7.9791436195373535,
4
+ "best_model_checkpoint": "/kaggle/working/checkpoints/checkpoint-500",
5
+ "epoch": 0.4477277815088426,
6
+ "eval_steps": 500,
7
+ "global_step": 500,
8
+ "is_hyper_param_search": false,
9
+ "is_local_process_zero": true,
10
+ "is_world_process_zero": true,
11
+ "log_history": [
12
+ {
13
+ "epoch": 0.04477277815088426,
14
+ "grad_norm": 298.04815673828125,
15
+ "learning_rate": 2.4257425742574257e-05,
16
+ "loss": 112.46427734375,
17
+ "step": 50
18
+ },
19
+ {
20
+ "epoch": 0.08954555630176853,
21
+ "grad_norm": 515.502685546875,
22
+ "learning_rate": 4.9009900990099014e-05,
23
+ "loss": 108.94888671875,
24
+ "step": 100
25
+ },
26
+ {
27
+ "epoch": 0.1343183344526528,
28
+ "grad_norm": 224.24559020996094,
29
+ "learning_rate": 4.926153846153847e-05,
30
+ "loss": 102.64640625,
31
+ "step": 150
32
+ },
33
+ {
34
+ "epoch": 0.17909111260353705,
35
+ "grad_norm": 328.56585693359375,
36
+ "learning_rate": 4.849230769230769e-05,
37
+ "loss": 93.7025390625,
38
+ "step": 200
39
+ },
40
+ {
41
+ "epoch": 0.2238638907544213,
42
+ "grad_norm": 212.02337646484375,
43
+ "learning_rate": 4.772307692307693e-05,
44
+ "loss": 86.589169921875,
45
+ "step": 250
46
+ },
47
+ {
48
+ "epoch": 0.2686366689053056,
49
+ "grad_norm": 371.2373962402344,
50
+ "learning_rate": 4.695384615384615e-05,
51
+ "loss": 78.94826171875,
52
+ "step": 300
53
+ },
54
+ {
55
+ "epoch": 0.31340944705618984,
56
+ "grad_norm": 127.3225326538086,
57
+ "learning_rate": 4.618461538461539e-05,
58
+ "loss": 72.1776416015625,
59
+ "step": 350
60
+ },
61
+ {
62
+ "epoch": 0.3581822252070741,
63
+ "grad_norm": 112.87958526611328,
64
+ "learning_rate": 4.541538461538462e-05,
65
+ "loss": 66.216123046875,
66
+ "step": 400
67
+ },
68
+ {
69
+ "epoch": 0.40295500335795836,
70
+ "grad_norm": 73.62041473388672,
71
+ "learning_rate": 4.464615384615385e-05,
72
+ "loss": 60.0425634765625,
73
+ "step": 450
74
+ },
75
+ {
76
+ "epoch": 0.4477277815088426,
77
+ "grad_norm": 61.61714172363281,
78
+ "learning_rate": 4.3876923076923076e-05,
79
+ "loss": 54.319912109375,
80
+ "step": 500
81
+ },
82
+ {
83
+ "epoch": 0.4477277815088426,
84
+ "eval_loss": 7.9791436195373535,
85
+ "eval_runtime": 11.3347,
86
+ "eval_samples_per_second": 63.698,
87
+ "eval_steps_per_second": 8.028,
88
+ "step": 500
89
+ }
90
+ ],
91
+ "logging_steps": 50,
92
+ "max_steps": 3351,
93
+ "num_input_tokens_seen": 0,
94
+ "num_train_epochs": 3,
95
+ "save_steps": 500,
96
+ "stateful_callbacks": {
97
+ "TrainerControl": {
98
+ "args": {
99
+ "should_epoch_stop": false,
100
+ "should_evaluate": false,
101
+ "should_log": false,
102
+ "should_save": true,
103
+ "should_training_stop": false
104
+ },
105
+ "attributes": {}
106
+ }
107
+ },
108
+ "total_flos": 4246783131648000.0,
109
+ "train_batch_size": 16,
110
+ "trial_name": null,
111
+ "trial_params": null
112
+ }
training_args.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:19156ac353fa48da06b4c41a771842d317d52305f57a718e207b4557d7b05081
3
+ size 5457