prthm29 committed on
Commit
305a5c2
·
verified ·
1 Parent(s): a68bcca

Training in progress, step 2

Browse files
README.md ADDED
@@ -0,0 +1,70 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ ---
2
+ library_name: peft
3
+ license: mit
4
+ base_model: zai-org/GLM-OCR
5
+ tags:
6
+ - base_model:adapter:zai-org/GLM-OCR
7
+ - llama-factory
8
+ - lora
9
+ - transformers
10
+ metrics:
11
+ - accuracy
12
+ pipeline_tag: text-generation
13
+ model-index:
14
+ - name: smoke_test_glm4v_checkpoints
15
+ results: []
16
+ ---
17
+
18
+ <!-- This model card has been generated automatically according to the information the Trainer had access to. You
19
+ should probably proofread and complete it, then remove this comment. -->
20
+
21
+ # smoke_test_glm4v_checkpoints
22
+
23
+ This model is a fine-tuned version of [zai-org/GLM-OCR](https://huggingface.co/zai-org/GLM-OCR) on the gujarati_ocr_stream dataset.
24
+ It achieves the following results on the evaluation set:
25
+ - Loss: 2.5668
26
+ - Accuracy: 0.5594
27
+
28
+ ## Model description
29
+
30
+ More information needed
31
+
32
+ ## Intended uses & limitations
33
+
34
+ More information needed
35
+
36
+ ## Training and evaluation data
37
+
38
+ More information needed
39
+
40
+ ## Training procedure
41
+
42
+ ### Training hyperparameters
43
+
44
+ The following hyperparameters were used during training:
45
+ - learning_rate: 0.0001
46
+ - train_batch_size: 4
47
+ - eval_batch_size: 8
48
+ - seed: 42
49
+ - gradient_accumulation_steps: 4
50
+ - total_train_batch_size: 16
51
+ - optimizer: OptimizerNames.ADAMW_TORCH_FUSED with betas=(0.9,0.999) and epsilon=1e-08 (no additional optimizer arguments)
52
+ - lr_scheduler_type: cosine
53
+ - lr_scheduler_warmup_steps: 1
54
+ - training_steps: 5
55
+
56
+ ### Training results
57
+
58
+ | Training Loss | Epoch | Step | Validation Loss | Accuracy |
59
+ |:-------------:|:------:|:----:|:---------------:|:--------:|
60
+ | 2.7841 | 0.6154 | 2 | 2.7496 | 0.5348 |
61
+ | 2.6433 | 1.0 | 4 | 2.5868 | 0.5556 |
62
+
63
+
64
+ ### Framework versions
65
+
66
+ - PEFT 0.18.1
67
+ - Transformers 5.2.0
68
+ - Pytorch 2.11.0+cu130
69
+ - Datasets 4.0.0
70
+ - Tokenizers 0.22.2
adapter_config.json ADDED
@@ -0,0 +1,60 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "alora_invocation_tokens": null,
3
+ "alpha_pattern": {},
4
+ "arrow_config": null,
5
+ "auto_mapping": null,
6
+ "base_model_name_or_path": "zai-org/GLM-OCR",
7
+ "bias": "none",
8
+ "corda_config": null,
9
+ "ensure_weight_tying": false,
10
+ "eva_config": null,
11
+ "exclude_modules": null,
12
+ "fan_in_fan_out": false,
13
+ "inference_mode": true,
14
+ "init_lora_weights": true,
15
+ "layer_replication": null,
16
+ "layers_pattern": null,
17
+ "layers_to_transform": null,
18
+ "loftq_config": {},
19
+ "lora_alpha": 32,
20
+ "lora_bias": false,
21
+ "lora_dropout": 0.0,
22
+ "megatron_config": null,
23
+ "megatron_core": "megatron.core",
24
+ "modules_to_save": null,
25
+ "peft_type": "LORA",
26
+ "peft_version": "0.18.1",
27
+ "qalora_group_size": 16,
28
+ "r": 16,
29
+ "rank_pattern": {},
30
+ "revision": null,
31
+ "target_modules": [
32
+ "layers.9.mlp.down_proj",
33
+ "layers.0.mlp.down_proj",
34
+ "layers.12.mlp.down_proj",
35
+ "q_proj",
36
+ "layers.8.mlp.down_proj",
37
+ "o_proj",
38
+ "k_proj",
39
+ "layers.15.mlp.down_proj",
40
+ "layers.1.mlp.down_proj",
41
+ "layers.5.mlp.down_proj",
42
+ "layers.3.mlp.down_proj",
43
+ "gate_up_proj",
44
+ "layers.2.mlp.down_proj",
45
+ "layers.13.mlp.down_proj",
46
+ "layers.6.mlp.down_proj",
47
+ "v_proj",
48
+ "layers.10.mlp.down_proj",
49
+ "layers.11.mlp.down_proj",
50
+ "layers.4.mlp.down_proj",
51
+ "layers.7.mlp.down_proj",
52
+ "layers.14.mlp.down_proj"
53
+ ],
54
+ "target_parameters": null,
55
+ "task_type": "CAUSAL_LM",
56
+ "trainable_token_indices": null,
57
+ "use_dora": false,
58
+ "use_qalora": false,
59
+ "use_rslora": false
60
+ }
adapter_model.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:64df0764bfe053c84726c27244f3a4f48ca2824cda34102ebae5b1f6c142f440
3
+ size 29912904
all_results.json ADDED
@@ -0,0 +1,13 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "epoch": 1.3076923076923077,
3
+ "eval_accuracy": 0.5594251982846505,
4
+ "eval_loss": 2.566760301589966,
5
+ "eval_runtime": 2.0675,
6
+ "eval_samples_per_second": 24.184,
7
+ "eval_steps_per_second": 3.386,
8
+ "total_flos": 197425390977024.0,
9
+ "train_loss": 2.7674348831176756,
10
+ "train_runtime": 14.3553,
11
+ "train_samples_per_second": 5.573,
12
+ "train_steps_per_second": 0.348
13
+ }
chat_template.jinja ADDED
@@ -0,0 +1,140 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ [gMASK]<sop>
2
+ {%- if tools -%}
3
+ <|system|>
4
+ # Tools
5
+
6
+ You may call one or more functions to assist with the user query.
7
+
8
+ You are provided with function signatures within <tools></tools> XML tags:
9
+ <tools>
10
+ {% for tool in tools %}
11
+ {{ tool | tojson(ensure_ascii=False) }}
12
+ {% endfor %}
13
+ </tools>
14
+
15
+ For each function call, output the function name and arguments within the following XML format:
16
+ <tool_call>{function-name}
17
+ <arg_key>{arg-key-1}</arg_key>
18
+ <arg_value>{arg-value-1}</arg_value>
19
+ <arg_key>{arg-key-2}</arg_key>
20
+ <arg_value>{arg-value-2}</arg_value>
21
+ ...
22
+ </tool_call>{%- endif -%}
23
+ {%- macro visible_text(content) -%}
24
+ {%- if content is string -%}
25
+ {{- content }}
26
+ {%- elif content is iterable and content is not mapping -%}
27
+ {%- for item in content -%}
28
+ {%- if item is mapping and item.type == 'text' -%}
29
+ {{- item.text }}
30
+ {%- elif item is mapping and (item.type == 'image' or 'image' in item) -%}
31
+ <|begin_of_image|><|image|><|end_of_image|>
32
+ {%- elif item is mapping and (item.type == 'video' or 'video' in item) -%}
33
+ <|begin_of_video|><|video|><|end_of_video|>
34
+ {%- elif item is string -%}
35
+ {{- item }}
36
+ {%- endif -%}
37
+ {%- endfor -%}
38
+ {%- else -%}
39
+ {{- content }}
40
+ {%- endif -%}
41
+ {%- endmacro -%}
42
+ {%- set ns = namespace(last_user_index=-1) %}
43
+ {%- for m in messages %}
44
+ {%- if m.role == 'user' %}
45
+ {% set ns.last_user_index = loop.index0 -%}
46
+ {%- endif %}
47
+ {%- endfor %}
48
+ {% for m in messages %}
49
+ {%- if m.role == 'user' -%}<|user|>
50
+ {% if m.content is string %}
51
+ {{ m.content }}
52
+ {%- else %}
53
+ {%- for item in m.content %}
54
+ {% if item.type == 'video' or 'video' in item %}
55
+ <|begin_of_video|><|video|><|end_of_video|>{% elif item.type == 'image' or 'image' in item %}
56
+ <|begin_of_image|><|image|><|end_of_image|>{% elif item.type == 'text' %}
57
+ {{ item.text }}
58
+ {%- endif %}
59
+ {%- endfor %}
60
+ {%- endif %}
61
+ {{- '/nothink' if (enable_thinking is defined and not enable_thinking and not visible_text(m.content).endswith("/nothink")) else '' -}}
62
+ {%- elif m.role == 'assistant' -%}
63
+ <|assistant|>
64
+ {%- set reasoning_content = '' %}
65
+ {%- set content = visible_text(m.content) %}
66
+ {%- if m.reasoning_content is string %}
67
+ {%- set reasoning_content = m.reasoning_content %}
68
+ {%- else %}
69
+ {%- if '</think>' in content %}
70
+ {%- set reasoning_content = content.split('</think>')[0].rstrip('\n').split('<think>')[-1].lstrip('\n') %}
71
+ {%- set content = content.split('</think>')[-1].lstrip('\n') %}
72
+ {%- endif %}
73
+ {%- endif %}
74
+ {%- if loop.index0 > ns.last_user_index and reasoning_content -%}
75
+ {{ '\n<think>' + reasoning_content.strip() + '</think>'}}
76
+ {%- else -%}
77
+ {{ '\n<think></think>' }}
78
+ {%- endif -%}
79
+ {%- if content.strip() -%}
80
+ {{ '\n' + content.strip() }}
81
+ {%- endif -%}
82
+ {% if m.tool_calls %}
83
+ {% for tc in m.tool_calls %}
84
+ {%- if tc.function %}
85
+ {%- set tc = tc.function %}
86
+ {%- endif %}
87
+ {{ '\n<tool_call>' + tc.name }}
88
+ {% set _args = tc.arguments %}
89
+ {% for k, v in _args.items() %}
90
+ <arg_key>{{ k }}</arg_key>
91
+ <arg_value>{{ v | tojson(ensure_ascii=False) if v is not string else v }}</arg_value>
92
+ {% endfor %}
93
+ </tool_call>{% endfor %}
94
+ {% endif %}
95
+ {%- elif m.role == 'tool' -%}
96
+ {%- if m.content is string -%}
97
+ {%- if loop.first or (messages[loop.index0 - 1].role != "tool") %}
98
+ {{- '<|observation|>' }}
99
+ {%- endif %}
100
+ {{- '\n<tool_response>\n' }}
101
+ {{- m.content }}
102
+ {{- '\n</tool_response>' }}
103
+ {% elif m.content is iterable and m.content is not mapping %}
104
+ {%- if loop.first or (messages[loop.index0 - 1].role != "tool") %}
105
+ {{- '<|observation|>' }}
106
+ {%- endif %}
107
+ {{- '\n<tool_response>\n' }}
108
+ {%- for tr in m.content -%}
109
+ {%- if tr is mapping and tr.type is defined -%}
110
+ {%- set t = tr.type | lower -%}
111
+ {%- if t == 'text' and tr.text is defined -%}
112
+ {{ tr.text }}
113
+ {%- elif t in ['image', 'image_url'] -%}
114
+ <|begin_of_image|><|image|><|end_of_image|>
115
+ {%- elif t in ['video', 'video_url'] -%}
116
+ <|begin_of_video|><|video|><|end_of_video|>
117
+ {%- else -%}
118
+ {{ tr | tojson(ensure_ascii=False) }}
119
+ {%- endif -%}
120
+ {%- else -%}
121
+ {{ tr.output if tr.output is defined else tr }}
122
+ {%- endif -%}
123
+ {%- endfor -%}
124
+ {{- '\n</tool_response>' }}
125
+ {%- else -%}
126
+ <|observation|>{% for tr in m.content %}
127
+
128
+ <tool_response>
129
+ {{ tr.output if tr.output is defined else tr }}
130
+ </tool_response>{% endfor -%}
131
+ {% endif -%}
132
+ {%- elif m.role == 'system' -%}
133
+ <|system|>
134
+ {{ visible_text(m.content) }}
135
+ {%- endif -%}
136
+ {%- endfor -%}
137
+ {%- if add_generation_prompt -%}
138
+ <|assistant|>
139
+ {{'<think></think>\n' if (enable_thinking is defined and not enable_thinking) else ''}}
140
+ {%- endif -%}
eval_results.json ADDED
@@ -0,0 +1,8 @@
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "epoch": 1.3076923076923077,
3
+ "eval_accuracy": 0.5594251982846505,
4
+ "eval_loss": 2.566760301589966,
5
+ "eval_runtime": 2.0675,
6
+ "eval_samples_per_second": 24.184,
7
+ "eval_steps_per_second": 3.386
8
+ }
processor_config.json ADDED
@@ -0,0 +1,67 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "image_processor": {
3
+ "data_format": "channels_first",
4
+ "do_convert_rgb": true,
5
+ "do_normalize": true,
6
+ "do_rescale": true,
7
+ "do_resize": true,
8
+ "image_mean": [
9
+ 0.48145466,
10
+ 0.4578275,
11
+ 0.40821073
12
+ ],
13
+ "image_processor_type": "Glm46VImageProcessorFast",
14
+ "image_std": [
15
+ 0.26862954,
16
+ 0.26130258,
17
+ 0.27577711
18
+ ],
19
+ "merge_size": 2,
20
+ "patch_size": 14,
21
+ "resample": 3,
22
+ "rescale_factor": 0.00392156862745098,
23
+ "size": {
24
+ "longest_edge": 9633792,
25
+ "shortest_edge": 12544
26
+ },
27
+ "temporal_patch_size": 2
28
+ },
29
+ "processor_class": "Glm46VProcessor",
30
+ "video_processor": {
31
+ "data_format": "channels_first",
32
+ "default_to_square": true,
33
+ "do_convert_rgb": true,
34
+ "do_normalize": true,
35
+ "do_rescale": true,
36
+ "do_resize": true,
37
+ "do_sample_frames": true,
38
+ "fps": 2,
39
+ "image_mean": [
40
+ 0.48145466,
41
+ 0.4578275,
42
+ 0.40821073
43
+ ],
44
+ "image_processor_type": "Glm46VImageProcessor",
45
+ "image_std": [
46
+ 0.26862954,
47
+ 0.26130258,
48
+ 0.27577711
49
+ ],
50
+ "max_duration": 300,
51
+ "max_image_size": {
52
+ "longest_edge": 47040000
53
+ },
54
+ "merge_size": 2,
55
+ "num_frames": 16,
56
+ "patch_size": 14,
57
+ "resample": 3,
58
+ "rescale_factor": 0.00392156862745098,
59
+ "return_metadata": false,
60
+ "size": {
61
+ "longest_edge": 9633792,
62
+ "shortest_edge": 12544
63
+ },
64
+ "temporal_patch_size": 2,
65
+ "video_processor_type": "Glm46VVideoProcessor"
66
+ }
67
+ }
runs/Apr09_04-50-45_cs-01knr8f8jy7kcvx4v0ce8jkn3q-1tpz/events.out.tfevents.1775710245.cs-01knr8f8jy7kcvx4v0ce8jkn3q-1tpz.30775.0 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:1410a2c2669bf64ad7832538e2a91671ccc7e81cb833850278f97f866ad1391b
3
+ size 7931
runs/Apr09_04-50-45_cs-01knr8f8jy7kcvx4v0ce8jkn3q-1tpz/events.out.tfevents.1775710296.cs-01knr8f8jy7kcvx4v0ce8jkn3q-1tpz.30775.1 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:2614795ee33373d3f773846c9a79150fdf8b5ad0abaee92d4bd2a2c0dba43a3b
3
+ size 405
runs/Apr09_05-09-41_cs-01knr8f8jy7kcvx4v0ce8jkn3q-1tpz/events.out.tfevents.1775711381.cs-01knr8f8jy7kcvx4v0ce8jkn3q-1tpz.97622.0 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:fabb44c2eba6c489de8092d30120855e7bd13880bc5a0789201f125c1bc04f62
3
+ size 7931
runs/Apr09_05-09-41_cs-01knr8f8jy7kcvx4v0ce8jkn3q-1tpz/events.out.tfevents.1775711401.cs-01knr8f8jy7kcvx4v0ce8jkn3q-1tpz.97622.1 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:b05455bbc80d8b21f7c31b2cd520e9edc0d8d0ee8fc8b0b585b985977193aeb3
3
+ size 405
runs/Apr09_05-15-45_cs-01knr8f8jy7kcvx4v0ce8jkn3q-1tpz/events.out.tfevents.1775711745.cs-01knr8f8jy7kcvx4v0ce8jkn3q-1tpz.119598.0 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:63624552099940c5e44b3761648d14d7db1859345a127639c4998c504f81354c
3
+ size 6645
tokenizer.json ADDED
The diff for this file is too large to render. See raw diff
 
tokenizer_config.json ADDED
@@ -0,0 +1,16 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "backend": "tokenizers",
3
+ "clean_up_tokenization_spaces": false,
4
+ "eos_token": "<|endoftext|>",
5
+ "extra_special_tokens": [
6
+ "<|user|>",
7
+ "<|observation|>"
8
+ ],
9
+ "is_local": false,
10
+ "model_max_length": 655380,
11
+ "pad_token": "<|endoftext|>",
12
+ "padding_side": "right",
13
+ "processor_class": "Glm46VProcessor",
14
+ "split_special_tokens": false,
15
+ "tokenizer_class": "TokenizersBackend"
16
+ }
train_results.json ADDED
@@ -0,0 +1,8 @@
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "epoch": 1.3076923076923077,
3
+ "total_flos": 197425390977024.0,
4
+ "train_loss": 2.7674348831176756,
5
+ "train_runtime": 14.3553,
6
+ "train_samples_per_second": 5.573,
7
+ "train_steps_per_second": 0.348
8
+ }
trainer_log.jsonl ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ {"current_steps": 1, "total_steps": 5, "loss": 2.965193033218384, "lr": 0.0, "epoch": 0.3076923076923077, "percentage": 20.0, "elapsed_time": "0:00:02", "remaining_time": "0:00:10"}
2
+ {"current_steps": 2, "total_steps": 5, "loss": 2.7841198444366455, "lr": 0.0001, "epoch": 0.6153846153846154, "percentage": 40.0, "elapsed_time": "0:00:03", "remaining_time": "0:00:05"}
3
+ {"current_steps": 2, "total_steps": 5, "eval_loss": 2.749584436416626, "epoch": 0.6153846153846154, "percentage": 40.0, "elapsed_time": "0:00:05", "remaining_time": "0:00:08"}
trainer_state.json ADDED
@@ -0,0 +1,96 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "best_global_step": null,
3
+ "best_metric": null,
4
+ "best_model_checkpoint": null,
5
+ "epoch": 1.3076923076923077,
6
+ "eval_steps": 2,
7
+ "global_step": 5,
8
+ "is_hyper_param_search": false,
9
+ "is_local_process_zero": true,
10
+ "is_world_process_zero": true,
11
+ "log_history": [
12
+ {
13
+ "epoch": 0.3076923076923077,
14
+ "grad_norm": 0.47904515266418457,
15
+ "learning_rate": 0.0,
16
+ "loss": 2.965193033218384,
17
+ "step": 1
18
+ },
19
+ {
20
+ "epoch": 0.6153846153846154,
21
+ "grad_norm": 0.456310510635376,
22
+ "learning_rate": 0.0001,
23
+ "loss": 2.7841198444366455,
24
+ "step": 2
25
+ },
26
+ {
27
+ "epoch": 0.6153846153846154,
28
+ "eval_accuracy": 0.5348454543007923,
29
+ "eval_loss": 2.749584436416626,
30
+ "eval_runtime": 2.1084,
31
+ "eval_samples_per_second": 23.715,
32
+ "eval_steps_per_second": 3.32,
33
+ "step": 2
34
+ },
35
+ {
36
+ "epoch": 0.9230769230769231,
37
+ "grad_norm": 0.48338398337364197,
38
+ "learning_rate": 8.535533905932738e-05,
39
+ "loss": 2.890326499938965,
40
+ "step": 3
41
+ },
42
+ {
43
+ "epoch": 1.0,
44
+ "grad_norm": 0.5952783823013306,
45
+ "learning_rate": 5e-05,
46
+ "loss": 2.643332004547119,
47
+ "step": 4
48
+ },
49
+ {
50
+ "epoch": 1.0,
51
+ "eval_accuracy": 0.5556479950956826,
52
+ "eval_loss": 2.586768388748169,
53
+ "eval_runtime": 2.0764,
54
+ "eval_samples_per_second": 24.08,
55
+ "eval_steps_per_second": 3.371,
56
+ "step": 4
57
+ },
58
+ {
59
+ "epoch": 1.3076923076923077,
60
+ "grad_norm": 0.4229271411895752,
61
+ "learning_rate": 1.4644660940672627e-05,
62
+ "loss": 2.5542030334472656,
63
+ "step": 5
64
+ },
65
+ {
66
+ "epoch": 1.3076923076923077,
67
+ "step": 5,
68
+ "total_flos": 197425390977024.0,
69
+ "train_loss": 2.7674348831176756,
70
+ "train_runtime": 14.3553,
71
+ "train_samples_per_second": 5.573,
72
+ "train_steps_per_second": 0.348
73
+ }
74
+ ],
75
+ "logging_steps": 1,
76
+ "max_steps": 5,
77
+ "num_input_tokens_seen": 0,
78
+ "num_train_epochs": 2,
79
+ "save_steps": 2,
80
+ "stateful_callbacks": {
81
+ "TrainerControl": {
82
+ "args": {
83
+ "should_epoch_stop": false,
84
+ "should_evaluate": false,
85
+ "should_log": false,
86
+ "should_save": true,
87
+ "should_training_stop": true
88
+ },
89
+ "attributes": {}
90
+ }
91
+ },
92
+ "total_flos": 197425390977024.0,
93
+ "train_batch_size": 4,
94
+ "trial_name": null,
95
+ "trial_params": null
96
+ }
training_args.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:aa0585eb0ac4a61e55427fec24f73141acacfb922a8a6ee0363e495d52b98870
3
+ size 5649
training_eval_accuracy.png ADDED
training_eval_loss.png ADDED
training_loss.png ADDED