Training in progress, step 2
Browse files- README.md +70 -0
- adapter_config.json +60 -0
- adapter_model.safetensors +3 -0
- all_results.json +13 -0
- chat_template.jinja +140 -0
- eval_results.json +8 -0
- processor_config.json +67 -0
- runs/Apr09_04-50-45_cs-01knr8f8jy7kcvx4v0ce8jkn3q-1tpz/events.out.tfevents.1775710245.cs-01knr8f8jy7kcvx4v0ce8jkn3q-1tpz.30775.0 +3 -0
- runs/Apr09_04-50-45_cs-01knr8f8jy7kcvx4v0ce8jkn3q-1tpz/events.out.tfevents.1775710296.cs-01knr8f8jy7kcvx4v0ce8jkn3q-1tpz.30775.1 +3 -0
- runs/Apr09_05-09-41_cs-01knr8f8jy7kcvx4v0ce8jkn3q-1tpz/events.out.tfevents.1775711381.cs-01knr8f8jy7kcvx4v0ce8jkn3q-1tpz.97622.0 +3 -0
- runs/Apr09_05-09-41_cs-01knr8f8jy7kcvx4v0ce8jkn3q-1tpz/events.out.tfevents.1775711401.cs-01knr8f8jy7kcvx4v0ce8jkn3q-1tpz.97622.1 +3 -0
- runs/Apr09_05-15-45_cs-01knr8f8jy7kcvx4v0ce8jkn3q-1tpz/events.out.tfevents.1775711745.cs-01knr8f8jy7kcvx4v0ce8jkn3q-1tpz.119598.0 +3 -0
- tokenizer.json +0 -0
- tokenizer_config.json +16 -0
- train_results.json +8 -0
- trainer_log.jsonl +3 -0
- trainer_state.json +96 -0
- training_args.bin +3 -0
- training_eval_accuracy.png +0 -0
- training_eval_loss.png +0 -0
- training_loss.png +0 -0
README.md
ADDED
|
@@ -0,0 +1,70 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
---
|
| 2 |
+
library_name: peft
|
| 3 |
+
license: mit
|
| 4 |
+
base_model: zai-org/GLM-OCR
|
| 5 |
+
tags:
|
| 6 |
+
- base_model:adapter:zai-org/GLM-OCR
|
| 7 |
+
- llama-factory
|
| 8 |
+
- lora
|
| 9 |
+
- transformers
|
| 10 |
+
metrics:
|
| 11 |
+
- accuracy
|
| 12 |
+
pipeline_tag: text-generation
|
| 13 |
+
model-index:
|
| 14 |
+
- name: smoke_test_glm4v_checkpoints
|
| 15 |
+
results: []
|
| 16 |
+
---
|
| 17 |
+
|
| 18 |
+
<!-- This model card has been generated automatically according to the information the Trainer had access to. You
|
| 19 |
+
should probably proofread and complete it, then remove this comment. -->
|
| 20 |
+
|
| 21 |
+
# smoke_test_glm4v_checkpoints
|
| 22 |
+
|
| 23 |
+
This model is a fine-tuned version of [zai-org/GLM-OCR](https://huggingface.co/zai-org/GLM-OCR) on the gujarati_ocr_stream dataset.
|
| 24 |
+
It achieves the following results on the evaluation set:
|
| 25 |
+
- Loss: 2.5668
|
| 26 |
+
- Accuracy: 0.5594
|
| 27 |
+
|
| 28 |
+
## Model description
|
| 29 |
+
|
| 30 |
+
More information needed
|
| 31 |
+
|
| 32 |
+
## Intended uses & limitations
|
| 33 |
+
|
| 34 |
+
More information needed
|
| 35 |
+
|
| 36 |
+
## Training and evaluation data
|
| 37 |
+
|
| 38 |
+
More information needed
|
| 39 |
+
|
| 40 |
+
## Training procedure
|
| 41 |
+
|
| 42 |
+
### Training hyperparameters
|
| 43 |
+
|
| 44 |
+
The following hyperparameters were used during training:
|
| 45 |
+
- learning_rate: 0.0001
|
| 46 |
+
- train_batch_size: 4
|
| 47 |
+
- eval_batch_size: 8
|
| 48 |
+
- seed: 42
|
| 49 |
+
- gradient_accumulation_steps: 4
|
| 50 |
+
- total_train_batch_size: 16
|
| 51 |
+
- optimizer: Use OptimizerNames.ADAMW_TORCH_FUSED with betas=(0.9,0.999) and epsilon=1e-08 and optimizer_args=No additional optimizer arguments
|
| 52 |
+
- lr_scheduler_type: cosine
|
| 53 |
+
- lr_scheduler_warmup_steps: 1
|
| 54 |
+
- training_steps: 5
|
| 55 |
+
|
| 56 |
+
### Training results
|
| 57 |
+
|
| 58 |
+
| Training Loss | Epoch | Step | Validation Loss | Accuracy |
|
| 59 |
+
|:-------------:|:------:|:----:|:---------------:|:--------:|
|
| 60 |
+
| 2.7841 | 0.6154 | 2 | 2.7496 | 0.5348 |
|
| 61 |
+
| 2.6433 | 1.0 | 4 | 2.5868 | 0.5556 |
|
| 62 |
+
|
| 63 |
+
|
| 64 |
+
### Framework versions
|
| 65 |
+
|
| 66 |
+
- PEFT 0.18.1
|
| 67 |
+
- Transformers 5.2.0
|
| 68 |
+
- Pytorch 2.11.0+cu130
|
| 69 |
+
- Datasets 4.0.0
|
| 70 |
+
- Tokenizers 0.22.2
|
adapter_config.json
ADDED
|
@@ -0,0 +1,60 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"alora_invocation_tokens": null,
|
| 3 |
+
"alpha_pattern": {},
|
| 4 |
+
"arrow_config": null,
|
| 5 |
+
"auto_mapping": null,
|
| 6 |
+
"base_model_name_or_path": "zai-org/GLM-OCR",
|
| 7 |
+
"bias": "none",
|
| 8 |
+
"corda_config": null,
|
| 9 |
+
"ensure_weight_tying": false,
|
| 10 |
+
"eva_config": null,
|
| 11 |
+
"exclude_modules": null,
|
| 12 |
+
"fan_in_fan_out": false,
|
| 13 |
+
"inference_mode": true,
|
| 14 |
+
"init_lora_weights": true,
|
| 15 |
+
"layer_replication": null,
|
| 16 |
+
"layers_pattern": null,
|
| 17 |
+
"layers_to_transform": null,
|
| 18 |
+
"loftq_config": {},
|
| 19 |
+
"lora_alpha": 32,
|
| 20 |
+
"lora_bias": false,
|
| 21 |
+
"lora_dropout": 0.0,
|
| 22 |
+
"megatron_config": null,
|
| 23 |
+
"megatron_core": "megatron.core",
|
| 24 |
+
"modules_to_save": null,
|
| 25 |
+
"peft_type": "LORA",
|
| 26 |
+
"peft_version": "0.18.1",
|
| 27 |
+
"qalora_group_size": 16,
|
| 28 |
+
"r": 16,
|
| 29 |
+
"rank_pattern": {},
|
| 30 |
+
"revision": null,
|
| 31 |
+
"target_modules": [
|
| 32 |
+
"layers.9.mlp.down_proj",
|
| 33 |
+
"layers.0.mlp.down_proj",
|
| 34 |
+
"layers.12.mlp.down_proj",
|
| 35 |
+
"q_proj",
|
| 36 |
+
"layers.8.mlp.down_proj",
|
| 37 |
+
"o_proj",
|
| 38 |
+
"k_proj",
|
| 39 |
+
"layers.15.mlp.down_proj",
|
| 40 |
+
"layers.1.mlp.down_proj",
|
| 41 |
+
"layers.5.mlp.down_proj",
|
| 42 |
+
"layers.3.mlp.down_proj",
|
| 43 |
+
"gate_up_proj",
|
| 44 |
+
"layers.2.mlp.down_proj",
|
| 45 |
+
"layers.13.mlp.down_proj",
|
| 46 |
+
"layers.6.mlp.down_proj",
|
| 47 |
+
"v_proj",
|
| 48 |
+
"layers.10.mlp.down_proj",
|
| 49 |
+
"layers.11.mlp.down_proj",
|
| 50 |
+
"layers.4.mlp.down_proj",
|
| 51 |
+
"layers.7.mlp.down_proj",
|
| 52 |
+
"layers.14.mlp.down_proj"
|
| 53 |
+
],
|
| 54 |
+
"target_parameters": null,
|
| 55 |
+
"task_type": "CAUSAL_LM",
|
| 56 |
+
"trainable_token_indices": null,
|
| 57 |
+
"use_dora": false,
|
| 58 |
+
"use_qalora": false,
|
| 59 |
+
"use_rslora": false
|
| 60 |
+
}
|
adapter_model.safetensors
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:64df0764bfe053c84726c27244f3a4f48ca2824cda34102ebae5b1f6c142f440
|
| 3 |
+
size 29912904
|
all_results.json
ADDED
|
@@ -0,0 +1,13 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"epoch": 1.3076923076923077,
|
| 3 |
+
"eval_accuracy": 0.5594251982846505,
|
| 4 |
+
"eval_loss": 2.566760301589966,
|
| 5 |
+
"eval_runtime": 2.0675,
|
| 6 |
+
"eval_samples_per_second": 24.184,
|
| 7 |
+
"eval_steps_per_second": 3.386,
|
| 8 |
+
"total_flos": 197425390977024.0,
|
| 9 |
+
"train_loss": 2.7674348831176756,
|
| 10 |
+
"train_runtime": 14.3553,
|
| 11 |
+
"train_samples_per_second": 5.573,
|
| 12 |
+
"train_steps_per_second": 0.348
|
| 13 |
+
}
|
chat_template.jinja
ADDED
|
@@ -0,0 +1,140 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
[gMASK]<sop>
|
| 2 |
+
{%- if tools -%}
|
| 3 |
+
<|system|>
|
| 4 |
+
# Tools
|
| 5 |
+
|
| 6 |
+
You may call one or more functions to assist with the user query.
|
| 7 |
+
|
| 8 |
+
You are provided with function signatures within <tools></tools> XML tags:
|
| 9 |
+
<tools>
|
| 10 |
+
{% for tool in tools %}
|
| 11 |
+
{{ tool | tojson(ensure_ascii=False) }}
|
| 12 |
+
{% endfor %}
|
| 13 |
+
</tools>
|
| 14 |
+
|
| 15 |
+
For each function call, output the function name and arguments within the following XML format:
|
| 16 |
+
<tool_call>{function-name}
|
| 17 |
+
<arg_key>{arg-key-1}</arg_key>
|
| 18 |
+
<arg_value>{arg-value-1}</arg_value>
|
| 19 |
+
<arg_key>{arg-key-2}</arg_key>
|
| 20 |
+
<arg_value>{arg-value-2}</arg_value>
|
| 21 |
+
...
|
| 22 |
+
</tool_call>{%- endif -%}
|
| 23 |
+
{%- macro visible_text(content) -%}
|
| 24 |
+
{%- if content is string -%}
|
| 25 |
+
{{- content }}
|
| 26 |
+
{%- elif content is iterable and content is not mapping -%}
|
| 27 |
+
{%- for item in content -%}
|
| 28 |
+
{%- if item is mapping and item.type == 'text' -%}
|
| 29 |
+
{{- item.text }}
|
| 30 |
+
{%- elif item is mapping and (item.type == 'image' or 'image' in item) -%}
|
| 31 |
+
<|begin_of_image|><|image|><|end_of_image|>
|
| 32 |
+
{%- elif item is mapping and (item.type == 'video' or 'video' in item) -%}
|
| 33 |
+
<|begin_of_video|><|video|><|end_of_video|>
|
| 34 |
+
{%- elif item is string -%}
|
| 35 |
+
{{- item }}
|
| 36 |
+
{%- endif -%}
|
| 37 |
+
{%- endfor -%}
|
| 38 |
+
{%- else -%}
|
| 39 |
+
{{- content }}
|
| 40 |
+
{%- endif -%}
|
| 41 |
+
{%- endmacro -%}
|
| 42 |
+
{%- set ns = namespace(last_user_index=-1) %}
|
| 43 |
+
{%- for m in messages %}
|
| 44 |
+
{%- if m.role == 'user' %}
|
| 45 |
+
{% set ns.last_user_index = loop.index0 -%}
|
| 46 |
+
{%- endif %}
|
| 47 |
+
{%- endfor %}
|
| 48 |
+
{% for m in messages %}
|
| 49 |
+
{%- if m.role == 'user' -%}<|user|>
|
| 50 |
+
{% if m.content is string %}
|
| 51 |
+
{{ m.content }}
|
| 52 |
+
{%- else %}
|
| 53 |
+
{%- for item in m.content %}
|
| 54 |
+
{% if item.type == 'video' or 'video' in item %}
|
| 55 |
+
<|begin_of_video|><|video|><|end_of_video|>{% elif item.type == 'image' or 'image' in item %}
|
| 56 |
+
<|begin_of_image|><|image|><|end_of_image|>{% elif item.type == 'text' %}
|
| 57 |
+
{{ item.text }}
|
| 58 |
+
{%- endif %}
|
| 59 |
+
{%- endfor %}
|
| 60 |
+
{%- endif %}
|
| 61 |
+
{{- '/nothink' if (enable_thinking is defined and not enable_thinking and not visible_text(m.content).endswith("/nothink")) else '' -}}
|
| 62 |
+
{%- elif m.role == 'assistant' -%}
|
| 63 |
+
<|assistant|>
|
| 64 |
+
{%- set reasoning_content = '' %}
|
| 65 |
+
{%- set content = visible_text(m.content) %}
|
| 66 |
+
{%- if m.reasoning_content is string %}
|
| 67 |
+
{%- set reasoning_content = m.reasoning_content %}
|
| 68 |
+
{%- else %}
|
| 69 |
+
{%- if '</think>' in content %}
|
| 70 |
+
{%- set reasoning_content = content.split('</think>')[0].rstrip('\n').split('<think>')[-1].lstrip('\n') %}
|
| 71 |
+
{%- set content = content.split('</think>')[-1].lstrip('\n') %}
|
| 72 |
+
{%- endif %}
|
| 73 |
+
{%- endif %}
|
| 74 |
+
{%- if loop.index0 > ns.last_user_index and reasoning_content -%}
|
| 75 |
+
{{ '\n<think>' + reasoning_content.strip() + '</think>'}}
|
| 76 |
+
{%- else -%}
|
| 77 |
+
{{ '\n<think></think>' }}
|
| 78 |
+
{%- endif -%}
|
| 79 |
+
{%- if content.strip() -%}
|
| 80 |
+
{{ '\n' + content.strip() }}
|
| 81 |
+
{%- endif -%}
|
| 82 |
+
{% if m.tool_calls %}
|
| 83 |
+
{% for tc in m.tool_calls %}
|
| 84 |
+
{%- if tc.function %}
|
| 85 |
+
{%- set tc = tc.function %}
|
| 86 |
+
{%- endif %}
|
| 87 |
+
{{ '\n<tool_call>' + tc.name }}
|
| 88 |
+
{% set _args = tc.arguments %}
|
| 89 |
+
{% for k, v in _args.items() %}
|
| 90 |
+
<arg_key>{{ k }}</arg_key>
|
| 91 |
+
<arg_value>{{ v | tojson(ensure_ascii=False) if v is not string else v }}</arg_value>
|
| 92 |
+
{% endfor %}
|
| 93 |
+
</tool_call>{% endfor %}
|
| 94 |
+
{% endif %}
|
| 95 |
+
{%- elif m.role == 'tool' -%}
|
| 96 |
+
{%- if m.content is string -%}
|
| 97 |
+
{%- if loop.first or (messages[loop.index0 - 1].role != "tool") %}
|
| 98 |
+
{{- '<|observation|>' }}
|
| 99 |
+
{%- endif %}
|
| 100 |
+
{{- '\n<tool_response>\n' }}
|
| 101 |
+
{{- m.content }}
|
| 102 |
+
{{- '\n</tool_response>' }}
|
| 103 |
+
{% elif m.content is iterable and m.content is not mapping %}
|
| 104 |
+
{%- if loop.first or (messages[loop.index0 - 1].role != "tool") %}
|
| 105 |
+
{{- '<|observation|>' }}
|
| 106 |
+
{%- endif %}
|
| 107 |
+
{{- '\n<tool_response>\n' }}
|
| 108 |
+
{%- for tr in m.content -%}
|
| 109 |
+
{%- if tr is mapping and tr.type is defined -%}
|
| 110 |
+
{%- set t = tr.type | lower -%}
|
| 111 |
+
{%- if t == 'text' and tr.text is defined -%}
|
| 112 |
+
{{ tr.text }}
|
| 113 |
+
{%- elif t in ['image', 'image_url'] -%}
|
| 114 |
+
<|begin_of_image|><|image|><|end_of_image|>
|
| 115 |
+
{%- elif t in ['video', 'video_url'] -%}
|
| 116 |
+
<|begin_of_video|><|video|><|end_of_video|>
|
| 117 |
+
{%- else -%}
|
| 118 |
+
{{ tr | tojson(ensure_ascii=False) }}
|
| 119 |
+
{%- endif -%}
|
| 120 |
+
{%- else -%}
|
| 121 |
+
{{ tr.output if tr.output is defined else tr }}
|
| 122 |
+
{%- endif -%}
|
| 123 |
+
{%- endfor -%}
|
| 124 |
+
{{- '\n</tool_response>' }}
|
| 125 |
+
{%- else -%}
|
| 126 |
+
<|observation|>{% for tr in m.content %}
|
| 127 |
+
|
| 128 |
+
<tool_response>
|
| 129 |
+
{{ tr.output if tr.output is defined else tr }}
|
| 130 |
+
</tool_response>{% endfor -%}
|
| 131 |
+
{% endif -%}
|
| 132 |
+
{%- elif m.role == 'system' -%}
|
| 133 |
+
<|system|>
|
| 134 |
+
{{ visible_text(m.content) }}
|
| 135 |
+
{%- endif -%}
|
| 136 |
+
{%- endfor -%}
|
| 137 |
+
{%- if add_generation_prompt -%}
|
| 138 |
+
<|assistant|>
|
| 139 |
+
{{'<think></think>\n' if (enable_thinking is defined and not enable_thinking) else ''}}
|
| 140 |
+
{%- endif -%}
|
eval_results.json
ADDED
|
@@ -0,0 +1,8 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"epoch": 1.3076923076923077,
|
| 3 |
+
"eval_accuracy": 0.5594251982846505,
|
| 4 |
+
"eval_loss": 2.566760301589966,
|
| 5 |
+
"eval_runtime": 2.0675,
|
| 6 |
+
"eval_samples_per_second": 24.184,
|
| 7 |
+
"eval_steps_per_second": 3.386
|
| 8 |
+
}
|
processor_config.json
ADDED
|
@@ -0,0 +1,67 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"image_processor": {
|
| 3 |
+
"data_format": "channels_first",
|
| 4 |
+
"do_convert_rgb": true,
|
| 5 |
+
"do_normalize": true,
|
| 6 |
+
"do_rescale": true,
|
| 7 |
+
"do_resize": true,
|
| 8 |
+
"image_mean": [
|
| 9 |
+
0.48145466,
|
| 10 |
+
0.4578275,
|
| 11 |
+
0.40821073
|
| 12 |
+
],
|
| 13 |
+
"image_processor_type": "Glm46VImageProcessorFast",
|
| 14 |
+
"image_std": [
|
| 15 |
+
0.26862954,
|
| 16 |
+
0.26130258,
|
| 17 |
+
0.27577711
|
| 18 |
+
],
|
| 19 |
+
"merge_size": 2,
|
| 20 |
+
"patch_size": 14,
|
| 21 |
+
"resample": 3,
|
| 22 |
+
"rescale_factor": 0.00392156862745098,
|
| 23 |
+
"size": {
|
| 24 |
+
"longest_edge": 9633792,
|
| 25 |
+
"shortest_edge": 12544
|
| 26 |
+
},
|
| 27 |
+
"temporal_patch_size": 2
|
| 28 |
+
},
|
| 29 |
+
"processor_class": "Glm46VProcessor",
|
| 30 |
+
"video_processor": {
|
| 31 |
+
"data_format": "channels_first",
|
| 32 |
+
"default_to_square": true,
|
| 33 |
+
"do_convert_rgb": true,
|
| 34 |
+
"do_normalize": true,
|
| 35 |
+
"do_rescale": true,
|
| 36 |
+
"do_resize": true,
|
| 37 |
+
"do_sample_frames": true,
|
| 38 |
+
"fps": 2,
|
| 39 |
+
"image_mean": [
|
| 40 |
+
0.48145466,
|
| 41 |
+
0.4578275,
|
| 42 |
+
0.40821073
|
| 43 |
+
],
|
| 44 |
+
"image_processor_type": "Glm46VImageProcessor",
|
| 45 |
+
"image_std": [
|
| 46 |
+
0.26862954,
|
| 47 |
+
0.26130258,
|
| 48 |
+
0.27577711
|
| 49 |
+
],
|
| 50 |
+
"max_duration": 300,
|
| 51 |
+
"max_image_size": {
|
| 52 |
+
"longest_edge": 47040000
|
| 53 |
+
},
|
| 54 |
+
"merge_size": 2,
|
| 55 |
+
"num_frames": 16,
|
| 56 |
+
"patch_size": 14,
|
| 57 |
+
"resample": 3,
|
| 58 |
+
"rescale_factor": 0.00392156862745098,
|
| 59 |
+
"return_metadata": false,
|
| 60 |
+
"size": {
|
| 61 |
+
"longest_edge": 9633792,
|
| 62 |
+
"shortest_edge": 12544
|
| 63 |
+
},
|
| 64 |
+
"temporal_patch_size": 2,
|
| 65 |
+
"video_processor_type": "Glm46VVideoProcessor"
|
| 66 |
+
}
|
| 67 |
+
}
|
runs/Apr09_04-50-45_cs-01knr8f8jy7kcvx4v0ce8jkn3q-1tpz/events.out.tfevents.1775710245.cs-01knr8f8jy7kcvx4v0ce8jkn3q-1tpz.30775.0
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:1410a2c2669bf64ad7832538e2a91671ccc7e81cb833850278f97f866ad1391b
|
| 3 |
+
size 7931
|
runs/Apr09_04-50-45_cs-01knr8f8jy7kcvx4v0ce8jkn3q-1tpz/events.out.tfevents.1775710296.cs-01knr8f8jy7kcvx4v0ce8jkn3q-1tpz.30775.1
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:2614795ee33373d3f773846c9a79150fdf8b5ad0abaee92d4bd2a2c0dba43a3b
|
| 3 |
+
size 405
|
runs/Apr09_05-09-41_cs-01knr8f8jy7kcvx4v0ce8jkn3q-1tpz/events.out.tfevents.1775711381.cs-01knr8f8jy7kcvx4v0ce8jkn3q-1tpz.97622.0
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:fabb44c2eba6c489de8092d30120855e7bd13880bc5a0789201f125c1bc04f62
|
| 3 |
+
size 7931
|
runs/Apr09_05-09-41_cs-01knr8f8jy7kcvx4v0ce8jkn3q-1tpz/events.out.tfevents.1775711401.cs-01knr8f8jy7kcvx4v0ce8jkn3q-1tpz.97622.1
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:b05455bbc80d8b21f7c31b2cd520e9edc0d8d0ee8fc8b0b585b985977193aeb3
|
| 3 |
+
size 405
|
runs/Apr09_05-15-45_cs-01knr8f8jy7kcvx4v0ce8jkn3q-1tpz/events.out.tfevents.1775711745.cs-01knr8f8jy7kcvx4v0ce8jkn3q-1tpz.119598.0
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:63624552099940c5e44b3761648d14d7db1859345a127639c4998c504f81354c
|
| 3 |
+
size 6645
|
tokenizer.json
ADDED
|
The diff for this file is too large to render.
See raw diff
|
|
|
tokenizer_config.json
ADDED
|
@@ -0,0 +1,16 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"backend": "tokenizers",
|
| 3 |
+
"clean_up_tokenization_spaces": false,
|
| 4 |
+
"eos_token": "<|endoftext|>",
|
| 5 |
+
"extra_special_tokens": [
|
| 6 |
+
"<|user|>",
|
| 7 |
+
"<|observation|>"
|
| 8 |
+
],
|
| 9 |
+
"is_local": false,
|
| 10 |
+
"model_max_length": 655380,
|
| 11 |
+
"pad_token": "<|endoftext|>",
|
| 12 |
+
"padding_side": "right",
|
| 13 |
+
"processor_class": "Glm46VProcessor",
|
| 14 |
+
"split_special_tokens": false,
|
| 15 |
+
"tokenizer_class": "TokenizersBackend"
|
| 16 |
+
}
|
train_results.json
ADDED
|
@@ -0,0 +1,8 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"epoch": 1.3076923076923077,
|
| 3 |
+
"total_flos": 197425390977024.0,
|
| 4 |
+
"train_loss": 2.7674348831176756,
|
| 5 |
+
"train_runtime": 14.3553,
|
| 6 |
+
"train_samples_per_second": 5.573,
|
| 7 |
+
"train_steps_per_second": 0.348
|
| 8 |
+
}
|
trainer_log.jsonl
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{"current_steps": 1, "total_steps": 5, "loss": 2.965193033218384, "lr": 0.0, "epoch": 0.3076923076923077, "percentage": 20.0, "elapsed_time": "0:00:02", "remaining_time": "0:00:10"}
|
| 2 |
+
{"current_steps": 2, "total_steps": 5, "loss": 2.7841198444366455, "lr": 0.0001, "epoch": 0.6153846153846154, "percentage": 40.0, "elapsed_time": "0:00:03", "remaining_time": "0:00:05"}
|
| 3 |
+
{"current_steps": 2, "total_steps": 5, "eval_loss": 2.749584436416626, "epoch": 0.6153846153846154, "percentage": 40.0, "elapsed_time": "0:00:05", "remaining_time": "0:00:08"}
|
trainer_state.json
ADDED
|
@@ -0,0 +1,96 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"best_global_step": null,
|
| 3 |
+
"best_metric": null,
|
| 4 |
+
"best_model_checkpoint": null,
|
| 5 |
+
"epoch": 1.3076923076923077,
|
| 6 |
+
"eval_steps": 2,
|
| 7 |
+
"global_step": 5,
|
| 8 |
+
"is_hyper_param_search": false,
|
| 9 |
+
"is_local_process_zero": true,
|
| 10 |
+
"is_world_process_zero": true,
|
| 11 |
+
"log_history": [
|
| 12 |
+
{
|
| 13 |
+
"epoch": 0.3076923076923077,
|
| 14 |
+
"grad_norm": 0.47904515266418457,
|
| 15 |
+
"learning_rate": 0.0,
|
| 16 |
+
"loss": 2.965193033218384,
|
| 17 |
+
"step": 1
|
| 18 |
+
},
|
| 19 |
+
{
|
| 20 |
+
"epoch": 0.6153846153846154,
|
| 21 |
+
"grad_norm": 0.456310510635376,
|
| 22 |
+
"learning_rate": 0.0001,
|
| 23 |
+
"loss": 2.7841198444366455,
|
| 24 |
+
"step": 2
|
| 25 |
+
},
|
| 26 |
+
{
|
| 27 |
+
"epoch": 0.6153846153846154,
|
| 28 |
+
"eval_accuracy": 0.5348454543007923,
|
| 29 |
+
"eval_loss": 2.749584436416626,
|
| 30 |
+
"eval_runtime": 2.1084,
|
| 31 |
+
"eval_samples_per_second": 23.715,
|
| 32 |
+
"eval_steps_per_second": 3.32,
|
| 33 |
+
"step": 2
|
| 34 |
+
},
|
| 35 |
+
{
|
| 36 |
+
"epoch": 0.9230769230769231,
|
| 37 |
+
"grad_norm": 0.48338398337364197,
|
| 38 |
+
"learning_rate": 8.535533905932738e-05,
|
| 39 |
+
"loss": 2.890326499938965,
|
| 40 |
+
"step": 3
|
| 41 |
+
},
|
| 42 |
+
{
|
| 43 |
+
"epoch": 1.0,
|
| 44 |
+
"grad_norm": 0.5952783823013306,
|
| 45 |
+
"learning_rate": 5e-05,
|
| 46 |
+
"loss": 2.643332004547119,
|
| 47 |
+
"step": 4
|
| 48 |
+
},
|
| 49 |
+
{
|
| 50 |
+
"epoch": 1.0,
|
| 51 |
+
"eval_accuracy": 0.5556479950956826,
|
| 52 |
+
"eval_loss": 2.586768388748169,
|
| 53 |
+
"eval_runtime": 2.0764,
|
| 54 |
+
"eval_samples_per_second": 24.08,
|
| 55 |
+
"eval_steps_per_second": 3.371,
|
| 56 |
+
"step": 4
|
| 57 |
+
},
|
| 58 |
+
{
|
| 59 |
+
"epoch": 1.3076923076923077,
|
| 60 |
+
"grad_norm": 0.4229271411895752,
|
| 61 |
+
"learning_rate": 1.4644660940672627e-05,
|
| 62 |
+
"loss": 2.5542030334472656,
|
| 63 |
+
"step": 5
|
| 64 |
+
},
|
| 65 |
+
{
|
| 66 |
+
"epoch": 1.3076923076923077,
|
| 67 |
+
"step": 5,
|
| 68 |
+
"total_flos": 197425390977024.0,
|
| 69 |
+
"train_loss": 2.7674348831176756,
|
| 70 |
+
"train_runtime": 14.3553,
|
| 71 |
+
"train_samples_per_second": 5.573,
|
| 72 |
+
"train_steps_per_second": 0.348
|
| 73 |
+
}
|
| 74 |
+
],
|
| 75 |
+
"logging_steps": 1,
|
| 76 |
+
"max_steps": 5,
|
| 77 |
+
"num_input_tokens_seen": 0,
|
| 78 |
+
"num_train_epochs": 2,
|
| 79 |
+
"save_steps": 2,
|
| 80 |
+
"stateful_callbacks": {
|
| 81 |
+
"TrainerControl": {
|
| 82 |
+
"args": {
|
| 83 |
+
"should_epoch_stop": false,
|
| 84 |
+
"should_evaluate": false,
|
| 85 |
+
"should_log": false,
|
| 86 |
+
"should_save": true,
|
| 87 |
+
"should_training_stop": true
|
| 88 |
+
},
|
| 89 |
+
"attributes": {}
|
| 90 |
+
}
|
| 91 |
+
},
|
| 92 |
+
"total_flos": 197425390977024.0,
|
| 93 |
+
"train_batch_size": 4,
|
| 94 |
+
"trial_name": null,
|
| 95 |
+
"trial_params": null
|
| 96 |
+
}
|
training_args.bin
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:aa0585eb0ac4a61e55427fec24f73141acacfb922a8a6ee0363e495d52b98870
|
| 3 |
+
size 5649
|
training_eval_accuracy.png
ADDED
|
training_eval_loss.png
ADDED
|
training_loss.png
ADDED
|