prthm29 committed 16 days ago

Commit

305a5c2

verified ·

1 Parent(s): a68bcca

Training in progress, step 2

Browse files

Files changed (21) hide show

README.md +70 -0
adapter_config.json +60 -0
adapter_model.safetensors +3 -0
all_results.json +13 -0
chat_template.jinja +140 -0
eval_results.json +8 -0
processor_config.json +67 -0
runs/Apr09_04-50-45_cs-01knr8f8jy7kcvx4v0ce8jkn3q-1tpz/events.out.tfevents.1775710245.cs-01knr8f8jy7kcvx4v0ce8jkn3q-1tpz.30775.0 +3 -0
runs/Apr09_04-50-45_cs-01knr8f8jy7kcvx4v0ce8jkn3q-1tpz/events.out.tfevents.1775710296.cs-01knr8f8jy7kcvx4v0ce8jkn3q-1tpz.30775.1 +3 -0
runs/Apr09_05-09-41_cs-01knr8f8jy7kcvx4v0ce8jkn3q-1tpz/events.out.tfevents.1775711381.cs-01knr8f8jy7kcvx4v0ce8jkn3q-1tpz.97622.0 +3 -0
runs/Apr09_05-09-41_cs-01knr8f8jy7kcvx4v0ce8jkn3q-1tpz/events.out.tfevents.1775711401.cs-01knr8f8jy7kcvx4v0ce8jkn3q-1tpz.97622.1 +3 -0
runs/Apr09_05-15-45_cs-01knr8f8jy7kcvx4v0ce8jkn3q-1tpz/events.out.tfevents.1775711745.cs-01knr8f8jy7kcvx4v0ce8jkn3q-1tpz.119598.0 +3 -0
tokenizer.json +0 -0
tokenizer_config.json +16 -0
train_results.json +8 -0
trainer_log.jsonl +3 -0
trainer_state.json +96 -0
training_args.bin +3 -0
training_eval_accuracy.png +0 -0
training_eval_loss.png +0 -0
training_loss.png +0 -0

README.md ADDED Viewed

	@@ -0,0 +1,70 @@

+---
+library_name: peft
+license: mit
+base_model: zai-org/GLM-OCR
+tags:
+- base_model:adapter:zai-org/GLM-OCR
+- llama-factory
+- lora
+- transformers
+metrics:
+- accuracy
+pipeline_tag: text-generation
+model-index:
+- name: smoke_test_glm4v_checkpoints
+  results: []
+---
+<!-- This model card has been generated automatically according to the information the Trainer had access to. You
+should probably proofread and complete it, then remove this comment. -->
+# smoke_test_glm4v_checkpoints
+This model is a fine-tuned version of [zai-org/GLM-OCR](https://huggingface.co/zai-org/GLM-OCR) on the gujarati_ocr_stream dataset.
+It achieves the following results on the evaluation set:
+- Loss: 2.5668
+- Accuracy: 0.5594
+## Model description
+More information needed
+## Intended uses & limitations
+More information needed
+## Training and evaluation data
+More information needed
+## Training procedure
+### Training hyperparameters
+The following hyperparameters were used during training:
+- learning_rate: 0.0001
+- train_batch_size: 4
+- eval_batch_size: 8
+- seed: 42
+- gradient_accumulation_steps: 4
+- total_train_batch_size: 16
+- optimizer: OptimizerNames.ADAMW_TORCH_FUSED with betas=(0.9,0.999), epsilon=1e-08, and no additional optimizer arguments
+- lr_scheduler_type: cosine
+- lr_scheduler_warmup_steps: 1
+- training_steps: 5
+### Training results
+| Training Loss | Epoch  | Step | Validation Loss | Accuracy |
+|:-------------:|:------:|:----:|:---------------:|:--------:|
+| 2.7841        | 0.6154 | 2    | 2.7496          | 0.5348   |
+| 2.6433        | 1.0    | 4    | 2.5868          | 0.5556   |
+### Framework versions
+- PEFT 0.18.1
+- Transformers 5.2.0
+- Pytorch 2.11.0+cu130
+- Datasets 4.0.0
+- Tokenizers 0.22.2

adapter_config.json ADDED Viewed

	@@ -0,0 +1,60 @@

+{
+  "alora_invocation_tokens": null,
+  "alpha_pattern": {},
+  "arrow_config": null,
+  "auto_mapping": null,
+  "base_model_name_or_path": "zai-org/GLM-OCR",
+  "bias": "none",
+  "corda_config": null,
+  "ensure_weight_tying": false,
+  "eva_config": null,
+  "exclude_modules": null,
+  "fan_in_fan_out": false,
+  "inference_mode": true,
+  "init_lora_weights": true,
+  "layer_replication": null,
+  "layers_pattern": null,
+  "layers_to_transform": null,
+  "loftq_config": {},
+  "lora_alpha": 32,
+  "lora_bias": false,
+  "lora_dropout": 0.0,
+  "megatron_config": null,
+  "megatron_core": "megatron.core",
+  "modules_to_save": null,
+  "peft_type": "LORA",
+  "peft_version": "0.18.1",
+  "qalora_group_size": 16,
+  "r": 16,
+  "rank_pattern": {},
+  "revision": null,
+  "target_modules": [
+    "layers.9.mlp.down_proj",
+    "layers.0.mlp.down_proj",
+    "layers.12.mlp.down_proj",
+    "q_proj",
+    "layers.8.mlp.down_proj",
+    "o_proj",
+    "k_proj",
+    "layers.15.mlp.down_proj",
+    "layers.1.mlp.down_proj",
+    "layers.5.mlp.down_proj",
+    "layers.3.mlp.down_proj",
+    "gate_up_proj",
+    "layers.2.mlp.down_proj",
+    "layers.13.mlp.down_proj",
+    "layers.6.mlp.down_proj",
+    "v_proj",
+    "layers.10.mlp.down_proj",
+    "layers.11.mlp.down_proj",
+    "layers.4.mlp.down_proj",
+    "layers.7.mlp.down_proj",
+    "layers.14.mlp.down_proj"
+  ],
+  "target_parameters": null,
+  "task_type": "CAUSAL_LM",
+  "trainable_token_indices": null,
+  "use_dora": false,
+  "use_qalora": false,
+  "use_rslora": false
+}

adapter_model.safetensors ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:64df0764bfe053c84726c27244f3a4f48ca2824cda34102ebae5b1f6c142f440
+size 29912904

all_results.json ADDED Viewed

	@@ -0,0 +1,13 @@

+{
+    "epoch": 1.3076923076923077,
+    "eval_accuracy": 0.5594251982846505,
+    "eval_loss": 2.566760301589966,
+    "eval_runtime": 2.0675,
+    "eval_samples_per_second": 24.184,
+    "eval_steps_per_second": 3.386,
+    "total_flos": 197425390977024.0,
+    "train_loss": 2.7674348831176756,
+    "train_runtime": 14.3553,
+    "train_samples_per_second": 5.573,
+    "train_steps_per_second": 0.348
+}

chat_template.jinja ADDED Viewed

	@@ -0,0 +1,140 @@

+[gMASK]<sop>
+{%- if tools -%}
+<|system|>
+# Tools
+You may call one or more functions to assist with the user query.
+You are provided with function signatures within <tools></tools> XML tags:
+<tools>
+{% for tool in tools %}
+{{ tool | tojson(ensure_ascii=False) }}
+{% endfor %}
+</tools>
+For each function call, output the function name and arguments within the following XML format:
+<tool_call>{function-name}
+<arg_key>{arg-key-1}</arg_key>
+<arg_value>{arg-value-1}</arg_value>
+<arg_key>{arg-key-2}</arg_key>
+<arg_value>{arg-value-2}</arg_value>
+...
+</tool_call>{%- endif -%}
+{%- macro visible_text(content) -%}
+    {%- if content is string -%}
+        {{- content }}
+    {%- elif content is iterable and content is not mapping -%}
+        {%- for item in content -%}
+            {%- if item is mapping and item.type == 'text' -%}
+                {{- item.text }}
+            {%- elif item is mapping and (item.type == 'image' or 'image' in item) -%}
+                <|begin_of_image|><|image|><|end_of_image|>
+            {%- elif item is mapping and (item.type == 'video' or 'video' in item) -%}
+                <|begin_of_video|><|video|><|end_of_video|>
+            {%- elif item is string -%}
+                {{- item }}
+            {%- endif -%}
+        {%- endfor -%}
+    {%- else -%}
+        {{- content }}
+    {%- endif -%}
+{%- endmacro -%}
+{%- set ns = namespace(last_user_index=-1) %}
+{%- for m in messages %}
+    {%- if m.role == 'user' %}
+        {% set ns.last_user_index = loop.index0 -%}
+    {%- endif %}
+{%- endfor %}
+{% for m in messages %}
+{%- if m.role == 'user' -%}<|user|>
+{% if m.content is string %}
+{{ m.content }}
+{%- else %}
+{%- for item in m.content %}
+{% if item.type == 'video' or 'video' in item %}
+<|begin_of_video|><|video|><|end_of_video|>{% elif item.type == 'image' or 'image' in item %}
+<|begin_of_image|><|image|><|end_of_image|>{% elif item.type == 'text' %}
+{{ item.text }}
+{%- endif %}
+{%- endfor %}
+{%- endif %}
+{{- '/nothink' if (enable_thinking is defined and not enable_thinking and not visible_text(m.content).endswith("/nothink")) else '' -}}
+{%- elif m.role == 'assistant' -%}
+<|assistant|>
+{%- set reasoning_content = '' %}
+{%- set content = visible_text(m.content) %}
+{%- if m.reasoning_content is string %}
+    {%- set reasoning_content = m.reasoning_content %}
+{%- else %}
+    {%- if '</think>' in content %}
+        {%- set reasoning_content = content.split('</think>')[0].rstrip('\n').split('<think>')[-1].lstrip('\n') %}
+        {%- set content = content.split('</think>')[-1].lstrip('\n') %}
+    {%- endif %}
+{%- endif %}
+{%- if loop.index0 > ns.last_user_index and reasoning_content -%}
+{{ '\n<think>' + reasoning_content.strip() +  '</think>'}}
+{%- else -%}
+{{ '\n<think></think>' }}
+{%- endif -%}
+{%- if content.strip() -%}
+{{ '\n' + content.strip() }}
+{%- endif -%}
+{% if m.tool_calls %}
+{% for tc in m.tool_calls %}
+{%- if tc.function %}
+    {%- set tc = tc.function %}
+{%- endif %}
+{{ '\n<tool_call>' + tc.name }}
+{% set _args = tc.arguments %}
+{% for k, v in _args.items() %}
+<arg_key>{{ k }}</arg_key>
+<arg_value>{{ v | tojson(ensure_ascii=False) if v is not string else v }}</arg_value>
+{% endfor %}
+</tool_call>{% endfor %}
+{% endif %}
+{%- elif m.role == 'tool' -%}
+{%- if m.content is string -%}
+{%- if loop.first or (messages[loop.index0 - 1].role != "tool") %}
+    {{- '<|observation|>' }}
+{%- endif %}
+{{- '\n<tool_response>\n' }}
+{{- m.content }}
+{{- '\n</tool_response>' }}
+{% elif m.content is iterable and m.content is not mapping %}
+{%- if loop.first or (messages[loop.index0 - 1].role != "tool") %}
+{{- '<|observation|>' }}
+{%- endif %}
+{{- '\n<tool_response>\n' }}
+{%- for tr in m.content -%}
+  {%- if tr is mapping and tr.type is defined -%}
+    {%- set t = tr.type | lower -%}
+    {%- if t == 'text' and tr.text is defined -%}
+{{ tr.text }}
+    {%- elif t in ['image', 'image_url'] -%}
+<|begin_of_image|><|image|><|end_of_image|>
+    {%- elif t in ['video', 'video_url'] -%}
+<|begin_of_video|><|video|><|end_of_video|>
+    {%- else -%}
+{{ tr | tojson(ensure_ascii=False) }}
+    {%- endif -%}
+  {%- else -%}
+{{ tr.output if tr.output is defined else tr }}
+  {%- endif -%}
+{%- endfor -%}
+{{- '\n</tool_response>' }}
+{%- else -%}
+<|observation|>{% for tr in m.content %}
+<tool_response>
+{{ tr.output if tr.output is defined else tr }}
+</tool_response>{% endfor -%}
+{% endif -%}
+{%- elif m.role == 'system' -%}
+<|system|>
+{{ visible_text(m.content) }}
+{%- endif -%}
+{%- endfor -%}
+{%- if add_generation_prompt -%}
+<|assistant|>
+{{'<think></think>\n' if (enable_thinking is defined and not enable_thinking) else ''}}
+{%- endif -%}

eval_results.json ADDED Viewed

	@@ -0,0 +1,8 @@

+{
+    "epoch": 1.3076923076923077,
+    "eval_accuracy": 0.5594251982846505,
+    "eval_loss": 2.566760301589966,
+    "eval_runtime": 2.0675,
+    "eval_samples_per_second": 24.184,
+    "eval_steps_per_second": 3.386
+}

processor_config.json ADDED Viewed

	@@ -0,0 +1,67 @@

+{
+  "image_processor": {
+    "data_format": "channels_first",
+    "do_convert_rgb": true,
+    "do_normalize": true,
+    "do_rescale": true,
+    "do_resize": true,
+    "image_mean": [
+      0.48145466,
+      0.4578275,
+      0.40821073
+    ],
+    "image_processor_type": "Glm46VImageProcessorFast",
+    "image_std": [
+      0.26862954,
+      0.26130258,
+      0.27577711
+    ],
+    "merge_size": 2,
+    "patch_size": 14,
+    "resample": 3,
+    "rescale_factor": 0.00392156862745098,
+    "size": {
+      "longest_edge": 9633792,
+      "shortest_edge": 12544
+    },
+    "temporal_patch_size": 2
+  },
+  "processor_class": "Glm46VProcessor",
+  "video_processor": {
+    "data_format": "channels_first",
+    "default_to_square": true,
+    "do_convert_rgb": true,
+    "do_normalize": true,
+    "do_rescale": true,
+    "do_resize": true,
+    "do_sample_frames": true,
+    "fps": 2,
+    "image_mean": [
+      0.48145466,
+      0.4578275,
+      0.40821073
+    ],
+    "image_processor_type": "Glm46VImageProcessor",
+    "image_std": [
+      0.26862954,
+      0.26130258,
+      0.27577711
+    ],
+    "max_duration": 300,
+    "max_image_size": {
+      "longest_edge": 47040000
+    },
+    "merge_size": 2,
+    "num_frames": 16,
+    "patch_size": 14,
+    "resample": 3,
+    "rescale_factor": 0.00392156862745098,
+    "return_metadata": false,
+    "size": {
+      "longest_edge": 9633792,
+      "shortest_edge": 12544
+    },
+    "temporal_patch_size": 2,
+    "video_processor_type": "Glm46VVideoProcessor"
+  }
+}

runs/Apr09_04-50-45_cs-01knr8f8jy7kcvx4v0ce8jkn3q-1tpz/events.out.tfevents.1775710245.cs-01knr8f8jy7kcvx4v0ce8jkn3q-1tpz.30775.0 ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:1410a2c2669bf64ad7832538e2a91671ccc7e81cb833850278f97f866ad1391b
+size 7931

runs/Apr09_04-50-45_cs-01knr8f8jy7kcvx4v0ce8jkn3q-1tpz/events.out.tfevents.1775710296.cs-01knr8f8jy7kcvx4v0ce8jkn3q-1tpz.30775.1 ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:2614795ee33373d3f773846c9a79150fdf8b5ad0abaee92d4bd2a2c0dba43a3b
+size 405

runs/Apr09_05-09-41_cs-01knr8f8jy7kcvx4v0ce8jkn3q-1tpz/events.out.tfevents.1775711381.cs-01knr8f8jy7kcvx4v0ce8jkn3q-1tpz.97622.0 ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:fabb44c2eba6c489de8092d30120855e7bd13880bc5a0789201f125c1bc04f62
+size 7931

runs/Apr09_05-09-41_cs-01knr8f8jy7kcvx4v0ce8jkn3q-1tpz/events.out.tfevents.1775711401.cs-01knr8f8jy7kcvx4v0ce8jkn3q-1tpz.97622.1 ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:b05455bbc80d8b21f7c31b2cd520e9edc0d8d0ee8fc8b0b585b985977193aeb3
+size 405

runs/Apr09_05-15-45_cs-01knr8f8jy7kcvx4v0ce8jkn3q-1tpz/events.out.tfevents.1775711745.cs-01knr8f8jy7kcvx4v0ce8jkn3q-1tpz.119598.0 ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:63624552099940c5e44b3761648d14d7db1859345a127639c4998c504f81354c
+size 6645

tokenizer.json ADDED Viewed

The diff for this file is too large to render. See raw diff

tokenizer_config.json ADDED Viewed

	@@ -0,0 +1,16 @@

+{
+  "backend": "tokenizers",
+  "clean_up_tokenization_spaces": false,
+  "eos_token": "<|endoftext|>",
+  "extra_special_tokens": [
+    "<|user|>",
+    "<|observation|>"
+  ],
+  "is_local": false,
+  "model_max_length": 655380,
+  "pad_token": "<|endoftext|>",
+  "padding_side": "right",
+  "processor_class": "Glm46VProcessor",
+  "split_special_tokens": false,
+  "tokenizer_class": "TokenizersBackend"
+}

train_results.json ADDED Viewed

	@@ -0,0 +1,8 @@

+{
+    "epoch": 1.3076923076923077,
+    "total_flos": 197425390977024.0,
+    "train_loss": 2.7674348831176756,
+    "train_runtime": 14.3553,
+    "train_samples_per_second": 5.573,
+    "train_steps_per_second": 0.348
+}

trainer_log.jsonl ADDED Viewed

	@@ -0,0 +1,3 @@

+{"current_steps": 1, "total_steps": 5, "loss": 2.965193033218384, "lr": 0.0, "epoch": 0.3076923076923077, "percentage": 20.0, "elapsed_time": "0:00:02", "remaining_time": "0:00:10"}
+{"current_steps": 2, "total_steps": 5, "loss": 2.7841198444366455, "lr": 0.0001, "epoch": 0.6153846153846154, "percentage": 40.0, "elapsed_time": "0:00:03", "remaining_time": "0:00:05"}
+{"current_steps": 2, "total_steps": 5, "eval_loss": 2.749584436416626, "epoch": 0.6153846153846154, "percentage": 40.0, "elapsed_time": "0:00:05", "remaining_time": "0:00:08"}

trainer_state.json ADDED Viewed

	@@ -0,0 +1,96 @@

+{
+  "best_global_step": null,
+  "best_metric": null,
+  "best_model_checkpoint": null,
+  "epoch": 1.3076923076923077,
+  "eval_steps": 2,
+  "global_step": 5,
+  "is_hyper_param_search": false,
+  "is_local_process_zero": true,
+  "is_world_process_zero": true,
+  "log_history": [
+    {
+      "epoch": 0.3076923076923077,
+      "grad_norm": 0.47904515266418457,
+      "learning_rate": 0.0,
+      "loss": 2.965193033218384,
+      "step": 1
+    },
+    {
+      "epoch": 0.6153846153846154,
+      "grad_norm": 0.456310510635376,
+      "learning_rate": 0.0001,
+      "loss": 2.7841198444366455,
+      "step": 2
+    },
+    {
+      "epoch": 0.6153846153846154,
+      "eval_accuracy": 0.5348454543007923,
+      "eval_loss": 2.749584436416626,
+      "eval_runtime": 2.1084,
+      "eval_samples_per_second": 23.715,
+      "eval_steps_per_second": 3.32,
+      "step": 2
+    },
+    {
+      "epoch": 0.9230769230769231,
+      "grad_norm": 0.48338398337364197,
+      "learning_rate": 8.535533905932738e-05,
+      "loss": 2.890326499938965,
+      "step": 3
+    },
+    {
+      "epoch": 1.0,
+      "grad_norm": 0.5952783823013306,
+      "learning_rate": 5e-05,
+      "loss": 2.643332004547119,
+      "step": 4
+    },
+    {
+      "epoch": 1.0,
+      "eval_accuracy": 0.5556479950956826,
+      "eval_loss": 2.586768388748169,
+      "eval_runtime": 2.0764,
+      "eval_samples_per_second": 24.08,
+      "eval_steps_per_second": 3.371,
+      "step": 4
+    },
+    {
+      "epoch": 1.3076923076923077,
+      "grad_norm": 0.4229271411895752,
+      "learning_rate": 1.4644660940672627e-05,
+      "loss": 2.5542030334472656,
+      "step": 5
+    },
+    {
+      "epoch": 1.3076923076923077,
+      "step": 5,
+      "total_flos": 197425390977024.0,
+      "train_loss": 2.7674348831176756,
+      "train_runtime": 14.3553,
+      "train_samples_per_second": 5.573,
+      "train_steps_per_second": 0.348
+    }
+  ],
+  "logging_steps": 1,
+  "max_steps": 5,
+  "num_input_tokens_seen": 0,
+  "num_train_epochs": 2,
+  "save_steps": 2,
+  "stateful_callbacks": {
+    "TrainerControl": {
+      "args": {
+        "should_epoch_stop": false,
+        "should_evaluate": false,
+        "should_log": false,
+        "should_save": true,
+        "should_training_stop": true
+      },
+      "attributes": {}
+    }
+  },
+  "total_flos": 197425390977024.0,
+  "train_batch_size": 4,
+  "trial_name": null,
+  "trial_params": null
+}

training_args.bin ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:aa0585eb0ac4a61e55427fec24f73141acacfb922a8a6ee0363e495d52b98870
+size 5649

training_eval_accuracy.png ADDED Viewed

training_eval_loss.png ADDED Viewed

training_loss.png ADDED Viewed