test trainer.push_to_hub

Browse files

Files changed (12) hide show

.gitattributes +1 -0
README.md +110 -0
adapter_config.json +42 -0
adapter_model.safetensors +3 -0
added_tokens.json +3 -0
chat_template.jinja +47 -0
runs/Jan02_08-32-10_f408cf05027b/events.out.tfevents.1767342733.f408cf05027b.55.0 +3 -0
special_tokens_map.json +33 -0
tokenizer.json +3 -0
tokenizer.model +3 -0
tokenizer_config.json +0 -0
training_args.bin +3 -0

.gitattributes CHANGED Viewed

@@ -33,3 +33,4 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
 *.zip filter=lfs diff=lfs merge=lfs -text
 *.zst filter=lfs diff=lfs merge=lfs -text
 *tfevents* filter=lfs diff=lfs merge=lfs -text

 *.zip filter=lfs diff=lfs merge=lfs -text
 *.zst filter=lfs diff=lfs merge=lfs -text
 *tfevents* filter=lfs diff=lfs merge=lfs -text
+tokenizer.json filter=lfs diff=lfs merge=lfs -text

README.md ADDED Viewed

	@@ -0,0 +1,110 @@

+---
+library_name: peft
+license: gemma
+base_model: google/gemma-3-270m-it
+tags:
+- base_model:adapter:google/gemma-3-270m-it
+- lora
+- transformers
+metrics:
+- accuracy
+model-index:
+- name: gemma3-peft-multiclass
+  results: []
+---
+<!-- This model card has been generated automatically according to the information the Trainer had access to. You
+should probably proofread and complete it, then remove this comment. -->
+# gemma3-peft-multiclass
+This model is a fine-tuned version of [google/gemma-3-270m-it](https://huggingface.co/google/gemma-3-270m-it) on an unknown dataset.
+It achieves the following results on the evaluation set:
+- Loss: 2.2109
+- Accuracy: 0.42
+- F1 Macro: 0.4174
+- Precision Macro: 0.4194
+- Recall Macro: 0.4193
+## Model description
+More information needed
+## Intended uses & limitations
+More information needed
+## Training and evaluation data
+More information needed
+## Training procedure
+### Training hyperparameters
+The following hyperparameters were used during training:
+- learning_rate: 5e-05
+- train_batch_size: 8
+- eval_batch_size: 8
+- seed: 42
+- gradient_accumulation_steps: 4
+- total_train_batch_size: 32
+- optimizer: OptimizerNames.ADAMW_TORCH_FUSED with betas=(0.9,0.999), epsilon=1e-08, and no additional optimizer arguments
+- lr_scheduler_type: linear
+- num_epochs: 3
+### Training results
+| Training Loss | Epoch  | Step | Validation Loss | Accuracy | F1 Macro | Precision Macro | Recall Macro |
+|:-------------:|:------:|:----:|:---------------:|:--------:|:--------:|:---------------:|:------------:|
+| 16.4047       | 0.0710 | 10   | 3.6137          | 0.352    | 0.3320   | 0.3414          | 0.3439       |
+| 14.2141       | 0.1421 | 20   | 3.4200          | 0.356    | 0.3440   | 0.3466          | 0.3498       |
+| 13.5641       | 0.2131 | 30   | 3.2952          | 0.354    | 0.3528   | 0.3651          | 0.3571       |
+| 13.9938       | 0.2842 | 40   | 3.1136          | 0.35     | 0.3475   | 0.3472          | 0.3480       |
+| 12.8039       | 0.3552 | 50   | 2.9981          | 0.352    | 0.3483   | 0.3495          | 0.3518       |
+| 11.6164       | 0.4263 | 60   | 2.8842          | 0.36     | 0.3536   | 0.3576          | 0.3604       |
+| 11.4141       | 0.4973 | 70   | 2.7804          | 0.382    | 0.3811   | 0.3892          | 0.3827       |
+| 10.9391       | 0.5684 | 80   | 2.7409          | 0.382    | 0.3795   | 0.3942          | 0.3874       |
+| 11.2023       | 0.6394 | 90   | 2.6599          | 0.368    | 0.3605   | 0.3686          | 0.3621       |
+| 11.1117       | 0.7105 | 100  | 2.5964          | 0.382    | 0.3689   | 0.3782          | 0.3741       |
+| 10.8336       | 0.7815 | 110  | 2.5515          | 0.382    | 0.3739   | 0.3737          | 0.3780       |
+| 10.7383       | 0.8526 | 120  | 2.5144          | 0.378    | 0.3737   | 0.3775          | 0.3778       |
+| 10.7766       | 0.9236 | 130  | 2.5157          | 0.376    | 0.3646   | 0.3887          | 0.3816       |
+| 9.9812        | 0.9947 | 140  | 2.4519          | 0.388    | 0.3848   | 0.3864          | 0.3848       |
+| 9.3602        | 1.0639 | 150  | 2.4290          | 0.39     | 0.3832   | 0.3870          | 0.3889       |
+| 9.6672        | 1.1350 | 160  | 2.4054          | 0.398    | 0.3901   | 0.3951          | 0.3922       |
+| 9.6578        | 1.2060 | 170  | 2.3940          | 0.404    | 0.4040   | 0.4068          | 0.4061       |
+| 10.1414       | 1.2771 | 180  | 2.4032          | 0.388    | 0.3807   | 0.4050          | 0.3947       |
+| 9.7828        | 1.3481 | 190  | 2.3542          | 0.4      | 0.3963   | 0.3984          | 0.3969       |
+| 9.8086        | 1.4192 | 200  | 2.3286          | 0.406    | 0.4018   | 0.4031          | 0.4027       |
+| 9.1289        | 1.4902 | 210  | 2.3212          | 0.39     | 0.3819   | 0.3964          | 0.3933       |
+| 9.5641        | 1.5613 | 220  | 2.3019          | 0.414    | 0.4137   | 0.4157          | 0.4171       |
+| 9.6461        | 1.6323 | 230  | 2.2907          | 0.412    | 0.4055   | 0.4088          | 0.4080       |
+| 9.1906        | 1.7034 | 240  | 2.2955          | 0.422    | 0.4008   | 0.4265          | 0.4151       |
+| 9.0484        | 1.7744 | 250  | 2.2959          | 0.406    | 0.4017   | 0.4147          | 0.4101       |
+| 9.2789        | 1.8455 | 260  | 2.2869          | 0.392    | 0.3850   | 0.4055          | 0.3972       |
+| 9.3695        | 1.9165 | 270  | 2.2800          | 0.394    | 0.3898   | 0.4064          | 0.4001       |
+| 9.1937        | 1.9876 | 280  | 2.2526          | 0.414    | 0.4118   | 0.4137          | 0.4130       |
+| 8.7906        | 2.0568 | 290  | 2.2432          | 0.422    | 0.4037   | 0.4311          | 0.4120       |
+| 9.143         | 2.1279 | 300  | 2.2473          | 0.418    | 0.3911   | 0.4353          | 0.4057       |
+| 9.0289        | 2.1989 | 310  | 2.2312          | 0.404    | 0.3995   | 0.4058          | 0.4032       |
+| 8.8203        | 2.2700 | 320  | 2.2432          | 0.402    | 0.3979   | 0.4116          | 0.4062       |
+| 9.082         | 2.3410 | 330  | 2.2344          | 0.416    | 0.4145   | 0.4229          | 0.4206       |
+| 9.0086        | 2.4121 | 340  | 2.2278          | 0.418    | 0.4169   | 0.4236          | 0.4204       |
+| 8.7383        | 2.4831 | 350  | 2.2259          | 0.408    | 0.3991   | 0.4129          | 0.4085       |
+| 8.668         | 2.5542 | 360  | 2.2204          | 0.398    | 0.3902   | 0.3975          | 0.3971       |
+| 9.1867        | 2.6252 | 370  | 2.2122          | 0.412    | 0.4063   | 0.4102          | 0.4078       |
+| 8.8688        | 2.6963 | 380  | 2.2088          | 0.432    | 0.4235   | 0.4312          | 0.4259       |
+| 8.85          | 2.7673 | 390  | 2.2119          | 0.432    | 0.4255   | 0.4327          | 0.4270       |
+| 9.3047        | 2.8384 | 400  | 2.2091          | 0.432    | 0.4264   | 0.4295          | 0.4278       |
+| 9.1258        | 2.9094 | 410  | 2.2139          | 0.428    | 0.4250   | 0.4273          | 0.4264       |
+| 8.825         | 2.9805 | 420  | 2.2109          | 0.42     | 0.4174   | 0.4194          | 0.4193       |
+### Framework versions
+- PEFT 0.17.1
+- Transformers 4.57.1
+- Pytorch 2.8.0+cu126
+- Datasets 4.4.1
+- Tokenizers 0.22.1

adapter_config.json ADDED Viewed

	@@ -0,0 +1,42 @@

+{
+  "alpha_pattern": {},
+  "auto_mapping": null,
+  "base_model_name_or_path": "google/gemma-3-270m-it",
+  "bias": "none",
+  "corda_config": null,
+  "eva_config": null,
+  "exclude_modules": null,
+  "fan_in_fan_out": false,
+  "inference_mode": true,
+  "init_lora_weights": true,
+  "layer_replication": null,
+  "layers_pattern": null,
+  "layers_to_transform": null,
+  "loftq_config": {},
+  "lora_alpha": 16,
+  "lora_bias": false,
+  "lora_dropout": 0.05,
+  "megatron_config": null,
+  "megatron_core": "megatron.core",
+  "modules_to_save": [
+    "classifier",
+    "score"
+  ],
+  "peft_type": "LORA",
+  "qalora_group_size": 16,
+  "r": 8,
+  "rank_pattern": {},
+  "revision": null,
+  "target_modules": [
+    "q_proj",
+    "v_proj",
+    "o_proj",
+    "k_proj"
+  ],
+  "target_parameters": null,
+  "task_type": "SEQ_CLS",
+  "trainable_token_indices": null,
+  "use_dora": false,
+  "use_qalora": false,
+  "use_rslora": false
+}

adapter_model.safetensors ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:a5cda6d01cf8bf08344dff9456b654b813d37b950b9c33866c820bc3c44fbfe8
+size 2971808

added_tokens.json ADDED Viewed

	@@ -0,0 +1,3 @@

+{
+  "<image_soft_token>": 262144
+}

chat_template.jinja ADDED Viewed

	@@ -0,0 +1,47 @@

+{{ bos_token }}
+{%- if messages[0]['role'] == 'system' -%}
+    {%- if messages[0]['content'] is string -%}
+        {%- set first_user_prefix = messages[0]['content'] + '
+' -%}
+    {%- else -%}
+        {%- set first_user_prefix = messages[0]['content'][0]['text'] + '
+' -%}
+    {%- endif -%}
+    {%- set loop_messages = messages[1:] -%}
+{%- else -%}
+    {%- set first_user_prefix = "" -%}
+    {%- set loop_messages = messages -%}
+{%- endif -%}
+{%- for message in loop_messages -%}
+    {%- if (message['role'] == 'user') != (loop.index0 % 2 == 0) -%}
+        {{ raise_exception("Conversation roles must alternate user/assistant/user/assistant/...") }}
+    {%- endif -%}
+    {%- if (message['role'] == 'assistant') -%}
+        {%- set role = "model" -%}
+    {%- else -%}
+        {%- set role = message['role'] -%}
+    {%- endif -%}
+    {{ '<start_of_turn>' + role + '
+' + (first_user_prefix if loop.first else "") }}
+    {%- if message['content'] is string -%}
+        {{ message['content'] | trim }}
+    {%- elif message['content'] is iterable -%}
+        {%- for item in message['content'] -%}
+            {%- if item['type'] == 'image' -%}
+                {{ '<start_of_image>' }}
+            {%- elif item['type'] == 'text' -%}
+                {{ item['text'] | trim }}
+            {%- endif -%}
+        {%- endfor -%}
+    {%- else -%}
+        {{ raise_exception("Invalid content type") }}
+    {%- endif -%}
+    {{ '<end_of_turn>
+' }}
+{%- endfor -%}
+{%- if add_generation_prompt -%}
+    {{'<start_of_turn>model
+'}}
+{%- endif -%}

runs/Jan02_08-32-10_f408cf05027b/events.out.tfevents.1767342733.f408cf05027b.55.0 ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:1f0f6bcfd0203bc58615260e886bba6374e2af70217fb22db60c8e2f08deabae
+size 35542

special_tokens_map.json ADDED Viewed

	@@ -0,0 +1,33 @@

+{
+  "boi_token": "<start_of_image>",
+  "bos_token": {
+    "content": "<bos>",
+    "lstrip": false,
+    "normalized": false,
+    "rstrip": false,
+    "single_word": false
+  },
+  "eoi_token": "<end_of_image>",
+  "eos_token": {
+    "content": "<eos>",
+    "lstrip": false,
+    "normalized": false,
+    "rstrip": false,
+    "single_word": false
+  },
+  "image_token": "<image_soft_token>",
+  "pad_token": {
+    "content": "<pad>",
+    "lstrip": false,
+    "normalized": false,
+    "rstrip": false,
+    "single_word": false
+  },
+  "unk_token": {
+    "content": "<unk>",
+    "lstrip": false,
+    "normalized": false,
+    "rstrip": false,
+    "single_word": false
+  }
+}

tokenizer.json ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:091e3cf7f0e56c10d7383f000a34ef2824e95368e43def66837540b0550b6f83
+size 33384832

tokenizer.model ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:1299c11d7cf632ef3b4e11937501358ada021bbdf7c47638d13c0ee982f2e79c
+size 4689074

tokenizer_config.json ADDED Viewed

The diff for this file is too large to render. See raw diff

training_args.bin ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:7a715c4a6c45939b23622a35a1364b5119601f3b9edce9b31bd652ac302ce4df
+size 5841