Tokymin committed on
Commit
899e7ec
·
verified ·
1 Parent(s): 4deccd0

End of training

Browse files
Files changed (28) hide show
  1. README.md +52 -0
  2. config.json +149 -0
  3. generation_config.json +7 -0
  4. model.safetensors +3 -0
  5. runs/Apr25_14-39-45_YAN-Machine/events.out.tfevents.1745563185.YAN-Machine.3092909.0 +3 -0
  6. runs/Apr25_14-42-18_YAN-Machine/events.out.tfevents.1745563339.YAN-Machine.3093802.0 +3 -0
  7. runs/Apr25_15-00-48_YAN-Machine/events.out.tfevents.1745564448.YAN-Machine.3101448.0 +3 -0
  8. runs/Apr25_15-03-32_YAN-Machine/events.out.tfevents.1745564612.YAN-Machine.3103220.0 +3 -0
  9. runs/Apr25_15-16-24_YAN-Machine/events.out.tfevents.1745565384.YAN-Machine.3118167.0 +3 -0
  10. runs/Apr25_15-16-55_YAN-Machine/events.out.tfevents.1745565415.YAN-Machine.3119073.0 +3 -0
  11. runs/Apr25_15-20-20_YAN-Machine/events.out.tfevents.1745565621.YAN-Machine.3121507.0 +3 -0
  12. runs/Apr25_15-23-10_YAN-Machine/events.out.tfevents.1745565791.YAN-Machine.3125459.0 +3 -0
  13. runs/Apr25_15-23-29_YAN-Machine/events.out.tfevents.1745565809.YAN-Machine.3126720.0 +3 -0
  14. runs/Apr25_15-25-28_YAN-Machine/events.out.tfevents.1745565929.YAN-Machine.3132618.0 +3 -0
  15. runs/Apr25_15-52-49_YAN-Machine/events.out.tfevents.1745567569.YAN-Machine.4178102.0 +3 -0
  16. runs/Apr25_15-58-36_YAN-Machine/events.out.tfevents.1745567917.YAN-Machine.225857.0 +3 -0
  17. runs/Apr25_16-06-29_YAN-Machine/events.out.tfevents.1745568389.YAN-Machine.544916.0 +3 -0
  18. runs/Apr25_16-49-49_YAN-Machine/events.out.tfevents.1745570989.YAN-Machine.2231864.0 +3 -0
  19. runs/Apr25_16-51-48_YAN-Machine/events.out.tfevents.1745571108.YAN-Machine.2336285.0 +3 -0
  20. runs/Apr25_16-53-05_YAN-Machine/events.out.tfevents.1745571185.YAN-Machine.2416517.0 +3 -0
  21. runs/Apr26_21-18-59_YAN-Machine/events.out.tfevents.1745673540.YAN-Machine.3385759.0 +3 -0
  22. runs/Apr27_10-46-45_YAN-Machine/events.out.tfevents.1745722006.YAN-Machine.1838916.0 +3 -0
  23. runs/Apr27_11-30-16_YAN-Machine/events.out.tfevents.1745724618.YAN-Machine.1883932.0 +3 -0
  24. runs/Apr27_11-38-50_YAN-Machine/events.out.tfevents.1745725130.YAN-Machine.1897854.0 +3 -0
  25. runs/Apr27_11-40-39_YAN-Machine/events.out.tfevents.1745725239.YAN-Machine.1900454.0 +3 -0
  26. runs/Apr27_11-43-20_YAN-Machine/events.out.tfevents.1745725400.YAN-Machine.1904120.0 +3 -0
  27. runs/Apr27_11-50-37_YAN-Machine/events.out.tfevents.1745725837.YAN-Machine.1913951.0 +3 -0
  28. training_args.bin +3 -0
README.md ADDED
@@ -0,0 +1,52 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ ---
2
+ library_name: transformers
3
+ tags:
4
+ - generated_from_trainer
5
+ model-index:
6
+ - name: SmolVLM2-2.2B-Instruct-video-feedback
7
+ results: []
8
+ ---
9
+
10
+ <!-- This model card has been generated automatically according to the information the Trainer had access to. You
11
+ should probably proofread and complete it, then remove this comment. -->
12
+
13
+ # SmolVLM2-2.2B-Instruct-video-feedback
14
+
15
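+ This model appears to be a fine-tuned version of SmolVLM2-2.2B-Instruct (the auto-generated card reported it as "trained from scratch" because the Trainer could not detect the base checkpoint — confirm and link the base model). The training dataset is not documented.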
16
+
17
+ ## Model description
18
+
19
+ More information needed
20
+
21
+ ## Intended uses & limitations
22
+
23
+ More information needed
24
+
25
+ ## Training and evaluation data
26
+
27
+ More information needed
28
+
29
+ ## Training procedure
30
+
31
+ ### Training hyperparameters
32
+
33
+ The following hyperparameters were used during training:
34
+ - learning_rate: 0.0001
35
+ - train_batch_size: 2
36
+ - eval_batch_size: 8
37
+ - seed: 42
38
+ - optimizer: paged_adamw_8bit with betas=(0.9,0.999) and epsilon=1e-08 (no additional optimizer arguments)
39
+ - lr_scheduler_type: linear
40
+ - lr_scheduler_warmup_steps: 50
41
+ - num_epochs: 1
42
+
43
+ ### Training results
44
+
45
+
46
+
47
+ ### Framework versions
48
+
49
+ - Transformers 4.51.3
50
+ - Pytorch 2.5.1+cu124
51
+ - Datasets 3.5.0
52
+ - Tokenizers 0.21.1
config.json ADDED
@@ -0,0 +1,149 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "architectures": [
3
+ "SmolVLMForConditionalGeneration"
4
+ ],
5
+ "image_token_id": 49190,
6
+ "model_type": "smolvlm",
7
+ "pad_token_id": 128002,
8
+ "scale_factor": 3,
9
+ "text_config": {
10
+ "_flash_attn_2_enabled": true,
11
+ "_name_or_path": "None",
12
+ "architectures": [
13
+ "VLlama3ForCausalLM"
14
+ ],
15
+ "attention_bias": false,
16
+ "attention_dropout": 0.0,
17
+ "head_dim": 64,
18
+ "hidden_act": "silu",
19
+ "hidden_size": 2048,
20
+ "initializer_range": 0.02,
21
+ "intermediate_size": 8192,
22
+ "max_position_embeddings": 8192,
23
+ "mlp_bias": false,
24
+ "model_type": "llama",
25
+ "neftune_noise_alpha": 0.0,
26
+ "num_attention_heads": 32,
27
+ "num_hidden_layers": 24,
28
+ "num_key_value_heads": 32,
29
+ "pad_token_id": 2,
30
+ "perceiver_config": {
31
+ "_attn_implementation_autoset": false,
32
+ "_name_or_path": "",
33
+ "add_cross_attention": false,
34
+ "architectures": null,
35
+ "attention_dropout": 0.0,
36
+ "bad_words_ids": null,
37
+ "begin_suppress_tokens": null,
38
+ "bos_token_id": null,
39
+ "chunk_size_feed_forward": 0,
40
+ "cross_attention_hidden_size": null,
41
+ "decoder_start_token_id": null,
42
+ "diversity_penalty": 0.0,
43
+ "do_sample": false,
44
+ "early_stopping": false,
45
+ "encoder_no_repeat_ngram_size": 0,
46
+ "eos_token_id": null,
47
+ "exponential_decay_length_penalty": null,
48
+ "finetuning_task": null,
49
+ "forced_bos_token_id": null,
50
+ "forced_eos_token_id": null,
51
+ "hidden_act": "silu",
52
+ "id2label": {
53
+ "0": "LABEL_0",
54
+ "1": "LABEL_1"
55
+ },
56
+ "is_decoder": false,
57
+ "is_encoder_decoder": false,
58
+ "label2id": {
59
+ "LABEL_0": 0,
60
+ "LABEL_1": 1
61
+ },
62
+ "length_penalty": 1.0,
63
+ "max_length": 20,
64
+ "min_length": 0,
65
+ "model_type": "vllama3",
66
+ "no_repeat_ngram_size": 0,
67
+ "num_beam_groups": 1,
68
+ "num_beams": 1,
69
+ "num_key_value_heads": 1,
70
+ "num_return_sequences": 1,
71
+ "output_attentions": false,
72
+ "output_hidden_states": false,
73
+ "output_scores": false,
74
+ "pad_token_id": null,
75
+ "prefix": null,
76
+ "problem_type": null,
77
+ "pruned_heads": {},
78
+ "qk_layer_norms_perceiver": false,
79
+ "remove_invalid_values": false,
80
+ "repetition_penalty": 1.0,
81
+ "resampler_depth": 6,
82
+ "resampler_head_dim": 96,
83
+ "resampler_n_heads": 16,
84
+ "resampler_n_latents": 64,
85
+ "return_dict": true,
86
+ "return_dict_in_generate": false,
87
+ "sep_token_id": null,
88
+ "suppress_tokens": null,
89
+ "task_specific_params": null,
90
+ "temperature": 1.0,
91
+ "tf_legacy_loss": false,
92
+ "tie_encoder_decoder": false,
93
+ "tie_word_embeddings": true,
94
+ "tokenizer_class": null,
95
+ "top_k": 50,
96
+ "top_p": 1.0,
97
+ "torch_dtype": null,
98
+ "torchscript": false,
99
+ "transformers_version": "4.46.0",
100
+ "typical_p": 1.0,
101
+ "use_bfloat16": false
102
+ },
103
+ "pixel_shuffle_factor": 3,
104
+ "pretraining_tp": 1,
105
+ "qk_layer_norms": false,
106
+ "rms_norm_eps": 1e-05,
107
+ "rope_scaling": null,
108
+ "rope_theta": 130000,
109
+ "torch_dtype": "bfloat16",
110
+ "transformers.js_config": {
111
+ "kv_cache_dtype": {
112
+ "fp16": "float16",
113
+ "q4f16": "float16"
114
+ }
115
+ },
116
+ "use_cache": true,
117
+ "use_resampler": false,
118
+ "vocab_size": 49280
119
+ },
120
+ "tie_word_embeddings": false,
121
+ "torch_dtype": "bfloat16",
122
+ "transformers_version": "4.51.3",
123
+ "use_cache": false,
124
+ "use_reentrant_checkpointing": false,
125
+ "vision_config": {
126
+ "attention_dropout": 0.0,
127
+ "hidden_act": "gelu_pytorch_tanh",
128
+ "hidden_size": 1152,
129
+ "image_size": 384,
130
+ "initializer_range": 0.02,
131
+ "intermediate_size": 4304,
132
+ "layer_norm_eps": 1e-06,
133
+ "max_image_size": {
134
+ "longest_edge": 384
135
+ },
136
+ "model_type": "smolvlm_vision",
137
+ "num_attention_heads": 16,
138
+ "num_channels": 3,
139
+ "num_hidden_layers": 27,
140
+ "patch_size": 14,
141
+ "size": {
142
+ "longest_edge": 1920
143
+ },
144
+ "tie_word_embeddings": false,
145
+ "torch_dtype": "bfloat16",
146
+ "use_base_siglip": false
147
+ },
148
+ "vocab_size": 49280
149
+ }
generation_config.json ADDED
@@ -0,0 +1,7 @@
 
 
 
 
 
 
 
 
1
+ {
2
+ "_from_model_config": true,
3
+ "bos_token_id": 0,
4
+ "eos_token_id": 49279,
5
+ "pad_token_id": 2,
6
+ "transformers_version": "4.51.3"
7
+ }
model.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:214f057fe8cb54d6b144530a94cbbb9b591036419795da7fe5f5cbf12d9faf11
3
+ size 4493654912
runs/Apr25_14-39-45_YAN-Machine/events.out.tfevents.1745563185.YAN-Machine.3092909.0 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:3e0851b5fdc47929c7a9e31358793bf0e1db792e4ed770bdefa4620c9067ee4b
3
+ size 8480
runs/Apr25_14-42-18_YAN-Machine/events.out.tfevents.1745563339.YAN-Machine.3093802.0 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:c31f3778b9176ded44534d818c6a9b2b68a17ce258934fe0551b2da2d56068a0
3
+ size 8480
runs/Apr25_15-00-48_YAN-Machine/events.out.tfevents.1745564448.YAN-Machine.3101448.0 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:29487b3f8a6fa903ecc859b4d00e85f0728e92515a157f7b8f095206ae1ef68f
3
+ size 8480
runs/Apr25_15-03-32_YAN-Machine/events.out.tfevents.1745564612.YAN-Machine.3103220.0 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:3e72c16942da658267fddae2685f2ad8b9d8ea5165c62ef07294ed56ff3d8cdc
3
+ size 8486
runs/Apr25_15-16-24_YAN-Machine/events.out.tfevents.1745565384.YAN-Machine.3118167.0 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:e41659dce8aec6feecf197570e74af1f577cb856ab73402d492b681ab9043c02
3
+ size 8485
runs/Apr25_15-16-55_YAN-Machine/events.out.tfevents.1745565415.YAN-Machine.3119073.0 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:019db67d1eb87921c44d36496a1586460e55885c4d0b87266069d06e6e8df451
3
+ size 8485
runs/Apr25_15-20-20_YAN-Machine/events.out.tfevents.1745565621.YAN-Machine.3121507.0 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:c29749094c910c83523a56cccf170787359262d9e40acc83625d3e029e5d639d
3
+ size 8485
runs/Apr25_15-23-10_YAN-Machine/events.out.tfevents.1745565791.YAN-Machine.3125459.0 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:3bb27758e793ae7ac9ef0ff053dc349b78a7aebe51e09d63f4237697b0d508bc
3
+ size 8485
runs/Apr25_15-23-29_YAN-Machine/events.out.tfevents.1745565809.YAN-Machine.3126720.0 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:46ccbd7934aa16d230ccd0a972daacccd121c731fdd96b4dd2fe299d8e7ee16b
3
+ size 8485
runs/Apr25_15-25-28_YAN-Machine/events.out.tfevents.1745565929.YAN-Machine.3132618.0 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:aad744395830c4c67bb6a53a352ce271a873ddbb6ebd5e5e62b4b75a9020e4d8
3
+ size 9106
runs/Apr25_15-52-49_YAN-Machine/events.out.tfevents.1745567569.YAN-Machine.4178102.0 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:c5f7adeee775e6f4993e41cd1fa9637c156ec1cdf27d2d056b7f304df4630953
3
+ size 8485
runs/Apr25_15-58-36_YAN-Machine/events.out.tfevents.1745567917.YAN-Machine.225857.0 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:950f61dcc48856633efcce63c1aaad6f357887c6f984f11bcfea196d80a5d9fd
3
+ size 8485
runs/Apr25_16-06-29_YAN-Machine/events.out.tfevents.1745568389.YAN-Machine.544916.0 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:063104212aecaaf8ebb3d0949697dfe5131bb79b566560625a4bcdbfb2a1eba2
3
+ size 9313
runs/Apr25_16-49-49_YAN-Machine/events.out.tfevents.1745570989.YAN-Machine.2231864.0 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:44d4c457e716b2f401d9c4d05dea16c1d4906bdc721b08a5e9911a7cf5ac1d64
3
+ size 8485
runs/Apr25_16-51-48_YAN-Machine/events.out.tfevents.1745571108.YAN-Machine.2336285.0 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:b8ddd1848a69f9014edfb6958eded7832026792075487b1cc17906b4f3663a08
3
+ size 8485
runs/Apr25_16-53-05_YAN-Machine/events.out.tfevents.1745571185.YAN-Machine.2416517.0 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:270aae5edcf5f9966d861051699614b9e737e85868783ec776fdf5bdc7d1a152
3
+ size 12263
runs/Apr26_21-18-59_YAN-Machine/events.out.tfevents.1745673540.YAN-Machine.3385759.0 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:56881af547714761ad36cce7324df2d4983db3bee9919e16725b96ff969c56b6
3
+ size 8485
runs/Apr27_10-46-45_YAN-Machine/events.out.tfevents.1745722006.YAN-Machine.1838916.0 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:c07832f663a0a37dabaa5e59c5ae31537e441423e4708b143dead8dbab7f4656
3
+ size 8485
runs/Apr27_11-30-16_YAN-Machine/events.out.tfevents.1745724618.YAN-Machine.1883932.0 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:7bb16e66c58d063a5c46573f6e05d1614fccb0b6ec83f20dbfbde1ef8baa7b36
3
+ size 8485
runs/Apr27_11-38-50_YAN-Machine/events.out.tfevents.1745725130.YAN-Machine.1897854.0 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:ad260ee51ff4dac562f24bbf7b4ebe956ad3bf8f16187e0d3ac605f343cc8e25
3
+ size 8833
runs/Apr27_11-40-39_YAN-Machine/events.out.tfevents.1745725239.YAN-Machine.1900454.0 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:0ecd8433871b23a6706ce35c3577abf44ad6053fe7321401622e05954e3213d2
3
+ size 8833
runs/Apr27_11-43-20_YAN-Machine/events.out.tfevents.1745725400.YAN-Machine.1904120.0 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:2a862e5a79811f35170d941d30f4c6135f3887d2f7a202474da2116556f257b0
3
+ size 8833
runs/Apr27_11-50-37_YAN-Machine/events.out.tfevents.1745725837.YAN-Machine.1913951.0 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:d767a92c676a0401f987fde898c3a085398cb95758bcb2478cd8e2997b1ecf8c
3
+ size 8833
training_args.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:94a9a473cf52c0862e19a8e1e2363fdea489150f0cdbe2ecdce9c634fa43d3d1
3
+ size 5368