ngkhoi committed on
Commit
18b1e0c
·
verified ·
1 Parent(s): 6848e18

Upload folder using huggingface_hub

Browse files
README.md CHANGED
@@ -1,21 +1,48 @@
1
  ---
2
- base_model: unsloth/gemma-3-4b-it-unsloth-bnb-4bit
 
 
 
 
3
  tags:
4
- - text-generation-inference
5
- - transformers
6
  - unsloth
7
- - gemma3
8
- license: apache-2.0
9
- language:
10
- - en
11
  ---
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
12
 
13
- # Uploaded finetuned model
14
 
15
- - **Developed by:** ngkhoi
16
- - **License:** apache-2.0
17
- - **Finetuned from model :** unsloth/gemma-3-4b-it-unsloth-bnb-4bit
18
 
19
- This gemma3 model was trained 2x faster with [Unsloth](https://github.com/unslothai/unsloth) and Huggingface's TRL library.
20
 
21
- [<img src="https://raw.githubusercontent.com/unslothai/unsloth/main/images/unsloth%20made%20with%20love.png" width="200"/>](https://github.com/unslothai/unsloth)
 
 
1
  ---
2
+ '-license': gemma
3
+ language:
4
+ - vi
5
+ pipeline_tag: text-generation
6
+ library_name: transformers
7
  tags:
 
 
8
  - unsloth
9
+ license: gemma
 
 
 
10
  ---
11
+ # VIETRON 4B - Fine-tuned Vietnamese model
12
+ <img src="https://lh3.googleusercontent.com/d/1Ez_5ubsKUpDGagqNWKlgJJm-kGUIjkKS=w1000?authuser=0" />
13
+
14
+ > [!NOTE]
15
+ > This is my first proper fine-tuned model; it may still generate false information or make mistakes.
16
+
17
+ VieTron 4B is a Large Language Model (LLM) that has been extensively fine-tuned for Vietnamese users. With a 4-billion-parameter scale, VieTron is designed to be a smart, friendly AI assistant with a deep understanding of Vietnamese culture and education.
18
+
19
+ ## Details
20
+
21
+ Trained on high-quality Vietnamese datasets that cover most fields and topics.
22
+
23
+ **More thoughtful**: the model is trained with instructions to respond step by step (chain-of-thought, or CoT), so it generates not only results but also the reasoning steps behind them.
24
+
25
+ **More natural response style**: the datasets also include natural Vietnamese conversations, making the model's responses more "human".
26
+
27
+ ## Model info
28
+
29
+ ~4 billion parameters
30
+
31
+ Currently I've only uploaded the initial version, in quantized Q8_0 GGUF format, for testing the model. I will provide more quantized GGUF formats in the future as the model improves.
32
+
33
+ ## Usage
34
+
35
+ LM Studio (**recommended**): the easiest way to run inference. Search for <sup>ngkhoi/vietron-4b</sup> and download it to use this model.
36
+
37
+
38
+ ## Limitations & Ethical Considerations
39
+ Knowledge Cutoff: VieTron's knowledge is limited to its training data. The model may not be aware of the latest events.
40
+
41
+ Hallucination Potential: Like all LLMs, VieTron can generate incorrect information. Please verify important facts.
42
+
43
 
 
44
 
 
 
 
45
 
 
46
 
47
+ ## Contributions
48
+ This project is developed solely by me, so any contributions are truly welcome!
adapter_config.json CHANGED
@@ -1,37 +1,38 @@
1
- {
2
- "alpha_pattern": {},
3
- "auto_mapping": {
4
- "base_model_class": "Gemma3ForConditionalGeneration",
5
- "parent_library": "transformers.models.gemma3.modeling_gemma3",
6
- "unsloth_fixed": true
7
- },
8
- "base_model_name_or_path": "unsloth/gemma-3-4b-it-unsloth-bnb-4bit",
9
- "bias": "none",
10
- "corda_config": null,
11
- "eva_config": null,
12
- "exclude_modules": null,
13
- "fan_in_fan_out": false,
14
- "inference_mode": true,
15
- "init_lora_weights": true,
16
- "layer_replication": null,
17
- "layers_pattern": null,
18
- "layers_to_transform": null,
19
- "loftq_config": {},
20
- "lora_alpha": 16,
21
- "lora_bias": false,
22
- "lora_dropout": 0.05,
23
- "megatron_config": null,
24
- "megatron_core": "megatron.core",
25
- "modules_to_save": null,
26
- "peft_type": "LORA",
27
- "qalora_group_size": 16,
28
- "r": 8,
29
- "rank_pattern": {},
30
- "revision": null,
31
- "target_modules": "(?:.*?(?:language|text).*?(?:self_attn|attention|attn|mlp|feed_forward|ffn|dense).*?(?:k_proj|v_proj|q_proj|out_proj|fc1|fc2|o_proj|gate_proj|up_proj|down_proj).*?)|(?:\\bmodel\\.layers\\.[\\d]{1,}\\.(?:self_attn|attention|attn|mlp|feed_forward|ffn|dense)\\.(?:(?:k_proj|v_proj|q_proj|out_proj|fc1|fc2|o_proj|gate_proj|up_proj|down_proj)))",
32
- "task_type": "CAUSAL_LM",
33
- "trainable_token_indices": null,
34
- "use_dora": false,
35
- "use_qalora": false,
36
- "use_rslora": false
 
37
  }
 
1
+ {
2
+ "alpha_pattern": {},
3
+ "auto_mapping": {
4
+ "base_model_class": "Gemma3ForConditionalGeneration",
5
+ "parent_library": "transformers.models.gemma3.modeling_gemma3",
6
+ "unsloth_fixed": true
7
+ },
8
+ "base_model_name_or_path": "unsloth/gemma-3-4b-it-unsloth-bnb-4bit",
9
+ "bias": "none",
10
+ "corda_config": null,
11
+ "eva_config": null,
12
+ "exclude_modules": null,
13
+ "fan_in_fan_out": false,
14
+ "inference_mode": true,
15
+ "init_lora_weights": true,
16
+ "layer_replication": null,
17
+ "layers_pattern": null,
18
+ "layers_to_transform": null,
19
+ "loftq_config": {},
20
+ "lora_alpha": 16,
21
+ "lora_bias": false,
22
+ "lora_dropout": 0.05,
23
+ "megatron_config": null,
24
+ "megatron_core": "megatron.core",
25
+ "modules_to_save": null,
26
+ "peft_type": "LORA",
27
+ "qalora_group_size": 16,
28
+ "r": 8,
29
+ "rank_pattern": {},
30
+ "revision": null,
31
+ "target_modules": "(?:.*?(?:language|text).*?(?:self_attn|attention|attn|mlp|feed_forward|ffn|dense).*?(?:k_proj|v_proj|q_proj|out_proj|fc1|fc2|o_proj|gate_proj|up_proj|down_proj).*?)|(?:\\bmodel\\.layers\\.[\\d]{1,}\\.(?:self_attn|attention|attn|mlp|feed_forward|ffn|dense)\\.(?:(?:k_proj|v_proj|q_proj|out_proj|fc1|fc2|o_proj|gate_proj|up_proj|down_proj)))",
32
+ "target_parameters": null,
33
+ "task_type": "CAUSAL_LM",
34
+ "trainable_token_indices": null,
35
+ "use_dora": false,
36
+ "use_qalora": false,
37
+ "use_rslora": false
38
  }
adapter_model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:700af5fdbb1a08bc7df7bb7573802a685c6955134cdcb8dd1537e6fb101e9ad3
3
  size 59675008
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:faeebcd3548f3e8efb67d1faefd09285a9c0098df48515e598cdc3f0881d0aaf
3
  size 59675008
added_tokens.json CHANGED
@@ -1,3 +1,3 @@
1
- {
2
- "<image_soft_token>": 262144
3
- }
 
1
+ {
2
+ "<image_soft_token>": 262144
3
+ }
chat_template.jinja CHANGED
@@ -1,47 +1,47 @@
1
- {{ bos_token }}
2
- {%- if messages[0]['role'] == 'system' -%}
3
- {%- if messages[0]['content'] is string -%}
4
- {%- set first_user_prefix = messages[0]['content'] + '
5
-
6
- ' -%}
7
- {%- else -%}
8
- {%- set first_user_prefix = messages[0]['content'][0]['text'] + '
9
-
10
- ' -%}
11
- {%- endif -%}
12
- {%- set loop_messages = messages[1:] -%}
13
- {%- else -%}
14
- {%- set first_user_prefix = "" -%}
15
- {%- set loop_messages = messages -%}
16
- {%- endif -%}
17
- {%- for message in loop_messages -%}
18
- {%- if (message['role'] == 'user') != (loop.index0 % 2 == 0) -%}
19
- {{ raise_exception("Conversation roles must alternate user/assistant/user/assistant/...") }}
20
- {%- endif -%}
21
- {%- if (message['role'] == 'assistant') -%}
22
- {%- set role = "model" -%}
23
- {%- else -%}
24
- {%- set role = message['role'] -%}
25
- {%- endif -%}
26
- {{ '<start_of_turn>' + role + '
27
- ' + (first_user_prefix if loop.first else "") }}
28
- {%- if message['content'] is string -%}
29
- {{ message['content'] | trim }}
30
- {%- elif message['content'] is iterable -%}
31
- {%- for item in message['content'] -%}
32
- {%- if item['type'] == 'image' -%}
33
- {{ '<start_of_image>' }}
34
- {%- elif item['type'] == 'text' -%}
35
- {{ item['text'] | trim }}
36
- {%- endif -%}
37
- {%- endfor -%}
38
- {%- else -%}
39
- {{ raise_exception("Invalid content type") }}
40
- {%- endif -%}
41
- {{ '<end_of_turn>
42
- ' }}
43
- {%- endfor -%}
44
- {%- if add_generation_prompt -%}
45
- {{ '<start_of_turn>model
46
- ' }}
47
- {%- endif -%}
 
1
+ {{ bos_token }}
2
+ {%- if messages[0]['role'] == 'system' -%}
3
+ {%- if messages[0]['content'] is string -%}
4
+ {%- set first_user_prefix = messages[0]['content'] + '
5
+
6
+ ' -%}
7
+ {%- else -%}
8
+ {%- set first_user_prefix = messages[0]['content'][0]['text'] + '
9
+
10
+ ' -%}
11
+ {%- endif -%}
12
+ {%- set loop_messages = messages[1:] -%}
13
+ {%- else -%}
14
+ {%- set first_user_prefix = "" -%}
15
+ {%- set loop_messages = messages -%}
16
+ {%- endif -%}
17
+ {%- for message in loop_messages -%}
18
+ {%- if (message['role'] == 'user') != (loop.index0 % 2 == 0) -%}
19
+ {{ raise_exception("Conversation roles must alternate user/assistant/user/assistant/...") }}
20
+ {%- endif -%}
21
+ {%- if (message['role'] == 'assistant') -%}
22
+ {%- set role = "model" -%}
23
+ {%- else -%}
24
+ {%- set role = message['role'] -%}
25
+ {%- endif -%}
26
+ {{ '<start_of_turn>' + role + '
27
+ ' + (first_user_prefix if loop.first else "") }}
28
+ {%- if message['content'] is string -%}
29
+ {{ message['content'] | trim }}
30
+ {%- elif message['content'] is iterable -%}
31
+ {%- for item in message['content'] -%}
32
+ {%- if item['type'] == 'image' -%}
33
+ {{ '<start_of_image>' }}
34
+ {%- elif item['type'] == 'text' -%}
35
+ {{ item['text'] | trim }}
36
+ {%- endif -%}
37
+ {%- endfor -%}
38
+ {%- else -%}
39
+ {{ raise_exception("Invalid content type") }}
40
+ {%- endif -%}
41
+ {{ '<end_of_turn>
42
+ ' }}
43
+ {%- endfor -%}
44
+ {%- if add_generation_prompt -%}
45
+ {{ '<start_of_turn>model
46
+ ' }}
47
+ {%- endif -%}
config.json CHANGED
@@ -1,99 +1,99 @@
1
- {
2
- "architectures": [
3
- "Gemma3ForConditionalGeneration"
4
- ],
5
- "boi_token_index": 255999,
6
- "bos_token_id": 2,
7
- "eoi_token_index": 256000,
8
- "eos_token_id": 106,
9
- "image_token_index": 262144,
10
- "initializer_range": 0.02,
11
- "mm_tokens_per_image": 256,
12
- "model_type": "gemma3",
13
- "pad_token_id": 0,
14
- "text_config": {
15
- "_sliding_window_pattern": 6,
16
- "attention_bias": false,
17
- "attention_dropout": 0.0,
18
- "attn_logit_softcapping": null,
19
- "cache_implementation": "hybrid",
20
- "final_logit_softcapping": null,
21
- "head_dim": 256,
22
- "hidden_activation": "gelu_pytorch_tanh",
23
- "hidden_size": 2560,
24
- "initializer_range": 0.02,
25
- "intermediate_size": 10240,
26
- "layer_types": [
27
- "sliding_attention",
28
- "sliding_attention",
29
- "sliding_attention",
30
- "sliding_attention",
31
- "sliding_attention",
32
- "full_attention",
33
- "sliding_attention",
34
- "sliding_attention",
35
- "sliding_attention",
36
- "sliding_attention",
37
- "sliding_attention",
38
- "full_attention",
39
- "sliding_attention",
40
- "sliding_attention",
41
- "sliding_attention",
42
- "sliding_attention",
43
- "sliding_attention",
44
- "full_attention",
45
- "sliding_attention",
46
- "sliding_attention",
47
- "sliding_attention",
48
- "sliding_attention",
49
- "sliding_attention",
50
- "full_attention",
51
- "sliding_attention",
52
- "sliding_attention",
53
- "sliding_attention",
54
- "sliding_attention",
55
- "sliding_attention",
56
- "full_attention",
57
- "sliding_attention",
58
- "sliding_attention",
59
- "sliding_attention",
60
- "sliding_attention"
61
- ],
62
- "max_position_embeddings": 131072,
63
- "model_type": "gemma3_text",
64
- "num_attention_heads": 8,
65
- "num_hidden_layers": 34,
66
- "num_key_value_heads": 4,
67
- "query_pre_attn_scalar": 256,
68
- "rms_norm_eps": 1e-06,
69
- "rope_local_base_freq": 10000.0,
70
- "rope_scaling": {
71
- "factor": 8.0,
72
- "rope_type": "linear"
73
- },
74
- "rope_theta": 1000000.0,
75
- "sliding_window": 1024,
76
- "torch_dtype": "float16",
77
- "use_cache": true,
78
- "vocab_size": 262208
79
- },
80
- "torch_dtype": "float16",
81
- "transformers_version": "4.55.4",
82
- "unsloth_fixed": true,
83
- "unsloth_version": "2025.10.8",
84
- "vision_config": {
85
- "attention_dropout": 0.0,
86
- "hidden_act": "gelu_pytorch_tanh",
87
- "hidden_size": 1152,
88
- "image_size": 896,
89
- "intermediate_size": 4304,
90
- "layer_norm_eps": 1e-06,
91
- "model_type": "siglip_vision_model",
92
- "num_attention_heads": 16,
93
- "num_channels": 3,
94
- "num_hidden_layers": 27,
95
- "patch_size": 14,
96
- "torch_dtype": "float16",
97
- "vision_use_head": false
98
- }
99
  }
 
1
+ {
2
+ "architectures": [
3
+ "Gemma3ForConditionalGeneration"
4
+ ],
5
+ "boi_token_index": 255999,
6
+ "bos_token_id": 2,
7
+ "eoi_token_index": 256000,
8
+ "eos_token_id": 106,
9
+ "image_token_index": 262144,
10
+ "initializer_range": 0.02,
11
+ "mm_tokens_per_image": 256,
12
+ "model_type": "gemma3",
13
+ "pad_token_id": 0,
14
+ "text_config": {
15
+ "_sliding_window_pattern": 6,
16
+ "attention_bias": false,
17
+ "attention_dropout": 0.0,
18
+ "attn_logit_softcapping": null,
19
+ "cache_implementation": "hybrid",
20
+ "final_logit_softcapping": null,
21
+ "head_dim": 256,
22
+ "hidden_activation": "gelu_pytorch_tanh",
23
+ "hidden_size": 2560,
24
+ "initializer_range": 0.02,
25
+ "intermediate_size": 10240,
26
+ "layer_types": [
27
+ "sliding_attention",
28
+ "sliding_attention",
29
+ "sliding_attention",
30
+ "sliding_attention",
31
+ "sliding_attention",
32
+ "full_attention",
33
+ "sliding_attention",
34
+ "sliding_attention",
35
+ "sliding_attention",
36
+ "sliding_attention",
37
+ "sliding_attention",
38
+ "full_attention",
39
+ "sliding_attention",
40
+ "sliding_attention",
41
+ "sliding_attention",
42
+ "sliding_attention",
43
+ "sliding_attention",
44
+ "full_attention",
45
+ "sliding_attention",
46
+ "sliding_attention",
47
+ "sliding_attention",
48
+ "sliding_attention",
49
+ "sliding_attention",
50
+ "full_attention",
51
+ "sliding_attention",
52
+ "sliding_attention",
53
+ "sliding_attention",
54
+ "sliding_attention",
55
+ "sliding_attention",
56
+ "full_attention",
57
+ "sliding_attention",
58
+ "sliding_attention",
59
+ "sliding_attention",
60
+ "sliding_attention"
61
+ ],
62
+ "max_position_embeddings": 131072,
63
+ "model_type": "gemma3_text",
64
+ "num_attention_heads": 8,
65
+ "num_hidden_layers": 34,
66
+ "num_key_value_heads": 4,
67
+ "query_pre_attn_scalar": 256,
68
+ "rms_norm_eps": 1e-06,
69
+ "rope_local_base_freq": 10000.0,
70
+ "rope_scaling": {
71
+ "factor": 8.0,
72
+ "rope_type": "linear"
73
+ },
74
+ "rope_theta": 1000000.0,
75
+ "sliding_window": 1024,
76
+ "torch_dtype": "float16",
77
+ "use_cache": true,
78
+ "vocab_size": 262208
79
+ },
80
+ "torch_dtype": "float16",
81
+ "transformers_version": "4.55.4",
82
+ "unsloth_fixed": true,
83
+ "unsloth_version": "2025.10.3",
84
+ "vision_config": {
85
+ "attention_dropout": 0.0,
86
+ "hidden_act": "gelu_pytorch_tanh",
87
+ "hidden_size": 1152,
88
+ "image_size": 896,
89
+ "intermediate_size": 4304,
90
+ "layer_norm_eps": 1e-06,
91
+ "model_type": "siglip_vision_model",
92
+ "num_attention_heads": 16,
93
+ "num_channels": 3,
94
+ "num_hidden_layers": 27,
95
+ "patch_size": 14,
96
+ "torch_dtype": "float16",
97
+ "vision_use_head": false
98
+ }
99
  }
model-00001-of-00002.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:f23e67d63ee3f4fd3fcac0ac5ac1fa4ec117d8c72d696e9b019a68fabcc4ace7
3
  size 4961251752
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:064b41ff8c8cfe92f154216b8746914072ee0276c7a15bf6de867cfb727275e2
3
  size 4961251752
model-00002-of-00002.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:3ba9d1d9a79507a8075e0f70c61875d5598a273189a7e78999a544a68f567fa3
3
  size 3639026128
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:2e63281f405a8cecd7b31e412ce948e2636c3b69e35292ee293b71cf3ed7dc20
3
  size 3639026128
model.safetensors.index.json CHANGED
The diff for this file is too large to render. See raw diff
 
preprocessor_config.json CHANGED
@@ -1,29 +1,29 @@
1
- {
2
- "do_convert_rgb": null,
3
- "do_normalize": true,
4
- "do_pan_and_scan": null,
5
- "do_rescale": true,
6
- "do_resize": true,
7
- "image_mean": [
8
- 0.5,
9
- 0.5,
10
- 0.5
11
- ],
12
- "image_processor_type": "Gemma3ImageProcessor",
13
- "image_seq_length": 256,
14
- "image_std": [
15
- 0.5,
16
- 0.5,
17
- 0.5
18
- ],
19
- "pan_and_scan_max_num_crops": null,
20
- "pan_and_scan_min_crop_size": null,
21
- "pan_and_scan_min_ratio_to_activate": null,
22
- "processor_class": "Gemma3Processor",
23
- "resample": 2,
24
- "rescale_factor": 0.00392156862745098,
25
- "size": {
26
- "height": 896,
27
- "width": 896
28
- }
29
- }
 
1
+ {
2
+ "do_convert_rgb": null,
3
+ "do_normalize": true,
4
+ "do_pan_and_scan": null,
5
+ "do_rescale": true,
6
+ "do_resize": true,
7
+ "image_mean": [
8
+ 0.5,
9
+ 0.5,
10
+ 0.5
11
+ ],
12
+ "image_processor_type": "Gemma3ImageProcessor",
13
+ "image_seq_length": 256,
14
+ "image_std": [
15
+ 0.5,
16
+ 0.5,
17
+ 0.5
18
+ ],
19
+ "pan_and_scan_max_num_crops": null,
20
+ "pan_and_scan_min_crop_size": null,
21
+ "pan_and_scan_min_ratio_to_activate": null,
22
+ "processor_class": "Gemma3Processor",
23
+ "resample": 2,
24
+ "rescale_factor": 0.00392156862745098,
25
+ "size": {
26
+ "height": 896,
27
+ "width": 896
28
+ }
29
+ }
processor_config.json CHANGED
@@ -1,4 +1,4 @@
1
- {
2
- "image_seq_length": 256,
3
- "processor_class": "Gemma3Processor"
4
- }
 
1
+ {
2
+ "image_seq_length": 256,
3
+ "processor_class": "Gemma3Processor"
4
+ }
special_tokens_map.json CHANGED
@@ -1,33 +1,33 @@
1
- {
2
- "boi_token": "<start_of_image>",
3
- "bos_token": {
4
- "content": "<bos>",
5
- "lstrip": false,
6
- "normalized": false,
7
- "rstrip": false,
8
- "single_word": false
9
- },
10
- "eoi_token": "<end_of_image>",
11
- "eos_token": {
12
- "content": "<end_of_turn>",
13
- "lstrip": false,
14
- "normalized": false,
15
- "rstrip": false,
16
- "single_word": false
17
- },
18
- "image_token": "<image_soft_token>",
19
- "pad_token": {
20
- "content": "<pad>",
21
- "lstrip": false,
22
- "normalized": false,
23
- "rstrip": false,
24
- "single_word": false
25
- },
26
- "unk_token": {
27
- "content": "<unk>",
28
- "lstrip": false,
29
- "normalized": false,
30
- "rstrip": false,
31
- "single_word": false
32
- }
33
- }
 
1
+ {
2
+ "boi_token": "<start_of_image>",
3
+ "bos_token": {
4
+ "content": "<bos>",
5
+ "lstrip": false,
6
+ "normalized": false,
7
+ "rstrip": false,
8
+ "single_word": false
9
+ },
10
+ "eoi_token": "<end_of_image>",
11
+ "eos_token": {
12
+ "content": "<end_of_turn>",
13
+ "lstrip": false,
14
+ "normalized": false,
15
+ "rstrip": false,
16
+ "single_word": false
17
+ },
18
+ "image_token": "<image_soft_token>",
19
+ "pad_token": {
20
+ "content": "<pad>",
21
+ "lstrip": false,
22
+ "normalized": false,
23
+ "rstrip": false,
24
+ "single_word": false
25
+ },
26
+ "unk_token": {
27
+ "content": "<unk>",
28
+ "lstrip": false,
29
+ "normalized": false,
30
+ "rstrip": false,
31
+ "single_word": false
32
+ }
33
+ }
tokenizer_config.json CHANGED
The diff for this file is too large to render. See raw diff