mithun50 committed
Commit 708878c · 1 Parent(s): 1657118

Sync from GitHub: 2026-01-18 03:01:07

Files changed (3)
  1. README.md +33 -6
  2. adapter_config.json +16 -5
  3. adapter_model.safetensors +1 -1
README.md CHANGED
@@ -1,3 +1,22 @@
+---
+license: apache-2.0
+language:
+- kn
+- en
+base_model: Qwen/Qwen3-0.6B
+library_name: peft
+pipeline_tag: text-generation
+tags:
+- kannada
+- qwen3
+- lora
+- peft
+- instruction-tuned
+- indian-languages
+datasets:
+- Cognitive-Lab/Kannada-Instruct-dataset
+---
+
 # KannadaGPT-0.6B
 
 A Kannada language model fine-tuned on Qwen3-0.6B using LoRA (Low-Rank Adaptation).
@@ -47,10 +66,10 @@ base_model = AutoModelForCausalLM.from_pretrained(
     torch_dtype="auto",
     device_map="auto"
 )
-tokenizer = AutoTokenizer.from_pretrained("mithungowdab/KannadaGPT-0.6B")
+tokenizer = AutoTokenizer.from_pretrained("Mithun501/KannadaGPT-0.6B")
 
 # Load LoRA adapter
-model = PeftModel.from_pretrained(base_model, "mithungowdab/KannadaGPT-0.6B")
+model = PeftModel.from_pretrained(base_model, "Mithun501/KannadaGPT-0.6B")
 
 # Generate text
 messages = [
@@ -82,7 +101,7 @@ print(response)
 
 ## Training Progress
 
-The model was trained on Kaggle with P100 GPU. Training metrics from checkpoint-1500:
+The model was trained on Kaggle with a P100 GPU. Training metrics from checkpoint-4500:
 
 | Step | Loss | Learning Rate |
 |------|------|---------------|
@@ -90,6 +109,14 @@ The model was trained on Kaggle with P100 GPU. Training metrics from checkpoint-
 | 500 | 0.675 | 6.8e-05 |
 | 1000 | 0.613 | 1.4e-04 |
 | 1500 | 0.572 | 2.0e-04 |
+| 2000 | 0.534 | 2.0e-04 |
+| 2500 | 0.518 | 2.0e-04 |
+| 3000 | 0.502 | 1.9e-04 |
+| 3500 | 0.492 | 1.9e-04 |
+| 4000 | 0.488 | 1.9e-04 |
+| 4500 | 0.470 | 1.9e-04 |
+
+**Training Progress**: 4,500 / 48,702 steps (9.2% complete, epoch 0.185/2.0)
 
 ## Project Structure
 
@@ -106,11 +133,11 @@ KannadaGPT-0.6B/
 ├── chat_template.jinja    # Chat template
 └── README.md              # This file
 ```
 
 ## Limitations
 
 - This is a LoRA adapter and requires the base model (Qwen3-0.6B) to run
-- Training is partial (checkpoint-1500 of ~48,700 total steps)
+- Training is partial (checkpoint-4500 of ~48,700 total steps, ~9.2% complete)
 - Best suited for Kannada instruction-following tasks
 - May generate incorrect or nonsensical responses for complex queries
 
@@ -129,7 +156,7 @@ Apache 2.0
 
 ```bibtex
 @misc{kannadagpt-0.6b,
-  author = {mithungowdab},
+  author = {Mithun501},
   title = {KannadaGPT-0.6B: A Kannada Language Model},
   year = {2025},
   publisher = {GitHub},
@@ -145,4 +172,4 @@ Apache 2.0
 
 ## Author
 
-**mithungowdab** - [GitHub](https://github.com/mithun50) | [HuggingFace](https://huggingface.co/mithungowdab)
+**Mithun501** - [GitHub](https://github.com/mithun50) | [HuggingFace](https://huggingface.co/Mithun501)
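
For reference, the usage fragments touched above are easier to read assembled. Here is a minimal end-to-end sketch under the card's stated stack (transformers + peft); the repo IDs come from this commit, while the example prompt and generation settings are illustrative assumptions:

```python
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer
from peft import PeftModel

# Load the frozen base model (ID from the card)
base_model = AutoModelForCausalLM.from_pretrained(
    "Qwen/Qwen3-0.6B",
    torch_dtype="auto",
    device_map="auto",
)
tokenizer = AutoTokenizer.from_pretrained("Mithun501/KannadaGPT-0.6B")

# Attach the LoRA adapter on top of the base weights
model = PeftModel.from_pretrained(base_model, "Mithun501/KannadaGPT-0.6B")

# Build a chat-formatted prompt (the content is an assumed example:
# "Write a short story in Kannada.")
messages = [{"role": "user", "content": "ಕನ್ನಡದಲ್ಲಿ ಒಂದು ಸಣ್ಣ ಕಥೆ ಬರೆಯಿರಿ."}]
input_ids = tokenizer.apply_chat_template(
    messages, add_generation_prompt=True, return_tensors="pt"
).to(model.device)

with torch.no_grad():
    output = model.generate(input_ids, max_new_tokens=256)
response = tokenizer.decode(output[0][input_ids.shape[-1]:], skip_special_tokens=True)
print(response)
```

Because this is an adapter, `PeftModel.from_pretrained` downloads only the ~40 MB LoRA weights; for deployment, `model.merge_and_unload()` folds them into the base model so inference no longer needs peft.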
adapter_config.json CHANGED
@@ -3,29 +3,40 @@
   "auto_mapping": null,
   "base_model_name_or_path": "Qwen/Qwen3-0.6B",
   "bias": "none",
+  "corda_config": null,
+  "eva_config": null,
+  "exclude_modules": null,
   "fan_in_fan_out": false,
   "inference_mode": true,
   "init_lora_weights": true,
+  "layer_replication": null,
   "layers_pattern": null,
   "layers_to_transform": null,
   "loftq_config": {},
   "lora_alpha": 32,
+  "lora_bias": false,
   "lora_dropout": 0.05,
+  "megatron_config": null,
+  "megatron_core": "megatron.core",
   "modules_to_save": null,
   "peft_type": "LORA",
+  "qalora_group_size": 16,
   "r": 16,
   "rank_pattern": {},
   "revision": null,
   "target_modules": [
-    "q_proj",
-    "k_proj",
     "v_proj",
+    "down_proj",
     "o_proj",
+    "k_proj",
+    "q_proj",
     "gate_proj",
-    "up_proj",
-    "down_proj"
+    "up_proj"
   ],
+  "target_parameters": null,
   "task_type": "CAUSAL_LM",
+  "trainable_token_indices": null,
   "use_dora": false,
+  "use_qalora": false,
   "use_rslora": false
-}
+}
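
Most of the new keys above (`corda_config`, `eva_config`, `lora_bias`, `qalora_group_size`, `trainable_token_indices`, ...) are defaults written by a newer peft version, and `target_modules` is only reordered; the effective adapter (r=16, alpha=32, dropout 0.05, all seven attention and MLP projections) is unchanged. A sketch of the equivalent `LoraConfig`, assuming peft's standard field names:

```python
from peft import LoraConfig

# Mirrors the trainable settings in adapter_config.json above
lora_config = LoraConfig(
    r=16,                    # LoRA rank
    lora_alpha=32,           # scaling: alpha / r = 2.0
    lora_dropout=0.05,
    bias="none",
    task_type="CAUSAL_LM",
    target_modules=[
        # all attention and MLP projection matrices of each block
        "q_proj", "k_proj", "v_proj", "o_proj",
        "gate_proj", "up_proj", "down_proj",
    ],
)
```

With r=16 across these seven projections, the adapter comes to roughly 10 M trainable parameters, consistent with the ~40 MB fp32 safetensors file below.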
adapter_model.safetensors CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:1578f528067eb6d54817e4fe13a79cda672711eed27bf332ffda58559f727e1c
+oid sha256:7e860ba8feb424f9fc183133b97b43d31256d1b4367a94eee35cf9f1e6cc287c
 size 40422168
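
The entry above is a Git LFS pointer, so the commit changes only the `oid` (the SHA-256 of the actual weight file) while the byte size is identical, as expected when a checkpoint of the same shape is overwritten. A small sketch for verifying a downloaded copy against the pointer; the local path is an assumption:

```python
import hashlib
import os

path = "adapter_model.safetensors"  # assumed local path of the downloaded file

h = hashlib.sha256()
with open(path, "rb") as f:
    for chunk in iter(lambda: f.read(1 << 20), b""):  # hash in 1 MiB chunks
        h.update(chunk)

# oid and size copied from the new pointer in this commit
assert h.hexdigest() == "7e860ba8feb424f9fc183133b97b43d31256d1b4367a94eee35cf9f1e6cc287c"
assert os.path.getsize(path) == 40422168
print("pointer matches")
```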