mithun50 committed
Commit 708878c · 1 parent: 1657118
Sync from GitHub: 2026-01-18 03:01:07

Changed files:
- README.md (+36 -6)
- adapter_config.json (+16 -5)
- adapter_model.safetensors (+1 -1)
README.md
CHANGED

````diff
@@ -1,3 +1,22 @@
+---
+license: apache-2.0
+language:
+- kn
+- en
+base_model: Qwen/Qwen3-0.6B
+library_name: peft
+pipeline_tag: text-generation
+tags:
+- kannada
+- qwen3
+- lora
+- peft
+- instruction-tuned
+- indian-languages
+datasets:
+- Cognitive-Lab/Kannada-Instruct-dataset
+---
+
 # KannadaGPT-0.6B
 
 A Kannada language model fine-tuned on Qwen3-0.6B using LoRA (Low-Rank Adaptation).
@@ -47,10 +66,10 @@ base_model = AutoModelForCausalLM.from_pretrained(
     torch_dtype="auto",
     device_map="auto"
 )
-tokenizer = AutoTokenizer.from_pretrained("
+tokenizer = AutoTokenizer.from_pretrained("Mithun501/KannadaGPT-0.6B")
 
 # Load LoRA adapter
-model = PeftModel.from_pretrained(base_model, "
+model = PeftModel.from_pretrained(base_model, "Mithun501/KannadaGPT-0.6B")
 
 # Generate text
 messages = [
@@ -82,7 +101,7 @@ print(response)
 
 ## Training Progress
 
-The model was trained on Kaggle with P100 GPU. Training metrics from checkpoint-
+The model was trained on Kaggle with P100 GPU. Training metrics from checkpoint-4500:
 
 | Step | Loss | Learning Rate |
 |------|------|---------------|
@@ -90,6 +109,16 @@ The model was trained on Kaggle with P100 GPU. Training metrics from checkpoint-
 | 500 | 0.675 | 6.8e-05 |
 | 1000 | 0.613 | 1.4e-04 |
 | 1500 | 0.572 | 2.0e-04 |
+| 2000 | 0.534 | 2.0e-04 |
+| 2500 | 0.518 | 2.0e-04 |
+| 3000 | 0.502 | 1.9e-04 |
+| 3500 | 0.492 | 1.9e-04 |
+| 4000 | 0.488 | 1.9e-04 |
+| 4500 | 0.470 | 1.9e-04 |
+
+**Training Progress**: 4,500 / 48,702 steps (9.2% complete, epoch 0.185/2.0)
+<<<<<<< HEAD
+=======
 
 ## Project Structure
 
@@ -106,11 +135,12 @@ KannadaGPT-0.6B/
 ├── chat_template.jinja   # Chat template
 └── README.md             # This file
 ```
+>>>>>>> b9fa282 (Update to checkpoint-4500 with improved training metrics)
 
 ## Limitations
 
 - This is a LoRA adapter and requires the base model (Qwen3-0.6B) to run
-- Training is partial (checkpoint-
+- Training is partial (checkpoint-4500 of ~48,700 total steps, ~9.2% complete)
 - Best suited for Kannada instruction-following tasks
 - May generate incorrect or nonsensical responses for complex queries
 
@@ -129,7 +159,7 @@ Apache 2.0
 
 ```bibtex
 @misc{kannadagpt-0.6b,
-  author = {
+  author = {Mithun501},
   title = {KannadaGPT-0.6B: A Kannada Language Model},
   year = {2025},
   publisher = {GitHub},
@@ -145,4 +175,4 @@ Apache 2.0
 
 ## Author
 
-**
+**Mithun501** - [GitHub](https://github.com/mithun50) | [HuggingFace](https://huggingface.co/Mithun501)
````
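For convenience, the usage snippet shown piecemeal in the diff above assembles into the following runnable sketch. The repo IDs, the `torch_dtype`/`device_map` arguments, and the tokenizer/adapter load calls come from the README; the Kannada prompt, the `apply_chat_template` call, and the generation settings are illustrative assumptions rather than the README's exact code.

```python
from transformers import AutoModelForCausalLM, AutoTokenizer
from peft import PeftModel

# Load the frozen base model (base_model from the new frontmatter)
base_model = AutoModelForCausalLM.from_pretrained(
    "Qwen/Qwen3-0.6B",
    torch_dtype="auto",
    device_map="auto",
)
tokenizer = AutoTokenizer.from_pretrained("Mithun501/KannadaGPT-0.6B")

# Load the LoRA adapter on top of the base weights
model = PeftModel.from_pretrained(base_model, "Mithun501/KannadaGPT-0.6B")

# Build a chat prompt via the tokenizer's chat template (the repo ships
# chat_template.jinja per the README's project structure); the prompt
# below is an illustrative example, not text from the README
messages = [
    {"role": "user", "content": "ಕರ್ನಾಟಕದ ರಾಜಧಾನಿ ಯಾವುದು?"},  # "What is the capital of Karnataka?"
]
input_ids = tokenizer.apply_chat_template(
    messages, add_generation_prompt=True, return_tensors="pt"
).to(model.device)

output_ids = model.generate(input_ids, max_new_tokens=128)
# Decode only the newly generated tokens, skipping the prompt
response = tokenizer.decode(output_ids[0][input_ids.shape[-1]:], skip_special_tokens=True)
print(response)
```

Since this is a PEFT adapter (`library_name: peft` in the new frontmatter), the ~40 MB safetensors file holds only the LoRA deltas; the 0.6B base weights are downloaded separately.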
adapter_config.json
CHANGED

```diff
@@ -3,29 +3,40 @@
   "auto_mapping": null,
   "base_model_name_or_path": "Qwen/Qwen3-0.6B",
   "bias": "none",
+  "corda_config": null,
+  "eva_config": null,
+  "exclude_modules": null,
   "fan_in_fan_out": false,
   "inference_mode": true,
   "init_lora_weights": true,
+  "layer_replication": null,
   "layers_pattern": null,
   "layers_to_transform": null,
   "loftq_config": {},
   "lora_alpha": 32,
+  "lora_bias": false,
   "lora_dropout": 0.05,
+  "megatron_config": null,
+  "megatron_core": "megatron.core",
   "modules_to_save": null,
   "peft_type": "LORA",
+  "qalora_group_size": 16,
   "r": 16,
   "rank_pattern": {},
   "revision": null,
   "target_modules": [
-    "q_proj",
-    "k_proj",
     "v_proj",
+    "down_proj",
     "o_proj",
+    "k_proj",
+    "q_proj",
     "gate_proj",
-    "up_proj",
-    "down_proj"
+    "up_proj"
   ],
+  "target_parameters": null,
   "task_type": "CAUSAL_LM",
+  "trainable_token_indices": null,
   "use_dora": false,
+  "use_qalora": false,
   "use_rslora": false
-}
+}
```
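The adapter hyperparameters recorded above map one-to-one onto peft's `LoraConfig`; the newly added keys (`corda_config`, `lora_bias`, `qalora_group_size`, and so on) are just the null/default fields that more recent peft versions serialize. A minimal sketch of an equivalent configuration, not the author's actual training script:

```python
from peft import LoraConfig, get_peft_model
from transformers import AutoModelForCausalLM

# Mirror the committed adapter_config.json (field names map 1:1)
lora_config = LoraConfig(
    r=16,                   # "r": 16
    lora_alpha=32,          # "lora_alpha": 32
    lora_dropout=0.05,      # "lora_dropout": 0.05
    bias="none",            # "bias": "none"
    task_type="CAUSAL_LM",  # "task_type": "CAUSAL_LM"
    target_modules=[
        "q_proj", "k_proj", "v_proj", "o_proj",  # attention projections
        "gate_proj", "up_proj", "down_proj",     # MLP projections
    ],
)

base = AutoModelForCausalLM.from_pretrained("Qwen/Qwen3-0.6B")
model = get_peft_model(base, lora_config)
model.print_trainable_parameters()  # only the rank-16 adapter weights train
```

Note that the `target_modules` change in the diff is a reordering only: both the old and new configs name the same seven projection matrices.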
adapter_model.safetensors
CHANGED

```diff
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:
+oid sha256:7e860ba8feb424f9fc183133b97b43d31256d1b4367a94eee35cf9f1e6cc287c
 size 40422168
```
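Because the weights are stored via Git LFS, this commit only swaps the pointer's SHA-256 (the size, 40,422,168 bytes, is unchanged), so a download can be verified against the pointer directly. A minimal sketch, assuming the `Mithun501/KannadaGPT-0.6B` repo ID from the README:

```python
import hashlib
import os
from huggingface_hub import hf_hub_download

# Fetch the adapter weights from the Hub
path = hf_hub_download(
    repo_id="Mithun501/KannadaGPT-0.6B",
    filename="adapter_model.safetensors",
)

# Hash the file in 1 MiB chunks rather than reading it all at once
sha = hashlib.sha256()
with open(path, "rb") as f:
    for chunk in iter(lambda: f.read(1 << 20), b""):
        sha.update(chunk)

# Compare against the LFS pointer committed above
assert os.path.getsize(path) == 40422168
assert sha.hexdigest() == "7e860ba8feb424f9fc183133b97b43d31256d1b4367a94eee35cf9f1e6cc287c"
print("adapter_model.safetensors matches the LFS pointer")
```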