SatyamSinghal committed 8 days ago

Commit

c055262

verified ·

1 Parent(s): b559bca

Add TaskMind LoRA adapter — trained on WhatsApp task extraction dataset

Browse files

This view is limited to 50 files because it contains too many changes. See raw diff

Files changed (50) hide show

README.md +4 -3
adapter_config.json +3 -3
adapter_model.safetensors +2 -2
checkpoint-17/README.md +1 -0
checkpoint-17/adapter_config.json +3 -3
checkpoint-17/adapter_model.safetensors +2 -2
checkpoint-17/optimizer.pt +2 -2
checkpoint-17/rng_state.pth +2 -2
checkpoint-17/scheduler.pt +2 -2
checkpoint-17/special_tokens_map.json +24 -0
checkpoint-17/tokenizer.model +3 -0
checkpoint-17/tokenizer_config.json +31 -3
checkpoint-17/trainer_state.json +11 -11
checkpoint-17/training_args.bin +2 -2
checkpoint-34/README.md +1 -0
checkpoint-34/adapter_config.json +3 -3
checkpoint-34/adapter_model.safetensors +2 -2
checkpoint-34/optimizer.pt +2 -2
checkpoint-34/rng_state.pth +2 -2
checkpoint-34/scheduler.pt +2 -2
checkpoint-34/special_tokens_map.json +24 -0
checkpoint-34/tokenizer.model +3 -0
checkpoint-34/tokenizer_config.json +31 -3
checkpoint-34/trainer_state.json +25 -25
checkpoint-34/training_args.bin +2 -2
checkpoint-51/README.md +1 -0
checkpoint-51/adapter_config.json +3 -3
checkpoint-51/adapter_model.safetensors +2 -2
checkpoint-51/optimizer.pt +2 -2
checkpoint-51/rng_state.pth +2 -2
checkpoint-51/scheduler.pt +2 -2
checkpoint-51/special_tokens_map.json +24 -0
checkpoint-51/tokenizer.model +3 -0
checkpoint-51/tokenizer_config.json +31 -3
checkpoint-51/trainer_state.json +39 -39
checkpoint-51/training_args.bin +2 -2
checkpoint-68/README.md +1 -0
checkpoint-68/adapter_config.json +3 -3
checkpoint-68/adapter_model.safetensors +2 -2
checkpoint-68/optimizer.pt +2 -2
checkpoint-68/rng_state.pth +2 -2
checkpoint-68/scheduler.pt +2 -2
checkpoint-68/special_tokens_map.json +24 -0
checkpoint-68/tokenizer.model +3 -0
checkpoint-68/tokenizer_config.json +31 -3
checkpoint-68/trainer_state.json +49 -49
checkpoint-68/training_args.bin +2 -2
checkpoint-85/README.md +1 -0
checkpoint-85/adapter_config.json +3 -3
checkpoint-85/adapter_model.safetensors +2 -2

README.md CHANGED Viewed

@@ -6,6 +6,7 @@ tags:
 - base_model:adapter:TinyLlama/TinyLlama-1.1B-Chat-v1.0
 - lora
 - sft
 - trl
 licence: license
 pipeline_tag: text-generation
@@ -39,10 +40,10 @@ This model was trained with SFT.
 - PEFT 0.18.1
 - TRL: 1.1.0
-- Transformers: 5.0.0
-- Pytorch: 2.10.0+cu128
 - Datasets: 4.8.4
-- Tokenizers: 0.22.2
 ## Citations

 - base_model:adapter:TinyLlama/TinyLlama-1.1B-Chat-v1.0
 - lora
 - sft
+- transformers
 - trl
 licence: license
 pipeline_tag: text-generation
 - PEFT 0.18.1
 - TRL: 1.1.0
+- Transformers: 4.57.0
+- Pytorch: 2.2.2
 - Datasets: 4.8.4
+- Tokenizers: 0.22.1
 ## Citations

adapter_config.json CHANGED Viewed

@@ -3,7 +3,7 @@
   "alpha_pattern": {},
   "arrow_config": null,
   "auto_mapping": null,
-  "base_model_name_or_path": null,
   "bias": "none",
   "corda_config": null,
   "ensure_weight_tying": false,
@@ -29,8 +29,8 @@
   "rank_pattern": {},
   "revision": null,
   "target_modules": [
-    "q_proj",
-    "v_proj"
   ],
   "target_parameters": null,
   "task_type": "CAUSAL_LM",

   "alpha_pattern": {},
   "arrow_config": null,
   "auto_mapping": null,
+  "base_model_name_or_path": "TinyLlama/TinyLlama-1.1B-Chat-v1.0",
   "bias": "none",
   "corda_config": null,
   "ensure_weight_tying": false,
   "rank_pattern": {},
   "revision": null,
   "target_modules": [
+    "v_proj",
+    "q_proj"
   ],
   "target_parameters": null,
   "task_type": "CAUSAL_LM",

adapter_model.safetensors CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:04e00888823aee279ac5505145e02829ec8de4f7b97637cd1eadb8e7bb486ccb
-size 9031840

 version https://git-lfs.github.com/spec/v1
+oid sha256:d0759e7617267665196662e1c86613a8737244bbac6bdc596e21dbfd3e571b57
+size 9022864

checkpoint-17/README.md CHANGED Viewed

@@ -6,6 +6,7 @@ tags:
 - base_model:adapter:TinyLlama/TinyLlama-1.1B-Chat-v1.0
 - lora
 - sft
 - trl
 ---

 - base_model:adapter:TinyLlama/TinyLlama-1.1B-Chat-v1.0
 - lora
 - sft
+- transformers
 - trl
 ---

checkpoint-17/adapter_config.json CHANGED Viewed

@@ -3,7 +3,7 @@
   "alpha_pattern": {},
   "arrow_config": null,
   "auto_mapping": null,
-  "base_model_name_or_path": null,
   "bias": "none",
   "corda_config": null,
   "ensure_weight_tying": false,
@@ -29,8 +29,8 @@
   "rank_pattern": {},
   "revision": null,
   "target_modules": [
-    "q_proj",
-    "v_proj"
   ],
   "target_parameters": null,
   "task_type": "CAUSAL_LM",

   "alpha_pattern": {},
   "arrow_config": null,
   "auto_mapping": null,
+  "base_model_name_or_path": "TinyLlama/TinyLlama-1.1B-Chat-v1.0",
   "bias": "none",
   "corda_config": null,
   "ensure_weight_tying": false,
   "rank_pattern": {},
   "revision": null,
   "target_modules": [
+    "v_proj",
+    "q_proj"
   ],
   "target_parameters": null,
   "task_type": "CAUSAL_LM",

checkpoint-17/adapter_model.safetensors CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:a8b1ff0577d99d14537fd47209adf84156dc6ba496894968d4534890c3ab373d
-size 9031840

 version https://git-lfs.github.com/spec/v1
+oid sha256:e264d2d1fc670dd47e9a96c318800447ed4f3c513553f217d6ea5088e7c52412
+size 9022864

checkpoint-17/optimizer.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:7e75ab70ac4a680a5136c3d66642c2f66c36f0764b43044e0916c9a6e455c3d7
-size 18098251

 version https://git-lfs.github.com/spec/v1
+oid sha256:13ad6eabf38d51382f680faef0d19bdc3b1eaebd712345ddb7514abe95504279
+size 18094138

checkpoint-17/rng_state.pth CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:20c94c837693fa3cc8c94fe68782a14836d1bb8a9d69d40125757592560df9e0
-size 14645

 version https://git-lfs.github.com/spec/v1
+oid sha256:8b182573f61d8bcf5eaefcbf8f98d8734b6db51b44ad36aed3a305c431539fa1
+size 13990

checkpoint-17/scheduler.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:2c92a821fab24b021582fa6d2a1f00f74f177f65e427946a0e1442c404335edc
-size 1465

 version https://git-lfs.github.com/spec/v1
+oid sha256:478edd44e20812aa45a79d35d53340fce54e97bd298a641bc41c78cfd3152b0c
+size 1064

checkpoint-17/special_tokens_map.json ADDED Viewed

	@@ -0,0 +1,24 @@

+{
+  "bos_token": {
+    "content": "<s>",
+    "lstrip": false,
+    "normalized": false,
+    "rstrip": false,
+    "single_word": false
+  },
+  "eos_token": {
+    "content": "</s>",
+    "lstrip": false,
+    "normalized": false,
+    "rstrip": false,
+    "single_word": false
+  },
+  "pad_token": "</s>",
+  "unk_token": {
+    "content": "<unk>",
+    "lstrip": false,
+    "normalized": false,
+    "rstrip": false,
+    "single_word": false
+  }
+}

checkpoint-17/tokenizer.model ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:9e556afd44213b6bd1be2b850ebbbd98f5481437a8021afaf58ee7fb1818d347
+size 499723

checkpoint-17/tokenizer_config.json CHANGED Viewed

@@ -1,15 +1,43 @@
 {
-  "backend": "tokenizers",
   "bos_token": "<s>",
   "clean_up_tokenization_spaces": false,
   "eos_token": "</s>",
-  "is_local": false,
   "legacy": false,
   "model_max_length": 2048,
   "pad_token": "</s>",
   "padding_side": "right",
   "sp_model_kwargs": {},
-  "tokenizer_class": "TokenizersBackend",
   "unk_token": "<unk>",
   "use_default_system_prompt": false
 }

 {
+  "add_bos_token": true,
+  "add_eos_token": false,
+  "add_prefix_space": null,
+  "added_tokens_decoder": {
+    "0": {
+      "content": "<unk>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "1": {
+      "content": "<s>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "2": {
+      "content": "</s>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    }
+  },
   "bos_token": "<s>",
   "clean_up_tokenization_spaces": false,
   "eos_token": "</s>",
+  "extra_special_tokens": {},
   "legacy": false,
   "model_max_length": 2048,
   "pad_token": "</s>",
   "padding_side": "right",
   "sp_model_kwargs": {},
+  "tokenizer_class": "LlamaTokenizer",
   "unk_token": "<unk>",
   "use_default_system_prompt": false
 }

checkpoint-17/trainer_state.json CHANGED Viewed

@@ -1,6 +1,6 @@
 {
   "best_global_step": 17,
-  "best_metric": 1.5321311950683594,
   "best_model_checkpoint": "out/taskmind_lora_peft/checkpoint-17",
   "epoch": 1.0,
   "eval_steps": 500,
@@ -10,24 +10,24 @@
   "is_world_process_zero": true,
   "log_history": [
     {
-      "entropy": 1.7736787021160125,
       "epoch": 0.6060606060606061,
-      "grad_norm": 1.3698776960372925,
       "learning_rate": 0.00018,
-      "loss": 2.2852725982666016,
-      "mean_token_accuracy": 0.5953301250934601,
       "num_tokens": 14004.0,
       "step": 10
     },
     {
       "epoch": 1.0,
-      "eval_entropy": 1.68980073928833,
-      "eval_loss": 1.5321311950683594,
-      "eval_mean_token_accuracy": 0.6843333045641581,
       "eval_num_tokens": 22901.0,
-      "eval_runtime": 0.8074,
-      "eval_samples_per_second": 29.726,
-      "eval_steps_per_second": 3.716,
       "step": 17
     }
   ],

 {
   "best_global_step": 17,
+  "best_metric": 1.5357812643051147,
   "best_model_checkpoint": "out/taskmind_lora_peft/checkpoint-17",
   "epoch": 1.0,
   "eval_steps": 500,
   "is_world_process_zero": true,
   "log_history": [
     {
+      "entropy": 1.772234356403351,
       "epoch": 0.6060606060606061,
+      "grad_norm": 1.3780473470687866,
       "learning_rate": 0.00018,
+      "loss": 2.2849,
+      "mean_token_accuracy": 0.5951868265867233,
       "num_tokens": 14004.0,
       "step": 10
     },
     {
       "epoch": 1.0,
+      "eval_entropy": 1.685779293378194,
+      "eval_loss": 1.5357812643051147,
+      "eval_mean_token_accuracy": 0.6872214078903198,
       "eval_num_tokens": 22901.0,
+      "eval_runtime": 2.3332,
+      "eval_samples_per_second": 10.286,
+      "eval_steps_per_second": 1.286,
       "step": 17
     }
   ],

checkpoint-17/training_args.bin CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:f892e424e706ccbc20b54bdefa043cf957b1d1afb44fc285f313a5b63e6fae88
-size 5649

 version https://git-lfs.github.com/spec/v1
+oid sha256:fdfc63bd124cf8b58acfe531d95973daba5a5e131127762949c75765ea5acb7f
+size 5880

checkpoint-34/README.md CHANGED Viewed

@@ -6,6 +6,7 @@ tags:
 - base_model:adapter:TinyLlama/TinyLlama-1.1B-Chat-v1.0
 - lora
 - sft
 - trl
 ---

 - base_model:adapter:TinyLlama/TinyLlama-1.1B-Chat-v1.0
 - lora
 - sft
+- transformers
 - trl
 ---

checkpoint-34/adapter_config.json CHANGED Viewed

@@ -3,7 +3,7 @@
   "alpha_pattern": {},
   "arrow_config": null,
   "auto_mapping": null,
-  "base_model_name_or_path": null,
   "bias": "none",
   "corda_config": null,
   "ensure_weight_tying": false,
@@ -29,8 +29,8 @@
   "rank_pattern": {},
   "revision": null,
   "target_modules": [
-    "q_proj",
-    "v_proj"
   ],
   "target_parameters": null,
   "task_type": "CAUSAL_LM",

   "alpha_pattern": {},
   "arrow_config": null,
   "auto_mapping": null,
+  "base_model_name_or_path": "TinyLlama/TinyLlama-1.1B-Chat-v1.0",
   "bias": "none",
   "corda_config": null,
   "ensure_weight_tying": false,
   "rank_pattern": {},
   "revision": null,
   "target_modules": [
+    "v_proj",
+    "q_proj"
   ],
   "target_parameters": null,
   "task_type": "CAUSAL_LM",

checkpoint-34/adapter_model.safetensors CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:5644425aecd68f77bb06c102eec2e37b5b73d3239f5df83610dbd9edb43576e6
-size 9031840

 version https://git-lfs.github.com/spec/v1
+oid sha256:3f93596021de1ef5682449b7f6b167c47a3b7777ee8b2dcb74bc7f97e66e8443
+size 9022864

checkpoint-34/optimizer.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:201de58f440f054c3f05fc473f8432b06225a5f3e67f3adeebcc2f6b466cace5
-size 18098251

 version https://git-lfs.github.com/spec/v1
+oid sha256:e39d2a401c09c34aa876e14267aac03214eefa27a867e7333d9cee2d965bf911
+size 18094138

checkpoint-34/rng_state.pth CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:c5884abaada04b5a1adf9b2de9f6b8e4aca52c73b443877337b654fce8bcefbe
-size 14645

 version https://git-lfs.github.com/spec/v1
+oid sha256:06e55bfc8723f269a626afca0be6f7def5753f3bb265436b94c5580b703cfcc7
+size 13990

checkpoint-34/scheduler.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:f9d5a7f5cff25ad89cccb034b77bcea74b485c8692b848f30cd0abd8ffd8530a
-size 1465

 version https://git-lfs.github.com/spec/v1
+oid sha256:85f978c32b22bc6a3ab73d3c7eaeaf097b1efaf63503b3ceb2ba5357b465036f
+size 1064

checkpoint-34/special_tokens_map.json ADDED Viewed

	@@ -0,0 +1,24 @@

+{
+  "bos_token": {
+    "content": "<s>",
+    "lstrip": false,
+    "normalized": false,
+    "rstrip": false,
+    "single_word": false
+  },
+  "eos_token": {
+    "content": "</s>",
+    "lstrip": false,
+    "normalized": false,
+    "rstrip": false,
+    "single_word": false
+  },
+  "pad_token": "</s>",
+  "unk_token": {
+    "content": "<unk>",
+    "lstrip": false,
+    "normalized": false,
+    "rstrip": false,
+    "single_word": false
+  }
+}

checkpoint-34/tokenizer.model ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:9e556afd44213b6bd1be2b850ebbbd98f5481437a8021afaf58ee7fb1818d347
+size 499723

checkpoint-34/tokenizer_config.json CHANGED Viewed

@@ -1,15 +1,43 @@
 {
-  "backend": "tokenizers",
   "bos_token": "<s>",
   "clean_up_tokenization_spaces": false,
   "eos_token": "</s>",
-  "is_local": false,
   "legacy": false,
   "model_max_length": 2048,
   "pad_token": "</s>",
   "padding_side": "right",
   "sp_model_kwargs": {},
-  "tokenizer_class": "TokenizersBackend",
   "unk_token": "<unk>",
   "use_default_system_prompt": false
 }

 {
+  "add_bos_token": true,
+  "add_eos_token": false,
+  "add_prefix_space": null,
+  "added_tokens_decoder": {
+    "0": {
+      "content": "<unk>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "1": {
+      "content": "<s>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "2": {
+      "content": "</s>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    }
+  },
   "bos_token": "<s>",
   "clean_up_tokenization_spaces": false,
   "eos_token": "</s>",
+  "extra_special_tokens": {},
   "legacy": false,
   "model_max_length": 2048,
   "pad_token": "</s>",
   "padding_side": "right",
   "sp_model_kwargs": {},
+  "tokenizer_class": "LlamaTokenizer",
   "unk_token": "<unk>",
   "use_default_system_prompt": false
 }

checkpoint-34/trainer_state.json CHANGED Viewed

@@ -1,6 +1,6 @@
 {
   "best_global_step": 34,
-  "best_metric": 0.5597708821296692,
   "best_model_checkpoint": "out/taskmind_lora_peft/checkpoint-34",
   "epoch": 2.0,
   "eval_steps": 500,
@@ -10,55 +10,55 @@
   "is_world_process_zero": true,
   "log_history": [
     {
-      "entropy": 1.7736787021160125,
       "epoch": 0.6060606060606061,
-      "grad_norm": 1.3698776960372925,
       "learning_rate": 0.00018,
-      "loss": 2.2852725982666016,
-      "mean_token_accuracy": 0.5953301250934601,
       "num_tokens": 14004.0,
       "step": 10
     },
     {
       "epoch": 1.0,
-      "eval_entropy": 1.68980073928833,
-      "eval_loss": 1.5321311950683594,
-      "eval_mean_token_accuracy": 0.6843333045641581,
       "eval_num_tokens": 22901.0,
-      "eval_runtime": 0.8074,
-      "eval_samples_per_second": 29.726,
-      "eval_steps_per_second": 3.716,
       "step": 17
     },
     {
-      "entropy": 1.683378031379298,
       "epoch": 1.1818181818181819,
-      "grad_norm": 1.924538016319275,
       "learning_rate": 0.00017600000000000002,
-      "loss": 1.693488311767578,
-      "mean_token_accuracy": 0.6652053249509711,
       "num_tokens": 27108.0,
       "step": 20
     },
     {
-      "entropy": 1.0494545072317123,
       "epoch": 1.7878787878787878,
-      "grad_norm": 2.967770576477051,
       "learning_rate": 0.00014933333333333335,
-      "loss": 0.9196723937988281,
-      "mean_token_accuracy": 0.8220452398061753,
       "num_tokens": 41182.0,
       "step": 30
     },
     {
       "epoch": 2.0,
-      "eval_entropy": 0.5931996901830038,
-      "eval_loss": 0.5597708821296692,
-      "eval_mean_token_accuracy": 0.9000988006591797,
       "eval_num_tokens": 45802.0,
-      "eval_runtime": 0.8451,
-      "eval_samples_per_second": 28.399,
-      "eval_steps_per_second": 3.55,
       "step": 34
     }
   ],

 {
   "best_global_step": 34,
+  "best_metric": 0.557042121887207,
   "best_model_checkpoint": "out/taskmind_lora_peft/checkpoint-34",
   "epoch": 2.0,
   "eval_steps": 500,
   "is_world_process_zero": true,
   "log_history": [
     {
+      "entropy": 1.772234356403351,
       "epoch": 0.6060606060606061,
+      "grad_norm": 1.3780473470687866,
       "learning_rate": 0.00018,
+      "loss": 2.2849,
+      "mean_token_accuracy": 0.5951868265867233,
       "num_tokens": 14004.0,
       "step": 10
     },
     {
       "epoch": 1.0,
+      "eval_entropy": 1.685779293378194,
+      "eval_loss": 1.5357812643051147,
+      "eval_mean_token_accuracy": 0.6872214078903198,
       "eval_num_tokens": 22901.0,
+      "eval_runtime": 2.3332,
+      "eval_samples_per_second": 10.286,
+      "eval_steps_per_second": 1.286,
       "step": 17
     },
     {
+      "entropy": 1.6779554203936928,
       "epoch": 1.1818181818181819,
+      "grad_norm": 1.9584565162658691,
       "learning_rate": 0.00017600000000000002,
+      "loss": 1.6949,
+      "mean_token_accuracy": 0.6669363661816246,
       "num_tokens": 27108.0,
       "step": 20
     },
     {
+      "entropy": 1.0465361922979355,
       "epoch": 1.7878787878787878,
+      "grad_norm": 2.725850820541382,
       "learning_rate": 0.00014933333333333335,
+      "loss": 0.9223,
+      "mean_token_accuracy": 0.8229832291603089,
       "num_tokens": 41182.0,
       "step": 30
     },
     {
       "epoch": 2.0,
+      "eval_entropy": 0.5895721216996511,
+      "eval_loss": 0.557042121887207,
+      "eval_mean_token_accuracy": 0.9006072084108988,
       "eval_num_tokens": 45802.0,
+      "eval_runtime": 1.4033,
+      "eval_samples_per_second": 17.102,
+      "eval_steps_per_second": 2.138,
       "step": 34
     }
   ],

checkpoint-34/training_args.bin CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:f892e424e706ccbc20b54bdefa043cf957b1d1afb44fc285f313a5b63e6fae88
-size 5649

 version https://git-lfs.github.com/spec/v1
+oid sha256:fdfc63bd124cf8b58acfe531d95973daba5a5e131127762949c75765ea5acb7f
+size 5880

checkpoint-51/README.md CHANGED Viewed

@@ -6,6 +6,7 @@ tags:
 - base_model:adapter:TinyLlama/TinyLlama-1.1B-Chat-v1.0
 - lora
 - sft
 - trl
 ---

 - base_model:adapter:TinyLlama/TinyLlama-1.1B-Chat-v1.0
 - lora
 - sft
+- transformers
 - trl
 ---

checkpoint-51/adapter_config.json CHANGED Viewed

@@ -3,7 +3,7 @@
   "alpha_pattern": {},
   "arrow_config": null,
   "auto_mapping": null,
-  "base_model_name_or_path": null,
   "bias": "none",
   "corda_config": null,
   "ensure_weight_tying": false,
@@ -29,8 +29,8 @@
   "rank_pattern": {},
   "revision": null,
   "target_modules": [
-    "q_proj",
-    "v_proj"
   ],
   "target_parameters": null,
   "task_type": "CAUSAL_LM",

   "alpha_pattern": {},
   "arrow_config": null,
   "auto_mapping": null,
+  "base_model_name_or_path": "TinyLlama/TinyLlama-1.1B-Chat-v1.0",
   "bias": "none",
   "corda_config": null,
   "ensure_weight_tying": false,
   "rank_pattern": {},
   "revision": null,
   "target_modules": [
+    "v_proj",
+    "q_proj"
   ],
   "target_parameters": null,
   "task_type": "CAUSAL_LM",

checkpoint-51/adapter_model.safetensors CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:d202be5a7a6c1d0004e6acc807954f1415af05256cbe5f9f9af5696cfefebdab
-size 9031840

 version https://git-lfs.github.com/spec/v1
+oid sha256:3ac034d5ecfdb8dcaafe60b742fd12fe259f77573283b91bbbe032b8c8077425
+size 9022864

checkpoint-51/optimizer.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:de0eced3429f1898ef7e7af22986de797276c294c614ae463177ae542f6ec829
-size 18098251

 version https://git-lfs.github.com/spec/v1
+oid sha256:13ae0fc37cbddf906480c6b6070792291f6f84a53831a7f1982bf5adee686ddf
+size 18094138

checkpoint-51/rng_state.pth CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:13271f99287f14c518d014c43c4f04e1b31802e29287bff408b37ff4f7ec2848
-size 14645

 version https://git-lfs.github.com/spec/v1
+oid sha256:32d32051101ec51c2b04c4ee6a6d2c7f40562e56836cbb02d6e6e3126490484d
+size 13990

checkpoint-51/scheduler.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:519b6914c41a59ede676d4799f51ee903158f1c4e7ec731762104bfcf5ad088f
-size 1465

 version https://git-lfs.github.com/spec/v1
+oid sha256:923e4c8fc2fa2baa19e1b2632e746f9b643a6a2ed7982aae130c58462f1007a1
+size 1064

checkpoint-51/special_tokens_map.json ADDED Viewed

	@@ -0,0 +1,24 @@

+{
+  "bos_token": {
+    "content": "<s>",
+    "lstrip": false,
+    "normalized": false,
+    "rstrip": false,
+    "single_word": false
+  },
+  "eos_token": {
+    "content": "</s>",
+    "lstrip": false,
+    "normalized": false,
+    "rstrip": false,
+    "single_word": false
+  },
+  "pad_token": "</s>",
+  "unk_token": {
+    "content": "<unk>",
+    "lstrip": false,
+    "normalized": false,
+    "rstrip": false,
+    "single_word": false
+  }
+}

checkpoint-51/tokenizer.model ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:9e556afd44213b6bd1be2b850ebbbd98f5481437a8021afaf58ee7fb1818d347
+size 499723

checkpoint-51/tokenizer_config.json CHANGED Viewed

@@ -1,15 +1,43 @@
 {
-  "backend": "tokenizers",
   "bos_token": "<s>",
   "clean_up_tokenization_spaces": false,
   "eos_token": "</s>",
-  "is_local": false,
   "legacy": false,
   "model_max_length": 2048,
   "pad_token": "</s>",
   "padding_side": "right",
   "sp_model_kwargs": {},
-  "tokenizer_class": "TokenizersBackend",
   "unk_token": "<unk>",
   "use_default_system_prompt": false
 }

 {
+  "add_bos_token": true,
+  "add_eos_token": false,
+  "add_prefix_space": null,
+  "added_tokens_decoder": {
+    "0": {
+      "content": "<unk>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "1": {
+      "content": "<s>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "2": {
+      "content": "</s>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    }
+  },
   "bos_token": "<s>",
   "clean_up_tokenization_spaces": false,
   "eos_token": "</s>",
+  "extra_special_tokens": {},
   "legacy": false,
   "model_max_length": 2048,
   "pad_token": "</s>",
   "padding_side": "right",
   "sp_model_kwargs": {},
+  "tokenizer_class": "LlamaTokenizer",
   "unk_token": "<unk>",
   "use_default_system_prompt": false
 }

checkpoint-51/trainer_state.json CHANGED Viewed

@@ -1,6 +1,6 @@
 {
   "best_global_step": 51,
-  "best_metric": 0.4870432913303375,
   "best_model_checkpoint": "out/taskmind_lora_peft/checkpoint-51",
   "epoch": 3.0,
   "eval_steps": 500,
@@ -10,86 +10,86 @@
   "is_world_process_zero": true,
   "log_history": [
     {
-      "entropy": 1.7736787021160125,
       "epoch": 0.6060606060606061,
-      "grad_norm": 1.3698776960372925,
       "learning_rate": 0.00018,
-      "loss": 2.2852725982666016,
-      "mean_token_accuracy": 0.5953301250934601,
       "num_tokens": 14004.0,
       "step": 10
     },
     {
       "epoch": 1.0,
-      "eval_entropy": 1.68980073928833,
-      "eval_loss": 1.5321311950683594,
-      "eval_mean_token_accuracy": 0.6843333045641581,
       "eval_num_tokens": 22901.0,
-      "eval_runtime": 0.8074,
-      "eval_samples_per_second": 29.726,
-      "eval_steps_per_second": 3.716,
       "step": 17
     },
     {
-      "entropy": 1.683378031379298,
       "epoch": 1.1818181818181819,
-      "grad_norm": 1.924538016319275,
       "learning_rate": 0.00017600000000000002,
-      "loss": 1.693488311767578,
-      "mean_token_accuracy": 0.6652053249509711,
       "num_tokens": 27108.0,
       "step": 20
     },
     {
-      "entropy": 1.0494545072317123,
       "epoch": 1.7878787878787878,
-      "grad_norm": 2.967770576477051,
       "learning_rate": 0.00014933333333333335,
-      "loss": 0.9196723937988281,
-      "mean_token_accuracy": 0.8220452398061753,
       "num_tokens": 41182.0,
       "step": 30
     },
     {
       "epoch": 2.0,
-      "eval_entropy": 0.5931996901830038,
-      "eval_loss": 0.5597708821296692,
-      "eval_mean_token_accuracy": 0.9000988006591797,
       "eval_num_tokens": 45802.0,
-      "eval_runtime": 0.8451,
-      "eval_samples_per_second": 28.399,
-      "eval_steps_per_second": 3.55,
       "step": 34
     },
     {
-      "entropy": 0.5629946903178566,
       "epoch": 2.3636363636363638,
-      "grad_norm": 0.812652051448822,
       "learning_rate": 0.00012266666666666668,
-      "loss": 0.5200267791748047,
-      "mean_token_accuracy": 0.9027030750324851,
       "num_tokens": 54283.0,
       "step": 40
     },
     {
-      "entropy": 0.4702113077044487,
       "epoch": 2.9696969696969697,
-      "grad_norm": 0.8443523645401001,
       "learning_rate": 9.6e-05,
-      "loss": 0.43752589225769045,
-      "mean_token_accuracy": 0.9210518777370453,
       "num_tokens": 68175.0,
       "step": 50
     },
     {
       "epoch": 3.0,
-      "eval_entropy": 0.5028095742066702,
-      "eval_loss": 0.4870432913303375,
-      "eval_mean_token_accuracy": 0.9127707481384277,
       "eval_num_tokens": 68703.0,
-      "eval_runtime": 0.8158,
-      "eval_samples_per_second": 29.42,
-      "eval_steps_per_second": 3.678,
       "step": 51
     }
   ],

 {
   "best_global_step": 51,
+  "best_metric": 0.4862091839313507,
   "best_model_checkpoint": "out/taskmind_lora_peft/checkpoint-51",
   "epoch": 3.0,
   "eval_steps": 500,
   "is_world_process_zero": true,
   "log_history": [
     {
+      "entropy": 1.772234356403351,
       "epoch": 0.6060606060606061,
+      "grad_norm": 1.3780473470687866,
       "learning_rate": 0.00018,
+      "loss": 2.2849,
+      "mean_token_accuracy": 0.5951868265867233,
       "num_tokens": 14004.0,
       "step": 10
     },
     {
       "epoch": 1.0,
+      "eval_entropy": 1.685779293378194,
+      "eval_loss": 1.5357812643051147,
+      "eval_mean_token_accuracy": 0.6872214078903198,
       "eval_num_tokens": 22901.0,
+      "eval_runtime": 2.3332,
+      "eval_samples_per_second": 10.286,
+      "eval_steps_per_second": 1.286,
       "step": 17
     },
     {
+      "entropy": 1.6779554203936928,
       "epoch": 1.1818181818181819,
+      "grad_norm": 1.9584565162658691,
       "learning_rate": 0.00017600000000000002,
+      "loss": 1.6949,
+      "mean_token_accuracy": 0.6669363661816246,
       "num_tokens": 27108.0,
       "step": 20
     },
     {
+      "entropy": 1.0465361922979355,
       "epoch": 1.7878787878787878,
+      "grad_norm": 2.725850820541382,
       "learning_rate": 0.00014933333333333335,
+      "loss": 0.9223,
+      "mean_token_accuracy": 0.8229832291603089,
       "num_tokens": 41182.0,
       "step": 30
     },
     {
       "epoch": 2.0,
+      "eval_entropy": 0.5895721216996511,
+      "eval_loss": 0.557042121887207,
+      "eval_mean_token_accuracy": 0.9006072084108988,
       "eval_num_tokens": 45802.0,
+      "eval_runtime": 1.4033,
+      "eval_samples_per_second": 17.102,
+      "eval_steps_per_second": 2.138,
       "step": 34
     },
     {
+      "entropy": 0.5621798069853532,
       "epoch": 2.3636363636363638,
+      "grad_norm": 0.853744626045227,
       "learning_rate": 0.00012266666666666668,
+      "loss": 0.5175,
+      "mean_token_accuracy": 0.902722396348652,
       "num_tokens": 54283.0,
       "step": 40
     },
     {
+      "entropy": 0.46219056397676467,
       "epoch": 2.9696969696969697,
+      "grad_norm": 0.8320155739784241,
       "learning_rate": 9.6e-05,
+      "loss": 0.4347,
+      "mean_token_accuracy": 0.9223286896944046,
       "num_tokens": 68175.0,
       "step": 50
     },
     {
       "epoch": 3.0,
+      "eval_entropy": 0.4948213994503021,
+      "eval_loss": 0.4862091839313507,
+      "eval_mean_token_accuracy": 0.9130085905392965,
       "eval_num_tokens": 68703.0,
+      "eval_runtime": 1.594,
+      "eval_samples_per_second": 15.056,
+      "eval_steps_per_second": 1.882,
       "step": 51
     }
   ],

checkpoint-51/training_args.bin CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:f892e424e706ccbc20b54bdefa043cf957b1d1afb44fc285f313a5b63e6fae88
-size 5649

 version https://git-lfs.github.com/spec/v1
+oid sha256:fdfc63bd124cf8b58acfe531d95973daba5a5e131127762949c75765ea5acb7f
+size 5880

checkpoint-68/README.md CHANGED Viewed

@@ -6,6 +6,7 @@ tags:
 - base_model:adapter:TinyLlama/TinyLlama-1.1B-Chat-v1.0
 - lora
 - sft
 - trl
 ---

 - base_model:adapter:TinyLlama/TinyLlama-1.1B-Chat-v1.0
 - lora
 - sft
+- transformers
 - trl
 ---

checkpoint-68/adapter_config.json CHANGED Viewed

@@ -3,7 +3,7 @@
   "alpha_pattern": {},
   "arrow_config": null,
   "auto_mapping": null,
-  "base_model_name_or_path": null,
   "bias": "none",
   "corda_config": null,
   "ensure_weight_tying": false,
@@ -29,8 +29,8 @@
   "rank_pattern": {},
   "revision": null,
   "target_modules": [
-    "q_proj",
-    "v_proj"
   ],
   "target_parameters": null,
   "task_type": "CAUSAL_LM",

   "alpha_pattern": {},
   "arrow_config": null,
   "auto_mapping": null,
+  "base_model_name_or_path": "TinyLlama/TinyLlama-1.1B-Chat-v1.0",
   "bias": "none",
   "corda_config": null,
   "ensure_weight_tying": false,
   "rank_pattern": {},
   "revision": null,
   "target_modules": [
+    "v_proj",
+    "q_proj"
   ],
   "target_parameters": null,
   "task_type": "CAUSAL_LM",

checkpoint-68/adapter_model.safetensors CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:c5f89f1890693babab3d866b2e8a613f65cd2b35fb037647c52a470ff7120923
-size 9031840

 version https://git-lfs.github.com/spec/v1
+oid sha256:7529ae8b88bdc50cbbcf3ca825f59a3c8f13d84ddeb69a1c58657217d654f6cf
+size 9022864

checkpoint-68/optimizer.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:b09a8ba509267f7748b1c2d8d7deccbf1d82a1c8f989e98f7f55a865a15a134e
-size 18098251

 version https://git-lfs.github.com/spec/v1
+oid sha256:c3500cc18261ac4bc8c870bec4c983b932c2093620b48f022fff41154f7cbdf8
+size 18094138

checkpoint-68/rng_state.pth CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:b5731891706ca26adbd237780583e1f8e07be9270d399b88ff66636755c6ee1b
-size 14645

 version https://git-lfs.github.com/spec/v1
+oid sha256:2fed77b14fe062f6db72d68cedd6fd95bae3305b7a735eef3c85da43fd15d476
+size 13990

checkpoint-68/scheduler.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:b806cf3ecca4093bbe2279bfbb38d11ce711d2a2dd45151a96a4308e767e9c7b
-size 1465

 version https://git-lfs.github.com/spec/v1
+oid sha256:db1cb9f8e0421507ae638faa350a64a3bc91c45ea59cad1a259e64ddce2114de
+size 1064

checkpoint-68/special_tokens_map.json ADDED Viewed

	@@ -0,0 +1,24 @@

+{
+  "bos_token": {
+    "content": "<s>",
+    "lstrip": false,
+    "normalized": false,
+    "rstrip": false,
+    "single_word": false
+  },
+  "eos_token": {
+    "content": "</s>",
+    "lstrip": false,
+    "normalized": false,
+    "rstrip": false,
+    "single_word": false
+  },
+  "pad_token": "</s>",
+  "unk_token": {
+    "content": "<unk>",
+    "lstrip": false,
+    "normalized": false,
+    "rstrip": false,
+    "single_word": false
+  }
+}

checkpoint-68/tokenizer.model ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:9e556afd44213b6bd1be2b850ebbbd98f5481437a8021afaf58ee7fb1818d347
+size 499723

checkpoint-68/tokenizer_config.json CHANGED Viewed

@@ -1,15 +1,43 @@
 {
-  "backend": "tokenizers",
   "bos_token": "<s>",
   "clean_up_tokenization_spaces": false,
   "eos_token": "</s>",
-  "is_local": false,
   "legacy": false,
   "model_max_length": 2048,
   "pad_token": "</s>",
   "padding_side": "right",
   "sp_model_kwargs": {},
-  "tokenizer_class": "TokenizersBackend",
   "unk_token": "<unk>",
   "use_default_system_prompt": false
 }

 {
+  "add_bos_token": true,
+  "add_eos_token": false,
+  "add_prefix_space": null,
+  "added_tokens_decoder": {
+    "0": {
+      "content": "<unk>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "1": {
+      "content": "<s>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "2": {
+      "content": "</s>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    }
+  },
   "bos_token": "<s>",
   "clean_up_tokenization_spaces": false,
   "eos_token": "</s>",
+  "extra_special_tokens": {},
   "legacy": false,
   "model_max_length": 2048,
   "pad_token": "</s>",
   "padding_side": "right",
   "sp_model_kwargs": {},
+  "tokenizer_class": "LlamaTokenizer",
   "unk_token": "<unk>",
   "use_default_system_prompt": false
 }

checkpoint-68/trainer_state.json CHANGED Viewed

@@ -1,6 +1,6 @@
 {
   "best_global_step": 68,
-  "best_metric": 0.47054529190063477,
   "best_model_checkpoint": "out/taskmind_lora_peft/checkpoint-68",
   "epoch": 4.0,
   "eval_steps": 500,
@@ -10,107 +10,107 @@
   "is_world_process_zero": true,
   "log_history": [
     {
-      "entropy": 1.7736787021160125,
       "epoch": 0.6060606060606061,
-      "grad_norm": 1.3698776960372925,
       "learning_rate": 0.00018,
-      "loss": 2.2852725982666016,
-      "mean_token_accuracy": 0.5953301250934601,
       "num_tokens": 14004.0,
       "step": 10
     },
     {
       "epoch": 1.0,
-      "eval_entropy": 1.68980073928833,
-      "eval_loss": 1.5321311950683594,
-      "eval_mean_token_accuracy": 0.6843333045641581,
       "eval_num_tokens": 22901.0,
-      "eval_runtime": 0.8074,
-      "eval_samples_per_second": 29.726,
-      "eval_steps_per_second": 3.716,
       "step": 17
     },
     {
-      "entropy": 1.683378031379298,
       "epoch": 1.1818181818181819,
-      "grad_norm": 1.924538016319275,
       "learning_rate": 0.00017600000000000002,
-      "loss": 1.693488311767578,
-      "mean_token_accuracy": 0.6652053249509711,
       "num_tokens": 27108.0,
       "step": 20
     },
     {
-      "entropy": 1.0494545072317123,
       "epoch": 1.7878787878787878,
-      "grad_norm": 2.967770576477051,
       "learning_rate": 0.00014933333333333335,
-      "loss": 0.9196723937988281,
-      "mean_token_accuracy": 0.8220452398061753,
       "num_tokens": 41182.0,
       "step": 30
     },
     {
       "epoch": 2.0,
-      "eval_entropy": 0.5931996901830038,
-      "eval_loss": 0.5597708821296692,
-      "eval_mean_token_accuracy": 0.9000988006591797,
       "eval_num_tokens": 45802.0,
-      "eval_runtime": 0.8451,
-      "eval_samples_per_second": 28.399,
-      "eval_steps_per_second": 3.55,
       "step": 34
     },
     {
-      "entropy": 0.5629946903178566,
       "epoch": 2.3636363636363638,
-      "grad_norm": 0.812652051448822,
       "learning_rate": 0.00012266666666666668,
-      "loss": 0.5200267791748047,
-      "mean_token_accuracy": 0.9027030750324851,
       "num_tokens": 54283.0,
       "step": 40
     },
     {
-      "entropy": 0.4702113077044487,
       "epoch": 2.9696969696969697,
-      "grad_norm": 0.8443523645401001,
       "learning_rate": 9.6e-05,
-      "loss": 0.43752589225769045,
-      "mean_token_accuracy": 0.9210518777370453,
       "num_tokens": 68175.0,
       "step": 50
     },
     {
       "epoch": 3.0,
-      "eval_entropy": 0.5028095742066702,
-      "eval_loss": 0.4870432913303375,
-      "eval_mean_token_accuracy": 0.9127707481384277,
       "eval_num_tokens": 68703.0,
-      "eval_runtime": 0.8158,
-      "eval_samples_per_second": 29.42,
-      "eval_steps_per_second": 3.678,
       "step": 51
     },
     {
-      "entropy": 0.4374191933556607,
       "epoch": 3.5454545454545454,
-      "grad_norm": 0.7779412865638733,
       "learning_rate": 6.933333333333334e-05,
-      "loss": 0.42637939453125,
-      "mean_token_accuracy": 0.9213762000987404,
       "num_tokens": 81270.0,
       "step": 60
     },
     {
       "epoch": 4.0,
-      "eval_entropy": 0.4798667828241984,
-      "eval_loss": 0.47054529190063477,
-      "eval_mean_token_accuracy": 0.9151907364527384,
       "eval_num_tokens": 91604.0,
-      "eval_runtime": 0.7904,
-      "eval_samples_per_second": 30.365,
-      "eval_steps_per_second": 3.796,
       "step": 68
     }
   ],

 {
   "best_global_step": 68,
+  "best_metric": 0.46859970688819885,
   "best_model_checkpoint": "out/taskmind_lora_peft/checkpoint-68",
   "epoch": 4.0,
   "eval_steps": 500,
   "is_world_process_zero": true,
   "log_history": [
     {
+      "entropy": 1.772234356403351,
       "epoch": 0.6060606060606061,
+      "grad_norm": 1.3780473470687866,
       "learning_rate": 0.00018,
+      "loss": 2.2849,
+      "mean_token_accuracy": 0.5951868265867233,
       "num_tokens": 14004.0,
       "step": 10
     },
     {
       "epoch": 1.0,
+      "eval_entropy": 1.685779293378194,
+      "eval_loss": 1.5357812643051147,
+      "eval_mean_token_accuracy": 0.6872214078903198,
       "eval_num_tokens": 22901.0,
+      "eval_runtime": 2.3332,
+      "eval_samples_per_second": 10.286,
+      "eval_steps_per_second": 1.286,
       "step": 17
     },
     {
+      "entropy": 1.6779554203936928,
       "epoch": 1.1818181818181819,
+      "grad_norm": 1.9584565162658691,
       "learning_rate": 0.00017600000000000002,
+      "loss": 1.6949,
+      "mean_token_accuracy": 0.6669363661816246,
       "num_tokens": 27108.0,
       "step": 20
     },
     {
+      "entropy": 1.0465361922979355,
       "epoch": 1.7878787878787878,
+      "grad_norm": 2.725850820541382,
       "learning_rate": 0.00014933333333333335,
+      "loss": 0.9223,
+      "mean_token_accuracy": 0.8229832291603089,
       "num_tokens": 41182.0,
       "step": 30
     },
     {
       "epoch": 2.0,
+      "eval_entropy": 0.5895721216996511,
+      "eval_loss": 0.557042121887207,
+      "eval_mean_token_accuracy": 0.9006072084108988,
       "eval_num_tokens": 45802.0,
+      "eval_runtime": 1.4033,
+      "eval_samples_per_second": 17.102,
+      "eval_steps_per_second": 2.138,
       "step": 34
     },
     {
+      "entropy": 0.5621798069853532,
       "epoch": 2.3636363636363638,
+      "grad_norm": 0.853744626045227,
       "learning_rate": 0.00012266666666666668,
+      "loss": 0.5175,
+      "mean_token_accuracy": 0.902722396348652,
       "num_tokens": 54283.0,
       "step": 40
     },
     {
+      "entropy": 0.46219056397676467,
       "epoch": 2.9696969696969697,
+      "grad_norm": 0.8320155739784241,
       "learning_rate": 9.6e-05,
+      "loss": 0.4347,
+      "mean_token_accuracy": 0.9223286896944046,
       "num_tokens": 68175.0,
       "step": 50
     },
     {
       "epoch": 3.0,
+      "eval_entropy": 0.4948213994503021,
+      "eval_loss": 0.4862091839313507,
+      "eval_mean_token_accuracy": 0.9130085905392965,
       "eval_num_tokens": 68703.0,
+      "eval_runtime": 1.594,
+      "eval_samples_per_second": 15.056,
+      "eval_steps_per_second": 1.882,
       "step": 51
     },
     {
+      "entropy": 0.4372325147453107,
       "epoch": 3.5454545454545454,
+      "grad_norm": 0.6874417662620544,
       "learning_rate": 6.933333333333334e-05,
+      "loss": 0.4267,
+      "mean_token_accuracy": 0.920473597551647,
       "num_tokens": 81270.0,
       "step": 60
     },
     {
       "epoch": 4.0,
+      "eval_entropy": 0.4760642449061076,
+      "eval_loss": 0.46859970688819885,
+      "eval_mean_token_accuracy": 0.9156773686408997,
       "eval_num_tokens": 91604.0,
+      "eval_runtime": 1.7122,
+      "eval_samples_per_second": 14.017,
+      "eval_steps_per_second": 1.752,
       "step": 68
     }
   ],

checkpoint-68/training_args.bin CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:f892e424e706ccbc20b54bdefa043cf957b1d1afb44fc285f313a5b63e6fae88
-size 5649

 version https://git-lfs.github.com/spec/v1
+oid sha256:fdfc63bd124cf8b58acfe531d95973daba5a5e131127762949c75765ea5acb7f
+size 5880

checkpoint-85/README.md CHANGED Viewed

@@ -6,6 +6,7 @@ tags:
 - base_model:adapter:TinyLlama/TinyLlama-1.1B-Chat-v1.0
 - lora
 - sft
 - trl
 ---

 - base_model:adapter:TinyLlama/TinyLlama-1.1B-Chat-v1.0
 - lora
 - sft
+- transformers
 - trl
 ---

checkpoint-85/adapter_config.json CHANGED Viewed

@@ -3,7 +3,7 @@
   "alpha_pattern": {},
   "arrow_config": null,
   "auto_mapping": null,
-  "base_model_name_or_path": null,
   "bias": "none",
   "corda_config": null,
   "ensure_weight_tying": false,
@@ -29,8 +29,8 @@
   "rank_pattern": {},
   "revision": null,
   "target_modules": [
-    "q_proj",
-    "v_proj"
   ],
   "target_parameters": null,
   "task_type": "CAUSAL_LM",

   "alpha_pattern": {},
   "arrow_config": null,
   "auto_mapping": null,
+  "base_model_name_or_path": "TinyLlama/TinyLlama-1.1B-Chat-v1.0",
   "bias": "none",
   "corda_config": null,
   "ensure_weight_tying": false,
   "rank_pattern": {},
   "revision": null,
   "target_modules": [
+    "v_proj",
+    "q_proj"
   ],
   "target_parameters": null,
   "task_type": "CAUSAL_LM",

checkpoint-85/adapter_model.safetensors CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:04e00888823aee279ac5505145e02829ec8de4f7b97637cd1eadb8e7bb486ccb
-size 9031840

 version https://git-lfs.github.com/spec/v1
+oid sha256:d0759e7617267665196662e1c86613a8737244bbac6bdc596e21dbfd3e571b57
+size 9022864