Training in progress, step 200
- README.md +2 -2
- adapter_config.json +4 -4
- adapter_model.safetensors +2 -2
- special_tokens_map.json +0 -1
- tokenizer.json +1 -6
- tokenizer_config.json +0 -1
- training_args.bin +1 -1
README.md CHANGED
@@ -4,8 +4,8 @@ library_name: transformers
 model_name: ida-starcoder-upgrade
 tags:
 - generated_from_trainer
-- trl
 - sft
+- trl
 licence: license
 ---
 
@@ -27,7 +27,7 @@ print(output["generated_text"])
 
 ## Training procedure
 
-[<img src="https://raw.githubusercontent.com/wandb/assets/main/wandb-github-badge-28.svg" alt="Visualize in Weights & Biases" width="150" height="24"/>](https://wandb.ai/
+[<img src="https://raw.githubusercontent.com/wandb/assets/main/wandb-github-badge-28.svg" alt="Visualize in Weights & Biases" width="150" height="24"/>](https://wandb.ai/ibargache7-school/starcoder2-weekend-upgrade/runs/vbt2sxxl)
 
 
 This model was trained with SFT.
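The `print(output["generated_text"])` line in the hunk header comes from the README's usage snippet, which implies a `pipeline`-based call. A minimal sketch of loading this LoRA adapter for generation; the repo id below is a placeholder, since the full hub path is not visible in this diff:

```python
# A minimal sketch, not the README's exact snippet: the repo id is hypothetical.
from peft import AutoPeftModelForCausalLM
from transformers import AutoTokenizer, pipeline

repo_id = "your-user/ida-starcoder-upgrade"  # placeholder hub path
model = AutoPeftModelForCausalLM.from_pretrained(repo_id)  # loads base model + adapter
tokenizer = AutoTokenizer.from_pretrained(repo_id)

pipe = pipeline("text-generation", model=model, tokenizer=tokenizer)
output = pipe("def fibonacci(n):")[0]
print(output["generated_text"])
```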
adapter_config.json CHANGED
@@ -13,7 +13,7 @@
   "layers_pattern": null,
   "layers_to_transform": null,
   "loftq_config": {},
-  "lora_alpha":
+  "lora_alpha": 64,
   "lora_bias": false,
   "lora_dropout": 0.1,
   "megatron_config": null,
@@ -21,12 +21,12 @@
   "modules_to_save": null,
   "peft_type": "LORA",
   "qalora_group_size": 16,
-  "r":
+  "r": 32,
   "rank_pattern": {},
   "revision": null,
   "target_modules": [
-    "
-    "
+    "c_proj",
+    "c_attn"
   ],
   "target_parameters": null,
   "task_type": "CAUSAL_LM",
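The new hyperparameters correspond to a PEFT `LoraConfig` like the sketch below; only the fields visible in the hunks above are set, everything else is left at its default:

```python
# Sketch of a LoraConfig matching the values this commit writes to
# adapter_config.json; values are taken directly from the diff.
from peft import LoraConfig

lora_config = LoraConfig(
    r=32,                                 # LoRA rank
    lora_alpha=64,                        # effective scaling = alpha / r = 2.0
    lora_dropout=0.1,
    target_modules=["c_proj", "c_attn"],  # GPT-2/GPTBigCode-style fused attention projections
    task_type="CAUSAL_LM",
)
```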
adapter_model.safetensors CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:
-size
+oid sha256:e5b294205d4d8a2660dafa86d8e71e980a135a71ba896fda438141bf7bf2987d
+size 57428272
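The two replaced lines are the Git LFS pointer fields: `oid` is the SHA-256 of the actual weights file and `size` its byte count. A quick integrity check after downloading, assuming the file sits in the working directory:

```python
# Verify a downloaded adapter_model.safetensors against the pointer's oid.
# The local path is an assumption.
import hashlib

with open("adapter_model.safetensors", "rb") as f:
    digest = hashlib.sha256(f.read()).hexdigest()

assert digest == "e5b294205d4d8a2660dafa86d8e71e980a135a71ba896fda438141bf7bf2987d"
```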
special_tokens_map.json CHANGED
@@ -34,7 +34,6 @@
     "rstrip": false,
     "single_word": false
   },
-  "pad_token": "<|endoftext|>",
   "unk_token": {
     "content": "<|endoftext|>",
     "lstrip": false,
tokenizer.json CHANGED
@@ -1,11 +1,6 @@
 {
   "version": "1.0",
-  "truncation": {
-    "direction": "Right",
-    "max_length": 1024,
-    "strategy": "LongestFirst",
-    "stride": 0
-  },
+  "truncation": null,
   "padding": null,
   "added_tokens": [
     {
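Setting `"truncation"` to `null` removes the 1024-token cap that was baked into the tokenizer, so inputs are no longer truncated unless the caller asks. A sketch of restoring the old behavior per call, with a placeholder repo id:

```python
# With "truncation": null, nothing is truncated by default; the old
# 1024-token cap must now be requested explicitly. Repo id is a placeholder.
from transformers import AutoTokenizer

tokenizer = AutoTokenizer.from_pretrained("your-user/ida-starcoder-upgrade")
long_text = "def f():\n    pass\n" * 500
ids = tokenizer(long_text, truncation=True, max_length=1024)["input_ids"]
assert len(ids) <= 1024
```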
tokenizer_config.json CHANGED
@@ -180,7 +180,6 @@
   "eos_token": "<|endoftext|>",
   "extra_special_tokens": {},
   "model_max_length": 1000000000000000019884624838656,
-  "pad_token": "<|endoftext|>",
   "tokenizer_class": "GPT2Tokenizer",
   "unk_token": "<|endoftext|>",
   "vocab_size": 49152
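This commit drops `pad_token` from both special_tokens_map.json and tokenizer_config.json, so batched padding now needs an explicit pad token at load time. A sketch that reuses the value the old configs carried:

```python
# After this commit the tokenizer ships without a pad_token, so padding
# must be configured by the caller. Repo id is a placeholder.
from transformers import AutoTokenizer

tokenizer = AutoTokenizer.from_pretrained("your-user/ida-starcoder-upgrade")
tokenizer.pad_token = tokenizer.eos_token  # "<|endoftext|>", the old pad_token value
batch = tokenizer(["def a():", "def b(x, y):"], padding=True, return_tensors="pt")
```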
training_args.bin CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:
+oid sha256:5aa36dc3fc2cc3cd7977871f1413c500f24a1816094900d7dae4d0dabb69ddd1
 size 6225
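training_args.bin is the pickled `TrainingArguments` object that the `Trainer` saves alongside checkpoints; only its hash changed here. A sketch of inspecting it locally:

```python
# Inspect the serialized TrainingArguments. weights_only=False is needed on
# recent torch versions because this is a pickled Python object, not a tensor
# archive; only load files you trust. Local path is an assumption.
import torch

args = torch.load("training_args.bin", weights_only=False)
print(args.learning_rate, args.num_train_epochs)
```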