End of training

Files changed (6) hide show

README.md CHANGED Viewed

@@ -16,7 +16,7 @@ should probably proofread and complete it, then remove this comment. -->
 # sqlcoder-7b-sqli
-This model is a fine-tuned version of [defog/sqlcoder-7b](https://huggingface.co/defog/sqlcoder-7b) on the None dataset.
 ## Model description
@@ -39,12 +39,12 @@ The following hyperparameters were used during training:
 - train_batch_size: 1
 - eval_batch_size: 8
 - seed: 42
-- gradient_accumulation_steps: 8
-- total_train_batch_size: 8
 - optimizer: Use OptimizerNames.PAGED_ADAMW with betas=(0.9,0.999) and epsilon=1e-08 and optimizer_args=No additional optimizer arguments
 - lr_scheduler_type: linear
-- lr_scheduler_warmup_ratio: 0.03
-- training_steps: 200
 ### Training results
@@ -55,5 +55,5 @@ The following hyperparameters were used during training:
 - PEFT 0.15.2.dev0
 - Transformers 4.51.3
 - Pytorch 2.6.0+cu124
-- Datasets 3.5.0
 - Tokenizers 0.21.1

 # sqlcoder-7b-sqli
+This model is a fine-tuned version of [defog/sqlcoder-7b](https://huggingface.co/defog/sqlcoder-7b) on an unknown dataset.
 ## Model description
 - train_batch_size: 1
 - eval_batch_size: 8
 - seed: 42
+- gradient_accumulation_steps: 4
+- total_train_batch_size: 4
 - optimizer: Use OptimizerNames.PAGED_ADAMW with betas=(0.9,0.999) and epsilon=1e-08 and optimizer_args=No additional optimizer arguments
 - lr_scheduler_type: linear
+- lr_scheduler_warmup_ratio: 0.05
+- training_steps: 100
 ### Training results
 - PEFT 0.15.2.dev0
 - Transformers 4.51.3
 - Pytorch 2.6.0+cu124
+- Datasets 3.6.0
 - Tokenizers 0.21.1

adapter_config.json CHANGED Viewed

@@ -13,7 +13,7 @@
   "layers_pattern": null,
   "layers_to_transform": null,
   "loftq_config": {},
-  "lora_alpha": 64,
   "lora_bias": false,
   "lora_dropout": 0.05,
   "megatron_config": null,
@@ -22,16 +22,16 @@
     "lm_head"
   ],
   "peft_type": "LORA",
-  "r": 32,
   "rank_pattern": {},
   "revision": null,
   "target_modules": [
     "up_proj",
-    "q_proj",
     "k_proj",
-    "v_proj",
-    "gate_proj",
-    "o_proj"
   ],
   "task_type": "CAUSAL_LM",
   "trainable_token_indices": null,

   "layers_pattern": null,
   "layers_to_transform": null,
   "loftq_config": {},
+  "lora_alpha": 32,
   "lora_bias": false,
   "lora_dropout": 0.05,
   "megatron_config": null,
     "lm_head"
   ],
   "peft_type": "LORA",
+  "r": 16,
   "rank_pattern": {},
   "revision": null,
   "target_modules": [
+    "gate_proj",
+    "v_proj",
     "up_proj",
     "k_proj",
+    "o_proj",
+    "q_proj"
   ],
   "task_type": "CAUSAL_LM",
   "trainable_token_indices": null,

adapter_model.safetensors CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:6e450e66eae402e0645fdd5854970f01c1eb3389d4249310250e0fe004ef7968
-size 392219992

 version https://git-lfs.github.com/spec/v1
+oid sha256:ef11d96bdff5e4087096bb77a64550becce58d96d3a284a8fdd55bd5db3f11ae
+size 327208280

model.safetensors CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:d3a5363549a83cfc81d3adb05ec1814b146f207f83b737b16393b872db380f18
-size 4518449388

 version https://git-lfs.github.com/spec/v1
+oid sha256:a9c2db11f29c8c063ea9726100cd651a31379213f88e877aa2390f132d375434
+size 4453437644

tokenizer.json CHANGED Viewed

@@ -2,7 +2,7 @@
   "version": "1.0",
   "truncation": {
     "direction": "Right",
-    "max_length": 512,
     "strategy": "LongestFirst",
     "stride": 0
   },

   "version": "1.0",
   "truncation": {
     "direction": "Right",
+    "max_length": 384,
     "strategy": "LongestFirst",
     "stride": 0
   },

training_args.bin CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:e170a7de62521457c6d18349b9f1cb02a4a7bed3b41640e677241635b28a213e
 size 5304

 version https://git-lfs.github.com/spec/v1
+oid sha256:45b6e0cd9a15580cf37d3bfa5fd0ef557593f1eeca984b4a1041b538d99824b6
 size 5304