bgglee committed
Commit e300ec8 · 1 Parent(s): 3103750

v2 — added self-correction + optimized hyperparams

README.md CHANGED
@@ -1,20 +1,17 @@
- # NL2SQL Fine-Tuned Model (Full Merged)
- This model is fine-tuned from meta-llama/Llama-3.2-3B-Instruct.
- It has been merged so that AutoModelForCausalLM can load directly.
+ # NL2SQL Fine-Tuned Model v2
+ This is an updated version with self-correction and improved hyperparameters.

- ## Training Config
+ ## Config
  {
+ "version": "v2",
  "base_model": "./hf_models/meta-llama--Llama-3.2-3B-Instruct",
- "merged_model": "bgglee/NL2SQL_finetuned",
- "method": "LoRA (merged to full model)",
- "learning_rate": 0.0002,
- "epochs": 1,
- "batch_size": 1,
- "grad_accum": 8,
- "lora_r": 16,
- "lora_alpha": 16,
- "lora_dropout": 0.05,
- "max_len": 768,
- "dataset": "data/bird/BIRD_train_1434ea.json",
- "schema_dir": "data/bird/BIRD_SQLite"
+ "merged_repo": "bgglee/NL2SQL_finetuned",
+ "method": "LoRA (merged)",
+ "learning_rate": 0.0001,
+ "epochs": 2,
+ "batch_size": 2,
+ "grad_accum": 4,
+ "warmup_ratio": 0.05,
+ "weight_decay": 0.01,
+ "max_len": 1024
  }
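The previous README stated that the adapters were merged so AutoModelForCausalLM can load the weights directly, and the v2 config keeps the same merged repo id. A minimal loading sketch under that assumption (the prompt format here is illustrative, not necessarily the one used in training):

```python
# Minimal sketch, assuming the merged weights load with plain transformers.
# Repo id taken from "merged_repo" above; the prompt format is illustrative.
from transformers import AutoModelForCausalLM, AutoTokenizer

repo_id = "bgglee/NL2SQL_finetuned"
tokenizer = AutoTokenizer.from_pretrained(repo_id)
model = AutoModelForCausalLM.from_pretrained(repo_id, device_map="auto")

prompt = (
    "-- Schema: employees(id, name, salary)\n"
    "-- Question: List the three highest-paid employees.\n"
    "SQL:"
)
inputs = tokenizer(prompt, return_tensors="pt").to(model.device)
outputs = model.generate(**inputs, max_new_tokens=128)
print(tokenizer.decode(outputs[0], skip_special_tokens=True))
```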
finetune_config.json CHANGED
@@ -1,15 +1,13 @@
  {
+ "version": "v2",
  "base_model": "./hf_models/meta-llama--Llama-3.2-3B-Instruct",
- "merged_model": "bgglee/NL2SQL_finetuned",
- "method": "LoRA (merged to full model)",
- "learning_rate": 0.0002,
- "epochs": 1,
- "batch_size": 1,
- "grad_accum": 8,
- "lora_r": 16,
- "lora_alpha": 16,
- "lora_dropout": 0.05,
- "max_len": 768,
- "dataset": "data/bird/BIRD_train_1434ea.json",
- "schema_dir": "data/bird/BIRD_SQLite"
+ "merged_repo": "bgglee/NL2SQL_finetuned",
+ "method": "LoRA (merged)",
+ "learning_rate": 0.0001,
+ "epochs": 2,
+ "batch_size": 2,
+ "grad_accum": 4,
+ "warmup_ratio": 0.05,
+ "weight_decay": 0.01,
+ "max_len": 1024
  }
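For reference, a sketch of how the v2 values above could map onto a peft/transformers setup. Only the numbers shown in the diff come from the config; the LoRA rank, alpha, and dropout are carried over from the removed v1 entries because v2 no longer lists them, and output_dir is an illustrative placeholder:

```python
# Sketch mapping the v2 config onto peft / transformers objects.
# r / lora_alpha / lora_dropout are taken from the removed v1 entries (v2 omits them);
# output_dir is an illustrative placeholder.
from peft import LoraConfig
from transformers import TrainingArguments

lora_config = LoraConfig(
    r=16,
    lora_alpha=16,
    lora_dropout=0.05,
    task_type="CAUSAL_LM",
)

training_args = TrainingArguments(
    output_dir="nl2sql_v2",         # placeholder
    learning_rate=1e-4,             # "learning_rate": 0.0001
    num_train_epochs=2,             # "epochs": 2
    per_device_train_batch_size=2,  # "batch_size": 2
    gradient_accumulation_steps=4,  # "grad_accum": 4
    warmup_ratio=0.05,              # "warmup_ratio": 0.05
    weight_decay=0.01,              # "weight_decay": 0.01
)
```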
model-00002-of-00002.safetensors CHANGED
@@ -1,3 +1,3 @@
  version https://git-lfs.github.com/spec/v1
- oid sha256:7d61fe311e402b2d77bb462f3f57ae7189b28e7403a74aee165e815d964c53f1
+ oid sha256:27a56c7c360ac1ce923510880488bcb82e88b359bfdd32b56e6ff73e62638e98
  size 1459729952
tokenizer.json CHANGED
@@ -1,3 +1,3 @@
  version https://git-lfs.github.com/spec/v1
- oid sha256:6b9e4e7fb171f92fd137b777cc2714bf87d11576700a1dcd7a399e7bbe39537b
- size 17209920
+ oid sha256:52716f60c3ad328509fa37cdded9a2f1196ecae463f5480f5d38c66a25e7a7dc
+ size 17210019
training_loss.csv CHANGED
@@ -1,19 +1,37 @@
  step,loss
- 10,3.9184
- 20,0.0439
- 30,0.0499
- 40,0.0446
- 50,0.037
- 60,0.0405
- 70,0.0459
- 80,0.0385
- 90,0.036
- 100,0.0469
- 110,0.027
- 120,0.0409
- 130,0.0411
- 140,0.042
- 150,0.0447
- 160,0.0266
- 170,0.0385
- 180,0.0452
+ 10,6.7013
+ 20,1.932
+ 30,0.0372
+ 40,0.0363
+ 50,0.0271
+ 60,0.0308
+ 70,0.0333
+ 80,0.0342
+ 90,0.0275
+ 100,0.0304
+ 110,0.0202
+ 120,0.0277
+ 130,0.0359
+ 140,0.0321
+ 150,0.0316
+ 160,0.0203
+ 170,0.0272
+ 180,0.0296
+ 190,0.0302
+ 200,0.0214
+ 210,0.0271
+ 220,0.0251
+ 230,0.0244
+ 240,0.0238
+ 250,0.0267
+ 260,0.0239
+ 270,0.0199
+ 280,0.037
+ 290,0.0213
+ 300,0.0224
+ 310,0.0223
+ 320,0.0314
+ 330,0.0214
+ 340,0.0271
+ 350,0.0246
+ 360,0.0287
training_loss.png CHANGED
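training_loss.png is presumably the rendered curve for the CSV above; a minimal sketch that would regenerate a similar plot from training_loss.csv (the column names step and loss come from the header, the styling is arbitrary):

```python
# Minimal sketch: re-plot the loss curve from training_loss.csv.
# Column names (step, loss) come from the CSV header; styling is arbitrary.
import pandas as pd
import matplotlib.pyplot as plt

df = pd.read_csv("training_loss.csv")
plt.plot(df["step"], df["loss"])
plt.xlabel("step")
plt.ylabel("loss")
plt.title("NL2SQL fine-tuning loss (v2)")
plt.savefig("training_loss.png", dpi=150)
```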
wrong_ids.json CHANGED
@@ -1,12 +0,0 @@
- [
- 98,
- 1373,
- 597,
- 45,
- 596,
- 444,
- 288,
- 999,
- 321,
- 580
- ]
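The commit message mentions self-correction, and this change also removes wrong_ids.json, but the mechanism itself is not shown in the diff. A purely hypothetical sketch of what an execution-guided retry loop over the BIRD SQLite databases could look like (generate_sql is a placeholder for a call into the fine-tuned model):

```python
# Hypothetical sketch only: the commit message says "self-correction", but the
# actual mechanism is not part of this diff. generate_sql is a placeholder for
# a call into the fine-tuned model; db_path points at one BIRD SQLite database.
import sqlite3

def self_correct(question, db_path, generate_sql, max_retries=2):
    """Generate SQL, try to execute it, and feed any error back for a retry."""
    conn = sqlite3.connect(db_path)
    try:
        sql = generate_sql(question)
        for _ in range(max_retries):
            try:
                conn.execute(sql)
                break  # query parsed and executed, keep it
            except sqlite3.Error as err:
                sql = generate_sql(
                    f"{question}\nPrevious SQL failed with: {err}\nReturn a corrected query."
                )
        return sql
    finally:
        conn.close()
```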