bgglee committed
Commit e300ec8 · 1 Parent(s): 3103750

v2 — added self-correction + optimized hyperparams

README.md CHANGED
@@ -1,20 +1,17 @@
- # NL2SQL Fine-Tuned Model (Full Merged)
- This model is fine-tuned from meta-llama/Llama-3.2-3B-Instruct.
- It has been merged so that AutoModelForCausalLM can load directly.
+ # NL2SQL Fine-Tuned Model v2
+ This is an updated version with self-correction and improved hyperparameters.

- ## Training Config
+ ## Config
  {
+ "version": "v2",
  "base_model": "./hf_models/meta-llama--Llama-3.2-3B-Instruct",
- "merged_model": "bgglee/NL2SQL_finetuned",
- "method": "LoRA (merged to full model)",
- "learning_rate": 0.0002,
- "epochs": 1,
- "batch_size": 1,
- "grad_accum": 8,
- "lora_r": 16,
- "lora_alpha": 16,
- "lora_dropout": 0.05,
- "max_len": 768,
- "dataset": "data/bird/BIRD_train_1434ea.json",
- "schema_dir": "data/bird/BIRD_SQLite"
+ "merged_repo": "bgglee/NL2SQL_finetuned",
+ "method": "LoRA (merged)",
+ "learning_rate": 0.0001,
+ "epochs": 2,
+ "batch_size": 2,
+ "grad_accum": 4,
+ "warmup_ratio": 0.05,
+ "weight_decay": 0.01,
+ "max_len": 1024
  }
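The previous README stated that the adapters were merged so AutoModelForCausalLM can load the weights directly, and the v2 config keeps the same merged repo id. A minimal loading sketch under that assumption (the prompt format here is illustrative, not necessarily the one used in training):

```python
# Minimal sketch, assuming the merged weights load with plain transformers.
# Repo id taken from "merged_repo" above; the prompt format is illustrative.
from transformers import AutoModelForCausalLM, AutoTokenizer

repo_id = "bgglee/NL2SQL_finetuned"
tokenizer = AutoTokenizer.from_pretrained(repo_id)
model = AutoModelForCausalLM.from_pretrained(repo_id, device_map="auto")

prompt = (
    "-- Schema: employees(id, name, salary)\n"
    "-- Question: List the three highest-paid employees.\n"
    "SQL:"
)
inputs = tokenizer(prompt, return_tensors="pt").to(model.device)
outputs = model.generate(**inputs, max_new_tokens=128)
print(tokenizer.decode(outputs[0], skip_special_tokens=True))
```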
finetune_config.json CHANGED
@@ -1,15 +1,13 @@
  {
+ "version": "v2",
  "base_model": "./hf_models/meta-llama--Llama-3.2-3B-Instruct",
- "merged_model": "bgglee/NL2SQL_finetuned",
- "method": "LoRA (merged to full model)",
- "learning_rate": 0.0002,
- "epochs": 1,
- "batch_size": 1,
- "grad_accum": 8,
- "lora_r": 16,
- "lora_alpha": 16,
- "lora_dropout": 0.05,
- "max_len": 768,
- "dataset": "data/bird/BIRD_train_1434ea.json",
- "schema_dir": "data/bird/BIRD_SQLite"
+ "merged_repo": "bgglee/NL2SQL_finetuned",
+ "method": "LoRA (merged)",
+ "learning_rate": 0.0001,
+ "epochs": 2,
+ "batch_size": 2,
+ "grad_accum": 4,
+ "warmup_ratio": 0.05,
+ "weight_decay": 0.01,
+ "max_len": 1024
  }
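For reference, a sketch of how the v2 values above could map onto a peft/transformers setup. Only the numbers shown in the diff come from the config; the LoRA rank, alpha, and dropout are carried over from the removed v1 entries because v2 no longer lists them, and output_dir is an illustrative placeholder:

```python
# Sketch mapping the v2 config onto peft / transformers objects.
# r / lora_alpha / lora_dropout are taken from the removed v1 entries (v2 omits them);
# output_dir is an illustrative placeholder.
from peft import LoraConfig
from transformers import TrainingArguments

lora_config = LoraConfig(
    r=16,
    lora_alpha=16,
    lora_dropout=0.05,
    task_type="CAUSAL_LM",
)

training_args = TrainingArguments(
    output_dir="nl2sql_v2",         # placeholder
    learning_rate=1e-4,             # "learning_rate": 0.0001
    num_train_epochs=2,             # "epochs": 2
    per_device_train_batch_size=2,  # "batch_size": 2
    gradient_accumulation_steps=4,  # "grad_accum": 4
    warmup_ratio=0.05,              # "warmup_ratio": 0.05
    weight_decay=0.01,              # "weight_decay": 0.01
)
```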
model-00002-of-00002.safetensors CHANGED
@@ -1,3 +1,3 @@
  version https://git-lfs.github.com/spec/v1
- oid sha256:7d61fe311e402b2d77bb462f3f57ae7189b28e7403a74aee165e815d964c53f1
+ oid sha256:27a56c7c360ac1ce923510880488bcb82e88b359bfdd32b56e6ff73e62638e98
  size 1459729952
tokenizer.json CHANGED
@@ -1,3 +1,3 @@
  version https://git-lfs.github.com/spec/v1
- oid sha256:6b9e4e7fb171f92fd137b777cc2714bf87d11576700a1dcd7a399e7bbe39537b
- size 17209920
+ oid sha256:52716f60c3ad328509fa37cdded9a2f1196ecae463f5480f5d38c66a25e7a7dc
+ size 17210019
training_loss.csv CHANGED
@@ -1,19 +1,37 @@
  step,loss
- 10,3.9184
- 20,0.0439
- 30,0.0499
- 40,0.0446
- 50,0.037
- 60,0.0405
- 70,0.0459
- 80,0.0385
- 90,0.036
- 100,0.0469
- 110,0.027
- 120,0.0409
- 130,0.0411
- 140,0.042
- 150,0.0447
- 160,0.0266
- 170,0.0385
- 180,0.0452
+ 10,6.7013
+ 20,1.932
+ 30,0.0372
+ 40,0.0363
+ 50,0.0271
+ 60,0.0308
+ 70,0.0333
+ 80,0.0342
+ 90,0.0275
+ 100,0.0304
+ 110,0.0202
+ 120,0.0277
+ 130,0.0359
+ 140,0.0321
+ 150,0.0316
+ 160,0.0203
+ 170,0.0272
+ 180,0.0296
+ 190,0.0302
+ 200,0.0214
+ 210,0.0271
+ 220,0.0251
+ 230,0.0244
+ 240,0.0238
+ 250,0.0267
+ 260,0.0239
+ 270,0.0199
+ 280,0.037
+ 290,0.0213
+ 300,0.0224
+ 310,0.0223
+ 320,0.0314
+ 330,0.0214
+ 340,0.0271
+ 350,0.0246
+ 360,0.0287
training_loss.png CHANGED
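training_loss.png is presumably the rendered curve for the CSV above; a minimal sketch that would regenerate a similar plot from training_loss.csv (the column names step and loss come from the header, the styling is arbitrary):

```python
# Minimal sketch: re-plot the loss curve from training_loss.csv.
# Column names (step, loss) come from the CSV header; styling is arbitrary.
import pandas as pd
import matplotlib.pyplot as plt

df = pd.read_csv("training_loss.csv")
plt.plot(df["step"], df["loss"])
plt.xlabel("step")
plt.ylabel("loss")
plt.title("NL2SQL fine-tuning loss (v2)")
plt.savefig("training_loss.png", dpi=150)
```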
wrong_ids.json CHANGED
@@ -1,12 +0,0 @@
- [
- 98,
- 1373,
- 597,
- 45,
- 596,
- 444,
- 288,
- 999,
- 321,
- 580
- ]
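The commit message mentions self-correction, and this change also removes wrong_ids.json, but the mechanism itself is not shown in the diff. A purely hypothetical sketch of what an execution-guided retry loop over the BIRD SQLite databases could look like (generate_sql is a placeholder for a call into the fine-tuned model):

```python
# Hypothetical sketch only: the commit message says "self-correction", but the
# actual mechanism is not part of this diff. generate_sql is a placeholder for
# a call into the fine-tuned model; db_path points at one BIRD SQLite database.
import sqlite3

def self_correct(question, db_path, generate_sql, max_retries=2):
    """Generate SQL, try to execute it, and feed any error back for a retry."""
    conn = sqlite3.connect(db_path)
    try:
        sql = generate_sql(question)
        for _ in range(max_retries):
            try:
                conn.execute(sql)
                break  # query parsed and executed, keep it
            except sqlite3.Error as err:
                sql = generate_sql(
                    f"{question}\nPrevious SQL failed with: {err}\nReturn a corrected query."
                )
        return sql
    finally:
        conn.close()
```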