akashdutta1030 committed on
Commit
4fb9037
·
verified ·
1 Parent(s): 8eb8b30

Upload training_config.json with huggingface_hub

Browse files
Files changed (1) hide show
  1. training_config.json +38 -0
training_config.json ADDED
@@ -0,0 +1,38 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "base_model": "unsloth/DeepSeek-R1-Distill-Llama-8B",
3
+ "task": "text-generation",
4
+ "language": "en",
5
+ "license": "apache-2.0",
6
+ "tags": [
7
+ "dyck-language",
8
+ "bracket-completion",
9
+ "reasoning",
10
+ "lora",
11
+ "fine-tuned"
12
+ ],
13
+ "training": {
14
+ "dataset_size": 60000,
15
+ "train_split": 0.95,
16
+ "lora_rank": 32,
17
+ "lora_alpha": 64,
18
+ "lora_dropout": 0.25,
19
+ "learning_rate": 3e-06,
20
+ "batch_size": 4,
21
+ "gradient_accumulation_steps": 32,
22
+ "effective_batch_size": 128,
23
+ "num_epochs": 4,
24
+ "warmup_ratio": 0.3,
25
+ "max_grad_norm": 0.05,
26
+ "optimizer": "adamw_torch",
27
+ "lr_scheduler": "linear"
28
+ },
29
+ "model_info": {
30
+ "lora_target_modules": [
31
+ "q_proj",
32
+ "k_proj",
33
+ "v_proj",
34
+ "o_proj"
35
+ ],
36
+ "max_seq_length": 2048
37
+ }
38
+ }