ThomasTheMaker committed on
Commit
5aeae39
·
verified ·
1 Parent(s): 1886453

Create train.json

Browse files
Files changed (1) hide show
  1. train.json +67 -0
train.json ADDED
@@ -0,0 +1,67 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
{
  "model_config": {
    "hub_model_name": "ThomasTheMaker/gm3-270m-tinygsm",
    "base_model_name": "unsloth/gemma-3-270m-it",
    "max_seq_length": 2048,
    "load_in_4bit": false,
    "load_in_8bit": false,
    "full_finetuning": true
  },
  "dataset_config": {
    "dataset_name": "TinyGSM/TinyGSM",
    "dataset_split": "train[:11000000]",
    "chat_template": "gemma3"
  },
  "lora_config": {
    "r": 256,
    "alpha_multiplier": 2,
    "dropout": 0.1,
    "bias": "none",
    "use_gradient_checkpointing": false,
    "random_state": 3407,
    "use_rslora": true,
    "loftq_config": null,
    "target_modules": [
      "q_proj",
      "k_proj",
      "v_proj",
      "o_proj",
      "gate_proj",
      "up_proj",
      "down_proj"
    ]
  },
  "training_config": {
    "per_device_train_batch_size": 16,
    "gradient_accumulation_steps": 4,
    "warmup_steps": 10,
    "max_steps": -1,
    "num_train_epochs": 1,
    "learning_rate": 5e-5,
    "weight_decay": 0.01,
    "lr_scheduler_type": "linear",
    "seed": 3407,
    "output_dir": "outputs",
    "report_to": "none",
    "optim": "adamw_8bit",
    "logging_steps": 1,
    "save_steps": 10000
  },
  "inference_config": {
    "max_new_tokens": 125,
    "temperature": 1.0,
    "top_p": 0.95,
    "top_k": 64,
    "do_sample": true
  },
  "saving_config": {
    "save_local": true,
    "save_16bit": false,
    "save_4bit": false,
    "save_lora": false,
    "push_to_hub": true
  },
  "logging_config": {
    "csv_log_enabled": true
  }
}