lamossta committed on
Commit
b6d276f
·
1 Parent(s): 4ce549d

models config files for training

Browse files
data/config_marker.json ADDED
@@ -0,0 +1,21 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "mode": "marker",
3
+ "data_path": "data/data_augmented_256.jsonl",
4
+ "output_dir": "models/marker",
5
+ "model_name": "distilbert-base-uncased",
6
+ "max_len": 256,
7
+ "num_train_epochs": 3,
8
+ "per_device_train_batch_size": 32,
9
+ "per_device_eval_batch_size": 64,
10
+ "gradient_accumulation_steps": 1,
11
+ "learning_rate": 1e-5,
12
+ "warmup_ratio": 0.1,
13
+ "weight_decay": 0.1,
14
+ "val_split": 0.1,
15
+ "test_split": 0.1,
16
+ "early_stopping_patience": 2,
17
+ "fp16": true,
18
+ "seed": 42,
19
+ "logging_steps": 50,
20
+ "save_total_limit": 2
21
+ }
data/config_marker_focal.json ADDED
@@ -0,0 +1,23 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "mode": "marker",
3
+ "data_path": "data/data_augmented_256.jsonl",
4
+ "output_dir": "models/marker",
5
+ "model_name": "distilbert-base-uncased",
6
+ "max_len": 256,
7
+ "num_train_epochs": 3,
8
+ "per_device_train_batch_size": 32,
9
+ "per_device_eval_batch_size": 64,
10
+ "gradient_accumulation_steps": 1,
11
+ "learning_rate": 1e-5,
12
+ "warmup_ratio": 0.1,
13
+ "weight_decay": 0.1,
14
+ "val_split": 0.1,
15
+ "test_split": 0.1,
16
+ "early_stopping_patience": 2,
17
+ "fp16": true,
18
+ "seed": 42,
19
+ "logging_steps": 50,
20
+ "save_total_limit": 2,
21
+ "loss_fn": "focal",
22
+ "focal_gamma": 2.0
23
+ }
data/config_qa_b.json ADDED
@@ -0,0 +1,21 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "mode": "qa_b",
3
+ "data_path": "data/data_augmented_256.jsonl",
4
+ "output_dir": "models/qa_b",
5
+ "model_name": "distilbert-base-uncased",
6
+ "max_len": 256,
7
+ "num_train_epochs": 3,
8
+ "per_device_train_batch_size": 32,
9
+ "per_device_eval_batch_size": 64,
10
+ "gradient_accumulation_steps": 1,
11
+ "learning_rate": 1e-5,
12
+ "warmup_ratio": 0.1,
13
+ "weight_decay": 0.1,
14
+ "val_split": 0.1,
15
+ "test_split": 0.1,
16
+ "early_stopping_patience": 2,
17
+ "fp16": true,
18
+ "seed": 42,
19
+ "logging_steps": 50,
20
+ "save_total_limit": 2
21
+ }
data/config_qa_b_focal.json ADDED
@@ -0,0 +1,23 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "mode": "qa_b",
3
+ "data_path": "data/data_augmented_256.jsonl",
4
+ "output_dir": "models/qa_b",
5
+ "model_name": "distilbert-base-uncased",
6
+ "max_len": 256,
7
+ "num_train_epochs": 3,
8
+ "per_device_train_batch_size": 32,
9
+ "per_device_eval_batch_size": 64,
10
+ "gradient_accumulation_steps": 1,
11
+ "learning_rate": 1e-5,
12
+ "warmup_ratio": 0.1,
13
+ "weight_decay": 0.1,
14
+ "val_split": 0.1,
15
+ "test_split": 0.1,
16
+ "early_stopping_patience": 2,
17
+ "fp16": true,
18
+ "seed": 42,
19
+ "logging_steps": 50,
20
+ "save_total_limit": 2,
21
+ "loss_fn": "focal",
22
+ "focal_gamma": 2.0
23
+ }
data/config_qa_m.json ADDED
@@ -0,0 +1,21 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "mode": "qa_m",
3
+ "data_path": "data/data_augmented_256.jsonl",
4
+ "output_dir": "models/qa_m",
5
+ "model_name": "distilbert-base-uncased",
6
+ "max_len": 256,
7
+ "num_train_epochs": 3,
8
+ "per_device_train_batch_size": 32,
9
+ "per_device_eval_batch_size": 64,
10
+ "gradient_accumulation_steps": 1,
11
+ "learning_rate": 1e-5,
12
+ "warmup_ratio": 0.1,
13
+ "weight_decay": 0.1,
14
+ "val_split": 0.1,
15
+ "test_split": 0.1,
16
+ "early_stopping_patience": 2,
17
+ "fp16": true,
18
+ "seed": 42,
19
+ "logging_steps": 50,
20
+ "save_total_limit": 2
21
+ }
data/config_qa_m_focal.json ADDED
@@ -0,0 +1,23 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "mode": "qa_m",
3
+ "data_path": "data/data_augmented_256.jsonl",
4
+ "output_dir": "models/qa_m",
5
+ "model_name": "distilbert-base-uncased",
6
+ "max_len": 256,
7
+ "num_train_epochs": 3,
8
+ "per_device_train_batch_size": 32,
9
+ "per_device_eval_batch_size": 64,
10
+ "gradient_accumulation_steps": 1,
11
+ "learning_rate": 1e-5,
12
+ "warmup_ratio": 0.1,
13
+ "weight_decay": 0.1,
14
+ "val_split": 0.1,
15
+ "test_split": 0.1,
16
+ "early_stopping_patience": 2,
17
+ "fp16": true,
18
+ "seed": 42,
19
+ "logging_steps": 50,
20
+ "save_total_limit": 2,
21
+ "loss_fn": "focal",
22
+ "focal_gamma": 2.0
23
+ }