Spaces:

lamossta
/

sv-task

Sleeping

lamossta committed on Apr 20

Commit

b6d276f

1 Parent(s): 4ce549d

models config files for training

Files changed (6) hide show

data/config_marker.json ADDED Viewed

+{
+    "mode": "marker",
+    "data_path": "data/data_augmented_256.jsonl",
+    "output_dir": "models/marker",
+    "model_name": "distilbert-base-uncased",
+    "max_len": 256,
+    "num_train_epochs": 3,
+    "per_device_train_batch_size": 32,
+    "per_device_eval_batch_size": 64,
+    "gradient_accumulation_steps": 1,
+    "learning_rate": 1e-5,
+    "warmup_ratio": 0.1,
+    "weight_decay": 0.1,
+    "val_split": 0.1,
+    "test_split": 0.1,
+    "early_stopping_patience": 2,
+    "fp16": true,
+    "seed": 42,
+    "logging_steps": 50,
+    "save_total_limit": 2
+}

data/config_marker_focal.json ADDED Viewed

+{
+    "mode": "marker",
+    "data_path": "data/data_augmented_256.jsonl",
+    "output_dir": "models/marker",
+    "model_name": "distilbert-base-uncased",
+    "max_len": 256,
+    "num_train_epochs": 3,
+    "per_device_train_batch_size": 32,
+    "per_device_eval_batch_size": 64,
+    "gradient_accumulation_steps": 1,
+    "learning_rate": 1e-5,
+    "warmup_ratio": 0.1,
+    "weight_decay": 0.1,
+    "val_split": 0.1,
+    "test_split": 0.1,
+    "early_stopping_patience": 2,
+    "fp16": true,
+    "seed": 42,
+    "logging_steps": 50,
+    "save_total_limit": 2,
+    "loss_fn": "focal",
+    "focal_gamma": 2.0
+}

data/config_qa_b.json ADDED Viewed

+{
+    "mode": "qa_b",
+    "data_path": "data/data_augmented_256.jsonl",
+    "output_dir": "models/qa_b",
+    "model_name": "distilbert-base-uncased",
+    "max_len": 256,
+    "num_train_epochs": 3,
+    "per_device_train_batch_size": 32,
+    "per_device_eval_batch_size": 64,
+    "gradient_accumulation_steps": 1,
+    "learning_rate": 1e-5,
+    "warmup_ratio": 0.1,
+    "weight_decay": 0.1,
+    "val_split": 0.1,
+    "test_split": 0.1,
+    "early_stopping_patience": 2,
+    "fp16": true,
+    "seed": 42,
+    "logging_steps": 50,
+    "save_total_limit": 2
+}

data/config_qa_b_focal.json ADDED Viewed

+{
+    "mode": "qa_b",
+    "data_path": "data/data_augmented_256.jsonl",
+    "output_dir": "models/qa_b",
+    "model_name": "distilbert-base-uncased",
+    "max_len": 256,
+    "num_train_epochs": 3,
+    "per_device_train_batch_size": 32,
+    "per_device_eval_batch_size": 64,
+    "gradient_accumulation_steps": 1,
+    "learning_rate": 1e-5,
+    "warmup_ratio": 0.1,
+    "weight_decay": 0.1,
+    "val_split": 0.1,
+    "test_split": 0.1,
+    "early_stopping_patience": 2,
+    "fp16": true,
+    "seed": 42,
+    "logging_steps": 50,
+    "save_total_limit": 2,
+    "loss_fn": "focal",
+    "focal_gamma": 2.0
+}

data/config_qa_m.json ADDED Viewed

+{
+    "mode": "qa_m",
+    "data_path": "data/data_augmented_256.jsonl",
+    "output_dir": "models/qa_m",
+    "model_name": "distilbert-base-uncased",
+    "max_len": 256,
+    "num_train_epochs": 3,
+    "per_device_train_batch_size": 32,
+    "per_device_eval_batch_size": 64,
+    "gradient_accumulation_steps": 1,
+    "learning_rate": 1e-5,
+    "warmup_ratio": 0.1,
+    "weight_decay": 0.1,
+    "val_split": 0.1,
+    "test_split": 0.1,
+    "early_stopping_patience": 2,
+    "fp16": true,
+    "seed": 42,
+    "logging_steps": 50,
+    "save_total_limit": 2
+}

data/config_qa_m_focal.json ADDED Viewed

+{
+    "mode": "qa_m",
+    "data_path": "data/data_augmented_256.jsonl",
+    "output_dir": "models/qa_m",
+    "model_name": "distilbert-base-uncased",
+    "max_len": 256,
+    "num_train_epochs": 3,
+    "per_device_train_batch_size": 32,
+    "per_device_eval_batch_size": 64,
+    "gradient_accumulation_steps": 1,
+    "learning_rate": 1e-5,
+    "warmup_ratio": 0.1,
+    "weight_decay": 0.1,
+    "val_split": 0.1,
+    "test_split": 0.1,
+    "early_stopping_patience": 2,
+    "fp16": true,
+    "seed": 42,
+    "logging_steps": 50,
+    "save_total_limit": 2,
+    "loss_fn": "focal",
+    "focal_gamma": 2.0
+}