{
  "global": {
    "train": "/projects/users/mgillele/multilingual-segmentation-dataset/data/training_data/segmented/split/multilingual/train.json",
    "test": "/projects/users/mgillele/multilingual-segmentation-dataset/data/training_data/segmented/split/multilingual/test.json",
    "dev": "/projects/users/mgillele/multilingual-segmentation-dataset/data/training_data/segmented/split/multilingual/dev.json",
    "import": "/projects/users/mgillele/Aquilign",
    "base_model_name": "google-bert/bert-base-multilingual-cased",
    "out_dir": "/projects/users/mgillele/trash/test_segmenter",
    "device": "cuda:0",
    "data_augmentation": true,
    "freeze_embeddings": false,
    "emb_dim": 300,
    "freeze_lang_embeddings": false,
    "linear_layers": 3,
    "linear_layers_hidden_size": 128,
    "balance_class_weights": false,
    "include_lang_metadata": false,
    "lang_emb_dim": 32,
    "epochs": 30,
    "use_pretrained_embeddings": true,
    "use_bert_tokenizer": true,
    "lr": 1e-05,
    "workers": 8,
    "batch_size": 64,
    "eval_batch_size": 64,
    "segments_max_length": 18,
    "model_path": "models/best/best.pt"
  },
  "architecture": {
    "name": "BERT"
  }
}