ycleungaj committed on
Commit
cdc3754
·
verified ·
1 Parent(s): 06d9d53

Upload 2 files

Browse files
Files changed (2) hide show
  1. config.yaml +33 -0
  2. mapping.json +52 -0
config.yaml ADDED
@@ -0,0 +1,33 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ base_model_config:
2
+ base_model: mbert-uncased
3
+ apply_lora_flag: True
4
+ freeze_transformer_flag: False
5
+ num_unique_labels: 20
6
+
7
+ training_config:
8
+ # output directories (model checkpoints and logs)
9
+ output_dir: ../model_checkpoints/lora
10
+ logging_dir: ../logs/lora
11
+
12
+ # LoRA config
13
+ lora_rank: 8
14
+ lora_alpha: 16
15
+ target_modules: ["query", "key", "value"]
16
+ lora_dropout: 0.1
17
+
18
+ # training config
19
+ learning_rate: 0.00005
20
+ eval_steps: 500
21
+ save_steps: 500
22
+ logging_steps: 100
23
+ per_device_train_batch_size: 32
24
+ per_device_eval_batch_size: 32
25
+ num_train_epochs: 20
26
+ weight_decay: 0.01
27
+ early_stopping_patience: 5
28
+ early_stopping_threshold: 0.01
29
+
30
+ inference_config:
31
+ # paths (note: saving_dir points to a CSV file, not a directory)
32
+ model_path: ../model_checkpoints/lora
33
+ saving_dir: ../results/lora/test_split.csv
mapping.json ADDED
@@ -0,0 +1,52 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "models": {
3
+ "mbert-cased": "google-bert/bert-base-multilingual-cased",
4
+ "mbert-uncased": "google-bert/bert-base-multilingual-uncased"
5
+ },
6
+
7
+ "id2label": {
8
+ "0": "ar",
9
+ "1": "bg",
10
+ "2": "de",
11
+ "3": "el",
12
+ "4": "en",
13
+ "5": "es",
14
+ "6": "fr",
15
+ "7": "hi",
16
+ "8": "it",
17
+ "9": "ja",
18
+ "10": "nl",
19
+ "11": "pl",
20
+ "12": "pt",
21
+ "13": "ru",
22
+ "14": "sw",
23
+ "15": "th",
24
+ "16": "tr",
25
+ "17": "ur",
26
+ "18": "vi",
27
+ "19": "zh"
28
+ },
29
+
30
+ "language_codes":{
31
+ "ar": "arabic",
32
+ "bg": "bulgarian",
33
+ "de": "german",
34
+ "el": "modern greek",
35
+ "en": "english",
36
+ "es": "spanish",
37
+ "fr": "french",
38
+ "hi": "hindi",
39
+ "it": "italian",
40
+ "ja": "japanese",
41
+ "nl": "dutch",
42
+ "pl": "polish",
43
+ "pt": "portuguese",
44
+ "ru": "russian",
45
+ "sw": "swahili",
46
+ "th": "thai",
47
+ "tr": "turkish",
48
+ "ur": "urdu",
49
+ "vi": "vietnamese",
50
+ "zh": "chinese"
51
+ }
52
+ }