# Generated 2025-04-21 from:
# /content/test/hparams/train.yaml
# yamllint disable
# ########################################
# Emotion recognition from Persian speech using ECAPA-TDNN
# Dataset: ShEMO
# Language: Persian
# ########################################
# Hugging Face repository that hosts the pretrained model files.
pretrained_path: mobina1380/speechbrain-persian-ser

# Random seed (optional). It also names the output folder below.
seed: 1968
number_of_epochs: 30
# NOTE: the following line was removed because it may cause problems in
# some environments:
# __set_seed: !apply:speechbrain.utils.seed_everything [!ref <seed>]

# Data folder (the project directory when running locally).
data_folder: .

# Output locations for models and logs. They are derived from <seed> and
# <output_folder> so the three paths cannot drift apart; with seed 1968
# they resolve to results/ECAPA-TDNN/1968, results/ECAPA-TDNN/1968/save
# and results/ECAPA-TDNN/1968/train_log.txt.
output_folder: !ref results/ECAPA-TDNN/<seed>
save_folder: !ref <output_folder>/save
train_log: !ref <output_folder>/train_log.txt

# Dataset CSV files.
csv_train: ./test/train.csv
csv_valid: ./test/valid.csv
csv_test: ./test/test.csv
# Logger that stores the training status in the <train_log> file.
# `save_file` must be nested under the tag so it is passed as an argument
# to FileTrainLogger instead of becoming a separate top-level key.
train_logger: !new:speechbrain.utils.train_logger.FileTrainLogger
  save_file: !ref <train_log>
# Error evaluation: a MetricStats factory built around the classification
# error. `reduction` is an argument of the metric function, hence the
# second level of nesting.
error_stats: !name:speechbrain.utils.metric_stats.MetricStats
  metric: !name:speechbrain.nnet.losses.classification_error
    reduction: batch
# ---- Training parameters ----

# Checkpoint interval, in minutes.
ckpt_interval_minutes: 15

# Batching.
batch_size: 4
grad_accumulation_factor: 2
shuffle: true
drop_last: false

# Optimizer settings.
lr: 0.0001
weight_decay: 0.00002

# Cyclic learning-rate schedule settings.
mode: exp_range
base_lr: 0.000001
max_lr: 0.0001
step_size: 1088
gamma: 0.9998

# ---- Audio features ----
n_mels: 80
left_frames: 0
right_frames: 0
deltas: false

# ---- Number of emotion classes in ShEMO ----
out_n_neurons: 6
# Label mapping: emotion name -> class index. The entries must be nested
# under `label_dict`; left at column 0 they become unrelated top-level
# keys and `label_dict` itself is null.
label_dict:
  anger: 0
  surprise: 1
  happiness: 2
  sadness: 3
  neutral: 4
  fear: 5
# Encoder object for the emotion labels.
# NOTE(review): confirm that CategoricalEncoder's constructor actually
# consumes `from_file`; its signature is not visible from this file.
label_encoder: !new:speechbrain.dataio.encoder.CategoricalEncoder
  from_file: label_encoder.txt
# DataLoader settings. Values that already exist as top-level
# hyperparameters are referenced rather than repeated, so there is a
# single place to change them (they resolve to 4 / true / false).
dataloader_options:
  batch_size: !ref <batch_size>
  shuffle: !ref <shuffle>
  num_workers: 2
  drop_last: !ref <drop_last>
# Feature extraction (Mel spectrogram / filterbank). Arguments are nested
# under the tag and tied to the top-level audio-feature settings
# (they resolve to 80 / 0 / 0 / false).
compute_features: &id001 !new:speechbrain.lobes.features.Fbank
  n_mels: !ref <n_mels>
  left_frames: !ref <left_frames>
  right_frames: !ref <right_frames>
  deltas: !ref <deltas>
# ECAPA-TDNN embedding model. `input_size` follows the number of Mel bins
# (<n_mels>, i.e. 80); `lin_neurons` (96) must match the classifier's
# `input_size`.
embedding_model: &id002 !new:speechbrain.lobes.models.ECAPA_TDNN.ECAPA_TDNN
  input_size: !ref <n_mels>
  channels: [512, 512, 512, 512, 1536]
  kernel_sizes: [5, 3, 3, 3, 1]
  dilations: [1, 2, 3, 4, 1]
  attention_channels: 64
  lin_neurons: 96
# Output classifier. `input_size` (96) must equal the embedding model's
# `lin_neurons`; `out_neurons` follows the number of emotion classes
# (<out_n_neurons>, i.e. 6).
classifier: &id003 !new:speechbrain.lobes.models.ECAPA_TDNN.Classifier
  input_size: 96
  out_neurons: !ref <out_n_neurons>
# Epoch counter. `limit` is tied to <number_of_epochs> (30) so the two
# values cannot disagree.
epoch_counter: &id005 !new:speechbrain.utils.epoch_loop.EpochCounter
  limit: !ref <number_of_epochs>
# Feature normalization. `norm_type` and `std_norm` are arguments of
# InputNormalization and therefore nested under the tag. (A stray
# "loss function" comment that used to sit in the middle of this block
# was dropped: it does not describe these settings.)
mean_var_norm: &id004 !new:speechbrain.processing.features.InputNormalization
  norm_type: sentence
  std_norm: false
# Model modules: each entry aliases the object anchored at its
# definition (&id001 .. &id004), so the same instances are shared.
modules:
  compute_features: *id001
  embedding_model: *id002
  classifier: *id003
  mean_var_norm: *id004
# Loss function: an AdditiveAngularMargin loss passed as `loss_fn` to
# LogSoftmaxWrapper. `margin` and `scale` are arguments of the inner
# loss, hence the second level of nesting.
compute_cost: !new:speechbrain.nnet.losses.LogSoftmaxWrapper
  loss_fn: !new:speechbrain.nnet.losses.AdditiveAngularMargin
    margin: 0.2
    scale: 30
# Optimizer factory. The arguments reference the top-level
# hyperparameters (they resolve to 0.0001 and 0.00002).
opt_class: !name:torch.optim.Adam
  lr: !ref <lr>
  weight_decay: !ref <weight_decay>
# Learning-rate scheduling (cyclic). All arguments reference the
# top-level hyperparameters of the same name (they resolve to
# exp_range / 0.9998 / 0.000001 / 0.0001 / 1088).
lr_annealing: !new:speechbrain.nnet.schedulers.CyclicLRScheduler
  mode: !ref <mode>
  gamma: !ref <gamma>
  base_lr: !ref <base_lr>
  max_lr: !ref <max_lr>
  step_size: !ref <step_size>
# Checkpoint management. Checkpoints go to <save_folder>
# (results/ECAPA-TDNN/1968/save); `recoverables` lists the objects to
# save and restore, aliased to the instances anchored at their
# definitions.
checkpointer: !new:speechbrain.utils.checkpoints.Checkpointer
  checkpoints_dir: !ref <save_folder>
  recoverables:
    embedding_model: *id002
    classifier: *id003
    normalizer: *id004
    counter: *id005
# Pretrained-parameter transfer. `loadables` names the objects to fill
# and `paths` gives, under the same names, the file inside
# <pretrained_path> that belongs to each of them. Both mappings must be
# nested under the Pretrainer tag to be passed as its arguments.
pretrainer: !new:speechbrain.utils.parameter_transfer.Pretrainer
  collect_in: tmpdir
  loadables:
    embedding_model: !ref <embedding_model>
    classifier: !ref <classifier>
    normalizer: !ref <mean_var_norm>
    label_encoder: !ref <label_encoder>
  paths:
    embedding_model: !ref <pretrained_path>/embedding_model.ckpt
    classifier: !ref <pretrained_path>/classifier.ckpt
    normalizer: !ref <pretrained_path>/normalizer.ckpt
    label_encoder: !ref <pretrained_path>/label_encoder.txt