# File size: 4,307 Bytes

# Generated 2025-04-21 from:
# /content/test/hparams/train.yaml
# yamllint disable
# ########################################
# Emotion recognition from Persian speech using ECAPA-TDNN
# Dataset: ShEMO
# Language: Persian
# ########################################

# Hugging Face repository that hosts the pretrained model.
# Used as the prefix of every entry under `pretrainer.paths`.
pretrained_path: mobina1380/speechbrain-persian-ser

# Random seed (optional)
seed: 1968
# Number of training epochs; `epoch_counter.limit` carries the same literal value.
number_of_epochs: 30
# ⚠️ The following line was removed because it may cause problems in some environments.
# With it commented out, no visible line of this file applies `seed`.
# __set_seed: !apply:speechbrain.utils.seed_everything [!ref <seed>]

# Data folder (the project directory when running locally)
data_folder: .

# Output locations for models and logs
output_folder: results/ECAPA-TDNN/1968
save_folder: results/ECAPA-TDNN/1968/save
train_log: results/ECAPA-TDNN/1968/train_log.txt

# Dataset CSV files (train / validation / test splits)
csv_train: ./test/train.csv
csv_valid: ./test/valid.csv
csv_test: ./test/test.csv

# Logger that records training progress.
# `save_file` is the same path as `train_log`, repeated here as a literal.
train_logger: !new:speechbrain.utils.train_logger.FileTrainLogger
  save_file: results/ECAPA-TDNN/1968/train_log.txt

# Error evaluation.
# `!name:` yields a callable rather than an instance, so `error_stats` is a
# MetricStats factory bound to the classification-error metric.
error_stats: !name:speechbrain.utils.metric_stats.MetricStats
  metric: !name:speechbrain.nnet.losses.classification_error
    reduction: batch

# Presumably the number of minutes between checkpoint saves; it is consumed by
# the training script, not by anything in this file — TODO confirm.
ckpt_interval_minutes: 15

# Training parameters.
# NOTE: this is a generated (resolved) file. Most of these values are repeated
# as plain literals in `dataloader_options`, `opt_class` and `lr_annealing`;
# editing a value here does NOT change those copies.
batch_size: 4
grad_accumulation_factor: 2
lr: 0.0001
weight_decay: 0.00002
base_lr: 0.000001
max_lr: 0.0001
step_size: 1088
mode: exp_range
gamma: 0.9998
shuffle: true
drop_last: false

# Audio feature settings.
# The same literals are repeated under `compute_features`; keep both in sync.
n_mels: 80
left_frames: 0
right_frames: 0
deltas: false

# Number of emotion classes in ShEMO (one per entry of `label_dict`)
out_n_neurons: 6

# Mapping from emotion label to class index
label_dict:
  anger: 0
  surprise: 1
  happiness: 2
  sadness: 3
  neutral: 4
  fear: 5

# Label encoder object; it is also listed under `pretrainer.loadables`, whose
# matching path points at `<pretrained_path>/label_encoder.txt`.
# NOTE(review): `from_file` is passed as a constructor keyword here. It does not
# look like a documented CategoricalEncoder constructor argument and may be
# silently ignored — confirm against the SpeechBrain API.
label_encoder: !new:speechbrain.dataio.encoder.CategoricalEncoder
  from_file: label_encoder.txt

# DataLoader settings.
# `batch_size`, `shuffle` and `drop_last` repeat the top-level values as literals.
dataloader_options:
  batch_size: 4
  shuffle: true
  num_workers: 2
  drop_last: false

# Feature extraction (Mel spectrogram).
# Anchored as `id001` so that `modules.compute_features` aliases this same node.
compute_features: &id001 !new:speechbrain.lobes.features.Fbank
  n_mels: 80
  left_frames: 0
  right_frames: 0
  deltas: false

# ECAPA-TDNN embedding model.
# `input_size` equals `n_mels` (80); `lin_neurons` (96) equals the classifier's
# `input_size`. Anchored as `id002` for reuse in `modules` and `checkpointer`.
embedding_model: &id002 !new:speechbrain.lobes.models.ECAPA_TDNN.ECAPA_TDNN
  input_size: 80
  channels: [512, 512, 512, 512, 1536]
  kernel_sizes: [5, 3, 3, 3, 1]
  dilations: [1, 2, 3, 4, 1]
  attention_channels: 64
  lin_neurons: 96

# Output classifier.
# `input_size` matches `embedding_model.lin_neurons`; `out_neurons` matches
# `out_n_neurons`. Anchored as `id003`.
classifier: &id003 !new:speechbrain.lobes.models.ECAPA_TDNN.Classifier
  input_size: 96
  out_neurons: 6

# Epoch counter; `limit` repeats `number_of_epochs` as a literal.
# Anchored as `id005` so the checkpointer can list it as a recoverable.
epoch_counter: &id005 !new:speechbrain.utils.epoch_loop.EpochCounter
  limit: 30

# Feature normalization.
# Anchored as `id004`; aliased by `modules.mean_var_norm` and by the
# checkpointer's `normalizer` recoverable.
mean_var_norm: &id004 !new:speechbrain.processing.features.InputNormalization
  norm_type: sentence
  std_norm: false

# Model modules (aliases to the objects defined above)
modules:
  compute_features: *id001
  embedding_model: *id002
  classifier: *id003
  mean_var_norm: *id004

# Loss function: additive angular margin loss wrapped in LogSoftmaxWrapper
compute_cost: !new:speechbrain.nnet.losses.LogSoftmaxWrapper
  loss_fn: !new:speechbrain.nnet.losses.AdditiveAngularMargin
    margin: 0.2
    scale: 30

# Optimizer.
# `!name:` yields a callable, so `opt_class` is an Adam constructor with `lr`
# and `weight_decay` pre-bound (both repeat the top-level values as literals).
opt_class: !name:torch.optim.Adam
  lr: 0.0001
  weight_decay: 0.00002

# Learning-rate schedule (cyclic).
# All five arguments repeat the top-level values of the same names as literals.
lr_annealing: !new:speechbrain.nnet.schedulers.CyclicLRScheduler
  mode: exp_range
  gamma: 0.9998
  base_lr: 0.000001
  max_lr: 0.0001
  step_size: 1088

# Checkpoint management.
# `checkpoints_dir` is the same path as `save_folder`; the recoverables are
# aliases to the model, classifier, normalizer and epoch counter defined above.
checkpointer: !new:speechbrain.utils.checkpoints.Checkpointer
  checkpoints_dir: results/ECAPA-TDNN/1968/save
  recoverables:
    embedding_model: *id002
    classifier: *id003
    normalizer: *id004
    counter: *id005

# Pretrained-parameter loading.
# `loadables` names the objects to populate; `paths` gives, under the same
# keys, the file for each one inside the `pretrained_path` repository.
# `collect_in` is the directory name handed to the Pretrainer (presumably where
# the files are gathered before loading — confirm against the SpeechBrain docs).
pretrainer: !new:speechbrain.utils.parameter_transfer.Pretrainer
  collect_in: tmpdir
  loadables:
    embedding_model: !ref <embedding_model>
    classifier:      !ref <classifier>
    normalizer:      !ref <mean_var_norm>
    label_encoder:   !ref <label_encoder>
  paths:
    embedding_model: !ref <pretrained_path>/embedding_model.ckpt
    classifier:      !ref <pretrained_path>/classifier.ckpt
    normalizer:      !ref <pretrained_path>/normalizer.ckpt
    label_encoder:   !ref <pretrained_path>/label_encoder.txt