# ############################################################################
# Hyperparameters: emotion recognition with an ECAPA-TDNN model (SpeechBrain),
# fine-tuned from a pretrained Persian speech-emotion-recognition checkpoint.
# ############################################################################

# HuggingFace repo (or local path) holding the pretrained checkpoints
pretrained_path: mobina1380/speechbrain-persian-ser
|
| | |
| | seed: 1968 |
| | number_of_epochs: 30 |
| | |
| | |
| |
|
# Data location
data_folder: .

# Output locations for results, checkpoints, and logs
output_folder: results/ECAPA-TDNN/1968
save_folder: results/ECAPA-TDNN/1968/save
train_log: results/ECAPA-TDNN/1968/train_log.txt
# Paths to the data manifest files
csv_train: ./test/train.csv
csv_valid: ./test/valid.csv
csv_test: ./test/test.csv
# The train logger writes training statistics to the train_log file
train_logger: !new:speechbrain.utils.train_logger.FileTrainLogger
    save_file: results/ECAPA-TDNN/1968/train_log.txt

error_stats: !name:speechbrain.utils.metric_stats.MetricStats
    metric: !name:speechbrain.nnet.losses.classification_error
        reduction: batch
ckpt_interval_minutes: 15 # save checkpoint every N min
# Training parameters
batch_size: 4
grad_accumulation_factor: 2
lr: 0.0001
weight_decay: 0.00002
base_lr: 0.000001
max_lr: 0.0001
step_size: 1088
mode: exp_range
gamma: 0.9998
shuffle: true
drop_last: false
# Feature parameters
n_mels: 80
left_frames: 0
right_frames: 0
deltas: false

# Number of emotion classes
out_n_neurons: 6
# Emotion label to index mapping
label_dict:
    anger: 0
    surprise: 1
    happiness: 2
    sadness: 3
    neutral: 4
    fear: 5

label_encoder: !new:speechbrain.dataio.encoder.CategoricalEncoder
    from_file: label_encoder.txt
|
| | |
| | dataloader_options: |
| | batch_size: 4 |
| | shuffle: true |
| | num_workers: 2 |
| | drop_last: false |
| |
|
# Feature extraction (log Mel filterbanks)
compute_features: &id001 !new:speechbrain.lobes.features.Fbank
    n_mels: 80
    left_frames: 0
    right_frames: 0
    deltas: false
# ECAPA-TDNN embedding model
embedding_model: &id002 !new:speechbrain.lobes.models.ECAPA_TDNN.ECAPA_TDNN
    input_size: 80
    channels: [512, 512, 512, 512, 1536]
    kernel_sizes: [5, 3, 3, 3, 1]
    dilations: [1, 2, 3, 4, 1]
    attention_channels: 64
    lin_neurons: 96

# Classification head on top of the embeddings
classifier: &id003 !new:speechbrain.lobes.models.ECAPA_TDNN.Classifier
    input_size: 96
    out_neurons: 6
# Epoch counter
epoch_counter: &id005 !new:speechbrain.utils.epoch_loop.EpochCounter
    limit: 30

# Per-sentence input feature normalization (mean only, no std normalization)
mean_var_norm: &id004 !new:speechbrain.processing.features.InputNormalization
    norm_type: sentence
    std_norm: false
# Modules managed by the Brain class
modules:
    compute_features: *id001
    embedding_model: *id002
    classifier: *id003
    mean_var_norm: *id004

compute_cost: !new:speechbrain.nnet.losses.LogSoftmaxWrapper
    loss_fn: !new:speechbrain.nnet.losses.AdditiveAngularMargin
        margin: 0.2
        scale: 30

# Optimizer
opt_class: !name:torch.optim.Adam
    lr: 0.0001
    weight_decay: 0.00002
|
| | |
| | lr_annealing: !new:speechbrain.nnet.schedulers.CyclicLRScheduler |
| | mode: exp_range |
| | gamma: 0.9998 |
| | base_lr: 0.000001 |
| | max_lr: 0.0001 |
| | step_size: 1088 |
| |
|
| | |
| | checkpointer: !new:speechbrain.utils.checkpoints.Checkpointer |
| | checkpoints_dir: results/ECAPA-TDNN/1968/save |
| | recoverables: |
| | embedding_model: *id002 |
| | classifier: *id003 |
| | normalizer: *id004 |
| | counter: *id005 |
| |
|
| | pretrainer: !new:speechbrain.utils.parameter_transfer.Pretrainer |
| | collect_in: tmpdir |
| | loadables: |
| | embedding_model: !ref <embedding_model> |
| | classifier: !ref <classifier> |
| | normalizer: !ref <mean_var_norm> |
| | label_encoder: !ref <label_encoder> |
| | paths: |
| | embedding_model: !ref <pretrained_path>/embedding_model.ckpt |
| | classifier: !ref <pretrained_path>/classifier.ckpt |
| | normalizer: !ref <pretrained_path>/normalizer.ckpt |
| | label_encoder: !ref <pretrained_path>/label_encoder.txt |
| |
|