hypersunflower
/

a_sad_model

Voice Activity Detection

Model card Files Files and versions

a_sad_model / sadModel.py

hypersunflower's picture

Upload 3 files

d1124fa verified 2 months ago

history blame contribute delete

1.01 kB


	import torch
	from torch import nn

	class sadModel(nn.Module):
		"""Bidirectional GRU followed by a single linear layer over all frames.

		The GRU output for every time step is flattened into one vector per
		sample and projected to ``output_dim`` values, so the model only
		accepts inputs whose time axis has exactly ``seq_len`` frames.

		Args:
			input_dim: Number of features per frame (size of the GRU input).
			hidden_dim: GRU hidden size per direction.
			num_layers: Number of stacked GRU layers.
			output_dim: Number of values produced per sample.
			seq_len: Number of frames along the time axis of the input. The
				default of 400 reproduces the originally hard-coded value.
		"""

		def __init__(self, input_dim=40, hidden_dim=64, num_layers=1,
					 output_dim=800, seq_len=400):
			super().__init__()

			# batch_first=True, so the GRU consumes (batch, seq_len, input_dim).
			self.gru = nn.GRU(
				input_size=input_dim,
				hidden_size=hidden_dim,
				num_layers=num_layers,
				batch_first=True,
				bidirectional=True,
			)

			# Both directions are concatenated (factor 2) and every time step is
			# fed to the linear layer at once (factor seq_len).
			self.fc = nn.Linear(hidden_dim * 2 * seq_len, output_dim)

		def forward(self, x):
			"""Run the model on a batch of feature maps.

			Args:
				x: Tensor laid out as (batch, 1, input_dim, seq_len).

			Returns:
				Tensor of shape (batch, output_dim).
			"""
			# Drop the singleton channel axis and move time before features:
			# (batch, 1, input_dim, seq_len) -> (batch, seq_len, input_dim).
			x = x.squeeze(1).permute(0, 2, 1)

			# out: (batch, seq_len, hidden_dim * 2)
			out, _ = self.gru(x)

			# Merge time and feature axes: (batch, seq_len * hidden_dim * 2).
			out = out.flatten(start_dim=1)

			# (batch, output_dim)
			return self.fc(out)