Spaces:
Runtime error
Runtime error
Update pipelines/data/data_module.py
Browse files
pipelines/data/data_module.py
CHANGED
|
@@ -29,8 +29,6 @@ class AVSRDataLoader:
|
|
| 29 |
|
| 30 |
def load_data(self, data_filename, landmarks=None, transform=True):
|
| 31 |
if self.modality == "audio":
|
| 32 |
-
# audio, sample_rate = self.load_audio(data_filename)
|
| 33 |
-
# audio = self.audio_process(audio, sample_rate)
|
| 34 |
audio = self.load_audio(data_filename)
|
| 35 |
return self.audio_transform(audio) if self.transform else audio
|
| 36 |
if self.modality == "video":
|
|
@@ -40,8 +38,6 @@ class AVSRDataLoader:
|
|
| 40 |
return self.video_transform(video) if self.transform else video
|
| 41 |
if self.modality == "audiovisual":
|
| 42 |
rate_ratio = 640
|
| 43 |
-
# audio, sample_rate = self.load_audio(data_filename)
|
| 44 |
-
# audio = self.audio_process(audio, sample_rate)
|
| 45 |
audio = self.load_audio(data_filename)
|
| 46 |
video = self.load_video(data_filename)
|
| 47 |
video = self.video_process(video, landmarks)
|
|
@@ -58,16 +54,8 @@ class AVSRDataLoader:
|
|
| 58 |
def load_audio(self, data_filename):
    """Load the audio track of *data_filename* as a float tensor.

    Returns:
        torch.Tensor of shape [1, T] — whisper.load_audio yields a 1-D
        waveform (mono, 16 kHz per whisper's contract — TODO confirm);
        unsqueeze(0) adds the leading channel dimension.
    """
    waveform = torch.tensor(whisper.load_audio(data_filename)).unsqueeze(0)
    # BUG FIX: the original body never returned, so callers received None
    # and crashed downstream (matches the "Runtime error" this commit fixes).
    return waveform
|
| 63 |
|
| 64 |
|
| 65 |
def load_video(self, data_filename):
    """Decode *data_filename* and return its video frames as a numpy array.

    Only the frame tensor from torchvision's read_video triple is kept;
    the audio samples and metadata are discarded.
    """
    frames, _audio, _info = torchvision.io.read_video(data_filename, pts_unit='sec')
    return frames.numpy()
|
| 67 |
-
|
| 68 |
-
|
| 69 |
-
# def audio_process(self, waveform, sample_rate, target_sample_rate=16000):
|
| 70 |
-
# if sample_rate != target_sample_rate:
|
| 71 |
-
# waveform = torchaudio.functional.resample(waveform, sample_rate, target_sample_rate)
|
| 72 |
-
# waveform = torch.mean(waveform, dim=0, keepdim=True)
|
| 73 |
-
# return waveform
|
|
|
|
| 29 |
|
| 30 |
def load_data(self, data_filename, landmarks=None, transform=True):
|
| 31 |
if self.modality == "audio":
|
|
|
|
|
|
|
| 32 |
audio = self.load_audio(data_filename)
|
| 33 |
return self.audio_transform(audio) if self.transform else audio
|
| 34 |
if self.modality == "video":
|
|
|
|
| 38 |
return self.video_transform(video) if self.transform else video
|
| 39 |
if self.modality == "audiovisual":
|
| 40 |
rate_ratio = 640
|
|
|
|
|
|
|
| 41 |
audio = self.load_audio(data_filename)
|
| 42 |
video = self.load_video(data_filename)
|
| 43 |
video = self.video_process(video, landmarks)
|
|
|
|
| 54 |
def load_audio(self, data_filename):
    """Read *data_filename* with whisper and return a [1, T] waveform tensor.

    whisper.load_audio produces a 1-D sample array; a leading channel
    dimension is added before returning.
    """
    samples = whisper.load_audio(data_filename)
    return torch.tensor(samples).unsqueeze(0)
|
|
|
|
| 58 |
|
| 59 |
|
| 60 |
def load_video(self, data_filename):
    """Return every decoded video frame of *data_filename* as a numpy array."""
    # read_video returns (video_frames, audio_frames, info); index 0 keeps frames only.
    video_frames = torchvision.io.read_video(data_filename, pts_unit='sec')[0]
    return video_frames.numpy()
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|