import torch
import torchaudio
from torchaudio.transforms import Vad

# Load the audio file
audio_file = "path/to/your/audio/file.wav"
waveform, sample_rate = torchaudio.load(audio_file)

# Create the VAD transform. Note: torchaudio's Vad does not return
# speech segments; like the SoX vad effect it is modeled on, it detects
# where voice begins and returns the waveform with leading silence trimmed.
vad = Vad(sample_rate=sample_rate)

# Trim silence from the front of the recording
trimmed = vad(waveform)

# To also trim silence from the end, reverse the waveform in time,
# apply the transform again, and reverse the result back
trimmed = torch.flip(vad(torch.flip(trimmed, dims=[1])), dims=[1])

# Report the duration before and after trimming
original_s = waveform.shape[1] / sample_rate
trimmed_s = trimmed.shape[1] / sample_rate
print(f"Original: {original_s:.2f}s, after VAD trimming: {trimmed_s:.2f}s")

# Save the trimmed audio
torchaudio.save("trimmed.wav", trimmed, sample_rate)