Spaces:
Running
Running
Update app.py
Browse files
app.py
CHANGED
|
@@ -187,6 +187,21 @@ def remove_silence(audio_file, silence_threshold=-40, min_silence_len=500):
|
|
| 187 |
|
| 188 |
return output_path
|
| 189 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 190 |
def detect_voice_activity(audio_file, threshold=0.02):
|
| 191 |
"""
|
| 192 |
Detect voice activity in the audio file and trim the audio to include only voice segments.
|
|
@@ -198,8 +213,11 @@ def detect_voice_activity(audio_file, threshold=0.02):
|
|
| 198 |
Returns:
|
| 199 |
str: Path to the output audio file with only voice segments.
|
| 200 |
"""
|
| 201 |
-
#
|
| 202 |
-
|
|
|
|
|
|
|
|
|
|
| 203 |
|
| 204 |
# Normalize the audio data
|
| 205 |
if data.dtype != np.float32:
|
|
@@ -230,6 +248,9 @@ def detect_voice_activity(audio_file, threshold=0.02):
|
|
| 230 |
output_path = "voice_trimmed_audio.wav"
|
| 231 |
wavfile.write(output_path, sample_rate, trimmed_audio)
|
| 232 |
|
|
|
|
|
|
|
|
|
|
| 233 |
return output_path
|
| 234 |
|
| 235 |
def transcribe_audio(audio_file, language="Auto Detect", model_size="Faster Whisper Large v3"):
|
|
|
|
| 187 |
|
| 188 |
return output_path
|
| 189 |
|
| 190 |
+
def convert_to_wav(audio_file):
    """
    Convert the input audio file to WAV format.

    Args:
        audio_file (str): Path to the input audio file (any format
            pydub/ffmpeg can decode).

    Returns:
        str: Path to the converted WAV file. The caller owns this file
        and is responsible for deleting it when done (the visible caller
        removes it via ``os.remove``).
    """
    # Local stdlib import so this fix is self-contained; the file's
    # top-level import block is not visible in this view.
    import tempfile

    # Decode the source audio; pydub shells out to ffmpeg for non-WAV input.
    audio = AudioSegment.from_file(audio_file)

    # Use a unique temporary file rather than the fixed relative name
    # "converted_audio.wav": a hard-coded CWD path collides when two
    # conversions run concurrently and fails outright if the working
    # directory is not writable. mkstemp creates the file securely; we
    # close the OS-level handle immediately so pydub can write to the path.
    fd, wav_path = tempfile.mkstemp(suffix=".wav")
    os.close(fd)

    audio.export(wav_path, format="wav")
    return wav_path
|
| 204 |
+
|
| 205 |
def detect_voice_activity(audio_file, threshold=0.02):
|
| 206 |
"""
|
| 207 |
Detect voice activity in the audio file and trim the audio to include only voice segments.
|
|
|
|
| 213 |
Returns:
|
| 214 |
str: Path to the output audio file with only voice segments.
|
| 215 |
"""
|
| 216 |
+
# Convert the input audio to WAV format
|
| 217 |
+
wav_path = convert_to_wav(audio_file)
|
| 218 |
+
|
| 219 |
+
# Load the WAV file
|
| 220 |
+
sample_rate, data = wavfile.read(wav_path)
|
| 221 |
|
| 222 |
# Normalize the audio data
|
| 223 |
if data.dtype != np.float32:
|
|
|
|
| 248 |
output_path = "voice_trimmed_audio.wav"
|
| 249 |
wavfile.write(output_path, sample_rate, trimmed_audio)
|
| 250 |
|
| 251 |
+
# Clean up the converted WAV file
|
| 252 |
+
os.remove(wav_path)
|
| 253 |
+
|
| 254 |
return output_path
|
| 255 |
|
| 256 |
def transcribe_audio(audio_file, language="Auto Detect", model_size="Faster Whisper Large v3"):
|