Spaces:

fvyounesi
/

STT

Running

fvyounesi committed on Dec 16, 2025

Commit

6513d4f

verified ·

1 Parent(s): 9a5f61f

Update app.py

Files changed (1) hide show

app.py CHANGED Viewed

@@ -3,17 +3,25 @@ import speech_recognition as sr
 from pydub import AudioSegment
 import os
-def transcribe_audio(audio_file_path, input_language):
     # Auto-detect format from file extension
     file_ext = os.path.splitext(audio_file_path)[1][1:]  # e.g. "mp3", "wav"
     # Add silence padding
     audio = AudioSegment.from_file(audio_file_path, format=file_ext)
     two_sec_silence = AudioSegment.silent(duration=2500)
     audio = two_sec_silence + audio + two_sec_silence
-    audio.export("stt_audio.wav", format="wav")
-    file_path2 = "stt_audio.wav"
     recognizer = sr.Recognizer()
     try:
         with sr.AudioFile(file_path2) as source:
@@ -35,7 +43,9 @@ iface = gr.Interface(
     inputs=[
         gr.Audio(sources=["microphone", "upload"], type="filepath", label="Audio Input"),
         #gr.Textbox(label="Language code (e.g., en-US, fa-IR)")
-        gr.Dropdown(choices=["fa-IR", "en-US", "ar-SA"], label="Choose the right language:")
     ],
     #outputs="text",
     outputs=gr.Textbox(label="Transcription results", lines=10),

 from pydub import AudioSegment
 import os
+def audio_preproccessing():
+	os.system("deepFilter 'Audio.wav'")
+	os.rename("Audio_DeepFilterNet3.wav", "Audio.wav")
+	print("Audio Preprocessing Done!")
+def transcribe_audio(audio_file_path, input_language,denoising):
     # Auto-detect format from file extension
     file_ext = os.path.splitext(audio_file_path)[1][1:]  # e.g. "mp3", "wav"
     # Add silence padding
     audio = AudioSegment.from_file(audio_file_path, format=file_ext)
     two_sec_silence = AudioSegment.silent(duration=2500)
     audio = two_sec_silence + audio + two_sec_silence
+    audio.export("Audio.wav", format="wav")
+    file_path2 = "Audio.wav"
+    if denoising=='Yes':
+        audio_preproccessing()
     recognizer = sr.Recognizer()
     try:
         with sr.AudioFile(file_path2) as source:
     inputs=[
         gr.Audio(sources=["microphone", "upload"], type="filepath", label="Audio Input"),
         #gr.Textbox(label="Language code (e.g., en-US, fa-IR)")
+        gr.Dropdown(choices=["fa-IR", "en-US", "ar-SA"], label="Choose the right language:"),
+        #gr.Checkbox(label="Apply Denoising?")
+        gr.Dropdown(choices=["Yes", "No"], label="Need Denoising?")
     ],
     #outputs="text",
     outputs=gr.Textbox(label="Transcription results", lines=10),