Spaces:

Mufintech
/

RealTime_Translation

Runtime error

MufinApps committed on Dec 6, 2023

Commit

605cec3

1 Parent(s): d6b20a1

Increase accuracy

Files changed (3) hide show

__pycache__/transcription_service.cpython-311.pyc CHANGED Viewed

Binary files a/__pycache__/transcription_service.cpython-311.pyc and b/__pycache__/transcription_service.cpython-311.pyc differ

app.py CHANGED Viewed

@@ -107,7 +107,7 @@ with gr.Blocks(theme=gr.themes.Soft(),css="css.txt") as app:
         with gr.Column():
             text1 = gr.Textbox(interactive=False,label="Translation",lines=10,max_lines=10,)
-            st=mic.stream(func, [mic,input_lan,lan,text,text1], [text,text1],show_progress=False)
     with gr.Row():
         sumer_ts=gr.Textbox(label="Summery of Transcription",interactive=False,lines=4,max_lines=4)
         sumer_tr=gr.Textbox(label="Summery of Translation",interactive=False,lines=4,max_lines=4)

         with gr.Column():
             text1 = gr.Textbox(interactive=False,label="Translation",lines=10,max_lines=10,)
+            st=mic.stream(func, [mic,input_lan,lan,text,text1], [text,text1],show_progress=False,)
     with gr.Row():
         sumer_ts=gr.Textbox(label="Summery of Transcription",interactive=False,lines=4,max_lines=4)
         sumer_tr=gr.Textbox(label="Summery of Translation",interactive=False,lines=4,max_lines=4)

transcription_service.py CHANGED Viewed

@@ -4,12 +4,36 @@ from openai import OpenAI
 import subprocess
 os.environ['GOOGLE_APPLICATION_CREDENTIALS'] = 'msq-ai-78bdccb055f4.json'
-convert_wav_to_flac = lambda wav_file: subprocess.run(['ffmpeg', '-i', wav_file, '-y', '-ar', '16000', '-ac', '1', wav_file.replace('.wav', '.flac')], stdout=subprocess.PIPE, stderr=subprocess.PIPE)
 def transcribe_speech_local(wav_file,language):
     """
     Transcribes a local WAV file using Google's Speech-to-Text API.
@@ -22,10 +46,14 @@ def transcribe_speech_local(wav_file,language):
     audio = speech.RecognitionAudio(content=content)
     config = speech.RecognitionConfig(
         encoding=speech.RecognitionConfig.AudioEncoding.LINEAR16,
       # Adjust this according to your file's sample rate
         language_code=language,
-        enable_automatic_punctuation=True
     )
     response = client.recognize(config=config, audio=audio)

 import subprocess
 os.environ['GOOGLE_APPLICATION_CREDENTIALS'] = 'msq-ai-78bdccb055f4.json'
+import os
+def convert_wav_to_flac(wav_file_path):
+    # Convert the WAV file at wav_file_path to a mono 48 kHz FLAC file using ffmpeg
+    # Derive the output path by replacing ".wav" with ".flac" (every occurrence in the path is replaced)
+    flac_file_path = wav_file_path.replace(".wav", ".flac")
+    # Build the ffmpeg command (uses a hard-coded local Windows path to ffmpeg.exe)
+    command = [
+        'C:/Users/Hasan/AppData/Local/ffmpegio/ffmpeg-downloader/ffmpeg/bin/ffmpeg.exe',
+        '-i', wav_file_path,
+        '-y',
+        '-ar', '48000',
+        '-ac', '1',
+        flac_file_path
+    ]
+    # Execute the ffmpeg command (the exit code returned by subprocess.call is not checked)
+    subprocess.call(command)
+    return flac_file_path
 def transcribe_speech_local(wav_file,language):
+    # print(wav_file,'wav_file_path\n')
+    # flac_file=convert_wav_to_flac(wav_file)
+    # flac_file=convert_wav_to_flac(wav_file)
+    # print(flac_file,'flac_file_path\n')
     """
     Transcribes a local WAV file using Google's Speech-to-Text API.
     audio = speech.RecognitionAudio(content=content)
     config = speech.RecognitionConfig(
         encoding=speech.RecognitionConfig.AudioEncoding.LINEAR16,
+        sample_rate_hertz=48000,
+        # model="default",
+        enable_spoken_punctuation=True,
+        model="latest_short",
       # Adjust this according to your file's sample rate
         language_code=language,
+        enable_automatic_punctuation=False
     )
     response = client.recognize(config=config, audio=audio)