MufinApps committed on
Commit
605cec3
·
1 Parent(s): d6b20a1

Increase accuracy

Browse files
__pycache__/transcription_service.cpython-311.pyc CHANGED
Binary files a/__pycache__/transcription_service.cpython-311.pyc and b/__pycache__/transcription_service.cpython-311.pyc differ
 
app.py CHANGED
@@ -107,7 +107,7 @@ with gr.Blocks(theme=gr.themes.Soft(),css="css.txt") as app:
107
 
108
  with gr.Column():
109
  text1 = gr.Textbox(interactive=False,label="Translation",lines=10,max_lines=10,)
110
- st=mic.stream(func, [mic,input_lan,lan,text,text1], [text,text1],show_progress=False)
111
  with gr.Row():
112
  sumer_ts=gr.Textbox(label="Summery of Transcription",interactive=False,lines=4,max_lines=4)
113
  sumer_tr=gr.Textbox(label="Summery of Translation",interactive=False,lines=4,max_lines=4)
 
107
 
108
  with gr.Column():
109
  text1 = gr.Textbox(interactive=False,label="Translation",lines=10,max_lines=10,)
110
+ st=mic.stream(func, [mic,input_lan,lan,text,text1], [text,text1],show_progress=False,)
111
  with gr.Row():
112
  sumer_ts=gr.Textbox(label="Summery of Transcription",interactive=False,lines=4,max_lines=4)
113
  sumer_tr=gr.Textbox(label="Summery of Translation",interactive=False,lines=4,max_lines=4)
transcription_service.py CHANGED
@@ -4,12 +4,36 @@ from openai import OpenAI
4
  import subprocess
5
  os.environ['GOOGLE_APPLICATION_CREDENTIALS'] = 'msq-ai-78bdccb055f4.json'
6
 
 
 
 
 
 
7
 
 
 
8
 
 
 
 
 
 
 
 
 
 
 
 
 
 
9
 
10
- convert_wav_to_flac = lambda wav_file: subprocess.run(['ffmpeg', '-i', wav_file, '-y', '-ar', '16000', '-ac', '1', wav_file.replace('.wav', '.flac')], stdout=subprocess.PIPE, stderr=subprocess.PIPE)
11
 
12
  def transcribe_speech_local(wav_file,language):
 
 
 
 
 
13
 
14
  """
15
  Transcribes a local WAV file using Google's Speech-to-Text API.
@@ -22,10 +46,14 @@ def transcribe_speech_local(wav_file,language):
22
  audio = speech.RecognitionAudio(content=content)
23
  config = speech.RecognitionConfig(
24
  encoding=speech.RecognitionConfig.AudioEncoding.LINEAR16,
 
 
 
 
25
 
26
  # Adjust this according to your file's sample rate
27
  language_code=language,
28
- enable_automatic_punctuation=True
29
  )
30
 
31
  response = client.recognize(config=config, audio=audio)
 
4
  import subprocess
5
  os.environ['GOOGLE_APPLICATION_CREDENTIALS'] = 'msq-ai-78bdccb055f4.json'
6
 
7
+ import os
8
+
9
+ def convert_wav_to_flac(wav_file_path):
10
+ # Extract the file name and extension from the input path
11
+
12
 
13
+ # Generate the output FLAC file path
14
+ flac_file_path = wav_file_path.replace(".wav", ".flac")
15
 
16
+ # Build the ffmpeg command
17
+ command = [
18
+ 'C:/Users/Hasan/AppData/Local/ffmpegio/ffmpeg-downloader/ffmpeg/bin/ffmpeg.exe',
19
+ '-i', wav_file_path,
20
+ '-y',
21
+ '-ar', '48000',
22
+ '-ac', '1',
23
+ flac_file_path
24
+ ]
25
+
26
+ # Execute the ffmpeg command
27
+ subprocess.call(command)
28
+ return flac_file_path
29
 
 
30
 
31
  def transcribe_speech_local(wav_file,language):
32
+ # print(wav_file,'wav_file_path\n')
33
+ # flac_file=convert_wav_to_flac(wav_file)
34
+
35
+ # flac_file=convert_wav_to_flac(wav_file)
36
+ # print(flac_file,'flac_file_path\n')
37
 
38
  """
39
  Transcribes a local WAV file using Google's Speech-to-Text API.
 
46
  audio = speech.RecognitionAudio(content=content)
47
  config = speech.RecognitionConfig(
48
  encoding=speech.RecognitionConfig.AudioEncoding.LINEAR16,
49
+ sample_rate_hertz=48000,
50
+ # model="default",
51
+ enable_spoken_punctuation=True,
52
+ model="latest_short",
53
 
54
  # Adjust this according to your file's sample rate
55
  language_code=language,
56
+ enable_automatic_punctuation=False
57
  )
58
 
59
  response = client.recognize(config=config, audio=audio)