Ilyas KHIAT committed on
Commit
5c19064
·
1 Parent(s): b3efaf6

second commit

Browse files
utils/audit/audit_audio.py CHANGED
@@ -4,7 +4,16 @@ import scipy.io.wavfile as wavfile
4
  from pydub import AudioSegment
5
  import io
6
  import tiktoken
7
- from transcript_audio import transcript_audio
 
 
 
 
 
 
 
 
 
8
 
9
  def count_tokens(input_string: str) -> int:
10
  tokenizer = tiktoken.get_encoding("cl100k_base")
@@ -35,7 +44,7 @@ def evaluate_audio_quality(file) -> dict:
35
  snr = calculate_snr(audio_data)
36
 
37
  #get the transcription of the audio
38
- transcription = transcript_audio(file)
39
 
40
  return {"volume": volume, "SNR": snr,"transcription": transcription,"number_of_tokens": count_tokens(transcription),"duration": duration}
41
 
 
4
  from pydub import AudioSegment
5
  import io
6
  import tiktoken
7
+ from openai import OpenAI
8
+
9
+ def transcript_audio_func(audio_file):
10
+ client = OpenAI()
11
+ transcription = client.audio.transcriptions.create(
12
+ model="whisper",
13
+ file=audio_file
14
+ )
15
+
16
+ return transcription.text
17
 
18
  def count_tokens(input_string: str) -> int:
19
  tokenizer = tiktoken.get_encoding("cl100k_base")
 
44
  snr = calculate_snr(audio_data)
45
 
46
  #get the transcription of the audio
47
+ transcription = transcript_audio_func(file)
48
 
49
  return {"volume": volume, "SNR": snr,"transcription": transcription,"number_of_tokens": count_tokens(transcription),"duration": duration}
50
 
utils/audit/transcript_audio.py CHANGED
@@ -1,11 +1,13 @@
1
  from openai import OpenAI
2
- client = OpenAI()
3
 
4
 
5
- def transcript_audio(audio_file):
 
 
 
6
  transcription = client.audio.transcriptions.create(
7
  model="whisper",
8
  file=audio_file
9
  )
10
-
11
  return transcription.text
 
1
  from openai import OpenAI
 
2
 
3
 
4
+
5
+
6
+ def transcript_audio_func(audio_file):
7
+ client = OpenAI()
8
  transcription = client.audio.transcriptions.create(
9
  model="whisper",
10
  file=audio_file
11
  )
12
+
13
  return transcription.text