Neal Caren committed on
Commit
4ccc97f
·
1 Parent(s): 80f0f94

fullgit commit -m '.DS_Store banished!'

Browse files
Files changed (1) hide show
  1. app.py +35 -8
app.py CHANGED
@@ -5,8 +5,12 @@ import subprocess
5
  from simple_diarizer.diarizer import Diarizer
6
  import streamlit as st
7
 
 
 
 
 
8
  def speech_to_text(uploaded):
9
- model = whisper.load_model('base')
10
  result = model.transcribe(uploaded,verbose=True)
11
  return f'You said: {result["text"]}'
12
 
@@ -26,7 +30,7 @@ def segment(nu_speakers):
26
 
27
  def audio_to_df(uploaded):
28
  monotize(uploaded)
29
- model = whisper.load_model('base')
30
  result = model.transcribe('mono.wav',verbose=True,
31
  without_timestamps=False)
32
  tdf = pd.DataFrame(result['segments'])
@@ -44,6 +48,11 @@ def add_preface(row):
44
  def transcribe(uploaded, nu_speakers):
45
  with st.spinner(text="Converting file..."):
46
  monotize('temp_audio')
 
 
 
 
 
47
  with st.spinner(text="Transcribing..."):
48
  tdf = audio_to_df(uploaded)
49
  with st.spinner(text="Segmenting..."):
@@ -71,24 +80,42 @@ def transcribe(uploaded, nu_speakers):
71
  for row in binned_df['output'].values:
72
  st.write(row)
73
  lines.append(row)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
74
 
75
- return '\n'.join(lines)
76
 
77
 
78
  descript = ("This web app creates transcripts using OpenAI's [Whisper](https://github.com/openai/whisper) to transcribe "
79
  "audio files combined with [Chau](https://github.com/cvqluu)'s [Simple Diarizer](https://github.com/cvqluu/simple_diarizer) "
80
  "to partition the text by speaker.\n"
81
- "* You can upload a audio or video file of up to 200MBs.\n"
82
  "* Creating the transcript takes some time. "
83
- "Using the default base transcription model, the process takes approximately 20% of the length of the audio file.\n "
84
- "* After uploading the file, **be sure to select the number of speakers**." )
85
 
86
  st.title("Automated Transcription")
87
  st.markdown(descript)
88
 
89
  form = st.form(key='my_form')
90
  uploaded = form.file_uploader("Choose a file")
91
- nu_speakers = form.slider('Number of speakers in audio file:', min_value=1, max_value=6, value=2, step=1)
92
  submit = form.form_submit_button("Transcribe!")
93
 
94
 
@@ -96,4 +123,4 @@ if submit:
96
  bytes_data = uploaded.getvalue()
97
  with open('temp_audio', 'wb') as outfile:
98
  outfile.write(bytes_data)
99
- text = transcribe('temp_audio', nu_speakers)
 
5
  from simple_diarizer.diarizer import Diarizer
6
  import streamlit as st
7
 
8
+
9
+ model_size = 'tiny'
10
+
11
+
12
  def speech_to_text(uploaded):
13
+ model = whisper.load_model(model_size)
14
  result = model.transcribe(uploaded,verbose=True)
15
  return f'You said: {result["text"]}'
16
 
 
30
 
31
  def audio_to_df(uploaded):
32
  monotize(uploaded)
33
+ model = whisper.load_model(model_size)
34
  result = model.transcribe('mono.wav',verbose=True,
35
  without_timestamps=False)
36
  tdf = pd.DataFrame(result['segments'])
 
48
  def transcribe(uploaded, nu_speakers):
49
  with st.spinner(text="Converting file..."):
50
  monotize('temp_audio')
51
+
52
+ audio_file = open('mono.wav', 'rb')
53
+ audio_bytes = audio_file.read()
54
+ st.audio('mono.wav', format='audio/wav')
55
+
56
  with st.spinner(text="Transcribing..."):
57
  tdf = audio_to_df(uploaded)
58
  with st.spinner(text="Segmenting..."):
 
80
  for row in binned_df['output'].values:
81
  st.write(row)
82
  lines.append(row)
83
+ tdf['speaker'] = tdf['speaker'].astype(int)
84
+
85
+ tdf_cols = ['speaker','start','end','text']
86
+ st.dataframe(tdf[tdf_cols])
87
+
88
+ st.download_button(
89
+ label="Download transcript as text file",
90
+ data='\n'.join(lines),
91
+ file_name='transcript.txt',
92
+ mime='text/plain',
93
+ )
94
+
95
+ st.download_button(
96
+ label="Download transcript as CSV (with time codes)",
97
+ data=tdf[tdf_cols].to_csv( float_format='%.2f', index=False).encode('utf-8'),
98
+ file_name='transcript.csv',
99
+ mime='text/csv',
100
+ )
101
 
102
+ return tdf[tdf_cols]
103
 
104
 
105
  descript = ("This web app creates transcripts using OpenAI's [Whisper](https://github.com/openai/whisper) to transcribe "
106
  "audio files combined with [Chau](https://github.com/cvqluu)'s [Simple Diarizer](https://github.com/cvqluu/simple_diarizer) "
107
  "to partition the text by speaker.\n"
108
+ "* You can upload an audio or video file of up to 200MBs.\n"
109
  "* Creating the transcript takes some time. "
110
+ "The process takes approximately 20% of the length of the audio file using the base Whisper model.\n "
111
+ "* After uploading the file, be sure to select the number of speakers." )
112
 
113
  st.title("Automated Transcription")
114
  st.markdown(descript)
115
 
116
  form = st.form(key='my_form')
117
  uploaded = form.file_uploader("Choose a file")
118
+ nu_speakers = form.slider('Number of speakers in recording:', min_value=1, max_value=8, value=2, step=1)
119
  submit = form.form_submit_button("Transcribe!")
120
 
121
 
 
123
  bytes_data = uploaded.getvalue()
124
  with open('temp_audio', 'wb') as outfile:
125
  outfile.write(bytes_data)
126
+ text_df = transcribe('temp_audio', nu_speakers)