Spaces:

NealCaren
/

transcript

Runtime error

App Files Files Community

Neal Caren committed on Sep 29, 2022

Commit

4ccc97f

1 Parent(s): 80f0f94

fullgit commit -m '.DS_Store banished!'

Browse files

Files changed (1) hide show

app.py +35 -8

app.py CHANGED Viewed

@@ -5,8 +5,12 @@ import subprocess
 from simple_diarizer.diarizer import Diarizer
 import streamlit as st
 def speech_to_text(uploaded):
-    model = whisper.load_model('base')
     result = model.transcribe(uploaded,verbose=True)
     return f'You said: {result["text"]}'
@@ -26,7 +30,7 @@ def segment(nu_speakers):
 def audio_to_df(uploaded):
     monotize(uploaded)
-    model = whisper.load_model('base')
     result = model.transcribe('mono.wav',verbose=True,
                           without_timestamps=False)
     tdf = pd.DataFrame(result['segments'])
@@ -44,6 +48,11 @@ def add_preface(row):
 def transcribe(uploaded, nu_speakers):
     with st.spinner(text="Converting file..."):
         monotize('temp_audio')
     with st.spinner(text="Transcribing..."):
         tdf = audio_to_df(uploaded)
     with st.spinner(text="Segmenting..."):
@@ -71,24 +80,42 @@ def transcribe(uploaded, nu_speakers):
     for row in binned_df['output'].values:
         st.write(row)
         lines.append(row)
-    return '\n'.join(lines)
 descript = ("This web app creates transcripts using OpenAI's [Whisper](https://github.com/openai/whisper) to transcribe "
             "audio files combined with [Chau](https://github.com/cvqluu)'s [Simple Diarizer](https://github.com/cvqluu/simple_diarizer) "
             "to partition the text by speaker.\n"
-            "* You can upload a audio or video file of up to 200MBs.\n"
             "* Creating the transcript takes some time. "
-            "Using the default base transcription model, the process takes approximately 20% of the length of the audio file.\n "
-            "* After uploading the file, **be sure to select the number of speakers**." )
 st.title("Automated Transcription")
 st.markdown(descript)
 form = st.form(key='my_form')
 uploaded = form.file_uploader("Choose a file")
-nu_speakers = form.slider('Number of speakers in audio file:', min_value=1, max_value=6, value=2, step=1)
 submit = form.form_submit_button("Transcribe!")
@@ -96,4 +123,4 @@ if submit:
     bytes_data = uploaded.getvalue()
     with open('temp_audio', 'wb') as outfile:
         outfile.write(bytes_data)
-    text = transcribe('temp_audio', nu_speakers)

 from simple_diarizer.diarizer import Diarizer
 import streamlit as st
+model_size = 'tiny'
 def speech_to_text(uploaded):
+    model = whisper.load_model(model_size)
     result = model.transcribe(uploaded,verbose=True)
     return f'You said: {result["text"]}'
 def audio_to_df(uploaded):
     monotize(uploaded)
+    model = whisper.load_model(model_size)
     result = model.transcribe('mono.wav',verbose=True,
                           without_timestamps=False)
     tdf = pd.DataFrame(result['segments'])
 def transcribe(uploaded, nu_speakers):
     with st.spinner(text="Converting file..."):
         monotize('temp_audio')
+    audio_file = open('mono.wav', 'rb')
+    audio_bytes = audio_file.read()
+    st.audio('mono.wav', format='audio/wav')
     with st.spinner(text="Transcribing..."):
         tdf = audio_to_df(uploaded)
     with st.spinner(text="Segmenting..."):
     for row in binned_df['output'].values:
         st.write(row)
         lines.append(row)
+    tdf['speaker'] = tdf['speaker'].astype(int)
+    tdf_cols = ['speaker','start','end','text']
+    st.dataframe(tdf[tdf_cols])
+    st.download_button(
+         label="Download transcript as text file",
+         data='\n'.join(lines),
+         file_name='transcript.txt',
+         mime='text/plain',
+         )
+    st.download_button(
+         label="Download transcript as CSV (with time codes)",
+         data=tdf[tdf_cols].to_csv( float_format='%.2f', index=False).encode('utf-8'),
+         file_name='transcript.csv',
+         mime='text/csv',
+         )
+    return tdf[tdf_cols]
 descript = ("This web app creates transcripts using OpenAI's [Whisper](https://github.com/openai/whisper) to transcribe "
             "audio files combined with [Chau](https://github.com/cvqluu)'s [Simple Diarizer](https://github.com/cvqluu/simple_diarizer) "
             "to partition the text by speaker.\n"
+            "* You can upload an audio or video file of up to 200MBs.\n"
             "* Creating the transcript takes some time. "
+            "The process takes approximately 20% of the length of the audio file using the base Whisper model.\n "
+            "* After uploading the file, be sure to select the number of speakers." )
 st.title("Automated Transcription")
 st.markdown(descript)
 form = st.form(key='my_form')
 uploaded = form.file_uploader("Choose a file")
+nu_speakers = form.slider('Number of speakers in recording:', min_value=1, max_value=8, value=2, step=1)
 submit = form.form_submit_button("Transcribe!")
     bytes_data = uploaded.getvalue()
     with open('temp_audio', 'wb') as outfile:
         outfile.write(bytes_data)
+    text_df = transcribe('temp_audio', nu_speakers)