Spaces:
Runtime error
Runtime error
Neal Caren
committed on
Commit
·
ee99df3
1
Parent(s):
c0d73db
Fixed download button.
Browse files
app.py
CHANGED
|
@@ -4,9 +4,15 @@ import whisper
|
|
| 4 |
import subprocess
|
| 5 |
from simple_diarizer.diarizer import Diarizer
|
| 6 |
import streamlit as st
|
|
|
|
| 7 |
|
|
|
|
| 8 |
|
| 9 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
| 10 |
|
| 11 |
|
| 12 |
def speech_to_text(uploaded):
|
|
@@ -84,22 +90,7 @@ def transcribe(uploaded, nu_speakers):
|
|
| 84 |
|
| 85 |
tdf_cols = ['speaker','start','end','text']
|
| 86 |
#st.dataframe(tdf[tdf_cols])
|
| 87 |
-
|
| 88 |
-
st.download_button(
|
| 89 |
-
label="Download transcript as text file",
|
| 90 |
-
data='\n'.join(lines),
|
| 91 |
-
file_name='transcript.txt',
|
| 92 |
-
mime='text/plain',
|
| 93 |
-
)
|
| 94 |
-
|
| 95 |
-
st.download_button(
|
| 96 |
-
label="Download transcript as CSV (with time codes)",
|
| 97 |
-
data=tdf[tdf_cols].to_csv( float_format='%.2f', index=False).encode('utf-8'),
|
| 98 |
-
file_name='transcript.csv',
|
| 99 |
-
mime='text/csv',
|
| 100 |
-
)
|
| 101 |
-
|
| 102 |
-
return tdf[tdf_cols]
|
| 103 |
|
| 104 |
|
| 105 |
descript = ("This web app creates transcripts using OpenAI's [Whisper](https://github.com/openai/whisper) to transcribe "
|
|
@@ -123,4 +114,12 @@ if submit:
|
|
| 123 |
bytes_data = uploaded.getvalue()
|
| 124 |
with open('temp_audio', 'wb') as outfile:
|
| 125 |
outfile.write(bytes_data)
|
| 126 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 4 |
import subprocess
|
| 5 |
from simple_diarizer.diarizer import Diarizer
|
| 6 |
import streamlit as st
|
| 7 |
+
import base64
|
| 8 |
|
| 9 |
+
model_size = 'tiny'
|
| 10 |
|
| 11 |
+
|
| 12 |
+
|
| 13 |
+
def create_download_link(val, filename, label):
    """Return an HTML anchor that downloads *val* through a base64 data URI.

    Args:
        val: Payload to embed in the link. Bytes are used as-is; a ``str``
            is encoded as UTF-8 first, because ``base64.b64encode`` only
            accepts bytes-like objects.
        filename: Name placed in the anchor's ``download`` attribute.
        label: Visible link text.

    Returns:
        A string of the form
        ``<a href="data:application/octet-stream;base64,..." download="...">...</a>``.
    """
    from html import escape

    if isinstance(val, str):
        val = val.encode('utf-8')
    b64 = base64.b64encode(val).decode('ascii')
    # Escape the interpolated text so a quote or angle bracket in the
    # filename or label cannot terminate the attribute or inject markup.
    safe_filename = escape(str(filename), quote=True)
    safe_label = escape(str(label), quote=False)
    return (
        f'<a href="data:application/octet-stream;base64,{b64}" '
        f'download="{safe_filename}">{safe_label}</a>'
    )
|
| 16 |
|
| 17 |
|
| 18 |
def speech_to_text(uploaded):
|
|
|
|
| 90 |
|
| 91 |
tdf_cols = ['speaker','start','end','text']
|
| 92 |
#st.dataframe(tdf[tdf_cols])
|
| 93 |
+
return {'text':lines, 'df': tdf[tdf_cols]}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 94 |
|
| 95 |
|
| 96 |
descript = ("This web app creates transcripts using OpenAI's [Whisper](https://github.com/openai/whisper) to transcribe "
|
|
|
|
| 114 |
bytes_data = uploaded.getvalue()
|
| 115 |
with open('temp_audio', 'wb') as outfile:
|
| 116 |
outfile.write(bytes_data)
|
| 117 |
+
transcript = transcribe('temp_audio', nu_speakers)
|
| 118 |
+
|
| 119 |
+
csv = transcript['df'].to_csv( float_format='%.2f', index=False).encode('utf-8')
|
| 120 |
+
text = '\n'.join(transcript['text']).encode('utf-8')
|
| 121 |
+
download_url = create_download_link(text, 'transcript.txt', 'Download transcript as plain text.')
|
| 122 |
+
st.markdown(download_url, unsafe_allow_html=True)
|
| 123 |
+
|
| 124 |
+
download_url = create_download_link(csv, 'transcript.csv', 'Download transcript as CSV (with time codes)')
|
| 125 |
+
st.markdown(download_url, unsafe_allow_html=True)
|