Update app.py
Browse files
app.py
CHANGED
|
@@ -1,109 +1,41 @@
|
|
|
|
|
| 1 |
import io
|
| 2 |
import os
|
| 3 |
-
import tempfile
|
| 4 |
import threading
|
| 5 |
-
import
|
| 6 |
-
import logging
|
| 7 |
-
from urllib.parse import urlparse
|
| 8 |
-
|
| 9 |
-
import dash
|
| 10 |
-
from dash import dcc, html, Input, Output, State, callback_context
|
| 11 |
import dash_bootstrap_components as dbc
|
| 12 |
-
|
| 13 |
-
|
| 14 |
-
import requests
|
| 15 |
-
from pytube import YouTube
|
| 16 |
-
from pydub import AudioSegment
|
| 17 |
import openai
|
|
|
|
| 18 |
|
| 19 |
-
#
|
| 20 |
-
try:
|
| 21 |
-
from moviepy.editor import VideoFileClip
|
| 22 |
-
except ImportError:
|
| 23 |
-
try:
|
| 24 |
-
import moviepy.editor as mpy
|
| 25 |
-
VideoFileClip = mpy.VideoFileClip
|
| 26 |
-
except ImportError:
|
| 27 |
-
try:
|
| 28 |
-
import moviepy
|
| 29 |
-
VideoFileClip = moviepy.VideoFileClip
|
| 30 |
-
except ImportError:
|
| 31 |
-
logging.error("Failed to import VideoFileClip from moviepy. Please check the installation.")
|
| 32 |
-
VideoFileClip = None
|
| 33 |
-
|
| 34 |
-
# Set up logging
|
| 35 |
logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(levelname)s - %(message)s')
|
| 36 |
logger = logging.getLogger(__name__)
|
| 37 |
|
| 38 |
# Initialize the Dash app
|
| 39 |
-
app =
|
| 40 |
-
|
| 41 |
-
# Retrieve the OpenAI API key from Hugging Face Spaces
|
| 42 |
-
OPENAI_API_KEY = os.environ.get("OPENAI_API_KEY")
|
| 43 |
-
if not OPENAI_API_KEY:
|
| 44 |
-
logger.error("OPENAI_API_KEY not found in environment variables")
|
| 45 |
-
raise ValueError("OPENAI_API_KEY not set")
|
| 46 |
-
|
| 47 |
-
openai.api_key = OPENAI_API_KEY
|
| 48 |
-
|
| 49 |
-
def process_media(contents, filename, url):
|
| 50 |
-
logger.info("Starting media processing")
|
| 51 |
-
try:
|
| 52 |
-
if contents:
|
| 53 |
-
content_type, content_string = contents.split(',')
|
| 54 |
-
decoded = base64.b64decode(content_string)
|
| 55 |
-
suffix = os.path.splitext(filename)[1]
|
| 56 |
-
with tempfile.NamedTemporaryFile(delete=False, suffix=suffix) as temp_file:
|
| 57 |
-
temp_file.write(decoded)
|
| 58 |
-
temp_file_path = temp_file.name
|
| 59 |
-
logger.info(f"File uploaded: {temp_file_path}")
|
| 60 |
-
elif url:
|
| 61 |
-
temp_file_path = download_media(url)
|
| 62 |
-
else:
|
| 63 |
-
logger.error("No input provided")
|
| 64 |
-
raise ValueError("No input provided")
|
| 65 |
|
| 66 |
-
|
| 67 |
-
|
| 68 |
-
|
| 69 |
-
transcript = transcribe_audio(audio_file_path)
|
| 70 |
-
os.unlink(audio_file_path)
|
| 71 |
-
else:
|
| 72 |
-
logger.info("Audio file detected, transcribing directly")
|
| 73 |
-
transcript = transcribe_audio(temp_file_path)
|
| 74 |
|
| 75 |
-
|
| 76 |
-
|
| 77 |
-
except Exception as e:
|
| 78 |
-
logger.error(f"Error in process_media: {str(e)}")
|
| 79 |
-
raise
|
| 80 |
-
|
| 81 |
-
def transcribe_audio(file_path):
|
| 82 |
-
logger.info(f"Transcribing audio: {file_path}")
|
| 83 |
-
try:
|
| 84 |
-
with open(file_path, "rb") as audio_file:
|
| 85 |
-
transcript = openai.Audio.transcribe("whisper-1", audio_file)
|
| 86 |
-
logger.info("Transcription completed successfully")
|
| 87 |
-
return transcript["text"]
|
| 88 |
-
except Exception as e:
|
| 89 |
-
logger.error(f"Error during transcription: {str(e)}")
|
| 90 |
-
raise
|
| 91 |
|
|
|
|
| 92 |
app.layout = dbc.Container([
|
|
|
|
| 93 |
dbc.Row([
|
| 94 |
-
|
| 95 |
-
html.H1("Audio/Video Transcription App", className="text-center my-4"),
|
| 96 |
-
])
|
| 97 |
-
]),
|
| 98 |
-
dbc.Row([
|
| 99 |
dbc.Col([
|
| 100 |
dbc.Card([
|
| 101 |
dbc.CardBody([
|
| 102 |
dcc.Upload(
|
| 103 |
-
id='upload-
|
| 104 |
children=html.Div([
|
| 105 |
'Drag and Drop or ',
|
| 106 |
-
html.A('Select Audio
|
| 107 |
]),
|
| 108 |
style={
|
| 109 |
'width': '100%',
|
|
@@ -117,100 +49,107 @@ app.layout = dbc.Container([
|
|
| 117 |
},
|
| 118 |
multiple=False
|
| 119 |
),
|
| 120 |
-
html.Div(id='
|
| 121 |
-
dbc.
|
| 122 |
-
|
| 123 |
-
|
| 124 |
-
|
| 125 |
-
|
| 126 |
-
|
| 127 |
-
|
| 128 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 129 |
])
|
| 130 |
])
|
| 131 |
-
],
|
| 132 |
])
|
| 133 |
], fluid=True)
|
| 134 |
|
| 135 |
-
|
| 136 |
-
|
| 137 |
-
|
| 138 |
-
|
| 139 |
-
)
|
| 140 |
-
|
| 141 |
-
|
| 142 |
-
|
| 143 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 144 |
|
| 145 |
@app.callback(
|
| 146 |
-
Output(
|
| 147 |
-
|
| 148 |
-
|
| 149 |
-
|
| 150 |
-
|
| 151 |
-
|
| 152 |
-
Input("progress-interval", "n_intervals"),
|
| 153 |
-
State("upload-media", "contents"),
|
| 154 |
-
State("upload-media", "filename"),
|
| 155 |
-
State("media-url", "value"),
|
| 156 |
-
State("transcription-store", "data"),
|
| 157 |
-
prevent_initial_call=True
|
| 158 |
)
|
| 159 |
-
def
|
| 160 |
-
|
| 161 |
-
|
| 162 |
-
|
| 163 |
-
|
| 164 |
-
|
| 165 |
-
|
| 166 |
-
|
| 167 |
-
|
| 168 |
-
|
| 169 |
-
|
| 170 |
-
return f"An error occurred: {str(e)}"
|
| 171 |
-
|
| 172 |
-
thread = threading.Thread(target=transcribe)
|
| 173 |
-
thread.start()
|
| 174 |
-
return html.Div("Processing..."), {'display': 'none'}, "", False, None
|
| 175 |
-
|
| 176 |
-
elif ctx.triggered_id == "progress-interval":
|
| 177 |
-
if stored_transcript:
|
| 178 |
-
return display_transcript(stored_transcript), {'display': 'block'}, "", True, stored_transcript
|
| 179 |
-
dots = "." * (n_intervals % 4)
|
| 180 |
-
return html.Div("Processing" + dots), {'display': 'none'}, "", False, None
|
| 181 |
-
|
| 182 |
-
thread = threading.current_thread()
|
| 183 |
-
if hasattr(thread, 'result'):
|
| 184 |
-
transcript = thread.result
|
| 185 |
-
if transcript and not transcript.startswith("An error occurred"):
|
| 186 |
-
logger.info("Transcription successful")
|
| 187 |
-
return display_transcript(transcript), {'display': 'block'}, "", True, transcript
|
| 188 |
-
else:
|
| 189 |
-
logger.error(f"Transcription failed: {transcript}")
|
| 190 |
-
return html.Div(transcript), {'display': 'none'}, "", True, None
|
| 191 |
-
|
| 192 |
-
return dash.no_update, dash.no_update, dash.no_update, dash.no_update, dash.no_update
|
| 193 |
-
|
| 194 |
-
def display_transcript(transcript):
|
| 195 |
-
return dbc.Card([
|
| 196 |
-
dbc.CardBody([
|
| 197 |
-
html.H5("Transcription Result"),
|
| 198 |
-
html.Pre(transcript, style={"white-space": "pre-wrap", "word-wrap": "break-word"})
|
| 199 |
-
])
|
| 200 |
-
])
|
| 201 |
|
| 202 |
@app.callback(
|
| 203 |
-
Output("download-
|
| 204 |
-
Input("download
|
| 205 |
-
|
| 206 |
-
prevent_initial_call=True
|
| 207 |
)
|
| 208 |
-
def
|
| 209 |
-
if
|
| 210 |
-
|
| 211 |
-
return
|
| 212 |
|
| 213 |
if __name__ == '__main__':
|
| 214 |
-
|
| 215 |
app.run(debug=True, host='0.0.0.0', port=7860)
|
| 216 |
-
|
|
|
|
| 1 |
+
import base64
|
| 2 |
import io
|
| 3 |
import os
|
|
|
|
| 4 |
import threading
|
| 5 |
+
from dash import Dash, dcc, html, Input, Output, State, callback
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 6 |
import dash_bootstrap_components as dbc
|
| 7 |
+
import tempfile
|
| 8 |
+
import logging
|
|
|
|
|
|
|
|
|
|
| 9 |
import openai
|
| 10 |
+
from pydub import AudioSegment
|
| 11 |
|
| 12 |
+
# Configure logging
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 13 |
logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(levelname)s - %(message)s')
|
| 14 |
logger = logging.getLogger(__name__)
|
| 15 |
|
| 16 |
# Initialize the Dash app
|
| 17 |
+
app = Dash(__name__, external_stylesheets=[dbc.themes.BOOTSTRAP])
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 18 |
|
| 19 |
+
# Global variables
|
| 20 |
+
generated_file = None
|
| 21 |
+
transcription_text = ""
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 22 |
|
| 23 |
+
# Set up OpenAI API key
|
| 24 |
+
openai.api_key = os.getenv("OPENAI_API_KEY")
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 25 |
|
| 26 |
+
# Layout
|
| 27 |
app.layout = dbc.Container([
|
| 28 |
+
html.H1("Audio Transcription and Diarization App", className="text-center my-4"),
|
| 29 |
dbc.Row([
|
| 30 |
+
# Left card for input
|
|
|
|
|
|
|
|
|
|
|
|
|
| 31 |
dbc.Col([
|
| 32 |
dbc.Card([
|
| 33 |
dbc.CardBody([
|
| 34 |
dcc.Upload(
|
| 35 |
+
id='upload-audio',
|
| 36 |
children=html.Div([
|
| 37 |
'Drag and Drop or ',
|
| 38 |
+
html.A('Select Audio File')
|
| 39 |
]),
|
| 40 |
style={
|
| 41 |
'width': '100%',
|
|
|
|
| 49 |
},
|
| 50 |
multiple=False
|
| 51 |
),
|
| 52 |
+
html.Div(id='output-audio-upload'),
|
| 53 |
+
dbc.Spinner(html.Div(id='transcription-status'), color="primary", type="grow"),
|
| 54 |
+
])
|
| 55 |
+
], className="mb-4")
|
| 56 |
+
], md=6),
|
| 57 |
+
# Right card for output
|
| 58 |
+
dbc.Col([
|
| 59 |
+
dbc.Card([
|
| 60 |
+
dbc.CardBody([
|
| 61 |
+
html.H4("Diarized Transcription Preview", className="card-title"),
|
| 62 |
+
html.Div(id='transcription-preview', style={'whiteSpace': 'pre-wrap'}),
|
| 63 |
+
html.Br(),
|
| 64 |
+
dbc.Button("Download Transcription", id="btn-download", color="primary", className="mt-3", disabled=True),
|
| 65 |
+
dcc.Download(id="download-transcription")
|
| 66 |
])
|
| 67 |
])
|
| 68 |
+
], md=6)
|
| 69 |
])
|
| 70 |
], fluid=True)
|
| 71 |
|
| 72 |
+
def transcribe_and_diarize_audio(contents, filename):
    """Decode an uploaded audio file, transcribe it with OpenAI Whisper, and
    stash the result in module-level state for the preview/download callbacks.

    Parameters
    ----------
    contents : str
        Dash upload payload of the form ``data:<mime>;base64,<data>``.
    filename : str
        Original filename; its extension picks the temp-file suffix and gates
        the supported-format check.

    Returns
    -------
    tuple[str, bool]
        ``(status_message, success)``. On success the globals
        ``transcription_text`` (formatted transcript) and ``generated_file``
        (BytesIO snapshot for download) are set as a side effect.
    """
    global generated_file, transcription_text
    # Pre-bind both paths so the finally-block cleanup cannot raise NameError
    # when an exception fires before either file is created.
    temp_audio_file_path = None
    wav_path = None
    try:
        content_type, content_string = contents.split(',')
        decoded = base64.b64decode(content_string)

        with tempfile.NamedTemporaryFile(delete=False, suffix=os.path.splitext(filename)[1]) as temp_audio_file:
            temp_audio_file.write(decoded)
            temp_audio_file_path = temp_audio_file.name

        logger.info(f"File uploaded: {temp_audio_file_path}")

        if not filename.lower().endswith(('.wav', '.mp3', '.ogg', '.flac')):
            return "Unsupported file format. Please upload an audio file.", False

        logger.info("Audio file detected, transcribing with OpenAI")

        # Normalize to WAV so Whisper always receives a supported container.
        audio = AudioSegment.from_file(temp_audio_file_path)
        wav_path = temp_audio_file_path + ".wav"
        audio.export(wav_path, format="wav")

        # NOTE(review): the Whisper API does not do speaker diarization — the
        # previous `speaker_detection=2` kwarg is not a real parameter, and the
        # second transcribe() call re-read an already-exhausted file handle.
        # Instead, request verbose_json once to get timestamped segments and
        # label them generically.
        with open(wav_path, "rb") as audio_file:
            transcript = openai.Audio.transcribe(
                "whisper-1", audio_file, response_format="verbose_json"
            )

        segments = transcript.get("segments")
        if segments:
            transcription_text = "\n\n".join(
                f"Segment {i + 1}: {segment['text'].strip()}"
                for i, segment in enumerate(segments)
            )
        else:
            # Fall back to the plain transcript when no segments are returned.
            transcription_text = transcript["text"]

        logger.info("Transcription and diarization completed successfully")

        # Snapshot the transcript as bytes for the download callback.
        generated_file = io.BytesIO(transcription_text.encode())
        return "Transcription and diarization completed successfully!", True
    except Exception as e:
        logger.error(f"Error during transcription and diarization: {str(e)}")
        return f"An error occurred during transcription and diarization: {str(e)}", False
    finally:
        # Best-effort cleanup; a path of None means we never got far enough
        # to create that file.
        if temp_audio_file_path and os.path.exists(temp_audio_file_path):
            os.unlink(temp_audio_file_path)
        if wav_path and os.path.exists(wav_path):
            os.unlink(wav_path)
|
| 121 |
|
| 122 |
@app.callback(
    [Output('output-audio-upload', 'children'),
     Output('transcription-status', 'children'),
     Output('transcription-preview', 'children'),
     Output('btn-download', 'disabled')],
    [Input('upload-audio', 'contents')],
    [State('upload-audio', 'filename')]
)
def update_output(contents, filename):
    """Dash callback: run transcription on upload and fan results out to the UI.

    Parameters
    ----------
    contents : str | None
        Base64 upload payload from dcc.Upload; None before any upload.
    filename : str | None
        Name of the uploaded file.

    Returns
    -------
    tuple
        (upload message, status message, transcript preview, download-disabled flag).
    """
    if contents is None:
        return "No file uploaded.", "", "", True

    status_message, success = transcribe_and_diarize_audio(contents, filename)

    if success:
        # Cap the inline preview at 1000 chars so huge transcripts don't
        # swamp the card; the full text remains available via download.
        preview = transcription_text[:1000] + "..." if len(transcription_text) > 1000 else transcription_text
        # Interpolate the actual filename (the old f-strings had a literal
        # "(unknown)" where {filename} belonged).
        return f"File {filename} processed successfully.", status_message, preview, False
    else:
        return f"File {filename} could not be processed.", status_message, "", True
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 141 |
|
| 142 |
@app.callback(
    Output("download-transcription", "data"),
    Input("btn-download", "n_clicks"),
    prevent_initial_call=True,
)
def download_transcription(n_clicks):
    """Dash callback: serve the stored transcript as a text-file download.

    Returns None (no download) when there is nothing to serve.
    """
    # Guard both the no-click case and the race where the button fires before
    # any transcription has populated generated_file — calling .getvalue() on
    # None would raise AttributeError inside the callback.
    if n_clicks is None or generated_file is None:
        return None
    return dcc.send_bytes(generated_file.getvalue(), "diarized_transcription.txt")
|
| 151 |
|
| 152 |
if __name__ == '__main__':
    # Use the module logger (configured at import time) instead of bare print
    # so startup/shutdown messages share the app's log format and destination.
    logger.info("Starting the Dash application...")
    # app.run blocks until the server stops; port 7860 is the Hugging Face
    # Spaces convention.
    app.run(debug=True, host='0.0.0.0', port=7860)
    logger.info("Dash application has finished running.")
|