Spaces:

MicroHealth
/

AV-to-transcripts

Paused

App Files Files Community

bluenevus committed on Apr 26, 2025

Commit

110c781

verified ·

1 Parent(s): b3174ad

Update app.py

Browse files

Files changed (1) hide show

app.py +106 -105

app.py CHANGED Viewed

@@ -2,12 +2,15 @@ import base64
 import io
 import os
 import threading
-from dash import Dash, dcc, html, Input, Output, State, callback
-import dash_bootstrap_components as dbc
 import tempfile
 import logging
 import openai
 from pydub import AudioSegment
 # Configure logging
 logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(levelname)s - %(message)s')
@@ -25,134 +28,132 @@ openai.api_key = os.getenv("OPENAI_API_KEY")
 # Layout
 app.layout = dbc.Container([
-    html.H1("Audio Transcription and Diarization App", className="text-center my-4"),
-    dbc.Row([
-        # Left card for input
-        dbc.Col([
-            dbc.Card([
-                dbc.CardBody([
-                    dcc.Upload(
-                        id='upload-audio',
-                        children=html.Div([
-                            'Drag and Drop or ',
-                            html.A('Select Audio File')
-                        ]),
-                        style={
-                            'width': '100%',
-                            'height': '60px',
-                            'lineHeight': '60px',
-                            'borderWidth': '1px',
-                            'borderStyle': 'dashed',
-                            'borderRadius': '5px',
-                            'textAlign': 'center',
-                            'margin': '10px'
-                        },
-                        multiple=False
-                    ),
-                    html.Div(id='output-audio-upload'),
-                    dbc.Spinner(html.Div(id='transcription-status'), color="primary", type="grow"),
-                ])
-            ], className="mb-4")
-        ], md=6),
-        # Right card for output
-        dbc.Col([
-            dbc.Card([
-                dbc.CardBody([
-                    html.H4("Diarized Transcription Preview", className="card-title"),
-                    html.Div(id='transcription-preview', style={'whiteSpace': 'pre-wrap'}),
-                    html.Br(),
-                    dbc.Button("Download Transcription", id="btn-download", color="primary", className="mt-3", disabled=True),
-                    dcc.Download(id="download-transcription")
-                ])
-            ])
-        ], md=6)
     ])
 ], fluid=True)
-def transcribe_and_diarize_audio(contents, filename):
     global generated_file, transcription_text
     temp_audio_file = None
-    wav_path = None
     try:
-        content_type, content_string = contents.split(',')
-        decoded = base64.b64decode(content_string)
-        # Create a temporary file that won't be immediately deleted
-        temp_audio_file = tempfile.NamedTemporaryFile(delete=False, suffix=os.path.splitext(filename)[1])
-        temp_audio_file.write(decoded)
-        temp_audio_file.close()  # Close the file but don't delete it yet
-        temp_audio_file_path = temp_audio_file.name
-        logger.info(f"File uploaded: {temp_audio_file_path}")
-        if filename.lower().endswith(('.wav', '.mp3', '.ogg', '.flac')):
-            logger.info("Audio file detected, transcribing with OpenAI")
-            # Convert audio to wav format if needed
-            audio = AudioSegment.from_file(temp_audio_file_path)
-            wav_path = temp_audio_file_path + ".wav"
             audio.export(wav_path, format="wav")
-            with open(wav_path, "rb") as audio_file:
-                # Transcribe
-                transcript = openai.Audio.transcribe("whisper-1", audio_file)
-                # Rewind the file for diarization
-                audio_file.seek(0)
-                # Perform diarization (speaker detection)
-                diarized_transcript = openai.Audio.transcribe("whisper-1", audio_file, response_format="verbose_json")
-            logger.info(f"OpenAI API Response: {diarized_transcript}")
-            # Format the diarized transcript
-            formatted_transcript = ""
-            if 'segments' in diarized_transcript:
-                for segment in diarized_transcript["segments"]:
-                    speaker = segment.get('speaker', 'Unknown')
-                    text = segment.get('text', '')
-                    formatted_transcript += f"Speaker {speaker}: {text}\n\n"
-            else:
-                # If no segments, use the full transcript
-                formatted_transcript = transcript.get('text', 'No transcription available.')
-            transcription_text = formatted_transcript
-            logger.info("Transcription and diarization completed successfully")
-            # Prepare the transcription for download
-            generated_file = io.BytesIO(transcription_text.encode())
-            return "Transcription and diarization completed successfully!", True
         else:
-            return "Unsupported file format. Please upload an audio file.", False
     except Exception as e:
-        logger.error(f"Error during transcription and diarization: {str(e)}")
-        return f"An error occurred during transcription and diarization: {str(e)}", False
     finally:
-        # Clean up temporary files
         if temp_audio_file and os.path.exists(temp_audio_file.name):
             os.unlink(temp_audio_file.name)
-        if wav_path and os.path.exists(wav_path):
             os.unlink(wav_path)
 @app.callback(
-    [Output('output-audio-upload', 'children'),
      Output('transcription-status', 'children'),
      Output('transcription-preview', 'children'),
      Output('btn-download', 'disabled')],
-    [Input('upload-audio', 'contents')],
-    [State('upload-audio', 'filename')]
 )
-def update_output(contents, filename):
-    if contents is None:
-        return "No file uploaded.", "", "", True
-    status_message, success = transcribe_and_diarize_audio(contents, filename)
     if success:
         preview = transcription_text[:1000] + "..." if len(transcription_text) > 1000 else transcription_text
-        return f"File {filename} processed successfully.", status_message, preview, False
     else:
-        return f"File {filename} could not be processed.", status_message, "", True
 @app.callback(
     Output("download-transcription", "data"),

 import io
 import os
 import threading
 import tempfile
 import logging
 import openai
+from dash import Dash, dcc, html, Input, Output, State, callback
+import dash_bootstrap_components as dbc
 from pydub import AudioSegment
+import requests
+from pytube import YouTube
+import moviepy.editor as mp
 # Configure logging
 logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(levelname)s - %(message)s')
 # Layout: a fluid container with the page title and a single card that holds,
 # top to bottom, the upload area, the URL entry, the status spinner, the
 # transcript preview and the download control.
 app.layout = dbc.Container([
+    html.H1("Audio/Video Transcription and Diarization App", className="text-center my-4"),
+    dbc.Card([
+        dbc.CardBody([
             # Drag-and-drop / file-picker upload; multiple=False restricts it to one file.
+            dcc.Upload(
+                id='upload-media',
+                children=html.Div([
+                    'Drag and Drop or ',
+                    html.A('Select Audio/Video File')
+                ]),
+                style={
+                    'width': '100%',
+                    'height': '60px',
+                    'lineHeight': '60px',
+                    'borderWidth': '1px',
+                    'borderStyle': 'dashed',
+                    'borderRadius': '5px',
+                    'textAlign': 'center',
+                    'margin': '10px'
+                },
+                multiple=False
+            ),
             # Placeholder filled by the update_output callback with a short result message.
+            html.Div(id='output-media-upload'),
             # Alternative input: a URL whose value is read when the button is clicked.
+            dbc.Input(id="url-input", type="text", placeholder="Enter audio/video URL (including YouTube)", className="mb-3"),
+            dbc.Button("Process URL", id="process-url-button", color="primary", className="mb-3"),
             # Status text is wrapped in a spinner component.
+            dbc.Spinner(html.Div(id='transcription-status'), color="primary", type="grow"),
+            html.H4("Diarized Transcription Preview", className="mt-4"),
             # 'pre-wrap' keeps the line breaks of the transcript text in the preview.
+            html.Div(id='transcription-preview', style={'whiteSpace': 'pre-wrap'}),
+            html.Br(),
             # Starts disabled; the update_output callback sets 'disabled' on this button.
+            dbc.Button("Download Transcription", id="btn-download", color="primary", className="mt-3", disabled=True),
+            dcc.Download(id="download-transcription")
+        ])
     ])
 ], fluid=True)
# Extensions routed to moviepy (video containers) and to pydub (audio files).
_VIDEO_EXTENSIONS = ('.mp4', '.avi', '.mov', '.flv', '.wmv')
_AUDIO_EXTENSIONS = ('.wav', '.mp3', '.ogg', '.flac')


def process_media(file_path, is_url=False, filename=None):
    """Transcribe an uploaded file or a remote URL and cache the transcript.

    Args:
        file_path: The raw file bytes when ``is_url`` is false, otherwise the
            URL string to fetch (YouTube links are downloaded with pytube,
            anything else with ``requests``).
        is_url: Whether ``file_path`` is a URL rather than file content.
        filename: Original name of an uploaded file. Only its extension is
            used, to decide how the media is decoded. Without it an upload
            cannot be classified and is reported as unsupported.

    Returns:
        A ``(status_message, success)`` tuple. On success the module-level
        ``transcription_text`` and ``generated_file`` are replaced with the
        new transcript (as ``str`` and as an ``io.BytesIO`` respectively).
        All errors are caught, logged and reported through the message.
    """
    global generated_file, transcription_text
    from mimetypes import guess_extension
    from urllib.parse import urlparse

    temp_audio_file = None
    wav_path = None
    try:
        audio_only = False
        if is_url:
            if 'youtube.com' in file_path or 'youtu.be' in file_path:
                yt = YouTube(file_path)
                stream = yt.streams.filter(only_audio=True).first()
                if stream is None:
                    return "No audio stream is available for this YouTube URL.", False
                temp_audio_file = tempfile.NamedTemporaryFile(delete=False, suffix='.mp4')
                # Release our handle before pytube writes to the same path.
                temp_audio_file.close()
                stream.download(
                    output_path=os.path.dirname(temp_audio_file.name),
                    filename=os.path.basename(temp_audio_file.name),
                )
                # The stream was selected with only_audio=True, so it is
                # decoded as audio below even though the suffix is '.mp4'.
                audio_only = True
            else:
                response = requests.get(file_path, timeout=60)
                response.raise_for_status()
                # The temp file must carry the media extension, otherwise the
                # format detection below can never match.
                suffix = os.path.splitext(urlparse(file_path).path)[1]
                if not suffix:
                    mime_type = response.headers.get('Content-Type', '').split(';')[0].strip()
                    suffix = guess_extension(mime_type) or ''
                with tempfile.NamedTemporaryFile(delete=False, suffix=suffix) as temp_audio_file:
                    temp_audio_file.write(response.content)
        else:
            suffix = os.path.splitext(filename or '')[1]
            with tempfile.NamedTemporaryFile(delete=False, suffix=suffix) as temp_audio_file:
                temp_audio_file.write(file_path)

        source_path = temp_audio_file.name
        file_extension = os.path.splitext(source_path)[1].lower()

        if audio_only or file_extension in _AUDIO_EXTENSIONS:
            wav_path = source_path + ".wav"
            AudioSegment.from_file(source_path).export(wav_path, format="wav")
        elif file_extension in _VIDEO_EXTENSIONS:
            video = mp.VideoFileClip(source_path)
            try:
                if video.audio is None:
                    return "The video has no audio track to transcribe.", False
                wav_path = source_path + ".wav"
                video.audio.write_audiofile(wav_path)
            finally:
                video.close()
        else:
            return "Unsupported file format. Please upload an audio or video file.", False

        # One verbose_json request carries both the segment list and the full
        # text, so the audio is only sent to the API once.
        with open(wav_path, "rb") as audio_file:
            result = openai.Audio.transcribe("whisper-1", audio_file, response_format="verbose_json")

        segments = result.get('segments')
        if segments:
            # NOTE(review): segments without a 'speaker' key are labelled
            # "Unknown" - confirm whether the API ever supplies that key.
            formatted_transcript = "".join(
                f"Speaker {segment.get('speaker', 'Unknown')}: {segment.get('text', '')}\n\n"
                for segment in segments
            )
        else:
            formatted_transcript = result.get('text', 'No transcription available.')

        transcription_text = formatted_transcript
        generated_file = io.BytesIO(transcription_text.encode())
        return "Transcription and diarization completed successfully!", True
    except Exception as e:
        # Top-level boundary for the callback: log with traceback, report to the UI.
        logger.exception("Error during processing: %s", e)
        return f"An error occurred: {str(e)}", False
    finally:
        # Both files were created with delete=False / by export, so remove them here.
        if temp_audio_file is not None and os.path.exists(temp_audio_file.name):
            os.unlink(temp_audio_file.name)
        if wav_path is not None and os.path.exists(wav_path):
            os.unlink(wav_path)


@app.callback(
    [Output('output-media-upload', 'children'),
     Output('transcription-status', 'children'),
     Output('transcription-preview', 'children'),
     Output('btn-download', 'disabled')],
    [Input('upload-media', 'contents'),
     Input('process-url-button', 'n_clicks')],
    [State('upload-media', 'filename'),
     State('url-input', 'value')]
)
def update_output(contents, n_clicks, filename, url):
    """Run transcription for whichever input (upload or URL button) fired.

    Returns the four output values in declaration order: upload message,
    status message, transcript preview (first 1000 characters) and the
    ``disabled`` flag of the download button.
    """
    # Imported here because the module-level dash import does not include it.
    from dash import callback_context

    idle = ("No file uploaded or URL processed.", "", "", True)
    ctx = callback_context
    if not ctx.triggered:
        return idle

    trigger_id = ctx.triggered[0]['prop_id'].split('.')[0]
    url = (url or "").strip()
    if trigger_id == 'upload-media' and contents is not None:
        # contents is "<header>,<base64 payload>"; only the payload is needed.
        content_string = contents.partition(',')[2]
        decoded = base64.b64decode(content_string)
        # The filename supplies the extension used for format detection.
        status_message, success = process_media(decoded, filename=filename)
    elif trigger_id == 'process-url-button' and url:
        status_message, success = process_media(url, is_url=True)
    else:
        return idle

    if success:
        preview = transcription_text[:1000] + "..." if len(transcription_text) > 1000 else transcription_text
        return "File processed successfully.", status_message, preview, False
    else:
        return "Processing failed.", status_message, "", True
 @app.callback(
     Output("download-transcription", "data"),