Spaces:

MicroHealth
/

AV-to-transcripts

Paused

App Files Community

bluenevus committed on Apr 26, 2025

Commit

e299575

verified ·

1 Parent(s): 7824869

Update app.py

Browse files

Files changed (1) hide show

app.py +39 -16

app.py CHANGED Viewed

@@ -14,6 +14,7 @@ import requests
 from pytube import YouTube
 from pydub import AudioSegment
 import google.generativeai as genai
 # Initialize the Dash app
 app = dash.Dash(__name__, external_stylesheets=[dbc.themes.BOOTSTRAP])
@@ -32,19 +33,35 @@ def is_valid_url(url):
     except ValueError:
         return False
-def download_audio(url):
     if "youtube.com" in url or "youtu.be" in url:
         yt = YouTube(url)
-        audio_stream = yt.streams.filter(only_audio=True).first()
         with tempfile.NamedTemporaryFile(delete=False, suffix=".mp4") as temp_file:
-            audio_stream.download(output_path=os.path.dirname(temp_file.name), filename=temp_file.name)
             return temp_file.name
     else:
         response = requests.get(url)
-        with tempfile.NamedTemporaryFile(delete=False, suffix=".mp3") as temp_file:
             temp_file.write(response.content)
             return temp_file.name
 def transcribe_audio(file_path):
     with open(file_path, "rb") as audio_file:
         audio_data = audio_file.read()
@@ -52,34 +69,40 @@ def transcribe_audio(file_path):
     response = model.generate_content(audio_data)
     return response.text
-def process_audio(contents, filename, url):
     if contents:
         content_type, content_string = contents.split(',')
         decoded = base64.b64decode(content_string)
-        with tempfile.NamedTemporaryFile(delete=False, suffix=os.path.splitext(filename)[1]) as temp_file:
             temp_file.write(decoded)
             temp_file_path = temp_file.name
     elif url:
-        temp_file_path = download_audio(url)
     else:
         raise ValueError("No input provided")
     try:
-        transcript = transcribe_audio(temp_file_path)
     finally:
         os.unlink(temp_file_path)
     return transcript
 app.layout = dbc.Container([
-    html.H1("Audio Transcription App", className="text-center my-4"),
     dbc.Card([
         dbc.CardBody([
             dcc.Upload(
-                id='upload-audio',
                 children=html.Div([
                     'Drag and Drop or ',
-                    html.A('Select Audio File')
                 ]),
                 style={
                     'width': '100%',
@@ -93,7 +116,7 @@ app.layout = dbc.Container([
                 },
                 multiple=False
             ),
-            dbc.Input(id="audio-url", type="text", placeholder="Enter audio URL or YouTube link", className="my-3"),
             dbc.Button("Transcribe", id="transcribe-button", color="primary", className="w-100 mb-3"),
             dbc.Spinner(html.Div(id="transcription-output", className="mt-3")),
             dbc.Button("Download Transcript", id="download-button", color="secondary", className="w-100 mt-3", style={'display': 'none'}),
@@ -106,9 +129,9 @@ app.layout = dbc.Container([
     Output("transcription-output", "children"),
     Output("download-button", "style"),
     Input("transcribe-button", "n_clicks"),
-    State("upload-audio", "contents"),
-    State("upload-audio", "filename"),
-    State("audio-url", "value"),
     prevent_initial_call=True
 )
 def update_transcription(n_clicks, contents, filename, url):
@@ -117,7 +140,7 @@ def update_transcription(n_clicks, contents, filename, url):
     def transcribe():
         try:
-            return process_audio(contents, filename, url)
         except Exception as e:
             return f"An error occurred: {str(e)}"

 from pytube import YouTube
 from pydub import AudioSegment
 import google.generativeai as genai
+from moviepy.editor import VideoFileClip
 # Initialize the Dash app
 app = dash.Dash(__name__, external_stylesheets=[dbc.themes.BOOTSTRAP])
     except ValueError:
         return False
def download_media(url, timeout=30):
    """Download the media behind *url* into a temporary file and return its path.

    YouTube links (``youtube.com`` / ``youtu.be``) are fetched through pytube
    using the first progressive MP4 stream. Any other URL is fetched with
    ``requests`` and the temp-file suffix is chosen from the response's
    ``Content-Type`` header (``.mp4`` for video, ``.mp3`` for audio, none
    otherwise).

    The caller owns the returned file and must delete it.

    Args:
        url: Address of the media to download.
        timeout: Seconds to wait on the HTTP request for non-YouTube URLs.

    Returns:
        Path of the temporary file holding the downloaded media.

    Raises:
        ValueError: If a YouTube URL exposes no progressive MP4 stream.
        requests.RequestException: If the HTTP request fails, times out, or
            returns an error status.
    """
    if "youtube.com" in url or "youtu.be" in url:
        yt = YouTube(url)
        stream = yt.streams.filter(progressive=True, file_extension='mp4').first()
        if stream is None:
            # .first() yields None when nothing matches; fail with a clear message
            # instead of an AttributeError on the download call.
            raise ValueError("No progressive MP4 stream is available for this YouTube URL")

        # Reserve a unique path, then close our handle before pytube writes to it
        # so the file is never open twice.
        with tempfile.NamedTemporaryFile(delete=False, suffix=".mp4") as temp_file:
            target_path = temp_file.name
        try:
            stream.download(
                output_path=os.path.dirname(target_path),
                filename=os.path.basename(target_path),
            )
        except Exception:
            # Do not leave the reserved (empty or partial) file behind on failure.
            os.unlink(target_path)
            raise
        return target_path

    response = requests.get(url, timeout=timeout)
    # Without this, an HTML error page would be saved and sent for transcription.
    response.raise_for_status()

    content_type = response.headers.get('content-type', '').lower()
    if 'video' in content_type:
        suffix = '.mp4'
    elif 'audio' in content_type:
        suffix = '.mp3'
    else:
        suffix = ''

    with tempfile.NamedTemporaryFile(delete=False, suffix=suffix) as temp_file:
        temp_file.write(response.content)
        return temp_file.name
def extract_audio(file_path):
    """Write the audio track of the video at *file_path* to a temporary WAV file.

    The caller owns the returned file and must delete it.

    Args:
        file_path: Path of the video file to read.

    Returns:
        Path of the temporary ``.wav`` file containing the audio track.

    Raises:
        ValueError: If the video has no audio track.
    """
    video = VideoFileClip(file_path)
    audio = None
    try:
        audio = video.audio
        if audio is None:
            raise ValueError("The video file has no audio track")

        # Only the unique path is needed; close the handle immediately so the
        # descriptor is not leaked and the writer can open the path itself.
        with tempfile.NamedTemporaryFile(delete=False, suffix=".wav") as wav_file:
            wav_path = wav_file.name
        try:
            audio.write_audiofile(wav_path)
        except Exception:
            # Do not leave a partial WAV behind when the export fails.
            os.unlink(wav_path)
            raise
        return wav_path
    finally:
        # Release the clip readers on both the success and the failure path.
        video.close()
        if audio is not None:
            audio.close()
 def transcribe_audio(file_path):
     with open(file_path, "rb") as audio_file:
         audio_data = audio_file.read()
     response = model.generate_content(audio_data)
     return response.text
def process_media(contents, filename, url):
    """Transcribe an uploaded file or a remote URL and return the transcript text.

    Exactly one input source is used: *contents* wins when it is non-empty,
    otherwise *url* is downloaded. Files whose temporary path ends in a known
    video extension have their audio extracted first; everything else is
    passed to the transcriber as-is. All temporary files created along the way
    are removed before returning, including when transcription fails.

    Args:
        contents: Upload payload in the form ``"<header>,<base64 data>"``, or
            a falsy value when nothing was uploaded.
        filename: Original name of the uploaded file; only its extension is
            used. May be ``None``.
        url: Address of the media to download when *contents* is empty.

    Returns:
        The transcript produced by ``transcribe_audio``.

    Raises:
        ValueError: If neither *contents* nor *url* is provided.
    """
    video_extensions = ('.mp4', '.avi', '.mov', '.flv', '.wmv')

    if contents:
        # Split once: only the first comma separates the header from the payload.
        _content_type, content_string = contents.split(',', 1)
        decoded = base64.b64decode(content_string)
        # A missing filename simply means "no suffix" rather than a TypeError.
        suffix = os.path.splitext(filename or '')[1]
        with tempfile.NamedTemporaryFile(delete=False, suffix=suffix) as temp_file:
            temp_file.write(decoded)
            temp_file_path = temp_file.name
    elif url:
        temp_file_path = download_media(url)
    else:
        raise ValueError("No input provided")

    audio_file_path = None
    try:
        if temp_file_path.lower().endswith(video_extensions):
            audio_file_path = extract_audio(temp_file_path)
            transcript = transcribe_audio(audio_file_path)
        else:
            transcript = transcribe_audio(temp_file_path)
    finally:
        # Remove the extracted audio even if transcription raised, and always
        # remove the source file afterwards.
        try:
            if audio_file_path is not None:
                os.unlink(audio_file_path)
        finally:
            os.unlink(temp_file_path)
    return transcript
 app.layout = dbc.Container([
+    html.H1("Audio/Video Transcription App", className="text-center my-4"),
     dbc.Card([
         dbc.CardBody([
             dcc.Upload(
+                id='upload-media',
                 children=html.Div([
                     'Drag and Drop or ',
+                    html.A('Select Audio/Video File')
                 ]),
                 style={
                     'width': '100%',
                 },
                 multiple=False
             ),
+            dbc.Input(id="media-url", type="text", placeholder="Enter audio/video URL or YouTube link", className="my-3"),
             dbc.Button("Transcribe", id="transcribe-button", color="primary", className="w-100 mb-3"),
             dbc.Spinner(html.Div(id="transcription-output", className="mt-3")),
             dbc.Button("Download Transcript", id="download-button", color="secondary", className="w-100 mt-3", style={'display': 'none'}),
     Output("transcription-output", "children"),
     Output("download-button", "style"),
     Input("transcribe-button", "n_clicks"),
+    State("upload-media", "contents"),
+    State("upload-media", "filename"),
+    State("media-url", "value"),
     prevent_initial_call=True
 )
 def update_transcription(n_clicks, contents, filename, url):
     def transcribe():
         try:
+            return process_media(contents, filename, url)
         except Exception as e:
             return f"An error occurred: {str(e)}"