Final_Assignment_Template

Sleeping

App Files Files Community

dalybuilds committed on Jul 19, 2025

Commit

3948895

verified ·

1 Parent(s): 71f7b21

Update app.py

Browse files

Files changed (1) hide show

app.py +16 -8

app.py CHANGED Viewed

@@ -4,8 +4,7 @@ import requests
 import pandas as pd
 from io import BytesIO
 import re
-import subprocess
-import ffmpeg
 # --- Tool-specific Imports ---
 from pytube import YouTube
@@ -55,10 +54,17 @@ def transcribe_youtube_video(video_url: str) -> str:
         stream = yt.streams.filter(only_audio=True).first()
         video_path = stream.download(output_path=TEMP_DIR)
         audio_path = os.path.join(TEMP_DIR, "output.mp3")
-        # Use ffmpeg-python instead of subprocess
-        stream = ffmpeg.input(video_path)
-        stream = ffmpeg.output(stream, audio_path, q=0, map='a', y='y')
-        ffmpeg.run(stream)
         client = Groq(api_key=os.getenv("GROQ_API_KEY"))
         with open(audio_path, "rb") as audio_file:
             transcription = client.audio.transcriptions.create(file=audio_file, model="whisper-large-v3", response_format="text")
@@ -66,8 +72,10 @@ def transcribe_youtube_video(video_url: str) -> str:
     except Exception as e:
         return f"Error during YouTube transcription: {e}"
     finally:
         if video_path and os.path.exists(video_path): os.remove(video_path)
         if audio_path and os.path.exists(audio_path): os.remove(audio_path)
 # --- Agent Definition ---
 class LangChainAgent:
@@ -161,7 +169,7 @@ with gr.Blocks() as demo:
 if __name__ == "__main__":
     print("\n" + "-"*30 + " App Starting " + "-"*30)
-    for key in ["GROQ_API_KEY", "TAVILY_API_KEY", "SPACE_ID"]:
         print(f"✅ {key} secret is set." if os.getenv(key) else f"⚠️ WARNING: {key} secret is not set.")
     print("-"*(60 + len(" App Starting ")) + "\n")
-    demo.launch(debug=True, share=False)

 import pandas as pd
 from io import BytesIO
 import re
+import ffmpeg # Using the ffmpeg-python wrapper
 # --- Tool-specific Imports ---
 from pytube import YouTube
         stream = yt.streams.filter(only_audio=True).first()
         video_path = stream.download(output_path=TEMP_DIR)
         audio_path = os.path.join(TEMP_DIR, "output.mp3")
+        # Use ffmpeg-python to convert the downloaded file to mp3
+        (
+            ffmpeg
+            .input(video_path)
+            .output(audio_path, **{'q:a': 0, 'map': 'a'}) # Set audio quality and select audio stream
+            .overwrite_output() # Corresponds to the -y flag
+            .run(quiet=True) # Use quiet=True to avoid printing ffmpeg logs
+        )
+        print(f"Audio extracted to: {audio_path}")
         client = Groq(api_key=os.getenv("GROQ_API_KEY"))
         with open(audio_path, "rb") as audio_file:
             transcription = client.audio.transcriptions.create(file=audio_file, model="whisper-large-v3", response_format="text")
     except Exception as e:
         return f"Error during YouTube transcription: {e}"
     finally:
+        # Clean up temporary files
         if video_path and os.path.exists(video_path): os.remove(video_path)
         if audio_path and os.path.exists(audio_path): os.remove(audio_path)
+        print("Cleaned up temporary files.")
 # --- Agent Definition ---
 class LangChainAgent:
 if __name__ == "__main__":
     print("\n" + "-"*30 + " App Starting " + "-"*30)
+    for key in ["GROQ_API_KEY", "TAVILY_API_KEY"]:
         print(f"✅ {key} secret is set." if os.getenv(key) else f"⚠️ WARNING: {key} secret is not set.")
     print("-"*(60 + len(" App Starting ")) + "\n")
+    demo.launch(debug=True, share=False)