Update app.py
Browse files
app.py
CHANGED
|
@@ -4,8 +4,7 @@ import requests
|
|
| 4 |
import pandas as pd
|
| 5 |
from io import BytesIO
|
| 6 |
import re
|
| 7 |
-
import
|
| 8 |
-
import ffmpeg
|
| 9 |
|
| 10 |
# --- Tool-specific Imports ---
|
| 11 |
from pytube import YouTube
|
|
@@ -55,10 +54,17 @@ def transcribe_youtube_video(video_url: str) -> str:
|
|
| 55 |
stream = yt.streams.filter(only_audio=True).first()
|
| 56 |
video_path = stream.download(output_path=TEMP_DIR)
|
| 57 |
audio_path = os.path.join(TEMP_DIR, "output.mp3")
|
| 58 |
-
|
| 59 |
-
|
| 60 |
-
|
| 61 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 62 |
client = Groq(api_key=os.getenv("GROQ_API_KEY"))
|
| 63 |
with open(audio_path, "rb") as audio_file:
|
| 64 |
transcription = client.audio.transcriptions.create(file=audio_file, model="whisper-large-v3", response_format="text")
|
|
@@ -66,8 +72,10 @@ def transcribe_youtube_video(video_url: str) -> str:
|
|
| 66 |
except Exception as e:
|
| 67 |
return f"Error during YouTube transcription: {e}"
|
| 68 |
finally:
|
|
|
|
| 69 |
if video_path and os.path.exists(video_path): os.remove(video_path)
|
| 70 |
if audio_path and os.path.exists(audio_path): os.remove(audio_path)
|
|
|
|
| 71 |
|
| 72 |
# --- Agent Definition ---
|
| 73 |
class LangChainAgent:
|
|
@@ -161,7 +169,7 @@ with gr.Blocks() as demo:
|
|
| 161 |
|
| 162 |
if __name__ == "__main__":
|
| 163 |
print("\n" + "-"*30 + " App Starting " + "-"*30)
|
| 164 |
-
for key in ["GROQ_API_KEY", "TAVILY_API_KEY"
|
| 165 |
print(f"✅ {key} secret is set." if os.getenv(key) else f"⚠️ WARNING: {key} secret is not set.")
|
| 166 |
print("-"*(60 + len(" App Starting ")) + "\n")
|
| 167 |
-
demo.launch(debug=True, share=False)
|
|
|
|
| 4 |
import pandas as pd
|
| 5 |
from io import BytesIO
|
| 6 |
import re
|
| 7 |
+
import ffmpeg # Using the ffmpeg-python wrapper
|
|
|
|
| 8 |
|
| 9 |
# --- Tool-specific Imports ---
|
| 10 |
from pytube import YouTube
|
|
|
|
| 54 |
stream = yt.streams.filter(only_audio=True).first()
|
| 55 |
video_path = stream.download(output_path=TEMP_DIR)
|
| 56 |
audio_path = os.path.join(TEMP_DIR, "output.mp3")
|
| 57 |
+
|
| 58 |
+
# Use ffmpeg-python to convert the downloaded file to mp3
|
| 59 |
+
(
|
| 60 |
+
ffmpeg
|
| 61 |
+
.input(video_path)
|
| 62 |
+
.output(audio_path, **{'q:a': 0, 'map': 'a'}) # Set audio quality and select audio stream
|
| 63 |
+
.overwrite_output() # Corresponds to the -y flag
|
| 64 |
+
.run(quiet=True) # Use quiet=True to avoid printing ffmpeg logs
|
| 65 |
+
)
|
| 66 |
+
|
| 67 |
+
print(f"Audio extracted to: {audio_path}")
|
| 68 |
client = Groq(api_key=os.getenv("GROQ_API_KEY"))
|
| 69 |
with open(audio_path, "rb") as audio_file:
|
| 70 |
transcription = client.audio.transcriptions.create(file=audio_file, model="whisper-large-v3", response_format="text")
|
|
|
|
| 72 |
except Exception as e:
|
| 73 |
return f"Error during YouTube transcription: {e}"
|
| 74 |
finally:
|
| 75 |
+
# Clean up temporary files
|
| 76 |
if video_path and os.path.exists(video_path): os.remove(video_path)
|
| 77 |
if audio_path and os.path.exists(audio_path): os.remove(audio_path)
|
| 78 |
+
print("Cleaned up temporary files.")
|
| 79 |
|
| 80 |
# --- Agent Definition ---
|
| 81 |
class LangChainAgent:
|
|
|
|
| 169 |
|
| 170 |
if __name__ == "__main__":
|
| 171 |
print("\n" + "-"*30 + " App Starting " + "-"*30)
|
| 172 |
+
for key in ["GROQ_API_KEY", "TAVILY_API_KEY"]:
|
| 173 |
print(f"✅ {key} secret is set." if os.getenv(key) else f"⚠️ WARNING: {key} secret is not set.")
|
| 174 |
print("-"*(60 + len(" App Starting ")) + "\n")
|
| 175 |
+
demo.launch(debug=True, share=False)
|