Spaces:
Sleeping
Sleeping
Raj Jayendrakumar Muchhala
committed on
Commit
·
78ca458
1
Parent(s):
debb8ec
support transcription
Browse files- app.py +81 -21
- requirements.txt +4 -1
app.py
CHANGED
|
@@ -4,6 +4,9 @@ from clipper_prompts import CLIPPER_SYSTEM_MESSAGE, CLIPPER_USER_MESSAGE
|
|
| 4 |
from prompts import SYSTEM_MESSAGE, USER_MESSAGE
|
| 5 |
import json
|
| 6 |
import os
|
|
|
|
|
|
|
|
|
|
| 7 |
|
| 8 |
# Set Streamlit layout to wide mode
|
| 9 |
st.set_page_config(layout="wide")
|
|
@@ -49,32 +52,89 @@ col_transcript, col_output = st.columns([1, 1])
|
|
| 49 |
|
| 50 |
# Left Column: Transcript Input
|
| 51 |
with col_transcript:
|
| 52 |
-
st.subheader("π
|
| 53 |
-
|
| 54 |
-
|
| 55 |
-
|
| 56 |
-
|
| 57 |
-
|
| 58 |
-
|
| 59 |
-
|
| 60 |
-
|
| 61 |
-
|
| 62 |
-
|
| 63 |
-
|
| 64 |
-
|
| 65 |
-
|
| 66 |
-
|
| 67 |
-
|
| 68 |
-
|
| 69 |
-
|
| 70 |
-
|
| 71 |
-
|
| 72 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 73 |
if media_file.type.startswith("video"):
|
| 74 |
st.video(media_file)
|
| 75 |
elif media_file.type.startswith("audio"):
|
| 76 |
st.audio(media_file)
|
| 77 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 78 |
|
| 79 |
# Right Column: Clip Plan Generation and Extraction
|
| 80 |
with col_output:
|
|
|
|
| 4 |
from prompts import SYSTEM_MESSAGE, USER_MESSAGE
|
| 5 |
import json
|
| 6 |
import os
|
| 7 |
+
import yt_dlp
|
| 8 |
+
import ffmpeg
|
| 9 |
+
from tempfile import NamedTemporaryFile
|
| 10 |
|
| 11 |
# Set Streamlit layout to wide mode
|
| 12 |
st.set_page_config(layout="wide")
|
|
|
|
| 52 |
|
| 53 |
# Left Column: Transcript Input
|
| 54 |
with col_transcript:
|
| 55 |
+
# Source selection: the user supplies either a YouTube URL or an uploaded file.
st.subheader("π Enter Video Source")
youtube_url = st.text_input("Enter YouTube Video URL")

# Container formats accepted by the uploader (video first, then audio).
accepted_media_types = ["mp4", "mov", "avi", "mp3", "wav", "ogg"]
media_file = st.file_uploader(
    "Or upload a video/audio file",
    type=accepted_media_types,
)

# Starts empty on every script run; filled in once a transcription completes.
transcript = ""
|
| 59 |
+
|
| 60 |
+
def download_youtube_audio(url):
    """Download a video's audio track as MP3 and return the file path.

    The audio is fetched with yt-dlp, converted to MP3 by yt-dlp's FFmpeg
    post-processor, and then handed to ``compress_audio``.

    Args:
        url: URL of the video whose audio should be downloaded.

    Returns:
        The path returned by ``compress_audio`` for the downloaded MP3.
    """
    ydl_opts = {
        "format": "bestaudio/best",
        # When embedding YoutubeDL, audio extraction has to be requested as a
        # post-processor; the command-line style "extractaudio"/"audioformat"
        # keys do not trigger a conversion.
        "postprocessors": [
            {"key": "FFmpegExtractAudio", "preferredcodec": "mp3"},
        ],
        # Name the file after the video id (no stray space inside the field).
        "outtmpl": "%(id)s.%(ext)s",
    }
    with yt_dlp.YoutubeDL(ydl_opts) as ydl:
        info = ydl.extract_info(url, download=True)
        # prepare_filename() reports the pre-conversion extension, so swap
        # whatever it is for the post-processor's ".mp3" output.
        base_name, _ = os.path.splitext(ydl.prepare_filename(info))
    return compress_audio(base_name + ".mp3")
|
| 71 |
+
|
| 72 |
+
def _probe_duration_seconds(path):
    """Return the media duration in seconds reported by ffprobe, or 0.0 if unknown."""
    try:
        return float(ffmpeg.probe(path)["format"]["duration"])
    except (KeyError, TypeError, ValueError):
        return 0.0


def _bitrate_kbps_for_size(duration_s, target_size_mb, floor_kbps=32, ceiling_kbps=320):
    """Return the audio bitrate (kbit/s) that fits duration_s into ~90% of target_size_mb."""
    if duration_s <= 0:
        # Without a duration no bitrate can be derived; use the quality floor.
        return floor_kbps
    target_bits = target_size_mb * 1024 * 1024 * 8 * 0.9  # keep a 10% safety margin
    kbps = int(target_bits / duration_s / 1000)
    # Clamp: below the floor speech becomes unintelligible, above the ceiling
    # MP3 encoders have nothing more to offer.
    return max(floor_kbps, min(kbps, ceiling_kbps))


def compress_audio(input_path, target_size_mb=25):
    """Re-encode audio to MP3 only if the file exceeds the target size.

    The bitrate is derived from the file's duration so that the re-encoded
    output lands at roughly 90% of ``target_size_mb``. The bitrate is never
    taken below 32 kbit/s, so very long recordings may still exceed the
    target.

    Args:
        input_path: Path to the audio (or audio-bearing media) file.
        target_size_mb: Maximum desired size in MiB. Defaults to 25.

    Returns:
        ``input_path`` unchanged when the file is already small enough,
        otherwise the path of the newly written ``*_compressed.mp3`` file.

    Raises:
        OSError: If ``input_path`` cannot be inspected.
    """
    # Check file size in MB
    file_size_mb = os.path.getsize(input_path) / (1024 * 1024)
    if file_size_mb <= target_size_mb:
        return input_path  # No need to compress if already under limit

    # Build the output name from the extension only, so inputs without a
    # ".mp3" suffix never end up with output_path == input_path.
    root, _ = os.path.splitext(input_path)
    output_path = f"{root}_compressed.mp3"

    target_bitrate_kbps = _bitrate_kbps_for_size(
        _probe_duration_seconds(input_path), target_size_mb
    )

    (
        ffmpeg.input(input_path)
        .output(output_path, audio_bitrate=f"{target_bitrate_kbps}k")
        .run(overwrite_output=True)
    )
    return output_path
|
| 88 |
+
|
| 89 |
+
|
| 90 |
+
def transcribe_audio(file_path):
    """Transcribe an audio file with the Whisper API and return the text.

    The request asks for word-level timestamps (``verbose_json``); the
    returned words are joined with single spaces.

    Args:
        file_path: Path to the audio file to upload.

    Returns:
        The transcript as one string; empty when the response carries no words.
    """
    whisper_client = OpenAI(api_key=OPENAI_API_KEY, base_url="https://api.openai.com/v1")

    # Keep the file open only for the duration of the upload.
    with open(file_path, "rb") as audio_file:
        transcript_response = whisper_client.audio.transcriptions.create(
            file=audio_file,
            model="whisper-1",
            response_format="verbose_json",
            timestamp_granularities=["word"],
            timeout=360,
            prompt="The audio may not contain speech, do not make up words.",
        )

    # "words" can be missing or None when no speech was detected.
    transcript_words = getattr(transcript_response, "words", None) or []

    # Depending on the SDK release, each entry is either a plain dict or a
    # typed object exposing a ``word`` attribute; accept both.
    return " ".join(
        entry["word"] if isinstance(entry, dict) else entry.word
        for entry in transcript_words
    )
|
| 107 |
+
|
| 108 |
+
|
| 109 |
+
# Preview the selected source; a YouTube URL takes precedence over an upload.
if youtube_url:
    st.video(youtube_url)
elif media_file:
    if media_file.type.startswith("video"):
        st.video(media_file)
    elif media_file.type.startswith("audio"):
        st.audio(media_file)

if st.button("Transcribe Video"):
    with st.spinner("Processing... This may take a few minutes."):
        # Every file written for this run, so it can be removed afterwards
        # instead of accumulating on disk.
        scratch_paths = []
        try:
            if youtube_url:
                audio_path = download_youtube_audio(youtube_url)
                scratch_paths.append(audio_path)
                transcript = transcribe_audio(audio_path)
            elif media_file:
                # delete=False: the file must outlive the handle so that
                # ffmpeg and the upload can reopen it by name.
                with NamedTemporaryFile(delete=False, suffix=".mp3") as temp_audio:
                    temp_audio.write(media_file.read())
                scratch_paths.append(temp_audio.name)
                audio_path = compress_audio(temp_audio.name)
                scratch_paths.append(audio_path)
                transcript = transcribe_audio(audio_path)
            else:
                st.error("β Please provide a YouTube link or upload a file.")
        except Exception as e:
            # Top-level UI boundary: surface any failure to the user.
            st.error(f"Error: {str(e)}")
        finally:
            # Best-effort cleanup; a file that is already gone is not an error.
            for scratch_path in set(scratch_paths):
                try:
                    os.remove(scratch_path)
                except OSError:
                    pass

# Display the extracted transcript
st.subheader("π Transcript")
transcript = st.text_area("Generated Transcript", transcript, height=300)
|
| 137 |
+
|
| 138 |
|
| 139 |
# Right Column: Clip Plan Generation and Extraction
|
| 140 |
with col_output:
|
requirements.txt
CHANGED
|
@@ -1 +1,4 @@
|
|
| 1 |
-
openai
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
openai
|
| 2 |
+
yt-dlp
|
| 3 |
+
pydub
|
| 4 |
+
ffmpeg-python
|