Spaces:
Sleeping
Sleeping
Raj Jayendrakumar Muchhala
committed on
Commit
·
e033e0f
1
Parent(s):
78ca458
whisper jax
Browse files
- app.py +21 -81
- requirements.txt +1 -4
app.py
CHANGED
|
@@ -4,9 +4,6 @@ from clipper_prompts import CLIPPER_SYSTEM_MESSAGE, CLIPPER_USER_MESSAGE
|
|
| 4 |
from prompts import SYSTEM_MESSAGE, USER_MESSAGE
|
| 5 |
import json
|
| 6 |
import os
|
| 7 |
-
import yt_dlp
|
| 8 |
-
import ffmpeg
|
| 9 |
-
from tempfile import NamedTemporaryFile
|
| 10 |
|
| 11 |
# Set Streamlit layout to wide mode
|
| 12 |
st.set_page_config(layout="wide")
|
|
@@ -52,89 +49,32 @@ col_transcript, col_output = st.columns([1, 1])
|
|
| 52 |
|
| 53 |
# Left Column: Transcript Input
|
| 54 |
with col_transcript:
|
| 55 |
-
st.subheader("π
|
| 56 |
-
|
| 57 |
-
|
| 58 |
-
transcript
|
| 59 |
-
|
| 60 |
-
|
| 61 |
-
|
| 62 |
-
|
| 63 |
-
"
|
| 64 |
-
|
| 65 |
-
|
| 66 |
-
|
| 67 |
-
|
| 68 |
-
|
| 69 |
-
|
| 70 |
-
|
| 71 |
-
|
| 72 |
-
|
| 73 |
-
|
| 74 |
-
|
| 75 |
-
|
| 76 |
-
# Check file size in MB
|
| 77 |
-
file_size_mb = os.path.getsize(input_path) / (1024 * 1024)
|
| 78 |
-
|
| 79 |
-
if file_size_mb <= target_size_mb:
|
| 80 |
-
return input_path # No need to compress if already under limit
|
| 81 |
-
|
| 82 |
-
# Estimate appropriate bitrate (targeting 90% of desired size)
|
| 83 |
-
target_bitrate_kbps = int((target_size_mb * 1024 * 1024 * 8) / (file_size_mb * 1.1)) # 10% buffer
|
| 84 |
-
target_bitrate_kbps = max(target_bitrate_kbps, 32) # Prevent extreme low-quality audio
|
| 85 |
-
|
| 86 |
-
ffmpeg.input(input_path).output(output_path, audio_bitrate=f"{target_bitrate_kbps}k").run(overwrite_output=True)
|
| 87 |
-
return output_path
|
| 88 |
-
|
| 89 |
-
|
| 90 |
-
def transcribe_audio(file_path):
|
| 91 |
-
whisper_client = OpenAI(api_key=OPENAI_API_KEY, base_url="https://api.openai.com/v1")
|
| 92 |
-
transcription_args = {
|
| 93 |
-
"file": None,
|
| 94 |
-
"model": "whisper-1",
|
| 95 |
-
"response_format": "verbose_json",
|
| 96 |
-
"timestamp_granularities": ["word"],
|
| 97 |
-
"timeout": 360,
|
| 98 |
-
"prompt": "The audio may not contain speech, do not make up words."
|
| 99 |
-
}
|
| 100 |
-
with open(file_path, "rb") as audio_file:
|
| 101 |
-
transcription_args["file"] = audio_file
|
| 102 |
-
transcript_response = whisper_client.audio.transcriptions.create(**transcription_args)
|
| 103 |
-
|
| 104 |
-
transcript_words = transcript_response.words
|
| 105 |
-
transcript = " ".join([word['word'] for word in transcript_words])
|
| 106 |
-
return transcript
|
| 107 |
-
|
| 108 |
-
|
| 109 |
-
if youtube_url:
|
| 110 |
-
st.video(youtube_url)
|
| 111 |
-
elif media_file:
|
| 112 |
if media_file.type.startswith("video"):
|
| 113 |
st.video(media_file)
|
| 114 |
elif media_file.type.startswith("audio"):
|
| 115 |
st.audio(media_file)
|
| 116 |
|
| 117 |
-
if st.button("Transcribe Video"):
|
| 118 |
-
with st.spinner("Processing... This may take a few minutes."):
|
| 119 |
-
try:
|
| 120 |
-
if youtube_url:
|
| 121 |
-
audio_path = download_youtube_audio(youtube_url)
|
| 122 |
-
transcript = transcribe_audio(audio_path)
|
| 123 |
-
elif media_file:
|
| 124 |
-
with NamedTemporaryFile(delete=False, suffix=".mp3") as temp_audio:
|
| 125 |
-
temp_audio.write(media_file.read())
|
| 126 |
-
temp_audio.close()
|
| 127 |
-
audio_path = compress_audio(temp_audio.name)
|
| 128 |
-
transcript = transcribe_audio(audio_path)
|
| 129 |
-
else:
|
| 130 |
-
st.error("β Please provide a YouTube link or upload a file.")
|
| 131 |
-
except Exception as e:
|
| 132 |
-
st.error(f"Error: {str(e)}")
|
| 133 |
-
|
| 134 |
-
# Display the extracted transcript
|
| 135 |
-
st.subheader("π Transcript")
|
| 136 |
-
transcript = st.text_area("Generated Transcript", transcript, height=300)
|
| 137 |
-
|
| 138 |
|
| 139 |
# Right Column: Clip Plan Generation and Extraction
|
| 140 |
with col_output:
|
|
|
|
| 4 |
from prompts import SYSTEM_MESSAGE, USER_MESSAGE
|
| 5 |
import json
|
| 6 |
import os
|
|
|
|
|
|
|
|
|
|
| 7 |
|
| 8 |
# Set Streamlit layout to wide mode
|
| 9 |
st.set_page_config(layout="wide")
|
|
|
|
| 49 |
|
| 50 |
# Left Column: Transcript Input
|
| 51 |
with col_transcript:
|
| 52 |
+
st.subheader("π Paste Your Transcript")
|
| 53 |
+
transcript = st.text_area("Enter the transcript here:", height=400)
|
| 54 |
+
|
| 55 |
+
# Add reference link below the transcript text box
|
| 56 |
+
st.markdown("---")
|
| 57 |
+
st.markdown(
|
| 58 |
+
"""
|
| 59 |
+
<div style="font-size:18px; font-weight:bold; margin-top:10px;">
|
| 60 |
+
Need a transcript? Use <a href="https://huggingface.co/spaces/sanchit-gandhi/whisper-jax-spaces" target="_blank" style="color:#007bff; text-decoration:none;">
|
| 61 |
+
OpenAI Whisper on Hugging Face</a> to generate one from your audio or video.
|
| 62 |
+
</div>
|
| 63 |
+
""",
|
| 64 |
+
unsafe_allow_html=True
|
| 65 |
+
)
|
| 66 |
+
|
| 67 |
+
st.markdown("---")
|
| 68 |
+
st.subheader("🎥 Video/Audio Upload & Playback")
|
| 69 |
+
|
| 70 |
+
media_file = st.file_uploader("Upload a video or audio file", type=["mp4", "mov", "avi", "mp3", "wav", "ogg"])
|
| 71 |
+
if media_file is not None:
|
| 72 |
+
# Detect media type and play accordingly
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 73 |
if media_file.type.startswith("video"):
|
| 74 |
st.video(media_file)
|
| 75 |
elif media_file.type.startswith("audio"):
|
| 76 |
st.audio(media_file)
|
| 77 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 78 |
|
| 79 |
# Right Column: Clip Plan Generation and Extraction
|
| 80 |
with col_output:
|
requirements.txt
CHANGED
|
@@ -1,4 +1 @@
|
|
| 1 |
-
openai
|
| 2 |
-
yt-dlp
|
| 3 |
-
pydub
|
| 4 |
-
ffmpeg-python
|
|
|
|
| 1 |
+
openai
|
|
|
|
|
|
|
|
|