Spaces:

rm8630
/

ai-transcript-clipper

Sleeping

App Files Files Community

Raj Jayendrakumar Muchhala committed on Feb 7, 2025

Commit

e033e0f

1 Parent(s): 78ca458

whisper jax

Browse files

Files changed (2) hide show

app.py +21 -81
requirements.txt +1 -4

app.py CHANGED Viewed

@@ -4,9 +4,6 @@ from clipper_prompts import CLIPPER_SYSTEM_MESSAGE, CLIPPER_USER_MESSAGE
 from prompts import SYSTEM_MESSAGE, USER_MESSAGE
 import json
 import os
-import yt_dlp
-import ffmpeg
-from tempfile import NamedTemporaryFile
 # Set Streamlit layout to wide mode
 st.set_page_config(layout="wide")
@@ -52,89 +49,32 @@ col_transcript, col_output = st.columns([1, 1])
 # Left Column: Transcript Input
 with col_transcript:
-    st.subheader("📝 Enter Video Source")
-    youtube_url = st.text_input("Enter YouTube Video URL")
-    media_file = st.file_uploader("Or upload a video/audio file", type=["mp4", "mov", "avi", "mp3", "wav", "ogg"])
-    transcript = ""
-    def download_youtube_audio(url):
-        ydl_opts = {
-            "format": "bestaudio/best",
-            "extractaudio": True,
-            "audioformat": "mp3",
-            "outtmpl": "% (id)s.%(ext)s",
-        }
-        with yt_dlp.YoutubeDL(ydl_opts) as ydl:
-            info = ydl.extract_info(url, download=True)
-            filename = ydl.prepare_filename(info).replace(".webm", ".mp3").replace(".m4a", ".mp3")
-            return compress_audio(filename)
-    def compress_audio(input_path, target_size_mb=25):
-        """Compress audio only if it exceeds the target size, adjusting bitrate dynamically."""
-        output_path = input_path.replace(".mp3", "_compressed.mp3")
-        # Check file size in MB
-        file_size_mb = os.path.getsize(input_path) / (1024 * 1024)
-        if file_size_mb <= target_size_mb:
-            return input_path  # No need to compress if already under limit
-        # Estimate appropriate bitrate (targeting 90% of desired size)
-        target_bitrate_kbps = int((target_size_mb * 1024 * 1024 * 8) / (file_size_mb * 1.1))  # 10% buffer
-        target_bitrate_kbps = max(target_bitrate_kbps, 32)  # Prevent extreme low-quality audio
-        ffmpeg.input(input_path).output(output_path, audio_bitrate=f"{target_bitrate_kbps}k").run(overwrite_output=True)
-        return output_path
-    def transcribe_audio(file_path):
-        whisper_client = OpenAI(api_key=OPENAI_API_KEY, base_url="https://api.openai.com/v1")
-        transcription_args = {
-            "file": None,
-            "model": "whisper-1",
-            "response_format": "verbose_json",
-            "timestamp_granularities": ["word"],
-            "timeout": 360,
-            "prompt": "The audio may not contain speech, do not make up words."
-        }
-        with open(file_path, "rb") as audio_file:
-            transcription_args["file"] = audio_file
-            transcript_response = whisper_client.audio.transcriptions.create(**transcription_args)
-        transcript_words = transcript_response.words
-        transcript = " ".join([word['word'] for word in transcript_words])
-        return transcript
-    if youtube_url:
-        st.video(youtube_url)
-    elif media_file:
         if media_file.type.startswith("video"):
             st.video(media_file)
         elif media_file.type.startswith("audio"):
             st.audio(media_file)
-    if st.button("Transcribe Video"):
-        with st.spinner("Processing... This may take a few minutes."):
-            try:
-                if youtube_url:
-                    audio_path = download_youtube_audio(youtube_url)
-                    transcript = transcribe_audio(audio_path)
-                elif media_file:
-                    with NamedTemporaryFile(delete=False, suffix=".mp3") as temp_audio:
-                        temp_audio.write(media_file.read())
-                        temp_audio.close()
-                        audio_path = compress_audio(temp_audio.name)
-                        transcript = transcribe_audio(audio_path)
-                else:
-                    st.error("❌ Please provide a YouTube link or upload a file.")
-            except Exception as e:
-                st.error(f"Error: {str(e)}")
-    # Display the extracted transcript
-    st.subheader("📝 Transcript")
-    transcript = st.text_area("Generated Transcript", transcript, height=300)
 # Right Column: Clip Plan Generation and Extraction
 with col_output:

 from prompts import SYSTEM_MESSAGE, USER_MESSAGE
 import json
 import os
 # Set Streamlit layout to wide mode
 st.set_page_config(layout="wide")
 # Left Column: Transcript Input
 with col_transcript:
+    st.subheader("📝 Paste Your Transcript")
+    transcript = st.text_area("Enter the transcript here:", height=400)
+    # Add reference link below the transcript text box
+    st.markdown("---")
+    st.markdown(
+        """
+        <div style="font-size:18px; font-weight:bold; margin-top:10px;">
+            Need a transcript? Use <a href="https://huggingface.co/spaces/sanchit-gandhi/whisper-jax-spaces" target="_blank" style="color:#007bff; text-decoration:none;">
+            OpenAI Whisper on Hugging Face</a> to generate one from your audio or video.
+        </div>
+        """,
+        unsafe_allow_html=True
+    )
+    st.markdown("---")
+    st.subheader("🎥 Video/Audio Upload & Playback")
+    media_file = st.file_uploader("Upload a video or audio file", type=["mp4", "mov", "avi", "mp3", "wav", "ogg"])
+    if media_file is not None:
+        # Detect media type and play accordingly
         if media_file.type.startswith("video"):
             st.video(media_file)
         elif media_file.type.startswith("audio"):
             st.audio(media_file)
 # Right Column: Clip Plan Generation and Extraction
 with col_output:

requirements.txt CHANGED Viewed

@@ -1,4 +1 @@
-openai
-yt-dlp
-pydub
-ffmpeg-python


1	+ openai