Raj Jayendrakumar Muchhala committed on
Commit
e033e0f
·
1 Parent(s): 78ca458

whisper jax

Browse files
Files changed (2) hide show
  1. app.py +21 -81
  2. requirements.txt +1 -4
app.py CHANGED
@@ -4,9 +4,6 @@ from clipper_prompts import CLIPPER_SYSTEM_MESSAGE, CLIPPER_USER_MESSAGE
4
  from prompts import SYSTEM_MESSAGE, USER_MESSAGE
5
  import json
6
  import os
7
- import yt_dlp
8
- import ffmpeg
9
- from tempfile import NamedTemporaryFile
10
 
11
  # Set Streamlit layout to wide mode
12
  st.set_page_config(layout="wide")
@@ -52,89 +49,32 @@ col_transcript, col_output = st.columns([1, 1])
52
 
53
  # Left Column: Transcript Input
54
  with col_transcript:
55
- st.subheader("📝 Enter Video Source")
56
- youtube_url = st.text_input("Enter YouTube Video URL")
57
- media_file = st.file_uploader("Or upload a video/audio file", type=["mp4", "mov", "avi", "mp3", "wav", "ogg"])
58
- transcript = ""
59
-
60
- def download_youtube_audio(url):
61
- ydl_opts = {
62
- "format": "bestaudio/best",
63
- "extractaudio": True,
64
- "audioformat": "mp3",
65
- "outtmpl": "% (id)s.%(ext)s",
66
- }
67
- with yt_dlp.YoutubeDL(ydl_opts) as ydl:
68
- info = ydl.extract_info(url, download=True)
69
- filename = ydl.prepare_filename(info).replace(".webm", ".mp3").replace(".m4a", ".mp3")
70
- return compress_audio(filename)
71
-
72
- def compress_audio(input_path, target_size_mb=25):
73
- """Compress audio only if it exceeds the target size, adjusting bitrate dynamically."""
74
- output_path = input_path.replace(".mp3", "_compressed.mp3")
75
-
76
- # Check file size in MB
77
- file_size_mb = os.path.getsize(input_path) / (1024 * 1024)
78
-
79
- if file_size_mb <= target_size_mb:
80
- return input_path # No need to compress if already under limit
81
-
82
- # Estimate appropriate bitrate (targeting 90% of desired size)
83
- target_bitrate_kbps = int((target_size_mb * 1024 * 1024 * 8) / (file_size_mb * 1.1)) # 10% buffer
84
- target_bitrate_kbps = max(target_bitrate_kbps, 32) # Prevent extreme low-quality audio
85
-
86
- ffmpeg.input(input_path).output(output_path, audio_bitrate=f"{target_bitrate_kbps}k").run(overwrite_output=True)
87
- return output_path
88
-
89
-
90
- def transcribe_audio(file_path):
91
- whisper_client = OpenAI(api_key=OPENAI_API_KEY, base_url="https://api.openai.com/v1")
92
- transcription_args = {
93
- "file": None,
94
- "model": "whisper-1",
95
- "response_format": "verbose_json",
96
- "timestamp_granularities": ["word"],
97
- "timeout": 360,
98
- "prompt": "The audio may not contain speech, do not make up words."
99
- }
100
- with open(file_path, "rb") as audio_file:
101
- transcription_args["file"] = audio_file
102
- transcript_response = whisper_client.audio.transcriptions.create(**transcription_args)
103
-
104
- transcript_words = transcript_response.words
105
- transcript = " ".join([word['word'] for word in transcript_words])
106
- return transcript
107
-
108
-
109
- if youtube_url:
110
- st.video(youtube_url)
111
- elif media_file:
112
  if media_file.type.startswith("video"):
113
  st.video(media_file)
114
  elif media_file.type.startswith("audio"):
115
  st.audio(media_file)
116
 
117
- if st.button("Transcribe Video"):
118
- with st.spinner("Processing... This may take a few minutes."):
119
- try:
120
- if youtube_url:
121
- audio_path = download_youtube_audio(youtube_url)
122
- transcript = transcribe_audio(audio_path)
123
- elif media_file:
124
- with NamedTemporaryFile(delete=False, suffix=".mp3") as temp_audio:
125
- temp_audio.write(media_file.read())
126
- temp_audio.close()
127
- audio_path = compress_audio(temp_audio.name)
128
- transcript = transcribe_audio(audio_path)
129
- else:
130
- st.error("❌ Please provide a YouTube link or upload a file.")
131
- except Exception as e:
132
- st.error(f"Error: {str(e)}")
133
-
134
- # Display the extracted transcript
135
- st.subheader("📝 Transcript")
136
- transcript = st.text_area("Generated Transcript", transcript, height=300)
137
-
138
 
139
  # Right Column: Clip Plan Generation and Extraction
140
  with col_output:
 
4
  from prompts import SYSTEM_MESSAGE, USER_MESSAGE
5
  import json
6
  import os
 
 
 
7
 
8
  # Set Streamlit layout to wide mode
9
  st.set_page_config(layout="wide")
 
49
 
50
  # Left Column: Transcript Input
51
  with col_transcript:
52
+ st.subheader("📝 Paste Your Transcript")
53
+ transcript = st.text_area("Enter the transcript here:", height=400)
54
+
55
+ # Add reference link below the transcript text box
56
+ st.markdown("---")
57
+ st.markdown(
58
+ """
59
+ <div style="font-size:18px; font-weight:bold; margin-top:10px;">
60
+ Need a transcript? Use <a href="https://huggingface.co/spaces/sanchit-gandhi/whisper-jax-spaces" target="_blank" style="color:#007bff; text-decoration:none;">
61
+ OpenAI Whisper on Hugging Face</a> to generate one from your audio or video.
62
+ </div>
63
+ """,
64
+ unsafe_allow_html=True
65
+ )
66
+
67
+ st.markdown("---")
68
+ st.subheader("🎥 Video/Audio Upload & Playback")
69
+
70
+ media_file = st.file_uploader("Upload a video or audio file", type=["mp4", "mov", "avi", "mp3", "wav", "ogg"])
71
+ if media_file is not None:
72
+ # Detect media type and play accordingly
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
73
  if media_file.type.startswith("video"):
74
  st.video(media_file)
75
  elif media_file.type.startswith("audio"):
76
  st.audio(media_file)
77
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
78
 
79
  # Right Column: Clip Plan Generation and Extraction
80
  with col_output:
requirements.txt CHANGED
@@ -1,4 +1 @@
1
- openai
2
- yt-dlp
3
- pydub
4
- ffmpeg-python
 
1
+ openai