NightPrince committed on
Commit
adf67fb
·
verified ·
1 Parent(s): 59536c6

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +102 -14
app.py CHANGED
@@ -6,6 +6,7 @@ import soundfile as sf
6
  import tempfile
7
  import os
8
  from pydub import AudioSegment
 
9
  import time
10
 
11
  # Custom CSS for gloomy elegant styling
@@ -153,25 +154,94 @@ st.markdown("""
153
  </style>
154
  """, unsafe_allow_html=True)
155
 
156
- SUPPORTED_TYPES = ['wav', 'mp3', 'ogg', 'flac', 'm4a']
 
 
 
 
 
157
 
158
  # Load NeMo model once
159
  @st.cache_resource
160
  def load_model():
161
- model = nemo_asr.models.EncDecHybridRNNTCTCBPEModel.from_pretrained(
162
- model_name="nvidia/stt_ar_fastconformer_hybrid_large_pcd_v1.0"
163
- )
164
- return model
 
 
 
 
165
 
166
  model = load_model()
167
 
168
  # Helper: Convert any audio to 16kHz mono WAV
169
  def convert_audio(uploaded_file, target_sample_rate=16000):
170
- with tempfile.NamedTemporaryFile(delete=False, suffix=".wav") as tmp_out:
171
- audio = AudioSegment.from_file(uploaded_file)
172
- audio = audio.set_frame_rate(target_sample_rate).set_channels(1)
173
- audio.export(tmp_out.name, format="wav")
174
- return tmp_out.name
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
175
 
176
  # App UI
177
  st.markdown("""
@@ -186,7 +256,7 @@ st.markdown("""
186
  <div class="card">
187
  <div style="display: flex; gap: 1rem; margin-bottom: 1rem;">
188
  <span class="feature-icon">🔊</span>
189
- <span>Supports WAV, MP3, OGG, FLAC, M4A</span>
190
  </div>
191
  <div style="display: flex; gap: 1rem; margin-bottom: 1rem;">
192
  <span class="feature-icon">⚡</span>
@@ -198,6 +268,14 @@ st.markdown("""
198
  uploaded_file = st.file_uploader("Drag and drop audio file here", type=SUPPORTED_TYPES)
199
 
200
  if uploaded_file is not None:
 
 
 
 
 
 
 
 
201
  # Convert to 16kHz mono wav
202
  with st.spinner("Preparing audio for transcription..."):
203
  processed_wav = convert_audio(uploaded_file)
@@ -248,9 +326,19 @@ if uploaded_file is not None:
248
  """, unsafe_allow_html=True)
249
 
250
  # Actual transcription
251
- with st.spinner(""):
252
- result = model.transcribe([processed_wav])
253
- transcript = result[0].text
 
 
 
 
 
 
 
 
 
 
254
 
255
  # Update progress to complete
256
  progress_container.markdown("""
 
6
  import tempfile
7
  import os
8
  from pydub import AudioSegment
9
+ import moviepy.editor as mp
10
  import time
11
 
12
  # Custom CSS for gloomy elegant styling
 
154
  </style>
155
  """, unsafe_allow_html=True)
156
 
157
+ # Support common audio + video file extensions. Streamlit's file_uploader uses these
158
+ SUPPORTED_TYPES = ['wav', 'mp3', 'ogg', 'flac', 'm4a', 'aac', 'wma',
159
+ # video types
160
+ 'mp4', 'mov', 'mkv', 'avi', 'webm']
161
+
162
+ VIDEO_TYPES = {'mp4', 'mov', 'mkv', 'avi', 'webm'}
163
 
164
  # Load NeMo model once
165
  @st.cache_resource
166
  def load_model():
167
+ try:
168
+ model = nemo_asr.models.EncDecHybridRNNTCTCBPEModel.from_pretrained(
169
+ model_name="nvidia/stt_ar_fastconformer_hybrid_large_pcd_v1.0"
170
+ )
171
+ return model
172
+ except Exception as e:
173
+ # Re-raise so the UI can present a friendly error when called
174
+ raise RuntimeError(f"Failed to load NeMo model: {e}")
175
 
176
  model = load_model()
177
 
178
  # Helper: Convert any audio to 16kHz mono WAV
179
  def convert_audio(uploaded_file, target_sample_rate=16000):
180
+ """
181
+ Convert an uploaded audio or video file to a 16kHz mono WAV file and return the
182
+ temporary file path. Supports video files by extracting the audio track first.
183
+
184
+ uploaded_file can be a Streamlit UploadedFile-like object or a path-like object.
185
+ """
186
+ # Determine filename/extension
187
+ filename = getattr(uploaded_file, "name", None)
188
+ if filename is None:
189
+ # fallback name
190
+ filename = "uploaded"
191
+
192
+ ext = filename.split('.')[-1].lower()
193
+
194
+ # Save the raw upload to a temporary file first (moviepy / pydub operate on paths)
195
+ with tempfile.NamedTemporaryFile(delete=False, suffix=f".{ext}") as tmp_in:
196
+ try:
197
+ # uploaded_file may be a BytesIO-like with .read()
198
+ data = uploaded_file.read()
199
+ except Exception:
200
+ # If it's already a path string, just copy
201
+ with open(uploaded_file, 'rb') as fsrc:
202
+ data = fsrc.read()
203
+ tmp_in.write(data)
204
+ tmp_in_path = tmp_in.name
205
+
206
+ # If it's a video type, extract audio using moviepy
207
+ try:
208
+ if ext in VIDEO_TYPES:
209
+ with tempfile.NamedTemporaryFile(delete=False, suffix=".wav") as tmp_out:
210
+ try:
211
+ clip = mp.VideoFileClip(tmp_in_path)
212
+ # moviepy will write a WAV; we can ensure sample rate later with pydub
213
+ clip.audio.write_audiofile(tmp_out.name, fps=target_sample_rate, logger=None)
214
+ clip.close()
215
+ except Exception:
216
+ # fallback: try to open as audio via pydub
217
+ audio = AudioSegment.from_file(tmp_in_path)
218
+ audio = audio.set_frame_rate(target_sample_rate).set_channels(1)
219
+ audio.export(tmp_out.name, format="wav")
220
+ finally:
221
+ # cleanup input video file
222
+ try:
223
+ os.remove(tmp_in_path)
224
+ except Exception:
225
+ pass
226
+ return tmp_out.name
227
+ else:
228
+ # It's an audio file - use pydub to convert to wav 16k mono
229
+ with tempfile.NamedTemporaryFile(delete=False, suffix=".wav") as tmp_out:
230
+ audio = AudioSegment.from_file(tmp_in_path)
231
+ audio = audio.set_frame_rate(target_sample_rate).set_channels(1)
232
+ audio.export(tmp_out.name, format="wav")
233
+ try:
234
+ os.remove(tmp_in_path)
235
+ except Exception:
236
+ pass
237
+ return tmp_out.name
238
+ except Exception as e:
239
+ # Attempt to clean up and re-raise as RuntimeError with context
240
+ try:
241
+ os.remove(tmp_in_path)
242
+ except Exception:
243
+ pass
244
+ raise RuntimeError(f"Failed to convert uploaded file to WAV: {e}")
245
 
246
  # App UI
247
  st.markdown("""
 
256
  <div class="card">
257
  <div style="display: flex; gap: 1rem; margin-bottom: 1rem;">
258
  <span class="feature-icon">🔊</span>
259
+ <span>Supports many audio formats and common video types (MP4, MOV, MKV). Upload audio or video and the app will extract audio automatically.</span>
260
  </div>
261
  <div style="display: flex; gap: 1rem; margin-bottom: 1rem;">
262
  <span class="feature-icon">⚡</span>
 
268
  uploaded_file = st.file_uploader("Drag and drop audio file here", type=SUPPORTED_TYPES)
269
 
270
  if uploaded_file is not None:
271
+ # Basic size check (Streamlit UploadedFile has .size in bytes)
272
+ try:
273
+ file_size_mb = uploaded_file.size / (1024 * 1024)
274
+ except Exception:
275
+ file_size_mb = None
276
+
277
+ if file_size_mb is not None and file_size_mb > 500:
278
+ st.warning("Large file detected (>500MB). Processing may take a long time or fail. Consider uploading a smaller file.")
279
  # Convert to 16kHz mono wav
280
  with st.spinner("Preparing audio for transcription..."):
281
  processed_wav = convert_audio(uploaded_file)
 
326
  """, unsafe_allow_html=True)
327
 
328
  # Actual transcription
329
+ try:
330
+ with st.spinner(""):
331
+ result = model.transcribe([processed_wav])
332
+ transcript = result[0].text
333
+ except Exception as e:
334
+ st.error(f"Transcription failed: {e}")
335
+ # Cleanup
336
+ try:
337
+ os.remove(processed_wav)
338
+ except Exception:
339
+ pass
340
+ progress_container.empty()
341
+ raise
342
 
343
  # Update progress to complete
344
  progress_container.markdown("""