NightPrince committed on
Commit
b9d1565
·
verified ·
1 Parent(s): 06511a9

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +117 -69
app.py CHANGED
@@ -5,9 +5,11 @@ import nemo.collections.asr as nemo_asr
5
  import soundfile as sf
6
  import tempfile
7
  import os
8
- from pydub import AudioSegment
9
- import moviepy.editor as mp
10
  import time
 
 
 
 
11
 
12
  # Custom CSS for gloomy elegant styling
13
  st.markdown("""
@@ -154,12 +156,42 @@ st.markdown("""
154
  </style>
155
  """, unsafe_allow_html=True)
156
 
157
- # Support common audio + video file extensions. Streamlit's file_uploader uses these
158
- SUPPORTED_TYPES = ['wav', 'mp3', 'ogg', 'flac', 'm4a', 'aac', 'wma',
159
- # video types
160
- 'mp4', 'mov', 'mkv', 'avi', 'webm']
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
161
 
162
- VIDEO_TYPES = {'mp4', 'mov', 'mkv', 'avi', 'webm'}
 
 
 
163
 
164
  # Load NeMo model once
165
  @st.cache_resource
@@ -175,73 +207,88 @@ def load_model():
175
 
176
  model = load_model()
177
 
178
- # Helper: Convert any audio to 16kHz mono WAV
 
 
 
 
179
  def convert_audio(uploaded_file, target_sample_rate=16000):
180
  """
181
- Convert an uploaded audio or video file to a 16kHz mono WAV file and return the
182
- temporary file path. Supports video files by extracting the audio track first.
183
-
184
- uploaded_file can be a Streamlit UploadedFile-like object or a path-like object.
 
 
 
 
 
185
  """
186
- # Determine filename/extension
187
- filename = getattr(uploaded_file, "name", None)
188
- if filename is None:
189
- # fallback name
190
- filename = "uploaded"
191
-
192
- ext = filename.split('.')[-1].lower()
193
-
194
- # Save the raw upload to a temporary file first (moviepy / pydub operate on paths)
195
- with tempfile.NamedTemporaryFile(delete=False, suffix=f".{ext}") as tmp_in:
196
- try:
197
- # uploaded_file may be a BytesIO-like with .read()
198
- data = uploaded_file.read()
199
- except Exception:
200
- # If it's already a path string, just copy
201
- with open(uploaded_file, 'rb') as fsrc:
202
- data = fsrc.read()
203
- tmp_in.write(data)
204
- tmp_in_path = tmp_in.name
205
-
206
- # If it's a video type, extract audio using moviepy
207
  try:
208
- if ext in VIDEO_TYPES:
209
- with tempfile.NamedTemporaryFile(delete=False, suffix=".wav") as tmp_out:
210
- try:
211
- clip = mp.VideoFileClip(tmp_in_path)
212
- # moviepy will write a WAV; we can ensure sample rate later with pydub
213
- clip.audio.write_audiofile(tmp_out.name, fps=target_sample_rate, logger=None)
214
- clip.close()
215
- except Exception:
216
- # fallback: try to open as audio via pydub
217
- audio = AudioSegment.from_file(tmp_in_path)
218
- audio = audio.set_frame_rate(target_sample_rate).set_channels(1)
219
- audio.export(tmp_out.name, format="wav")
220
- finally:
221
- # cleanup input video file
222
- try:
223
- os.remove(tmp_in_path)
224
- except Exception:
225
- pass
226
- return tmp_out.name
227
  else:
228
- # It's an audio file - use pydub to convert to wav 16k mono
229
- with tempfile.NamedTemporaryFile(delete=False, suffix=".wav") as tmp_out:
230
- audio = AudioSegment.from_file(tmp_in_path)
231
- audio = audio.set_frame_rate(target_sample_rate).set_channels(1)
232
- audio.export(tmp_out.name, format="wav")
233
- try:
234
- os.remove(tmp_in_path)
235
- except Exception:
236
- pass
237
- return tmp_out.name
238
- except Exception as e:
239
- # Attempt to clean up and re-raise as RuntimeError with context
 
 
 
 
 
 
 
 
 
 
 
 
240
  try:
241
- os.remove(tmp_in_path)
242
- except Exception:
243
- pass
244
- raise RuntimeError(f"Failed to convert uploaded file to WAV: {e}")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
245
 
246
  # App UI
247
  st.markdown("""
@@ -265,7 +312,8 @@ st.markdown("""
265
  </div>
266
  """, unsafe_allow_html=True)
267
 
268
- uploaded_file = st.file_uploader("Drag and drop audio file here", type=SUPPORTED_TYPES)
 
269
 
270
  if uploaded_file is not None:
271
  # Basic size check (Streamlit UploadedFile has .size in bytes)
 
5
  import soundfile as sf
6
  import tempfile
7
  import os
 
 
8
  import time
9
+ import magic # for file type detection
10
+ import ffmpeg
11
+ import subprocess
12
+ from pathlib import Path
13
 
14
  # Custom CSS for gloomy elegant styling
15
  st.markdown("""
 
156
  </style>
157
  """, unsafe_allow_html=True)
158
 
159
+ # Check if ffmpeg is available
160
def check_ffmpeg():
    """Report whether a working ``ffmpeg`` executable can be launched.

    Runs ``ffmpeg -version`` and treats any failure to start or complete the
    command as "not available".

    Returns:
        bool: True if the command ran and exited with status 0, else False.
    """
    try:
        subprocess.run(
            ['ffmpeg', '-version'],
            capture_output=True,
            check=True,
            # A version probe should return immediately; never let a broken
            # binary hang application start-up.
            timeout=10,
        )
    except (subprocess.SubprocessError, OSError):
        # SubprocessError covers a non-zero exit status and the timeout.
        # OSError covers a missing binary (FileNotFoundError) as well as one
        # that exists but cannot be executed (e.g. PermissionError).
        return False
    return True
166
+
167
+ if not check_ffmpeg():
168
+ st.error("FFmpeg is not installed or not found in PATH. Please install FFmpeg to use this application.")
169
+ st.markdown("""
170
+ ### How to install FFmpeg:
171
+
172
+ **Windows (using Chocolatey):**
173
+ ```
174
+ choco install ffmpeg
175
+ ```
176
+
177
+ **Windows (manual):**
178
+ 1. Download from [ffmpeg.org](https://ffmpeg.org/download.html)
179
+ 2. Extract and add the bin folder to your system PATH
180
+
181
+ **After installing**, restart this application.
182
+ """)
183
+ st.stop()
184
+
185
+ # Accept any file - we'll detect type server-side
186
+ AUDIO_MIMETYPES = {
187
+ 'audio/wav', 'audio/x-wav', 'audio/mpeg', 'audio/ogg', 'audio/flac',
188
+ 'audio/x-m4a', 'audio/aac', 'audio/x-ms-wma'
189
+ }
190
 
191
+ VIDEO_MIMETYPES = {
192
+ 'video/mp4', 'video/quicktime', 'video/x-matroska', 'video/x-msvideo',
193
+ 'video/webm', 'video/x-ms-wmv'
194
+ }
195
 
196
  # Load NeMo model once
197
  @st.cache_resource
 
207
 
208
  model = load_model()
209
 
210
def detect_file_type(file_data):
    """Return the MIME type that python-magic reports for ``file_data``.

    Args:
        file_data: Raw file contents as a bytes-like buffer.

    Returns:
        The MIME type string produced by ``magic.from_buffer(..., mime=True)``.
    """
    return magic.from_buffer(file_data, mime=True)
214
+
215
def convert_audio(uploaded_file, target_sample_rate=16000):
    """
    Convert any audio or video file to a mono 16-bit PCM WAV using FFmpeg.

    Args:
        uploaded_file: A Streamlit UploadedFile (any object exposing
            ``read``/``seek``) or a path-like object.
        target_sample_rate: Output sample rate in Hz (default 16000).

    Returns:
        str: Path to the converted temporary WAV file. The file is not
        deleted by this function; the caller is responsible for removing it.

    Raises:
        RuntimeError: If the input cannot be read, its type cannot be
            detected, or FFmpeg fails to convert it.
    """
    tmp_in_path = None
    output_path = None
    try:
        # Read the payload exactly once; the same bytes feed both the MIME
        # sniffing and the temporary input file.
        if hasattr(uploaded_file, 'read'):
            file_data = uploaded_file.read()
            uploaded_file.seek(0)  # Reset position for later use
        else:
            with open(uploaded_file, 'rb') as f:
                file_data = f.read()

        # Detect file type
        mime_type = detect_file_type(file_data)

        # The suffix only labels the temp file; FFmpeg is left to probe the
        # real container format from the content.
        suffix = '.tmp'
        if mime_type in AUDIO_MIMETYPES:
            suffix = '.audio' + suffix
        elif mime_type in VIDEO_MIMETYPES:
            suffix = '.video' + suffix

        with tempfile.NamedTemporaryFile(delete=False, suffix=suffix) as tmp_in:
            tmp_in.write(file_data)
            tmp_in_path = tmp_in.name

        # Reserve the output path by actually creating the file.
        # tempfile.mktemp() only returns a name, which another process could
        # claim before FFmpeg writes to it.
        with tempfile.NamedTemporaryFile(delete=False, suffix='.wav') as tmp_out:
            output_path = tmp_out.name

        try:
            # Build the ffmpeg conversion pipeline
            stream = ffmpeg.input(tmp_in_path)

            # Extract audio from video if needed
            if mime_type in VIDEO_MIMETYPES:
                stream = stream.audio

            # Convert to mono WAV at the requested sample rate
            stream = ffmpeg.output(
                stream,
                output_path,
                acodec='pcm_s16le',     # 16-bit PCM
                ac=1,                   # mono
                ar=target_sample_rate,  # sample rate
                loglevel='error',       # reduce ffmpeg output
            )

            # capture_stderr is required for ffmpeg.Error.stderr to be
            # populated. overwrite_output is required because the output
            # file already exists (created above).
            ffmpeg.run(stream, overwrite_output=True, capture_stderr=True)
        except ffmpeg.Error as e:
            detail = e.stderr.decode(errors='replace') if e.stderr else str(e)
            raise RuntimeError(f"FFmpeg error during conversion: {detail}") from e

        return output_path

    except Exception as e:
        # Do not leave a partial or empty WAV behind when conversion fails.
        if output_path is not None:
            try:
                os.remove(output_path)
            except OSError:
                pass
        raise RuntimeError(f"Failed to convert file to WAV: {str(e)}") from e

    finally:
        # Clean up input temp file (best effort).
        if tmp_in_path is not None:
            try:
                os.remove(tmp_in_path)
            except OSError:
                pass
292
 
293
  # App UI
294
  st.markdown("""
 
312
  </div>
313
  """, unsafe_allow_html=True)
314
 
315
+ uploaded_file = st.file_uploader("Drag and drop any audio or video file here", type=None,
316
+ help="Supports any audio or video format that FFmpeg can handle")
317
 
318
  if uploaded_file is not None:
319
  # Basic size check (Streamlit UploadedFile has .size in bytes)