Update app.py
Browse files
app.py
CHANGED
|
@@ -5,9 +5,11 @@ import nemo.collections.asr as nemo_asr
|
|
| 5 |
import soundfile as sf
|
| 6 |
import tempfile
|
| 7 |
import os
|
| 8 |
-
from pydub import AudioSegment
|
| 9 |
-
import moviepy.editor as mp
|
| 10 |
import time
|
|
|
|
|
|
|
|
|
|
|
|
|
| 11 |
|
| 12 |
# Custom CSS for gloomy elegant styling
|
| 13 |
st.markdown("""
|
|
@@ -154,12 +156,42 @@ st.markdown("""
|
|
| 154 |
</style>
|
| 155 |
""", unsafe_allow_html=True)
|
| 156 |
|
| 157 |
-
#
|
| 158 |
-
|
| 159 |
-
|
| 160 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 161 |
|
| 162 |
-
|
|
|
|
|
|
|
|
|
|
| 163 |
|
| 164 |
# Load NeMo model once
|
| 165 |
@st.cache_resource
|
|
@@ -175,73 +207,88 @@ def load_model():
|
|
| 175 |
|
| 176 |
model = load_model()
|
| 177 |
|
| 178 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
| 179 |
def convert_audio(uploaded_file, target_sample_rate=16000):
|
| 180 |
"""
|
| 181 |
-
Convert
|
| 182 |
-
|
| 183 |
-
|
| 184 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 185 |
"""
|
| 186 |
-
# Determine filename/extension
|
| 187 |
-
filename = getattr(uploaded_file, "name", None)
|
| 188 |
-
if filename is None:
|
| 189 |
-
# fallback name
|
| 190 |
-
filename = "uploaded"
|
| 191 |
-
|
| 192 |
-
ext = filename.split('.')[-1].lower()
|
| 193 |
-
|
| 194 |
-
# Save the raw upload to a temporary file first (moviepy / pydub operate on paths)
|
| 195 |
-
with tempfile.NamedTemporaryFile(delete=False, suffix=f".{ext}") as tmp_in:
|
| 196 |
-
try:
|
| 197 |
-
# uploaded_file may be a BytesIO-like with .read()
|
| 198 |
-
data = uploaded_file.read()
|
| 199 |
-
except Exception:
|
| 200 |
-
# If it's already a path string, just copy
|
| 201 |
-
with open(uploaded_file, 'rb') as fsrc:
|
| 202 |
-
data = fsrc.read()
|
| 203 |
-
tmp_in.write(data)
|
| 204 |
-
tmp_in_path = tmp_in.name
|
| 205 |
-
|
| 206 |
-
# If it's a video type, extract audio using moviepy
|
| 207 |
try:
|
| 208 |
-
|
| 209 |
-
|
| 210 |
-
|
| 211 |
-
|
| 212 |
-
# moviepy will write a WAV; we can ensure sample rate later with pydub
|
| 213 |
-
clip.audio.write_audiofile(tmp_out.name, fps=target_sample_rate, logger=None)
|
| 214 |
-
clip.close()
|
| 215 |
-
except Exception:
|
| 216 |
-
# fallback: try to open as audio via pydub
|
| 217 |
-
audio = AudioSegment.from_file(tmp_in_path)
|
| 218 |
-
audio = audio.set_frame_rate(target_sample_rate).set_channels(1)
|
| 219 |
-
audio.export(tmp_out.name, format="wav")
|
| 220 |
-
finally:
|
| 221 |
-
# cleanup input video file
|
| 222 |
-
try:
|
| 223 |
-
os.remove(tmp_in_path)
|
| 224 |
-
except Exception:
|
| 225 |
-
pass
|
| 226 |
-
return tmp_out.name
|
| 227 |
else:
|
| 228 |
-
|
| 229 |
-
|
| 230 |
-
|
| 231 |
-
|
| 232 |
-
|
| 233 |
-
|
| 234 |
-
|
| 235 |
-
|
| 236 |
-
|
| 237 |
-
|
| 238 |
-
|
| 239 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 240 |
try:
|
| 241 |
-
|
| 242 |
-
|
| 243 |
-
|
| 244 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 245 |
|
| 246 |
# App UI
|
| 247 |
st.markdown("""
|
|
@@ -265,7 +312,8 @@ st.markdown("""
|
|
| 265 |
</div>
|
| 266 |
""", unsafe_allow_html=True)
|
| 267 |
|
| 268 |
-
uploaded_file = st.file_uploader("Drag and drop audio file here", type=
|
|
|
|
| 269 |
|
| 270 |
if uploaded_file is not None:
|
| 271 |
# Basic size check (Streamlit UploadedFile has .size in bytes)
|
|
|
|
| 5 |
import soundfile as sf
|
| 6 |
import tempfile
|
| 7 |
import os
|
|
|
|
|
|
|
| 8 |
import time
|
| 9 |
+
import magic # for file type detection
|
| 10 |
+
import ffmpeg
|
| 11 |
+
import subprocess
|
| 12 |
+
from pathlib import Path
|
| 13 |
|
| 14 |
# Custom CSS for gloomy elegant styling
|
| 15 |
st.markdown("""
|
|
|
|
| 156 |
</style>
|
| 157 |
""", unsafe_allow_html=True)
|
| 158 |
|
| 159 |
+
# Check if ffmpeg is available
|
| 160 |
+
def check_ffmpeg():
    """Report whether an ``ffmpeg`` executable can be launched from PATH.

    Runs ``ffmpeg -version`` with its output captured and treats any failure
    to start the process, a non-zero exit status, or a timeout as "not
    available".

    Returns:
        bool: True if the probe command ran and exited with status 0,
        False otherwise.
    """
    try:
        subprocess.run(
            ['ffmpeg', '-version'],
            capture_output=True,
            check=True,
            timeout=10,  # a wedged binary must not hang app start-up
        )
    except (subprocess.SubprocessError, OSError):
        # OSError covers FileNotFoundError as well as PermissionError and
        # similar launch failures that would otherwise crash the script.
        return False
    return True
|
| 166 |
+
|
| 167 |
+
# Fail fast at start-up: if the ffmpeg probe fails, show an error plus
# install instructions and stop the script before any UI below is rendered.
if not check_ffmpeg():
    st.error("FFmpeg is not installed or not found in PATH. Please install FFmpeg to use this application.")
    # NOTE(review): the instructions below only cover Windows.
    st.markdown("""
### How to install FFmpeg:

**Windows (using Chocolatey):**
```
choco install ffmpeg
```

**Windows (manual):**
1. Download from [ffmpeg.org](https://ffmpeg.org/download.html)
2. Extract and add the bin folder to your system PATH

**After installing**, restart this application.
""")
    st.stop()
|
| 184 |
+
|
| 185 |
+
# Accept any file - we'll detect type server-side
|
| 186 |
+
# MIME types that convert_audio treats as plain audio. Membership only
# changes the suffix given to the temporary input file.
AUDIO_MIMETYPES = {
    'audio/wav', 'audio/x-wav', 'audio/mpeg', 'audio/ogg', 'audio/flac',
    'audio/x-m4a', 'audio/aac', 'audio/x-ms-wma'
}

# MIME types that convert_audio treats as video. For these it selects the
# input's audio stream before encoding, and uses a '.video' temp-file suffix.
VIDEO_MIMETYPES = {
    'video/mp4', 'video/quicktime', 'video/x-matroska', 'video/x-msvideo',
    'video/webm', 'video/x-ms-wmv'
}
|
| 195 |
|
| 196 |
# Load NeMo model once
|
| 197 |
@st.cache_resource
|
|
|
|
| 207 |
|
| 208 |
model = load_model()
|
| 209 |
|
| 210 |
+
def detect_file_type(file_data, max_bytes=8192):
    """Detect the MIME type of a file's contents using python-magic.

    Only the leading ``max_bytes`` bytes are handed to libmagic, so large
    uploads are not scanned in full just to identify their container format.
    python-magic's documentation recommends supplying at least the first
    2048 bytes; the default stays comfortably above that.

    Args:
        file_data: The file contents as a bytes-like buffer.
        max_bytes: Number of leading bytes to inspect. Pass None to inspect
            the whole buffer.

    Returns:
        str: The MIME type reported by ``magic.from_buffer(..., mime=True)``.
    """
    header = file_data if max_bytes is None else file_data[:max_bytes]
    return magic.from_buffer(header, mime=True)
|
| 214 |
+
|
| 215 |
def convert_audio(uploaded_file, target_sample_rate=16000):
    """Convert an audio or video file to a mono 16-bit PCM WAV using FFmpeg.

    The input is read fully into memory, its MIME type is sniffed with
    ``detect_file_type``, and the bytes are written to a temporary file that
    FFmpeg reads from. For inputs recognised as video, only the audio stream
    is selected. The temporary input file is always removed; the output file
    is removed if conversion fails.

    Args:
        uploaded_file: A file-like object exposing ``read()`` and ``seek()``
            (e.g. a Streamlit UploadedFile), or a path that ``open`` accepts.
        target_sample_rate: Output sample rate in Hz (default 16000).

    Returns:
        str: Path to the converted temporary WAV file. The caller is
        responsible for deleting it.

    Raises:
        RuntimeError: If reading, type detection, or the FFmpeg conversion
            fails. The original exception is chained as ``__cause__``.
    """
    tmp_in_path = None
    output_path = None
    try:
        # Read the payload exactly once; it is reused for sniffing and writing.
        if hasattr(uploaded_file, 'read'):
            file_data = uploaded_file.read()
            uploaded_file.seek(0)  # Reset position for later use by the caller
        else:
            with open(uploaded_file, 'rb') as f:
                file_data = f.read()

        mime_type = detect_file_type(file_data)

        # The suffix is informational only; FFmpeg probes the content itself.
        suffix = '.tmp'
        if mime_type in AUDIO_MIMETYPES:
            suffix = '.audio' + suffix
        elif mime_type in VIDEO_MIMETYPES:
            suffix = '.video' + suffix

        with tempfile.NamedTemporaryFile(delete=False, suffix=suffix) as tmp_in:
            tmp_in_path = tmp_in.name
            tmp_in.write(file_data)

        # Reserve the output path atomically instead of using the race-prone
        # tempfile.mktemp. The handle is closed right away so FFmpeg can
        # overwrite the file (overwrite_output=True below).
        with tempfile.NamedTemporaryFile(delete=False, suffix='.wav') as tmp_out:
            output_path = tmp_out.name

        # Build the ffmpeg conversion pipeline
        stream = ffmpeg.input(tmp_in_path)

        # Extract audio from video if needed
        if mime_type in VIDEO_MIMETYPES:
            stream = stream.audio

        stream = ffmpeg.output(
            stream,
            output_path,
            acodec='pcm_s16le',      # 16-bit PCM
            ac=1,                    # mono
            ar=target_sample_rate,   # sample rate
            loglevel='error',        # reduce ffmpeg output
        )

        try:
            # capture_stderr is required for ffmpeg.Error.stderr to be set;
            # without it the attribute is None and cannot be decoded.
            ffmpeg.run(stream, overwrite_output=True, capture_stderr=True)
        except ffmpeg.Error as e:
            detail = e.stderr.decode(errors='replace') if e.stderr else str(e)
            raise RuntimeError(f"FFmpeg error during conversion: {detail}") from e

        return output_path

    except Exception as e:
        # Do not leave a partial or empty WAV behind when conversion failed.
        if output_path is not None:
            try:
                os.remove(output_path)
            except OSError:
                pass
        raise RuntimeError(f"Failed to convert file to WAV: {e}") from e

    finally:
        # Clean up input temp file (best effort)
        if tmp_in_path is not None:
            try:
                os.remove(tmp_in_path)
            except OSError:
                pass
|
| 292 |
|
| 293 |
# App UI
|
| 294 |
st.markdown("""
|
|
|
|
| 312 |
</div>
|
| 313 |
""", unsafe_allow_html=True)
|
| 314 |
|
| 315 |
+
uploaded_file = st.file_uploader("Drag and drop any audio or video file here", type=None,
|
| 316 |
+
help="Supports any audio or video format that FFmpeg can handle")
|
| 317 |
|
| 318 |
if uploaded_file is not None:
|
| 319 |
# Basic size check (Streamlit UploadedFile has .size in bytes)
|