Update app.py
Browse files
app.py
CHANGED
|
@@ -6,6 +6,7 @@ import soundfile as sf
|
|
| 6 |
import tempfile
|
| 7 |
import os
|
| 8 |
from pydub import AudioSegment
|
|
|
|
| 9 |
import time
|
| 10 |
|
| 11 |
# Custom CSS for gloomy elegant styling
|
|
@@ -153,25 +154,94 @@ st.markdown("""
|
|
| 153 |
</style>
|
| 154 |
""", unsafe_allow_html=True)
|
| 155 |
|
| 156 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 157 |
|
| 158 |
# Load NeMo model once
|
| 159 |
@st.cache_resource
|
| 160 |
def load_model():
|
| 161 |
-
|
| 162 |
-
|
| 163 |
-
|
| 164 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
| 165 |
|
| 166 |
model = load_model()
|
| 167 |
|
| 168 |
# Helper: Convert any audio to 16kHz mono WAV
|
| 169 |
def convert_audio(uploaded_file, target_sample_rate=16000):
|
| 170 |
-
|
| 171 |
-
|
| 172 |
-
|
| 173 |
-
|
| 174 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 175 |
|
| 176 |
# App UI
|
| 177 |
st.markdown("""
|
|
@@ -186,7 +256,7 @@ st.markdown("""
|
|
| 186 |
<div class="card">
|
| 187 |
<div style="display: flex; gap: 1rem; margin-bottom: 1rem;">
|
| 188 |
<span class="feature-icon">🔊</span>
|
| 189 |
-
<span>Supports
|
| 190 |
</div>
|
| 191 |
<div style="display: flex; gap: 1rem; margin-bottom: 1rem;">
|
| 192 |
<span class="feature-icon">⚡</span>
|
|
@@ -198,6 +268,14 @@ st.markdown("""
|
|
| 198 |
uploaded_file = st.file_uploader("Drag and drop audio file here", type=SUPPORTED_TYPES)
|
| 199 |
|
| 200 |
if uploaded_file is not None:
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 201 |
# Convert to 16kHz mono wav
|
| 202 |
with st.spinner("Preparing audio for transcription..."):
|
| 203 |
processed_wav = convert_audio(uploaded_file)
|
|
@@ -248,9 +326,19 @@ if uploaded_file is not None:
|
|
| 248 |
""", unsafe_allow_html=True)
|
| 249 |
|
| 250 |
# Actual transcription
|
| 251 |
-
|
| 252 |
-
|
| 253 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 254 |
|
| 255 |
# Update progress to complete
|
| 256 |
progress_container.markdown("""
|
|
|
|
| 6 |
import tempfile
|
| 7 |
import os
|
| 8 |
from pydub import AudioSegment
|
| 9 |
+
import moviepy.editor as mp
|
| 10 |
import time
|
| 11 |
|
| 12 |
# Custom CSS for gloomy elegant styling
|
|
|
|
| 154 |
</style>
|
| 155 |
""", unsafe_allow_html=True)
|
| 156 |
|
| 157 |
+
# File extensions accepted by the uploader; SUPPORTED_TYPES is passed to
# st.file_uploader(type=...). Audio and video extensions are each listed once
# so that the uploader whitelist and the video check in convert_audio cannot
# drift apart.
_AUDIO_EXTENSIONS = ('wav', 'mp3', 'ogg', 'flac', 'm4a', 'aac', 'wma')
_VIDEO_EXTENSIONS = ('mp4', 'mov', 'mkv', 'avi', 'webm')

# Every extension the uploader accepts (audio first, then video).
SUPPORTED_TYPES = [*_AUDIO_EXTENSIONS, *_VIDEO_EXTENSIONS]

# Extensions whose audio track must be extracted before conversion.
VIDEO_TYPES = set(_VIDEO_EXTENSIONS)
|
| 163 |
|
| 164 |
# Load NeMo model once
|
| 165 |
@st.cache_resource
def load_model():
    """Load the pretrained NeMo hybrid RNNT/CTC BPE speech-to-text model.

    The model is fetched with ``from_pretrained`` under the name
    ``nvidia/stt_ar_fastconformer_hybrid_large_pcd_v1.0``. The function is
    wrapped in ``st.cache_resource`` so the model is loaded once rather than
    on every script run.

    Returns:
        The loaded NeMo model instance.

    Raises:
        RuntimeError: If the model cannot be loaded. The original exception
            is chained as ``__cause__`` so the root failure stays visible in
            the traceback.
    """
    try:
        return nemo_asr.models.EncDecHybridRNNTCTCBPEModel.from_pretrained(
            model_name="nvidia/stt_ar_fastconformer_hybrid_large_pcd_v1.0"
        )
    except Exception as e:
        # Wrap in a single, predictable exception type with a readable message
        # while keeping the underlying error attached for debugging.
        raise RuntimeError(f"Failed to load NeMo model: {e}") from e
|
| 175 |
|
| 176 |
model = load_model()
|
| 177 |
|
| 178 |
# Helper: Convert any supported audio or video upload to a 16kHz mono WAV file
|
| 179 |
def _remove_quietly(path):
    """Delete *path* if it is set, ignoring files that are missing or locked."""
    if not path:
        return
    try:
        os.remove(path)
    except OSError:
        # Best-effort cleanup of a temp file; nothing useful to do on failure.
        pass


def _read_upload_bytes(uploaded_file):
    """Return the raw bytes of an upload given as a file-like object or a path."""
    if hasattr(uploaded_file, "read"):
        # Rewind first: a stream that was already consumed would otherwise
        # yield b"" and the conversion would fail on an empty file.
        if hasattr(uploaded_file, "seek"):
            try:
                uploaded_file.seek(0)
            except (OSError, ValueError):
                # Non-seekable stream: read from the current position.
                pass
        return uploaded_file.read()
    with open(uploaded_file, "rb") as src:
        return src.read()


def _extract_audio_track(video_path, wav_path, sample_rate):
    """Write the audio track of *video_path* to *wav_path* using moviepy.

    Returns True when the track was written, False when moviepy could not
    open the file, the clip has no audio, or writing failed. The clip is
    always closed.
    """
    try:
        clip = mp.VideoFileClip(video_path)
    except Exception:
        # Deliberately broad: any moviepy failure just triggers the pydub
        # fallback in the caller.
        return False
    try:
        if clip.audio is None:
            return False
        clip.audio.write_audiofile(wav_path, fps=sample_rate, logger=None)
        return True
    except Exception:
        return False
    finally:
        clip.close()


def convert_audio(uploaded_file, target_sample_rate=16000):
    """
    Convert an uploaded audio or video file to a mono WAV file and return the
    path of the temporary file that holds it.

    For extensions listed in VIDEO_TYPES the audio track is first extracted
    with moviepy; if that fails the file is decoded directly with pydub. In
    every case the final file is produced by pydub, resampled to
    ``target_sample_rate`` and downmixed to one channel.

    Args:
        uploaded_file: A file-like object with ``read()`` (optionally with a
            ``name`` attribute, e.g. a Streamlit upload) or a filesystem path.
        target_sample_rate: Sample rate of the output WAV in Hz.

    Returns:
        Path of the converted WAV file. The file is created with
        ``delete=False``, so the caller is responsible for removing it.

    Raises:
        RuntimeError: If the conversion fails. The original exception is
            chained, and no temporary files are left behind. Errors raised
            while reading the upload itself propagate unchanged.
    """
    # Determine the extension from the upload's name, or from the path itself
    # when a path was passed instead of a file-like object.
    filename = getattr(uploaded_file, "name", None)
    if filename is None and isinstance(uploaded_file, (str, bytes, os.PathLike)):
        filename = os.fsdecode(uploaded_file)
    if filename is None:
        filename = "uploaded"
    ext = os.path.splitext(str(filename))[1].lstrip(".").lower()
    in_suffix = f".{ext}" if ext else ""

    data = _read_upload_bytes(uploaded_file)

    tmp_in_path = None
    extracted_path = None
    tmp_out_path = None
    try:
        # moviepy / pydub operate on paths, so persist the raw upload first.
        with tempfile.NamedTemporaryFile(delete=False, suffix=in_suffix) as tmp_in:
            tmp_in_path = tmp_in.name
            tmp_in.write(data)

        # Reserve the output path and close the handle before anything writes
        # to it; writing to a still-open temp file fails on some platforms.
        with tempfile.NamedTemporaryFile(delete=False, suffix=".wav") as tmp_out:
            tmp_out_path = tmp_out.name

        source_path = tmp_in_path
        if ext in VIDEO_TYPES:
            with tempfile.NamedTemporaryFile(delete=False, suffix=".wav") as tmp_mid:
                extracted_path = tmp_mid.name
            if _extract_audio_track(tmp_in_path, extracted_path, target_sample_rate):
                source_path = extracted_path
            # Otherwise fall back to decoding the original upload with pydub.

        # Always normalise through pydub so the result is mono at the target
        # sample rate, including audio that moviepy extracted.
        audio = AudioSegment.from_file(source_path)
        audio = audio.set_frame_rate(target_sample_rate).set_channels(1)
        audio.export(tmp_out_path, format="wav")
    except Exception as e:
        # Do not leave a partial output file behind on failure.
        _remove_quietly(tmp_out_path)
        raise RuntimeError(f"Failed to convert uploaded file to WAV: {e}") from e
    finally:
        # Intermediate files are never needed after this point.
        _remove_quietly(tmp_in_path)
        _remove_quietly(extracted_path)
    return tmp_out_path
|
| 245 |
|
| 246 |
# App UI
|
| 247 |
st.markdown("""
|
|
|
|
| 256 |
<div class="card">
|
| 257 |
<div style="display: flex; gap: 1rem; margin-bottom: 1rem;">
|
| 258 |
<span class="feature-icon">🔊</span>
|
| 259 |
+
<span>Supports many audio formats and common video types (MP4, MOV, MKV). Upload audio or video and the app will extract audio automatically.</span>
|
| 260 |
</div>
|
| 261 |
<div style="display: flex; gap: 1rem; margin-bottom: 1rem;">
|
| 262 |
<span class="feature-icon">⚡</span>
|
|
|
|
| 268 |
uploaded_file = st.file_uploader("Drag and drop audio file here", type=SUPPORTED_TYPES)
|
| 269 |
|
| 270 |
if uploaded_file is not None:
|
| 271 |
+
# Basic size check (Streamlit UploadedFile has .size in bytes)
|
| 272 |
+
try:
|
| 273 |
+
file_size_mb = uploaded_file.size / (1024 * 1024)
|
| 274 |
+
except Exception:
|
| 275 |
+
file_size_mb = None
|
| 276 |
+
|
| 277 |
+
if file_size_mb is not None and file_size_mb > 500:
|
| 278 |
+
st.warning("Large file detected (>500MB). Processing may take a long time or fail. Consider uploading a smaller file.")
|
| 279 |
# Convert to 16kHz mono wav
|
| 280 |
with st.spinner("Preparing audio for transcription..."):
|
| 281 |
processed_wav = convert_audio(uploaded_file)
|
|
|
|
| 326 |
""", unsafe_allow_html=True)
|
| 327 |
|
| 328 |
# Actual transcription
|
| 329 |
+
try:
|
| 330 |
+
with st.spinner(""):
|
| 331 |
+
result = model.transcribe([processed_wav])
|
| 332 |
+
transcript = result[0].text
|
| 333 |
+
except Exception as e:
|
| 334 |
+
st.error(f"Transcription failed: {e}")
|
| 335 |
+
# Cleanup
|
| 336 |
+
try:
|
| 337 |
+
os.remove(processed_wav)
|
| 338 |
+
except Exception:
|
| 339 |
+
pass
|
| 340 |
+
progress_container.empty()
|
| 341 |
+
raise
|
| 342 |
|
| 343 |
# Update progress to complete
|
| 344 |
progress_container.markdown("""
|