Spaces:

NightPrince
/

Arabic-Transcriber-Pro

Running

App Files Files Community

NightPrince committed on Sep 30

Commit

adf67fb

verified ·

1 Parent(s): 59536c6

Update app.py

Browse files

Files changed (1) hide show

app.py +102 -14

app.py CHANGED Viewed

@@ -6,6 +6,7 @@ import soundfile as sf
 import tempfile
 import os
 from pydub import AudioSegment
 import time
 # Custom CSS for gloomy elegant styling
@@ -153,25 +154,94 @@ st.markdown("""
     </style>
 """, unsafe_allow_html=True)
-SUPPORTED_TYPES = ['wav', 'mp3', 'ogg', 'flac', 'm4a']
 # Load NeMo model once
 @st.cache_resource
 def load_model():
-    model = nemo_asr.models.EncDecHybridRNNTCTCBPEModel.from_pretrained(
-        model_name="nvidia/stt_ar_fastconformer_hybrid_large_pcd_v1.0"
-    )
-    return model
 model = load_model()
 # Helper: Convert any audio to 16kHz mono WAV
 def convert_audio(uploaded_file, target_sample_rate=16000):
-    with tempfile.NamedTemporaryFile(delete=False, suffix=".wav") as tmp_out:
-        audio = AudioSegment.from_file(uploaded_file)
-        audio = audio.set_frame_rate(target_sample_rate).set_channels(1)
-        audio.export(tmp_out.name, format="wav")
-        return tmp_out.name
 # App UI
 st.markdown("""
@@ -186,7 +256,7 @@ st.markdown("""
     <div class="card">
         <div style="display: flex; gap: 1rem; margin-bottom: 1rem;">
             <span class="feature-icon">🔊</span>
-            <span>Supports WAV, MP3, OGG, FLAC, M4A</span>
         </div>
         <div style="display: flex; gap: 1rem; margin-bottom: 1rem;">
             <span class="feature-icon">⚡</span>
@@ -198,6 +268,14 @@ st.markdown("""
 uploaded_file = st.file_uploader("Drag and drop audio file here", type=SUPPORTED_TYPES)
 if uploaded_file is not None:
     # Convert to 16kHz mono wav
     with st.spinner("Preparing audio for transcription..."):
         processed_wav = convert_audio(uploaded_file)
@@ -248,9 +326,19 @@ if uploaded_file is not None:
         """, unsafe_allow_html=True)
         # Actual transcription
-        with st.spinner(""):
-            result = model.transcribe([processed_wav])
-            transcript = result[0].text
         # Update progress to complete
         progress_container.markdown("""

 import tempfile
 import os
 from pydub import AudioSegment
+import moviepy.editor as mp
 import time
 # Custom CSS for gloomy elegant styling
     </style>
 """, unsafe_allow_html=True)
+# Support common audio + video file extensions. Streamlit's file_uploader uses these
+SUPPORTED_TYPES = ['wav', 'mp3', 'ogg', 'flac', 'm4a', 'aac', 'wma',
+                   # video types
+                   'mp4', 'mov', 'mkv', 'avi', 'webm']
+VIDEO_TYPES = {'mp4', 'mov', 'mkv', 'avi', 'webm'}
 # Load NeMo model once
 @st.cache_resource
 def load_model():
+    try:
+        model = nemo_asr.models.EncDecHybridRNNTCTCBPEModel.from_pretrained(
+            model_name="nvidia/stt_ar_fastconformer_hybrid_large_pcd_v1.0"
+        )
+        return model
+    except Exception as e:
+        # Re-raise so the UI can present a friendly error when called
+        raise RuntimeError(f"Failed to load NeMo model: {e}")
 model = load_model()
 # Helper: Convert any audio to 16kHz mono WAV
 def convert_audio(uploaded_file, target_sample_rate=16000):
+    """
+    Convert an uploaded audio or video file to a 16kHz mono WAV file and return the
+    temporary file path. Supports video files by extracting the audio track first.
+    uploaded_file can be a Streamlit UploadedFile-like object or a path-like object.
+    """
+    # Determine filename/extension
+    filename = getattr(uploaded_file, "name", None)
+    if filename is None:
+        # fallback name
+        filename = "uploaded"
+    ext = filename.split('.')[-1].lower()
+    # Save the raw upload to a temporary file first (moviepy / pydub operate on paths)
+    with tempfile.NamedTemporaryFile(delete=False, suffix=f".{ext}") as tmp_in:
+        try:
+            # uploaded_file may be a BytesIO-like with .read()
+            data = uploaded_file.read()
+        except Exception:
+            # If it's already a path string, just copy
+            with open(uploaded_file, 'rb') as fsrc:
+                data = fsrc.read()
+        tmp_in.write(data)
+        tmp_in_path = tmp_in.name
+    # If it's a video type, extract audio using moviepy
+    try:
+        if ext in VIDEO_TYPES:
+            with tempfile.NamedTemporaryFile(delete=False, suffix=".wav") as tmp_out:
+                try:
+                    clip = mp.VideoFileClip(tmp_in_path)
+                    # moviepy will write a WAV; we can ensure sample rate later with pydub
+                    clip.audio.write_audiofile(tmp_out.name, fps=target_sample_rate, logger=None)
+                    clip.close()
+                except Exception:
+                    # fallback: try to open as audio via pydub
+                    audio = AudioSegment.from_file(tmp_in_path)
+                    audio = audio.set_frame_rate(target_sample_rate).set_channels(1)
+                    audio.export(tmp_out.name, format="wav")
+                finally:
+                    # cleanup input video file
+                    try:
+                        os.remove(tmp_in_path)
+                    except Exception:
+                        pass
+                return tmp_out.name
+        else:
+            # It's an audio file - use pydub to convert to wav 16k mono
+            with tempfile.NamedTemporaryFile(delete=False, suffix=".wav") as tmp_out:
+                audio = AudioSegment.from_file(tmp_in_path)
+                audio = audio.set_frame_rate(target_sample_rate).set_channels(1)
+                audio.export(tmp_out.name, format="wav")
+                try:
+                    os.remove(tmp_in_path)
+                except Exception:
+                    pass
+                return tmp_out.name
+    except Exception as e:
+        # Attempt to clean up and re-raise as RuntimeError with context
+        try:
+            os.remove(tmp_in_path)
+        except Exception:
+            pass
+        raise RuntimeError(f"Failed to convert uploaded file to WAV: {e}")
 # App UI
 st.markdown("""
     <div class="card">
         <div style="display: flex; gap: 1rem; margin-bottom: 1rem;">
             <span class="feature-icon">🔊</span>
+            <span>Supports many audio formats and common video types (MP4, MOV, MKV). Upload audio or video and the app will extract audio automatically.</span>
         </div>
         <div style="display: flex; gap: 1rem; margin-bottom: 1rem;">
             <span class="feature-icon">⚡</span>
 uploaded_file = st.file_uploader("Drag and drop audio file here", type=SUPPORTED_TYPES)
 if uploaded_file is not None:
+    # Basic size check (Streamlit UploadedFile has .size in bytes)
+    try:
+        file_size_mb = uploaded_file.size / (1024 * 1024)
+    except Exception:
+        file_size_mb = None
+    if file_size_mb is not None and file_size_mb > 500:
+        st.warning("Large file detected (>500MB). Processing may take a long time or fail. Consider uploading a smaller file.")
     # Convert to 16kHz mono wav
     with st.spinner("Preparing audio for transcription..."):
         processed_wav = convert_audio(uploaded_file)
         """, unsafe_allow_html=True)
         # Actual transcription
+        try:
+            with st.spinner(""):
+                result = model.transcribe([processed_wav])
+                transcript = result[0].text
+        except Exception as e:
+            st.error(f"Transcription failed: {e}")
+            # Cleanup
+            try:
+                os.remove(processed_wav)
+            except Exception:
+                pass
+            progress_container.empty()
+            raise
         # Update progress to complete
         progress_container.markdown("""