Spaces:

NightPrince
/

Arabic-Transcriber-Pro

Running

File size: 13,598 Bytes

# filename: pro_arabic_transcriper.py

import html
import os
import subprocess
import tempfile
import time
from pathlib import Path

import ffmpeg
import magic  # for file type detection
import nemo.collections.asr as nemo_asr
import soundfile as sf
import streamlit as st

# Global stylesheet for the app's dark theme.
# It is injected as a raw <style> element through st.markdown, which is why
# unsafe_allow_html=True is required. The :root variables define the palette;
# the class selectors (.header, .card, .feature-icon, .stats, .stat-box,
# .stat-value, .progress-container, .progress-bar, .transcript-container,
# .transcript-box) are the ones used by the HTML snippets rendered further
# down in this script. .transcript-box sets `direction: rtl` so the Arabic
# transcript renders right-to-left.
# NOTE(review): .file-uploader is defined here but is not referenced by any
# markup visible in this file.
st.markdown("""
    <style>
        :root {
            --primary: #3a506b;
            --secondary: #5bc0be;
            --accent: #e55934;
            --background: #1c2541;
            --card: #0b132b;
            --text: #e0e0e0;
            --text-secondary: #b8b8b8;
        }
        
        .stApp {
            background-color: var(--background);
            color: var(--text);
        }
        
        .main .block-container {
            max-width: 1200px;
            padding: 2rem 3rem;
        }
        
        .card {
            background-color: var(--card);
            border-radius: 8px;
            padding: 1.5rem;
            margin-bottom: 1.5rem;
            border-left: 3px solid var(--secondary);
        }
        
        .header {
            background: linear-gradient(135deg, #0b132b, #1c2541);
            color: white;
            padding: 2rem 3rem;
            margin: -2rem -3rem 2rem -3rem;
            border-bottom: 1px solid rgba(91, 192, 190, 0.2);
        }
        
        .stButton>button {
            background: var(--primary);
            color: white;
            border: none;
            border-radius: 6px;
            padding: 0.7rem 1.5rem;
            font-weight: 500;
            transition: all 0.2s ease;
            border: 1px solid rgba(91, 192, 190, 0.3);
        }
        
        .stButton>button:hover {
            background: #2c3e5a;
            color: white;
        }
        
        .stDownloadButton>button {
            background: var(--secondary);
            color: #0b132b;
        }
        
        .stDownloadButton>button:hover {
            background: #4aa8a6;
            color: #0b132b;
        }
        
        .transcript-container {
            background-color: rgba(11, 19, 43, 0.7);
            border-radius: 8px;
            padding: 1.5rem;
            margin-top: 1rem;
            border: 1px solid rgba(91, 192, 190, 0.1);
        }
        
        .transcript-box {
            background-color: transparent;
            font-size: 1.1rem;
            line-height: 1.8;
            min-height: 150px;
            direction: rtl;
            font-family: 'Segoe UI', Tahoma, Geneva, Verdana, sans-serif;
            color: var(--text);
            white-space: pre-wrap;
        }
        
        .stats {
            display: flex;
            gap: 1rem;
            margin-top: 1rem;
        }
        
        .stat-box {
            background-color: rgba(58, 80, 107, 0.5);
            padding: 0.8rem 1rem;
            border-radius: 6px;
            flex: 1;
            min-width: 100px;
            text-align: center;
            border: 1px solid rgba(91, 192, 190, 0.1);
        }
        
        .stat-value {
            font-size: 1.2rem;
            font-weight: bold;
            color: var(--secondary);
        }
        
        .progress-container {
            height: 6px;
            background-color: rgba(58, 80, 107, 0.5);
            border-radius: 3px;
            margin: 1.5rem 0;
            overflow: hidden;
        }
        
        .progress-bar {
            height: 100%;
            background: linear-gradient(90deg, var(--secondary), #4aa8a6);
            border-radius: 3px;
            transition: width 0.4s ease;
        }
        
        h1, h2, h3 {
            color: var(--text) !important;
        }
        
        .file-uploader {
            border: 2px dashed var(--secondary);
            border-radius: 8px;
            padding: 2rem;
            text-align: center;
            background-color: rgba(91, 192, 190, 0.05);
            margin-bottom: 1.5rem;
        }
        
        .feature-icon {
            color: var(--secondary);
            margin-right: 0.5rem;
        }
        
        .stSpinner > div {
            border-color: var(--secondary) transparent transparent transparent !important;
        }
    </style>
""", unsafe_allow_html=True)

# Check if ffmpeg is available
def check_ffmpeg():
    """Report whether an ``ffmpeg`` executable can be launched from PATH.

    Runs ``ffmpeg -version`` and treats any failure to start or complete the
    process as "not available".

    Returns:
        bool: True if the command ran and exited with status 0, else False.
    """
    try:
        subprocess.run(
            ['ffmpeg', '-version'],
            capture_output=True,
            check=True,
            timeout=10,  # a hung binary must not block app start-up forever
        )
    except (subprocess.SubprocessError, OSError):
        # SubprocessError covers a non-zero exit status and the timeout.
        # OSError covers a missing binary (FileNotFoundError) as well as one
        # that exists but cannot be executed (e.g. PermissionError).
        return False
    return True

# Start-up guard: the app cannot convert uploads without FFmpeg, so when the
# probe fails we show an error plus installation instructions and halt this
# script run with st.stop(); nothing below this point is executed.
# NOTE(review): the instructions shown only cover Windows.
if not check_ffmpeg():
    st.error("FFmpeg is not installed or not found in PATH. Please install FFmpeg to use this application.")
    st.markdown("""
    ### How to install FFmpeg:
    
    **Windows (using Chocolatey):**
    ```
    choco install ffmpeg
    ```
    
    **Windows (manual):**
    1. Download from [ffmpeg.org](https://ffmpeg.org/download.html)
    2. Extract and add the bin folder to your system PATH
    
    **After installing**, restart this application.
    """)
    st.stop()

# The uploader accepts any file; the real type is sniffed from the content.
# These sets classify the sniffed MIME type as audio or video.
AUDIO_MIMETYPES = {
    'audio/wav',
    'audio/x-wav',
    'audio/mpeg',
    'audio/ogg',
    'audio/flac',
    'audio/x-m4a',
    'audio/aac',
    'audio/x-ms-wma',
}

VIDEO_MIMETYPES = {
    'video/mp4',
    'video/quicktime',
    'video/x-matroska',
    'video/x-msvideo',
    'video/webm',
    'video/x-ms-wmv',
}

# Load NeMo model once
@st.cache_resource
def load_model():
    """Load the pretrained Arabic FastConformer hybrid RNNT/CTC model.

    Decorated with ``st.cache_resource`` so the model object is reused
    instead of being reloaded each time the script runs.

    Returns:
        The model returned by ``EncDecHybridRNNTCTCBPEModel.from_pretrained``.

    Raises:
        RuntimeError: If loading fails for any reason; the original exception
            is attached as ``__cause__``.
    """
    try:
        return nemo_asr.models.EncDecHybridRNNTCTCBPEModel.from_pretrained(
            model_name="nvidia/stt_ar_fastconformer_hybrid_large_pcd_v1.0"
        )
    except Exception as e:
        # Normalise every failure to one exception type so the caller has a
        # single thing to catch; chain explicitly to keep the root cause.
        raise RuntimeError(f"Failed to load NeMo model: {e}") from e

# Load the model before any upload UI is drawn. load_model() reports every
# failure as RuntimeError; show it as a readable message and halt this run
# rather than letting a raw traceback reach the page.
try:
    model = load_model()
except RuntimeError as e:
    st.error(f"Could not load the speech recognition model: {e}")
    st.stop()

def detect_file_type(file_data):
    """Return the MIME type python-magic reports for the given file content."""
    return magic.from_buffer(file_data, mime=True)

def _remove_quietly(path):
    """Best-effort delete of a temporary file; a missing file is not an error."""
    try:
        os.remove(path)
    except OSError:
        pass


def convert_audio(uploaded_file, target_sample_rate=16000):
    """
    Convert any audio or video file to a mono 16-bit PCM WAV using FFmpeg.

    The input is copied to a temporary file, its MIME type is sniffed from the
    content, and FFmpeg writes the result to a second temporary file. The
    temporary input is always deleted; the output is deleted only on failure.
    On success the caller owns the returned file and must delete it.

    Args:
        uploaded_file: A file-like object with ``read`` (e.g. a Streamlit
            UploadedFile) or a path to a file on disk.
        target_sample_rate: Output sample rate (default 16000 Hz)

    Returns:
        str: Path to the converted temporary WAV file

    Raises:
        RuntimeError: If reading, type detection or the FFmpeg conversion
            fails. The message starts with "Failed to convert file to WAV:".
    """
    tmp_in_path = None
    output_path = None
    succeeded = False
    try:
        # Read the whole input once. Rewind first so a stream that was already
        # consumed (e.g. by an earlier call) is still read from the start, and
        # rewind afterwards so the caller can read it again.
        if hasattr(uploaded_file, 'read'):
            can_seek = hasattr(uploaded_file, 'seek')
            if can_seek:
                uploaded_file.seek(0)
            file_data = uploaded_file.read()
            if can_seek:
                uploaded_file.seek(0)
        else:
            with open(uploaded_file, 'rb') as f:
                file_data = f.read()

        # Detect file type
        mime_type = detect_file_type(file_data)

        # The suffix only labels the temp file; FFmpeg probes the content.
        suffix = '.tmp'
        if mime_type in AUDIO_MIMETYPES:
            suffix = '.audio' + suffix
        elif mime_type in VIDEO_MIMETYPES:
            suffix = '.video' + suffix

        with tempfile.NamedTemporaryFile(delete=False, suffix=suffix) as tmp_in:
            tmp_in.write(file_data)
            tmp_in_path = tmp_in.name

        # mkstemp creates the file atomically (unlike the race-prone mktemp);
        # FFmpeg then overwrites it in place.
        fd, output_path = tempfile.mkstemp(suffix='.wav')
        os.close(fd)

        # Build the ffmpeg conversion pipeline
        stream = ffmpeg.input(tmp_in_path)

        # Extract audio from video if needed
        if mime_type in VIDEO_MIMETYPES:
            stream = stream.audio

        stream = ffmpeg.output(
            stream,
            output_path,
            acodec='pcm_s16le',     # 16-bit PCM
            ac=1,                   # mono
            ar=target_sample_rate,  # sample rate
            loglevel='error'        # reduce ffmpeg output
        )

        # stderr must be captured, otherwise ffmpeg.Error.stderr is None and
        # the real failure reason is lost.
        ffmpeg.run(
            stream,
            overwrite_output=True,
            capture_stdout=True,
            capture_stderr=True,
        )
        succeeded = True
        return output_path

    except ffmpeg.Error as e:
        detail = e.stderr.decode(errors='replace').strip() if e.stderr else str(e)
        raise RuntimeError(
            f"Failed to convert file to WAV: FFmpeg error during conversion: {detail}"
        ) from e
    except Exception as e:
        raise RuntimeError(f"Failed to convert file to WAV: {str(e)}") from e
    finally:
        # Clean up input temp file, and the output too if we did not succeed
        if tmp_in_path is not None:
            _remove_quietly(tmp_in_path)
        if output_path is not None and not succeeded:
            _remove_quietly(output_path)

# App UI
# Page banner: raw HTML styled by the .header rule from the stylesheet
# injected at the top of the script (hence unsafe_allow_html=True).
st.markdown("""
    <div class="header">
        <h1 style="margin-bottom: 0.5rem;">Arabic Transcriber Pro</h1>
        <p style="color: var(--text-secondary); margin-top: 0;">Convert speech to text with the highest accuracy</p>
    </div>
""", unsafe_allow_html=True)

# Main content - single wide column layout
# Static feature summary rendered as a .card with two icon + text rows.
st.markdown("""
    <div class="card">
        <div style="display: flex; gap: 1rem; margin-bottom: 1rem;">
            <span class="feature-icon">🔊</span>
            <span>Supports many audio formats and common video types (MP4, MOV, MKV). Upload audio or video and the app will extract audio automatically.</span>
        </div>
        <div style="display: flex; gap: 1rem; margin-bottom: 1rem;">
            <span class="feature-icon">⚡</span>
            <span>Fast processing with advanced AI</span>
        </div>
    </div>
""", unsafe_allow_html=True)

# Upload widget. type=None applies no extension filter; the real format is
# detected from the file content inside convert_audio().
uploaded_file = st.file_uploader(
    "Drag and drop any audio or video file here",
    type=None,
    help="Supports any audio or video format that FFmpeg can handle",
)

if uploaded_file is not None:
    # Basic size check (Streamlit UploadedFile has .size in bytes)
    try:
        file_size_mb = uploaded_file.size / (1024 * 1024)
    except (AttributeError, TypeError):
        file_size_mb = None

    if file_size_mb is not None and file_size_mb > 500:
        st.warning("Large file detected (>500MB). Processing may take a long time or fail. Consider uploading a smaller file.")

    # Convert to 16kHz mono wav. convert_audio() reports every failure as
    # RuntimeError; show it as a message instead of a traceback and end the run.
    try:
        with st.spinner("Preparing audio for transcription..."):
            processed_wav = convert_audio(uploaded_file)
    except RuntimeError as e:
        st.error(f"Could not prepare the uploaded file: {e}")
        st.stop()

    # Read only the WAV header for the details panel; decoding every sample
    # just to compute the duration is unnecessary.
    wav_info = sf.info(processed_wav)
    sample_rate = wav_info.samplerate
    channels = wav_info.channels
    duration = wav_info.duration

    # Hand the player the bytes rather than the path, so the temporary file
    # can be deleted as soon as this run no longer needs it.
    st.audio(Path(processed_wav).read_bytes(), format="audio/wav")

    st.markdown("### Audio Details")
    st.markdown("""
        <div class="stats">
            <div class="stat-box">
                <div>Duration</div>
                <div class="stat-value">{:.1f}s</div>
            </div>
            <div class="stat-box">
                <div>Sample Rate</div>
                <div class="stat-value">{} Hz</div>
            </div>
            <div class="stat-box">
                <div>Channels</div>
                <div class="stat-value">{}</div>
            </div>
        </div>
    """.format(duration, sample_rate, channels), unsafe_allow_html=True)

    transcribe_clicked = st.button("Transcribe Audio", type="primary")

    if not transcribe_clicked:
        # Every script run converts the upload into a fresh temp file. If this
        # run is not going to transcribe it, delete it now so files do not
        # accumulate across reruns.
        try:
            os.remove(processed_wav)
        except OSError:
            pass

    # Transcription
    if transcribe_clicked:
        # Create a progress container
        progress_container = st.empty()
        progress_container.markdown("""
            <div class="progress-container">
                <div class="progress-bar" style="width: 30%;"></div>
            </div>
            <div style="text-align: center; margin-top: 5px; color: var(--secondary);">Processing audio...</div>
        """, unsafe_allow_html=True)

        # The progress bar is cosmetic; the pause just makes the step visible.
        time.sleep(0.8)
        progress_container.markdown("""
            <div class="progress-container">
                <div class="progress-bar" style="width: 70%;"></div>
            </div>
            <div style="text-align: center; margin-top: 5px; color: var(--secondary);">Transcribing content...</div>
        """, unsafe_allow_html=True)

        # Actual transcription
        try:
            with st.spinner(""):
                result = model.transcribe([processed_wav])
                first = result[0]
                # NOTE(review): the original code read `.text` from the first
                # result; a plain string is accepted too in case the
                # installed NeMo version returns strings - confirm.
                transcript = first if isinstance(first, str) else first.text
        except Exception as e:
            st.error(f"Transcription failed: {e}")
            progress_container.empty()
            raise
        finally:
            # The WAV is not needed after transcription, whether it worked or not.
            try:
                os.remove(processed_wav)
            except OSError:
                pass

        # Update progress to complete
        progress_container.markdown("""
            <div class="progress-container">
                <div class="progress-bar" style="width: 100%;"></div>
            </div>
            <div style="text-align: center; margin-top: 5px; color: var(--secondary);">Transcription complete</div>
        """, unsafe_allow_html=True)

        time.sleep(0.5)
        progress_container.empty()

        # The transcript is embedded in raw HTML, so escape it; otherwise any
        # '<' or '&' in the text would be interpreted as markup.
        safe_transcript = html.escape(transcript)

        st.markdown("### Transcription Results")
        st.markdown(f"""
            <div class="transcript-container">
                <div class="transcript-box">{safe_transcript}</div>
            </div>
        """, unsafe_allow_html=True)

        # Download button (gets the raw, unescaped text)
        st.download_button("Download Transcript", transcript, 
                          file_name="arabic_transcript.txt")

# Minimal footer
# Rendered on every run that reaches this point: a horizontal rule followed
# by a centred credits block written as raw HTML (hence unsafe_allow_html).
st.markdown("---")
st.markdown("""
    <div style="text-align: center; color: var(--text-secondary); padding: 20px; font-size: 0.9rem;">
        <p>Powered by NeMo ASR and Streamlit | Professional Arabic Transcription Service</p>
        <p>©YahyaAlnwsany | 2025 Arabic Transcriber Pro | All rights reserved</p>
    </div>
""", unsafe_allow_html=True)