Spaces:

kassaby
/

recitation-segmenter-app

Paused

File size: 8,845 Bytes

"""
Quran Audio Enhancer - Hugging Face Gradio Space
=================================================
GUI كامل لتحسين جودة تلاوة القرآن الكريم
"""

import numpy as np
import scipy.signal as signal
import librosa
import soundfile as sf
import noisereduce as nr
import gradio as gr
import tempfile
import os


# ─────────────────────────────────────────────
# دوال المعالجة
# ─────────────────────────────────────────────

def load_audio(file_path: str, sr: int = 22050):
    """Decode an audio file into a mono waveform via ``librosa.load``.

    Args:
        file_path: Path of the audio file to read.
        sr: Sample rate passed to ``librosa.load`` (22050 by default).

    Returns:
        A ``(audio, sample_rate)`` pair as produced by ``librosa.load``.
    """
    loaded = librosa.load(file_path, mono=True, sr=sr)
    waveform, rate = loaded
    return waveform, rate


def remove_dc_offset(audio):
    """Centre the signal on zero by subtracting its mean.

    Returns:
        The mean-subtracted signal cast to ``float32``.
    """
    offset = np.mean(audio)
    centred = audio - offset
    return centred.astype(np.float32)


def reduce_noise(audio, sr, strength):
    """Run ``noisereduce`` over the signal with this app's fixed settings.

    Args:
        audio: Input signal.
        sr: Sample rate of ``audio``.
        strength: Forwarded as ``prop_decrease``.

    Returns:
        Whatever ``nr.reduce_noise`` returns for the given settings.
    """
    # Noise sample: the first half second when the clip is longer than one
    # second's worth of samples, otherwise the whole clip.
    if len(audio) > sr:
        noise_clip = audio[:int(sr * 0.5)]
    else:
        noise_clip = audio

    settings = dict(
        prop_decrease=strength,
        stationary=False,
        n_fft=2048,
        win_length=2048,
        hop_length=512,
        n_std_thresh_stationary=1.5,
        chunk_size=60000,
        use_torch=False,
    )
    return nr.reduce_noise(y=audio, sr=sr, y_noise=noise_clip, **settings)


def apply_bandpass_filter(audio, sr, low_hz=80, high_hz=8000):
    """Apply a zero-phase order-6 Butterworth band-pass filter.

    The filter is designed as second-order sections (SOS) rather than as
    ``(b, a)`` polynomial coefficients. A band-pass of this order with a low
    edge far below Nyquist (80 Hz at 22.05 kHz) puts several poles very close
    to ``z = 1``; expanding them into a single polynomial is numerically
    ill-conditioned and can yield an inaccurate or unstable filter. SciPy
    recommends the SOS form for this reason.

    Args:
        audio: 1-D input signal.
        sr: Sample rate of ``audio`` in Hz.
        low_hz: Lower cutoff frequency in Hz.
        high_hz: Upper cutoff frequency in Hz; the normalised value is capped
            at 0.99 of Nyquist.

    Returns:
        The filtered signal as ``float32``.

    Raises:
        ValueError: Propagated from SciPy when the cutoffs are invalid for
            ``sr`` or the signal is too short to pad.
    """
    nyquist = sr / 2
    low = low_hz / nyquist
    high = min(high_hz / nyquist, 0.99)
    sos = signal.butter(6, [low, high], btype='band', output='sos')
    # Forward-backward filtering keeps the result zero-phase.
    return signal.sosfiltfilt(sos, audio).astype(np.float32)


def enhance_clarity(audio):
    """Blend the harmonic component of the signal with the signal itself.

    The harmonic part comes from ``librosa.effects.hpss`` (margin 3.0); the
    result is 80 % harmonic plus 20 % of the input, cast to ``float32``.
    """
    harmonic_part = librosa.effects.hpss(audio, margin=3.0)[0]
    blended = 0.8 * harmonic_part + 0.2 * audio
    return blended.astype(np.float32)


def apply_de_essing(audio, sr, threshold=0.4, window=512):
    """Attenuate the 5-10 kHz band where its short-term level is high.

    The band is isolated with a zero-phase order-4 Butterworth band-pass, its
    moving RMS is normalised by the maximum RMS of the clip, and wherever that
    relative level exceeds ``threshold`` the band is scaled by
    ``threshold / level`` before being added back to the rest of the signal.

    Args:
        audio: 1-D input signal.
        sr: Sample rate of ``audio`` in Hz.
        threshold: Relative band level (0-1) above which gain reduction starts.
        window: Length in samples of the moving-average window for the RMS.

    Returns:
        The processed signal as ``float32``, same length as ``audio``. When
        ``sr`` is too low to represent the 5 kHz band edge, the input is
        returned unchanged (as ``float32``).
    """
    nyquist = sr / 2
    low = 5000 / nyquist
    high = min(10000 / nyquist, 0.99)
    if low >= high:
        # The band lies at or above the usable range for this sample rate, so
        # there is nothing to attenuate; butter() would reject these cutoffs.
        return np.asarray(audio).astype(np.float32)

    b, a = signal.butter(4, [low, high], btype='band')
    sibilant = signal.filtfilt(b, a, audio)

    # Moving average of the band energy. Take the centred slice of the 'full'
    # convolution ourselves: mode='same' returns max(len, window) samples,
    # which breaks the shapes below for clips shorter than the window.
    kernel = np.ones(window) / window
    energy = np.convolve(sibilant ** 2, kernel, mode='full')
    start = (window - 1) // 2
    sib_rms = np.sqrt(energy[start:start + len(sibilant)])

    max_rms = np.max(sib_rms) + 1e-8
    level = sib_rms / max_rms
    mask = np.where(level > threshold, threshold / (level + 1e-8), 1.0)
    # Swap the original band for its gain-reduced version.
    return (audio - sibilant + sibilant * mask).astype(np.float32)


def normalize_loudness(audio, target_db):
    """Scale the signal so its RMS matches ``target_db`` (dBFS), then clip.

    Args:
        audio: Input signal.
        target_db: Desired RMS level in dB relative to full scale.

    Returns:
        The scaled signal clipped to [-1, 1] as ``float32``. If the RMS is
        below 1e-8, the input is returned untouched.
    """
    level = np.sqrt(np.mean(audio * audio))
    if level < 1e-8:
        return audio

    wanted = 10 ** (target_db / 20)
    gain = wanted / level
    scaled = audio * gain
    return np.clip(scaled, -1.0, 1.0).astype(np.float32)


def analyze_quality(audio, sr, frame_length=512):
    """Measure overall level, peak and an estimated noise floor, all in dBFS.

    The noise floor is the 10th percentile of the RMS of consecutive,
    non-overlapping frames of ``frame_length`` samples. A clip shorter than
    one frame is treated as a single frame, and an empty clip is reported as
    silence, so neither case raises.

    Args:
        audio: 1-D mono signal.
        sr: Sample rate; accepted for a uniform signature but not used.
        frame_length: Frame size in samples for the noise-floor estimate.

    Returns:
        ``(rms_db, peak_db, noise_floor_db, noise_label)`` where the first
        three are Python floats and ``noise_label`` is the user-facing rating
        (low below -50 dB, medium below -35 dB, high otherwise).
    """
    audio = np.asarray(audio)
    if audio.size == 0:
        # Same value the formulas below give for an all-zero signal.
        silence_db = float(20 * np.log10(1e-8))
        return silence_db, silence_db, silence_db, "🟢 منخفضة"

    rms_db = float(20 * np.log10(np.sqrt(np.mean(audio**2)) + 1e-8))
    peak_db = float(20 * np.log10(np.max(np.abs(audio)) + 1e-8))

    # Non-overlapping frames are just a reshape; trailing samples that do not
    # fill a whole frame are dropped.
    n_frames = audio.size // frame_length
    if n_frames:
        frames = audio[:n_frames * frame_length].reshape(n_frames, frame_length)
    else:
        frames = audio.reshape(1, -1)
    frame_rms = np.sqrt(np.mean(frames**2, axis=1))
    noise_floor_db = float(20 * np.log10(np.percentile(frame_rms, 10) + 1e-8))

    if noise_floor_db < -50:
        noise_label = "🟢 منخفضة"
    elif noise_floor_db < -35:
        noise_label = "🟡 متوسطة"
    else:
        noise_label = "🔴 مرتفعة"
    return rms_db, peak_db, noise_floor_db, noise_label


# ─────────────────────────────────────────────
# الدالة الرئيسية للمعالجة
# ─────────────────────────────────────────────

def process_audio(audio_file, noise_strength, apply_bandpass,
                  apply_enhancement, apply_deessing, target_db, output_format):
    """Run the enhancement pipeline on one file and build a Markdown report.

    Steps: load at 22050 Hz, measure quality, remove DC offset, reduce noise,
    then optionally band-pass, enhance clarity and de-ess, normalise loudness,
    measure quality again, and write the result to a temporary file.

    Args:
        audio_file: Path of the uploaded audio file, or ``None``.
        noise_strength: Strength passed to ``reduce_noise``.
        apply_bandpass: Whether to run ``apply_bandpass_filter``.
        apply_enhancement: Whether to run ``enhance_clarity``.
        apply_deessing: Whether to run ``apply_de_essing``.
        target_db: Target level passed to ``normalize_loudness``.
        output_format: ``"WAV"`` selects 16-bit WAV; anything else gives FLAC.

    Returns:
        ``(out_path, report)``: the path of the written file and the Markdown
        report, or ``(None, message)`` when no file was supplied.
    """
    if audio_file is None:
        return None, "⚠️ الرجاء رفع ملف صوتي أولاً."

    # Load
    audio, sr = load_audio(audio_file, sr=22050)
    duration = len(audio) / sr

    # Measure before processing
    rms_b, peak_b, noise_b, noise_label_b = analyze_quality(audio, sr)

    # Processing chain
    audio = remove_dc_offset(audio)
    audio = reduce_noise(audio, sr, noise_strength)
    if apply_bandpass:
        audio = apply_bandpass_filter(audio, sr)
    if apply_enhancement:
        audio = enhance_clarity(audio)
    if apply_deessing:
        audio = apply_de_essing(audio, sr)
    audio = normalize_loudness(audio, target_db)

    # Measure after processing
    rms_a, peak_a, noise_a, noise_label_a = analyze_quality(audio, sr)

    # Save. mkstemp creates the file atomically, unlike the deprecated mktemp,
    # which only returns a name that another process could claim first.
    ext = "wav" if output_format == "WAV" else "flac"
    fd, out_path = tempfile.mkstemp(suffix=f"_enhanced.{ext}")
    os.close(fd)
    try:
        sf.write(out_path, audio, sr,
                 format=ext.upper(),
                 subtype='PCM_16' if ext == 'wav' else None)
    except Exception:
        # Do not leave an empty temp file behind if encoding fails.
        os.remove(out_path)
        raise

    # Report (DC-offset removal, noise reduction and loudness normalisation
    # always run, so they are always ticked).
    report = f"""
## 📊 تقرير المعالجة

| | قبل | بعد |
|---|---|---|
| مستوى الصوت (RMS) | {rms_b:.1f} dBFS | {rms_a:.1f} dBFS |
| الذروة | {peak_b:.1f} dBFS | {peak_a:.1f} dBFS |
| مستوى الضوضاء | {noise_b:.1f} dBFS | {noise_a:.1f} dBFS |
| تقدير الضوضاء | {noise_label_b} | {noise_label_a} |

**⏱️ مدة الملف:** {duration:.1f} ثانية  
**🎵 معدل العينات:** {sr} Hz  
**📁 الصيغة:** {output_format}

### الخطوات المطبّقة:
✅ إزالة DC Offset  
✅ إزالة الضوضاء (قوة: {noise_strength})  
{"✅" if apply_bandpass else "❌"} فلتر الترددات الصوتية  
{"✅" if apply_enhancement else "❌"} تحسين الوضوح  
{"✅" if apply_deessing else "❌"} De-essing  
✅ تعديل مستوى الصوت → {target_db} dBFS
    """.strip()

    return out_path, report


# ─────────────────────────────────────────────
# واجهة Gradio
# ─────────────────────────────────────────────

# Build the Gradio UI: one row with two equal columns — inputs and settings in
# the first, the enhanced audio and the report in the second.
with gr.Blocks(
    title="🕌 Quran Audio Enhancer",
) as demo:

    # Page header. NOTE(review): no css= argument is passed to gr.Blocks
    # above, so the 'title-text' / 'subtitle-text' classes are not styled by
    # this block — confirm whether styling is expected from elsewhere.
    gr.HTML("""
        <div class='title-text'>
            <h1>🕌 Quran Audio Enhancer</h1>
        </div>
        <div class='subtitle-text'>
            <p>أداة لتحسين جودة تلاوة القرآن الكريم — إزالة الضوضاء وتحسين الصوت</p>
        </div>
    """)

    with gr.Row():
        # Left column: input and settings
        with gr.Column(scale=1):
            gr.Markdown("### 📁 رفع الملف الصوتي")
            # type="filepath" so process_audio receives a path string.
            audio_input = gr.Audio(
                label="ارفع الملف هنا (WAV, MP3, FLAC, OGG, M4A)",
                type="filepath",
            )

            gr.Markdown("### ⚙️ إعدادات المعالجة")

            # Noise-reduction strength, 0.0-1.0 in steps of 0.05 (default 0.75).
            noise_strength = gr.Slider(
                minimum=0.0, maximum=1.0, value=0.75, step=0.05,
                label="قوة إزالة الضوضاء",
                info="0 = خفيف جداً | 1 = قوي جداً"
            )

            # Target output level, -40 to -6 dBFS in 1 dB steps (default -18).
            target_db = gr.Slider(
                minimum=-40.0, maximum=-6.0, value=-18.0, step=1.0,
                label="مستوى الصوت النهائي (dBFS)",
                info="القيمة الموصى بها: -18"
            )

            # Toggles for the optional processing stages; all on by default.
            with gr.Row():
                apply_bandpass = gr.Checkbox(value=True, label="فلتر الترددات الصوتية")
                apply_enhancement = gr.Checkbox(value=True, label="تحسين الوضوح")
                apply_deessing = gr.Checkbox(value=True, label="De-essing")

            # Output container format; process_audio compares against "WAV".
            output_format = gr.Radio(
                choices=["WAV", "FLAC"],
                value="WAV",
                label="صيغة الإخراج"
            )

            process_btn = gr.Button(
                "🚀 ابدأ المعالجة",
                variant="primary",
                size="lg"
            )

        # Right column: result
        with gr.Column(scale=1):
            gr.Markdown("### 🎵 الملف المحسّن")
            audio_output = gr.Audio(
                label="استمع وحمّل الملف المحسّن",
                type="filepath",

            )

            gr.Markdown("### 📊 التقرير")
            # Placeholder text shown until the first run fills in the report.
            report_output = gr.Markdown(
                value="*سيظهر التقرير بعد المعالجة...*"
            )

    # Wire up the button: the inputs are listed in the same order as
    # process_audio's parameters, and its (path, report) return value feeds
    # the two output components.
    process_btn.click(
        fn=process_audio,
        inputs=[
            audio_input, noise_strength, apply_bandpass,
            apply_enhancement, apply_deessing, target_db, output_format
        ],
        outputs=[audio_output, report_output],
    )

    # Usage tips shown below the controls.
    gr.Markdown("""
    ---
    **نصائح للحصول على أفضل نتيجة:**
    - استخدم `قوة إزالة الضوضاء` بين 0.6 و0.85 للتلاوات
    - إذا كان الصوت يبدو اصطناعياً، قلل القوة
    - صيغة FLAC أفضل للأرشفة | WAV للاستخدام العادي
    """)


# Start the app only when this file is executed as a script.
if __name__ == "__main__":
    demo.launch()