Spaces:

iRecite
/

iRecite-MVP-API

Sleeping

File size: 4,406 Bytes

4ca6263

import json
import numpy as np
import parselmouth

# Recording analysed by main().
AUDIO_PATH = "sample_trim.wav"
# Canonical word data (JSON) loaded by main(); the code reads its "surah",
# "riwayah" and "ayahs" keys.
CANON_PATH = "data/fatiha_canonical_fallback.json"
# Destination of the JSON feedback report written by main().
OUT_PATH = "output/feedback_madd.json"

# --- Heuristic thresholds (MVP) ---
# Quranic madd lengths depend on rule; for MVP we just classify by duration.
# Durations strictly below this are labelled "too_short".
TOO_SHORT_SEC = 0.15
# Durations from TOO_SHORT_SEC up to and including this are labelled "ok";
# anything longer is labelled "too_long".
OK_MAX_SEC = 0.35
# NOTE(review): not referenced by any function visible in this file section;
# the "too_long" boundary is derived from OK_MAX_SEC instead. Keep the two
# values in sync or drop this one.
TOO_LONG_SEC = 0.35

def extract_long_voiced_segments(
    sound: "parselmouth.Sound",
    *,
    time_step: float = 0.01,
    intensity_percentile: float = 60,
    min_segment_sec: float = 0.06,
    min_madd_sec: float = 0.18,
):
    """
    Find long, loud stretches of ``sound`` to use as Madd candidates.

    The intensity contour of ``sound`` is thresholded at one of its own
    percentiles; consecutive frames above the threshold form a segment.
    A segment ends at the time of the first frame that falls back to or
    below the threshold, or at the last frame time if the recording ends
    while still above it.

    Args:
        sound: Object exposing ``to_intensity(time_step=...)`` whose result
            provides ``xs()`` (frame times) and ``values`` (2-D, first row
            used). In this file that is a ``parselmouth.Sound``.
        time_step: Frame step, in seconds, passed to ``to_intensity``.
        intensity_percentile: Percentile of the contour used as the
            loud/quiet threshold (frames must be strictly above it).
        min_segment_sec: Segments shorter than this are discarded.
        min_madd_sec: Surviving segments shorter than this are not reported
            as Madd candidates.

    Returns:
        List of ``(start, end, duration)`` tuples of Python floats, in
        chronological order. Empty when the contour has no frames.
    """
    intensity = sound.to_intensity(time_step=time_step)
    times = intensity.xs()
    vals = intensity.values[0]

    # A percentile of an empty contour is undefined; nothing to report.
    if len(vals) == 0:
        return []

    thr = np.percentile(vals, intensity_percentile)
    loud = vals > thr

    # `start` doubles as the "inside a segment" flag: None means outside.
    segments = []
    start = None
    for t, is_loud in zip(times, loud):
        if is_loud:
            if start is None:
                start = float(t)
        elif start is not None:
            segments.append((start, float(t)))
            start = None
    if start is not None:
        # Recording ended while still loud: close at the last frame time.
        segments.append((start, float(times[-1])))

    # Both minimums apply, so the larger of the two is the effective cut-off.
    return [
        (s, e, e - s)
        for (s, e) in segments
        if (e - s) >= min_segment_sec and (e - s) >= min_madd_sec
    ]

def madd_words_in_order(canon):
    """
    Collect the Madd-bearing words of ``canon`` in recitation order.

    Walks ``canon["ayahs"]`` and each ayah's ``"word_info"`` list in order,
    keeping only words whose ``"madd_positions_base_index"`` is present and
    non-empty. Each kept word becomes a dict with the ayah identifier, the
    word, its base form, its Madd positions and its fallback phonemes
    (empty string when the word has none).
    """
    return [
        {
            "ayah": verse["ayah"],
            "word": entry["word"],
            "base": entry["base"],
            "madd_positions_base_index": entry["madd_positions_base_index"],
            "phonemes_fallback": entry.get("phonemes_fallback", ""),
        }
        for verse in canon["ayahs"]
        for entry in verse["word_info"]
        if entry.get("madd_positions_base_index")
    ]

def classify_duration(d):
    """
    Label a duration ``d`` (seconds) against the module thresholds.

    Returns "too_short" when ``d`` is strictly below TOO_SHORT_SEC, "ok"
    when it is at most OK_MAX_SEC, and "too_long" otherwise.
    """
    return (
        "too_short" if d < TOO_SHORT_SEC
        else "ok" if d <= OK_MAX_SEC
        else "too_long"
    )

def confidence_from_duration(d):
    """
    Return a crude confidence score for the label given to duration ``d``.

    Inside the ok band (TOO_SHORT_SEC through OK_MAX_SEC inclusive) the
    score is a flat 0.55. Outside it the score starts at 0.60 and grows
    with the distance from the band, capped at 0.95.
    """
    if d < TOO_SHORT_SEC:
        # Below the band: distance to the lower edge, steeper slope.
        distance, slope = TOO_SHORT_SEC - d, 2.0
    elif d <= OK_MAX_SEC:
        return 0.55
    else:
        # Above the band: distance to the upper edge, gentler slope.
        distance, slope = d - OK_MAX_SEC, 1.2
    return min(0.95, 0.60 + distance * slope)

def main():
    """
    Run the Madd MVP pipeline end to end and write the feedback report.

    Steps: load the canonical word data from CANON_PATH, list its
    Madd-eligible words, detect long loud segments in AUDIO_PATH, pair the
    i-th segment with the i-th Madd word (extra segments or words on either
    side are left unpaired), classify each pair by duration, then write the
    report as JSON to OUT_PATH and print a short summary.
    """
    # Local import: only needed to prepare the output directory below.
    import os

    # Load canonical word info
    with open(CANON_PATH, "r", encoding="utf-8") as f:
        canon = json.load(f)

    madd_targets = madd_words_in_order(canon)

    # Load audio
    snd = parselmouth.Sound(AUDIO_PATH)
    longish = extract_long_voiced_segments(snd)

    feedback = {
        "surah": canon["surah"],
        "riwayah": canon["riwayah"],
        "rule": "Madd (MVP heuristic)",
        "audio_path": AUDIO_PATH,
        "notes": [
            "This MVP uses intensity-based voiced segments and maps long segments to Madd-eligible words in order.",
            "Replace with real forced alignment + Quranic-Phonemizer later for Tajweed-accurate placement."
        ],
        "segments_detected": [{"start": s, "end": e, "dur": d} for (s, e, d) in longish],
        "madd_targets": madd_targets,
        "results": []
    }

    # Simple user-facing tips; any label not listed falls back to the OK tip.
    tips = {
        "too_short": "Extend the vowel a bit more (madd).",
        "too_long": "Shorten the vowel slightly (avoid over-stretching).",
    }

    # Map segments to madd targets sequentially; zip stops at the shorter list.
    for i, ((s, e, d), tgt) in enumerate(zip(longish, madd_targets), start=1):
        label = classify_duration(d)
        conf = float(round(confidence_from_duration(d), 3))

        feedback["results"].append({
            "index": i,
            "ayah": tgt["ayah"],
            "word": tgt["word"],
            "timestamp": {"start": round(s, 3), "end": round(e, 3)},
            "duration_sec": round(d, 3),
            "classification": label,
            "confidence": conf,
            "tip": tips.get(label, "Madd length looks OK.")
        })

    # Create the output directory first so a fresh checkout does not lose
    # the finished analysis to a FileNotFoundError on write.
    out_dir = os.path.dirname(OUT_PATH)
    if out_dir:
        os.makedirs(out_dir, exist_ok=True)

    with open(OUT_PATH, "w", encoding="utf-8") as f:
        json.dump(feedback, f, ensure_ascii=False, indent=2)

    print("OK ✅ wrote", OUT_PATH)
    print("Long segments:", len(longish))
    print("Madd target words:", len(madd_targets))
    print("Mapped results:", len(feedback["results"]))
    if feedback["results"]:
        print("Sample result:", feedback["results"][0])

# Run the pipeline only when this file is executed as a script, not on import.
if __name__ == "__main__":
    main()