File size: 4,406 Bytes
4ca6263
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
import json
import os

import numpy as np
import parselmouth

# Recording to analyse; main() hands this path to parselmouth.Sound.
AUDIO_PATH = "sample_trim.wav"
# Canonical word data; main() opens it as UTF-8 and parses it as JSON.
CANON_PATH = "data/fatiha_canonical_fallback.json"
# Destination of the JSON feedback report written by main().
OUT_PATH = "output/feedback_madd.json"

# --- Heuristic thresholds (MVP) ---
# Quranic madd lengths depend on rule; for MVP we just classify by duration.
# Durations strictly below TOO_SHORT_SEC are labelled "too_short".
# NOTE(review): extract_long_voiced_segments only returns segments of at least
# 0.18 s, so with these values "too_short" cannot occur in main() — confirm
# whether that is intended.
TOO_SHORT_SEC = 0.15
# Durations from TOO_SHORT_SEC up to and including OK_MAX_SEC are labelled "ok";
# anything longer is labelled "too_long".
OK_MAX_SEC = 0.35
# NOTE(review): not referenced by any function visible in this file; the
# "too_long" boundary is effectively OK_MAX_SEC.
TOO_LONG_SEC = 0.35

def extract_long_voiced_segments(
    sound: "parselmouth.Sound",
    *,
    time_step=0.01,
    loudness_percentile=60,
    min_segment_sec=0.06,
    min_candidate_sec=0.18,
):
    """Find long, loud stretches of ``sound`` to use as Madd candidates.

    "Voiced" here is an intensity heuristic, not real voicing detection: a
    frame counts as voiced when its intensity is strictly above the
    ``loudness_percentile``-th percentile of the whole contour.

    Args:
        sound: Object exposing ``to_intensity(time_step=...)`` whose result
            provides ``xs()`` (frame times) and ``values[0]`` (one intensity
            value per frame).
        time_step: Frame spacing passed to ``to_intensity``.
        loudness_percentile: Percentile of the intensity values used as the
            voiced/unvoiced threshold.
        min_segment_sec: Runs of voiced frames shorter than this are dropped.
        min_candidate_sec: Only segments at least this long are returned.

    Returns:
        List of ``(start, end, duration)`` float tuples in time order.
        ``start`` is the time of the first voiced frame of a run; ``end`` is
        the time of the first non-voiced frame after it, or the last frame
        time when the run reaches the end of the contour. An empty contour
        yields an empty list.
    """
    intensity = sound.to_intensity(time_step=time_step)
    times = intensity.xs()
    vals = np.asarray(intensity.values[0])

    # A percentile of zero frames is meaningless; there is nothing to segment.
    if vals.size == 0:
        return []

    thr = np.percentile(vals, loudness_percentile)
    voiced = vals > thr

    segments = []
    start = None  # None means "not currently inside a voiced run"

    for t, is_voiced in zip(times, voiced):
        if is_voiced and start is None:
            start = float(t)
        elif not is_voiced and start is not None:
            end = float(t)
            if end - start >= min_segment_sec:
                segments.append((start, end))
            start = None

    # Close a run that is still open when the contour ends.
    if start is not None:
        end = float(times[-1])
        if end - start >= min_segment_sec:
            segments.append((start, end))

    # Return only the longer ones as Madd candidates
    return [(s, e, e - s) for (s, e) in segments if (e - s) >= min_candidate_sec]

def madd_words_in_order(canon):
    """
    Collect, in recitation order, every word that has a non-empty
    "madd_positions_base_index" entry.

    Walks canon["ayahs"] and each ayah's "word_info" list in order and
    returns one dict per qualifying word with the keys "ayah", "word",
    "base", "madd_positions_base_index" and "phonemes_fallback" (empty
    string when the word has no such key).
    """
    return [
        {
            "ayah": ayah_entry["ayah"],
            "word": info["word"],
            "base": info["base"],
            "madd_positions_base_index": info["madd_positions_base_index"],
            "phonemes_fallback": info.get("phonemes_fallback", ""),
        }
        for ayah_entry in canon["ayahs"]
        for info in ayah_entry["word_info"]
        # Missing key and empty list are both treated as "no madd here".
        if info.get("madd_positions_base_index")
    ]

def classify_duration(d):
    """Label a duration ``d`` as "too_short", "ok" or "too_long".

    Values strictly below TOO_SHORT_SEC are "too_short"; values up to and
    including OK_MAX_SEC are "ok"; everything else is "too_long".
    """
    below_band = d < TOO_SHORT_SEC
    if below_band:
        return "too_short"
    return "ok" if d <= OK_MAX_SEC else "too_long"

def confidence_from_duration(d):
    """Return a crude confidence score for the label given to duration ``d``.

    Inside the ok band the score is a flat 0.55. Outside it the score starts
    at 0.60 and grows linearly with the distance from the band edge (slope
    2.0 below TOO_SHORT_SEC, 1.2 above OK_MAX_SEC), capped at 0.95.
    """
    if d < TOO_SHORT_SEC:
        distance, slope = TOO_SHORT_SEC - d, 2.0
    elif d <= OK_MAX_SEC:
        return 0.55
    else:
        distance, slope = d - OK_MAX_SEC, 1.2
    # farther from ok band → higher confidence
    return min(0.95, 0.60 + distance * slope)

def main():
    """Run the Madd-duration MVP end to end and write the JSON feedback file.

    Steps:
      1. Load the canonical word data from CANON_PATH and collect the
         Madd-eligible words in recitation order.
      2. Load AUDIO_PATH and extract long, loud segments from it.
      3. Pair the i-th segment with the i-th Madd-eligible word (purely
         positional; surplus segments or words are left unpaired), then
         classify each pair by segment duration.
      4. Write the report to OUT_PATH, creating its directory if needed, and
         print a short summary.

    Raises:
        Whatever ``open``, ``json.load`` or ``parselmouth.Sound`` raise for
        missing or unreadable inputs, and ``KeyError`` when the canonical JSON
        lacks the "surah" or "riwayah" keys.
    """
    # Load canonical word info
    with open(CANON_PATH, "r", encoding="utf-8") as f:
        canon = json.load(f)

    madd_targets = madd_words_in_order(canon)

    # Load audio
    snd = parselmouth.Sound(AUDIO_PATH)
    longish = extract_long_voiced_segments(snd)

    feedback = {
        "surah": canon["surah"],
        "riwayah": canon["riwayah"],
        "rule": "Madd (MVP heuristic)",
        "audio_path": AUDIO_PATH,
        "notes": [
            "This MVP uses intensity-based voiced segments and maps long segments to Madd-eligible words in order.",
            "Replace with real forced alignment + Quranic-Phonemizer later for Tajweed-accurate placement."
        ],
        "segments_detected": [{"start": s, "end": e, "dur": d} for (s, e, d) in longish],
        "madd_targets": madd_targets,
        "results": []
    }

    # Simple user-facing tips; any label not listed here gets the OK message.
    tips = {
        "too_short": "Extend the vowel a bit more (madd).",
        "too_long": "Shorten the vowel slightly (avoid over-stretching).",
    }
    ok_tip = "Madd length looks OK."

    # Map segments to madd targets sequentially; zip stops at the shorter list.
    for index, ((s, e, d), tgt) in enumerate(zip(longish, madd_targets), start=1):
        label = classify_duration(d)
        conf = float(round(confidence_from_duration(d), 3))

        feedback["results"].append({
            "index": index,
            "ayah": tgt["ayah"],
            "word": tgt["word"],
            "timestamp": {"start": round(s, 3), "end": round(e, 3)},
            "duration_sec": round(d, 3),
            "classification": label,
            "confidence": conf,
            "tip": tips.get(label, ok_tip)
        })

    # open(..., "w") does not create missing parent directories, so make sure
    # the output folder exists before writing.
    out_dir = os.path.dirname(OUT_PATH)
    if out_dir:
        os.makedirs(out_dir, exist_ok=True)

    with open(OUT_PATH, "w", encoding="utf-8") as f:
        json.dump(feedback, f, ensure_ascii=False, indent=2)

    print("OK ✅ wrote", OUT_PATH)
    print("Long segments:", len(longish))
    print("Madd target words:", len(madd_targets))
    print("Mapped results:", len(feedback["results"]))
    if feedback["results"]:
        print("Sample result:", feedback["results"][0])

# Run the pipeline only when this file is executed as a script, not on import.
if __name__ == "__main__":
    main()