Spaces:
Sleeping
Sleeping
File size: 4,406 Bytes
4ca6263 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 |
import json
import numpy as np
import parselmouth
# --- Input / output locations ---
# Recording to analyse; main() opens it with parselmouth.Sound.
AUDIO_PATH = "sample_trim.wav"
# Canonical word data (JSON); the script reads its "surah", "riwayah" and
# "ayahs" entries.
CANON_PATH = "data/fatiha_canonical_fallback.json"
# Destination of the JSON feedback report written by main().
OUT_PATH = "output/feedback_madd.json"
# --- Heuristic thresholds (MVP) ---
# Quranic madd lengths depend on rule; for MVP we just classify by duration.
# Durations strictly below this many seconds are classified "too_short".
TOO_SHORT_SEC = 0.15
# Durations up to and including this many seconds are classified "ok";
# anything longer is "too_long".
OK_MAX_SEC = 0.35
# NOTE(review): not referenced anywhere in the visible code; the "too_long"
# decision is made by comparing against OK_MAX_SEC. Confirm whether this
# constant is still needed.
TOO_LONG_SEC = 0.35
def extract_long_voiced_segments(
    sound: "parselmouth.Sound",
    *,
    time_step: float = 0.01,
    intensity_percentile: float = 60,
    min_segment_sec: float = 0.06,
    min_madd_sec: float = 0.18,
):
    """Find long, loud stretches of *sound* to use as Madd candidates.

    The intensity contour of *sound* is thresholded at one of its own
    percentiles; consecutive frames above the threshold form a segment.
    Loudness is used here as a rough stand-in for voicing.

    Args:
        sound: Object exposing ``to_intensity(time_step=...)``; the result
            must provide ``xs()`` (frame times) and ``values[0]`` (one
            intensity value per frame time).
        time_step: Frame step passed to ``to_intensity``.
        intensity_percentile: Percentile of the intensity values used as the
            loudness threshold; frames strictly above it count as voiced.
        min_segment_sec: Minimum duration for a run of frames to count as a
            segment at all.
        min_madd_sec: Minimum duration for a segment to be returned as a
            Madd candidate.

    Returns:
        A list of ``(start, end, duration)`` tuples of Python floats in
        chronological order. ``start`` is the time of the first frame above
        the threshold, ``end`` the time of the first frame back at or below
        it (or the last frame time when the run reaches the end).
    """
    intensity = sound.to_intensity(time_step=time_step)
    times = np.asarray(intensity.xs(), dtype=float)
    vals = np.asarray(intensity.values[0], dtype=float)
    if vals.size == 0:
        # Nothing to threshold; a percentile of an empty array is undefined.
        return []

    loud = vals > np.percentile(vals, intensity_percentile)

    # Pad with False on both sides so that every run has both a rising and a
    # falling edge, even when it touches the first or last frame.
    padded = np.concatenate(([False], loud, [False]))
    edges = np.flatnonzero(padded[1:] != padded[:-1])
    first_loud = edges[0::2]
    # A run that reaches the end has its falling edge one past the last
    # frame; clamp it so the segment ends at the last frame time.
    first_quiet = np.minimum(edges[1::2], loud.size - 1)

    # A candidate has to pass both duration filters, i.e. the stricter one.
    min_duration = max(min_segment_sec, min_madd_sec)
    candidates = []
    for begin_idx, end_idx in zip(first_loud, first_quiet):
        start = float(times[begin_idx])
        end = float(times[end_idx])
        if end - start >= min_duration:
            candidates.append((start, end, end - start))
    return candidates
def madd_words_in_order(canon):
    """Collect the words that carry madd positions, in recitation order.

    Walks ``canon["ayahs"]`` and each ayah's ``"word_info"`` list in the
    order given, keeping only words whose ``"madd_positions_base_index"``
    entry is present and non-empty.

    Returns:
        A list of dicts with the keys ``ayah``, ``word``, ``base``,
        ``madd_positions_base_index`` and ``phonemes_fallback`` (the last
        one defaults to ``""`` when the word does not provide it).
    """
    return [
        {
            "ayah": ayah_entry["ayah"],
            "word": word_entry["word"],
            "base": word_entry["base"],
            "madd_positions_base_index": word_entry["madd_positions_base_index"],
            "phonemes_fallback": word_entry.get("phonemes_fallback", ""),
        }
        for ayah_entry in canon["ayahs"]
        for word_entry in ayah_entry["word_info"]
        if word_entry.get("madd_positions_base_index")
    ]
def classify_duration(d, *, too_short_sec=None, ok_max_sec=None):
    """Label a duration as ``"too_short"``, ``"ok"`` or ``"too_long"``.

    Args:
        d: Duration in seconds.
        too_short_sec: Durations strictly below this are ``"too_short"``.
            Defaults to the module-level ``TOO_SHORT_SEC``.
        ok_max_sec: Durations up to and including this are ``"ok"``.
            Defaults to the module-level ``OK_MAX_SEC``.

    Returns:
        One of the strings ``"too_short"``, ``"ok"``, ``"too_long"``.
    """
    # Resolve defaults at call time so that rule-specific thresholds can be
    # passed per call while plain calls keep using the module constants.
    lower = TOO_SHORT_SEC if too_short_sec is None else too_short_sec
    upper = OK_MAX_SEC if ok_max_sec is None else ok_max_sec

    if d < lower:
        return "too_short"
    if d <= upper:
        return "ok"
    return "too_long"
def confidence_from_duration(d, *, too_short_sec=None, ok_max_sec=None):
    """Return a crude confidence score for the classification of *d*.

    The farther *d* lies outside the "ok" band, the higher the score; inside
    the band the score is a constant 0.55. Scores outside the band start at
    0.60 and are capped at 0.95.

    Args:
        d: Duration in seconds.
        too_short_sec: Lower edge of the "ok" band. Defaults to the
            module-level ``TOO_SHORT_SEC``.
        ok_max_sec: Upper edge (inclusive) of the "ok" band. Defaults to the
            module-level ``OK_MAX_SEC``.

    Returns:
        A float confidence value, at most 0.95.
    """
    # Resolve defaults at call time so the band can be overridden per call
    # and stays in step with the one used for classification.
    lower = TOO_SHORT_SEC if too_short_sec is None else too_short_sec
    upper = OK_MAX_SEC if ok_max_sec is None else ok_max_sec

    if d < lower:
        return min(0.95, 0.60 + (lower - d) * 2.0)
    if d <= upper:
        return 0.55
    return min(0.95, 0.60 + (d - upper) * 1.2)
def main():
    """Run the Madd MVP pipeline and write the feedback report.

    Loads the canonical word data from ``CANON_PATH`` and the recording from
    ``AUDIO_PATH``, detects long voiced segments, pairs them one-to-one (in
    order) with the Madd-eligible words, classifies each pair by duration,
    writes the result as JSON to ``OUT_PATH`` and prints a short summary.

    Segments or target words left over after the sequential pairing are not
    included in ``results``; a warning is printed when that happens.
    """
    # Local import: only needed here, to create the output directory.
    import os

    # Load canonical word info
    with open(CANON_PATH, "r", encoding="utf-8") as f:
        canon = json.load(f)
    madd_targets = madd_words_in_order(canon)

    # Load audio
    snd = parselmouth.Sound(AUDIO_PATH)
    longish = extract_long_voiced_segments(snd)

    feedback = {
        "surah": canon["surah"],
        "riwayah": canon["riwayah"],
        "rule": "Madd (MVP heuristic)",
        "audio_path": AUDIO_PATH,
        "notes": [
            "This MVP uses intensity-based voiced segments and maps long segments to Madd-eligible words in order.",
            "Replace with real forced alignment + Quranic-Phonemizer later for Tajweed-accurate placement."
        ],
        "segments_detected": [{"start": s, "end": e, "dur": d} for (s, e, d) in longish],
        "madd_targets": madd_targets,
        "results": []
    }

    # Simple user-facing tips; any label not listed falls back to the OK tip.
    ok_tip = "Madd length looks OK."
    tips = {
        "too_short": "Extend the vowel a bit more (madd).",
        "too_long": "Shorten the vowel slightly (avoid over-stretching).",
    }

    # Map segments to madd targets sequentially; zip stops at the shorter list.
    for index, ((s, e, d), tgt) in enumerate(zip(longish, madd_targets), start=1):
        label = classify_duration(d)
        conf = float(round(confidence_from_duration(d), 3))
        feedback["results"].append({
            "index": index,
            "ayah": tgt["ayah"],
            "word": tgt["word"],
            "timestamp": {"start": round(s, 3), "end": round(e, 3)},
            "duration_sec": round(d, 3),
            "classification": label,
            "confidence": conf,
            "tip": tips.get(label, ok_tip)
        })

    # open(..., "w") does not create missing parent directories.
    out_dir = os.path.dirname(OUT_PATH)
    if out_dir:
        os.makedirs(out_dir, exist_ok=True)
    with open(OUT_PATH, "w", encoding="utf-8") as f:
        json.dump(feedback, f, ensure_ascii=False, indent=2)

    print("OK ✅ wrote", OUT_PATH)
    print("Long segments:", len(longish))
    print("Madd target words:", len(madd_targets))
    print("Mapped results:", len(feedback["results"]))
    if len(longish) != len(madd_targets):
        print("Warning: segment and target counts differ; unmatched entries are not in results.")
    if feedback["results"]:
        print("Sample result:", feedback["results"][0])
# Run the pipeline only when this file is executed as a script.
if __name__ == "__main__":
    main()