# iRecite-MVP-API / step8_madd_signal.py
# (source: didodev — "Deploy iRecite MVP API (Docker + FastAPI)", commit 4ca6263)
import parselmouth  # Praat bindings: used below for Sound loading and intensity extraction
import numpy as np
# Input recording analysed by main(); presumably a short trimmed mono WAV — TODO confirm
AUDIO_PATH = "sample_trim.wav"
def _mask_to_segments(times, mask, min_dur=0.06):
    """Collapse a boolean mask over ``times`` into (start, end) segments.

    Runs of consecutive True values become one segment; segments shorter
    than ``min_dur`` seconds are dropped as tiny blips.

    Args:
        times: sequence of frame timestamps (seconds), ascending.
        mask: boolean sequence, same length as ``times``.
        min_dur: minimum segment duration (seconds) to keep.

    Returns:
        List of ``(start, end)`` tuples in seconds.
    """
    segments = []
    in_seg = False
    start = None
    for t, flagged in zip(times, mask):
        if flagged and not in_seg:
            in_seg = True
            start = t
        elif not flagged and in_seg:
            in_seg = False
            if t - start >= min_dur:
                segments.append((start, t))
    # Close a segment that is still open when the signal ends.
    if in_seg and start is not None:
        end = times[-1]
        if end - start >= min_dur:
            segments.append((start, end))
    return segments


def main(audio_path=AUDIO_PATH):
    """Print candidate voiced segments of *audio_path* and flag long ones.

    Loads the recording, extracts an intensity contour, thresholds it to
    find "voiced-ish" regions, then reports segments whose duration is
    long enough to be a Madd (elongated vowel) candidate.

    Args:
        audio_path: path to the WAV file to analyse (defaults to AUDIO_PATH,
            so existing callers of ``main()`` are unaffected).
    """
    snd = parselmouth.Sound(audio_path)
    duration = snd.get_total_duration()
    print("Audio duration (sec):", round(duration, 2))

    # Intensity (energy over time), sampled every 10 ms.
    intensity = snd.to_intensity(time_step=0.01)
    times = intensity.xs()
    vals = intensity.values[0]

    # Adaptive threshold: the loudest ~40% of frames count as "voiced-ish".
    thr = np.percentile(vals, 60)
    voiced = vals > thr

    # Convert the boolean mask into [start, end] segments, dropping tiny blips.
    segments = _mask_to_segments(times, voiced, min_dur=0.06)

    print("Candidate voiced segments:", len(segments))
    for i, (s, e) in enumerate(segments[:12], 1):
        print(f"{i:02d}. {s:.2f} -> {e:.2f} (dur {e-s:.2f}s)")

    # Heuristic "madd-like" durations: anything >= 0.18s is a suspiciously long vowel.
    longish = [(s, e, e - s) for (s, e) in segments if (e - s) >= 0.18]
    print("\nLong segments (possible Madd candidates):", len(longish))
    for i, (s, e, d) in enumerate(longish[:12], 1):
        print(f"{i:02d}. {s:.2f} -> {e:.2f} (dur {d:.2f}s)")
# Script entry point: run the analysis on the default audio file when executed directly.
if __name__ == "__main__":
    main()