{"segments": [[49.0, 54.0], [66.0, 71.0], [101.0, 106.0]], "p_music": [0.0, 7.73, 0.0], "p_speech": [58.93, 50.63, 24.63], "labels": ["P(~Music) = 0.0 | P(~Speech) = 58.93", "P(~Music) = 7.73 | P(~Speech) = 50.63", "P(~Music) = 0.0 | P(~Speech) = 24.63"]}