Spaces:

clr
/

pce

Sleeping

App Files Files Community

catiR committed on Oct 27, 2023

Commit

1efac6a

1 Parent(s): c5c9abd

force align tts, add voices

Browse files

Files changed (1) hide show

scripts/clusterprosody.py +11 -9

scripts/clusterprosody.py CHANGED Viewed

@@ -224,14 +224,14 @@ def match_tts(clusters, speech_data, tts_data, tts_align, words, seg_aligns, voi
     bad_data = {f'{words}**{r}': speech_data[f'{words}**{r}'] for r,c in clusters if c==bad_cluster}
     #tts_fig_p = plot_pitch_tts(matched_data,tts_data, tts_align, words,seg_aligns,best_cluster,voice)
-    tts_fig_p = plot_one_cluster(words,'pitch',matched_data,seg_aligns,cluster,tts_data=tts_data,tts_align=tts_align,voice=voice)
-    fig_mid_p = plot_one_cluster(words,'pitch',mid_data,seg_aligns,cluster)
-    fig_bad_p = plot_one_cluster(words,'pitch',bad_data,seg_aligns,cluster)
-    tts_fig_e = plot_one_cluster(words,'rmse',matched_data,seg_aligns,cluster,tts_data=tts_data,tts_align=tts_align,voice=voice)
-    fig_mid_e = plot_one_cluster(words,'rmse',mid_data,seg_aligns,cluster)
-    fig_bad_e = plot_one_cluster(words,'rmse',bad_data,seg_aligns,cluster)
     return best_cluster_score, tts_fig_p, fig_mid_p, fig_bad_p, tts_fig_e, fig_mid_e, fig_bad_e
@@ -375,9 +375,11 @@ def plot_one_cluster(words,feature,speech_data,seg_aligns,cluster_id,tts_data=No
     if feature.lower() in ['pitch','f0']:
         fname = 'Pitch'
         ffunc = lambda x: [p for p,e in x]
     elif feature.lower() in ['energy', 'rmse']:
         fname = 'Energy'
         ffunc = lambda x: [e for p,e in x]
     else:
         print('problem with the figure')
         return fig
@@ -407,13 +409,13 @@ def plot_one_cluster(words,feature,speech_data,seg_aligns,cluster_id,tts_data=No
                 bound_line = np.mean([word_times[i][2],word_times[i+1][1]])
                 plt.axvline(x=bound_line, color=colors[cc], linestyle='--', linewidth=1, label=f"Speaker {spk} -> {word_times[i+1][0]}")
-        plt.scatter(feat_xvals, feats, color=colors[cc], label=f"Speaker {spk}")
         cc += 1
         if cc >= len(colors):
             cc=0
     if voice:
-        tfeats = [p for p,e in tts_data]
         t_xvals = [x*0.005 for x in range(len(tfeats))]
         if len(tts_align)>1:
@@ -425,7 +427,7 @@ def plot_one_cluster(words,feature,speech_data,seg_aligns,cluster_id,tts_data=No
             for i in range(1,len(tts_align)-1):
                 bound_line = np.mean([tts_align[i][2],tts_align[i+1][1]])
                 plt.axvline(x=bound_line, color="black", linestyle='--', linewidth=1, label=f"TTS -> {tts_align[i+1][0]}")
-        plt.scatter(t_xvals, tfeats, color="black", label=f"TTS {voice}")
     #plt.legend()

     bad_data = {f'{words}**{r}': speech_data[f'{words}**{r}'] for r,c in clusters if c==bad_cluster}
     #tts_fig_p = plot_pitch_tts(matched_data,tts_data, tts_align, words,seg_aligns,best_cluster,voice)
+    tts_fig_p = plot_one_cluster(words,'pitch',matched_data,seg_aligns,best_cluster,tts_data=tts_data,tts_align=tts_align,voice=voice)
+    fig_mid_p = plot_one_cluster(words,'pitch',mid_data,seg_aligns,mid_cluster)
+    fig_bad_p = plot_one_cluster(words,'pitch',bad_data,seg_aligns,bad_cluster)
+    tts_fig_e = plot_one_cluster(words,'rmse',matched_data,seg_aligns,best_cluster,tts_data=tts_data,tts_align=tts_align,voice=voice)
+    fig_mid_e = plot_one_cluster(words,'rmse',mid_data,seg_aligns,mid_cluster)
+    fig_bad_e = plot_one_cluster(words,'rmse',bad_data,seg_aligns,bad_cluster)
     return best_cluster_score, tts_fig_p, fig_mid_p, fig_bad_p, tts_fig_e, fig_mid_e, fig_bad_e
     if feature.lower() in ['pitch','f0']:
         fname = 'Pitch'
         ffunc = lambda x: [p for p,e in x]
+        pfunc = plt.scatter
     elif feature.lower() in ['energy', 'rmse']:
         fname = 'Energy'
         ffunc = lambda x: [e for p,e in x]
+        pfunc = plt.plot
     else:
         print('problem with the figure')
         return fig
                 bound_line = np.mean([word_times[i][2],word_times[i+1][1]])
                 plt.axvline(x=bound_line, color=colors[cc], linestyle='--', linewidth=1, label=f"Speaker {spk} -> {word_times[i+1][0]}")
+        pfunc(feat_xvals, feats, color=colors[cc], label=f"Speaker {spk}")
         cc += 1
         if cc >= len(colors):
             cc=0
     if voice:
+        tfeats = ffunc(tts_data)
         t_xvals = [x*0.005 for x in range(len(tfeats))]
         if len(tts_align)>1:
             for i in range(1,len(tts_align)-1):
                 bound_line = np.mean([tts_align[i][2],tts_align[i+1][1]])
                 plt.axvline(x=bound_line, color="black", linestyle='--', linewidth=1, label=f"TTS -> {tts_align[i+1][0]}")
+        pfunc(t_xvals, tfeats, color="black", label=f"TTS {voice}")
     #plt.legend()