Spaces:

clr
/

pce

Sleeping

App Files Files Community

catiR committed on Oct 13, 2023

Commit

2defee0

1 Parent(s): e8d7f64

f0

Browse files

Files changed (2) hide show

scripts/reaper2pass.py +15 -2
scripts/runSQ.py +22 -33

scripts/reaper2pass.py CHANGED Viewed

@@ -27,7 +27,8 @@ def reaper_soundfile(sound_path, orig_filetype):
-def get_reaper(wav_path, maxf0='700', minf0='50', reaper_path = "REAPER/build/reaper"):
     f0_data = subprocess.run([reaper_path, "-i", wav_path, '-f', '/dev/stdout', '-x', maxf0, '-m', minf0, '-a'],capture_output=True).stdout
     #print('PLAIN:',f0_data)
@@ -41,6 +42,18 @@ def get_reaper(wav_path, maxf0='700', minf0='50', reaper_path = "REAPER/build/re
     return f0_data
 # 2 pass pitch estimation
@@ -50,7 +63,7 @@ def estimate_pitch(sound_path):
     if orig_ftype == '.wav':
         wav_path = sound_path
     else:
-        tmp_path = reaper_soundfile(sound_path)
         wav_path = tmp_path
     print('REAPER FILE PATH:', wav_path)

+def get_reaper_data(wav_path, maxf0='700', minf0='50', reaper_path = "REAPER/build/reaper"):
     f0_data = subprocess.run([reaper_path, "-i", wav_path, '-f', '/dev/stdout', '-x', maxf0, '-m', minf0, '-a'],capture_output=True).stdout
     #print('PLAIN:',f0_data)
     return f0_data
+# Currently:
+# take the simplified list data from get_reaper_data,
+#  in the format Time  F0Val, only at times that have an F0Val,
+#   and write it to a text file.
+# An alternative would be to let reaper write its own files
+#  instead of capturing its stdout...
+def save_pitch(f0_data, save_path,hed=True):
+    with open(save_path,'w') as handle:
+        if hed:
+            handle.write('TIME\tF0\n')
+        handle.write(''.join(['\t'.join(l) + '\n' for l in f0_data]))
 # 2 pass pitch estimation
     if orig_ftype == '.wav':
         wav_path = sound_path
     else:
+        tmp_path = reaper_soundfile(sound_path, orig_ftype)
         wav_path = tmp_path
     print('REAPER FILE PATH:', wav_path)

scripts/runSQ.py CHANGED Viewed

@@ -1,7 +1,7 @@
 import os, unicodedata
 from scripts.ctcalign import aligner, wav16m
 from scripts.tapi import tiro
-from scripts.reaper2pass import estimate_pitch
 # given a Sentence string,
 # using a metadata file of SQ, // SQL1adult_metadata.tsv
@@ -14,6 +14,7 @@ def run(sentence, voices):
     #voices = ['Alfur','Dilja','Karl', 'Dora']
     # On tts.tiro.is speech marks are only available
     # for the voices: Alfur, Dilja, Karl and Dora.
     corpus_meta = '/home/user/app/human_data/SQL1adult10s_metadata.tsv'
     speech_dir = '/home/user/app/human_data/audio/squeries/'
@@ -29,10 +30,10 @@ def run(sentence, voices):
     meta = get_recordings(norm_sentence, corpus_meta)
     if meta:
         align_human(meta,speech_aligns,speech_dir,align_model_path)
-        f0_human(meta, speech_f0, speech_dir, 'TODO path to reaper')
     if voices:
         temp_a_sample = get_tts(sentence,voices,tts_dir)
-        f0_tts(sentence, voices, tts_dir, 'TODO path to reaper')
     # by now, all the data to cluster and eval exists in the right place.
     # (after the last todo of saving pitch to disk instead of only list)
@@ -112,7 +113,7 @@ def align_human(meta,align_dir,speech_dir,model_path):
 # check if f0s exist for all of those files.
 # if not, warn, and make them with TODO reaper
-def f0_human(meta, f0_dir, speech_dir, reaper_path):
     no_f0 = []
     for rec in meta:
@@ -126,31 +127,20 @@ def f0_human(meta, f0_dir, speech_dir, reaper_path):
             os.makedirs(f0_dir)
         for rec in no_f0:
             wav_path = f'{speech_dir}{rec[2]}'
-            temp_data_f0 = estimate_pitch(wav_path)
-            print('2ND PASS PITCHES FOR', wav_path)
-            print(temp_data_f0)
-        #TODO
-        # Current Todo:
-        # have pitch saved to file instead of returned to data
-        # tbqh can write my own simplified files instead of make reaper write
-        #whatever.
     else:
         print('All speech pitch trackings existed')
-# # # # # # # # #
-#################
-# TODO
-# IMPLEMENT GOOD 2 STEP PITCH ESTIMATION
-# TODO
-#################
-# # # # # # # # #
 # check if the TTS wavs + align jsons exist for this sentence
 # if not, warn and make them with TAPI ******
@@ -188,7 +178,7 @@ def get_tts(sentence,voices,ttsdir):
 # check if the TTS f0s exist
 # if not warn + make
 # TODO collapse functions
-def f0_tts(sentence, voices, ttsdir, reaper_path):
     # assume the first 64 chars of sentence are enough
     dpath = sentence.replace(' ','_')[:65]
@@ -202,7 +192,14 @@ def f0_tts(sentence, voices, ttsdir, reaper_path):
     if no_f0:
         print(f'Need to estimate pitch for {len(no_f0)} voices')
-        #TODO
     else:
         print('All TTS pitch trackings existed')
@@ -211,14 +208,6 @@ def f0_tts(sentence, voices, ttsdir, reaper_path):
-#run()
 # https://colab.research.google.com/drive/1RApnJEocx3-mqdQC2h5SH8vucDkSlQYt?authuser=1#scrollTo=410ecd91fa29bc73
 # CLUSTER the humans

 import os, unicodedata
 from scripts.ctcalign import aligner, wav16m
 from scripts.tapi import tiro
+from scripts.reaper2pass import estimate_pitch, save_pitch
 # given a Sentence string,
 # using a metadata file of SQ, // SQL1adult_metadata.tsv
     #voices = ['Alfur','Dilja','Karl', 'Dora']
     # On tts.tiro.is speech marks are only available
     # for the voices: Alfur, Dilja, Karl and Dora.
+    # in practice, only for Alfur and Dilja.
     corpus_meta = '/home/user/app/human_data/SQL1adult10s_metadata.tsv'
     speech_dir = '/home/user/app/human_data/audio/squeries/'
     meta = get_recordings(norm_sentence, corpus_meta)
     if meta:
         align_human(meta,speech_aligns,speech_dir,align_model_path)
+        f0_human(meta, speech_f0, speech_dir)
     if voices:
         temp_a_sample = get_tts(sentence,voices,tts_dir)
+        f0_tts(sentence, voices, tts_dir)
     # by now, all the data to cluster and eval exists in the right place.
     # (after the last todo of saving pitch to disk instead of only list)
 # check if f0s exist for all of those files.
 # if not, warn, and make them with TODO reaper
+def f0_human(meta, f0_dir, speech_dir):
     no_f0 = []
     for rec in meta:
             os.makedirs(f0_dir)
         for rec in no_f0:
             wav_path = f'{speech_dir}{rec[2]}'
+            fpath = f0_dir + rec[2].replace('.wav','.f0')
+            f0_data = estimate_pitch(wav_path)
+            save_pitch(f0_data,fpath)
+            print('2ND PASS PITCHES OF', fpath)
+            print(f0_data)
     else:
         print('All speech pitch trackings existed')
 # check if the TTS wavs + align jsons exist for this sentence
 # if not, warn and make them with TAPI ******
 # check if the TTS f0s exist
 # if not warn + make
 # TODO collapse functions
+def f0_tts(sentence, voices, ttsdir):
     # assume the first 64 chars of sentence are enough
     dpath = sentence.replace(' ','_')[:65]
     if no_f0:
         print(f'Need to estimate pitch for {len(no_f0)} voices')
+        for v in voices:
+            wav_path = f'{ttsdir}{dpath}/{v}.wav'
+            fpath = f'{ttsdir}{dpath}/{v}.f0'
+            f0_data = estimate_pitch(wav_path)
+            save_pitch(f0_data,fpath)
+            print('2ND PASS PITCHES OF', fpath)
+            print(f0_data)
     else:
         print('All TTS pitch trackings existed')
 # https://colab.research.google.com/drive/1RApnJEocx3-mqdQC2h5SH8vucDkSlQYt?authuser=1#scrollTo=410ecd91fa29bc73
 # CLUSTER the humans