Update app.py
Browse files
app.py
CHANGED
|
@@ -19,9 +19,63 @@ model.enable_cpu_offload()
|
|
| 19 |
import numpy as np
|
| 20 |
from scipy.io.wavfile import write as write_wav
|
| 21 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 22 |
|
| 23 |
def infer(text_prompt):
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 24 |
text_prompt = text_prompt
|
|
|
|
| 25 |
inputs = processor(text_prompt).to(device)
|
| 26 |
|
| 27 |
with torch.inference_mode():
|
|
@@ -29,19 +83,21 @@ def infer(text_prompt):
|
|
| 29 |
|
| 30 |
audio_array = speech_output[0].cpu().numpy().squeeze()
|
| 31 |
print(f'AUDIO_ARRAY: {audio_array}')
|
| 32 |
-
|
| 33 |
-
# Assuming
|
| 34 |
sampling_rate = model.generation_config.sample_rate
|
| 35 |
print(f'sampling_rate: {sampling_rate}')
|
| 36 |
|
| 37 |
-
|
| 38 |
-
|
| 39 |
-
|
| 40 |
-
|
| 41 |
-
|
| 42 |
-
|
|
|
|
|
|
|
|
|
|
| 43 |
|
| 44 |
-
return (sampling_rate, audio_array)
|
| 45 |
|
| 46 |
with gr.Blocks() as demo:
|
| 47 |
with gr.Column():
|
|
|
|
| 19 |
import numpy as np
|
| 20 |
from scipy.io.wavfile import write as write_wav
|
| 21 |
|
| 22 |
+
def split_text_into_sentences(text):
    """Split *text* into sentence-sized chunks for piecewise TTS generation.

    A sentence ends with '.', '!' or '?'; any trailing words without a
    terminator are returned as a final chunk.

    Parameters:
        text: the full prompt string.

    Returns:
        A list of whitespace-stripped sentence strings (empty list for
        empty/blank input).
    """
    sentences = []
    current_words = []

    for word in text.split():
        current_words.append(word)
        # Splitting on '.' alone would lump '!'/'?' sentences into one
        # oversized chunk, which the TTS model handles poorly.
        if word.endswith(('.', '!', '?')):
            sentences.append(' '.join(current_words))
            current_words = []

    # Flush any trailing words that lack a sentence terminator.
    if current_words:
        sentences.append(' '.join(current_words))

    return sentences
|
| 37 |
+
|
| 38 |
+
def join_wav_files(input_files, output_file):
    """Concatenate several WAV files into a single WAV file.

    All inputs are assumed to share the audio parameters (channels,
    sample width, frame rate) of the first file; the output inherits
    those parameters.  Mismatched inputs would produce garbled audio —
    TODO confirm all chunks from generate() agree.

    Parameters:
        input_files: ordered list of paths to the WAV chunks.
        output_file: path the joined WAV is written to.

    Raises:
        ValueError: if *input_files* is empty (previously a bare
            IndexError from indexing input_files[0]).
    """
    if not input_files:
        raise ValueError("join_wav_files: no input files given")

    # Take the audio parameters from the first input file only.
    with wave.open(input_files[0], 'rb') as first_file:
        params = first_file.getparams()

    # Write every file's raw frames, in order, into the output.
    with wave.open(output_file, 'wb') as output:
        output.setparams(params)
        for input_file in input_files:
            # 'in_wav' avoids shadowing the builtin input().
            with wave.open(input_file, 'rb') as in_wav:
                output.writeframes(in_wav.readframes(in_wav.getnframes()))
|
| 52 |
+
|
| 53 |
|
| 54 |
def infer(text_prompt):
    """Render *text_prompt* to speech and return the output WAV path.

    The prompt is split into sentence-sized chunks (shorter inputs suit
    the underlying TTS model), each chunk is rendered to its own
    output_{i}.wav via generate(), and the pieces are concatenated into
    one file whose path is returned.

    Parameters:
        text_prompt: the full text to synthesize.

    Returns:
        Path of the joined WAV file ('full_story.wav').
    """
    print("""
    —
    Cutting text in chunks
    —
    """)
    input_waves = []

    text_chunks = split_text_into_sentences(text_prompt)
    for i, chunk in enumerate(text_chunks):
        print(chunk)
        # generate(..., "wav") writes output_{i}.wav and returns its path.
        result = generate(chunk, i, "wav")
        print(result)
        input_waves.append(result)

    output_wav = 'full_story.wav'

    join_wav_files(input_waves, output_wav)

    # Return the variable rather than re-hardcoding the filename, so the
    # written path and the returned path cannot drift apart.
    return output_wav
|
| 74 |
+
|
| 75 |
+
|
| 76 |
+
def generate(text_prompt, i, out_type):
|
| 77 |
text_prompt = text_prompt
|
| 78 |
+
|
| 79 |
inputs = processor(text_prompt).to(device)
|
| 80 |
|
| 81 |
with torch.inference_mode():
|
|
|
|
| 83 |
|
| 84 |
audio_array = speech_output[0].cpu().numpy().squeeze()
|
| 85 |
print(f'AUDIO_ARRAY: {audio_array}')
|
| 86 |
+
|
| 87 |
+
# Assuming audio_array contains audio data and the sampling rate
|
| 88 |
sampling_rate = model.generation_config.sample_rate
|
| 89 |
print(f'sampling_rate: {sampling_rate}')
|
| 90 |
|
| 91 |
+
if out_type == "numpy":
|
| 92 |
+
return (sampling_rate, audio_array)
|
| 93 |
+
elif out_type == "wav":
|
| 94 |
+
#If you want to return a WAV file :
|
| 95 |
+
# Ensure the audio data is properly scaled (between -1 and 1 for 16-bit audio)
|
| 96 |
+
|
| 97 |
+
audio_data = np.int16(audio_array * 32767) # Scale for 16-bit signed integer
|
| 98 |
+
write_wav(f"output_{i}.wav", sampling_rate, audio_data)
|
| 99 |
+
return f"output_{i}.wav"
|
| 100 |
|
|
|
|
| 101 |
|
| 102 |
with gr.Blocks() as demo:
|
| 103 |
with gr.Column():
|