fffiloni committed on
Commit
d483b19
·
1 Parent(s): d9716fe

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +65 -9
app.py CHANGED
@@ -19,9 +19,63 @@ model.enable_cpu_offload()
19
  import numpy as np
20
  from scipy.io.wavfile import write as write_wav
21
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
22
 
23
  def infer(text_prompt):
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
24
  text_prompt = text_prompt
 
25
  inputs = processor(text_prompt).to(device)
26
 
27
  with torch.inference_mode():
@@ -29,19 +83,21 @@ def infer(text_prompt):
29
 
30
  audio_array = speech_output[0].cpu().numpy().squeeze()
31
  print(f'AUDIO_ARRAY: {audio_array}')
32
-
33
- # Assuming audio_out contains audio data and the sampling rate
34
  sampling_rate = model.generation_config.sample_rate
35
  print(f'sampling_rate: {sampling_rate}')
36
 
37
- #If you want to return a WAV file :
38
- # Ensure the audio data is properly scaled (between -1 and 1 for 16-bit audio)
39
-
40
- #audio_data = np.int16(audio_array * 32767) # Scale for 16-bit signed integer
41
- #write_wav("output.wav", sampling_rate, audio_data)
42
- #return "output.wav"
 
 
 
43
 
44
- return (sampling_rate, audio_array)
45
 
46
  with gr.Blocks() as demo:
47
  with gr.Column():
 
19
  import numpy as np
20
  from scipy.io.wavfile import write as write_wav
21
 
22
def split_text_into_sentences(text):
    """Split *text* into sentence-sized chunks on terminal punctuation.

    A word ending in '.', '!' or '?' closes the current sentence (the
    original handled only '.', silently merging exclamations and
    questions into the next chunk). Any trailing words without a
    terminator are returned as a final chunk so no text is lost.

    Args:
        text: input string; may be empty.

    Returns:
        list[str]: whitespace-normalized sentences, in order.
    """
    sentences = []
    current = []

    for word in text.split():
        current.append(word)
        # '.', '!' and '?' all end a sentence.
        if word.endswith(('.', '!', '?')):
            sentences.append(' '.join(current))
            current = []

    # Flush any unterminated trailing words.
    if current:
        sentences.append(' '.join(current))

    return sentences
37
+
38
def join_wav_files(input_files, output_file):
    """Concatenate several WAV files into a single WAV file.

    The audio parameters (channels, sample width, frame rate) are copied
    from the first input; all inputs are assumed to share them — no
    resampling is performed.

    Args:
        input_files: non-empty list of WAV file paths, in playback order.
        output_file: path of the WAV file to create.

    Raises:
        ValueError: if input_files is empty.
        wave.Error: if an input is not a valid WAV file.
    """
    # stdlib; the module never imported it, which made this function
    # fail with NameError at runtime.
    import wave

    if not input_files:
        raise ValueError("input_files must not be empty")

    # Take the audio parameters from the first input file.
    with wave.open(input_files[0], 'rb') as first_file:
        params = first_file.getparams()

    # Write every input's frames, in order, into the output file.
    # (Loop variable renamed from `input`, which shadowed the builtin.)
    with wave.open(output_file, 'wb') as output:
        output.setparams(params)
        for path in input_files:
            with wave.open(path, 'rb') as src:
                output.writeframes(src.readframes(src.getnframes()))
52
+
53
 
54
def infer(text_prompt):
    """Render the whole prompt as one WAV file.

    Splits the prompt into sentences, synthesizes each sentence to its
    own WAV via generate(), then concatenates the pieces into
    'full_story.wav' and returns that path.
    """
    print("""

    Cutting text in chunks

    """)

    wav_paths = []
    for index, sentence in enumerate(split_text_into_sentences(text_prompt)):
        print(sentence)
        piece = generate(sentence, index, "wav")
        print(piece)
        wav_paths.append(piece)

    output_wav = 'full_story.wav'
    join_wav_files(wav_paths, output_wav)

    return 'full_story.wav'
74
+
75
+
76
+ def generate(text_prompt, i, out_type):
77
  text_prompt = text_prompt
78
+
79
  inputs = processor(text_prompt).to(device)
80
 
81
  with torch.inference_mode():
 
83
 
84
  audio_array = speech_output[0].cpu().numpy().squeeze()
85
  print(f'AUDIO_ARRAY: {audio_array}')
86
+
87
+ # Assuming audio_array contains audio data and the sampling rate
88
  sampling_rate = model.generation_config.sample_rate
89
  print(f'sampling_rate: {sampling_rate}')
90
 
91
+ if out_type == "numpy":
92
+ return (sampling_rate, audio_array)
93
+ elif out_type == "wav":
94
+ #If you want to return a WAV file :
95
+ # Ensure the audio data is properly scaled (between -1 and 1 for 16-bit audio)
96
+
97
+ audio_data = np.int16(audio_array * 32767) # Scale for 16-bit signed integer
98
+ write_wav(f"output_{i}.wav", sampling_rate, audio_data)
99
+ return f"output_{i}.wav"
100
 
 
101
 
102
  with gr.Blocks() as demo:
103
  with gr.Column():