fffiloni committed on
Commit
a2e218b
·
1 Parent(s): 6ea5dd5

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +7 -17
app.py CHANGED
@@ -18,32 +18,22 @@ model.enable_cpu_offload()
18
 
19
  import numpy as np
20
  from scipy.io.wavfile import write as write_wav
21
- #from pydub import AudioSegment
22
- #import soundfile as sf
23
 
24
  def infer(text_prompt):
25
  text_prompt = text_prompt
26
  inputs = processor(text_prompt).to(device)
27
 
28
  with torch.inference_mode():
29
- speech_output = model.generate(**inputs, do_sample = True, fine_temperature = 0.4, coarse_temperature = 0.8)
30
- #audio_out = speech_output[0].cpu().numpy()
31
- audio_array = speech_output[0].cpu().numpy().squeeze()
32
 
33
- # Assuming audio_out contains audio data and the sampling rate
34
- sampling_rate = model.generation_config.sample_rate
35
- write_wav("output.wav", sampling_rate, audio_array)
36
- #wavfile.write("output.wav", sampling_rate, audio_out)
37
 
38
 
39
-
40
-
41
-
42
- #wav_path = "output.wav"
43
- #mp3_path = "output.mp3"
44
-
45
- #audio = AudioSegment.from_wav(wav_path)
46
- #audio.export(mp3_path, format="mp3")
47
 
48
  return "output.wav"
49
 
 
18
 
19
  import numpy as np
20
  from scipy.io.wavfile import write as write_wav
21
+
 
22
 
23
def infer(text_prompt):
    """Synthesize speech audio for ``text_prompt`` and save it to ``output.wav``.

    Parameters
    ----------
    text_prompt : str
        The text to turn into speech.

    Returns
    -------
    str
        Path of the WAV file that was written ("output.wav").
    """
    # Tokenize the prompt and move the resulting tensors to the model's device.
    inputs = processor(text_prompt).to(device)

    # inference_mode avoids autograd bookkeeping during generation.
    with torch.inference_mode():
        speech_output = model.generate(**inputs)

    # Drop the batch dimension and squeeze to a 1-D waveform for scipy.
    audio_array = speech_output[0].cpu().numpy().squeeze()

    # The model's generation config carries the output sample rate.
    sampling_rate = model.generation_config.sample_rate
    write_wav("output.wav", sampling_rate, audio_array)

    return "output.wav"
39