fffiloni committed on
Commit
0e6aaa2
·
1 Parent(s): 0c330d6

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +9 -4
app.py CHANGED
@@ -16,16 +16,21 @@ model = BetterTransformer.transform(model, keep_original_model=False)
16
  # Enable CPU offload
17
  model.enable_cpu_offload()
18
 
19
- from transformers import AutoProcessor
 
20
 
21
  def infer(text_prompt):
22
  text_prompt = text_prompt
23
  inputs = processor(text_prompt).to(device)
24
  with torch.inference_mode():
25
  speech_output = model.generate(**inputs, do_sample = True, fine_temperature = 0.4, coarse_temperature = 0.8)
26
- #audio_out = speech_output[0].cpu().numpy()
27
- audio_out = speech_output[0]
28
- return audio_out
 
 
 
 
29
 
30
  with gr.Blocks() as demo:
31
  with gr.Column():
 
16
  # Enable CPU offload
17
  model.enable_cpu_offload()
18
 
19
+ import numpy as np
20
+ from scipy.io import wavfile
21
 
22
  def infer(text_prompt):
23
  text_prompt = text_prompt
24
  inputs = processor(text_prompt).to(device)
25
  with torch.inference_mode():
26
  speech_output = model.generate(**inputs, do_sample = True, fine_temperature = 0.4, coarse_temperature = 0.8)
27
+ audio_out = speech_output[0].cpu().numpy()
28
+
29
+ # Assuming audio_out contains audio data and the sampling rate
30
+ sampling_rate = 44100 # Replace with your actual sampling rate
31
+ wavfile.write("output.wav", sampling_rate, audio_out)
32
+
33
+ return "output.wav"
34
 
35
  with gr.Blocks() as demo:
36
  with gr.Column():