fffiloni committed on
Commit
7e6ee13
·
1 Parent(s): 52f610b

Create app.py

Browse files
Files changed (1) hide show
  1. app.py +33 -0
app.py ADDED
@@ -0,0 +1,33 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
import gradio as gr
import torch
from transformers import AutoProcessor, BarkModel
from optimum.bettertransformer import BetterTransformer

# Load the Bark small checkpoint in half precision and place it on the GPU
# when one is available, otherwise fall back to CPU.
model = BarkModel.from_pretrained("suno/bark-small", torch_dtype=torch.float16)
device = "cuda:0" if torch.cuda.is_available() else "cpu"
model = model.to(device)

# Use BetterTransformer for flash attention.
model = BetterTransformer.transform(model, keep_original_model=False)

# Enable CPU offload: sub-models are moved to the GPU only while they run,
# reducing peak GPU memory use.
model.enable_cpu_offload()

# Bug fix: the original imported AutoProcessor but never created `processor`,
# which infer() calls — that was a guaranteed NameError on first inference.
processor = AutoProcessor.from_pretrained("suno/bark-small")
18
def infer(text_prompt):
    """Generate speech audio from a text prompt with Bark.

    Parameters
    ----------
    text_prompt : str
        The text to synthesize.

    Returns
    -------
    numpy.ndarray
        The generated waveform as a NumPy array (first item of the batch),
        suitable for a gradio Audio component with type="numpy".
    """
    # Tokenize/prepare the prompt and move the tensors to the model's device.
    inputs = processor(text_prompt).to(device)
    # Bug fix: the original called measure_latency_and_memory_use(), a
    # benchmarking helper from the HF Bark-optimization blog post that is
    # not defined in this file (NameError). Call the model directly instead.
    with torch.inference_mode():
        speech_output = model.generate(**inputs)
    audio_out = speech_output[0].cpu().numpy()
    return audio_out
25
+
26
# Build the UI: a text prompt, a submit button, and an audio output.
with gr.Blocks() as demo:
    with gr.Column():
        prompt = gr.Textbox(label="prompt")
        # Bug fix: the original wrote `submit.btn = gr.Button(...)`, an
        # attribute assignment on the undefined name `submit` (NameError).
        submit_btn = gr.Button("Submit")
        audio_out = gr.Audio(type="numpy")
    # Bug fix: the button was never wired to the inference function,
    # so clicking it did nothing.
    submit_btn.click(fn=infer, inputs=prompt, outputs=audio_out)

# Launch after the Blocks context has closed.
demo.launch()