Omnibus committed on
Commit
c32e515
·
0 Parent(s):

Duplicate from Omnibus/Bark-simple

Browse files
Files changed (4) hide show
  1. .gitattributes +35 -0
  2. README.md +12 -0
  3. app.py +121 -0
  4. requirements.txt +6 -0
.gitattributes ADDED
@@ -0,0 +1,35 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ *.7z filter=lfs diff=lfs merge=lfs -text
2
+ *.arrow filter=lfs diff=lfs merge=lfs -text
3
+ *.bin filter=lfs diff=lfs merge=lfs -text
4
+ *.bz2 filter=lfs diff=lfs merge=lfs -text
5
+ *.ckpt filter=lfs diff=lfs merge=lfs -text
6
+ *.ftz filter=lfs diff=lfs merge=lfs -text
7
+ *.gz filter=lfs diff=lfs merge=lfs -text
8
+ *.h5 filter=lfs diff=lfs merge=lfs -text
9
+ *.joblib filter=lfs diff=lfs merge=lfs -text
10
+ *.lfs.* filter=lfs diff=lfs merge=lfs -text
11
+ *.mlmodel filter=lfs diff=lfs merge=lfs -text
12
+ *.model filter=lfs diff=lfs merge=lfs -text
13
+ *.msgpack filter=lfs diff=lfs merge=lfs -text
14
+ *.npy filter=lfs diff=lfs merge=lfs -text
15
+ *.npz filter=lfs diff=lfs merge=lfs -text
16
+ *.onnx filter=lfs diff=lfs merge=lfs -text
17
+ *.ot filter=lfs diff=lfs merge=lfs -text
18
+ *.parquet filter=lfs diff=lfs merge=lfs -text
19
+ *.pb filter=lfs diff=lfs merge=lfs -text
20
+ *.pickle filter=lfs diff=lfs merge=lfs -text
21
+ *.pkl filter=lfs diff=lfs merge=lfs -text
22
+ *.pt filter=lfs diff=lfs merge=lfs -text
23
+ *.pth filter=lfs diff=lfs merge=lfs -text
24
+ *.rar filter=lfs diff=lfs merge=lfs -text
25
+ *.safetensors filter=lfs diff=lfs merge=lfs -text
26
+ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
27
+ *.tar.* filter=lfs diff=lfs merge=lfs -text
28
+ *.tar filter=lfs diff=lfs merge=lfs -text
29
+ *.tflite filter=lfs diff=lfs merge=lfs -text
30
+ *.tgz filter=lfs diff=lfs merge=lfs -text
31
+ *.wasm filter=lfs diff=lfs merge=lfs -text
32
+ *.xz filter=lfs diff=lfs merge=lfs -text
33
+ *.zip filter=lfs diff=lfs merge=lfs -text
34
+ *.zst filter=lfs diff=lfs merge=lfs -text
35
+ *tfevents* filter=lfs diff=lfs merge=lfs -text
README.md ADDED
@@ -0,0 +1,12 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ ---
2
+ title: Bark Simple
3
+ emoji: 🐕
4
+ colorFrom: red
5
+ colorTo: gray
6
+ sdk: gradio
7
+ sdk_version: 3.41.2
8
+ app_file: app.py
9
+ duplicated_from: Omnibus/Bark-simple
10
+ ---
11
+
12
+ Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference
app.py ADDED
@@ -0,0 +1,121 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import gradio as gr
2
+ import torch
3
+ from pathlib import Path
4
+ from transformers import AutoProcessor, BarkModel
5
+ import scipy
6
+ from pytube import YouTube
7
+ from pydub import AudioSegment
8
+ from TTS.api import TTS
9
+ #import ffmpeg
10
+
11
+
12
+ # device = "cuda" if torch.cuda.is_available() else "cpu"
13
+ # model = BarkModel.from_pretrained("suno/bark-small", torch_dtype=torch.float16).to(device)
14
+ # model.enable_cpu_offload()
15
+
16
# Inference runs on the CPU.
device = "cpu"

# Bark text processor and model, loaded once when the module is imported.
processor = AutoProcessor.from_pretrained("suno/bark-small")
model = BarkModel.from_pretrained("suno/bark-small").to(device)

# Dropdown choices: 1-based speaker numbers as strings, and language codes.
num_list = [str(number) for number in range(1, 11)]
lang_list = ["en", "de"]
23
+
24
def run_bark(text, n, lang):
    """Synthesize ``text`` with Bark and write the audio to ``bark_out.wav``.

    Args:
        text: Prompt handed to the Bark processor.
        n: 1-based speaker number (string or int); it is converted to the
            0-based index used in the voice preset name.
        lang: Language code used in the voice preset name (e.g. "en", "de").

    Returns:
        The string ``"bark_out.wav"``, the path the audio was written to.
    """
    # Import the submodule explicitly instead of relying on ``import scipy``
    # having made ``scipy.io.wavfile`` reachable as a side effect.
    from scipy.io import wavfile

    voice_preset = f"v2/{lang}_speaker_{int(n) - 1}"
    inputs = processor(
        text=text,
        voice_preset=voice_preset,
        return_tensors="pt",
    )

    speech_values = model.generate(**inputs, do_sample=True)
    sampling_rate = model.generation_config.sample_rate

    out_path = "bark_out.wav"
    wavfile.write(
        out_path,
        rate=sampling_rate,
        data=speech_values.cpu().numpy().squeeze(),
    )
    return out_path
41
+
42
# Lazily created YourTTS model, shared by every custom_bark() call so the
# model is not re-instantiated on each button click.
_CUSTOM_TTS = None


def _get_custom_tts():
    """Return the shared YourTTS model, creating it on first use."""
    global _CUSTOM_TTS
    if _CUSTOM_TTS is None:
        _CUSTOM_TTS = TTS(
            model_name="tts_models/multilingual/multi-dataset/your_tts",
            progress_bar=False,
        ).to(device)
    return _CUSTOM_TTS


def custom_bark(inp):
    """Speak ``inp`` using ``Mid.mp3`` as the speaker reference clip.

    Args:
        inp: Text to synthesize.

    Returns:
        The string ``"output.wav"``, the path the audio was written to.

    Raises:
        FileNotFoundError: If the reference clip ``Mid.mp3`` does not exist.
    """
    speaker_wav = Path("Mid.mp3")
    # Fail early with a readable message, before the model is loaded.
    if not speaker_wav.is_file():
        raise FileNotFoundError(
            "Reference clip 'Mid.mp3' not found; create it with 'Trim Clip' first."
        )

    _get_custom_tts().tts_to_file(
        inp,
        speaker_wav=speaker_wav,
        language="en",
        file_path="output.wav",
    )
    return "output.wav"
47
+
48
def load_video_yt(vid):
    """Download a YouTube video and its audio track to fixed local files.

    The video is saved as ``tmp.mp4`` and the audio track as ``tmp_aud.mp4``.

    Args:
        vid: URL of the YouTube video.

    Returns:
        A tuple ``(video, audio, "tmp_aud.mp4")`` where ``video`` and
        ``audio`` are the values returned by the two ``download()`` calls.

    Raises:
        ValueError: If the video has no progressive MP4 stream or no
            audio-only stream.
    """
    yt = YouTube(vid)

    # first() returns None when nothing matches, so check before downloading.
    video_stream = (
        yt.streams.filter(progressive=True, file_extension='mp4')
        .order_by('resolution')
        .desc()
        .first()
    )
    if video_stream is None:
        raise ValueError(f"No progressive MP4 stream available for {vid!r}")

    audio_stream = yt.streams.filter(only_audio=True).first()
    if audio_stream is None:
        raise ValueError(f"No audio-only stream available for {vid!r}")

    video_path = video_stream.download(filename="tmp.mp4")
    audio_path = audio_stream.download(filename="tmp_aud.mp4")
    print(yt.length)
    return video_path, audio_path, "tmp_aud.mp4"
54
+
55
def _parse_timestamp_ms(stamp):
    """Convert an ``SS``, ``M:SS`` or ``H:MM:SS`` string to milliseconds."""
    parts = str(stamp).strip().split(":")
    if not 1 <= len(parts) <= 3:
        raise ValueError(
            f"Invalid timestamp {stamp!r}: expected 'SS', 'M:SS' or 'H:MM:SS'"
        )
    try:
        fields = [int(part) for part in parts]
    except ValueError as err:
        raise ValueError(
            f"Invalid timestamp {stamp!r}: expected 'SS', 'M:SS' or 'H:MM:SS'"
        ) from err
    if any(field < 0 for field in fields):
        raise ValueError(f"Invalid timestamp {stamp!r}: negative value")

    total_seconds = 0
    for field in fields:
        total_seconds = total_seconds * 60 + field
    # pydub slices audio in milliseconds.
    return total_seconds * 1000


def trim_clip(clip, start_t, end_t):
    """Cut ``[start_t, end_t)`` out of ``tmp_aud.mp4`` and save it as ``Mid.mp3``.

    Args:
        clip: Ignored. The source is always the file ``tmp_aud.mp4``; the
            parameter is kept so the UI event wiring stays unchanged.
        start_t: Start position as ``SS``, ``M:SS`` or ``H:MM:SS``.
        end_t: End position in the same format; must be after ``start_t``.

    Returns:
        The string ``"Mid.mp3"``, the path of the exported clip.

    Raises:
        ValueError: If a timestamp is malformed or the range is empty.
    """
    # Validate the range before touching the audio file.
    start = _parse_timestamp_ms(start_t)
    end = _parse_timestamp_ms(end_t)
    if end <= start:
        raise ValueError(
            f"End time {end_t!r} must be later than start time {start_t!r}"
        )

    song = AudioSegment.from_file("tmp_aud.mp4", format="mp4")
    segment = song[start:end]

    segment.export("Mid.mp3", format="mp3")
    print("New Audio file is created and saved")

    return "Mid.mp3"
84
+
85
# Gradio UI: one shared text box, a "Default" tab for the built-in Bark
# voices and an "Upload" tab for building a custom reference clip.
# NOTE(review): the container nesting below was reconstructed from a view of
# the file that had lost its indentation; confirm against the intended layout.
with gr.Blocks() as app:
    with gr.Column():
        in_text = gr.Textbox()

        with gr.Tab("Default"):
            with gr.Row():
                speaker_num = gr.Dropdown(
                    label="Speaker Voice", choices=num_list, value="1"
                )
                speaker_lang = gr.Dropdown(
                    label="Speaker Language", choices=lang_list, value="en"
                )
            go_btn = gr.Button()

        with gr.Tab("Upload"):
            with gr.Row():
                with gr.Column():
                    in_aud_mic = gr.Audio(source='microphone')
                    in_aud_file = gr.Audio(source='upload', interactive=True)
                    aud_file = gr.File()
                with gr.Column():
                    in_aud_yt = gr.Textbox(label="YouTube URL")
                    load_yt_btn = gr.Button("Load URL")
                with gr.Column():
                    with gr.Row():
                        start_time = gr.Textbox(
                            label="Start", value="0:00", placeholder="0:23"
                        )
                        end_time = gr.Textbox(
                            label="End", value="0:01", placeholder="1:12"
                        )
                    trim_clip_btn = gr.Button("Trim Clip")
                    trim_aud = gr.Audio(source='upload', interactive=False)
            alt_go_btn = gr.Button()
            yt_vid = gr.Video(type='filepath')

    with gr.Column():
        out_audio = gr.Audio()

    # Event wiring.
    go_btn.click(
        fn=run_bark,
        inputs=[in_text, speaker_num, speaker_lang],
        outputs=out_audio,
    )
    load_yt_btn.click(
        fn=load_video_yt,
        inputs=in_aud_yt,
        outputs=[yt_vid, in_aud_file, aud_file],
    )
    trim_clip_btn.click(
        fn=trim_clip,
        inputs=[aud_file, start_time, end_time],
        outputs=trim_aud,
    )
    alt_go_btn.click(fn=custom_bark, inputs=in_text, outputs=out_audio)

app.launch()
requirements.txt ADDED
@@ -0,0 +1,6 @@
 
 
 
 
 
 
 
1
+ torch
2
+ transformers
3
+ scipy
4
+ pytube
5
+ moviepy
6
+ TTS