Spaces:
Running
Running
Update app.py
Browse files
app.py
CHANGED
|
@@ -5,7 +5,7 @@ from transformers import AutoProcessor, BarkModel
|
|
| 5 |
import scipy
|
| 6 |
from pytube import YouTube
|
| 7 |
from pydub import AudioSegment
|
| 8 |
-
|
| 9 |
#import ffmpeg
|
| 10 |
|
| 11 |
|
|
@@ -39,6 +39,12 @@ def run_bark(text, n, lang):
|
|
| 39 |
scipy.io.wavfile.write("bark_out.wav", rate=sampling_rate, data=speech_values.cpu().numpy().squeeze())
|
| 40 |
return ("bark_out.wav")
|
| 41 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 42 |
def load_video_yt(vid):
|
| 43 |
yt = YouTube(vid)
|
| 44 |
vid = yt.streams.filter(progressive=True, file_extension='mp4').order_by('resolution').desc().first().download(filename="tmp.mp4")
|
|
@@ -54,10 +60,10 @@ def trim_clip(clip, start_t, end_t):
|
|
| 54 |
format="mp4")
|
| 55 |
|
| 56 |
# start and end time
|
| 57 |
-
start_min = 0
|
| 58 |
-
start_sec = 10
|
| 59 |
-
end_min = 0
|
| 60 |
-
end_sec = 55
|
| 61 |
start_min = int(start_t.split(":",1)[0])
|
| 62 |
start_sec = int(start_t.split(":",1)[1])
|
| 63 |
end_min = int(end_t.split(":",1)[0])
|
|
@@ -100,9 +106,8 @@ with gr.Blocks() as app:
|
|
| 100 |
|
| 101 |
trim_clip_btn = gr.Button("Trim Clip")
|
| 102 |
trim_aud = gr.Audio(source='upload', interactive = False)
|
| 103 |
-
yt_vid = gr.Video(type = 'filepath')
|
| 104 |
-
trim_vid=gr.Video()
|
| 105 |
alt_go_btn = gr.Button()
|
|
|
|
| 106 |
#speaker_num = gr.Number(value=0)
|
| 107 |
|
| 108 |
with gr.Column():
|
|
@@ -111,6 +116,6 @@ with gr.Blocks() as app:
|
|
| 111 |
go_btn.click(run_bark,[in_text, speaker_num, speaker_lang],out_audio)
|
| 112 |
load_yt_btn.click(load_video_yt, in_aud_yt, [yt_vid,in_aud_file,aud_file])
|
| 113 |
trim_clip_btn.click(trim_clip,[aud_file, start_time, end_time],trim_aud)
|
| 114 |
-
|
| 115 |
|
| 116 |
app.launch()
|
|
|
|
| 5 |
import scipy
|
| 6 |
from pytube import YouTube
|
| 7 |
from pydub import AudioSegment
|
| 8 |
+
from TTS.api import TTS
|
| 9 |
#import ffmpeg
|
| 10 |
|
| 11 |
|
|
|
|
| 39 |
scipy.io.wavfile.write("bark_out.wav", rate=sampling_rate, data=speech_values.cpu().numpy().squeeze())
|
| 40 |
return ("bark_out.wav")
|
| 41 |
|
| 42 |
+
def custom_bark(inp):
    """Synthesize a fixed sentence with a reference voice and return the WAV path.

    Constructs the ``tts_models/multilingual/multi-dataset/your_tts`` model,
    moves it to ``device`` (a name defined outside this function), and writes
    the spoken sentence "This is voice cloning." to ``output.wav`` using
    ``Mid.mp3`` as the speaker reference.

    Args:
        inp: Value supplied by the caller; it is not read by this function.

    Returns:
        The string ``"output.wav"``.
    """
    # NOTE(review): `inp` is ignored and the model is constructed on every
    # call; both the reference clip and the spoken text are fixed here.
    reference_clip = Path("Mid.mp3")
    result_path = "output.wav"

    model = TTS(
        model_name="tts_models/multilingual/multi-dataset/your_tts",
        progress_bar=False,
    )
    model = model.to(device)

    model.tts_to_file(
        "This is voice cloning.",
        speaker_wav=reference_clip,
        language="en",
        file_path=result_path,
    )
    return result_path
|
| 47 |
+
|
| 48 |
def load_video_yt(vid):
|
| 49 |
yt = YouTube(vid)
|
| 50 |
vid = yt.streams.filter(progressive=True, file_extension='mp4').order_by('resolution').desc().first().download(filename="tmp.mp4")
|
|
|
|
| 60 |
format="mp4")
|
| 61 |
|
| 62 |
# start and end time
|
| 63 |
+
#start_min = 0
|
| 64 |
+
#start_sec = 10
|
| 65 |
+
#end_min = 0
|
| 66 |
+
#end_sec = 55
|
| 67 |
start_min = int(start_t.split(":",1)[0])
|
| 68 |
start_sec = int(start_t.split(":",1)[1])
|
| 69 |
end_min = int(end_t.split(":",1)[0])
|
|
|
|
| 106 |
|
| 107 |
trim_clip_btn = gr.Button("Trim Clip")
|
| 108 |
trim_aud = gr.Audio(source='upload', interactive = False)
|
|
|
|
|
|
|
| 109 |
alt_go_btn = gr.Button()
|
| 110 |
+
yt_vid = gr.Video(type = 'filepath')
|
| 111 |
#speaker_num = gr.Number(value=0)
|
| 112 |
|
| 113 |
with gr.Column():
|
|
|
|
| 116 |
go_btn.click(run_bark,[in_text, speaker_num, speaker_lang],out_audio)
|
| 117 |
load_yt_btn.click(load_video_yt, in_aud_yt, [yt_vid,in_aud_file,aud_file])
|
| 118 |
trim_clip_btn.click(trim_clip,[aud_file, start_time, end_time],trim_aud)
|
| 119 |
+
alt_go_btn.click(custom_bark, trim_aud, out_audio)
|
| 120 |
|
| 121 |
app.launch()
|