Remove vocal separation from the no-UI code
Browse files
app.py
CHANGED
|
@@ -15,12 +15,11 @@ NUMBER = 100
|
|
| 15 |
DEVICE = "cuda" if torch.cuda.is_available() else "cpu"
|
| 16 |
DOWNLOAD = True
|
| 17 |
SLICE_AUDIO = True
|
| 18 |
-
SEPARE_VOCALS = False
|
| 19 |
TRANSCRIBE_AUDIO = True
|
| 20 |
CONCATENATE_TRANSCRIPTIONS = True
|
| 21 |
TRANSLATE_TRANSCRIPTIONS = True
|
| 22 |
ADD_SUBTITLES_TO_VIDEO = True
|
| 23 |
-
REMOVE_FILES = False
|
| 24 |
if DEVICE == "cpu":
|
| 25 |
# I suppose that I am on the Hugging Face server
|
| 26 |
SECONDS = 300
|
|
@@ -161,12 +160,9 @@ language_dict = union_language_dict()
|
|
| 161 |
def subtify_no_ui():
|
| 162 |
number_works = 7
|
| 163 |
progress_bar = tqdm(total=number_works, desc="Subtify")
|
| 164 |
-
folder_vocals = "vocals"
|
| 165 |
folder_chunck = "chunks"
|
| 166 |
folder_concatenated = "concatenated_transcriptions"
|
| 167 |
folder_translated_transcriptions = "translated_transcriptions"
|
| 168 |
-
if not os.path.exists(folder_vocals):
|
| 169 |
-
os.makedirs(folder_vocals)
|
| 170 |
if not os.path.exists(folder_chunck):
|
| 171 |
os.makedirs(folder_chunck)
|
| 172 |
if not os.path.exists(folder_concatenated):
|
|
@@ -178,16 +174,16 @@ def subtify_no_ui():
|
|
| 178 |
if DOWNLOAD:
|
| 179 |
print('*'*NUMBER)
|
| 180 |
# url = "https://www.twitch.tv/videos/1936119752" # twitch Rob Mula 2 horas
|
| 181 |
-
|
| 182 |
# url = "https://www.youtube.com/watch?v=cgx0QnXo1OU" # ✅ smart home, un solo hablante, 4:42 minutos
|
| 183 |
-
url = "https://www.youtube.com/watch?v=dgOBxhi19T8" # ✅ rob mula, muchos hablantes, 4:28 minutos
|
| 184 |
# url = "https://www.youtube.com/watch?v=Coj72EzmX20" # rob mula, un solo hablante, 16 minutos
|
| 185 |
# url = "https://www.youtube.com/watch?v=Tqth0fKo0_g" # Conversación short
|
| 186 |
print(f"Downloading video and audio from {url}")
|
| 187 |
python_file = "download.py"
|
| 188 |
command = f"python {python_file} {url}"
|
| 189 |
os.system(command)
|
| 190 |
-
sleep(
|
| 191 |
print('*'*NUMBER)
|
| 192 |
print("\n\n")
|
| 193 |
progress_bar.update(1)
|
|
@@ -204,66 +200,29 @@ def subtify_no_ui():
|
|
| 204 |
print("\n\n")
|
| 205 |
progress_bar.update(1)
|
| 206 |
|
| 207 |
-
|
| 208 |
-
chunck_file = "chunks/output_files.txt"
|
| 209 |
-
print('*'*NUMBER)
|
| 210 |
-
if SEPARE_VOCALS:
|
| 211 |
-
print("Get vocals")
|
| 212 |
-
python_file = "separe_vocals.py"
|
| 213 |
-
command = f"python {python_file} {chunck_file} {DEVICE}"
|
| 214 |
-
os.system(command)
|
| 215 |
-
if REMOVE_FILES:
|
| 216 |
-
with open(chunck_file, 'r') as f:
|
| 217 |
-
files = f.read().splitlines()
|
| 218 |
-
for file in files:
|
| 219 |
-
command = f"rm {file}"
|
| 220 |
-
os.system(command)
|
| 221 |
-
else:
|
| 222 |
-
print("Moving chunks")
|
| 223 |
-
with open(f"{folder_vocals}/speakers.txt", 'w') as f:
|
| 224 |
-
f.write(str(0))
|
| 225 |
-
if REMOVE_FILES:
|
| 226 |
-
command = f"mv {folder_chunck}/*.mp3 {folder_vocals}/"
|
| 227 |
-
os.system(command)
|
| 228 |
-
else:
|
| 229 |
-
command = f"cp {folder_chunck}/*.mp3 {folder_vocals}/"
|
| 230 |
-
os.system(command)
|
| 231 |
-
print('*'*NUMBER)
|
| 232 |
-
print("\n\n")
|
| 233 |
-
progress_bar.update(1)
|
| 234 |
-
|
| 235 |
-
################# Transcript vocals ##################
|
| 236 |
-
speakers_file = "vocals/speakers.txt"
|
| 237 |
if TRANSCRIBE_AUDIO:
|
| 238 |
print('*'*NUMBER)
|
| 239 |
-
print("Transcript vocals")
|
|
|
|
|
|
|
|
|
|
| 240 |
python_file = "transcribe.py"
|
| 241 |
-
|
| 242 |
-
|
|
|
|
|
|
|
| 243 |
os.system(command)
|
| 244 |
if REMOVE_FILES:
|
| 245 |
-
|
| 246 |
-
with open(chunck_file, 'r') as f:
|
| 247 |
files = f.read().splitlines()
|
| 248 |
-
with open(speakers_file, 'r') as f:
|
| 249 |
-
speakers = f.read().splitlines()
|
| 250 |
-
speakers = int(speakers[0])
|
| 251 |
for file in files:
|
| 252 |
-
|
| 253 |
-
|
| 254 |
-
|
| 255 |
-
|
| 256 |
-
|
| 257 |
-
|
| 258 |
-
command = f"rm {vocal}"
|
| 259 |
-
os.system(command)
|
| 260 |
-
else:
|
| 261 |
-
vocals_extension = "mp3"
|
| 262 |
-
file_name, _ = file.split(".")
|
| 263 |
-
_, file_name = file_name.split("/")
|
| 264 |
-
vocal = f'{vocals_folder}/{file_name}.{vocals_extension}'
|
| 265 |
-
command = f"rm {vocal}"
|
| 266 |
-
os.system(command)
|
| 267 |
print('*'*NUMBER)
|
| 268 |
print("\n\n")
|
| 269 |
progress_bar.update(1)
|
|
@@ -272,8 +231,13 @@ def subtify_no_ui():
|
|
| 272 |
if CONCATENATE_TRANSCRIPTIONS:
|
| 273 |
print('*'*NUMBER)
|
| 274 |
print("Concatenate transcriptions")
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 275 |
python_file = "concat_transcriptions.py"
|
| 276 |
-
command = f"python {python_file} {chunck_file} {SECONDS}
|
| 277 |
os.system(command)
|
| 278 |
if REMOVE_FILES:
|
| 279 |
with open(chunck_file, 'r') as f:
|
|
@@ -606,7 +570,9 @@ def hide_textbobes_progress_info():
|
|
| 606 |
|
| 607 |
def subtify():
|
| 608 |
with gr.Blocks() as demo:
|
| 609 |
-
num_speaker = [
|
|
|
|
|
|
|
| 610 |
|
| 611 |
# Layout
|
| 612 |
gr.Markdown(html_social_media)
|
|
|
|
| 15 |
DEVICE = "cuda" if torch.cuda.is_available() else "cpu"
|
| 16 |
DOWNLOAD = True
|
| 17 |
SLICE_AUDIO = True
|
|
|
|
| 18 |
TRANSCRIBE_AUDIO = True
|
| 19 |
CONCATENATE_TRANSCRIPTIONS = True
|
| 20 |
TRANSLATE_TRANSCRIPTIONS = True
|
| 21 |
ADD_SUBTITLES_TO_VIDEO = True
|
| 22 |
+
REMOVE_FILES = True
|
| 23 |
if DEVICE == "cpu":
|
| 24 |
# I suppose that I am on the Hugging Face server
|
| 25 |
SECONDS = 300
|
|
|
|
| 160 |
def subtify_no_ui():
|
| 161 |
number_works = 7
|
| 162 |
progress_bar = tqdm(total=number_works, desc="Subtify")
|
|
|
|
| 163 |
folder_chunck = "chunks"
|
| 164 |
folder_concatenated = "concatenated_transcriptions"
|
| 165 |
folder_translated_transcriptions = "translated_transcriptions"
|
|
|
|
|
|
|
| 166 |
if not os.path.exists(folder_chunck):
|
| 167 |
os.makedirs(folder_chunck)
|
| 168 |
if not os.path.exists(folder_concatenated):
|
|
|
|
| 174 |
if DOWNLOAD:
|
| 175 |
print('*'*NUMBER)
|
| 176 |
# url = "https://www.twitch.tv/videos/1936119752" # twitch Rob Mula 2 horas
|
| 177 |
+
url = "https://www.youtube.com/watch?v=yX5EJf4R77s" # ✅ debate, varios hablantes, 3 minutos
|
| 178 |
# url = "https://www.youtube.com/watch?v=cgx0QnXo1OU" # ✅ smart home, un solo hablante, 4:42 minutos
|
| 179 |
+
# url = "https://www.youtube.com/watch?v=dgOBxhi19T8" # ✅ rob mula, muchos hablantes, 4:28 minutos
|
| 180 |
# url = "https://www.youtube.com/watch?v=Coj72EzmX20" # rob mula, un solo hablante, 16 minutos
|
| 181 |
# url = "https://www.youtube.com/watch?v=Tqth0fKo0_g" # Conversación short
|
| 182 |
print(f"Downloading video and audio from {url}")
|
| 183 |
python_file = "download.py"
|
| 184 |
command = f"python {python_file} {url}"
|
| 185 |
os.system(command)
|
| 186 |
+
sleep(1)
|
| 187 |
print('*'*NUMBER)
|
| 188 |
print("\n\n")
|
| 189 |
progress_bar.update(1)
|
|
|
|
| 200 |
print("\n\n")
|
| 201 |
progress_bar.update(1)
|
| 202 |
|
| 203 |
+
################# Transcribe slices ##################
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 204 |
if TRANSCRIBE_AUDIO:
|
| 205 |
print('*'*NUMBER)
|
| 206 |
+
print("Transcript slices")
|
| 207 |
+
chunks_folder = "chunks"
|
| 208 |
+
if not os.path.exists(chunks_folder):
|
| 209 |
+
os.makedirs(chunks_folder)
|
| 210 |
python_file = "transcribe.py"
|
| 211 |
+
chunks_file = "chunks/output_files.txt"
|
| 212 |
+
number_of_speakers = 10
|
| 213 |
+
source_languaje = "English"
|
| 214 |
+
command = f"python {python_file} {chunks_file} {source_languaje} {number_of_speakers} {DEVICE}"
|
| 215 |
os.system(command)
|
| 216 |
if REMOVE_FILES:
|
| 217 |
+
with open(chunks_file, 'r') as f:
|
|
|
|
| 218 |
files = f.read().splitlines()
|
|
|
|
|
|
|
|
|
|
| 219 |
for file in files:
|
| 220 |
+
audios_extension = "mp3"
|
| 221 |
+
file_name, _ = file.split(".")
|
| 222 |
+
_, file_name = file_name.split("/")
|
| 223 |
+
vocal = f'{chunks_folder}/{file_name}.{audios_extension}'
|
| 224 |
+
command = f"rm {vocal}"
|
| 225 |
+
os.system(command)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 226 |
print('*'*NUMBER)
|
| 227 |
print("\n\n")
|
| 228 |
progress_bar.update(1)
|
|
|
|
| 231 |
if CONCATENATE_TRANSCRIPTIONS:
|
| 232 |
print('*'*NUMBER)
|
| 233 |
print("Concatenate transcriptions")
|
| 234 |
+
folder_concatenated = "concatenated_transcriptions"
|
| 235 |
+
if not os.path.exists(folder_concatenated):
|
| 236 |
+
os.makedirs(folder_concatenated)
|
| 237 |
+
|
| 238 |
+
chunck_file = "chunks/output_files.txt"
|
| 239 |
python_file = "concat_transcriptions.py"
|
| 240 |
+
command = f"python {python_file} {chunck_file} {SECONDS}"
|
| 241 |
os.system(command)
|
| 242 |
if REMOVE_FILES:
|
| 243 |
with open(chunck_file, 'r') as f:
|
|
|
|
| 570 |
|
| 571 |
def subtify():
|
| 572 |
with gr.Blocks() as demo:
|
| 573 |
+
num_speaker = []
|
| 574 |
+
for i in range(100, 0, -1):
|
| 575 |
+
num_speaker.append(i)
|
| 576 |
|
| 577 |
# Layout
|
| 578 |
gr.Markdown(html_social_media)
|