Spaces:
Build error
Build error
Initial Commit
Browse files- app.py +392 -0
- languages.py +147 -0
- packages.txt +42 -0
- requirements.txt +5 -0
- subtitle.py +101 -0
app.py
ADDED
|
@@ -0,0 +1,392 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import torch
|
| 2 |
+
|
| 3 |
+
import gradio as gr
|
| 4 |
+
import yt_dlp as youtube_dl
|
| 5 |
+
from transformers import AutoModelForSpeechSeq2Seq, AutoProcessor, WhisperTokenizer, pipeline
|
| 6 |
+
from transformers.pipelines.audio_utils import ffmpeg_read
|
| 7 |
+
|
| 8 |
+
import tempfile
|
| 9 |
+
import os
|
| 10 |
+
import time
|
| 11 |
+
import requests
|
| 12 |
+
from playwright.sync_api import sync_playwright
|
| 13 |
+
|
| 14 |
+
from languages import get_language_names
|
| 15 |
+
from subtitle import text_output, subtitle_output
|
| 16 |
+
|
| 17 |
+
import subprocess
|
| 18 |
+
|
| 19 |
+
# `spaces` is an optional import; remember whether it succeeded so that
# gpu_decorator can decide whether to wrap functions with spaces.GPU.
try:
    import spaces
    USING_SPACES = True
except ImportError:
    USING_SPACES = False

# Install flash-attn at start-up. The child process receives the current
# environment plus the skip flag: passing only the flag as `env` would
# replace the whole environment (PATH, HOME, ...) and the shell could then
# fail to locate `pip`.
subprocess.run(
    "pip install flash-attn --no-build-isolation",
    env={**os.environ, "FLASH_ATTENTION_SKIP_CUDA_BUILD": "TRUE"},
    shell=True,
)

# Fetch the browser binaries that sync_playwright launches in _return_yt_info.
os.system("playwright install")

YT_LENGTH_LIMIT_S = 360  # longest YouTube video accepted, in seconds
SPACES_GPU_DURATION = 90  # `duration` handed to gpu_decorator for both transcribers

# CUDA device index 0 when a GPU is visible, otherwise the string "cpu".
device = 0 if torch.cuda.is_available() else "cpu"
|
| 37 |
+
|
| 38 |
+
def gpu_decorator(duration=60):
    """Return a decorator that applies ``spaces.GPU`` when it is available.

    When USING_SPACES is true the decorated function is passed through
    ``spaces.GPU(duration=duration)``; otherwise it is returned unchanged.
    """
    def actual_decorator(func):
        if not USING_SPACES:
            return func
        gpu_wrapper = spaces.GPU(duration=duration)
        return gpu_wrapper(func)

    return actual_decorator
|
| 44 |
+
|
| 45 |
+
def device_info():
    """Run a fixed set of system diagnostic commands, printing their output.

    The commands are ``df -h``, ``lsblk``, ``free -h``, ``lscpu`` and
    ``nvidia-smi``. Each one is attempted independently: a command that
    exits non-zero or cannot be started (for example because the binary is
    not installed) is reported with a "Command failed" line and the
    remaining commands still run.
    """
    commands = (
        ["df", "-h"],
        ["lsblk"],
        ["free", "-h"],
        ["lscpu"],
        ["nvidia-smi"],
    )
    for command in commands:
        try:
            subprocess.run(command, check=True)
        except (subprocess.CalledProcessError, OSError) as e:
            # OSError covers a missing or non-executable binary, which
            # subprocess raises instead of CalledProcessError.
            print(f"Command failed: {e}")
|
| 54 |
+
|
| 55 |
+
@gpu_decorator(duration=SPACES_GPU_DURATION)
def transcribe(inputs, model, language, batch_size, chunk_length_s, stride_length_s, task, timestamp_mode, progress=gr.Progress(track_tqdm=True)):
    """Run speech recognition on *inputs* with the checkpoint named *model*.

    The model, processor and tokenizer are loaded on every call and wrapped
    in an "automatic-speech-recognition" pipeline. For checkpoints whose
    name does not end in ".en", *task* is forwarded to generation, and so is
    *language* unless it is "Automatic Detection".

    Returns:
        The result of ``text_output`` when *timestamp_mode* is falsy,
        otherwise the result of ``subtitle_output``.

    Raises:
        gr.Error: when *inputs* is None, or wrapping the message of any
            exception raised while loading or running the model.
    """
    try:
        if inputs is None:
            raise gr.Error("No audio file submitted! Please upload or record an audio file before submitting your request.")

        half_precision = torch.float16

        asr_model = AutoModelForSpeechSeq2Seq.from_pretrained(
            model, torch_dtype=half_precision, low_cpu_mem_usage=True, use_safetensors=True
        )
        asr_model.to(device)

        processor = AutoProcessor.from_pretrained(model)
        tokenizer = WhisperTokenizer.from_pretrained(model)

        pipe = pipeline(
            task="automatic-speech-recognition",
            model=asr_model,
            chunk_length_s=chunk_length_s,
            stride_length_s=stride_length_s,
            tokenizer=tokenizer,
            feature_extractor=processor.feature_extractor,
            torch_dtype=half_precision,
            model_kwargs={"attn_implementation": "flash_attention_2"},
            device=device,
        )

        # Checkpoints named "*.en" get neither a language nor a task.
        multilingual = not model.endswith(".en")
        generate_kwargs = {}
        if multilingual and language != "Automatic Detection":
            generate_kwargs["language"] = language
        if multilingual:
            generate_kwargs["task"] = task

        output = pipe(inputs, batch_size=batch_size, generate_kwargs=generate_kwargs, return_timestamps=timestamp_mode)

        print(output)
        print({"inputs": inputs, "model": model, "language": language, "batch_size": batch_size, "chunk_length_s": chunk_length_s, "stride_length_s": stride_length_s, "task": task, "timestamp_mode": timestamp_mode})

        if timestamp_mode:
            return subtitle_output(inputs, output['chunks'])
        return text_output(inputs, output['text'])

    except Exception as e:
        raise gr.Error(str(e), duration=10)
|
| 104 |
+
|
| 105 |
+
def _download_yt_audio(yt_url, filename):
    """Download the audio of the YouTube video at *yt_url* into *filename*.

    Metadata is fetched first so that videos longer than YT_LENGTH_LIMIT_S
    seconds are rejected before anything is downloaded.

    Args:
        yt_url: URL handed to yt-dlp.
        filename: output template ("outtmpl") for the downloaded file.

    Raises:
        gr.Error: if metadata extraction fails, the duration is missing,
            the video exceeds the length limit, or the download fails.
    """
    info_loader = youtube_dl.YoutubeDL()

    try:
        info = info_loader.extract_info(yt_url, download=False)
    except youtube_dl.utils.DownloadError as err:
        raise gr.Error(str(err))

    file_length = info.get("duration_string")
    if not file_length:
        raise gr.Error("Video duration is unavailable.")

    # duration_string is colon-separated ("SS", "MM:SS" or "HH:MM:SS");
    # left-pad with zeros so hours, minutes and seconds are always present.
    file_h_m_s = [int(sub_length) for sub_length in file_length.split(":")]
    file_h_m_s = [0] * (3 - len(file_h_m_s)) + file_h_m_s

    file_length_s = file_h_m_s[0] * 3600 + file_h_m_s[1] * 60 + file_h_m_s[2]

    if file_length_s > YT_LENGTH_LIMIT_S:
        yt_length_limit_hms = time.strftime("%HH:%MM:%SS", time.gmtime(YT_LENGTH_LIMIT_S))
        file_length_hms = time.strftime("%HH:%MM:%SS", time.gmtime(file_length_s))
        raise gr.Error(f"Maximum YouTube length is {yt_length_limit_hms}, got {file_length_hms} YouTube video.", duration=10)

    ydl_opts = {
        "outtmpl": filename,
        "format": "bestaudio[ext=m4a]/best",
    }
    try:
        with youtube_dl.YoutubeDL(ydl_opts) as ydl:
            ydl.download([yt_url])
    except (youtube_dl.utils.DownloadError, youtube_dl.utils.ExtractorError) as err:
        # download() reports failures as DownloadError, so catching only
        # ExtractorError left this handler unreachable. The formats come
        # from the metadata fetched above, avoiding a second network call
        # that could itself fail inside the handler.
        available_formats = info.get("formats")
        raise gr.Error(f"Download failed: {err}. Available formats: {available_formats}", duration=10) from err
|
| 142 |
+
|
| 143 |
+
def _return_yt_video_id(yt_url):
|
| 144 |
+
if "https://www.youtube.com/watch?v=" in yt_url:
|
| 145 |
+
video_id = yt_url.split("?v=")[-1]
|
| 146 |
+
elif "https://youtu.be/" in yt_url:
|
| 147 |
+
video_id = yt_url.split("be/")[1]
|
| 148 |
+
return video_id
|
| 149 |
+
|
| 150 |
+
def _return_yt_html_embed(yt_url):
    """Build the HTML snippet that embeds the YouTube player for *yt_url*.

    Returns:
        A ``<center>``-wrapped ``<iframe>`` pointing at the embed URL of the
        video id extracted from *yt_url*.

    Raises:
        ValueError: if no video id can be extracted.
    """
    from urllib.parse import quote

    video_id = _return_yt_video_id(yt_url)
    if not video_id:
        raise ValueError("Invalid YouTube URL: Unable to extract video ID.")

    # The id is cut out of user-supplied text and lands inside an HTML
    # attribute; percent-encoding keeps quotes and angle brackets out of the
    # markup. Ids made only of letters, digits, "-" and "_" are unchanged.
    safe_id = quote(video_id, safe="")
    HTML_str = (
        f'<center> <iframe width="500" height="320" src="https://www.youtube.com/embed/{safe_id}"> </iframe>'
        " </center>"
    )
    return HTML_str
|
| 157 |
+
|
| 158 |
+
def _return_yt_thumbnail(yt_url):
    """Download the ``maxresdefault.jpg`` thumbnail of a YouTube video.

    Returns:
        Path of a temporary ``.jpg`` file holding the image (the file is not
        deleted automatically), or None if the download fails.

    Raises:
        ValueError: if no video id can be extracted from *yt_url*.
    """
    video_id = _return_yt_video_id(yt_url)
    if not video_id:
        raise ValueError("Invalid YouTube URL: Unable to extract video ID.")

    thumbnail_url = f"https://img.youtube.com/vi/{video_id}/maxresdefault.jpg"
    try:
        # Fetch before creating the temp file so that a failed request does
        # not leave an empty file behind; the timeout keeps an unresponsive
        # server from blocking the request indefinitely.
        response = requests.get(thumbnail_url, timeout=10)
        if response.status_code != 200:
            raise Exception(f"Failed to retrieve thumbnail. Status code: {response.status_code}")

        with tempfile.NamedTemporaryFile(delete=False, suffix='.jpg') as temp_file:
            temp_file.write(response.content)
            thumbnail_path = temp_file.name
    except Exception as e:
        # Best effort: the caller treats None as "no thumbnail".
        print(f"Error occurred: {e}")
        return None
    return thumbnail_path
|
| 176 |
+
|
| 177 |
+
def _return_yt_info(yt_url):
    """Scrape title, description and keywords from the page at *yt_url*.

    The page is opened in headless Chromium through Playwright; the title
    comes from the document title, the other two from the ``description``
    and ``keywords`` meta tags.

    Returns:
        Three ``gr.Textbox`` components (title, description, keywords). On
        success they are visible and carry the scraped values; a meta tag
        that is absent yields a value of None. If anything raises, three
        hidden textboxes are returned instead.
    """
    def _meta_content(page, name):
        # query_selector returns None when no element matches.
        element = page.query_selector(f"meta[name='{name}']")
        return element.get_attribute("content") if element is not None else None

    try:
        with sync_playwright() as p:
            browser = p.chromium.launch(headless=True)
            try:
                page = browser.new_page()
                page.goto(yt_url)
                page.wait_for_load_state("networkidle")

                title = page.title()
                description = _meta_content(page, "description")
                keywords = _meta_content(page, "keywords")
            finally:
                # Close the browser even when navigation or scraping fails.
                browser.close()

        gr_title = gr.Textbox(label="YouTube Title", visible=True, value=title)
        gr_description = gr.Textbox(label="YouTube Description", visible=True, value=description)
        gr_keywords = gr.Textbox(label="YouTube Keywords", visible=True, value=keywords)
        return gr_title, gr_description, gr_keywords
    except Exception as e:
        # Best effort: page metadata is optional, so hide the fields.
        print(e)
        return gr.Textbox(visible=False), gr.Textbox(visible=False), gr.Textbox(visible=False)
|
| 201 |
+
|
| 202 |
+
|
| 203 |
+
def return_youtube(yt_url):
    """Assemble the five display components describing a YouTube video.

    Returns:
        A tuple ``(html, thumbnail, title, description, keywords)``: a
        ``gr.HTML`` with the embedded player, a ``gr.Image`` with the
        thumbnail, and the three textboxes produced by ``_return_yt_info``.
    """
    embed_markup = _return_yt_html_embed(yt_url)
    thumbnail_file = _return_yt_thumbnail(yt_url)

    player = gr.HTML(label="Youtube Video", visible=True, value=embed_markup)
    preview = gr.Image(label="Youtube Thumbnail", visible=True, value=thumbnail_file)

    title_box, description_box, keywords_box = _return_yt_info(yt_url)
    return player, preview, title_box, description_box, keywords_box
|
| 210 |
+
|
| 211 |
+
@gpu_decorator(duration=SPACES_GPU_DURATION)
def yt_transcribe(yt_url, model, language, batch_size, chunk_length_s, stride_length_s, task, timestamp_mode):
    """Download a YouTube video's audio and transcribe it with *model*.

    The video's display components are built first via ``return_youtube``
    so they can be returned even when transcription fails.

    Returns:
        ``(subtitle, files, title, html, thumbnail, description, keywords)``.
        If downloading or transcription raises, a warning is shown and the
        first two items are hidden textboxes.
    """
    gr_html, gr_thumbnail, gr_title, gr_description, gr_keywords = return_youtube(yt_url)
    try:
        # Load the processor before decoding: its feature extractor supplies
        # the sampling rate. The earlier code read it from `pipe`, which is
        # only assigned further down, so every call failed with
        # UnboundLocalError.
        processor = AutoProcessor.from_pretrained(model)
        tokenizer = WhisperTokenizer.from_pretrained(model)
        sampling_rate = processor.feature_extractor.sampling_rate

        with tempfile.TemporaryDirectory() as tmpdirname:
            filepath = os.path.join(tmpdirname, "video.mp4")
            _download_yt_audio(yt_url, filepath)
            with open(filepath, "rb") as f:
                audio_bytes = f.read()

        audio = ffmpeg_read(audio_bytes, sampling_rate)
        inputs = {"array": audio, "sampling_rate": sampling_rate}

        torch_dtype = torch.float16

        model_gen = AutoModelForSpeechSeq2Seq.from_pretrained(
            model, torch_dtype=torch_dtype, low_cpu_mem_usage=True, use_safetensors=True
        )
        model_gen.to(device)

        pipe = pipeline(
            task="automatic-speech-recognition",
            model=model_gen,
            chunk_length_s=chunk_length_s,
            stride_length_s=stride_length_s,
            tokenizer=tokenizer,
            feature_extractor=processor.feature_extractor,
            torch_dtype=torch_dtype,
            model_kwargs={"attn_implementation": "flash_attention_2"},
            device=device,
        )

        # Checkpoints named "*.en" get neither a language nor a task.
        multilingual = not model.endswith(".en")
        generate_kwargs = {}
        if multilingual and language != "Automatic Detection":
            generate_kwargs["language"] = language
        if multilingual:
            generate_kwargs["task"] = task

        output = pipe(inputs, batch_size=batch_size, generate_kwargs=generate_kwargs, return_timestamps=timestamp_mode)

        print(output)
        print({"inputs": yt_url, "model": model, "language": language, "batch_size": batch_size, "chunk_length_s": chunk_length_s, "stride_length_s": stride_length_s, "task": task, "timestamp_mode": timestamp_mode})

        # The output helpers derive the file stem from a path string, so
        # hand them the download path rather than the decoded-audio dict.
        if not timestamp_mode:
            subtitle, files = text_output(filepath, output['text'])
        else:
            subtitle, files = subtitle_output(filepath, output['chunks'])
        return subtitle, files, gr_title, gr_html, gr_thumbnail, gr_description, gr_keywords

    except Exception as e:
        # Keep the video information visible; only the transcript is hidden.
        gr.Warning(str(e), duration=10)
        return gr.Textbox(visible=False), gr.Textbox(visible=False), gr_title, gr_html, gr_thumbnail, gr_description, gr_keywords
|
| 269 |
+
|
| 270 |
+
demo = gr.Blocks()

# Checkpoints offered in the "Model Name" dropdown of every tab.
_WHISPER_MODELS = (
    "openai/whisper-tiny",
    "openai/whisper-base",
    "openai/whisper-small",
    "openai/whisper-medium",
    "openai/whisper-large",
    "openai/whisper-large-v1",
    "openai/whisper-large-v2",
    "distil-whisper/distil-large-v2",
    "openai/whisper-large-v3",
    "openai/whisper-large-v3-turbo",
    "distil-whisper/distil-large-v3",
    "xaviviro/whisper-large-v3-catalan-finetuned-v2",
)


def _shared_controls():
    """Create a fresh set of the seven option widgets common to all tabs.

    The order matches the parameters that follow the media input in
    ``transcribe`` and ``yt_transcribe``: model, language, batch size,
    chunk length, stride length, task, timestamp mode.
    """
    return [
        gr.Dropdown(
            choices=list(_WHISPER_MODELS),
            value="openai/whisper-large-v3-turbo",
            label="Model Name",
            allow_custom_value=True,
        ),
        gr.Dropdown(
            choices=["Automatic Detection"] + sorted(get_language_names()),
            value="Automatic Detection",
            label="Language",
            interactive=True,
        ),
        gr.Slider(label="Batch Size", minimum=1, maximum=32, value=16, step=1),
        gr.Slider(label="Chunk Length (s)", minimum=1, maximum=60, value=17.5, step=0.1),
        gr.Slider(label="Stride Length (s)", minimum=1, maximum=30, value=1, step=0.1),
        gr.Radio(["transcribe", "translate"], label="Task", value="transcribe"),
        gr.Dropdown(
            choices=[True, False, "word"],
            value=True,
            label="Timestamp Mode",
        ),
    ]


file_transcribe = gr.Interface(
    fn=transcribe,
    inputs=[
        gr.Audio(sources=['upload', 'microphone'], type="filepath", label="Audio file"),
        *_shared_controls(),
    ],
    outputs=[gr.Textbox(label="Output"), gr.File(label="Download Files")],
    title="Whisper: Transcribe Audio",
    flagging_mode="auto",
)

video_transcribe = gr.Interface(
    fn=transcribe,
    inputs=[
        gr.Video(sources=["upload", "webcam"], label="Video file", show_label=False, show_download_button=False, show_share_button=False, streaming=True),
        *_shared_controls(),
    ],
    outputs=[gr.Textbox(label="Output"), gr.File(label="Download Files")],
    title="Whisper: Transcribe Video",
    flagging_mode="auto",
)

# NOTE: this rebinds the module-level name `yt_transcribe` from the function
# to the Interface; `fn=` is evaluated first, so it still gets the function.
yt_transcribe = gr.Interface(
    fn=yt_transcribe,
    inputs=[
        gr.Textbox(lines=1, placeholder="Paste the URL to a YouTube video here", label="YouTube URL"),
        *_shared_controls(),
    ],
    outputs=[
        gr.Textbox(label="Output"),
        gr.File(label="Download Files"),
        gr.Textbox(label="Youtube Title"),
        gr.HTML(label="Youtube Video"),
        gr.Image(label="Youtube Thumbnail"),
        gr.Textbox(label="Youtube Description"),
        gr.Textbox(label="Youtube Keywords"),
    ],
    title="Whisper: Transcribe YouTube",
    flagging_mode="auto",
)

with demo:
    gr.TabbedInterface(
        interface_list=[file_transcribe, video_transcribe, yt_transcribe],
        tab_names=["Audio", "Video", "YouTube"],
    )

if __name__ == "__main__":
    demo.queue().launch(ssr_mode=False)
|
languages.py
ADDED
|
@@ -0,0 +1,147 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
class Language():
    """A language described by a short code and a display name."""

    def __init__(self, code, name):
        self.code, self.name = code, name

    def __str__(self):
        return f"Language(code={self.code}, name={self.name})"
|
| 8 |
+
|
| 9 |
+
# One Language per (code, name) pair, kept in the order listed here.
LANGUAGES = [
    Language(code, name)
    for code, name in (
        ('en', 'English'), ('zh', 'Chinese'), ('de', 'German'), ('es', 'Spanish'),
        ('ru', 'Russian'), ('ko', 'Korean'), ('fr', 'French'), ('ja', 'Japanese'),
        ('pt', 'Portuguese'), ('tr', 'Turkish'), ('pl', 'Polish'), ('ca', 'Catalan'),
        ('nl', 'Dutch'), ('ar', 'Arabic'), ('sv', 'Swedish'), ('it', 'Italian'),
        ('id', 'Indonesian'), ('hi', 'Hindi'), ('fi', 'Finnish'), ('vi', 'Vietnamese'),
        ('he', 'Hebrew'), ('uk', 'Ukrainian'), ('el', 'Greek'), ('ms', 'Malay'),
        ('cs', 'Czech'), ('ro', 'Romanian'), ('da', 'Danish'), ('hu', 'Hungarian'),
        ('ta', 'Tamil'), ('no', 'Norwegian'), ('th', 'Thai'), ('ur', 'Urdu'),
        ('hr', 'Croatian'), ('bg', 'Bulgarian'), ('lt', 'Lithuanian'), ('la', 'Latin'),
        ('mi', 'Maori'), ('ml', 'Malayalam'), ('cy', 'Welsh'), ('sk', 'Slovak'),
        ('te', 'Telugu'), ('fa', 'Persian'), ('lv', 'Latvian'), ('bn', 'Bengali'),
        ('sr', 'Serbian'), ('az', 'Azerbaijani'), ('sl', 'Slovenian'), ('kn', 'Kannada'),
        ('et', 'Estonian'), ('mk', 'Macedonian'), ('br', 'Breton'), ('eu', 'Basque'),
        ('is', 'Icelandic'), ('hy', 'Armenian'), ('ne', 'Nepali'), ('mn', 'Mongolian'),
        ('bs', 'Bosnian'), ('kk', 'Kazakh'), ('sq', 'Albanian'), ('sw', 'Swahili'),
        ('gl', 'Galician'), ('mr', 'Marathi'), ('pa', 'Punjabi'), ('si', 'Sinhala'),
        ('km', 'Khmer'), ('sn', 'Shona'), ('yo', 'Yoruba'), ('so', 'Somali'),
        ('af', 'Afrikaans'), ('oc', 'Occitan'), ('ka', 'Georgian'), ('be', 'Belarusian'),
        ('tg', 'Tajik'), ('sd', 'Sindhi'), ('gu', 'Gujarati'), ('am', 'Amharic'),
        ('yi', 'Yiddish'), ('lo', 'Lao'), ('uz', 'Uzbek'), ('fo', 'Faroese'),
        ('ht', 'Haitian creole'), ('ps', 'Pashto'), ('tk', 'Turkmen'), ('nn', 'Nynorsk'),
        ('mt', 'Maltese'), ('sa', 'Sanskrit'), ('lb', 'Luxembourgish'), ('my', 'Myanmar'),
        ('bo', 'Tibetan'), ('tl', 'Tagalog'), ('mg', 'Malagasy'), ('as', 'Assamese'),
        ('tt', 'Tatar'), ('haw', 'Hawaiian'), ('ln', 'Lingala'), ('ha', 'Hausa'),
        ('ba', 'Bashkir'), ('jw', 'Javanese'), ('su', 'Sundanese'),
    )
]
|
| 110 |
+
|
| 111 |
+
# Alternative lowercase names, each mapped to the code of a LANGUAGES entry.
_LANGUAGE_ALIASES = {
    "burmese": "my",
    "valencian": "ca",
    "flemish": "nl",
    "haitian": "ht",
    "letzeburgesch": "lb",
    "pushto": "ps",
    "panjabi": "pa",
    "moldavian": "ro",
    "moldovan": "ro",
    "sinhalese": "si",
    "castilian": "es",
}

_LANGUAGE_BY_CODE = {language.code: language for language in LANGUAGES}

# Every value is a Language. The alias keys used to map to bare code strings,
# so a lookup could return either type depending on the key; they now resolve
# to the Language that owns the code.
_TO_LANGUAGE_CODE = {
    **_LANGUAGE_BY_CODE,
    **{alias: _LANGUAGE_BY_CODE[code] for alias, code in _LANGUAGE_ALIASES.items()},
}

# Lowercased display name -> Language, extended with the aliases so that a
# name such as "burmese" can be resolved by name as well.
_FROM_LANGUAGE_NAME = {
    **{language.name.lower(): language for language in LANGUAGES},
    **{alias: _LANGUAGE_BY_CODE[code] for alias, code in _LANGUAGE_ALIASES.items()},
}
|
| 129 |
+
|
| 130 |
+
def get_language_from_code(language_code, default=None) -> Language:
    """Return the entry stored under *language_code* in _TO_LANGUAGE_CODE.

    *default* is returned when the key is not present.
    """
    try:
        return _TO_LANGUAGE_CODE[language_code]
    except KeyError:
        return default
|
| 133 |
+
|
| 134 |
+
def get_language_from_name(language, default=None) -> Language:
    """Return the entry stored under the lowercased *language* name.

    The lookup is made in _FROM_LANGUAGE_NAME; a falsy *language* is looked
    up as None. *default* is returned when the key is not present.
    """
    lookup_key = None
    if language:
        lookup_key = language.lower()
    return _FROM_LANGUAGE_NAME.get(lookup_key, default)
|
| 137 |
+
|
| 138 |
+
def get_language_names():
    """Return the ``name`` of every LANGUAGES entry, in list order."""
    return [entry.name for entry in LANGUAGES]
|
| 141 |
+
|
| 142 |
+
if __name__ == "__main__":
    # Smoke test: one lookup by code, one by name, then every name.
    for found in (get_language_from_code('en'), get_language_from_name('English')):
        print(found)

    print(get_language_names())
|
packages.txt
ADDED
|
@@ -0,0 +1,42 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
ffmpeg
|
| 2 |
+
libnss3
|
| 3 |
+
libnspr4
|
| 4 |
+
libatk1.0-0
|
| 5 |
+
libatk-bridge2.0-0
|
| 6 |
+
libcups2
|
| 7 |
+
libxcomposite1
|
| 8 |
+
libxdamage1
|
| 9 |
+
libxrandr2
|
| 10 |
+
libgbm1
|
| 11 |
+
libpango-1.0-0
|
| 12 |
+
libpangocairo-1.0-0
|
| 13 |
+
libasound2
|
| 14 |
+
libxshmfence1
|
| 15 |
+
libx11-xcb1
|
| 16 |
+
libxext6
|
| 17 |
+
libxtst6
|
| 18 |
+
libxinerama1
|
| 19 |
+
libwayland-client0
|
| 20 |
+
libwayland-cursor0
|
| 21 |
+
libwayland-egl1
|
| 22 |
+
libdbus-1-3
|
| 23 |
+
libatspi2.0-0
|
| 24 |
+
libdrm2
|
| 25 |
+
libgtk-3-0
|
| 26 |
+
libgdk-pixbuf2.0-0
|
| 27 |
+
libgstreamer1.0-0
|
| 28 |
+
libwoff1
|
| 29 |
+
libgstreamer-plugins-base1.0-0
|
| 30 |
+
libgstreamer-gl1.0-0
|
| 31 |
+
libharfbuzz-icu0
|
| 32 |
+
libenchant-2-2
|
| 33 |
+
libsecret-1-0
|
| 34 |
+
libhyphen0
|
| 35 |
+
libmanette-0.2-0
|
| 36 |
+
libgles2
|
| 37 |
+
libgstreamer1.0-0
|
| 38 |
+
libgstreamer-plugins-base1.0-0
|
| 39 |
+
gstreamer1.0-plugins-good
|
| 40 |
+
gstreamer1.0-plugins-bad
|
| 41 |
+
gstreamer1.0-plugins-ugly
|
| 42 |
+
|
requirements.txt
ADDED
|
@@ -0,0 +1,5 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
transformers
|
| 2 |
+
pydub
|
| 3 |
+
yt-dlp
|
| 4 |
+
accelerate
|
| 5 |
+
playwright
|
subtitle.py
ADDED
|
@@ -0,0 +1,101 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
class Subtitle:
    """Render transcript segments as SRT, WebVTT, plain text or LRC.

    A segment is a mapping with a ``'text'`` string and a ``'timestamp'``
    pair ``(start, end)`` in seconds. When ``end`` is falsy the start time
    is used for both ends of the cue.
    """

    def __init__(self, ext="srt"):
        """Select the output format.

        Args:
            ext: one of "srt", "vtt", "txt" or "lrc".

        Raises:
            KeyError: if *ext* is not one of the supported formats.
        """
        sub_dict = {
            "srt": {"coma": ",", "header": "", "format": self._srt_format},
            # A WebVTT file must begin with the exact signature "WEBVTT";
            # the previous "WebVTT" header does not match it.
            "vtt": {"coma": ".", "header": "WEBVTT\n\n", "format": self._vtt_format},
            "txt": {"coma": "", "header": "", "format": self._txt_format},
            "lrc": {"coma": "", "header": "", "format": self._lrc_format},
        }

        self.ext = ext
        self.coma = sub_dict[ext]["coma"]  # separator before the milliseconds
        self.header = sub_dict[ext]["header"]
        self.format_fn = sub_dict[ext]["format"]

    def timeformat(self, time):
        """Format *time* (seconds) as ``HH:MM:SS<sep>mmm``.

        The value is rounded to whole milliseconds first. Subtracting the
        integer part from the float and truncating, as done previously,
        turned 2.3 s into 299 ms.
        """
        total_ms = int(round(float(time) * 1000))
        hours, total_ms = divmod(total_ms, 3_600_000)
        minutes, total_ms = divmod(total_ms, 60_000)
        seconds, milliseconds = divmod(total_ms, 1000)
        return f"{hours:02d}:{minutes:02d}:{seconds:02d}{self.coma}{milliseconds:03d}"

    def seconds_to_lrc_timestamp(self, time):
        """Format *time* (seconds) as an LRC tag ``[MM:SS.mmm]``.

        Rounding happens before the minutes are split off, so 59.9996 s
        becomes ``[01:00.000]`` rather than ``[00:60.000]``.
        """
        total_ms = int(round(float(time) * 1000))
        minutes, remainder_ms = divmod(total_ms, 60_000)
        return f"[{minutes:02d}:{remainder_ms / 1000:06.3f}]"

    def _cue_times(self, segment):
        """Return the formatted (start, end) pair for *segment*."""
        start, end = segment['timestamp'][0], segment['timestamp'][1]
        return self.timeformat(start), self.timeformat(end if end else start)

    def _srt_format(self, i, segment):
        """Render one numbered SRT cue; *i* is the zero-based index."""
        start_time, end_time = self._cue_times(segment)
        return f"{i + 1}\n{start_time} --> {end_time}\n{segment['text']}\n\n"

    def _vtt_format(self, i, segment):
        """Render one WebVTT cue (no cue number)."""
        start_time, end_time = self._cue_times(segment)
        return f"{start_time} --> {end_time}\n{segment['text']}\n\n"

    def _txt_format(self, i, segment):
        """Render the segment text on its own line."""
        return f"{segment['text']}\n"

    def _lrc_format(self, i, segment):
        """Render one LRC line: start tag followed by the text."""
        start_time = self.seconds_to_lrc_timestamp(segment['timestamp'][0])
        return f"{start_time}{segment['text']}\n"

    def get_subtitle(self, segments):
        """Return the whole document for *segments* as a single string.

        Leading whitespace is stripped from each text on a copy, so the
        caller's segments are left unmodified. A segment that cannot be
        formatted is printed and skipped.
        """
        parts = [self.header]
        for i, segment in enumerate(segments):
            cleaned = {**segment, 'text': segment['text'].lstrip()}
            try:
                parts.append(self.format_fn(i, cleaned))
            except Exception as e:
                # Best effort: one malformed segment must not lose the rest.
                print(e, segment)
        return "".join(parts)

    def write_subtitle(self, segments, output_file):
        """Write the rendered document to ``<output_file>.<ext>`` as UTF-8."""
        output_file_with_ext = f"{output_file}.{self.ext}"
        subtitle = self.get_subtitle(segments)

        with open(output_file_with_ext, 'w', encoding='utf-8') as f:
            f.write(subtitle)
|
| 75 |
+
|
| 76 |
+
def write_file(output_file,subtitle):
    """Write the string *subtitle* to *output_file* as UTF-8, replacing any existing content."""
    with open(output_file, mode='w', encoding='utf-8') as handle:
        handle.write(subtitle)
|
| 79 |
+
|
| 80 |
+
def subtitle_output(inputs, chunks):
    """Write *chunks* as .lrc, .srt, .vtt and .txt files in the working directory.

    Args:
        inputs: path of the source media; only its base name without the
            final extension is used to name the output files. Anything that
            is not a path (or an empty stem) falls back to "output".
        chunks: segments accepted by ``Subtitle.get_subtitle``.

    Returns:
        ``(lrc_text, files)`` where *files* lists the four written file
        names in the order lrc, srt, vtt, txt.
    """
    import os

    # Strip only the last extension ("my.song.mp3" -> "my.song"); cutting at
    # the first dot dropped part of the name, and calling .split on a
    # non-string input raised.
    file_name = ""
    if isinstance(inputs, (str, os.PathLike)):
        file_name = os.path.splitext(os.path.basename(os.fspath(inputs)))[0]
    file_name = file_name or "output"

    rendered = {
        ext: Subtitle(ext).get_subtitle(chunks)
        for ext in ("lrc", "srt", "vtt", "txt")
    }

    files_out = []
    for ext, content in rendered.items():
        path = f"{file_name}.{ext}"
        write_file(path, content)
        files_out.append(path)

    return rendered["lrc"], files_out
|
| 96 |
+
|
| 97 |
+
def text_output(inputs, text):
    """Write *text* to ``<stem>.txt`` in the working directory.

    Args:
        inputs: path of the source media; only its base name without the
            final extension is used to name the output file. Anything that
            is not a path (or an empty stem) falls back to "output".
        text: the transcript to write.

    Returns:
        ``(text, files)`` where *files* is a one-element list holding the
        written file name.
    """
    import os

    # Strip only the last extension ("my.song.mp3" -> "my.song"); cutting at
    # the first dot dropped part of the name, and calling .split on a
    # non-string input raised.
    file_name = ""
    if isinstance(inputs, (str, os.PathLike)):
        file_name = os.path.splitext(os.path.basename(os.fspath(inputs)))[0]
    file_name = file_name or "output"

    txt_path = f"{file_name}.txt"
    write_file(txt_path, text)
    return text, [txt_path]
|