import datetime
import subprocess
from pathlib import Path

import numpy as np
import pandas as pd
import torch
import whisper
from fastcore.foundation import L
from fastcore.xtras import working_directory


def start_app():
    # Launch the Streamlit front end defined in app.py.
    subprocess.run(["streamlit", "run", "app.py"])


def get_audio(url: str):
    # Download the audio track of a YouTube video as WAV into ./audio.
    audio_path = Path("./audio")
    # Ensure the download directory exists before changing into it.
    audio_path.mkdir(exist_ok=True)
    with working_directory(audio_path):
        subprocess.run(["yt-dlp", "-x", "--audio-format", "wav", url])


def get_v_from_url(url):
    # Extract the video id (the `v` query parameter) from a YouTube URL.
    _, val = url.split('?v=')
    return val.split('&')[0]


def annotate(audio_src, model_size="tiny"):
    # Transcribe an audio file with Whisper, using a GPU if one is available.
    device = 'cuda:0' if torch.cuda.is_available() else 'cpu'
    model = whisper.load_model(model_size, device=device)
    result = model.transcribe(audio_src)
    return result


def get_time(seconds):
    # Render seconds as a zero-padded H:MM:SS timestamp
    # (fractional seconds are kept when present).
    return "{:0>8}".format(str(datetime.timedelta(seconds=seconds)))


def df_from_result(result):
    # Flatten Whisper's segment list into a DataFrame with readable timestamps.
    df = pd.json_normalize(result["segments"])
    df["start"] = df["start"].apply(get_time)
    df["end"] = df["end"].apply(get_time)
    return df


def find_word_timestamp(df, *words):
    # For each word, collect the start times of the segments whose text contains it.
    l = L()
    for word in words:
        vals = df["text"].str.find(word).values
        # str.find returns -1 when the word is absent, so keep indices >= 0.
        arr = np.where(vals >= 0)
        times = list(df.iloc[arr]["start"].values)
        # Strip the fractional-second part from each timestamp.
        nt = L(times).map(lambda x: x.split(".")[:-1])
        l.append(nt)
    return l


def generate_srt(df):
    # Build an SRT-formatted string from the start/end/text columns.
    s = ""
    # SRT subtitle numbering starts at 1.
    for i, (start, end, text) in enumerate(df[["start", "end", "text"]].values, start=1):
        # SRT uses a comma as the decimal separator in timestamps.
        start = start.replace(".", ",")
        end = end.replace(".", ",")
        s += f"{i}\n"
        s += f"{start} --> {end}\n"
        s += f"{text.strip()}\n\n"
    return s


def write_srt(s, name):
    # Write the SRT string to `<name>.srt`; the context manager closes the file.
    with open(f"{name}.srt", "w") as f:
        f.write(s)
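

# A minimal end-to-end sketch of how these helpers chain together, assuming
# yt-dlp and ffmpeg are installed. The URL below is a hypothetical stand-in,
# and because the WAV filename produced by yt-dlp depends on the video title,
# the sketch simply picks the first .wav file found under ./audio.
if __name__ == "__main__":
    url = "https://www.youtube.com/watch?v=EXAMPLE_ID"  # hypothetical URL
    get_audio(url)
    audio_file = str(next(Path("./audio").glob("*.wav")))
    result = annotate(audio_file, model_size="tiny")
    df = df_from_result(result)
    print(find_word_timestamp(df, "python"))
    write_srt(generate_srt(df), get_v_from_url(url))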