Update app.py
Browse files
app.py
CHANGED
|
@@ -3,10 +3,8 @@ import streamlit as st
|
|
| 3 |
import torch
|
| 4 |
from transformers import WhisperForConditionalGeneration, WhisperProcessor
|
| 5 |
import librosa
|
| 6 |
-
import moviepy.editor as mp
|
| 7 |
import srt
|
| 8 |
from datetime import timedelta
|
| 9 |
-
from tempfile import NamedTemporaryFile
|
| 10 |
|
| 11 |
# 모델 및 프로세서 로드
|
| 12 |
@st.cache_resource
|
|
@@ -19,65 +17,73 @@ model, processor = load_model()
|
|
| 19 |
|
| 20 |
# 웹 애플리케이션 인터페이스
|
| 21 |
st.title("Whisper μλ§ μμ±κΈ°")
|
| 22 |
-
st.write("
|
| 23 |
|
| 24 |
-
#
|
| 25 |
-
|
| 26 |
|
| 27 |
-
|
| 28 |
-
|
| 29 |
-
|
| 30 |
-
|
| 31 |
-
|
| 32 |
-
st.write("μμμ μ²λ¦¬νλ μ€μ
λλ€...")
|
| 33 |
-
progress_bar.progress(10)
|
| 34 |
|
| 35 |
-
|
| 36 |
-
|
| 37 |
-
|
| 38 |
-
|
| 39 |
-
|
| 40 |
|
| 41 |
-
|
|
|
|
| 42 |
|
| 43 |
-
|
| 44 |
-
|
| 45 |
-
audio, sr = librosa.load(audio_path, sr=16000)
|
| 46 |
|
| 47 |
-
|
|
|
|
|
|
|
| 48 |
|
| 49 |
-
|
| 50 |
-
st.write("λͺ¨λΈμ ν΅ν΄ μλ§μ μμ±νλ μ€μ
λλ€...")
|
| 51 |
-
inputs = processor(audio, return_tensors="pt", sampling_rate=16000)
|
| 52 |
-
with torch.no_grad():
|
| 53 |
-
predicted_ids = model.generate(inputs["input_features"], max_length=2048)
|
| 54 |
|
| 55 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
| 56 |
|
| 57 |
-
|
| 58 |
|
| 59 |
-
|
| 60 |
-
st.write("SRT νμΌμ μμ±νλ μ€μ
λλ€...")
|
| 61 |
-
lines = transcription.split(". ")
|
| 62 |
-
subs = []
|
| 63 |
-
step = len(audio) / sr / len(lines)
|
| 64 |
-
start_time = 0.0
|
| 65 |
|
| 66 |
-
|
| 67 |
-
|
| 68 |
-
|
| 69 |
-
|
|
|
|
| 70 |
|
| 71 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 72 |
|
| 73 |
-
|
| 74 |
-
with open(srt_file_path, "w", encoding="utf-8") as f:
|
| 75 |
-
f.write(srt_content)
|
| 76 |
|
| 77 |
-
|
| 78 |
|
| 79 |
-
|
|
|
|
|
|
|
| 80 |
|
| 81 |
-
|
| 82 |
-
|
| 83 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 3 |
import torch
|
| 4 |
from transformers import WhisperForConditionalGeneration, WhisperProcessor
|
| 5 |
import librosa
|
|
|
|
| 6 |
import srt
|
| 7 |
from datetime import timedelta
|
|
|
|
| 8 |
|
| 9 |
# 모델 및 프로세서 로드
|
| 10 |
@st.cache_resource
|
|
|
|
| 17 |
|
| 18 |
# 웹 애플리케이션 인터페이스
|
| 19 |
st.title("Whisper 자막 생성기")
st.write("WAV 파일을 업로드하여 자막을 생성하세요.")

# Whisper checkpoints expect 16 kHz mono audio.
SAMPLE_RATE = 16000
# The Whisper feature extractor pads/truncates every input to a 30 s window,
# so longer recordings must be fed to the model one window at a time.
CHUNK_SECONDS = 30
# Upper bound originally requested for generation; capped per model below.
REQUESTED_MAX_LENGTH = 2048


def _transcribe_chunk(samples):
    """Return the stripped transcription of one audio chunk (at most 30 s).

    Uses the module-level ``processor`` and ``model`` returned by
    ``load_model()``.
    """
    inputs = processor(samples, return_tensors="pt", sampling_rate=SAMPLE_RATE)
    # The decoder cannot produce more positions than it was built with, so
    # never ask for more than the model's own limit (448 for stock Whisper).
    max_length = min(
        REQUESTED_MAX_LENGTH,
        getattr(model.config, "max_target_positions", 448),
    )
    with torch.no_grad():
        predicted_ids = model.generate(inputs["input_features"], max_length=max_length)
    return processor.batch_decode(predicted_ids, skip_special_tokens=True)[0].strip()


def _timed_subtitles(sentences, start, duration_seconds, first_index):
    """Spread ``sentences`` evenly over a time window and return SRT cues.

    ``sentences`` must be non-empty. ``start`` is a ``timedelta`` marking the
    beginning of the window and ``duration_seconds`` its length; cue indices
    count up from ``first_index``.
    """
    step = timedelta(seconds=duration_seconds / len(sentences))
    return [
        srt.Subtitle(
            index=first_index + position,
            start=start + position * step,
            end=start + (position + 1) * step,
            content=sentence,
        )
        for position, sentence in enumerate(sentences)
    ]


# Accept several WAV files at once; they are processed in upload order and
# merged into a single subtitle track.
uploaded_files = st.file_uploader(
    "여기에 WAV 파일들을 드래그 앤 드롭 하세요",
    type=["wav"],
    accept_multiple_files=True,
)

if uploaded_files:
    # Show what was uploaded before the user starts the run.
    st.write("업로드된 파일 목록:")
    for uploaded_file in uploaded_files:
        st.write(uploaded_file.name)

    # Run button: everything below executes only on the click that triggers it.
    if st.button("실행"):
        combined_subs = []
        # Where the current file begins on the merged timeline.
        file_offset = timedelta(0)
        chunk_size = CHUNK_SECONDS * SAMPLE_RATE

        for uploaded_file in uploaded_files:
            st.write(f"처리 중: {uploaded_file.name}")
            progress_bar = st.progress(0)

            # Decode and resample the upload to the rate Whisper expects.
            st.write("오디오 파일을 처리하는 중입니다...")
            audio, _ = librosa.load(uploaded_file, sr=SAMPLE_RATE)
            progress_bar.progress(50)

            # Transcribe window by window so nothing past 30 s is dropped.
            st.write("모델을 통해 자막을 생성하는 중입니다...")
            chunk_starts = range(0, len(audio), chunk_size)
            segments = []
            for done, begin in enumerate(chunk_starts, start=1):
                chunk = audio[begin:begin + chunk_size]
                segments.append((begin, len(chunk), _transcribe_chunk(chunk)))
                # Transcription occupies the 50%..80% span of the bar.
                progress_bar.progress(50 + int(30 * done / len(chunk_starts)))
            progress_bar.progress(80)

            # Turn each window's text into evenly spaced cues inside that window.
            st.write("SRT 파일을 생성하는 중입니다...")
            for begin, length, transcription in segments:
                sentences = [
                    part.strip()
                    for part in transcription.split(". ")
                    if part.strip()
                ]
                if not sentences:
                    # Silent window: emit no cue rather than an empty one.
                    continue
                combined_subs.extend(
                    _timed_subtitles(
                        sentences,
                        start=file_offset + timedelta(seconds=begin / SAMPLE_RATE),
                        duration_seconds=length / SAMPLE_RATE,
                        first_index=len(combined_subs) + 1,
                    )
                )

            # The next file starts where this one ends on the merged timeline.
            file_offset += timedelta(seconds=len(audio) / SAMPLE_RATE)
            progress_bar.progress(100)

        # Serialize all cues into one SRT document.
        st.write("최종 SRT 파일을 생성하는 중입니다...")
        srt_content = srt.compose(combined_subs)
        st.success("최종 SRT 파일이 성공적으로 생성되었습니다!")

        # Hand the text straight to the browser; a fixed file on disk would be
        # shared (and overwritten) by every concurrent session of the app.
        st.download_button(
            label="SRT 파일 다운로드",
            data=srt_content,
            file_name="combined_output.srt",
            mime="text/srt",
        )
|