Spaces:

lcjln
/

AIME

Sleeping

App Files Community

lcjln committed on Sep 3, 2024

Commit

8f5fb37

verified ·

1 Parent(s): 3085b46

Update app.py

Browse files

Files changed (1) hide show

app.py +54 -48

app.py CHANGED Viewed

@@ -3,10 +3,8 @@ import streamlit as st
 import torch
 from transformers import WhisperForConditionalGeneration, WhisperProcessor
 import librosa
-import moviepy.editor as mp
 import srt
 from datetime import timedelta
-from tempfile import NamedTemporaryFile
 # 모델 및 프로세서 로드
 @st.cache_resource
@@ -19,65 +17,73 @@ model, processor = load_model()
 # 웹 애플리케이션 인터페이스
 st.title("Whisper 자막 생성기")
-st.write("영상 파일을 업로드하여 자막을 생성하세요.")
-# 영상 파일 업로드
-uploaded_file = st.file_uploader("여기에 영상 파일을 드래그 앤 드롭 하세요", type=["mp4", "mkv", "mov"])
-if uploaded_file is not None:
-    # 진행바 초기화
-    progress_bar = st.progress(0)
-    # 업로드된 영상 파일에서 오디오 추출
-    st.write("영상을 처리하는 중입니다...")
-    progress_bar.progress(10)
-    with NamedTemporaryFile(suffix=".mp4") as temp_video_file:
-        temp_video_file.write(uploaded_file.read())
-        video = mp.VideoFileClip(temp_video_file.name)
-        audio_path = temp_video_file.name.replace(".mp4", ".wav")
-        video.audio.write_audiofile(audio_path, codec='pcm_s16le')
-    progress_bar.progress(30)
-    # 오디오 파일 로드 및 처리
-    st.write("오디오 파일을 처리하는 중입니다...")
-    audio, sr = librosa.load(audio_path, sr=16000)
-    progress_bar.progress(50)
-    # Whisper 모델로 변환
-    st.write("모델을 통해 자막을 생성하는 중입니다...")
-    inputs = processor(audio, return_tensors="pt", sampling_rate=16000)
-    with torch.no_grad():
-        predicted_ids = model.generate(inputs["input_features"], max_length=2048)
-    transcription = processor.batch_decode(predicted_ids, skip_special_tokens=True)[0].strip()
-    progress_bar.progress(80)
-    # 자막을 SRT 파일로 저장
-    st.write("SRT 파일을 생성하는 중입니다...")
-    lines = transcription.split(". ")
-    subs = []
-    step = len(audio) / sr / len(lines)
-    start_time = 0.0
-    for i, line in enumerate(lines):
-        end_time = start_time + step
-        subs.append(srt.Subtitle(index=i+1, start=timedelta(seconds=start_time), end=timedelta(seconds=end_time), content=line))
-        start_time = end_time
-    srt_content = srt.compose(subs)
-    srt_file_path = audio_path.replace(".wav", ".srt")
-    with open(srt_file_path, "w", encoding="utf-8") as f:
-        f.write(srt_content)
-    progress_bar.progress(100)
-    st.success("SRT 파일이 성공적으로 생성되었습니다!")
-    # SRT 파일 다운로드 버튼
-    with open(srt_file_path, "rb") as srt_file:
-        st.download_button(label="SRT 파일 다운로드", data=srt_file, file_name=os.path.basename(srt_file_path), mime="text/srt")

 import torch
 from transformers import WhisperForConditionalGeneration, WhisperProcessor
 import librosa
 import srt
 from datetime import timedelta
 # 모델 및 프로세서 로드
 @st.cache_resource
 # 웹 애플리케이션 인터페이스
 st.title("Whisper 자막 생성기")
+st.write("WAV 파일을 업로드하여 자막을 생성하세요.")
+# 여러 WAV 파일 업로드
+uploaded_files = st.file_uploader("여기에 WAV 파일들을 드래그 앤 드롭 하세요", type=["wav"], accept_multiple_files=True)
+# 파일 목록을 보여줌
+if uploaded_files:
+    st.write("업로드된 파일 목록:")
+    for uploaded_file in uploaded_files:
+        st.write(uploaded_file.name)
+    # 실행 버튼
+    if st.button("실행"):
+        combined_subs = []
+        last_end_time = timedelta(0)
+        subtitle_index = 1
+        for uploaded_file in uploaded_files:
+            st.write(f"처리 중: {uploaded_file.name}")
+            # 진행바 초기화
+            progress_bar = st.progress(0)
+            # WAV 파일 로드 및 처리
+            st.write("오디오 파일을 처리하는 중입니다...")
+            audio, sr = librosa.load(uploaded_file, sr=16000)
+            progress_bar.progress(50)
+            # Whisper 모델로 변환
+            st.write("모델을 통해 자막을 생성하는 중입니다...")
+            inputs = processor(audio, return_tensors="pt", sampling_rate=16000)
+            with torch.no_grad():
+                predicted_ids = model.generate(inputs["input_features"], max_length=2048)
+            transcription = processor.batch_decode(predicted_ids, skip_special_tokens=True)[0].strip()
+            progress_bar.progress(80)
+            # SRT 자막 생성
+            st.write("SRT 파일을 생성하는 중입니다...")
+            lines = transcription.split(". ")
+            step = len(audio) / sr / len(lines)
+            start_time = last_end_time
+            for line in lines:
+                end_time = start_time + timedelta(seconds=step)
+                combined_subs.append(
+                    srt.Subtitle(index=subtitle_index, start=start_time, end=end_time, content=line)
+                )
+                start_time = end_time
+                subtitle_index += 1
+            last_end_time = start_time  # 다음 파일의 시작 시간을 조정하기 위해 마지막 끝 시간을 기록
+            progress_bar.progress(100)
+        # 모든 자막을 하나의 SRT 파일로 저장
+        st.write("최종 SRT 파일을 생성하는 중입니다...")
+        srt_content = srt.compose(combined_subs)
+        final_srt_file_path = "combined_output.srt"
+        with open(final_srt_file_path, "w", encoding="utf-8") as f:
+            f.write(srt_content)
+        st.success("최종 SRT 파일이 성공적으로 생성되었습니다!")
+        # 최종 SRT 파일 다운로드 버튼
+        with open(final_srt_file_path, "rb") as srt_file:
+            st.download_button(label="SRT 파일 다운로드", data=srt_file, file_name=final_srt_file_path, mime="text/srt")