File size: 4,236 Bytes
f86362b 7abe680 f86362b 7abe680 f86362b 7abe680 f86362b 0452b90 f86362b 7abe680 f86362b 0c670e4 f86362b 7abe680 f86362b 7abe680 f86362b e69dac4 f86362b 7abe680 f86362b 7abe680 f86362b 7abe680 f86362b 7abe680 f86362b 7abe680 f86362b 7abe680 f86362b 7abe680 f86362b |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 |
import streamlit as st
import whisper
from pytube import YouTube
from pydub import AudioSegment
import pandas as pd
import anthropic
from anthropic import Anthropic, HUMAN_PROMPT, AI_PROMPT
import io
from elevenlabs import generate, set_api_key
import subprocess
from moviepy.video.io.ffmpeg_tools import ffmpeg_extract_subclip
import os
from elevenlabs import set_api_key
# Register the ElevenLabs API key, read from the Streamlit secret "xi_api_key",
# as soon as the script starts.
set_api_key(st.secrets["xi_api_key"])
def shorten_audio(filename, duration_ms=60 * 1000, cut_filename="cut_audio.mp4"):
    """Write the leading portion of an audio file to a new file.

    Args:
        filename: Path of the audio file to read.
        duration_ms: Length of the leading slice to keep, in milliseconds
            (pydub slices are indexed in milliseconds). Defaults to one
            minute, the value that used to be hard-coded.
        cut_filename: Path the trimmed audio is exported to. Defaults to
            ``"cut_audio.mp4"``, the value that used to be hard-coded.

    Returns:
        The path the trimmed audio was written to (``cut_filename``).
    """
    audio = AudioSegment.from_file(filename)
    # Slicing past the end of a shorter clip simply yields the whole clip.
    cut_audio = audio[:duration_ms]
    cut_audio.export(cut_filename, format="mp4")
    return cut_filename
def generate_translation(original_text, destination_language):
    """Translate a transcript with Claude and return the raw completion.

    Args:
        original_text: Transcript text to translate.
        destination_language: Name of the language to translate into.

    Returns:
        The ``completion`` string of the Anthropic response.
    """
    client = Anthropic(api_key=st.secrets["anthropic"])

    # The conversation is primed with a short exchange so the model answers
    # with the translation directly, then left open on an assistant turn.
    instruction_turn = (
        f"{HUMAN_PROMPT} Please translate this video transcript into {destination_language}. You will get "
        "to the translation directly after I prompted 'the translation:'"
    )
    acknowledgement_turn = (
        f"{AI_PROMPT} Understood, I will get to the translation without any opening lines."
    )
    transcript_turn = (
        f"{HUMAN_PROMPT} Great! this is the transcript: {original_text}; the translation:"
    )
    full_prompt = "".join(
        (instruction_turn, acknowledgement_turn, transcript_turn, " ", AI_PROMPT)
    )

    response = client.completions.create(
        prompt=full_prompt,
        model="claude-2",
        stop_sequences=[HUMAN_PROMPT],
        max_tokens_to_sample=900,
    )
    translated = response.completion
    print(translated)
    return translated
def generate_dubs(text):
    """Synthesize ``text`` as speech and save it to ``output.mp3``.

    Args:
        text: The text to be spoken.

    Returns:
        The path of the MP3 file that was written (``"output.mp3"``).
    """
    set_api_key(st.secrets["xi_api_key"])
    speech_bytes = generate(
        text=text,
        voice="Liam Evans",
        model="eleven_multilingual_v1",
    )
    # Decode the generated MP3 from memory, then let pydub write it to disk.
    dubbed_track = AudioSegment.from_file(io.BytesIO(speech_bytes), format="mp3")
    target_path = "output.mp3"
    dubbed_track.export(target_path, format="mp3")
    return target_path
def combine_video(video_filename, audio_filename):
    """Mux the first 60 seconds of a video with a separate audio track.

    The video is first trimmed to ``cut_video.mp4``; ffmpeg then copies its
    video stream and encodes the supplied audio as AAC into ``output.mp4``.

    Args:
        video_filename: Path of the source video file.
        audio_filename: Path of the audio file to use as the soundtrack.

    Returns:
        The output path ``"output.mp4"``. The file exists only if ffmpeg
        succeeded on this call, so callers can use ``os.path.exists`` on the
        returned path as a success check.
    """
    cut_video_filename = "cut_video.mp4"
    output_filename = "output.mp4"

    # Remove any result left by a previous run; otherwise a failed ffmpeg
    # invocation would go unnoticed and the stale video would be shown.
    if os.path.exists(output_filename):
        os.remove(output_filename)

    ffmpeg_extract_subclip(video_filename, 0, 60, targetname=cut_video_filename)
    command = [
        "ffmpeg",
        "-y",
        "-i",
        cut_video_filename,
        "-i",
        audio_filename,
        "-c:v",
        "copy",
        "-c:a",
        "aac",
        output_filename,
    ]
    result = subprocess.run(command)
    if result.returncode != 0 and os.path.exists(output_filename):
        # ffmpeg failed part-way: discard the incomplete file rather than
        # handing a broken video back to the caller.
        os.remove(output_filename)
    return output_filename
def _run_dubbing_pipeline(link, language):
    """Download, transcribe, translate, dub and display the video at ``link``.

    Progress is reported through Streamlit captions. The function returns
    early, after showing a message where one is useful, whenever a step
    yields nothing to continue with.

    Args:
        link: URL of the YouTube video entered by the user.
        language: Name of the language to translate the transcript into.
    """
    if not link or not link.strip():
        # An empty link would otherwise fail inside the YouTube constructor.
        st.warning("Please enter a link to a YouTube video first.")
        return

    print(f"downloading from link: {link}")
    model = whisper.load_model("base")
    yt = YouTube(link)
    st.subheader(yt.title)
    st.image(yt.thumbnail_url)

    audio_name = st.caption("Downloading audio stream...")
    # first() gives None when no stream matches the filter.
    audio_stream = yt.streams.filter(only_audio=True).first()
    if audio_stream is None:
        st.error("No audio stream is available for this video.")
        return
    filename = audio_stream.download()
    print("filename: ", filename)
    if not filename:
        return
    audio_name.caption(filename)

    cut_filename = shorten_audio(filename)
    transcription = model.transcribe(cut_filename)
    print(transcription)
    if not transcription:
        return
    df = pd.DataFrame(
        transcription["segments"], columns=["start", "end", "text"]
    )
    st.dataframe(df)
    print(transcription["text"])

    dubbing_caption = st.caption("Dubbing...")
    translation = generate_translation(transcription["text"], language)
    # Update the existing caption instead of adding a second one, so a single
    # status line tracks the rest of the pipeline.
    dubbing_caption.caption("Begin dubbing...")
    dubs_audio = generate_dubs(translation)
    dubbing_caption.caption("Dubs generated! combining with the video...")

    video_stream = yt.streams.filter(only_video=True).first()
    if video_stream is None:
        st.error("No video stream is available for this video.")
        return
    video_filename = video_stream.download()
    if not video_filename:
        return
    dubbing_caption.caption(
        "Video downloaded! combining the video and the dubs..."
    )

    output_filename = combine_video(video_filename, dubs_audio)
    if os.path.exists(output_filename):
        dubbing_caption.caption("Video successfully dubbed! Enjoy! 😀")
        st.video(output_filename)
    else:
        st.error("Combining the video and the dubs failed.")


# Page layout: title, video link input and target-language picker.
st.title("AutoDubs 📺🎵")
link = st.text_input("Link to Youtube Video", key="link")
language = st.selectbox(
    "Translate to",
    ("French", "German", "Hindi", "Italian", "Polish", "Portuguese", "Spanish"),
)
if st.button("Transcribe!"):
    _run_dubbing_pipeline(link, language)
|