| import streamlit as st | |
| import whisper | |
| from pytube import YouTube | |
| from pydub import AudioSegment | |
| import pandas as pd | |
| import anthropic | |
| from anthropic import Anthropic, HUMAN_PROMPT, AI_PROMPT | |
| import io | |
| from elevenlabs import generate, set_api_key | |
| import subprocess | |
| from moviepy.video.io.ffmpeg_tools import ffmpeg_extract_subclip | |
| import os | |
| from elevenlabs import set_api_key | |
# Configure the ElevenLabs SDK with the API key stored in Streamlit secrets.
# Module-level side effect: runs on every Streamlit script rerun.
set_api_key(st.secrets["xi_api_key"])
def shorten_audio(filename, duration_seconds=60):
    """Trim an audio file down to its opening seconds.

    Args:
        filename: Path to the source audio file (any format ffmpeg can read).
        duration_seconds: Length of the clip to keep, in seconds
            (default 60, preserving the original hard-coded behavior).

    Returns:
        Path of the exported clip ("cut_audio.mp4").
    """
    cut_filename = "cut_audio.mp4"
    audio = AudioSegment.from_file(filename)
    # pydub slices AudioSegments by milliseconds.
    cut_audio = audio[: duration_seconds * 1000]
    cut_audio.export(cut_filename, format="mp4")
    return cut_filename
def generate_translation(original_text, destination_language):
    """Translate a video transcript via the Anthropic completions API.

    Args:
        original_text: The source-language transcript text.
        destination_language: Target language name (e.g. "French").

    Returns:
        The model's completion text — the translated transcript.
    """
    # Named `client` so it does not shadow the imported `anthropic` module.
    client = Anthropic(api_key=st.secrets["anthropic"])
    # Few-shot framing so the model answers with the translation only,
    # without any preamble.
    prompt = (
        f"{HUMAN_PROMPT} Please translate this video transcript into {destination_language}. You will get "
        f"to the translation directly after I prompted 'the translation:'"
        f"{AI_PROMPT} Understood, I will get to the translation without any opening lines."
        f"{HUMAN_PROMPT} Great! this is the transcript: {original_text}; the translation:"
    )
    resp = client.completions.create(
        # The completions API requires the prompt to end with AI_PROMPT.
        prompt=f"{prompt} {AI_PROMPT}",
        model="claude-2",
        stop_sequences=[HUMAN_PROMPT],
        max_tokens_to_sample=900,
    )
    return resp.completion
def generate_dubs(text):
    """Synthesize `text` to speech with ElevenLabs and write it to output.mp3.

    Returns the path of the generated mp3 file.
    """
    output_path = "output.mp3"
    set_api_key(st.secrets["xi_api_key"])
    raw_audio = generate(
        text=text,
        voice="Liam Evans",
        model="eleven_multilingual_v1",
    )
    # Round-trip the raw bytes through pydub so they are written out as a
    # well-formed mp3 file.
    segment = AudioSegment.from_file(io.BytesIO(raw_audio), format="mp3")
    segment.export(output_path, format="mp3")
    return output_path
def combine_video(video_filename, audio_filename, duration_seconds=60):
    """Mux a dubbed audio track onto the opening seconds of a video.

    Args:
        video_filename: Path to the downloaded (video-only) stream.
        audio_filename: Path to the dubbed audio track (mp3).
        duration_seconds: Length of the video clip to keep (default 60,
            matching the audio window produced by shorten_audio).

    Returns:
        Path of the muxed file ("output.mp4"). The file may not exist if
        ffmpeg failed; the caller checks os.path.exists() afterwards.
    """
    # Cut the video to the same window as the dubbed audio.
    ffmpeg_extract_subclip(
        video_filename, 0, duration_seconds, targetname="cut_video.mp4"
    )
    output_filename = "output.mp4"
    # Copy the video stream untouched; re-encode the audio to AAC so the
    # mp3 dub fits the mp4 container. List form keeps shell=False (safe
    # even though audio_filename is interpolated).
    command = [
        "ffmpeg",
        "-y",  # overwrite any previous output without prompting
        "-i", "cut_video.mp4",
        "-i", audio_filename,
        "-c:v", "copy",
        "-c:a", "aac",
        output_filename,
    ]
    subprocess.run(command)
    return output_filename
# --- Streamlit UI: download a YouTube video, transcribe, translate, dub ---
st.title("AutoDubs πΊπ΅")
link = st.text_input("Link to Youtube Video", key="link")
language = st.selectbox(
    "Translate to",
    ("French", "German", "Hindi", "Italian", "Polish", "Portuguese", "Spanish"),
)
if st.button("Transcribe!"):
    print(f"downloading from link: {link}")
    model = whisper.load_model("base")
    # BUG FIX: pytube's YouTube() raises on a bad/unreachable URL rather than
    # returning None, so the original `if yt is not None` guard could never
    # fire. Catch the failure and surface it in the UI instead.
    try:
        yt = YouTube(link)
    except Exception:
        yt = None
        st.error("Could not load that YouTube link — please check the URL.")
    if yt is not None:
        st.subheader(yt.title)
        st.image(yt.thumbnail_url)
        audio_name = st.caption("Downloading audio stream...")
        # .first() returns None when no audio-only stream exists.
        audio_stream = yt.streams.filter(only_audio=True).first()
        filename = audio_stream.download() if audio_stream else None
        print("filename: ", filename)
        if filename:
            audio_name.caption(filename)
            cut_filename = shorten_audio(filename)
            transcription = model.transcribe(cut_filename)
            print(transcription)
            if transcription:
                df = pd.DataFrame(
                    transcription["segments"], columns=["start", "end", "text"]
                )
                st.dataframe(df)
                print(transcription["text"])
                dubbing_caption = st.caption("Dubbing...")
                translation = generate_translation(transcription["text"], language)
                # BUG FIX: update the existing caption widget in place; the
                # original rebound dubbing_caption to a brand-new st.caption,
                # leaving a stale "Dubbing..." widget on screen.
                dubbing_caption.caption("Begin dubbing...")
                dubs_audio = generate_dubs(translation)
                dubbing_caption.caption("Dubs generated! combining with the video...")
                # Guard against .first() returning None here as well.
                video_stream = yt.streams.filter(only_video=True).first()
                video_filename = video_stream.download() if video_stream else None
                if video_filename:
                    dubbing_caption.caption(
                        "Video downloaded! combining the video and the dubs..."
                    )
                    output_filename = combine_video(video_filename, dubs_audio)
                    if os.path.exists(output_filename):
                        dubbing_caption.caption("Video successfully dubbed! Enjoy! π")
                        st.video(output_filename)