Spaces:

Artificial-superintelligence
/

Aita

Paused

App Files Files Community

Aita / app.py

Artificial-superintelligence

Update app.py

3369106 verified over 1 year ago

raw

history blame contribute delete

7.48 kB

	import os
	import re
	import tempfile
	import warnings

	import librosa
	import numpy as np
	import streamlit as st
	import torch
	import whisper
	from gtts import gTTS
	from moviepy.editor import VideoFileClip, AudioFileClip, concatenate_audioclips
	from pydub import AudioSegment
	from transformers import (
	    MBart50TokenizerFast,
	    MBartForConditionalGeneration,
	    MBartTokenizer,
	)
	warnings.filterwarnings('ignore')

	# Initialize models and configs
	@st.cache_resource
	def load_models():
	    """Load the speech-recognition and translation models.

	    The function is wrapped in ``st.cache_resource`` so the returned
	    objects are reused instead of being rebuilt on every call.

	    Returns:
	        tuple: ``(whisper_model, tokenizer, model)`` where ``whisper_model``
	        is the Whisper "large" model, and ``tokenizer`` / ``model`` are the
	        tokenizer and sequence-to-sequence model of the mBART-50
	        many-to-many checkpoint.
	    """
	    checkpoint = "facebook/mbart-large-50-many-to-many-mmt"

	    whisper_model = whisper.load_model("large")
	    # The mBART-50 checkpoint needs the mBART-50 tokenizer: the plain
	    # MBartTokenizer only knows the 25 original language codes, which do
	    # not include "ta_IN", so the Tamil lookup in translate_text would fail.
	    tokenizer = MBart50TokenizerFast.from_pretrained(checkpoint)
	    model = MBartForConditionalGeneration.from_pretrained(checkpoint)
	    return whisper_model, tokenizer, model

	# Per-library identifiers and audio defaults for the Tamil target language.
	TAMIL_CONFIG = dict(
	    code='ta',
	    whisper_code='tamil',
	    mbart_code='ta_IN',
	    gtts_code='ta',
	    voice_speed=1.1,  # Adjust speed for better sync
	    sample_rate=22050,
	)

	# Page-level Streamlit settings: browser tab title, icon and wide layout.
	st.set_page_config(
	    page_title="Tamil Video Dubbing AI",
	    page_icon="🎥",
	    layout="wide",
	)

	def create_custom_style():
	    """Inject the app's custom CSS (background, padding, button colours)."""
	    # The literal is passed through as raw HTML, so its content is kept
	    # exactly as authored.
	    stylesheet = """
	<style>
	.stApp {
	background-color: #f5f5f5;
	}
	.main {
	padding: 2rem;
	}
	.stButton>button {
	background-color: #FF4B4B;
	color: white;
	font-weight: bold;
	}
	</style>
	"""
	    st.markdown(stylesheet, unsafe_allow_html=True)

	# Invoke the styling hook at module level so the CSS is emitted before
	# main() builds the rest of the UI.
	create_custom_style()

	def _split_into_chunks(text, max_chars):
	    """Pack whole sentences from *text* into chunks of at most *max_chars*.

	    A single sentence longer than *max_chars* is kept intact as its own
	    chunk; the caller's tokenizer truncation is the final safety net.
	    """
	    chunks = []
	    current = ""
	    # Split after sentence-ending punctuation (including the danda).
	    for sentence in re.split(r"(?<=[.!?।])\s+", text):
	        candidate = f"{current} {sentence}" if current else sentence
	        if current and len(candidate) > max_chars:
	            chunks.append(current)
	            current = sentence
	        else:
	            current = candidate
	    if current:
	        chunks.append(current)
	    return chunks


	def translate_text(text, tokenizer, model):
	    """Translate *text* into Tamil using an mBART model.

	    The model only sees up to 512 tokens per call, so a long transcript is
	    split on sentence boundaries and translated chunk by chunk instead of
	    being silently cut off after the first window.

	    Args:
	        text: Source text to translate. ``None``, empty or whitespace-only
	            input yields an empty string without touching the model.
	        tokenizer: mBART tokenizer exposing ``lang_code_to_id`` and
	            ``batch_decode``.
	        model: mBART model exposing ``generate``.

	    Returns:
	        str: The Tamil translation, with chunk translations joined by a
	        single space.
	    """
	    # Character budget per chunk, chosen to stay well inside the 512-token
	    # window used below.
	    max_chunk_chars = 400

	    text = text.strip() if text else ""
	    if not text:
	        return ""

	    tamil_token_id = tokenizer.lang_code_to_id["ta_IN"]

	    translated_parts = []
	    for chunk in _split_into_chunks(text, max_chunk_chars):
	        inputs = tokenizer(
	            chunk,
	            return_tensors="pt",
	            padding=True,
	            truncation=True,
	            max_length=512,
	        )
	        # Plain beam search: sampling would make the translated script
	        # differ between runs on the same video.
	        translated_tokens = model.generate(
	            **inputs,
	            forced_bos_token_id=tamil_token_id,
	            num_beams=5,
	            length_penalty=1.0,
	            max_length=512,
	        )
	        translated_parts.append(
	            tokenizer.batch_decode(translated_tokens, skip_special_tokens=True)[0]
	        )

	    return " ".join(translated_parts)

	def process_audio_for_sync(audio_path, target_speed=1.0):
	    """Load an audio file and rescale its playback speed.

	    The speed change reinterprets the raw samples at a scaled frame rate
	    and then resamples back to the original rate. This shortens or
	    lengthens the clip by ``target_speed``, but it also shifts the pitch by
	    the same factor; it is not a pitch-preserving time stretch.

	    Args:
	        audio_path: Path of the audio file to load.
	        target_speed: Playback-speed multiplier; ``1.0`` leaves the audio
	            untouched. Must be greater than zero.

	    Returns:
	        AudioSegment: The loaded (and possibly speed-adjusted) audio.

	    Raises:
	        ValueError: If ``target_speed`` is not positive.
	    """
	    # Reject bad speeds up front; a zero frame rate would otherwise only
	    # fail deep inside the resampling step with an unhelpful message.
	    if target_speed <= 0:
	        raise ValueError(f"target_speed must be positive, got {target_speed!r}")

	    audio = AudioSegment.from_file(audio_path)

	    if target_speed != 1.0:
	        # Clamp to 1 so very small speeds cannot round down to a zero rate.
	        scaled_rate = max(1, int(audio.frame_rate * target_speed))
	        respeeded = audio._spawn(audio.raw_data, overrides={
	            "frame_rate": scaled_rate
	        })
	        # Resample so the result keeps the original frame rate.
	        audio = respeeded.set_frame_rate(audio.frame_rate)

	    return audio

	def _dub_video_to_tamil(video_file, voice_speed, whisper_model, tokenizer,
	                        translation_model, progress_bar, status_text):
	    """Run the dubbing pipeline for one uploaded video.

	    Steps: save the upload, extract its audio, transcribe it, translate the
	    transcript to Tamil, synthesise Tamil speech, and mux that speech back
	    onto the video. All intermediate files live in one temporary directory
	    that is removed when the function exits, whether or not a step failed.

	    Args:
	        video_file: The uploaded file object from ``st.file_uploader``.
	        voice_speed: Speed multiplier passed to ``process_audio_for_sync``.
	        whisper_model: Loaded Whisper model.
	        tokenizer: Translation tokenizer passed to ``translate_text``.
	        translation_model: Translation model passed to ``translate_text``.
	        progress_bar: Streamlit progress element to update.
	        status_text: Streamlit placeholder for the current step label.

	    Returns:
	        tuple: ``(video_bytes, translated_text)`` - the dubbed MP4 as bytes
	        and the Tamil script.

	    Raises:
	        ValueError: If the video has no audio track or no speech was
	            transcribed.
	    """
	    # Keep the real container extension instead of always assuming .mp4.
	    upload_name = getattr(video_file, "name", "") or ""
	    suffix = os.path.splitext(upload_name)[1].lower() or ".mp4"

	    with tempfile.TemporaryDirectory() as work_dir:
	        source_path = os.path.join(work_dir, f"source{suffix}")
	        extracted_audio_path = os.path.join(work_dir, "original.wav")
	        tts_audio_path = os.path.join(work_dir, "tamil_tts.mp3")
	        final_audio_path = os.path.join(work_dir, "tamil_synced.wav")
	        final_video_path = os.path.join(work_dir, "dubbed.mp4")

	        # getvalue() returns the whole upload regardless of how far the
	        # stream has already been read.
	        with open(source_path, "wb") as f:
	            f.write(video_file.getvalue())

	        # Extract audio
	        status_text.text("Extracting audio...")
	        video = VideoFileClip(source_path)
	        try:
	            if video.audio is None:
	                raise ValueError("The uploaded video has no audio track to dub.")
	            video.audio.write_audiofile(
	                extracted_audio_path, fps=TAMIL_CONFIG['sample_rate']
	            )
	            progress_bar.progress(20)

	            # Transcribe. The source language is auto-detected: Tamil is the
	            # dubbing target, so forcing Whisper to decode the input as Tamil
	            # would garble any non-Tamil video.
	            # NOTE: the translation tokenizer's source language is left at
	            # its default here.
	            status_text.text("Transcribing audio...")
	            result = whisper_model.transcribe(extracted_audio_path)
	            original_text = result["text"].strip()
	            if not original_text:
	                raise ValueError("No speech was detected in the video.")
	            progress_bar.progress(40)

	            # Translate
	            status_text.text("Translating to Tamil...")
	            translated_text = translate_text(
	                original_text, tokenizer, translation_model
	            )
	            progress_bar.progress(60)

	            # Generate Tamil speech
	            status_text.text("Generating Tamil speech...")
	            tts = gTTS(text=translated_text, lang=TAMIL_CONFIG['gtts_code'])
	            tts.save(tts_audio_path)
	            progress_bar.progress(80)

	            # Final video creation
	            status_text.text("Creating final video...")
	            dubbed_audio = process_audio_for_sync(tts_audio_path, voice_speed)
	            dubbed_audio.export(final_audio_path, format="wav")

	            # Combine video with new audio
	            final_audio = AudioFileClip(final_audio_path)
	            try:
	                final_video = video.set_audio(final_audio)
	                final_video.write_videofile(
	                    final_video_path, codec='libx264', audio_codec='aac'
	                )
	            finally:
	                final_audio.close()
	            progress_bar.progress(100)

	            # Read the result into memory so it outlives the temp directory.
	            with open(final_video_path, "rb") as f:
	                video_bytes = f.read()
	        finally:
	            # Close the clip before the directory is removed.
	            video.close()

	    return video_bytes, translated_text


	def main():
	    """Render the Streamlit UI and run the Tamil dubbing workflow.

	    Loads the models, lets the user upload a video and pick a voice speed,
	    and, when the start button is pressed, runs the dubbing pipeline and
	    offers the dubbed video and the Tamil script for download. Failures are
	    reported in the UI instead of being raised.
	    """
	    st.title("🎥 Tamil Video Dubbing AI")
	    st.markdown("### Advanced Video Translation and Dubbing System")

	    # Load models
	    try:
	        with st.spinner("Loading AI models..."):
	            whisper_model, tokenizer, translation_model = load_models()
	    except Exception as e:
	        st.error(f"Error loading models: {e}")
	        return
	    st.success("Models loaded successfully! 🚀")

	    # File uploader with progress
	    video_file = st.file_uploader("Upload your video file", type=["mp4", "mov", "avi"])
	    if not video_file:
	        return

	    # Video preview
	    st.video(video_file)

	    # Advanced settings
	    with st.expander("Advanced Settings"):
	        voice_speed = st.slider("Voice Speed", 0.5, 1.5, TAMIL_CONFIG['voice_speed'], 0.1)
	        # NOTE: this selection is displayed but not yet consumed by the
	        # pipeline.
	        st.select_slider(
	            "Translation Quality",
	            options=["Draft", "Standard", "High Quality"],
	            value="Standard"
	        )

	    if not st.button("Start Tamil Dubbing", key="start_dubbing"):
	        return

	    # Top-level UI boundary: any pipeline failure is shown to the user.
	    try:
	        with st.spinner("Processing your video..."):
	            # Process steps with progress bar
	            progress_bar = st.progress(0)
	            status_text = st.empty()
	            video_bytes, translated_text = _dub_video_to_tamil(
	                video_file,
	                voice_speed,
	                whisper_model,
	                tokenizer,
	                translation_model,
	                progress_bar,
	                status_text,
	            )
	    except Exception as e:
	        st.error(f"An error occurred: {e}")
	        st.info("Please try again with a different video or check your internet connection.")
	        return

	    # Display results
	    st.success("Video dubbed successfully! 🎉")
	    st.video(video_bytes)

	    # Download options
	    col1, col2 = st.columns(2)
	    with col1:
	        st.download_button(
	            "Download Dubbed Video",
	            video_bytes,
	            file_name="tamil_dubbed_video.mp4",
	            mime="video/mp4"
	        )

	    with col2:
	        st.download_button(
	            "Download Tamil Script",
	            translated_text,
	            file_name="tamil_script.txt",
	            mime="text/plain"
	        )

	# Script entry point: build the UI only when the file is run as the main
	# module, not when it is imported.
	if __name__ == "__main__":
	    main()