# Translator / app.py
# nihun's picture
# Update app.py
# 9258a34 verified
"""
๐ŸŽŒ Anime Translator with Lip-Sync
=================================
A Streamlit application that translates text between English and Hindi,
converts it to speech, and generates a lip-synced anime avatar animation.
"""
import streamlit as st
from pathlib import Path
import tempfile
import time
import shutil
import os
import subprocess
from shutil import which
from typing import Tuple, Optional
import base64
# Import utility modules
from utils.translator import translate_text, detect_language
from utils.tts_engine import synthesize_speech, get_audio_duration
from utils.lipsync import generate_lipsync_gif
from utils.speech_to_text import transcribe_audio, get_language_code
from utils.avatar_manager import list_avatars, get_avatar_preview, ensure_sample_avatar
# =============================================================================
# FFmpeg Configuration
# =============================================================================
def configure_ffmpeg() -> bool:
    """Make FFmpeg discoverable for pydub, searching common Windows install dirs.

    Returns:
        True if an ffmpeg binary is usable (already on PATH, or found in a
        known location which is then prepended to PATH), False otherwise.
    """
    # Already resolvable: nothing to configure.
    if which("ffmpeg") is not None:
        return True
    possible_paths = [
        r"C:\ffmpeg\bin",
        r"C:\Program Files\ffmpeg\bin",
        r"C:\Program Files (x86)\ffmpeg\bin",
        os.path.expanduser("~\\ffmpeg\\bin"),
        r"C:\Users\Nishant Pratap\ffmpeg\bin",
    ]
    for path in possible_paths:
        ffmpeg_exe = os.path.join(path, "ffmpeg.exe")
        if os.path.exists(ffmpeg_exe):
            # Prepend so this copy wins over any stale PATH entries.
            os.environ["PATH"] = path + os.pathsep + os.environ.get("PATH", "")
            try:
                from pydub import AudioSegment
                AudioSegment.converter = ffmpeg_exe
                AudioSegment.ffprobe = os.path.join(path, "ffprobe.exe")
            except Exception:
                # pydub is optional at this point; the PATH update alone may
                # be enough for other consumers. (Was a bare `except:` which
                # also swallowed KeyboardInterrupt/SystemExit.)
                pass
            return True
    return False
def check_ffmpeg_detailed():
    """Probe ffmpeg, ffprobe and pydub; return a detailed status dict.

    Returns a dict with boolean keys ``ffmpeg_in_path``, ``ffmpeg_works``,
    ``ffprobe_works``, ``pydub_works`` and an ``error_message`` string (or
    None) holding the first failure encountered.
    """
    status = {
        "ffmpeg_in_path": which("ffmpeg") is not None,
        "ffmpeg_works": False,
        "ffprobe_works": False,
        "pydub_works": False,
        "error_message": None,
    }

    def _version_ok(tool):
        # Run "<tool> -version" and report whether it exited cleanly.
        proc = subprocess.run(
            [tool, "-version"],
            capture_output=True,
            text=True,
            timeout=5,
        )
        return proc.returncode == 0

    try:
        status["ffmpeg_works"] = _version_ok("ffmpeg")
    except Exception as exc:
        status["error_message"] = str(exc)

    try:
        status["ffprobe_works"] = _version_ok("ffprobe")
    except Exception:
        pass

    try:
        from pydub import AudioSegment
        AudioSegment.silent(duration=100)  # exercise pydub's generation path
        status["pydub_works"] = True
    except Exception as exc:
        status["pydub_works"] = False
        if not status["error_message"]:
            status["error_message"] = str(exc)

    return status
# Run FFmpeg discovery once at import time. The return value is not read
# elsewhere, but the PATH/pydub side effects matter for later audio work.
ffmpeg_found = configure_ffmpeg()
# =============================================================================
# Configuration
# =============================================================================
# Working directories: bundled avatar images, and a per-machine temp area
# for generated audio/GIF artifacts.
AVATARS_DIR = Path("./avatars")
TEMP_DIR = Path(tempfile.gettempdir()) / "anime_translator"
AVATARS_DIR.mkdir(parents=True, exist_ok=True)
TEMP_DIR.mkdir(parents=True, exist_ok=True)
# Page configuration (must be the first Streamlit call in the script)
st.set_page_config(
    page_title="๐ŸŽŒ Anime Translator",
    page_icon="๐ŸŽŒ",
    layout="wide",
    initial_sidebar_state="expanded"
)
# Initialize session state for animation control so values survive reruns
if 'animation_playing' not in st.session_state:
    st.session_state.animation_playing = True
if 'current_gif_path' not in st.session_state:
    st.session_state.current_gif_path = None
# =============================================================================
# Custom CSS Styling - UPDATED WITH ANIMATION FIX
# =============================================================================
# Inject global CSS: gradient theme, button/tab styling, and fixed-size
# animation containers so the play/pause frame swap doesn't shift the layout.
st.markdown("""
<style>
.main {
background: linear-gradient(135deg, #1a1a2e 0%, #16213e 50%, #0f3460 100%);
}
.main-header {
background: linear-gradient(90deg, #e94560, #ff6b6b);
-webkit-background-clip: text;
-webkit-text-fill-color: transparent;
font-size: 3rem;
font-weight: bold;
text-align: center;
padding: 1rem;
margin-bottom: 2rem;
}
.stButton > button {
background: linear-gradient(90deg, #e94560, #ff6b6b);
color: white;
border: none;
border-radius: 25px;
padding: 0.75rem 2rem;
font-weight: bold;
transition: all 0.3s ease;
width: 100%;
}
.stButton > button:hover {
transform: translateY(-2px);
box-shadow: 0 5px 20px rgba(233, 69, 96, 0.4);
}
.result-box {
background: linear-gradient(135deg, rgba(233, 69, 96, 0.1), rgba(255, 107, 107, 0.1));
border-radius: 15px;
padding: 1.5rem;
border: 1px solid rgba(233, 69, 96, 0.3);
margin: 1rem 0;
}
.info-box {
background: rgba(100, 200, 255, 0.1);
border-left: 4px solid #64c8ff;
padding: 1rem;
border-radius: 0 10px 10px 0;
margin: 1rem 0;
}
.success-box {
background: rgba(100, 255, 150, 0.1);
border-left: 4px solid #64ff96;
padding: 1rem;
border-radius: 0 10px 10px 0;
}
#MainMenu {visibility: hidden;}
footer {visibility: hidden;}
.stTabs [data-baseweb="tab-list"] {
gap: 8px;
}
.stTabs [data-baseweb="tab"] {
background: rgba(255, 255, 255, 0.05);
border-radius: 10px;
padding: 10px 20px;
}
.stTabs [aria-selected="true"] {
background: linear-gradient(90deg, #e94560, #ff6b6b);
}
/* ============================================= */
/* ANIMATION CONTAINER - FIXED SIZE */
/* ============================================= */
.animation-container {
width: 100%;
max-width: 400px;
height: 400px;
margin: 0 auto;
display: flex;
align-items: center;
justify-content: center;
background: rgba(0, 0, 0, 0.2);
border-radius: 15px;
overflow: hidden;
position: relative;
}
.animation-container img {
max-width: 100%;
max-height: 100%;
object-fit: contain;
}
.animation-container.paused img {
animation-play-state: paused !important;
}
/* Static image when paused */
.animation-static {
width: 100%;
max-width: 400px;
height: 400px;
margin: 0 auto;
display: flex;
align-items: center;
justify-content: center;
background: rgba(0, 0, 0, 0.2);
border-radius: 15px;
overflow: hidden;
}
.animation-static img {
max-width: 100%;
max-height: 100%;
object-fit: contain;
}
/* Animation controls */
.animation-controls {
display: flex;
justify-content: center;
gap: 10px;
margin-top: 10px;
}
.control-btn {
background: linear-gradient(90deg, #e94560, #ff6b6b);
color: white;
border: none;
border-radius: 20px;
padding: 8px 20px;
cursor: pointer;
font-weight: bold;
transition: all 0.3s ease;
}
.control-btn:hover {
transform: scale(1.05);
box-shadow: 0 3px 15px rgba(233, 69, 96, 0.4);
}
.control-btn.stop {
background: linear-gradient(90deg, #666, #888);
}
/* Fixed height result column */
.result-column {
min-height: 600px;
}
</style>
""", unsafe_allow_html=True)
# =============================================================================
# Helper Functions
# =============================================================================
def cleanup_temp_files(older_than_sec: int = 3600, base_dir: Optional[Path] = None) -> None:
    """Best-effort removal of stale entries under the temp directory.

    Args:
        older_than_sec: Only entries last modified more than this many
            seconds ago are removed (0 removes everything).
        base_dir: Directory to sweep; defaults to the module-level TEMP_DIR.
            (Backward-compatible addition; existing callers are unaffected.)

    All errors (permissions, races with other sessions, missing directory)
    are deliberately swallowed: cleanup must never break the app.
    """
    root = TEMP_DIR if base_dir is None else base_dir
    now = time.time()
    try:
        for path in root.iterdir():
            try:
                if now - path.stat().st_mtime > older_than_sec:
                    if path.is_file():
                        path.unlink()
                    elif path.is_dir():
                        shutil.rmtree(path)
            except Exception:
                # Skip entries we cannot stat or remove.
                pass
    except Exception:
        # Missing or unreadable root: nothing to clean.
        pass
def get_gif_first_frame(gif_path: str) -> Optional[str]:
    """Return frame 0 of a GIF as a base64-encoded PNG string, or None on failure."""
    try:
        from PIL import Image
        import io
        # Copy the first frame out of the animation so the file handle can close.
        with Image.open(gif_path) as animation:
            animation.seek(0)
            frame = animation.copy()
        # Re-encode the frame as PNG and base64 it for inline HTML embedding.
        png_buffer = io.BytesIO()
        frame.save(png_buffer, format='PNG')
        return base64.b64encode(png_buffer.getvalue()).decode()
    except Exception as e:
        # Pillow missing, unreadable file, or not an image: degrade to None.
        print(f"Error extracting first frame: {e}")
        return None
def display_animation_with_controls(gif_path: str, key_prefix: str = ""):
    """Display animation with play/pause/stop controls.

    Renders the GIF inline as a base64 <img> with Play/Pause/Stop buttons and
    a download button. Play state lives in st.session_state under
    "<key_prefix>_playing" so it survives Streamlit reruns; when paused or
    stopped, the GIF's first frame is shown as a static PNG instead.

    Args:
        gif_path: Path to the GIF file to display.
        key_prefix: Unique prefix for widget/session-state keys so multiple
            instances (text tab vs. voice tab) don't collide.
    """
    if not gif_path or not os.path.exists(gif_path):
        st.info("โ„น๏ธ No animation available")
        return
    # Read GIF file and inline it as base64 (no temp-file URL needed)
    with open(gif_path, "rb") as f:
        gif_data = f.read()
    gif_base64 = base64.b64encode(gif_data).decode()
    # Get first frame for static display (None if extraction fails)
    first_frame_base64 = get_gif_first_frame(gif_path)
    # Animation state key
    state_key = f"{key_prefix}_playing"
    if state_key not in st.session_state:
        st.session_state[state_key] = True
    # Control buttons; each click flips the state and forces a rerun so the
    # markup below re-renders in the new state.
    col1, col2, col3 = st.columns([1, 1, 1])
    with col1:
        if st.button("โ–ถ๏ธ Play", key=f"{key_prefix}_play", use_container_width=True):
            st.session_state[state_key] = True
            st.rerun()
    with col2:
        if st.button("โธ๏ธ Pause", key=f"{key_prefix}_pause", use_container_width=True):
            st.session_state[state_key] = False
            st.rerun()
    with col3:
        # NOTE(review): Stop currently behaves exactly like Pause (same state
        # flag); the GIF restarts from frame 0 on Play only because the <img>
        # is re-rendered — confirm this matches the intended UX.
        if st.button("โน๏ธ Stop", key=f"{key_prefix}_stop", use_container_width=True):
            st.session_state[state_key] = False
            st.rerun()
    # Display animation or static frame
    if st.session_state[state_key]:
        # Playing - show animated GIF
        st.markdown(
            f'''
<div class="animation-container">
<img src="data:image/gif;base64,{gif_base64}" alt="Lip-sync animation">
</div>
''',
            unsafe_allow_html=True
        )
    else:
        # Paused/Stopped - show first frame
        if first_frame_base64:
            st.markdown(
                f'''
<div class="animation-static">
<img src="data:image/png;base64,{first_frame_base64}" alt="Animation paused">
</div>
<p style="text-align: center; color: #888; margin-top: 10px;">โธ๏ธ Animation Paused</p>
''',
                unsafe_allow_html=True
            )
        else:
            # First-frame extraction failed (e.g. Pillow unavailable)
            st.info("Animation paused")
    # Download button
    st.download_button(
        label="๐Ÿ“ฅ Download Animation",
        data=gif_data,
        file_name="lipsync_animation.gif",
        mime="image/gif",
        key=f"{key_prefix}_download",
        use_container_width=True
    )
def process_translation_pipeline(
    text: str,
    source_lang: str,
    target_lang: str,
    avatar_name: str
) -> Tuple[str, Optional[str], Optional[str]]:
    """Main processing pipeline: translate, synthesize speech, generate animation.

    Args:
        text: Input text to translate.
        source_lang: Source language code (e.g. "auto", "en", "hi").
        target_lang: Target language code (e.g. "en", "hi").
        avatar_name: Avatar to animate, resolved against AVATARS_DIR.

    Returns:
        Tuple of (translated_text, audio_path, gif_path). gif_path is None
        when animation generation fails (e.g. FFmpeg missing) — translation
        and audio are still returned.

    Raises:
        RuntimeError: If translation or speech synthesis fails. (Previously a
            bare Exception without the original cause; RuntimeError is a
            subclass of Exception, so existing `except Exception` callers
            still work, and `from e` preserves the traceback chain.)
    """
    # Step 1: Translate text
    try:
        translated_text = translate_text(text, source_lang, target_lang)
    except Exception as e:
        raise RuntimeError(f"Translation failed: {str(e)}") from e
    # Step 2: Synthesize speech
    try:
        audio_path = synthesize_speech(translated_text, target_lang, TEMP_DIR)
    except Exception as e:
        raise RuntimeError(f"Speech synthesis failed: {str(e)}") from e
    # Step 3: Generate lip-sync animation (non-fatal: fall back to None so
    # the user still gets text + audio when animation cannot be produced)
    gif_path = None
    try:
        gif_path = generate_lipsync_gif(
            avatar_name=avatar_name,
            audio_path=audio_path,
            avatars_dir=AVATARS_DIR,
            output_dir=TEMP_DIR,
            fps=12
        )
    except Exception as e:
        print(f"Animation generation warning: {str(e)}")
        gif_path = None
    return translated_text, audio_path, gif_path
# =============================================================================
# Sidebar
# =============================================================================
# Sidebar: avatar choice, language pair, animation prefs, and system status.
# The names bound here (selected_avatar, source_language, target_language,
# auto_play) are read by both input tabs below.
with st.sidebar:
    st.markdown("## โš™๏ธ Settings")
    # Avatar selection
    st.markdown("### ๐ŸŽญ Avatar Selection")
    avatars = list_avatars(AVATARS_DIR)
    if avatars:
        selected_avatar = st.selectbox(
            "Choose your avatar",
            options=avatars,
            index=0,
            help="Select an anime avatar for lip-sync animation"
        )
        preview = get_avatar_preview(selected_avatar, AVATARS_DIR)
        if preview:
            st.image(preview, caption=f"Preview: {selected_avatar}", use_container_width=True)
    else:
        # First run: create a placeholder avatar, then rerun so the selectbox
        # branch above can pick it up.
        st.warning("No avatars found. Creating sample avatar...")
        ensure_sample_avatar(AVATARS_DIR)
        selected_avatar = "sample"
        st.rerun()
    st.markdown("---")
    # Language settings
    st.markdown("### ๐ŸŒ Language Settings")
    source_language = st.selectbox(
        "Source Language",
        options=["auto", "en", "hi"],
        format_func=lambda x: {"auto": "๐Ÿ”„ Auto-detect", "en": "๐Ÿ‡ฌ๐Ÿ‡ง English", "hi": "๐Ÿ‡ฎ๐Ÿ‡ณ Hindi"}[x],
        index=0
    )
    target_language = st.selectbox(
        "Target Language",
        options=["en", "hi"],
        format_func=lambda x: {"en": "๐Ÿ‡ฌ๐Ÿ‡ง English", "hi": "๐Ÿ‡ฎ๐Ÿ‡ณ Hindi"}[x],
        index=1
    )
    st.markdown("---")
    # Animation settings
    st.markdown("### ๐ŸŽฌ Animation Settings")
    # NOTE(review): animation_size does not appear to be applied to the
    # rendered animation container anywhere in this file — confirm intent.
    animation_size = st.slider(
        "Animation Size",
        min_value=200,
        max_value=500,
        value=350,
        step=50,
        help="Adjust the display size of the animation"
    )
    auto_play = st.checkbox("Auto-play animation", value=True)
    st.markdown("---")
    # System status: live FFmpeg/pydub health check shown to the user
    st.markdown("### ๐Ÿ”ง System Status")
    ffmpeg_status = check_ffmpeg_detailed()
    if ffmpeg_status["ffmpeg_works"]:
        st.success("โœ… FFmpeg: Working")
    else:
        st.error("โŒ FFmpeg: Not working")
    if ffmpeg_status["pydub_works"]:
        st.success("โœ… Pydub: Working")
    else:
        st.warning("โš ๏ธ Pydub: Limited (fallback mode)")
    if ffmpeg_status["error_message"]:
        with st.expander("๐Ÿ” Error Details"):
            st.code(ffmpeg_status["error_message"])
            st.markdown("""
**To fix FFmpeg:**
```bash
conda install -c conda-forge ffmpeg
```
Or download from: https://www.gyan.dev/ffmpeg/builds/
""")
    st.markdown("---")
    # Info section
    st.markdown("### โ„น๏ธ About")
    st.markdown("""
Translate text between English and Hindi with lip-synced avatar animation.
**Features:**
- ๐ŸŽค Voice input
- ๐Ÿ”„ Auto detection
- ๐Ÿ—ฃ๏ธ Text-to-speech
- ๐ŸŽฌ Lip-sync animation
""")
    # Manual cleanup: older_than_sec=0 removes everything in TEMP_DIR
    if st.button("๐Ÿงน Clear Temp Files"):
        cleanup_temp_files(older_than_sec=0)
        st.success("Cleared!")
# =============================================================================
# Main Content
# =============================================================================
# Page title, tagline, and the two input-mode tabs.
st.markdown('<h1 class="main-header">๐ŸŽŒ Anime Translator</h1>', unsafe_allow_html=True)
st.markdown(
    '<p style="text-align: center; color: #888; font-size: 1.2rem;">'
    'Translate โ€ข Speak โ€ข Animate</p>',
    unsafe_allow_html=True
)
# Tabs: typed text input vs. uploaded/recorded voice input
tab1, tab2 = st.tabs(["๐Ÿ“ Text Input", "๐ŸŽค Voice Input"])
# =============================================================================
# Tab 1: Text Input
# =============================================================================
with tab1:
    col1, col2 = st.columns([1, 1])
    with col1:
        st.markdown("### ๐Ÿ“ Enter Your Text")
        text_input = st.text_area(
            "Type or paste your text here",
            height=150,
            placeholder="Enter text in English or Hindi...\nเค‰เคฆเคพเคนเคฐเคฃ: เคจเคฎเคธเฅเคคเฅ‡, เค†เคช เค•เฅˆเคธเฅ‡ เคนเฅˆเค‚?\nExample: Hello, how are you?",
            key="text_input"
        )
        if text_input:
            # Live feedback: character count plus language auto-detection
            detected = detect_language(text_input)
            st.markdown(
                f'<div class="info-box">'
                f'๐Ÿ“Š Characters: {len(text_input)} | '
                f'๐Ÿ” Detected: {"๐Ÿ‡ฎ๐Ÿ‡ณ Hindi" if detected == "hi" else "๐Ÿ‡ฌ๐Ÿ‡ง English"}'
                f'</div>',
                unsafe_allow_html=True
            )
        translate_btn = st.button(
            "๐Ÿš€ Translate & Animate",
            key="translate_text_btn",
            use_container_width=True
        )
    with col2:
        st.markdown("### ๐ŸŽฌ Result")
        # Create a container with fixed height
        result_container = st.container()
        with result_container:
            if translate_btn and text_input:
                with st.spinner("๐Ÿ”„ Processing..."):
                    progress = st.progress(0)
                    status_text = st.empty()
                    try:
                        # The percentages are approximate stage markers; the
                        # whole pipeline actually runs in the single call below.
                        status_text.text("๐Ÿ“ Translating...")
                        progress.progress(33)
                        translated, audio_path, gif_path = process_translation_pipeline(
                            text_input,
                            source_language,
                            target_language,
                            selected_avatar
                        )
                        status_text.text("๐Ÿ—ฃ๏ธ Generating speech...")
                        progress.progress(66)
                        status_text.text("๐ŸŽฌ Creating animation...")
                        progress.progress(100)
                        progress.empty()
                        status_text.empty()
                        # Store results in session state so they persist across
                        # the reruns triggered by the animation control buttons
                        st.session_state['text_result'] = {
                            'translated': translated,
                            'audio_path': audio_path,
                            'gif_path': gif_path
                        }
                        st.session_state['text_animation_playing'] = auto_play
                    except Exception as e:
                        progress.empty()
                        status_text.empty()
                        st.error(f"โŒ Error: {str(e)}")
            elif translate_btn:
                st.warning("โš ๏ธ Please enter some text to translate.")
        # Display stored results
        if 'text_result' in st.session_state:
            result = st.session_state['text_result']
            # Display translated text
            st.markdown(
                f'<div class="result-box">'
                f'<h4>๐Ÿ“œ Translated Text:</h4>'
                f'<p style="font-size: 1.2rem;">{result["translated"]}</p>'
                f'</div>',
                unsafe_allow_html=True
            )
            # Audio player
            if result['audio_path'] and os.path.exists(result['audio_path']):
                st.markdown("#### ๐Ÿ”Š Audio")
                st.audio(result['audio_path'], format="audio/mp3")
            # Animation display with controls
            if result['gif_path'] and os.path.exists(result['gif_path']):
                st.markdown("#### ๐ŸŽญ Lip-Sync Animation")
                display_animation_with_controls(result['gif_path'], key_prefix="text")
            else:
                st.info("โ„น๏ธ Animation not available (FFmpeg may be missing)")
# =============================================================================
# Tab 2: Voice Input
# =============================================================================
with tab2:
    col1, col2 = st.columns([1, 1])
    with col1:
        st.markdown("### ๐ŸŽค Voice Recording")
        st.markdown("""
<div class="info-box">
<strong>Instructions:</strong><br>
1. Upload an audio file (WAV, MP3, etc.)<br>
2. Or use the audio recorder below<br>
3. Click "Transcribe & Translate"
</div>
""", unsafe_allow_html=True)
        uploaded_audio = st.file_uploader(
            "Upload an audio file",
            type=["wav", "mp3", "ogg", "flac", "m4a"],
            help="Supported formats: WAV, MP3, OGG, FLAC, M4A"
        )
        # Optional in-browser recorder (extra third-party dependency)
        recorded_audio = None
        try:
            from audio_recorder_streamlit import audio_recorder
            st.markdown("**Or record directly:**")
            recorded_audio = audio_recorder(
                text="๐ŸŽ™๏ธ Click to record",
                recording_color="#e94560",
                neutral_color="#6c757d",
                icon_name="microphone",
                icon_size="2x"
            )
        except ImportError:
            st.info("๐Ÿ’ก For recording: `pip install audio-recorder-streamlit`")
        voice_lang = st.selectbox(
            "Recording Language",
            options=["en", "hi"],
            format_func=lambda x: {"en": "๐Ÿ‡ฌ๐Ÿ‡ง English", "hi": "๐Ÿ‡ฎ๐Ÿ‡ณ Hindi"}[x]
        )
        voice_btn = st.button(
            "๐ŸŽฏ Transcribe & Translate",
            key="voice_btn",
            use_container_width=True
        )
    with col2:
        st.markdown("### ๐ŸŽฌ Result")
        # Persist whichever audio source is available to TEMP_DIR for the
        # transcriber (which needs a real file path).
        audio_to_process = None
        if uploaded_audio is not None:
            # BUGFIX: preserve the uploaded file's real extension. Previously
            # every upload was saved as ".wav", mislabeling e.g. MP3/OGG
            # uploads for downstream transcription/decoding tools.
            suffix = Path(uploaded_audio.name).suffix.lower() or ".wav"
            temp_audio_path = TEMP_DIR / f"uploaded_{int(time.time()*1000)}{suffix}"
            with open(temp_audio_path, "wb") as f:
                f.write(uploaded_audio.getbuffer())
            audio_to_process = str(temp_audio_path)
            st.audio(uploaded_audio)
        elif recorded_audio is not None:
            # The recorder widget returns WAV bytes
            temp_audio_path = TEMP_DIR / f"recorded_{int(time.time()*1000)}.wav"
            with open(temp_audio_path, "wb") as f:
                f.write(recorded_audio)
            audio_to_process = str(temp_audio_path)
            st.audio(recorded_audio, format="audio/wav")
        if voice_btn:
            if audio_to_process:
                with st.spinner("๐Ÿ”„ Processing voice..."):
                    try:
                        st.text("๐ŸŽค Transcribing...")
                        lang_code = get_language_code(voice_lang)
                        transcribed_text, success = transcribe_audio(audio_to_process, lang_code)
                        if success:
                            translated, audio_path, gif_path = process_translation_pipeline(
                                transcribed_text,
                                voice_lang,
                                target_language,
                                selected_avatar
                            )
                            # Store results in session state so they survive
                            # the reruns triggered by animation controls
                            st.session_state['voice_result'] = {
                                'transcribed': transcribed_text,
                                'translated': translated,
                                'audio_path': audio_path,
                                'gif_path': gif_path
                            }
                            st.session_state['voice_animation_playing'] = auto_play
                        else:
                            # On failure, transcribe_audio returns the error
                            # text in place of the transcript
                            st.error(f"โŒ {transcribed_text}")
                    except Exception as e:
                        st.error(f"โŒ Error: {str(e)}")
            else:
                st.warning("โš ๏ธ Please upload or record audio first.")
        # Display stored results
        if 'voice_result' in st.session_state:
            result = st.session_state['voice_result']
            st.markdown(
                f'<div class="success-box">'
                f'<strong>๐Ÿ“ Transcribed:</strong> {result["transcribed"]}'
                f'</div>',
                unsafe_allow_html=True
            )
            st.markdown(
                f'<div class="result-box">'
                f'<h4>๐Ÿ“œ Translated:</h4>'
                f'<p style="font-size: 1.2rem;">{result["translated"]}</p>'
                f'</div>',
                unsafe_allow_html=True
            )
            if result['audio_path'] and os.path.exists(result['audio_path']):
                st.markdown("#### ๐Ÿ”Š Audio")
                st.audio(result['audio_path'], format="audio/mp3")
            if result['gif_path'] and os.path.exists(result['gif_path']):
                st.markdown("#### ๐ŸŽญ Animation")
                display_animation_with_controls(result['gif_path'], key_prefix="voice")
# =============================================================================
# Footer
# =============================================================================
st.markdown("---")
st.markdown(
    """
<div style="text-align: center; color: #666; padding: 1rem;">
<p>Made By Praveen</p>
</div>
""",
    unsafe_allow_html=True
)