""" ๐ŸŽŒ Anime Translator with Lip-Sync ================================= A Streamlit application that translates text between English and Hindi, converts it to speech, and generates a lip-synced anime avatar animation. """ import streamlit as st from pathlib import Path import tempfile import time import shutil import os import subprocess from shutil import which from typing import Tuple, Optional import base64 # Import utility modules from utils.translator import translate_text, detect_language from utils.tts_engine import synthesize_speech, get_audio_duration from utils.lipsync import generate_lipsync_gif from utils.speech_to_text import transcribe_audio, get_language_code from utils.avatar_manager import list_avatars, get_avatar_preview, ensure_sample_avatar # ============================================================================= # FFmpeg Configuration # ============================================================================= def configure_ffmpeg(): """Configure FFmpeg path for pydub on Windows.""" possible_paths = [ r"C:\ffmpeg\bin", r"C:\Program Files\ffmpeg\bin", r"C:\Program Files (x86)\ffmpeg\bin", os.path.expanduser("~\\ffmpeg\\bin"), r"C:\Users\Nishant Pratap\ffmpeg\bin", ] if which("ffmpeg") is not None: return True for path in possible_paths: ffmpeg_exe = os.path.join(path, "ffmpeg.exe") if os.path.exists(ffmpeg_exe): os.environ["PATH"] = path + os.pathsep + os.environ.get("PATH", "") try: from pydub import AudioSegment AudioSegment.converter = os.path.join(path, "ffmpeg.exe") AudioSegment.ffprobe = os.path.join(path, "ffprobe.exe") except: pass return True return False def check_ffmpeg_detailed(): """Check FFmpeg installation and return detailed status.""" status = { "ffmpeg_in_path": False, "ffmpeg_works": False, "ffprobe_works": False, "pydub_works": False, "error_message": None } ffmpeg_path = which("ffmpeg") status["ffmpeg_in_path"] = ffmpeg_path is not None try: result = subprocess.run( ["ffmpeg", "-version"], capture_output=True, text=True, timeout=5 ) status["ffmpeg_works"] = result.returncode == 0 except Exception as e: status["error_message"] = str(e) try: result = subprocess.run( ["ffprobe", "-version"], capture_output=True, text=True, timeout=5 ) status["ffprobe_works"] = result.returncode == 0 except Exception: pass try: from pydub import AudioSegment silence = AudioSegment.silent(duration=100) status["pydub_works"] = True except Exception as e: status["pydub_works"] = False if not status["error_message"]: status["error_message"] = str(e) return status ffmpeg_found = configure_ffmpeg() # ============================================================================= # Configuration # ============================================================================= AVATARS_DIR = Path("./avatars") TEMP_DIR = Path(tempfile.gettempdir()) / "anime_translator" AVATARS_DIR.mkdir(parents=True, exist_ok=True) TEMP_DIR.mkdir(parents=True, exist_ok=True) # Page configuration st.set_page_config( page_title="๐ŸŽŒ Anime Translator", page_icon="๐ŸŽŒ", layout="wide", initial_sidebar_state="expanded" ) # Initialize session state for animation control if 'animation_playing' not in st.session_state: st.session_state.animation_playing = True if 'current_gif_path' not in st.session_state: st.session_state.current_gif_path = None # ============================================================================= # Custom CSS Styling - UPDATED WITH ANIMATION FIX # ============================================================================= st.markdown(""" """, unsafe_allow_html=True) # ============================================================================= # Helper Functions # ============================================================================= def cleanup_temp_files(older_than_sec: int = 3600) -> None: """Clean up old temporary files.""" now = time.time() try: for path in TEMP_DIR.iterdir(): try: if now - path.stat().st_mtime > older_than_sec: if path.is_file(): path.unlink() elif path.is_dir(): shutil.rmtree(path) except Exception: pass except Exception: pass def get_gif_first_frame(gif_path: str) -> Optional[str]: """Extract the first frame of a GIF as a static image.""" try: from PIL import Image import io with Image.open(gif_path) as img: # Get first frame img.seek(0) first_frame = img.copy() # Save to bytes buffer = io.BytesIO() first_frame.save(buffer, format='PNG') buffer.seek(0) # Convert to base64 img_base64 = base64.b64encode(buffer.getvalue()).decode() return img_base64 except Exception as e: print(f"Error extracting first frame: {e}") return None def display_animation_with_controls(gif_path: str, key_prefix: str = ""): """Display animation with play/pause/stop controls.""" if not gif_path or not os.path.exists(gif_path): st.info("โ„น๏ธ No animation available") return # Read GIF file with open(gif_path, "rb") as f: gif_data = f.read() gif_base64 = base64.b64encode(gif_data).decode() # Get first frame for static display first_frame_base64 = get_gif_first_frame(gif_path) # Animation state key state_key = f"{key_prefix}_playing" if state_key not in st.session_state: st.session_state[state_key] = True # Control buttons col1, col2, col3 = st.columns([1, 1, 1]) with col1: if st.button("โ–ถ๏ธ Play", key=f"{key_prefix}_play", use_container_width=True): st.session_state[state_key] = True st.rerun() with col2: if st.button("โธ๏ธ Pause", key=f"{key_prefix}_pause", use_container_width=True): st.session_state[state_key] = False st.rerun() with col3: if st.button("โน๏ธ Stop", key=f"{key_prefix}_stop", use_container_width=True): st.session_state[state_key] = False st.rerun() # Display animation or static frame if st.session_state[state_key]: # Playing - show animated GIF st.markdown( f'''
Lip-sync animation
''', unsafe_allow_html=True ) else: # Paused/Stopped - show first frame if first_frame_base64: st.markdown( f'''
Animation paused

โธ๏ธ Animation Paused

''', unsafe_allow_html=True ) else: st.info("Animation paused") # Download button st.download_button( label="๐Ÿ“ฅ Download Animation", data=gif_data, file_name="lipsync_animation.gif", mime="image/gif", key=f"{key_prefix}_download", use_container_width=True ) def process_translation_pipeline( text: str, source_lang: str, target_lang: str, avatar_name: str ) -> Tuple[str, Optional[str], Optional[str]]: """Main processing pipeline: translate, synthesize speech, generate animation.""" # Step 1: Translate text try: translated_text = translate_text(text, source_lang, target_lang) except Exception as e: raise Exception(f"Translation failed: {str(e)}") # Step 2: Synthesize speech try: audio_path = synthesize_speech(translated_text, target_lang, TEMP_DIR) except Exception as e: raise Exception(f"Speech synthesis failed: {str(e)}") # Step 3: Generate lip-sync animation gif_path = None try: gif_path = generate_lipsync_gif( avatar_name=avatar_name, audio_path=audio_path, avatars_dir=AVATARS_DIR, output_dir=TEMP_DIR, fps=12 ) except Exception as e: print(f"Animation generation warning: {str(e)}") gif_path = None return translated_text, audio_path, gif_path # ============================================================================= # Sidebar # ============================================================================= with st.sidebar: st.markdown("## โš™๏ธ Settings") # Avatar selection st.markdown("### ๐ŸŽญ Avatar Selection") avatars = list_avatars(AVATARS_DIR) if avatars: selected_avatar = st.selectbox( "Choose your avatar", options=avatars, index=0, help="Select an anime avatar for lip-sync animation" ) preview = get_avatar_preview(selected_avatar, AVATARS_DIR) if preview: st.image(preview, caption=f"Preview: {selected_avatar}", use_container_width=True) else: st.warning("No avatars found. Creating sample avatar...") ensure_sample_avatar(AVATARS_DIR) selected_avatar = "sample" st.rerun() st.markdown("---") # Language settings st.markdown("### ๐ŸŒ Language Settings") source_language = st.selectbox( "Source Language", options=["auto", "en", "hi"], format_func=lambda x: {"auto": "๐Ÿ”„ Auto-detect", "en": "๐Ÿ‡ฌ๐Ÿ‡ง English", "hi": "๐Ÿ‡ฎ๐Ÿ‡ณ Hindi"}[x], index=0 ) target_language = st.selectbox( "Target Language", options=["en", "hi"], format_func=lambda x: {"en": "๐Ÿ‡ฌ๐Ÿ‡ง English", "hi": "๐Ÿ‡ฎ๐Ÿ‡ณ Hindi"}[x], index=1 ) st.markdown("---") # Animation settings st.markdown("### ๐ŸŽฌ Animation Settings") animation_size = st.slider( "Animation Size", min_value=200, max_value=500, value=350, step=50, help="Adjust the display size of the animation" ) auto_play = st.checkbox("Auto-play animation", value=True) st.markdown("---") # System status st.markdown("### ๐Ÿ”ง System Status") ffmpeg_status = check_ffmpeg_detailed() if ffmpeg_status["ffmpeg_works"]: st.success("โœ… FFmpeg: Working") else: st.error("โŒ FFmpeg: Not working") if ffmpeg_status["pydub_works"]: st.success("โœ… Pydub: Working") else: st.warning("โš ๏ธ Pydub: Limited (fallback mode)") if ffmpeg_status["error_message"]: with st.expander("๐Ÿ” Error Details"): st.code(ffmpeg_status["error_message"]) st.markdown(""" **To fix FFmpeg:** ```bash conda install -c conda-forge ffmpeg ``` Or download from: https://www.gyan.dev/ffmpeg/builds/ """) st.markdown("---") # Info section st.markdown("### โ„น๏ธ About") st.markdown(""" Translate text between English and Hindi with lip-synced avatar animation. **Features:** - ๐ŸŽค Voice input - ๐Ÿ”„ Auto detection - ๐Ÿ—ฃ๏ธ Text-to-speech - ๐ŸŽฌ Lip-sync animation """) if st.button("๐Ÿงน Clear Temp Files"): cleanup_temp_files(older_than_sec=0) st.success("Cleared!") # ============================================================================= # Main Content # ============================================================================= st.markdown('

๐ŸŽŒ Anime Translator

', unsafe_allow_html=True) st.markdown( '

' 'Translate โ€ข Speak โ€ข Animate

', unsafe_allow_html=True ) # Tabs tab1, tab2 = st.tabs(["๐Ÿ“ Text Input", "๐ŸŽค Voice Input"]) # ============================================================================= # Tab 1: Text Input # ============================================================================= with tab1: col1, col2 = st.columns([1, 1]) with col1: st.markdown("### ๐Ÿ“ Enter Your Text") text_input = st.text_area( "Type or paste your text here", height=150, placeholder="Enter text in English or Hindi...\nเค‰เคฆเคพเคนเคฐเคฃ: เคจเคฎเคธเฅเคคเฅ‡, เค†เคช เค•เฅˆเคธเฅ‡ เคนเฅˆเค‚?\nExample: Hello, how are you?", key="text_input" ) if text_input: detected = detect_language(text_input) st.markdown( f'
' f'๐Ÿ“Š Characters: {len(text_input)} | ' f'๐Ÿ” Detected: {"๐Ÿ‡ฎ๐Ÿ‡ณ Hindi" if detected == "hi" else "๐Ÿ‡ฌ๐Ÿ‡ง English"}' f'
', unsafe_allow_html=True ) translate_btn = st.button( "๐Ÿš€ Translate & Animate", key="translate_text_btn", use_container_width=True ) with col2: st.markdown("### ๐ŸŽฌ Result") # Create a container with fixed height result_container = st.container() with result_container: if translate_btn and text_input: with st.spinner("๐Ÿ”„ Processing..."): progress = st.progress(0) status_text = st.empty() try: status_text.text("๐Ÿ“ Translating...") progress.progress(33) translated, audio_path, gif_path = process_translation_pipeline( text_input, source_language, target_language, selected_avatar ) status_text.text("๐Ÿ—ฃ๏ธ Generating speech...") progress.progress(66) status_text.text("๐ŸŽฌ Creating animation...") progress.progress(100) progress.empty() status_text.empty() # Store results in session state st.session_state['text_result'] = { 'translated': translated, 'audio_path': audio_path, 'gif_path': gif_path } st.session_state['text_animation_playing'] = auto_play except Exception as e: progress.empty() status_text.empty() st.error(f"โŒ Error: {str(e)}") elif translate_btn: st.warning("โš ๏ธ Please enter some text to translate.") # Display stored results if 'text_result' in st.session_state: result = st.session_state['text_result'] # Display translated text st.markdown( f'
' f'

๐Ÿ“œ Translated Text:

' f'

{result["translated"]}

' f'
', unsafe_allow_html=True ) # Audio player if result['audio_path'] and os.path.exists(result['audio_path']): st.markdown("#### ๐Ÿ”Š Audio") st.audio(result['audio_path'], format="audio/mp3") # Animation display with controls if result['gif_path'] and os.path.exists(result['gif_path']): st.markdown("#### ๐ŸŽญ Lip-Sync Animation") display_animation_with_controls(result['gif_path'], key_prefix="text") else: st.info("โ„น๏ธ Animation not available (FFmpeg may be missing)") # ============================================================================= # Tab 2: Voice Input # ============================================================================= with tab2: col1, col2 = st.columns([1, 1]) with col1: st.markdown("### ๐ŸŽค Voice Recording") st.markdown("""
Instructions:
1. Upload an audio file (WAV, MP3, etc.)
2. Or use the audio recorder below
3. Click "Transcribe & Translate"
""", unsafe_allow_html=True) uploaded_audio = st.file_uploader( "Upload an audio file", type=["wav", "mp3", "ogg", "flac", "m4a"], help="Supported formats: WAV, MP3, OGG, FLAC, M4A" ) recorded_audio = None try: from audio_recorder_streamlit import audio_recorder st.markdown("**Or record directly:**") recorded_audio = audio_recorder( text="๐ŸŽ™๏ธ Click to record", recording_color="#e94560", neutral_color="#6c757d", icon_name="microphone", icon_size="2x" ) except ImportError: st.info("๐Ÿ’ก For recording: `pip install audio-recorder-streamlit`") voice_lang = st.selectbox( "Recording Language", options=["en", "hi"], format_func=lambda x: {"en": "๐Ÿ‡ฌ๐Ÿ‡ง English", "hi": "๐Ÿ‡ฎ๐Ÿ‡ณ Hindi"}[x] ) voice_btn = st.button( "๐ŸŽฏ Transcribe & Translate", key="voice_btn", use_container_width=True ) with col2: st.markdown("### ๐ŸŽฌ Result") audio_to_process = None if uploaded_audio is not None: temp_audio_path = TEMP_DIR / f"uploaded_{int(time.time()*1000)}.wav" with open(temp_audio_path, "wb") as f: f.write(uploaded_audio.getbuffer()) audio_to_process = str(temp_audio_path) st.audio(uploaded_audio) elif recorded_audio is not None: temp_audio_path = TEMP_DIR / f"recorded_{int(time.time()*1000)}.wav" with open(temp_audio_path, "wb") as f: f.write(recorded_audio) audio_to_process = str(temp_audio_path) st.audio(recorded_audio, format="audio/wav") if voice_btn: if audio_to_process: with st.spinner("๐Ÿ”„ Processing voice..."): try: st.text("๐ŸŽค Transcribing...") lang_code = get_language_code(voice_lang) transcribed_text, success = transcribe_audio(audio_to_process, lang_code) if success: translated, audio_path, gif_path = process_translation_pipeline( transcribed_text, voice_lang, target_language, selected_avatar ) # Store results in session state st.session_state['voice_result'] = { 'transcribed': transcribed_text, 'translated': translated, 'audio_path': audio_path, 'gif_path': gif_path } st.session_state['voice_animation_playing'] = auto_play else: st.error(f"โŒ {transcribed_text}") except Exception as e: st.error(f"โŒ Error: {str(e)}") else: st.warning("โš ๏ธ Please upload or record audio first.") # Display stored results if 'voice_result' in st.session_state: result = st.session_state['voice_result'] st.markdown( f'
' f'๐Ÿ“ Transcribed: {result["transcribed"]}' f'
', unsafe_allow_html=True ) st.markdown( f'
' f'

๐Ÿ“œ Translated:

' f'

{result["translated"]}

' f'
', unsafe_allow_html=True ) if result['audio_path'] and os.path.exists(result['audio_path']): st.markdown("#### ๐Ÿ”Š Audio") st.audio(result['audio_path'], format="audio/mp3") if result['gif_path'] and os.path.exists(result['gif_path']): st.markdown("#### ๐ŸŽญ Animation") display_animation_with_controls(result['gif_path'], key_prefix="voice") # ============================================================================= # Footer # ============================================================================= st.markdown("---") st.markdown( """

Made By Praveen

""", unsafe_allow_html=True )