# Translator / app.py
# nihun's picture
# Update app.py
# 9258a34 verified
"""
๐ŸŽŒ Anime Translator with Lip-Sync
=================================
A Streamlit application that translates text between English and Hindi,
converts it to speech, and generates a lip-synced anime avatar animation.
"""
import streamlit as st
from pathlib import Path
import tempfile
import time
import shutil
import os
import subprocess
from shutil import which
from typing import Tuple, Optional
import base64
# Import utility modules
from utils.translator import translate_text, detect_language
from utils.tts_engine import synthesize_speech, get_audio_duration
from utils.lipsync import generate_lipsync_gif
from utils.speech_to_text import transcribe_audio, get_language_code
from utils.avatar_manager import list_avatars, get_avatar_preview, ensure_sample_avatar
# =============================================================================
# FFmpeg Configuration
# =============================================================================
def configure_ffmpeg() -> bool:
    """Make FFmpeg discoverable for pydub, searching common Windows install dirs.

    Returns:
        True if an ffmpeg binary is usable (already on PATH, or found in a
        known location which is then prepended to PATH), False otherwise.
    """
    # Already resolvable: nothing to configure.
    if which("ffmpeg") is not None:
        return True
    possible_paths = [
        r"C:\ffmpeg\bin",
        r"C:\Program Files\ffmpeg\bin",
        r"C:\Program Files (x86)\ffmpeg\bin",
        os.path.expanduser("~\\ffmpeg\\bin"),
        r"C:\Users\Nishant Pratap\ffmpeg\bin",
    ]
    for path in possible_paths:
        ffmpeg_exe = os.path.join(path, "ffmpeg.exe")
        if os.path.exists(ffmpeg_exe):
            # Prepend so this copy wins over any stale PATH entries.
            os.environ["PATH"] = path + os.pathsep + os.environ.get("PATH", "")
            try:
                from pydub import AudioSegment
                AudioSegment.converter = ffmpeg_exe
                AudioSegment.ffprobe = os.path.join(path, "ffprobe.exe")
            except Exception:
                # pydub is optional at this point; the PATH update alone may
                # be enough for other consumers. (Was a bare `except:` which
                # also swallowed KeyboardInterrupt/SystemExit.)
                pass
            return True
    return False
def check_ffmpeg_detailed():
    """Probe ffmpeg, ffprobe and pydub; return a detailed status dict.

    Returns a dict with boolean keys ``ffmpeg_in_path``, ``ffmpeg_works``,
    ``ffprobe_works``, ``pydub_works`` and an ``error_message`` string (or
    None) holding the first failure encountered.
    """
    status = {
        "ffmpeg_in_path": which("ffmpeg") is not None,
        "ffmpeg_works": False,
        "ffprobe_works": False,
        "pydub_works": False,
        "error_message": None,
    }

    def _version_ok(tool):
        # Run "<tool> -version" and report whether it exited cleanly.
        proc = subprocess.run(
            [tool, "-version"],
            capture_output=True,
            text=True,
            timeout=5,
        )
        return proc.returncode == 0

    try:
        status["ffmpeg_works"] = _version_ok("ffmpeg")
    except Exception as exc:
        status["error_message"] = str(exc)

    try:
        status["ffprobe_works"] = _version_ok("ffprobe")
    except Exception:
        pass

    try:
        from pydub import AudioSegment
        AudioSegment.silent(duration=100)  # exercise pydub's generation path
        status["pydub_works"] = True
    except Exception as exc:
        status["pydub_works"] = False
        if not status["error_message"]:
            status["error_message"] = str(exc)

    return status
# Run FFmpeg discovery once at import time. The return value is not read
# elsewhere, but the PATH/pydub side effects matter for later audio work.
ffmpeg_found = configure_ffmpeg()
# =============================================================================
# Configuration
# =============================================================================
# Working directories: bundled avatar images, and a per-machine temp area
# for generated audio/GIF artifacts.
AVATARS_DIR = Path("./avatars")
TEMP_DIR = Path(tempfile.gettempdir()) / "anime_translator"
AVATARS_DIR.mkdir(parents=True, exist_ok=True)
TEMP_DIR.mkdir(parents=True, exist_ok=True)
# Page configuration (must be the first Streamlit call in the script)
st.set_page_config(
    page_title="๐ŸŽŒ Anime Translator",
    page_icon="๐ŸŽŒ",
    layout="wide",
    initial_sidebar_state="expanded"
)
# Initialize session state for animation control so values survive reruns
if 'animation_playing' not in st.session_state:
    st.session_state.animation_playing = True
if 'current_gif_path' not in st.session_state:
    st.session_state.current_gif_path = None
# =============================================================================
# Custom CSS Styling - UPDATED WITH ANIMATION FIX
# =============================================================================
# Inject global CSS: gradient theme, button/tab styling, and fixed-size
# animation containers so the play/pause frame swap doesn't shift the layout.
st.markdown("""
<style>
.main {
background: linear-gradient(135deg, #1a1a2e 0%, #16213e 50%, #0f3460 100%);
}
.main-header {
background: linear-gradient(90deg, #e94560, #ff6b6b);
-webkit-background-clip: text;
-webkit-text-fill-color: transparent;
font-size: 3rem;
font-weight: bold;
text-align: center;
padding: 1rem;
margin-bottom: 2rem;
}
.stButton > button {
background: linear-gradient(90deg, #e94560, #ff6b6b);
color: white;
border: none;
border-radius: 25px;
padding: 0.75rem 2rem;
font-weight: bold;
transition: all 0.3s ease;
width: 100%;
}
.stButton > button:hover {
transform: translateY(-2px);
box-shadow: 0 5px 20px rgba(233, 69, 96, 0.4);
}
.result-box {
background: linear-gradient(135deg, rgba(233, 69, 96, 0.1), rgba(255, 107, 107, 0.1));
border-radius: 15px;
padding: 1.5rem;
border: 1px solid rgba(233, 69, 96, 0.3);
margin: 1rem 0;
}
.info-box {
background: rgba(100, 200, 255, 0.1);
border-left: 4px solid #64c8ff;
padding: 1rem;
border-radius: 0 10px 10px 0;
margin: 1rem 0;
}
.success-box {
background: rgba(100, 255, 150, 0.1);
border-left: 4px solid #64ff96;
padding: 1rem;
border-radius: 0 10px 10px 0;
}
#MainMenu {visibility: hidden;}
footer {visibility: hidden;}
.stTabs [data-baseweb="tab-list"] {
gap: 8px;
}
.stTabs [data-baseweb="tab"] {
background: rgba(255, 255, 255, 0.05);
border-radius: 10px;
padding: 10px 20px;
}
.stTabs [aria-selected="true"] {
background: linear-gradient(90deg, #e94560, #ff6b6b);
}
/* ============================================= */
/* ANIMATION CONTAINER - FIXED SIZE */
/* ============================================= */
.animation-container {
width: 100%;
max-width: 400px;
height: 400px;
margin: 0 auto;
display: flex;
align-items: center;
justify-content: center;
background: rgba(0, 0, 0, 0.2);
border-radius: 15px;
overflow: hidden;
position: relative;
}
.animation-container img {
max-width: 100%;
max-height: 100%;
object-fit: contain;
}
.animation-container.paused img {
animation-play-state: paused !important;
}
/* Static image when paused */
.animation-static {
width: 100%;
max-width: 400px;
height: 400px;
margin: 0 auto;
display: flex;
align-items: center;
justify-content: center;
background: rgba(0, 0, 0, 0.2);
border-radius: 15px;
overflow: hidden;
}
.animation-static img {
max-width: 100%;
max-height: 100%;
object-fit: contain;
}
/* Animation controls */
.animation-controls {
display: flex;
justify-content: center;
gap: 10px;
margin-top: 10px;
}
.control-btn {
background: linear-gradient(90deg, #e94560, #ff6b6b);
color: white;
border: none;
border-radius: 20px;
padding: 8px 20px;
cursor: pointer;
font-weight: bold;
transition: all 0.3s ease;
}
.control-btn:hover {
transform: scale(1.05);
box-shadow: 0 3px 15px rgba(233, 69, 96, 0.4);
}
.control-btn.stop {
background: linear-gradient(90deg, #666, #888);
}
/* Fixed height result column */
.result-column {
min-height: 600px;
}
</style>
""", unsafe_allow_html=True)
# =============================================================================
# Helper Functions
# =============================================================================
def cleanup_temp_files(older_than_sec: int = 3600, base_dir: Optional[Path] = None) -> None:
    """Best-effort removal of stale entries under the temp directory.

    Args:
        older_than_sec: Only entries last modified more than this many
            seconds ago are removed (0 removes everything).
        base_dir: Directory to sweep; defaults to the module-level TEMP_DIR.
            (Backward-compatible addition; existing callers are unaffected.)

    All errors (permissions, races with other sessions, missing directory)
    are deliberately swallowed: cleanup must never break the app.
    """
    root = TEMP_DIR if base_dir is None else base_dir
    now = time.time()
    try:
        for path in root.iterdir():
            try:
                if now - path.stat().st_mtime > older_than_sec:
                    if path.is_file():
                        path.unlink()
                    elif path.is_dir():
                        shutil.rmtree(path)
            except Exception:
                # Skip entries we cannot stat or remove.
                pass
    except Exception:
        # Missing or unreadable root: nothing to clean.
        pass
def get_gif_first_frame(gif_path: str) -> Optional[str]:
    """Return frame 0 of a GIF as a base64-encoded PNG string, or None on failure."""
    try:
        from PIL import Image
        import io
        # Copy the first frame out of the animation so the file handle can close.
        with Image.open(gif_path) as animation:
            animation.seek(0)
            frame = animation.copy()
        # Re-encode the frame as PNG and base64 it for inline HTML embedding.
        png_buffer = io.BytesIO()
        frame.save(png_buffer, format='PNG')
        return base64.b64encode(png_buffer.getvalue()).decode()
    except Exception as e:
        # Pillow missing, unreadable file, or not an image: degrade to None.
        print(f"Error extracting first frame: {e}")
        return None
def display_animation_with_controls(gif_path: str, key_prefix: str = ""):
    """Display animation with play/pause/stop controls.

    Renders the GIF inline as a base64 <img> with Play/Pause/Stop buttons and
    a download button. Play state lives in st.session_state under
    "<key_prefix>_playing" so it survives Streamlit reruns; when paused or
    stopped, the GIF's first frame is shown as a static PNG instead.

    Args:
        gif_path: Path to the GIF file to display.
        key_prefix: Unique prefix for widget/session-state keys so multiple
            instances (text tab vs. voice tab) don't collide.
    """
    if not gif_path or not os.path.exists(gif_path):
        st.info("โ„น๏ธ No animation available")
        return
    # Read GIF file and inline it as base64 (no temp-file URL needed)
    with open(gif_path, "rb") as f:
        gif_data = f.read()
    gif_base64 = base64.b64encode(gif_data).decode()
    # Get first frame for static display (None if extraction fails)
    first_frame_base64 = get_gif_first_frame(gif_path)
    # Animation state key
    state_key = f"{key_prefix}_playing"
    if state_key not in st.session_state:
        st.session_state[state_key] = True
    # Control buttons; each click flips the state and forces a rerun so the
    # markup below re-renders in the new state.
    col1, col2, col3 = st.columns([1, 1, 1])
    with col1:
        if st.button("โ–ถ๏ธ Play", key=f"{key_prefix}_play", use_container_width=True):
            st.session_state[state_key] = True
            st.rerun()
    with col2:
        if st.button("โธ๏ธ Pause", key=f"{key_prefix}_pause", use_container_width=True):
            st.session_state[state_key] = False
            st.rerun()
    with col3:
        # NOTE(review): Stop currently behaves exactly like Pause (same state
        # flag); the GIF restarts from frame 0 on Play only because the <img>
        # is re-rendered — confirm this matches the intended UX.
        if st.button("โน๏ธ Stop", key=f"{key_prefix}_stop", use_container_width=True):
            st.session_state[state_key] = False
            st.rerun()
    # Display animation or static frame
    if st.session_state[state_key]:
        # Playing - show animated GIF
        st.markdown(
            f'''
<div class="animation-container">
<img src="data:image/gif;base64,{gif_base64}" alt="Lip-sync animation">
</div>
''',
            unsafe_allow_html=True
        )
    else:
        # Paused/Stopped - show first frame
        if first_frame_base64:
            st.markdown(
                f'''
<div class="animation-static">
<img src="data:image/png;base64,{first_frame_base64}" alt="Animation paused">
</div>
<p style="text-align: center; color: #888; margin-top: 10px;">โธ๏ธ Animation Paused</p>
''',
                unsafe_allow_html=True
            )
        else:
            # First-frame extraction failed (e.g. Pillow unavailable)
            st.info("Animation paused")
    # Download button
    st.download_button(
        label="๐Ÿ“ฅ Download Animation",
        data=gif_data,
        file_name="lipsync_animation.gif",
        mime="image/gif",
        key=f"{key_prefix}_download",
        use_container_width=True
    )
def process_translation_pipeline(
    text: str,
    source_lang: str,
    target_lang: str,
    avatar_name: str
) -> Tuple[str, Optional[str], Optional[str]]:
    """Main processing pipeline: translate, synthesize speech, generate animation.

    Args:
        text: Input text to translate.
        source_lang: Source language code (e.g. "auto", "en", "hi").
        target_lang: Target language code (e.g. "en", "hi").
        avatar_name: Avatar to animate, resolved against AVATARS_DIR.

    Returns:
        Tuple of (translated_text, audio_path, gif_path). gif_path is None
        when animation generation fails (e.g. FFmpeg missing) — translation
        and audio are still returned.

    Raises:
        RuntimeError: If translation or speech synthesis fails. (Previously a
            bare Exception without the original cause; RuntimeError is a
            subclass of Exception, so existing `except Exception` callers
            still work, and `from e` preserves the traceback chain.)
    """
    # Step 1: Translate text
    try:
        translated_text = translate_text(text, source_lang, target_lang)
    except Exception as e:
        raise RuntimeError(f"Translation failed: {str(e)}") from e
    # Step 2: Synthesize speech
    try:
        audio_path = synthesize_speech(translated_text, target_lang, TEMP_DIR)
    except Exception as e:
        raise RuntimeError(f"Speech synthesis failed: {str(e)}") from e
    # Step 3: Generate lip-sync animation (non-fatal: fall back to None so
    # the user still gets text + audio when animation cannot be produced)
    gif_path = None
    try:
        gif_path = generate_lipsync_gif(
            avatar_name=avatar_name,
            audio_path=audio_path,
            avatars_dir=AVATARS_DIR,
            output_dir=TEMP_DIR,
            fps=12
        )
    except Exception as e:
        print(f"Animation generation warning: {str(e)}")
        gif_path = None
    return translated_text, audio_path, gif_path
# =============================================================================
# Sidebar
# =============================================================================
# Sidebar: avatar choice, language pair, animation prefs, and system status.
# The names bound here (selected_avatar, source_language, target_language,
# auto_play) are read by both input tabs below.
with st.sidebar:
    st.markdown("## โš™๏ธ Settings")
    # Avatar selection
    st.markdown("### ๐ŸŽญ Avatar Selection")
    avatars = list_avatars(AVATARS_DIR)
    if avatars:
        selected_avatar = st.selectbox(
            "Choose your avatar",
            options=avatars,
            index=0,
            help="Select an anime avatar for lip-sync animation"
        )
        preview = get_avatar_preview(selected_avatar, AVATARS_DIR)
        if preview:
            st.image(preview, caption=f"Preview: {selected_avatar}", use_container_width=True)
    else:
        # First run: create a placeholder avatar, then rerun so the selectbox
        # branch above can pick it up.
        st.warning("No avatars found. Creating sample avatar...")
        ensure_sample_avatar(AVATARS_DIR)
        selected_avatar = "sample"
        st.rerun()
    st.markdown("---")
    # Language settings
    st.markdown("### ๐ŸŒ Language Settings")
    source_language = st.selectbox(
        "Source Language",
        options=["auto", "en", "hi"],
        format_func=lambda x: {"auto": "๐Ÿ”„ Auto-detect", "en": "๐Ÿ‡ฌ๐Ÿ‡ง English", "hi": "๐Ÿ‡ฎ๐Ÿ‡ณ Hindi"}[x],
        index=0
    )
    target_language = st.selectbox(
        "Target Language",
        options=["en", "hi"],
        format_func=lambda x: {"en": "๐Ÿ‡ฌ๐Ÿ‡ง English", "hi": "๐Ÿ‡ฎ๐Ÿ‡ณ Hindi"}[x],
        index=1
    )
    st.markdown("---")
    # Animation settings
    st.markdown("### ๐ŸŽฌ Animation Settings")
    # NOTE(review): animation_size does not appear to be applied to the
    # rendered animation container anywhere in this file — confirm intent.
    animation_size = st.slider(
        "Animation Size",
        min_value=200,
        max_value=500,
        value=350,
        step=50,
        help="Adjust the display size of the animation"
    )
    auto_play = st.checkbox("Auto-play animation", value=True)
    st.markdown("---")
    # System status: live FFmpeg/pydub health check shown to the user
    st.markdown("### ๐Ÿ”ง System Status")
    ffmpeg_status = check_ffmpeg_detailed()
    if ffmpeg_status["ffmpeg_works"]:
        st.success("โœ… FFmpeg: Working")
    else:
        st.error("โŒ FFmpeg: Not working")
    if ffmpeg_status["pydub_works"]:
        st.success("โœ… Pydub: Working")
    else:
        st.warning("โš ๏ธ Pydub: Limited (fallback mode)")
    if ffmpeg_status["error_message"]:
        with st.expander("๐Ÿ” Error Details"):
            st.code(ffmpeg_status["error_message"])
            st.markdown("""
**To fix FFmpeg:**
```bash
conda install -c conda-forge ffmpeg
```
Or download from: https://www.gyan.dev/ffmpeg/builds/
""")
    st.markdown("---")
    # Info section
    st.markdown("### โ„น๏ธ About")
    st.markdown("""
Translate text between English and Hindi with lip-synced avatar animation.
**Features:**
- ๐ŸŽค Voice input
- ๐Ÿ”„ Auto detection
- ๐Ÿ—ฃ๏ธ Text-to-speech
- ๐ŸŽฌ Lip-sync animation
""")
    # Manual cleanup: older_than_sec=0 removes everything in TEMP_DIR
    if st.button("๐Ÿงน Clear Temp Files"):
        cleanup_temp_files(older_than_sec=0)
        st.success("Cleared!")
# =============================================================================
# Main Content
# =============================================================================
# Page title, tagline, and the two input-mode tabs.
st.markdown('<h1 class="main-header">๐ŸŽŒ Anime Translator</h1>', unsafe_allow_html=True)
st.markdown(
    '<p style="text-align: center; color: #888; font-size: 1.2rem;">'
    'Translate โ€ข Speak โ€ข Animate</p>',
    unsafe_allow_html=True
)
# Tabs: typed text input vs. uploaded/recorded voice input
tab1, tab2 = st.tabs(["๐Ÿ“ Text Input", "๐ŸŽค Voice Input"])
# =============================================================================
# Tab 1: Text Input
# =============================================================================
with tab1:
    col1, col2 = st.columns([1, 1])
    with col1:
        st.markdown("### ๐Ÿ“ Enter Your Text")
        text_input = st.text_area(
            "Type or paste your text here",
            height=150,
            placeholder="Enter text in English or Hindi...\nเค‰เคฆเคพเคนเคฐเคฃ: เคจเคฎเคธเฅเคคเฅ‡, เค†เคช เค•เฅˆเคธเฅ‡ เคนเฅˆเค‚?\nExample: Hello, how are you?",
            key="text_input"
        )
        if text_input:
            # Live feedback: character count plus language auto-detection
            detected = detect_language(text_input)
            st.markdown(
                f'<div class="info-box">'
                f'๐Ÿ“Š Characters: {len(text_input)} | '
                f'๐Ÿ” Detected: {"๐Ÿ‡ฎ๐Ÿ‡ณ Hindi" if detected == "hi" else "๐Ÿ‡ฌ๐Ÿ‡ง English"}'
                f'</div>',
                unsafe_allow_html=True
            )
        translate_btn = st.button(
            "๐Ÿš€ Translate & Animate",
            key="translate_text_btn",
            use_container_width=True
        )
    with col2:
        st.markdown("### ๐ŸŽฌ Result")
        # Create a container with fixed height
        result_container = st.container()
        with result_container:
            if translate_btn and text_input:
                with st.spinner("๐Ÿ”„ Processing..."):
                    progress = st.progress(0)
                    status_text = st.empty()
                    try:
                        # The percentages are approximate stage markers; the
                        # whole pipeline actually runs in the single call below.
                        status_text.text("๐Ÿ“ Translating...")
                        progress.progress(33)
                        translated, audio_path, gif_path = process_translation_pipeline(
                            text_input,
                            source_language,
                            target_language,
                            selected_avatar
                        )
                        status_text.text("๐Ÿ—ฃ๏ธ Generating speech...")
                        progress.progress(66)
                        status_text.text("๐ŸŽฌ Creating animation...")
                        progress.progress(100)
                        progress.empty()
                        status_text.empty()
                        # Store results in session state so they persist across
                        # the reruns triggered by the animation control buttons
                        st.session_state['text_result'] = {
                            'translated': translated,
                            'audio_path': audio_path,
                            'gif_path': gif_path
                        }
                        st.session_state['text_animation_playing'] = auto_play
                    except Exception as e:
                        progress.empty()
                        status_text.empty()
                        st.error(f"โŒ Error: {str(e)}")
            elif translate_btn:
                st.warning("โš ๏ธ Please enter some text to translate.")
        # Display stored results
        if 'text_result' in st.session_state:
            result = st.session_state['text_result']
            # Display translated text
            st.markdown(
                f'<div class="result-box">'
                f'<h4>๐Ÿ“œ Translated Text:</h4>'
                f'<p style="font-size: 1.2rem;">{result["translated"]}</p>'
                f'</div>',
                unsafe_allow_html=True
            )
            # Audio player
            if result['audio_path'] and os.path.exists(result['audio_path']):
                st.markdown("#### ๐Ÿ”Š Audio")
                st.audio(result['audio_path'], format="audio/mp3")
            # Animation display with controls
            if result['gif_path'] and os.path.exists(result['gif_path']):
                st.markdown("#### ๐ŸŽญ Lip-Sync Animation")
                display_animation_with_controls(result['gif_path'], key_prefix="text")
            else:
                st.info("โ„น๏ธ Animation not available (FFmpeg may be missing)")
# =============================================================================
# Tab 2: Voice Input
# =============================================================================
with tab2:
    col1, col2 = st.columns([1, 1])
    with col1:
        st.markdown("### ๐ŸŽค Voice Recording")
        st.markdown("""
<div class="info-box">
<strong>Instructions:</strong><br>
1. Upload an audio file (WAV, MP3, etc.)<br>
2. Or use the audio recorder below<br>
3. Click "Transcribe & Translate"
</div>
""", unsafe_allow_html=True)
        uploaded_audio = st.file_uploader(
            "Upload an audio file",
            type=["wav", "mp3", "ogg", "flac", "m4a"],
            help="Supported formats: WAV, MP3, OGG, FLAC, M4A"
        )
        # Optional in-browser recorder (extra third-party dependency)
        recorded_audio = None
        try:
            from audio_recorder_streamlit import audio_recorder
            st.markdown("**Or record directly:**")
            recorded_audio = audio_recorder(
                text="๐ŸŽ™๏ธ Click to record",
                recording_color="#e94560",
                neutral_color="#6c757d",
                icon_name="microphone",
                icon_size="2x"
            )
        except ImportError:
            st.info("๐Ÿ’ก For recording: `pip install audio-recorder-streamlit`")
        voice_lang = st.selectbox(
            "Recording Language",
            options=["en", "hi"],
            format_func=lambda x: {"en": "๐Ÿ‡ฌ๐Ÿ‡ง English", "hi": "๐Ÿ‡ฎ๐Ÿ‡ณ Hindi"}[x]
        )
        voice_btn = st.button(
            "๐ŸŽฏ Transcribe & Translate",
            key="voice_btn",
            use_container_width=True
        )
    with col2:
        st.markdown("### ๐ŸŽฌ Result")
        # Persist whichever audio source is available to TEMP_DIR for the
        # transcriber (which needs a real file path).
        audio_to_process = None
        if uploaded_audio is not None:
            # BUGFIX: preserve the uploaded file's real extension. Previously
            # every upload was saved as ".wav", mislabeling e.g. MP3/OGG
            # uploads for downstream transcription/decoding tools.
            suffix = Path(uploaded_audio.name).suffix.lower() or ".wav"
            temp_audio_path = TEMP_DIR / f"uploaded_{int(time.time()*1000)}{suffix}"
            with open(temp_audio_path, "wb") as f:
                f.write(uploaded_audio.getbuffer())
            audio_to_process = str(temp_audio_path)
            st.audio(uploaded_audio)
        elif recorded_audio is not None:
            # The recorder widget returns WAV bytes
            temp_audio_path = TEMP_DIR / f"recorded_{int(time.time()*1000)}.wav"
            with open(temp_audio_path, "wb") as f:
                f.write(recorded_audio)
            audio_to_process = str(temp_audio_path)
            st.audio(recorded_audio, format="audio/wav")
        if voice_btn:
            if audio_to_process:
                with st.spinner("๐Ÿ”„ Processing voice..."):
                    try:
                        st.text("๐ŸŽค Transcribing...")
                        lang_code = get_language_code(voice_lang)
                        transcribed_text, success = transcribe_audio(audio_to_process, lang_code)
                        if success:
                            translated, audio_path, gif_path = process_translation_pipeline(
                                transcribed_text,
                                voice_lang,
                                target_language,
                                selected_avatar
                            )
                            # Store results in session state so they survive
                            # the reruns triggered by animation controls
                            st.session_state['voice_result'] = {
                                'transcribed': transcribed_text,
                                'translated': translated,
                                'audio_path': audio_path,
                                'gif_path': gif_path
                            }
                            st.session_state['voice_animation_playing'] = auto_play
                        else:
                            # On failure, transcribe_audio returns the error
                            # text in place of the transcript
                            st.error(f"โŒ {transcribed_text}")
                    except Exception as e:
                        st.error(f"โŒ Error: {str(e)}")
            else:
                st.warning("โš ๏ธ Please upload or record audio first.")
        # Display stored results
        if 'voice_result' in st.session_state:
            result = st.session_state['voice_result']
            st.markdown(
                f'<div class="success-box">'
                f'<strong>๐Ÿ“ Transcribed:</strong> {result["transcribed"]}'
                f'</div>',
                unsafe_allow_html=True
            )
            st.markdown(
                f'<div class="result-box">'
                f'<h4>๐Ÿ“œ Translated:</h4>'
                f'<p style="font-size: 1.2rem;">{result["translated"]}</p>'
                f'</div>',
                unsafe_allow_html=True
            )
            if result['audio_path'] and os.path.exists(result['audio_path']):
                st.markdown("#### ๐Ÿ”Š Audio")
                st.audio(result['audio_path'], format="audio/mp3")
            if result['gif_path'] and os.path.exists(result['gif_path']):
                st.markdown("#### ๐ŸŽญ Animation")
                display_animation_with_controls(result['gif_path'], key_prefix="voice")
# =============================================================================
# Footer
# =============================================================================
st.markdown("---")
st.markdown(
    """
<div style="text-align: center; color: #666; padding: 1rem;">
<p>Made By Praveen</p>
</div>
""",
    unsafe_allow_html=True
)