Spaces:
Sleeping
Sleeping
Upload 19 files
Browse files- Dockerfile +52 -20
- app.py +607 -0
- avatars/sample/base.png +0 -0
- avatars/sample/mouth_0.png +0 -0
- avatars/sample/mouth_1.png +0 -0
- avatars/sample/mouth_2.png +0 -0
- requirements.txt +11 -3
- utils/__init__.py +16 -0
- utils/__pycache__/__init__.cpython-310.pyc +0 -0
- utils/__pycache__/avatar_manager.cpython-310.pyc +0 -0
- utils/__pycache__/lipsync.cpython-310.pyc +0 -0
- utils/__pycache__/speech_to_text.cpython-310.pyc +0 -0
- utils/__pycache__/translator.cpython-310.pyc +0 -0
- utils/__pycache__/tts_engine.cpython-310.pyc +0 -0
- utils/avatar_manager.py +189 -0
- utils/lipsync.py +207 -0
- utils/speech_to_text.py +113 -0
- utils/translator.py +84 -0
- utils/tts_engine.py +88 -0
Dockerfile
CHANGED
|
@@ -1,20 +1,52 @@
|
|
| 1 |
-
|
| 2 |
-
|
| 3 |
-
|
| 4 |
-
|
| 5 |
-
|
| 6 |
-
|
| 7 |
-
|
| 8 |
-
|
| 9 |
-
|
| 10 |
-
|
| 11 |
-
|
| 12 |
-
|
| 13 |
-
|
| 14 |
-
|
| 15 |
-
|
| 16 |
-
|
| 17 |
-
|
| 18 |
-
|
| 19 |
-
|
| 20 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
# Use Python base image
FROM python:3.10-slim

# Set working directory
WORKDIR /app

# Install system dependencies including FFmpeg.
# - curl is required by the HEALTHCHECK below (it was missing before, so the
#   healthcheck could never succeed).
# - libgl1 replaces the misspelled, nonexistent "libgl1-mesa-glb" (the OpenGL
#   runtime needed by imaging libraries on Debian bookworm).
RUN apt-get update && apt-get install -y --no-install-recommends \
    ffmpeg \
    curl \
    libsm6 \
    libxext6 \
    libgl1 \
    git \
    && apt-get clean \
    && rm -rf /var/lib/apt/lists/*

# Create non-root user for security (required by HF Spaces)
RUN useradd -m -u 1000 user
USER user
ENV HOME=/home/user \
    PATH=/home/user/.local/bin:$PATH

# Set working directory for user
WORKDIR $HOME/app

# Copy requirements first (for caching)
COPY --chown=user:user requirements.txt .

# Install Python dependencies
RUN pip install --no-cache-dir --upgrade pip && \
    pip install --no-cache-dir -r requirements.txt

# Copy application files
COPY --chown=user:user . .

# Create temp directory
RUN mkdir -p temp

# Expose Streamlit port
EXPOSE 7860

# Health check (hits Streamlit's built-in health endpoint)
HEALTHCHECK CMD curl --fail http://localhost:7860/_stcore/health || exit 1

# Set environment variables for Streamlit
ENV STREAMLIT_SERVER_PORT=7860 \
    STREAMLIT_SERVER_ADDRESS=0.0.0.0 \
    STREAMLIT_SERVER_HEADLESS=true \
    STREAMLIT_BROWSER_GATHER_USAGE_STATS=false

# Run the application
CMD ["streamlit", "run", "app.py"]
|
app.py
ADDED
|
@@ -0,0 +1,607 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
"""
|
| 2 |
+
🎌 Anime Translator with Lip-Sync
|
| 3 |
+
=================================
|
| 4 |
+
|
| 5 |
+
A Streamlit application that translates text between English and Hindi,
|
| 6 |
+
converts it to speech, and generates a lip-synced anime avatar animation.
|
| 7 |
+
"""
|
| 8 |
+
|
| 9 |
+
import streamlit as st
|
| 10 |
+
from pathlib import Path
|
| 11 |
+
import tempfile
|
| 12 |
+
import time
|
| 13 |
+
import shutil
|
| 14 |
+
import os
|
| 15 |
+
import subprocess
|
| 16 |
+
from shutil import which
|
| 17 |
+
from typing import Tuple, Optional
|
| 18 |
+
|
| 19 |
+
# Import utility modules
|
| 20 |
+
from utils.translator import translate_text, detect_language
|
| 21 |
+
from utils.tts_engine import synthesize_speech, get_audio_duration
|
| 22 |
+
from utils.lipsync import generate_lipsync_gif
|
| 23 |
+
from utils.speech_to_text import transcribe_audio, get_language_code
|
| 24 |
+
from utils.avatar_manager import list_avatars, get_avatar_preview, ensure_sample_avatar
|
| 25 |
+
|
| 26 |
+
# =============================================================================
|
| 27 |
+
# FFmpeg Configuration
|
| 28 |
+
# =============================================================================
|
| 29 |
+
|
| 30 |
+
def configure_ffmpeg():
    """Make FFmpeg discoverable for pydub, probing common Windows locations.

    Checks PATH first (covers Linux/macOS and properly installed Windows
    setups); otherwise scans a list of typical Windows install directories,
    prepends the first match to PATH, and points pydub's converter/ffprobe
    at the found binaries.

    Returns:
        bool: True if an ffmpeg binary was found, False otherwise.
    """
    # Common Windows install locations to probe when ffmpeg is not on PATH.
    possible_paths = [
        r"C:\ffmpeg\bin",
        r"C:\Program Files\ffmpeg\bin",
        r"C:\Program Files (x86)\ffmpeg\bin",
        os.path.expanduser("~\\ffmpeg\\bin"),
        r"C:\Users\Nishant Pratap\ffmpeg\bin",  # user-specific path
    ]

    if which("ffmpeg") is not None:
        return True

    for path in possible_paths:
        ffmpeg_exe = os.path.join(path, "ffmpeg.exe")
        if os.path.exists(ffmpeg_exe):
            os.environ["PATH"] = path + os.pathsep + os.environ.get("PATH", "")
            # Also configure pydub directly; pydub may not be installed,
            # so tolerate (only) ordinary exceptions -- the bare `except:`
            # here previously swallowed KeyboardInterrupt/SystemExit too.
            try:
                from pydub import AudioSegment
                AudioSegment.converter = ffmpeg_exe
                AudioSegment.ffprobe = os.path.join(path, "ffprobe.exe")
            except Exception:
                pass
            return True

    return False
|
| 57 |
+
|
| 58 |
+
|
| 59 |
+
def check_ffmpeg_detailed():
    """Probe the FFmpeg toolchain and report a detailed status dictionary.

    Returns a dict with boolean flags ``ffmpeg_in_path``, ``ffmpeg_works``,
    ``ffprobe_works``, ``pydub_works`` and an ``error_message`` string
    (or None) describing the first failure encountered.
    """
    def version_ok(executable):
        # Run "<exe> -version" and report whether it exited cleanly.
        proc = subprocess.run(
            [executable, "-version"],
            capture_output=True,
            text=True,
            timeout=5,
        )
        return proc.returncode == 0

    status = {
        "ffmpeg_in_path": which("ffmpeg") is not None,
        "ffmpeg_works": False,
        "ffprobe_works": False,
        "pydub_works": False,
        "error_message": None,
    }

    # ffmpeg probe: record the failure reason if it cannot be launched.
    try:
        status["ffmpeg_works"] = version_ok("ffmpeg")
    except Exception as exc:
        status["error_message"] = str(exc)

    # ffprobe probe: failures here are silently ignored.
    try:
        status["ffprobe_works"] = version_ok("ffprobe")
    except Exception:
        pass

    # pydub probe: generating a short silence exercises its audio backend.
    try:
        from pydub import AudioSegment
        AudioSegment.silent(duration=100)
        status["pydub_works"] = True
    except Exception as exc:
        status["pydub_works"] = False
        if not status["error_message"]:
            status["error_message"] = str(exc)

    return status
|
| 104 |
+
|
| 105 |
+
|
| 106 |
+
ffmpeg_found = configure_ffmpeg()
|
| 107 |
+
|
| 108 |
+
# =============================================================================
|
| 109 |
+
# Configuration
|
| 110 |
+
# =============================================================================
|
| 111 |
+
|
| 112 |
+
AVATARS_DIR = Path("./avatars")
|
| 113 |
+
TEMP_DIR = Path(tempfile.gettempdir()) / "anime_translator"
|
| 114 |
+
|
| 115 |
+
AVATARS_DIR.mkdir(parents=True, exist_ok=True)
|
| 116 |
+
TEMP_DIR.mkdir(parents=True, exist_ok=True)
|
| 117 |
+
|
| 118 |
+
# Page configuration
|
| 119 |
+
st.set_page_config(
|
| 120 |
+
page_title="🎌 Anime Translator",
|
| 121 |
+
page_icon="🎌",
|
| 122 |
+
layout="wide",
|
| 123 |
+
initial_sidebar_state="expanded"
|
| 124 |
+
)
|
| 125 |
+
|
| 126 |
+
# =============================================================================
|
| 127 |
+
# Custom CSS Styling
|
| 128 |
+
# =============================================================================
|
| 129 |
+
|
| 130 |
+
st.markdown("""
|
| 131 |
+
<style>
|
| 132 |
+
.main {
|
| 133 |
+
background: linear-gradient(135deg, #1a1a2e 0%, #16213e 50%, #0f3460 100%);
|
| 134 |
+
}
|
| 135 |
+
|
| 136 |
+
.main-header {
|
| 137 |
+
background: linear-gradient(90deg, #e94560, #ff6b6b);
|
| 138 |
+
-webkit-background-clip: text;
|
| 139 |
+
-webkit-text-fill-color: transparent;
|
| 140 |
+
font-size: 3rem;
|
| 141 |
+
font-weight: bold;
|
| 142 |
+
text-align: center;
|
| 143 |
+
padding: 1rem;
|
| 144 |
+
margin-bottom: 2rem;
|
| 145 |
+
}
|
| 146 |
+
|
| 147 |
+
.stButton > button {
|
| 148 |
+
background: linear-gradient(90deg, #e94560, #ff6b6b);
|
| 149 |
+
color: white;
|
| 150 |
+
border: none;
|
| 151 |
+
border-radius: 25px;
|
| 152 |
+
padding: 0.75rem 2rem;
|
| 153 |
+
font-weight: bold;
|
| 154 |
+
transition: all 0.3s ease;
|
| 155 |
+
width: 100%;
|
| 156 |
+
}
|
| 157 |
+
|
| 158 |
+
.stButton > button:hover {
|
| 159 |
+
transform: translateY(-2px);
|
| 160 |
+
box-shadow: 0 5px 20px rgba(233, 69, 96, 0.4);
|
| 161 |
+
}
|
| 162 |
+
|
| 163 |
+
.result-box {
|
| 164 |
+
background: linear-gradient(135deg, rgba(233, 69, 96, 0.1), rgba(255, 107, 107, 0.1));
|
| 165 |
+
border-radius: 15px;
|
| 166 |
+
padding: 1.5rem;
|
| 167 |
+
border: 1px solid rgba(233, 69, 96, 0.3);
|
| 168 |
+
margin: 1rem 0;
|
| 169 |
+
}
|
| 170 |
+
|
| 171 |
+
.info-box {
|
| 172 |
+
background: rgba(100, 200, 255, 0.1);
|
| 173 |
+
border-left: 4px solid #64c8ff;
|
| 174 |
+
padding: 1rem;
|
| 175 |
+
border-radius: 0 10px 10px 0;
|
| 176 |
+
margin: 1rem 0;
|
| 177 |
+
}
|
| 178 |
+
|
| 179 |
+
.success-box {
|
| 180 |
+
background: rgba(100, 255, 150, 0.1);
|
| 181 |
+
border-left: 4px solid #64ff96;
|
| 182 |
+
padding: 1rem;
|
| 183 |
+
border-radius: 0 10px 10px 0;
|
| 184 |
+
}
|
| 185 |
+
|
| 186 |
+
#MainMenu {visibility: hidden;}
|
| 187 |
+
footer {visibility: hidden;}
|
| 188 |
+
|
| 189 |
+
.stTabs [data-baseweb="tab-list"] {
|
| 190 |
+
gap: 8px;
|
| 191 |
+
}
|
| 192 |
+
|
| 193 |
+
.stTabs [data-baseweb="tab"] {
|
| 194 |
+
background: rgba(255, 255, 255, 0.05);
|
| 195 |
+
border-radius: 10px;
|
| 196 |
+
padding: 10px 20px;
|
| 197 |
+
}
|
| 198 |
+
|
| 199 |
+
.stTabs [aria-selected="true"] {
|
| 200 |
+
background: linear-gradient(90deg, #e94560, #ff6b6b);
|
| 201 |
+
}
|
| 202 |
+
</style>
|
| 203 |
+
""", unsafe_allow_html=True)
|
| 204 |
+
|
| 205 |
+
# =============================================================================
|
| 206 |
+
# Helper Functions
|
| 207 |
+
# =============================================================================
|
| 208 |
+
|
| 209 |
+
def cleanup_temp_files(older_than_sec: int = 3600) -> None:
    """Delete entries in TEMP_DIR whose mtime is older than the cutoff.

    Best-effort cleanup: every failure (missing directory, permission
    errors, races with other sessions) is swallowed so it can never
    disrupt the UI.

    Args:
        older_than_sec: Age threshold in seconds; 0 removes everything.
    """
    cutoff = time.time() - older_than_sec
    try:
        for entry in TEMP_DIR.iterdir():
            try:
                if entry.stat().st_mtime < cutoff:
                    if entry.is_file():
                        entry.unlink()
                    elif entry.is_dir():
                        shutil.rmtree(entry)
            except Exception:
                # Per-entry failure (file vanished, locked, ...): skip it.
                pass
    except Exception:
        # TEMP_DIR itself may be missing or unreadable -- nothing to do.
        pass
|
| 224 |
+
|
| 225 |
+
|
| 226 |
+
def process_translation_pipeline(
    text: str,
    source_lang: str,
    target_lang: str,
    avatar_name: str
) -> Tuple[str, Optional[str], Optional[str]]:
    """Main processing pipeline: translate, synthesize speech, generate animation.

    Args:
        text: Input text to translate.
        source_lang: Source language code ("auto", "en", "hi").
        target_lang: Target language code ("en", "hi").
        avatar_name: Name of the avatar directory used for lip-sync frames.

    Returns:
        Tuple of (translated_text, audio_path, gif_path). ``gif_path`` is
        None when animation generation fails (e.g. FFmpeg is unavailable).

    Raises:
        Exception: if translation or speech synthesis fails; the original
            exception is chained as the cause.
    """
    # Step 1: Translate text
    try:
        translated_text = translate_text(text, source_lang, target_lang)
    except Exception as e:
        # Chain the cause so the original traceback is preserved for debugging.
        raise Exception(f"Translation failed: {e}") from e

    # Step 2: Synthesize speech
    try:
        audio_path = synthesize_speech(translated_text, target_lang, TEMP_DIR)
    except Exception as e:
        raise Exception(f"Speech synthesis failed: {e}") from e

    # Step 3: Generate lip-sync animation (best-effort -- the translated
    # text and audio are still useful without the GIF).
    gif_path = None
    try:
        gif_path = generate_lipsync_gif(
            avatar_name=avatar_name,
            audio_path=audio_path,
            avatars_dir=AVATARS_DIR,
            output_dir=TEMP_DIR,
            fps=12
        )
    except Exception as e:
        # Don't fail completely if animation fails
        print(f"Animation generation warning: {e}")
        gif_path = None

    return translated_text, audio_path, gif_path
|
| 262 |
+
|
| 263 |
+
|
| 264 |
+
# =============================================================================
|
| 265 |
+
# Sidebar
|
| 266 |
+
# =============================================================================
|
| 267 |
+
|
| 268 |
+
with st.sidebar:
|
| 269 |
+
st.markdown("## ⚙️ Settings")
|
| 270 |
+
|
| 271 |
+
# Avatar selection
|
| 272 |
+
st.markdown("### 🎭 Avatar Selection")
|
| 273 |
+
avatars = list_avatars(AVATARS_DIR)
|
| 274 |
+
|
| 275 |
+
if avatars:
|
| 276 |
+
selected_avatar = st.selectbox(
|
| 277 |
+
"Choose your avatar",
|
| 278 |
+
options=avatars,
|
| 279 |
+
index=0,
|
| 280 |
+
help="Select an anime avatar for lip-sync animation"
|
| 281 |
+
)
|
| 282 |
+
|
| 283 |
+
preview = get_avatar_preview(selected_avatar, AVATARS_DIR)
|
| 284 |
+
if preview:
|
| 285 |
+
st.image(preview, caption=f"Preview: {selected_avatar}", width="stretch")
|
| 286 |
+
else:
|
| 287 |
+
st.warning("No avatars found. Creating sample avatar...")
|
| 288 |
+
ensure_sample_avatar(AVATARS_DIR)
|
| 289 |
+
selected_avatar = "sample"
|
| 290 |
+
st.rerun()
|
| 291 |
+
|
| 292 |
+
st.markdown("---")
|
| 293 |
+
|
| 294 |
+
# Language settings
|
| 295 |
+
st.markdown("### 🌐 Language Settings")
|
| 296 |
+
|
| 297 |
+
source_language = st.selectbox(
|
| 298 |
+
"Source Language",
|
| 299 |
+
options=["auto", "en", "hi"],
|
| 300 |
+
format_func=lambda x: {"auto": "🔄 Auto-detect", "en": "🇬🇧 English", "hi": "🇮🇳 Hindi"}[x],
|
| 301 |
+
index=0
|
| 302 |
+
)
|
| 303 |
+
|
| 304 |
+
target_language = st.selectbox(
|
| 305 |
+
"Target Language",
|
| 306 |
+
options=["en", "hi"],
|
| 307 |
+
format_func=lambda x: {"en": "🇬🇧 English", "hi": "🇮🇳 Hindi"}[x],
|
| 308 |
+
index=1
|
| 309 |
+
)
|
| 310 |
+
|
| 311 |
+
st.markdown("---")
|
| 312 |
+
|
| 313 |
+
# System status
|
| 314 |
+
st.markdown("### 🔧 System Status")
|
| 315 |
+
|
| 316 |
+
ffmpeg_status = check_ffmpeg_detailed()
|
| 317 |
+
|
| 318 |
+
if ffmpeg_status["ffmpeg_works"]:
|
| 319 |
+
st.success("✅ FFmpeg: Working")
|
| 320 |
+
else:
|
| 321 |
+
st.error("❌ FFmpeg: Not working")
|
| 322 |
+
|
| 323 |
+
if ffmpeg_status["pydub_works"]:
|
| 324 |
+
st.success("✅ Pydub: Working")
|
| 325 |
+
else:
|
| 326 |
+
st.warning("⚠️ Pydub: Limited (fallback mode)")
|
| 327 |
+
|
| 328 |
+
if ffmpeg_status["error_message"]:
|
| 329 |
+
with st.expander("🔍 Error Details"):
|
| 330 |
+
st.code(ffmpeg_status["error_message"])
|
| 331 |
+
st.markdown("""
|
| 332 |
+
**To fix FFmpeg:**
|
| 333 |
+
```bash
|
| 334 |
+
conda install -c conda-forge ffmpeg
|
| 335 |
+
```
|
| 336 |
+
Or download from: https://www.gyan.dev/ffmpeg/builds/
|
| 337 |
+
""")
|
| 338 |
+
|
| 339 |
+
st.markdown("---")
|
| 340 |
+
|
| 341 |
+
# Info section
|
| 342 |
+
st.markdown("### ℹ️ About")
|
| 343 |
+
st.markdown("""
|
| 344 |
+
Translate text between English and Hindi with lip-synced avatar animation.
|
| 345 |
+
|
| 346 |
+
**Features:**
|
| 347 |
+
- 🎤 Voice input
|
| 348 |
+
- 🔄 Auto detection
|
| 349 |
+
- 🗣️ Text-to-speech
|
| 350 |
+
- 🎬 Lip-sync animation
|
| 351 |
+
""")
|
| 352 |
+
|
| 353 |
+
if st.button("🧹 Clear Temp Files"):
|
| 354 |
+
cleanup_temp_files(older_than_sec=0)
|
| 355 |
+
st.success("Cleared!")
|
| 356 |
+
|
| 357 |
+
|
| 358 |
+
# =============================================================================
|
| 359 |
+
# Main Content
|
| 360 |
+
# =============================================================================
|
| 361 |
+
|
| 362 |
+
st.markdown('<h1 class="main-header">🎌 Anime Translator</h1>', unsafe_allow_html=True)
|
| 363 |
+
st.markdown(
|
| 364 |
+
'<p style="text-align: center; color: #888; font-size: 1.2rem;">'
|
| 365 |
+
'Translate • Speak • Animate</p>',
|
| 366 |
+
unsafe_allow_html=True
|
| 367 |
+
)
|
| 368 |
+
|
| 369 |
+
# Tabs
|
| 370 |
+
tab1, tab2 = st.tabs(["📝 Text Input", "🎤 Voice Input"])
|
| 371 |
+
|
| 372 |
+
# =============================================================================
|
| 373 |
+
# Tab 1: Text Input
|
| 374 |
+
# =============================================================================
|
| 375 |
+
|
| 376 |
+
with tab1:
|
| 377 |
+
col1, col2 = st.columns([1, 1])
|
| 378 |
+
|
| 379 |
+
with col1:
|
| 380 |
+
st.markdown("### 📝 Enter Your Text")
|
| 381 |
+
|
| 382 |
+
text_input = st.text_area(
|
| 383 |
+
"Type or paste your text here",
|
| 384 |
+
height=150,
|
| 385 |
+
placeholder="Enter text in English or Hindi...\nउदाहरण: नमस्ते, आप कैसे हैं?\nExample: Hello, how are you?",
|
| 386 |
+
key="text_input"
|
| 387 |
+
)
|
| 388 |
+
|
| 389 |
+
if text_input:
|
| 390 |
+
detected = detect_language(text_input)
|
| 391 |
+
st.markdown(
|
| 392 |
+
f'<div class="info-box">'
|
| 393 |
+
f'📊 Characters: {len(text_input)} | '
|
| 394 |
+
f'🔍 Detected: {"🇮🇳 Hindi" if detected == "hi" else "🇬🇧 English"}'
|
| 395 |
+
f'</div>',
|
| 396 |
+
unsafe_allow_html=True
|
| 397 |
+
)
|
| 398 |
+
|
| 399 |
+
translate_btn = st.button(
|
| 400 |
+
"🚀 Translate & Animate",
|
| 401 |
+
key="translate_text_btn",
|
| 402 |
+
use_container_width=True
|
| 403 |
+
)
|
| 404 |
+
|
| 405 |
+
with col2:
|
| 406 |
+
st.markdown("### 🎬 Result")
|
| 407 |
+
|
| 408 |
+
if translate_btn and text_input:
|
| 409 |
+
with st.spinner("🔄 Processing..."):
|
| 410 |
+
progress = st.progress(0)
|
| 411 |
+
status_text = st.empty()
|
| 412 |
+
|
| 413 |
+
try:
|
| 414 |
+
status_text.text("📝 Translating...")
|
| 415 |
+
progress.progress(33)
|
| 416 |
+
|
| 417 |
+
translated, audio_path, gif_path = process_translation_pipeline(
|
| 418 |
+
text_input,
|
| 419 |
+
source_language,
|
| 420 |
+
target_language,
|
| 421 |
+
selected_avatar
|
| 422 |
+
)
|
| 423 |
+
|
| 424 |
+
status_text.text("🗣️ Generating speech...")
|
| 425 |
+
progress.progress(66)
|
| 426 |
+
|
| 427 |
+
status_text.text("🎬 Creating animation...")
|
| 428 |
+
progress.progress(100)
|
| 429 |
+
|
| 430 |
+
progress.empty()
|
| 431 |
+
status_text.empty()
|
| 432 |
+
|
| 433 |
+
# Display translated text
|
| 434 |
+
st.markdown(
|
| 435 |
+
f'<div class="result-box">'
|
| 436 |
+
f'<h4>📜 Translated Text:</h4>'
|
| 437 |
+
f'<p style="font-size: 1.2rem;">{translated}</p>'
|
| 438 |
+
f'</div>',
|
| 439 |
+
unsafe_allow_html=True
|
| 440 |
+
)
|
| 441 |
+
|
| 442 |
+
# Audio player
|
| 443 |
+
if audio_path and os.path.exists(audio_path):
|
| 444 |
+
st.markdown("#### 🔊 Audio")
|
| 445 |
+
st.audio(audio_path, format="audio/mp3")
|
| 446 |
+
|
| 447 |
+
# Animation display
|
| 448 |
+
if gif_path and os.path.exists(gif_path):
|
| 449 |
+
st.markdown("#### 🎭 Lip-Sync Animation")
|
| 450 |
+
st.image(gif_path, width="stretch")
|
| 451 |
+
|
| 452 |
+
with open(gif_path, "rb") as f:
|
| 453 |
+
st.download_button(
|
| 454 |
+
label="📥 Download Animation",
|
| 455 |
+
data=f,
|
| 456 |
+
file_name="lipsync_animation.gif",
|
| 457 |
+
mime="image/gif"
|
| 458 |
+
)
|
| 459 |
+
else:
|
| 460 |
+
st.info("ℹ️ Animation not available (FFmpeg may be missing)")
|
| 461 |
+
|
| 462 |
+
except Exception as e:
|
| 463 |
+
progress.empty()
|
| 464 |
+
status_text.empty()
|
| 465 |
+
st.error(f"❌ Error: {str(e)}")
|
| 466 |
+
|
| 467 |
+
elif translate_btn:
|
| 468 |
+
st.warning("⚠️ Please enter some text to translate.")
|
| 469 |
+
|
| 470 |
+
|
| 471 |
+
# =============================================================================
|
| 472 |
+
# Tab 2: Voice Input
|
| 473 |
+
# =============================================================================
|
| 474 |
+
|
| 475 |
+
with tab2:
|
| 476 |
+
col1, col2 = st.columns([1, 1])
|
| 477 |
+
|
| 478 |
+
with col1:
|
| 479 |
+
st.markdown("### 🎤 Voice Recording")
|
| 480 |
+
|
| 481 |
+
st.markdown("""
|
| 482 |
+
<div class="info-box">
|
| 483 |
+
<strong>Instructions:</strong><br>
|
| 484 |
+
1. Upload an audio file (WAV, MP3, etc.)<br>
|
| 485 |
+
2. Or use the audio recorder below<br>
|
| 486 |
+
3. Click "Transcribe & Translate"
|
| 487 |
+
</div>
|
| 488 |
+
""", unsafe_allow_html=True)
|
| 489 |
+
|
| 490 |
+
uploaded_audio = st.file_uploader(
|
| 491 |
+
"Upload an audio file",
|
| 492 |
+
type=["wav", "mp3", "ogg", "flac", "m4a"],
|
| 493 |
+
help="Supported formats: WAV, MP3, OGG, FLAC, M4A"
|
| 494 |
+
)
|
| 495 |
+
|
| 496 |
+
recorded_audio = None
|
| 497 |
+
try:
|
| 498 |
+
from audio_recorder_streamlit import audio_recorder
|
| 499 |
+
st.markdown("**Or record directly:**")
|
| 500 |
+
recorded_audio = audio_recorder(
|
| 501 |
+
text="🎙️ Click to record",
|
| 502 |
+
recording_color="#e94560",
|
| 503 |
+
neutral_color="#6c757d",
|
| 504 |
+
icon_name="microphone",
|
| 505 |
+
icon_size="2x"
|
| 506 |
+
)
|
| 507 |
+
except ImportError:
|
| 508 |
+
st.info("💡 For recording: `pip install audio-recorder-streamlit`")
|
| 509 |
+
|
| 510 |
+
voice_lang = st.selectbox(
|
| 511 |
+
"Recording Language",
|
| 512 |
+
options=["en", "hi"],
|
| 513 |
+
format_func=lambda x: {"en": "🇬🇧 English", "hi": "🇮🇳 Hindi"}[x]
|
| 514 |
+
)
|
| 515 |
+
|
| 516 |
+
voice_btn = st.button(
|
| 517 |
+
"🎯 Transcribe & Translate",
|
| 518 |
+
key="voice_btn",
|
| 519 |
+
use_container_width=True
|
| 520 |
+
)
|
| 521 |
+
|
| 522 |
+
with col2:
|
| 523 |
+
st.markdown("### 🎬 Result")
|
| 524 |
+
|
| 525 |
+
audio_to_process = None
|
| 526 |
+
|
| 527 |
+
if uploaded_audio is not None:
|
| 528 |
+
temp_audio_path = TEMP_DIR / f"uploaded_{int(time.time()*1000)}.wav"
|
| 529 |
+
with open(temp_audio_path, "wb") as f:
|
| 530 |
+
f.write(uploaded_audio.getbuffer())
|
| 531 |
+
audio_to_process = str(temp_audio_path)
|
| 532 |
+
st.audio(uploaded_audio)
|
| 533 |
+
|
| 534 |
+
elif recorded_audio is not None:
|
| 535 |
+
temp_audio_path = TEMP_DIR / f"recorded_{int(time.time()*1000)}.wav"
|
| 536 |
+
with open(temp_audio_path, "wb") as f:
|
| 537 |
+
f.write(recorded_audio)
|
| 538 |
+
audio_to_process = str(temp_audio_path)
|
| 539 |
+
st.audio(recorded_audio, format="audio/wav")
|
| 540 |
+
|
| 541 |
+
if voice_btn:
|
| 542 |
+
if audio_to_process:
|
| 543 |
+
with st.spinner("🔄 Processing voice..."):
|
| 544 |
+
try:
|
| 545 |
+
st.text("🎤 Transcribing...")
|
| 546 |
+
lang_code = get_language_code(voice_lang)
|
| 547 |
+
transcribed_text, success = transcribe_audio(audio_to_process, lang_code)
|
| 548 |
+
|
| 549 |
+
if success:
|
| 550 |
+
st.markdown(
|
| 551 |
+
f'<div class="success-box">'
|
| 552 |
+
f'<strong>📝 Transcribed:</strong> {transcribed_text}'
|
| 553 |
+
f'</div>',
|
| 554 |
+
unsafe_allow_html=True
|
| 555 |
+
)
|
| 556 |
+
|
| 557 |
+
translated, audio_path, gif_path = process_translation_pipeline(
|
| 558 |
+
transcribed_text,
|
| 559 |
+
voice_lang,
|
| 560 |
+
target_language,
|
| 561 |
+
selected_avatar
|
| 562 |
+
)
|
| 563 |
+
|
| 564 |
+
st.markdown(
|
| 565 |
+
f'<div class="result-box">'
|
| 566 |
+
f'<h4>📜 Translated:</h4>'
|
| 567 |
+
f'<p style="font-size: 1.2rem;">{translated}</p>'
|
| 568 |
+
f'</div>',
|
| 569 |
+
unsafe_allow_html=True
|
| 570 |
+
)
|
| 571 |
+
|
| 572 |
+
if audio_path and os.path.exists(audio_path):
|
| 573 |
+
st.markdown("#### 🔊 Audio")
|
| 574 |
+
st.audio(audio_path, format="audio/mp3")
|
| 575 |
+
|
| 576 |
+
if gif_path and os.path.exists(gif_path):
|
| 577 |
+
st.markdown("#### 🎭 Animation")
|
| 578 |
+
st.image(gif_path, width="stretch")
|
| 579 |
+
|
| 580 |
+
with open(gif_path, "rb") as f:
|
| 581 |
+
st.download_button(
|
| 582 |
+
label="📥 Download",
|
| 583 |
+
data=f,
|
| 584 |
+
file_name="lipsync.gif",
|
| 585 |
+
mime="image/gif"
|
| 586 |
+
)
|
| 587 |
+
else:
|
| 588 |
+
st.error(f"❌ {transcribed_text}")
|
| 589 |
+
except Exception as e:
|
| 590 |
+
st.error(f"❌ Error: {str(e)}")
|
| 591 |
+
else:
|
| 592 |
+
st.warning("⚠️ Please upload or record audio first.")
|
| 593 |
+
|
| 594 |
+
|
| 595 |
+
# =============================================================================
|
| 596 |
+
# Footer
|
| 597 |
+
# =============================================================================
|
| 598 |
+
|
| 599 |
+
st.markdown("---")
|
| 600 |
+
st.markdown(
|
| 601 |
+
"""
|
| 602 |
+
<div style="text-align: center; color: #666; padding: 1rem;">
|
| 603 |
+
<p>Made By Praveen</p>
|
| 604 |
+
</div>
|
| 605 |
+
""",
|
| 606 |
+
unsafe_allow_html=True
|
| 607 |
+
)
|
avatars/sample/base.png
ADDED
|
|
avatars/sample/mouth_0.png
ADDED
|
|
avatars/sample/mouth_1.png
ADDED
|
|
avatars/sample/mouth_2.png
ADDED
|
|
requirements.txt
CHANGED
|
@@ -1,3 +1,11 @@
|
|
| 1 |
-
|
| 2 |
-
|
| 3 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
streamlit>=1.28.0
|
| 2 |
+
deep-translator>=1.11.4
|
| 3 |
+
gTTS>=2.4.0
|
| 4 |
+
pydub>=0.25.1
|
| 5 |
+
Pillow>=10.0.0
|
| 6 |
+
imageio>=2.31.0
|
| 7 |
+
numpy>=1.24.0
|
| 8 |
+
SpeechRecognition>=3.10.0
|
| 9 |
+
streamlit-webrtc>=0.47.0
|
| 10 |
+
av>=10.0.0
|
| 11 |
+
audio-recorder-streamlit>=0.0.8
|
utils/__init__.py
ADDED
|
@@ -0,0 +1,16 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
"""
|
| 2 |
+
Utility modules for the Anime Translator application.
|
| 3 |
+
|
| 4 |
+
This package contains:
|
| 5 |
+
- translator: Text translation between languages
|
| 6 |
+
- tts_engine: Text-to-speech synthesis
|
| 7 |
+
- lipsync: Lip-sync animation generation
|
| 8 |
+
- speech_to_text: Voice input processing
|
| 9 |
+
- avatar_manager: Avatar image management
|
| 10 |
+
"""
|
| 11 |
+
|
| 12 |
+
from .translator import translate_text, detect_language
|
| 13 |
+
from .tts_engine import synthesize_speech
|
| 14 |
+
from .lipsync import generate_lipsync_gif, audio_to_rms_chunks
|
| 15 |
+
from .speech_to_text import transcribe_audio
|
| 16 |
+
from .avatar_manager import list_avatars, ensure_sample_avatar, get_avatar_preview
|
utils/__pycache__/__init__.cpython-310.pyc
ADDED
|
Binary file (845 Bytes). View file
|
|
|
utils/__pycache__/avatar_manager.cpython-310.pyc
ADDED
|
Binary file (4.47 kB). View file
|
|
|
utils/__pycache__/lipsync.cpython-310.pyc
ADDED
|
Binary file (5.35 kB). View file
|
|
|
utils/__pycache__/speech_to_text.cpython-310.pyc
ADDED
|
Binary file (3.06 kB). View file
|
|
|
utils/__pycache__/translator.cpython-310.pyc
ADDED
|
Binary file (2.18 kB). View file
|
|
|
utils/__pycache__/tts_engine.cpython-310.pyc
ADDED
|
Binary file (2.3 kB). View file
|
|
|
utils/avatar_manager.py
ADDED
|
@@ -0,0 +1,189 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
"""
|
| 2 |
+
Avatar Manager Module
|
| 3 |
+
=====================
|
| 4 |
+
Handles avatar discovery, creation, and management.
|
| 5 |
+
|
| 6 |
+
Functions:
|
| 7 |
+
- ensure_sample_avatar: Create default sample avatar
|
| 8 |
+
- list_avatars: Get list of available avatars
|
| 9 |
+
- get_avatar_preview: Get preview image of an avatar
|
| 10 |
+
"""
|
| 11 |
+
|
| 12 |
+
from PIL import Image, ImageDraw
|
| 13 |
+
from pathlib import Path
|
| 14 |
+
from typing import List, Optional
|
| 15 |
+
import numpy as np
|
| 16 |
+
|
| 17 |
+
|
| 18 |
+
def ensure_sample_avatar(avatars_dir: Path) -> None:
    """
    Create a default 'sample' avatar if one does not already exist.

    Produces a simple cartoon face (base.png) plus three transparent
    mouth overlay frames (mouth_0.png .. mouth_2.png) ranging from
    nearly closed to wide open, which the lip-sync animator composites
    over the base face.

    Args:
        avatars_dir: Base directory that holds one sub-folder per avatar.

    Note:
        This is only a basic placeholder avatar; custom artwork gives
        better results.
    """
    sample_dir = avatars_dir / "sample"

    # Nothing to do if the sample avatar is already populated
    if sample_dir.exists() and any(sample_dir.iterdir()):
        return

    sample_dir.mkdir(parents=True, exist_ok=True)

    # Canvas size shared by the base image and every mouth overlay
    canvas = (512, 512)

    # --- Base face -----------------------------------------------------
    face = Image.new("RGBA", canvas, (255, 220, 200, 255))
    pen = ImageDraw.Draw(face)

    # Head outline
    pen.ellipse([56, 56, 456, 456], fill=(255, 230, 210, 255),
                outline=(200, 150, 130, 255), width=3)

    # Eye whites
    pen.ellipse([150, 180, 200, 230], fill=(255, 255, 255, 255),
                outline=(0, 0, 0, 255), width=2)
    pen.ellipse([312, 180, 362, 230], fill=(255, 255, 255, 255),
                outline=(0, 0, 0, 255), width=2)

    # Pupils
    pen.ellipse([165, 195, 185, 215], fill=(50, 50, 50, 255))
    pen.ellipse([327, 195, 347, 215], fill=(50, 50, 50, 255))

    # Eyebrows
    pen.arc([140, 150, 210, 190], start=200, end=340,
            fill=(100, 70, 50, 255), width=3)
    pen.arc([302, 150, 372, 190], start=200, end=340,
            fill=(100, 70, 50, 255), width=3)

    # Nose
    pen.polygon([(256, 250), (240, 310), (272, 310)],
                fill=(240, 200, 180, 255))

    # Simple hair arc across the top of the head
    pen.arc([40, 20, 472, 300], start=180, end=360,
            fill=(80, 50, 30, 255), width=30)

    face.save(sample_dir / "base.png")

    # --- Mouth overlays ------------------------------------------------
    # (y_offset, mouth_height): closed -> slightly open -> wide open
    openness_levels = [
        (0, 8),   # mouth_0: nearly closed
        (0, 20),  # mouth_1: slightly open
        (0, 35),  # mouth_2: wide open
    ]

    for index, (y_shift, opening) in enumerate(openness_levels):
        # Fully transparent canvas; only the mouth pixels are opaque
        overlay = Image.new("RGBA", canvas, (0, 0, 0, 0))
        mouth_pen = ImageDraw.Draw(overlay)

        top = 340 + y_shift
        left, right = 200, 312

        # Outer lip shape
        mouth_pen.ellipse(
            [left, top, right, top + opening],
            fill=(180, 80, 80, 255),
            outline=(120, 50, 50, 255),
            width=2
        )

        # Darker inner mouth for the open frames only
        if opening > 15:
            inset = 5
            mouth_pen.ellipse(
                [left + inset, top + inset,
                 right - inset, top + opening - inset],
                fill=(100, 40, 40, 255)
            )

        overlay.save(sample_dir / f"mouth_{index}.png")
| 113 |
+
|
| 114 |
+
|
| 115 |
+
def list_avatars(avatars_dir: Path) -> List[str]:
    """
    Return the sorted names of all valid avatars.

    A folder counts as a valid avatar when it contains a base.png plus
    at least one mouth_*.png overlay. The built-in 'sample' avatar is
    created first if it is missing.

    Args:
        avatars_dir: Base directory containing avatar folders.

    Returns:
        Sorted list of avatar folder names (empty if the directory
        does not exist).

    Example:
        >>> list_avatars(Path("./avatars"))
        ['anime_boy', 'anime_girl', 'sample']
    """
    # Guarantee at least the built-in sample avatar exists
    ensure_sample_avatar(avatars_dir)

    if not avatars_dir.exists():
        return []

    def _is_valid(folder: Path) -> bool:
        # An avatar needs a base face and at least one mouth overlay
        return (folder / "base.png").exists() and any(folder.glob("mouth_*.png"))

    return sorted(
        folder.name
        for folder in avatars_dir.iterdir()
        if folder.is_dir() and _is_valid(folder)
    )
|
| 150 |
+
|
| 151 |
+
|
| 152 |
+
def get_avatar_preview(avatar_name: str, avatars_dir: Path) -> Optional[Image.Image]:
    """
    Build a preview image for an avatar.

    Loads the avatar's base face and, when available, composites the
    first mouth overlay on top so the preview shows a complete face.

    Args:
        avatar_name: Name of the avatar folder.
        avatars_dir: Base directory containing avatar folders.

    Returns:
        A PIL Image, or None when the avatar has no base.png.

    Example:
        >>> preview = get_avatar_preview("sample", Path("./avatars"))
        >>> preview.show()
    """
    folder = avatars_dir / avatar_name
    base_file = folder / "base.png"

    if not base_file.exists():
        return None

    face = Image.open(base_file).convert("RGBA")

    # First mouth frame (sorted, so mouth_0 when present)
    overlays = sorted(folder.glob("mouth_*.png"))
    if not overlays:
        return face

    mouth = Image.open(overlays[0]).convert("RGBA").resize(face.size)
    return Image.alpha_composite(face, mouth)
|
utils/lipsync.py
ADDED
|
@@ -0,0 +1,207 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
"""
|
| 2 |
+
Lip-Sync Animation Module
|
| 3 |
+
=========================
|
| 4 |
+
Generates animated GIFs with lip-sync based on audio amplitude.
|
| 5 |
+
|
| 6 |
+
Functions:
|
| 7 |
+
- audio_to_rms_chunks: Extract amplitude data from audio
|
| 8 |
+
- generate_lipsync_gif: Create lip-sync animation GIF
|
| 9 |
+
"""
|
| 10 |
+
|
| 11 |
+
from PIL import Image
|
| 12 |
+
import imageio
|
| 13 |
+
from pathlib import Path
|
| 14 |
+
import time
|
| 15 |
+
from typing import List, Optional
|
| 16 |
+
import os
|
| 17 |
+
|
| 18 |
+
|
| 19 |
+
def audio_to_rms_chunks(audio_path: str, chunk_ms: int = 80) -> List[float]:
    """
    Compute per-chunk RMS loudness values for an audio file.

    The audio is split into fixed-length chunks and the RMS (root mean
    square) amplitude of each chunk is returned, giving a coarse
    loudness envelope that drives mouth openness in the animation.

    Args:
        audio_path: Path to the audio file (any format pydub/ffmpeg reads).
        chunk_ms: Chunk length in milliseconds.

    Returns:
        One RMS value per chunk; a short fixed fallback pattern when the
        audio cannot be decoded (missing file, missing ffmpeg, ...).
    """
    try:
        from pydub import AudioSegment
        from pydub.utils import make_chunks

        if not os.path.exists(audio_path):
            raise FileNotFoundError(f"Audio file not found: {audio_path}")

        segment = AudioSegment.from_file(audio_path)

        # RMS per non-empty chunk
        loudness = [
            piece.rms
            for piece in make_chunks(segment, chunk_ms)
            if len(piece) > 0
        ]
        return loudness if loudness else [0]

    except Exception as e:
        print(f"Error processing audio: {e}")
        # Deterministic fallback so the animation still moves
        return [100, 200, 150, 300, 250, 100, 200, 150]
|
| 56 |
+
|
| 57 |
+
|
| 58 |
+
def audio_to_rms_chunks_simple(audio_path: str, chunk_ms: int = 80) -> List[float]:
    """
    Approximate RMS chunks without decoding the audio.

    Estimates the clip duration from the file size (assuming roughly a
    128 kbps MP3, i.e. ~16 KB/s) and synthesises a wave-shaped loudness
    pattern so the lip-sync still looks natural when pydub/ffmpeg fails.

    Args:
        audio_path: Path to the audio file (only its size is read).
        chunk_ms: Chunk length in milliseconds.

    Returns:
        Simulated RMS values (each >= 50); a fixed 10-value pattern when
        even the file size cannot be read.
    """
    import math

    try:
        size_bytes = os.path.getsize(audio_path)

        # ~128 kbps MP3 => roughly 16 KB per second of audio
        duration_sec = size_bytes / 16000

        # At least 10 chunks so very short clips still animate
        count = max(int(duration_sec * 1000 / chunk_ms), 10)

        # Two superimposed sine waves give a natural-looking envelope
        return [
            max(50, 150 + 100 * math.sin(i * 0.5) + 50 * math.sin(i * 1.2))
            for i in range(count)
        ]

    except Exception:
        return [100, 200, 150, 300, 250, 100, 200, 150, 100, 200]
|
| 93 |
+
|
| 94 |
+
|
| 95 |
+
def generate_lipsync_gif(
    avatar_name: str,
    audio_path: str,
    avatars_dir: Path,
    output_dir: Path,
    fps: int = 12,
    output_path: Optional[str] = None
) -> str:
    """
    Render a lip-sync animated GIF for an avatar speaking an audio clip.

    Pipeline:
      1. Measure audio loudness (RMS) in chunks of 1000/fps ms.
      2. Map each chunk's loudness to one of the avatar's mouth frames.
      3. Composite the chosen mouth frame over the base face.
      4. Write all frames out as a looping GIF.

    Args:
        avatar_name: Avatar folder name (e.g. 'sample').
        audio_path: Path to the audio file to synchronise with.
        avatars_dir: Base directory containing avatar folders.
        output_dir: Directory that receives the GIF.
        fps: Animation frame rate.
        output_path: Optional explicit output file path.

    Returns:
        Path of the generated GIF.

    Raises:
        FileNotFoundError: When base.png or the mouth frames are missing.
        ValueError: When no animation frames could be produced.
    """
    folder = avatars_dir / avatar_name
    base_file = folder / "base.png"
    mouth_files = sorted(folder.glob("mouth_*.png"))

    # Validate avatar assets up front
    if not base_file.exists():
        raise FileNotFoundError(f"Base image not found: {base_file}")
    if not mouth_files:
        raise FileNotFoundError(f"No mouth frames found in: {folder}")

    # Base face plus every mouth overlay, all at the same resolution
    face = Image.open(base_file).convert("RGBA")
    overlays = [
        Image.open(p).convert("RGBA").resize(face.size)
        for p in mouth_files
    ]

    # One audio chunk per animation frame
    chunk_ms = int(1000 / fps)

    # Primary amplitude analysis, with a size-estimate fallback
    try:
        loudness = audio_to_rms_chunks(audio_path, chunk_ms=chunk_ms)
    except Exception as e:
        print(f"Primary audio processing failed: {e}")
        print("Using fallback animation method...")
        loudness = audio_to_rms_chunks_simple(audio_path, chunk_ms=chunk_ms)

    # Guard against empty or completely silent analysis results
    if not loudness or all(v == 0 for v in loudness):
        loudness = audio_to_rms_chunks_simple(audio_path, chunk_ms=chunk_ms)

    # Normalisation peak (avoid division by zero)
    peak = max(loudness) if max(loudness) > 0 else 1

    frames = []
    n_overlays = len(overlays)

    for level in loudness:
        # Louder chunk -> wider mouth frame
        openness = level / peak
        idx = int(openness * (n_overlays - 1))
        idx = max(0, min(idx, n_overlays - 1))

        composed = Image.alpha_composite(face, overlays[idx])

        # GIF frames are RGB; flatten the alpha channel onto white
        flat = Image.new("RGB", composed.size, (255, 255, 255))
        flat.paste(composed, mask=composed.split()[-1] if composed.mode == 'RGBA' else None)

        frames.append(flat)

    output_dir.mkdir(parents=True, exist_ok=True)

    # Timestamped filename unless the caller chose one
    if output_path is None:
        stamp = int(time.time() * 1000)
        output_path = str(output_dir / f"lipsync_{stamp}.gif")

    if not frames:
        raise ValueError("No frames generated for animation")

    # loop=0 makes the GIF repeat forever
    imageio.mimsave(output_path, frames, fps=fps, loop=0)

    return output_path
|
utils/speech_to_text.py
ADDED
|
@@ -0,0 +1,113 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
"""
|
| 2 |
+
Speech-to-Text Module
|
| 3 |
+
=====================
|
| 4 |
+
Converts voice/audio input to text using speech recognition.
|
| 5 |
+
|
| 6 |
+
Functions:
|
| 7 |
+
- transcribe_audio: Convert audio file to text
|
| 8 |
+
- transcribe_from_microphone: Real-time microphone transcription
|
| 9 |
+
"""
|
| 10 |
+
|
| 11 |
+
import speech_recognition as sr
|
| 12 |
+
from pathlib import Path
|
| 13 |
+
from typing import Optional, Tuple
|
| 14 |
+
import tempfile
|
| 15 |
+
from pydub import AudioSegment
|
| 16 |
+
|
| 17 |
+
|
| 18 |
+
def transcribe_audio(
    audio_path: str,
    language: str = "en-US"
) -> Tuple[str, bool]:
    """
    Transcribe an audio file to text using Google Speech Recognition.

    Non-WAV inputs are first converted to a temporary WAV file (via
    pydub/ffmpeg); the temporary file is removed again after
    recognition.

    Args:
        audio_path: Path to the audio file.
        language: Language code for recognition
            - 'en-US' for English (US)
            - 'hi-IN' for Hindi (India)

    Returns:
        Tuple of (transcribed_text, success_flag)
        - If successful: (text, True)
        - If failed: (error_message, False)

    Example:
        >>> text, success = transcribe_audio("recording.wav", "en-US")
        >>> if success:
        ...     print(f"You said: {text}")
        ... else:
        ...     print(f"Error: {text}")

    Supported Formats:
        - WAV (recommended)
        - MP3
        - FLAC
        - OGG
    """
    recognizer = sr.Recognizer()
    temp_wav = None  # path of a converted WAV that must be cleaned up

    try:
        audio_path = Path(audio_path)

        if audio_path.suffix.lower() != '.wav':
            # Convert to WAV with pydub so speech_recognition can read it
            audio = AudioSegment.from_file(str(audio_path))

            # Reserve a temp filename, then let pydub write into it
            with tempfile.NamedTemporaryFile(suffix='.wav', delete=False) as tmp:
                temp_wav = tmp.name
            audio.export(temp_wav, format='wav')
            wav_path = temp_wav
        else:
            wav_path = str(audio_path)

        # Load audio file for recognition
        with sr.AudioFile(wav_path) as source:
            # Calibrate against background noise before recording
            recognizer.adjust_for_ambient_noise(source, duration=0.5)
            audio_data = recognizer.record(source)

        # Perform speech recognition (requires network access)
        text = recognizer.recognize_google(audio_data, language=language)

        return text, True

    except sr.UnknownValueError:
        return "Could not understand the audio. Please speak clearly.", False

    except sr.RequestError as e:
        return f"Speech recognition service error: {str(e)}", False

    except Exception as e:
        return f"Error processing audio: {str(e)}", False

    finally:
        # Bug fix: the converted temp WAV was created with delete=False
        # and previously never removed, leaking one file per call.
        if temp_wav is not None:
            try:
                Path(temp_wav).unlink()
            except OSError:
                pass
|
| 90 |
+
|
| 91 |
+
|
| 92 |
+
def get_language_code(lang: str) -> str:
    """
    Map a short language code to its full speech-recognition locale.

    Args:
        lang: Short code ('en', 'hi', or 'auto').

    Returns:
        Locale string understood by the recognizer; unknown codes and
        'auto' fall back to US English.

    Example:
        >>> get_language_code('en')
        'en-US'
        >>> get_language_code('hi')
        'hi-IN'
    """
    if lang == 'hi':
        return 'hi-IN'
    # 'en', 'auto', and anything unrecognised default to US English
    return 'en-US'
|
utils/translator.py
ADDED
|
@@ -0,0 +1,84 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
"""
|
| 2 |
+
Translation Module
|
| 3 |
+
==================
|
| 4 |
+
Handles text translation between English and Hindi using deep-translator.
|
| 5 |
+
|
| 6 |
+
Functions:
|
| 7 |
+
- detect_language: Auto-detect if text is English or Hindi
|
| 8 |
+
- translate_text: Translate text between languages
|
| 9 |
+
"""
|
| 10 |
+
|
| 11 |
+
from deep_translator import GoogleTranslator
|
| 12 |
+
from typing import Literal
|
| 13 |
+
|
| 14 |
+
|
| 15 |
+
def detect_language(text: str) -> Literal["en", "hi"]:
    """
    Classify text as Hindi or English.

    Any character in the Devanagari Unicode block (U+0900-U+097F)
    marks the text as Hindi; otherwise it is treated as English.

    Args:
        text: Input text string to analyze.

    Returns:
        'hi' if Devanagari characters are found, 'en' otherwise.

    Example:
        >>> detect_language("Hello World")
        'en'
        >>> detect_language("नमस्ते")
        'hi'
    """
    has_devanagari = any('\u0900' <= ch <= '\u097F' for ch in text)
    return "hi" if has_devanagari else "en"
|
| 38 |
+
|
| 39 |
+
|
| 40 |
+
def translate_text(
    text: str,
    source_lang: Literal["auto", "en", "hi"],
    target_lang: Literal["en", "hi"]
) -> str:
    """
    Translate text from source language to target language.

    Uses Google Translator via the deep-translator library for
    translations between English and Hindi. When source_lang is
    'auto', the source is detected from the text's script
    (see detect_language).

    Args:
        text: The text to translate.
        source_lang: Source language code ('auto', 'en', or 'hi').
        target_lang: Target language code ('en' or 'hi').

    Returns:
        Translated text string. Empty/whitespace-only input yields ''
        and text already in the target language is returned unchanged —
        both without contacting the translation service.
        (Note: the previous docstring claimed a ValueError was raised
        for empty input; the function has always returned '' instead.)

    Example:
        >>> translate_text("Hello", "en", "hi")
        'नमस्ते'
        >>> translate_text("नमस्ते", "auto", "en")
        'Hello'
    """
    # Empty input: nothing to translate (returned as '', not an error)
    if not text or not text.strip():
        return ""

    # Resolve 'auto' by inspecting the text's script
    if source_lang == "auto":
        source_lang = detect_language(text)

    # No network round-trip needed when languages already match
    if source_lang == target_lang:
        return text

    # Perform the translation via Google Translator
    translator = GoogleTranslator(source=source_lang, target=target_lang)
    return translator.translate(text)
|
utils/tts_engine.py
ADDED
|
@@ -0,0 +1,88 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
"""
|
| 2 |
+
Text-to-Speech Engine
|
| 3 |
+
=====================
|
| 4 |
+
Converts text to speech audio using Google Text-to-Speech (gTTS).
|
| 5 |
+
|
| 6 |
+
Functions:
|
| 7 |
+
- synthesize_speech: Convert text to MP3 audio file
|
| 8 |
+
- get_audio_duration: Get duration of audio file
|
| 9 |
+
"""
|
| 10 |
+
|
| 11 |
+
from gtts import gTTS
|
| 12 |
+
from pathlib import Path
|
| 13 |
+
import time
|
| 14 |
+
from typing import Literal
|
| 15 |
+
import os
|
| 16 |
+
|
| 17 |
+
|
| 18 |
+
def synthesize_speech(
    text: str,
    language: Literal["en", "hi"],
    output_dir: Path,
    slow: bool = False
) -> str:
    """
    Synthesize speech from text and write it to a timestamped MP3 file.

    Uses Google Text-to-Speech (gTTS); requires network access.

    Args:
        text: Text to convert to speech.
        language: 'en' for English, 'hi' for Hindi.
        output_dir: Directory for the generated file (created if needed).
        slow: If True, speak slowly (useful for language learning).

    Returns:
        Path (as a string) of the generated MP3 file.

    Raises:
        FileNotFoundError: If gTTS reported success but wrote no file.
        ValueError: If the generated file is empty.
    """
    target_dir = Path(output_dir)
    target_dir.mkdir(parents=True, exist_ok=True)

    # Millisecond timestamp keeps concurrent outputs from colliding
    mp3_file = target_dir / f"tts_{int(time.time() * 1000)}.mp3"

    try:
        # Synthesize and write the audio
        gTTS(text=text, lang=language, slow=slow).save(str(mp3_file))

        # Sanity-check the result before handing the path back
        if not mp3_file.exists():
            raise FileNotFoundError(f"TTS file was not created: {mp3_file}")
        if mp3_file.stat().st_size == 0:
            raise ValueError("TTS file is empty")

        return str(mp3_file)

    except Exception as e:
        print(f"TTS Error: {e}")
        raise
|
| 65 |
+
|
| 66 |
+
|
| 67 |
+
def get_audio_duration(audio_path: str) -> float:
    """
    Return the duration of an audio file in seconds.

    Tries an exact measurement with pydub first; if decoding fails
    (e.g. ffmpeg/pydub unavailable), estimates from the file size
    assuming a ~128 kbps MP3 (~16 KB per second). Returns 3.0 seconds
    when even the file size cannot be read.

    Args:
        audio_path: Path to the audio file.

    Returns:
        Duration in seconds (exact, estimated, or the 3.0 s default).
    """
    try:
        from pydub import AudioSegment
        # pydub reports length in milliseconds
        return len(AudioSegment.from_file(audio_path)) / 1000.0
    except Exception:
        pass

    # Fallback: size-based estimate (128 kbps MP3 ~= 16 KB per second)
    try:
        return os.path.getsize(audio_path) / 16000
    except Exception:
        return 3.0  # last-resort default
|